init
This commit is contained in:
247
db_include/tsearch/dicts/spell.h
Executable file
247
db_include/tsearch/dicts/spell.h
Executable file
@@ -0,0 +1,247 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* spell.h
|
||||
*
|
||||
* Declarations for ISpell dictionary
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/include/tsearch/dicts/spell.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef __SPELL_H__
|
||||
#define __SPELL_H__
|
||||
|
||||
#include "regex/regex.h"
|
||||
#include "tsearch/dicts/regis.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
|
||||
/*
|
||||
* SPNode and SPNodeData are used to represent prefix tree (Trie) to store
|
||||
* a words list.
|
||||
*/
|
||||
struct SPNode;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 val:8,
|
||||
isword:1,
|
||||
/* Stores compound flags listed below */
|
||||
compoundflag:4,
|
||||
/* Reference to an entry of the AffixData field */
|
||||
affix:19;
|
||||
struct SPNode *node;
|
||||
} SPNodeData;
|
||||
|
||||
/*
|
||||
* Names of FF_ are correlated with Hunspell options in affix file
|
||||
* http://hunspell.sourceforge.net/
|
||||
*/
|
||||
#define FF_COMPOUNDONLY 0x01
|
||||
#define FF_COMPOUNDBEGIN 0x02
|
||||
#define FF_COMPOUNDMIDDLE 0x04
|
||||
#define FF_COMPOUNDLAST 0x08
|
||||
#define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \
|
||||
FF_COMPOUNDLAST )
|
||||
#define FF_COMPOUNDFLAGMASK 0x0f
|
||||
|
||||
typedef struct SPNode
|
||||
{
|
||||
uint32 length;
|
||||
SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
|
||||
} SPNode;
|
||||
|
||||
#define SPNHDRSZ (offsetof(SPNode,data))
|
||||
|
||||
/*
|
||||
* Represents an entry in a words list.
|
||||
*/
|
||||
typedef struct spell_struct
|
||||
{
|
||||
union
|
||||
{
|
||||
/*
|
||||
* flag is filled in by NIImportDictionary(). After
|
||||
* NISortDictionary(), d is used instead of flag.
|
||||
*/
|
||||
char *flag;
|
||||
/* d is used in mkSPNode() */
|
||||
struct
|
||||
{
|
||||
/* Reference to an entry of the AffixData field */
|
||||
int affix;
|
||||
/* Length of the word */
|
||||
int len;
|
||||
} d;
|
||||
} p;
|
||||
char word[FLEXIBLE_ARRAY_MEMBER];
|
||||
} SPELL;
|
||||
|
||||
#define SPELLHDRSZ (offsetof(SPELL, word))
|
||||
|
||||
/*
|
||||
* If an affix uses a regex, we have to store that separately in a struct
|
||||
* that won't move around when arrays of affixes are enlarged or sorted.
|
||||
* This is so that it can be found to be cleaned up at context destruction.
|
||||
*/
|
||||
typedef struct aff_regex_struct
|
||||
{
|
||||
regex_t regex;
|
||||
MemoryContextCallback mcallback;
|
||||
} aff_regex_struct;
|
||||
|
||||
/*
|
||||
* Represents an entry in an affix list.
|
||||
*/
|
||||
typedef struct aff_struct
|
||||
{
|
||||
char *flag;
|
||||
/* FF_SUFFIX or FF_PREFIX */
|
||||
uint32 type:1,
|
||||
flagflags:7,
|
||||
issimple:1,
|
||||
isregis:1,
|
||||
replen:14;
|
||||
char *find;
|
||||
char *repl;
|
||||
union
|
||||
{
|
||||
aff_regex_struct *pregex;
|
||||
Regis regis;
|
||||
} reg;
|
||||
} AFFIX;
|
||||
|
||||
/*
|
||||
* affixes use dictionary flags too
|
||||
*/
|
||||
#define FF_COMPOUNDPERMITFLAG 0x10
|
||||
#define FF_COMPOUNDFORBIDFLAG 0x20
|
||||
#define FF_CROSSPRODUCT 0x40
|
||||
|
||||
/*
|
||||
* Don't change the order of these. Initialization sorts by these,
|
||||
* and expects prefixes to come first after sorting.
|
||||
*/
|
||||
#define FF_SUFFIX 1
|
||||
#define FF_PREFIX 0
|
||||
|
||||
/*
|
||||
* AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
|
||||
* an affix list.
|
||||
*/
|
||||
struct AffixNode;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32 val:8,
|
||||
naff:24;
|
||||
AFFIX **aff;
|
||||
struct AffixNode *node;
|
||||
} AffixNodeData;
|
||||
|
||||
typedef struct AffixNode
|
||||
{
|
||||
uint32 isvoid:1,
|
||||
length:31;
|
||||
AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
|
||||
} AffixNode;
|
||||
|
||||
#define ANHRDSZ (offsetof(AffixNode, data))
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char *affix;
|
||||
int len;
|
||||
bool issuffix;
|
||||
} CMPDAffix;
|
||||
|
||||
/*
|
||||
* Type of encoding affix flags in Hunspell dictionaries
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
FM_CHAR, /* one character (like ispell) */
|
||||
FM_LONG, /* two characters */
|
||||
FM_NUM /* number, >= 0 and < 65536 */
|
||||
} FlagMode;
|
||||
|
||||
/*
|
||||
* Structure to store Hunspell options. Flag representation depends on flag
|
||||
* type. These flags are about support of compound words.
|
||||
*/
|
||||
typedef struct CompoundAffixFlag
|
||||
{
|
||||
union
|
||||
{
|
||||
/* Flag name if flagMode is FM_CHAR or FM_LONG */
|
||||
char *s;
|
||||
/* Flag name if flagMode is FM_NUM */
|
||||
uint32 i;
|
||||
} flag;
|
||||
/* we don't have a bsearch_arg version, so, copy FlagMode */
|
||||
FlagMode flagMode;
|
||||
uint32 value;
|
||||
} CompoundAffixFlag;
|
||||
|
||||
#define FLAGNUM_MAXSIZE (1 << 16)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int maffixes;
|
||||
int naffixes;
|
||||
AFFIX *Affix;
|
||||
|
||||
AffixNode *Suffix;
|
||||
AffixNode *Prefix;
|
||||
|
||||
SPNode *Dictionary;
|
||||
/* Array of sets of affixes */
|
||||
char **AffixData;
|
||||
int lenAffixData;
|
||||
int nAffixData;
|
||||
bool useFlagAliases;
|
||||
|
||||
CMPDAffix *CompoundAffix;
|
||||
|
||||
bool usecompound;
|
||||
FlagMode flagMode;
|
||||
|
||||
/*
|
||||
* All follow fields are actually needed only for initialization
|
||||
*/
|
||||
|
||||
/* Array of Hunspell options in affix file */
|
||||
CompoundAffixFlag *CompoundAffixFlags;
|
||||
/* number of entries in CompoundAffixFlags array */
|
||||
int nCompoundAffixFlag;
|
||||
/* allocated length of CompoundAffixFlags array */
|
||||
int mCompoundAffixFlag;
|
||||
|
||||
/*
|
||||
* Remaining fields are only used during dictionary construction; they are
|
||||
* set up by NIStartBuild and cleared by NIFinishBuild.
|
||||
*/
|
||||
MemoryContext buildCxt; /* temp context for construction */
|
||||
|
||||
/* Temporary array of all words in the dict file */
|
||||
SPELL **Spell;
|
||||
int nspell; /* number of valid entries in Spell array */
|
||||
int mspell; /* allocated length of Spell array */
|
||||
|
||||
/* These are used to allocate "compact" data without palloc overhead */
|
||||
char *firstfree; /* first free address (always maxaligned) */
|
||||
size_t avail; /* free space remaining at firstfree */
|
||||
} IspellDict;
|
||||
|
||||
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
|
||||
|
||||
extern void NIStartBuild(IspellDict *Conf);
|
||||
extern void NIImportAffixes(IspellDict *Conf, const char *filename);
|
||||
extern void NIImportDictionary(IspellDict *Conf, const char *filename);
|
||||
extern void NISortDictionary(IspellDict *Conf);
|
||||
extern void NISortAffixes(IspellDict *Conf);
|
||||
extern void NIFinishBuild(IspellDict *Conf);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user