This commit is contained in:
blue-lemon0104
2026-04-07 13:35:22 +08:00
commit 0120fa9ce3
1530 changed files with 424864 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
/*-------------------------------------------------------------------------
*
* regis.h
*
* Declarations for fast regex subset, used by ISpell
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/dicts/regis.h
*
*-------------------------------------------------------------------------
*/
#ifndef __REGIS_H__
#define __REGIS_H__
/*
* One node of a Regis pattern.  A fixed header (bit-fields plus the "next"
* pointer) is followed by a variable-length byte array, data[].
*/
typedef struct RegisNode
{
uint32
type:2, /* NOTE(review): presumably RSF_ONEOF or RSF_NONEOF -- confirm */
len:16, /* NOTE(review): presumably the number of bytes in data[] -- confirm */
unused:14; /* pads the bit-fields out to 32 bits */
struct RegisNode *next; /* pointer to another RegisNode */
unsigned char data[FLEXIBLE_ARRAY_MEMBER];
} RegisNode;
/* Size of the fixed RegisNode header, i.e. the offset of data[] */
#define RNHDRSZ (offsetof(RegisNode,data))
/* Node kind codes; both values fit in the 2-bit RegisNode.type field */
#define RSF_ONEOF 1
#define RSF_NONEOF 2
/*
* Handle for a Regis pattern: a pointer to a RegisNode plus packed
* bookkeeping bits (1 + 16 + 15 = 32 bits).
*/
typedef struct Regis
{
RegisNode *node;
uint32
issuffix:1, /* NOTE(review): presumably the issuffix argument of RS_compile() */
nchar:16, /* NOTE(review): presumably a character count for the pattern -- confirm */
unused:15;
} Regis;
/*
* Regis API.  The definitions are not part of this header; only the
* signatures are visible here.
*/
bool RS_isRegis(const char *str);
void RS_compile(Regis *r, bool issuffix, const char *str);
void RS_free(Regis *r);
/* returns true if matches */
bool RS_execute(Regis *r, char *str);
#endif

247
db_include/tsearch/dicts/spell.h Executable file
View File

@@ -0,0 +1,247 @@
/*-------------------------------------------------------------------------
*
* spell.h
*
* Declarations for ISpell dictionary
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/dicts/spell.h
*
*-------------------------------------------------------------------------
*/
#ifndef __SPELL_H__
#define __SPELL_H__
#include "regex/regex.h"
#include "tsearch/dicts/regis.h"
#include "tsearch/ts_public.h"
/*
* SPNode and SPNodeData are used to represent prefix tree (Trie) to store
* a words list.
*
* SPNodeData is one entry of an SPNode; its bit-fields total 32 bits
* (8 + 1 + 4 + 19).
*/
/* Forward declaration so that SPNodeData can hold a pointer to an SPNode */
struct SPNode;
typedef struct
{
/* NOTE(review): presumably the byte value this entry matches -- confirm */
uint32 val:8,
/* NOTE(review): presumably set when a word ends at this entry -- confirm */
isword:1,
/* Stores compound flags listed below */
compoundflag:4,
/* Reference to an entry of the AffixData field */
affix:19;
struct SPNode *node;
} SPNodeData;
/*
* Compound-word flag bits.  The FF_ names are correlated with Hunspell
* options in the affix file (http://hunspell.sourceforge.net/).
*/
#define FF_COMPOUNDONLY (1 << 0)
#define FF_COMPOUNDBEGIN (1 << 1)
#define FF_COMPOUNDMIDDLE (1 << 2)
#define FF_COMPOUNDLAST (1 << 3)
/* Union of the three positional bits (begin, middle, last) */
#define FF_COMPOUNDFLAG (FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST)
/* Mask covering all four compound bits defined above */
#define FF_COMPOUNDFLAGMASK ((1 << 4) - 1)
/*
* A trie node: "length" followed by a variable-length array of SPNodeData
* entries.  NOTE(review): "length" is presumably the number of entries in
* data[] -- confirm against the code that builds the trie.
*/
typedef struct SPNode
{
uint32 length;
SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
} SPNode;
/* Size of the fixed SPNode header, i.e. the offset of data[] */
#define SPNHDRSZ (offsetof(SPNode,data))
/*
* Represents an entry in a words list.
*
* The fixed header is the union "p"; the word itself is stored inline in
* the variable-length array that follows it.
*/
typedef struct spell_struct
{
union
{
/*
* flag is filled in by NIImportDictionary(). After
* NISortDictionary(), d is used instead of flag.
*/
char *flag;
/* d is used in mkSPNode() */
struct
{
/* Reference to an entry of the AffixData field */
int affix;
/* Length of the word */
int len;
} d;
} p;
/* The word text, stored inline after the header */
char word[FLEXIBLE_ARRAY_MEMBER];
} SPELL;
/* Size of the fixed SPELL header, i.e. the offset of word[] */
#define SPELLHDRSZ (offsetof(SPELL, word))
/*
* If an affix uses a regex, we have to store that separately in a struct
* that won't move around when arrays of affixes are enlarged or sorted.
* This is so that it can be found to be cleaned up at context destruction.
*/
typedef struct aff_regex_struct
{
/* The regex used by the affix */
regex_t regex;
/* Memory-context callback; per the note above, used for the cleanup */
MemoryContextCallback mcallback;
} aff_regex_struct;
/*
* Represents an entry in an affix list.
*
* The bit-fields use 24 of the 32 bits (1 + 7 + 1 + 1 + 14).
*/
typedef struct aff_struct
{
char *flag;
/* FF_SUFFIX or FF_PREFIX */
uint32 type:1,
/* NOTE(review): presumably holds FF_* flag bits -- confirm */
flagflags:7,
issimple:1,
/* NOTE(review): presumably selects reg.regis over reg.pregex -- confirm */
isregis:1,
/* NOTE(review): presumably the length of repl -- confirm */
replen:14;
char *find;
char *repl;
/* Matcher for the affix: either a separately stored regex or an inline Regis */
union
{
aff_regex_struct *pregex;
Regis regis;
} reg;
} AFFIX;
/*
* affixes use dictionary flags too
*
* These bit values (0x10, 0x20, 0x40) lie above FF_COMPOUNDFLAGMASK (0x0f),
* so they do not collide with the compound flag bits.
*/
#define FF_COMPOUNDPERMITFLAG 0x10
#define FF_COMPOUNDFORBIDFLAG 0x20
#define FF_CROSSPRODUCT 0x40
/*
* Don't change the order of these. Initialization sorts by these,
* and expects prefixes to come first after sorting.
* (FF_PREFIX is 0 and FF_SUFFIX is 1; both fit the 1-bit AFFIX.type field.)
*/
#define FF_SUFFIX 1
#define FF_PREFIX 0
/*
* AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
* an affix list.
*
* AffixNodeData is one entry of an AffixNode.
*/
/* Forward declaration so that AffixNodeData can hold a pointer to an AffixNode */
struct AffixNode;
typedef struct
{
/* NOTE(review): presumably the byte value this entry matches -- confirm */
uint32 val:8,
/* NOTE(review): presumably the number of pointers in aff[] -- confirm */
naff:24;
AFFIX **aff;
struct AffixNode *node;
} AffixNodeData;
/*
* A node of the affix trie: a 32-bit header (1-bit isvoid + 31-bit length)
* followed by a variable-length array of AffixNodeData entries.
* NOTE(review): "length" is presumably the number of entries in data[].
*/
typedef struct AffixNode
{
uint32 isvoid:1,
length:31;
AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
} AffixNode;
/*
* Size of the fixed AffixNode header, i.e. the offset of data[].
* Note the spelling: the macro is named ANHRDSZ, not ANHDRSZ.
*/
#define ANHRDSZ (offsetof(AffixNode, data))
/*
* An affix string with its length and a suffix/prefix marker.
* NOTE(review): judging by the name and by IspellDict.CompoundAffix, this is
* presumably used for compound-word handling -- confirm in spell.c.
*/
typedef struct
{
char *affix;
int len; /* NOTE(review): presumably the length of affix -- confirm */
bool issuffix;
} CMPDAffix;
/*
* Type of encoding affix flags in Hunspell dictionaries
*
* Each enumerator names one way a flag can be written in the dictionary.
*/
typedef enum
{
FM_CHAR, /* one character (like ispell) */
FM_LONG, /* two characters */
FM_NUM /* number, >= 0 and < 65536 */
} FlagMode;
/*
* Structure to store Hunspell options. Flag representation depends on flag
* type. These flags are about support of compound words.
*/
typedef struct CompoundAffixFlag
{
/* Which member is valid depends on flagMode below */
union
{
/* Flag name if flagMode is FM_CHAR or FM_LONG */
char *s;
/* Flag name if flagMode is FM_NUM */
uint32 i;
} flag;
/* we don't have a bsearch_arg version, so, copy FlagMode */
FlagMode flagMode;
/* NOTE(review): presumably a mask of FF_* bits for this flag -- confirm */
uint32 value;
} CompoundAffixFlag;
/*
* 65536.  NOTE(review): presumably the exclusive upper bound for FM_NUM flag
* values, matching the "< 65536" range noted on FlagMode -- confirm.
*/
#define FLAGNUM_MAXSIZE (1 << 16)
/*
* Complete state of one ISpell dictionary: the affix list and its tries, the
* word trie, flag-handling settings, and scratch fields that are only
* meaningful while the dictionary is being built.
*/
typedef struct
{
/* NOTE(review): presumably allocated (m) and used (n) lengths of Affix[] */
int maffixes;
int naffixes;
AFFIX *Affix;
/* Affix tries */
AffixNode *Suffix;
AffixNode *Prefix;
/* Word trie */
SPNode *Dictionary;
/* Array of sets of affixes */
char **AffixData;
int lenAffixData;
int nAffixData;
bool useFlagAliases;
CMPDAffix *CompoundAffix;
bool usecompound;
FlagMode flagMode;
/*
* All following fields are actually needed only for initialization
*/
/* Array of Hunspell options in affix file */
CompoundAffixFlag *CompoundAffixFlags;
/* number of entries in CompoundAffixFlags array */
int nCompoundAffixFlag;
/* allocated length of CompoundAffixFlags array */
int mCompoundAffixFlag;
/*
* Remaining fields are only used during dictionary construction; they are
* set up by NIStartBuild and cleared by NIFinishBuild.
*/
MemoryContext buildCxt; /* temp context for construction */
/* Temporary array of all words in the dict file */
SPELL **Spell;
int nspell; /* number of valid entries in Spell array */
int mspell; /* allocated length of Spell array */
/* These are used to allocate "compact" data without palloc overhead */
char *firstfree; /* first free address (always maxaligned) */
size_t avail; /* free space remaining at firstfree */
} IspellDict;
/*
* ISpell dictionary API.  Per the comments on IspellDict and SPELL in this
* header: NIStartBuild sets up and NIFinishBuild clears the build-only
* fields; NIImportDictionary fills SPELL.p.flag, and after NISortDictionary
* SPELL.p.d is used instead.
*/
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
extern void NIStartBuild(IspellDict *Conf);
extern void NIImportAffixes(IspellDict *Conf, const char *filename);
extern void NIImportDictionary(IspellDict *Conf, const char *filename);
extern void NISortDictionary(IspellDict *Conf);
extern void NISortAffixes(IspellDict *Conf);
extern void NIFinishBuild(IspellDict *Conf);
#endif

98
db_include/tsearch/ts_cache.h Executable file
View File

@@ -0,0 +1,98 @@
/*-------------------------------------------------------------------------
*
* ts_cache.h
* Tsearch related object caches.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/tsearch/ts_cache.h
*
*-------------------------------------------------------------------------
*/
#ifndef TS_CACHE_H
#define TS_CACHE_H
#include "utils/guc.h"
/*
* All TS*CacheEntry structs must share this common header
* (see InvalidateTSCacheCallBack)
*
* That is: an Oid first, immediately followed by a bool validity flag.
*/
typedef struct TSAnyCacheEntry
{
Oid objId;
bool isvalid;
} TSAnyCacheEntry;
/*
* Cache entry for a text search parser.  Starts with the (Oid, bool) pair
* that TSAnyCacheEntry requires.
*/
typedef struct TSParserCacheEntry
{
/* prsId is the hash lookup key and MUST BE FIRST */
Oid prsId; /* OID of the parser */
bool isvalid;
/* NOTE(review): presumably the OIDs of the parser's support functions */
Oid startOid;
Oid tokenOid;
Oid endOid;
Oid headlineOid;
Oid lextypeOid;
/*
* Pre-set-up fmgr call of most needed parser's methods
* (there is no FmgrInfo counterpart for lextypeOid)
*/
FmgrInfo prsstart;
FmgrInfo prstoken;
FmgrInfo prsend;
FmgrInfo prsheadline;
} TSParserCacheEntry;
/*
* Cache entry for a text search dictionary.  Starts with the (Oid, bool)
* pair that TSAnyCacheEntry requires.
*/
typedef struct TSDictionaryCacheEntry
{
/* dictId is the hash lookup key and MUST BE FIRST */
Oid dictId;
bool isvalid;
/* most frequent fmgr call */
Oid lexizeOid;
FmgrInfo lexize;
MemoryContext dictCtx; /* memory context to store private data */
/* NOTE(review): presumably the dictionary's private data kept in dictCtx */
void *dictData;
} TSDictionaryCacheEntry;
/*
* A counted array of dictionary OIDs.
* NOTE(review): "len" is presumably the number of entries in dictIds.
*/
typedef struct
{
int len;
Oid *dictIds;
} ListDictionary;
/*
* Cache entry for a text search configuration.  Starts with the (Oid, bool)
* pair that TSAnyCacheEntry requires.
*/
typedef struct
{
/* cfgId is the hash lookup key and MUST BE FIRST */
Oid cfgId;
bool isvalid;
Oid prsId;
/* NOTE(review): lenmap is presumably the number of entries in map[] */
int lenmap;
ListDictionary *map;
} TSConfigCacheEntry;
/*
* GUC variable for current configuration
*/
extern char *TSCurrentConfig;
/* Cache lookups; each takes the OID that is the entry's hash lookup key */
extern TSParserCacheEntry *lookup_ts_parser_cache(Oid prsId);
extern TSDictionaryCacheEntry *lookup_ts_dictionary_cache(Oid dictId);
extern TSConfigCacheEntry *lookup_ts_config_cache(Oid cfgId);
extern Oid getTSCurrentConfig(bool emitError);
/* NOTE(review): presumably the GUC check/assign hooks for TSCurrentConfig */
extern bool check_TSCurrentConfig(char **newval, void **extra, GucSource source);
extern void assign_TSCurrentConfig(const char *newval, void *extra);
#endif /* TS_CACHE_H */

63
db_include/tsearch/ts_locale.h Executable file
View File

@@ -0,0 +1,63 @@
/*-------------------------------------------------------------------------
*
* ts_locale.h
* locale compatibility layer for tsearch
*
* Copyright (c) 1998-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/ts_locale.h
*
*-------------------------------------------------------------------------
*/
#ifndef __TSLOCALE_H__
#define __TSLOCALE_H__
#include <ctype.h>
#include <limits.h>
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "utils/pg_locale.h"
/*
* towlower() and friends should be in <wctype.h>, but some pre-C99 systems
* declare them in <wchar.h>, so include that too.
*/
#include <wchar.h>
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif
/* working state for tsearch_readline (should be a local var in caller) */
typedef struct
{
FILE *fp; /* the input stream */
const char *filename;
int lineno;
StringInfoData buf; /* current input line, in UTF-8 */
char *curline; /* current input line, in DB's encoding */
/* curline may be NULL, or equal to buf.data, or a palloc'd string */
ErrorContextCallback cb; /* error-context callback entry */
} tsearch_readline_state;
/* Read the first byte at x as an unsigned char */
#define TOUCHAR(x) (((const unsigned char *) (x))[0])
/* The second argument of t_iseq() must be a plain ASCII character */
#define t_iseq(x,c) ((unsigned char) (c) == TOUCHAR(x))
/* Copy pg_mblen(s) bytes from s to d */
#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
/* Character-class tests; each takes a pointer to the character to test */
extern int t_isdigit(const char *ptr);
extern int t_isspace(const char *ptr);
extern int t_isalpha(const char *ptr);
extern int t_isprint(const char *ptr);
/* Lower-casing helpers; the _with_len variant takes an explicit length */
extern char *lowerstr(const char *str);
extern char *lowerstr_with_len(const char *str, int len);
/* Line reader API; all three calls share a tsearch_readline_state */
extern bool tsearch_readline_begin(tsearch_readline_state *stp,
const char *filename);
extern char *tsearch_readline(tsearch_readline_state *stp);
extern void tsearch_readline_end(tsearch_readline_state *stp);
#endif /* __TSLOCALE_H__ */

132
db_include/tsearch/ts_public.h Executable file
View File

@@ -0,0 +1,132 @@
/*-------------------------------------------------------------------------
*
* ts_public.h
* Public interface to various tsearch modules, such as
* parsers and dictionaries.
*
* Copyright (c) 1998-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/ts_public.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PG_TS_PUBLIC_H_
#define _PG_TS_PUBLIC_H_
#include "tsearch/ts_type.h"
/*
* Parser's framework
*/
/*
* returning type for prslextype method of parser
*
* Describes one lexeme type: a numeric id plus two strings.
*/
typedef struct
{
int lexid;
char *alias;
char *descr;
} LexDescr;
/*
* Interface to headline generator
*
* HeadlineWordEntry describes one word of the text being headlined.  The
* bit-fields total 32 bits (five 1-bit flags + 3 unused + 8 + 16).
*/
typedef struct
{
uint32 selected:1,
in:1,
replace:1,
repeated:1,
skip:1,
unused:3,
type:8,
len:16; /* NOTE(review): presumably the length of "word" -- confirm */
WordEntryPos pos;
char *word;
QueryOperand *item;
} HeadlineWordEntry;
/*
* Headline working set: an array of HeadlineWordEntry plus three marker
* strings, each with a separate int16 length field.
*/
typedef struct
{
HeadlineWordEntry *words;
/* NOTE(review): presumably allocated and used lengths of words[] */
int32 lenwords;
int32 curwords;
int32 vectorpos; /* positions a-la tsvector */
char *startsel;
char *stopsel;
char *fragdelim;
/* Lengths matching startsel, stopsel and fragdelim respectively */
int16 startsellen;
int16 stopsellen;
int16 fragdelimlen;
} HeadlineParsedText;
/*
* Common useful things for tsearch subsystem
*/
/* Returns a file name string built from a base name and an extension */
extern char *get_tsearch_config_filename(const char *basename,
const char *extension);
/*
* Often useful stopword list management
*/
/* NOTE(review): "len" is presumably the number of strings in stop[] */
typedef struct
{
int len;
char **stop;
} StopList;
/*
* readstoplist takes a file name, the list to fill, and a callback of type
* char *(*)(const char *).
*/
extern void readstoplist(const char *fname, StopList *s,
char *(*wordop) (const char *));
extern bool searchstoplist(StopList *s, char *key);
/*
* Interface with dictionaries
*/
/* return struct for any lexize function */
typedef struct
{
/*----------
* Number of current variant of split word. For example the Norwegian
* word 'fotballklubber' has two variants to split: ( fotball, klubb )
* and ( fot, ball, klubb ). So, dictionary should return:
*
* nvariant lexeme
* 1 fotball
* 1 klubb
* 2 fot
* 2 ball
* 2 klubb
*
* In general, a TSLexeme will be considered to belong to the same split
* variant as the previous one if they have the same nvariant value.
* The exact values don't matter, only changes from one lexeme to next.
*----------
*/
uint16 nvariant;
uint16 flags; /* See the TSL_* flag bits */
char *lexeme; /* C string */
} TSLexeme;
/* Flag bits that can appear in TSLexeme.flags (one bit each) */
#define TSL_ADDPOS (1 << 0)
#define TSL_PREFIX (1 << 1)
#define TSL_FILTER (1 << 2)
/*
* Struct for supporting complex dictionaries like thesaurus.
* 4th argument for dictlexize method is a pointer to this
*
* The "in"/"out" tags on the fields give the direction of each value.
*/
typedef struct
{
bool isend; /* in: marks for lexize_info about text end is
* reached */
bool getnext; /* out: dict wants next lexeme */
void *private_state; /* internal dict state between calls with
* getnext == true */
} DictSubState;
#endif /* _PG_TS_PUBLIC_H_ */

242
db_include/tsearch/ts_type.h Executable file
View File

@@ -0,0 +1,242 @@
/*-------------------------------------------------------------------------
*
* ts_type.h
* Definitions for the tsvector and tsquery types
*
* Copyright (c) 1998-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/ts_type.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PG_TSTYPE_H_
#define _PG_TSTYPE_H_
#include "fmgr.h"
#include "utils/memutils.h"
/*
* TSVector type.
*
* Structure of tsvector datatype:
* 1) standard varlena header
* 2) int32 size - number of lexemes (WordEntry array entries)
* 3) Array of WordEntry - one per lexeme; must be sorted according to
* tsCompareString() (ie, memcmp of lexeme strings).
* WordEntry->pos gives the number of bytes from end of WordEntry
* array to start of lexeme's string, which is of length len.
* 4) Per-lexeme data storage:
* lexeme string (not null-terminated)
* if haspos is true:
* padding byte if necessary to make the position data 2-byte aligned
* uint16 number of positions that follow
* WordEntryPos[] positions
*
* The positions for each lexeme must be sorted.
*
* Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
* (the bit-fields below add up to exactly 32 bits: 1 + 11 + 20).
*/
typedef struct
{
uint32
haspos:1,
len:11, /* MAX 2Kb */
pos:20; /* MAX 1Mb */
} WordEntry;
/* Largest values that fit in WordEntry.len (11 bits) and WordEntry.pos (20 bits) */
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
/*
* Comparator with the (const void *, const void *) -> int signature.
* NOTE(review): presumably compares two WordEntryPos values -- confirm.
*/
extern int compareWordEntryPos(const void *a, const void *b);
/*
* Equivalent to
* typedef struct {
* uint16
* weight:2,
* pos:14;
* }
*
* Stored as a plain uint16; use the WEP_* macros to read and write the
* weight and position parts.
*/
typedef uint16 WordEntryPos;
/* A count followed by a variable-length array of positions */
typedef struct
{
uint16 npos;
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
} WordEntryPosVector;
/* WordEntryPosVector with exactly 1 entry */
typedef struct
{
uint16 npos;
WordEntryPos pos[1];
} WordEntryPosVector1;
/*
* Accessors for a WordEntryPos: the weight lives in the bits above bit 13,
* the position in the low 14 bits.
*/
/* One more than the largest position that fits in 14 bits */
#define MAXENTRYPOS (1<<14)
#define MAXNUMPOS (256)
/* Readers */
#define WEP_GETWEIGHT(x) ( (x) >> 14 )
#define WEP_GETPOS(x) ( (x) & (MAXENTRYPOS - 1) )
/* Writers: replace one part of x and keep the other; x must be an lvalue */
#define WEP_SETWEIGHT(x,v) ( (x) = ( (x) & (MAXENTRYPOS - 1) ) | ( (v) << 14 ) )
#define WEP_SETPOS(x,v) ( (x) = ( (v) & (MAXENTRYPOS - 1) ) | ( (x) & 0xc000 ) )
/* Clamp x to at most MAXENTRYPOS-1 (note: x is evaluated twice) */
#define LIMITPOS(x) ( ( (x) < MAXENTRYPOS ) ? (x) : (MAXENTRYPOS-1) )
/* This struct represents a complete tsvector datum */
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 size; /* number of lexemes (WordEntry array entries) */
WordEntry entries[FLEXIBLE_ARRAY_MEMBER];
/* lexemes follow the entries[] array */
} TSVectorData;
/* TSVector is a pointer to a TSVectorData */
typedef TSVectorData *TSVector;
/* Size of the fixed TSVectorData header, i.e. the offset of entries[] */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
/* Header size plus nentries WordEntry structs plus lenstr more bytes */
#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )
/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x) ( (x)->entries )
/* pointer to start of a tsvector's lexeme storage */
#define STRPTR(x) ( (char *) &(x)->entries[(x)->size] )
/*
* Position vector of entry e in tsvector x: located at the SHORTALIGN'ed
* offset (e->pos + e->len) from the start of the lexeme storage.
*/
#define _POSVECPTR(x, e) ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
/* Number of positions stored for entry e, or 0 when e->haspos is not set */
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
/* Pointer to entry e's WordEntryPos array (does not itself check e->haspos) */
#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
/*
* fmgr interface macros
*
* Datum <-> TSVector conversions, plus argument-fetch and return helpers.
* Note that PG_RETURN_TSVECTOR expands to a "return" statement.
*/
#define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X))
#define TSVectorGetDatum(X) PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x)
/*
* TSQuery
*
* QueryItemType is the one-byte tag that starts every QueryItem variant.
*/
typedef int8 QueryItemType;
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3 /* This is only used in an intermediate stack
* representation in parse_tsquery. It's not a
* legal type elsewhere. */
/*
* QueryItem is one node in tsquery - operator or operand.
*
* QueryOperand is the operand variant of QueryItem.
*/
typedef struct
{
QueryItemType type; /* operand or kind of operator (ts_tokentype) */
uint8 weight; /* weights of operand to search. It's a
* bitmask of allowed weights. if it =0 then
* any weight are allowed. Weights and bit
* map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
bool prefix; /* true if it's a prefix search */
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
* data type, but we use comparisons to signed
* integers in the code. They would need to be
* changed as well. */
/* pointer to text value of operand, must correlate with WordEntry */
/* NOTE(review): distance is presumably an offset into the operand area */
uint32
length:12,
distance:20;
} QueryOperand;
/*
* Legal values for QueryOperator.operator.
*/
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
/* Number of operator codes; the codes run from 1 to OP_COUNT */
#define OP_COUNT 4
/* Priority table with one slot per operator code */
extern const int tsearch_op_priority[OP_COUNT];
/* get operation priority by its code (codes are 1-based, hence the - 1) */
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
/* get QueryOperator priority */
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
/* The operator variant of QueryItem */
typedef struct
{
QueryItemType type;
int8 oper; /* one of the OP_* codes */
int16 distance; /* distance between args for OP_PHRASE */
uint32 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
} QueryOperator;
/*
* Note: TSQuery is 4-bytes aligned, so make sure there's no fields
* inside QueryItem requiring 8-byte alignment, like int64.
*
* Every member starts with a QueryItemType, so "type" can be read to tell
* which variant is stored.
*/
typedef union
{
QueryItemType type;
QueryOperator qoperator;
QueryOperand qoperand;
} QueryItem;
/*
* Storage:
* (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
*/
typedef struct
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int32 size; /* number of QueryItems */
char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here */
} TSQueryData;
/* TSQuery is a pointer to a TSQueryData */
typedef TSQueryData *TSQuery;
#define HDRSIZETQ ( VARHDRSZ + sizeof(int32) )
/* Computes the size of header and all QueryItems. size is the number of
* QueryItems, and lenofoperand is the total length of all operands
*/
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
#define TSQUERY_TOO_BIG(size, lenofoperand) \
((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x) ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
/*
* fmgr interface macros
* Note, TSQuery type marked as plain storage, so it can't be toasted
* but PG_DETOAST_DATUM_COPY is used for simplicity
*
* DatumGetTSQuery is a plain pointer cast; only the Copy variant goes
* through PG_DETOAST_DATUM_COPY.  PG_RETURN_TSQUERY expands to a "return"
* statement.
*/
#define DatumGetTSQuery(X) ((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X) ((TSQuery) PG_DETOAST_DATUM_COPY(X))
#define TSQueryGetDatum(X) PointerGetDatum(X)
#define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x)
#endif /* _PG_TSTYPE_H_ */

266
db_include/tsearch/ts_utils.h Executable file
View File

@@ -0,0 +1,266 @@
/*-------------------------------------------------------------------------
*
* ts_utils.h
* helper utilities for tsearch
*
* Copyright (c) 1998-2021, PostgreSQL Global Development Group
*
* src/include/tsearch/ts_utils.h
*
*-------------------------------------------------------------------------
*/
#ifndef _PG_TS_UTILS_H_
#define _PG_TS_UTILS_H_
#include "nodes/pg_list.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
/*
* Common parse definitions for tsvector and tsquery
*/
/* tsvector parser support. */
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
/* Bits for the "flags" argument of init_tsvector_parser() */
#define P_TSV_OPR_IS_DELIM (1 << 0)
#define P_TSV_IS_TSQUERY (1 << 1)
#define P_TSV_IS_WEB (1 << 2)
/* Parser API; every call after init takes the TSVectorParseState handle */
extern TSVectorParseState init_tsvector_parser(char *input, int flags);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
/* Results are handed back through the pointer arguments */
extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len,
WordEntryPos **pos, int *poslen,
char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
/*
* True when x points at a character for which pg_mblen() returns 1 and that
* is one of: ! & | ( ) <
* The argument is evaluated more than once.
*/
/* phrase operator begins with '<' */
#define ISOPERATOR(x) \
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
/* parse_tsquery */
struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
typedef struct TSQueryParserStateData *TSQueryParserState;
/*
* Callback type passed to parse_tsquery() as "pushval"; it receives the
* caller's opaque Datum, the parser state, and one token.
*/
typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
char *token, int tokenlen,
int16 tokenweights, /* bitmap as described in
* QueryOperand struct */
bool prefix);
/* Bits for the "flags" argument of parse_tsquery() */
#define P_TSQ_PLAIN (1 << 0)
#define P_TSQ_WEB (1 << 1)
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
int flags);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int16 weight, bool prefix);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
/*
* parse plain text and lexize words
*
* ParsedWord is one entry of ParsedText.words.
*/
typedef struct
{
uint16 len; /* NOTE(review): presumably the length of "word" -- confirm */
uint16 nvariant;
/* Either a single position or a pointer to an array of positions */
union
{
uint16 pos;
/*
* When apos array is used, apos[0] is the number of elements in the
* array (excluding apos[0]), and alen is the allocated size of the
* array.
*/
uint16 *apos;
} pos;
uint16 flags; /* currently, only TSL_PREFIX */
char *word;
uint32 alen; /* allocated size of the apos array, when it is used */
} ParsedWord;
/*
* An array of ParsedWord with bookkeeping counters.
* NOTE(review): lenwords/curwords are presumably the allocated and used
* lengths of words[] -- confirm.
*/
typedef struct
{
ParsedWord *words;
int32 lenwords;
int32 curwords;
int32 pos;
} ParsedText;
/* Takes a configuration OID, the ParsedText to use, and a buffer with its length */
extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
/*
* headline framework, flow in common to generate:
* 1 parse text with hlparsetext
* 2 parser-specific function to find part
* 3 generateHeadline to generate result text
*/
/* Step 1: like parsetext(), but also takes the query and a HeadlineParsedText */
extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
char *buf, int32 buflen);
/* Step 3: returns the result as a text value */
extern text *generateHeadline(HeadlineParsedText *prs);
/*
* TSQuery execution support
*
* TS_execute() executes a tsquery against data that can be represented in
* various forms. The TSExecuteCallback callback function is called to check
* whether a given primitive tsquery value is matched in the data.
*/
/* TS_execute requires ternary logic to handle NOT with phrase matches */
/* Returned by TSExecuteCallback functions and by TS_execute_ternary() */
typedef enum
{
TS_NO, /* definitely no match */
TS_YES, /* definitely does match */
TS_MAYBE /* can't verify match for lack of pos data */
} TSTernaryValue;
/*
* Carrier for lexeme position data during phrase matching.
*
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns TS_YES (success).
* If it cannot return position data, it should leave "data" unchanged and
* return TS_MAYBE. The caller of TS_execute() must then arrange for a later
* recheck with position data available.
*
* The reported lexeme positions must be sorted and unique. Callers must only
* consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
* This allows the returned "pos" to point directly to the WordEntryPos
* portion of a tsvector value. If "allocated" is true then the pos array
* is palloc'd workspace and caller may free it when done.
*
* "negate" means that the pos array contains positions where the query does
* not match, rather than positions where it does. "width" is positive when
* the match is wider than one lexeme. Neither of these fields normally need
* to be touched by TSExecuteCallback functions; they are used for
* phrase-search processing within TS_execute.
*
* All fields of the ExecPhraseData struct are initially zeroed by caller.
*/
typedef struct ExecPhraseData
{
int npos; /* number of positions reported */
bool allocated; /* pos points to palloc'd data? */
bool negate; /* positions are where query is NOT matched */
WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
int width; /* width of match in lexemes, less 1 */
} ExecPhraseData;
/*
* Signature for TSQuery lexeme check functions
*
* arg: opaque value passed through from caller of TS_execute
* val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed
*
* Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
* present, TS_NO if it definitely is not present. If data is not NULL,
* it must be filled with lexeme positions if available. If position data
* is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
*
* This is the type of the "chkcond" argument of TS_execute() and
* TS_execute_ternary().
*/
typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
ExecPhraseData *data);
/*
* Flag bits for TS_execute
*/
/* No flag bits set */
#define TS_EXEC_EMPTY (0x00)
/*
* If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
* evaluated to be true. This was formerly the default behavior. It's now
* deprecated because it tends to give silly answers, but some applications
* might still have a use for it.
*/
#define TS_EXEC_SKIP_NOT (0x01)
/*
* If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
* in the absence of position information: a true result indicates that the
* phrase might be present. Without this flag, OP_PHRASE always returns
* false if lexeme position information is not available.
*/
#define TS_EXEC_PHRASE_NO_POS (0x02)
/*
* Query execution entry points.  "flags" takes the TS_EXEC_* bits, "arg" is
* passed through to the callback, and "chkcond" is the lexeme check function.
* TS_execute returns bool; TS_execute_ternary returns a TSTernaryValue.
*/
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond);
extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
uint32 flags,
TSExecuteCallback chkcond);
extern bool tsquery_requires_match(QueryItem *curitem);
/*
* to_ts* - text transformation to tsvector, tsquery
*/
/* Builds a TSVector from a ParsedText */
extern TSVector make_tsvector(ParsedText *prs);
/* Compares two length-counted strings; "prefix" selects prefix matching */
extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
/*
* Possible strategy numbers for indexes:
* TSearchStrategyNumber - (tsvector|text) @@ tsquery
* TSearchWithClassStrategyNumber - tsvector @@@ tsquery
*/
#define TSearchStrategyNumber 1
#define TSearchWithClassStrategyNumber 2
/*
* TSQuery Utilities
*/
/* Takes a QueryItem array; "len" is a pointer, so it can be updated */
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
/* Takes a TSQuery and returns a TSQuery */
extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
/*
* Tree form of a tsquery: each node refers to a QueryItem and holds an array
* of pointers to child nodes.  See QT2QTN() and QTN2QT() for conversion to
* and from the flat TSQuery form.
*/
typedef struct QTNode
{
QueryItem *valnode;
uint32 flags; /* QTN_* bits */
int32 nchild; /* NOTE(review): presumably the number of entries in child[] */
char *word;
uint32 sign;
struct QTNode **child;
} QTNode;
/* bits in QTNode.flags */
#define QTN_NEEDFREE 0x01
#define QTN_NOCHANGE 0x02
#define QTN_WORDFREE 0x04
/* A tsquery signature is a 64-bit unsigned value */
typedef uint64 TSQuerySign;
/* Size of a TSQuerySign in bits (byte size times BITS_PER_BYTE) */
#define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
/* Datum conversions go through int64; PG_RETURN_TSQUERYSIGN expands to "return" */
#define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
#define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
#define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
#define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
/* Conversion between the flat QueryItem form and the QTNode tree form */
extern QTNode *QT2QTN(QueryItem *in, char *operand);
extern TSQuery QTN2QT(QTNode *in);
/* Operations on a QTNode tree */
extern void QTNFree(QTNode *in);
extern void QTNSort(QTNode *in);
extern void QTNTernary(QTNode *in);
extern void QTNBinary(QTNode *in);
extern int QTNodeCompare(QTNode *an, QTNode *bn);
extern QTNode *QTNCopy(QTNode *in);
extern void QTNClearFlags(QTNode *in, uint32 flags);
extern bool QTNEq(QTNode *a, QTNode *b);
/* Computes the TSQuerySign of a TSQuery */
extern TSQuerySign makeTSQuerySign(TSQuery a);
/* "isfind" is an output flag set through the pointer */
extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
bool *isfind);
#endif /* _PG_TS_UTILS_H_ */