You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

dict_xsyn.c 5.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. /*-------------------------------------------------------------------------
  2. *
  3. * dict_xsyn.c
  4. * Extended synonym dictionary
  5. *
  6. * Copyright (c) 2007-2019, PostgreSQL Global Development Group
  7. *
  8. * IDENTIFICATION
  9. * contrib/dict_xsyn/dict_xsyn.c
  10. *
  11. *-------------------------------------------------------------------------
  12. */
  13. #include "postgres.h"
  14. #include <ctype.h>
  15. #include "commands/defrem.h"
  16. #include "tsearch/ts_locale.h"
  17. #include "tsearch/ts_utils.h"
  /* Module magic block (see the PostgreSQL "C-Language Functions" documentation). */
  18. PG_MODULE_MAGIC;
  /*
   * Syn
   *
   * One dictionary entry.  Entries are ordered and looked up by "key" alone
   * (see compare_syn); "value" is carried along as the payload.
   */
  typedef struct
  {
      /* The word this entry is found by */
      char       *key;

      /*
       * Unparsed, whitespace-separated list of synonyms.  This is a copy of
       * the whole rules line, so it includes the original word itself.
       */
      char       *value;
  } Syn;
  25. typedef struct
  26. {
  27. int len;
  28. Syn *syn;
  29. bool matchorig;
  30. bool keeporig;
  31. bool matchsynonyms;
  32. bool keepsynonyms;
  33. } DictSyn;
  /*
   * Function-info declarations for the two Datum-returning entry points
   * defined later in this file (dxsyn_init and dxsyn_lexize).
   */
  34. PG_FUNCTION_INFO_V1(dxsyn_init);
  35. PG_FUNCTION_INFO_V1(dxsyn_lexize);
  36. static char *
  37. find_word(char *in, char **end)
  38. {
  39. char *start;
  40. *end = NULL;
  41. while (*in && t_isspace(in))
  42. in += pg_mblen(in);
  43. if (!*in || *in == '#')
  44. return NULL;
  45. start = in;
  46. while (*in && !t_isspace(in))
  47. in += pg_mblen(in);
  48. *end = in;
  49. return start;
  50. }
  51. static int
  52. compare_syn(const void *a, const void *b)
  53. {
  54. return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
  55. }
  56. static void
  57. read_dictionary(DictSyn *d, const char *filename)
  58. {
  59. char *real_filename = get_tsearch_config_filename(filename, "rules");
  60. tsearch_readline_state trst;
  61. char *line;
  62. int cur = 0;
  63. if (!tsearch_readline_begin(&trst, real_filename))
  64. ereport(ERROR,
  65. (errcode(ERRCODE_CONFIG_FILE_ERROR),
  66. errmsg("could not open synonym file \"%s\": %m",
  67. real_filename)));
  68. while ((line = tsearch_readline(&trst)) != NULL)
  69. {
  70. char *value;
  71. char *key;
  72. char *pos;
  73. char *end;
  74. if (*line == '\0')
  75. continue;
  76. value = lowerstr(line);
  77. pfree(line);
  78. pos = value;
  79. while ((key = find_word(pos, &end)) != NULL)
  80. {
  81. /* Enlarge syn structure if full */
  82. if (cur == d->len)
  83. {
  84. d->len = (d->len > 0) ? 2 * d->len : 16;
  85. if (d->syn)
  86. d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
  87. else
  88. d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
  89. }
  90. /* Save first word only if we will match it */
  91. if (pos != value || d->matchorig)
  92. {
  93. d->syn[cur].key = pnstrdup(key, end - key);
  94. d->syn[cur].value = pstrdup(value);
  95. cur++;
  96. }
  97. pos = end;
  98. /* Don't bother scanning synonyms if we will not match them */
  99. if (!d->matchsynonyms)
  100. break;
  101. }
  102. pfree(value);
  103. }
  104. tsearch_readline_end(&trst);
  105. d->len = cur;
  106. if (cur > 1)
  107. qsort(d->syn, d->len, sizeof(Syn), compare_syn);
  108. pfree(real_filename);
  109. }
  110. Datum
  111. dxsyn_init(PG_FUNCTION_ARGS)
  112. {
  113. List *dictoptions = (List *) PG_GETARG_POINTER(0);
  114. DictSyn *d;
  115. ListCell *l;
  116. char *filename = NULL;
  117. d = (DictSyn *) palloc0(sizeof(DictSyn));
  118. d->len = 0;
  119. d->syn = NULL;
  120. d->matchorig = true;
  121. d->keeporig = true;
  122. d->matchsynonyms = false;
  123. d->keepsynonyms = true;
  124. foreach(l, dictoptions)
  125. {
  126. DefElem *defel = (DefElem *) lfirst(l);
  127. if (strcmp(defel->defname, "matchorig") == 0)
  128. {
  129. d->matchorig = defGetBoolean(defel);
  130. }
  131. else if (strcmp(defel->defname, "keeporig") == 0)
  132. {
  133. d->keeporig = defGetBoolean(defel);
  134. }
  135. else if (strcmp(defel->defname, "matchsynonyms") == 0)
  136. {
  137. d->matchsynonyms = defGetBoolean(defel);
  138. }
  139. else if (strcmp(defel->defname, "keepsynonyms") == 0)
  140. {
  141. d->keepsynonyms = defGetBoolean(defel);
  142. }
  143. else if (strcmp(defel->defname, "rules") == 0)
  144. {
  145. /* we can't read the rules before parsing all options! */
  146. filename = defGetString(defel);
  147. }
  148. else
  149. {
  150. ereport(ERROR,
  151. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  152. errmsg("unrecognized xsyn parameter: \"%s\"",
  153. defel->defname)));
  154. }
  155. }
  156. if (filename)
  157. read_dictionary(d, filename);
  158. PG_RETURN_POINTER(d);
  159. }
  160. Datum
  161. dxsyn_lexize(PG_FUNCTION_ARGS)
  162. {
  163. DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
  164. char *in = (char *) PG_GETARG_POINTER(1);
  165. int length = PG_GETARG_INT32(2);
  166. Syn word;
  167. Syn *found;
  168. TSLexeme *res = NULL;
  169. if (!length || d->len == 0)
  170. PG_RETURN_POINTER(NULL);
  171. /* Create search pattern */
  172. {
  173. char *temp = pnstrdup(in, length);
  174. word.key = lowerstr(temp);
  175. pfree(temp);
  176. word.value = NULL;
  177. }
  178. /* Look for matching syn */
  179. found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
  180. pfree(word.key);
  181. if (!found)
  182. PG_RETURN_POINTER(NULL);
  183. /* Parse string of synonyms and return array of words */
  184. {
  185. char *value = found->value;
  186. char *syn;
  187. char *pos;
  188. char *end;
  189. int nsyns = 0;
  190. res = palloc(sizeof(TSLexeme));
  191. pos = value;
  192. while ((syn = find_word(pos, &end)) != NULL)
  193. {
  194. res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
  195. /* The first word is output only if keeporig=true */
  196. if (pos != value || d->keeporig)
  197. {
  198. res[nsyns].lexeme = pnstrdup(syn, end - syn);
  199. res[nsyns].nvariant = 0;
  200. res[nsyns].flags = 0;
  201. nsyns++;
  202. }
  203. pos = end;
  204. /* Stop if we are not to output the synonyms */
  205. if (!d->keepsynonyms)
  206. break;
  207. }
  208. res[nsyns].lexeme = NULL;
  209. }
  210. PG_RETURN_POINTER(res);
  211. }