You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ginbulk.c 7.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. /*-------------------------------------------------------------------------
  2. *
  3. * ginbulk.c
  4. * routines for fast build of inverted index
  5. *
  6. *
  7. * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  8. * Portions Copyright (c) 1994, Regents of the University of California
  9. *
  10. * IDENTIFICATION
  11. * src/backend/access/gin/ginbulk.c
  12. *-------------------------------------------------------------------------
  13. */
  14. #include "postgres.h"
  15. #include <limits.h>
  16. #include "access/gin_private.h"
  17. #include "utils/datum.h"
  18. #include "utils/memutils.h"
  19. #define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */
  20. #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
  21. /* Combiner function for rbtree.c */
  22. static void
  23. ginCombineData(RBTNode *existing, const RBTNode *newdata, void *arg)
  24. {
  25. GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
  26. const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
  27. BuildAccumulator *accum = (BuildAccumulator *) arg;
  28. /*
  29. * Note this code assumes that newdata contains only one itempointer.
  30. */
  31. if (eo->count >= eo->maxcount)
  32. {
  33. if (eo->maxcount > INT_MAX)
  34. ereport(ERROR,
  35. (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
  36. errmsg("posting list is too long"),
  37. errhint("Reduce maintenance_work_mem.")));
  38. accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
  39. eo->maxcount *= 2;
  40. eo->list = (ItemPointerData *)
  41. repalloc_huge(eo->list, sizeof(ItemPointerData) * eo->maxcount);
  42. accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
  43. }
  44. /* If item pointers are not ordered, they will need to be sorted later */
  45. if (eo->shouldSort == false)
  46. {
  47. int res;
  48. res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
  49. Assert(res != 0);
  50. if (res > 0)
  51. eo->shouldSort = true;
  52. }
  53. eo->list[eo->count] = en->list[0];
  54. eo->count++;
  55. }
  56. /* Comparator function for rbtree.c */
  57. static int
  58. cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg)
  59. {
  60. const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
  61. const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
  62. BuildAccumulator *accum = (BuildAccumulator *) arg;
  63. return ginCompareAttEntries(accum->ginstate,
  64. ea->attnum, ea->key, ea->category,
  65. eb->attnum, eb->key, eb->category);
  66. }
  67. /* Allocator function for rbtree.c */
  68. static RBTNode *
  69. ginAllocEntryAccumulator(void *arg)
  70. {
  71. BuildAccumulator *accum = (BuildAccumulator *) arg;
  72. GinEntryAccumulator *ea;
  73. /*
  74. * Allocate memory by rather big chunks to decrease overhead. We have no
  75. * need to reclaim RBTNodes individually, so this costs nothing.
  76. */
  77. if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
  78. {
  79. accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY);
  80. accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
  81. accum->eas_used = 0;
  82. }
  83. /* Allocate new RBTNode from current chunk */
  84. ea = accum->entryallocator + accum->eas_used;
  85. accum->eas_used++;
  86. return (RBTNode *) ea;
  87. }
  88. void
  89. ginInitBA(BuildAccumulator *accum)
  90. {
  91. /* accum->ginstate is intentionally not set here */
  92. accum->allocatedMemory = 0;
  93. accum->entryallocator = NULL;
  94. accum->eas_used = 0;
  95. accum->tree = rbt_create(sizeof(GinEntryAccumulator),
  96. cmpEntryAccumulator,
  97. ginCombineData,
  98. ginAllocEntryAccumulator,
  99. NULL, /* no freefunc needed */
  100. (void *) accum);
  101. }
  102. /*
  103. * This is basically the same as datumCopy(), but extended to count
  104. * palloc'd space in accum->allocatedMemory.
  105. */
  106. static Datum
  107. getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
  108. {
  109. Form_pg_attribute att;
  110. Datum res;
  111. att = TupleDescAttr(accum->ginstate->origTupdesc, attnum - 1);
  112. if (att->attbyval)
  113. res = value;
  114. else
  115. {
  116. res = datumCopy(value, false, att->attlen);
  117. accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res));
  118. }
  119. return res;
  120. }
  121. /*
  122. * Find/store one entry from indexed value.
  123. */
  124. static void
  125. ginInsertBAEntry(BuildAccumulator *accum,
  126. ItemPointer heapptr, OffsetNumber attnum,
  127. Datum key, GinNullCategory category)
  128. {
  129. GinEntryAccumulator eatmp;
  130. GinEntryAccumulator *ea;
  131. bool isNew;
  132. /*
  133. * For the moment, fill only the fields of eatmp that will be looked at by
  134. * cmpEntryAccumulator or ginCombineData.
  135. */
  136. eatmp.attnum = attnum;
  137. eatmp.key = key;
  138. eatmp.category = category;
  139. /* temporarily set up single-entry itempointer list */
  140. eatmp.list = heapptr;
  141. ea = (GinEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp,
  142. &isNew);
  143. if (isNew)
  144. {
  145. /*
  146. * Finish initializing new tree entry, including making permanent
  147. * copies of the datum (if it's not null) and itempointer.
  148. */
  149. if (category == GIN_CAT_NORM_KEY)
  150. ea->key = getDatumCopy(accum, attnum, key);
  151. ea->maxcount = DEF_NPTR;
  152. ea->count = 1;
  153. ea->shouldSort = false;
  154. ea->list =
  155. (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
  156. ea->list[0] = *heapptr;
  157. accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
  158. }
  159. else
  160. {
  161. /*
  162. * ginCombineData did everything needed.
  163. */
  164. }
  165. }
  166. /*
  167. * Insert the entries for one heap pointer.
  168. *
  169. * Since the entries are being inserted into a balanced binary tree, you
  170. * might think that the order of insertion wouldn't be critical, but it turns
  171. * out that inserting the entries in sorted order results in a lot of
  172. * rebalancing operations and is slow. To prevent this, we attempt to insert
  173. * the nodes in an order that will produce a nearly-balanced tree if the input
  174. * is in fact sorted.
  175. *
  176. * We do this as follows. First, we imagine that we have an array whose size
  177. * is the smallest power of two greater than or equal to the actual array
  178. * size. Second, we insert the middle entry of our virtual array into the
  179. * tree; then, we insert the middles of each half of our virtual array, then
  180. * middles of quarters, etc.
  181. */
  182. void
  183. ginInsertBAEntries(BuildAccumulator *accum,
  184. ItemPointer heapptr, OffsetNumber attnum,
  185. Datum *entries, GinNullCategory *categories,
  186. int32 nentries)
  187. {
  188. uint32 step = nentries;
  189. if (nentries <= 0)
  190. return;
  191. Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
  192. /*
  193. * step will contain largest power of 2 and <= nentries
  194. */
  195. step |= (step >> 1);
  196. step |= (step >> 2);
  197. step |= (step >> 4);
  198. step |= (step >> 8);
  199. step |= (step >> 16);
  200. step >>= 1;
  201. step++;
  202. while (step > 0)
  203. {
  204. int i;
  205. for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
  206. ginInsertBAEntry(accum, heapptr, attnum,
  207. entries[i], categories[i]);
  208. step >>= 1; /* /2 */
  209. }
  210. }
  211. static int
  212. qsortCompareItemPointers(const void *a, const void *b)
  213. {
  214. int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b);
  215. /* Assert that there are no equal item pointers being sorted */
  216. Assert(res != 0);
  217. return res;
  218. }
  219. /* Prepare to read out the rbtree contents using ginGetBAEntry */
  220. void
  221. ginBeginBAScan(BuildAccumulator *accum)
  222. {
  223. rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk);
  224. }
  225. /*
  226. * Get the next entry in sequence from the BuildAccumulator's rbtree.
  227. * This consists of a single key datum and a list (array) of one or more
  228. * heap TIDs in which that key is found. The list is guaranteed sorted.
  229. */
  230. ItemPointerData *
  231. ginGetBAEntry(BuildAccumulator *accum,
  232. OffsetNumber *attnum, Datum *key, GinNullCategory *category,
  233. uint32 *n)
  234. {
  235. GinEntryAccumulator *entry;
  236. ItemPointerData *list;
  237. entry = (GinEntryAccumulator *) rbt_iterate(&accum->tree_walk);
  238. if (entry == NULL)
  239. return NULL; /* no more entries */
  240. *attnum = entry->attnum;
  241. *key = entry->key;
  242. *category = entry->category;
  243. list = entry->list;
  244. *n = entry->count;
  245. Assert(list != NULL && entry->count > 0);
  246. if (entry->shouldSort && entry->count > 1)
  247. qsort(list, entry->count, sizeof(ItemPointerData),
  248. qsortCompareItemPointers);
  249. return list;
  250. }