You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

blutils.c 13KB


  1. /*-------------------------------------------------------------------------
  2. *
  3. * blutils.c
  4. * Bloom index utilities.
  5. *
  6. * Portions Copyright (c) 2016-2019, PostgreSQL Global Development Group
  7. * Portions Copyright (c) 1990-1993, Regents of the University of California
  8. *
  9. * IDENTIFICATION
  10. * contrib/bloom/blutils.c
  11. *
  12. *-------------------------------------------------------------------------
  13. */
  14. #include "postgres.h"
  15. #include "access/amapi.h"
  16. #include "access/generic_xlog.h"
  17. #include "catalog/index.h"
  18. #include "storage/lmgr.h"
  19. #include "miscadmin.h"
  20. #include "storage/bufmgr.h"
  21. #include "storage/indexfsm.h"
  22. #include "utils/memutils.h"
  23. #include "access/reloptions.h"
  24. #include "storage/freespace.h"
  25. #include "storage/indexfsm.h"
  26. #include "bloom.h"
  /*
   * Signature bit-manipulation macros - note i is assumed to be of type int.
   *
   * "x" is treated as a pointer to an array of BloomSignatureWord and "i" is
   * a bit number counted from the start of that array:
   *   GETWORD  - the word that holds bit i (usable as an lvalue)
   *   CLRBIT   - turn bit i off in place
   *   SETBIT   - turn bit i on in place
   *   GETBIT   - value of bit i, 0 or 1
   *
   * Each expansion is fully parenthesized so it behaves as one expression
   * wherever it is used.  "i" is expanded twice by CLRBIT, SETBIT and GETBIT,
   * so do not pass an argument with side effects.
   */
  #define GETWORD(x,i) ( *( (BloomSignatureWord *)(x) + ( (i) / SIGNWORDBITS ) ) )
  #define CLRBIT(x,i)  ( GETWORD(x,i) &= ~( 0x01 << ( (i) % SIGNWORDBITS ) ) )
  #define SETBIT(x,i)  ( GETWORD(x,i) |= ( 0x01 << ( (i) % SIGNWORDBITS ) ) )
  #define GETBIT(x,i)  ( (GETWORD(x,i) >> ( (i) % SIGNWORDBITS )) & 0x01 )
  32. PG_FUNCTION_INFO_V1(blhandler);
  33. /* Kind of relation options for bloom index */
  34. static relopt_kind bl_relopt_kind;
  35. /* parse table for fillRelOptions */
  36. static relopt_parse_elt bl_relopt_tab[INDEX_MAX_KEYS + 1];
  37. static int32 myRand(void);
  38. static void mySrand(uint32 seed);
  39. /*
  40. * Module initialize function: initialize info about Bloom relation options.
  41. *
  42. * Note: keep this in sync with makeDefaultBloomOptions().
  43. */
  44. void
  45. _PG_init(void)
  46. {
  47. int i;
  48. char buf[16];
  49. bl_relopt_kind = add_reloption_kind();
  50. /* Option for length of signature */
  51. add_int_reloption(bl_relopt_kind, "length",
  52. "Length of signature in bits",
  53. DEFAULT_BLOOM_LENGTH, 1, MAX_BLOOM_LENGTH);
  54. bl_relopt_tab[0].optname = "length";
  55. bl_relopt_tab[0].opttype = RELOPT_TYPE_INT;
  56. bl_relopt_tab[0].offset = offsetof(BloomOptions, bloomLength);
  57. /* Number of bits for each possible index column: col1, col2, ... */
  58. for (i = 0; i < INDEX_MAX_KEYS; i++)
  59. {
  60. snprintf(buf, sizeof(buf), "col%d", i + 1);
  61. add_int_reloption(bl_relopt_kind, buf,
  62. "Number of bits generated for each index column",
  63. DEFAULT_BLOOM_BITS, 1, MAX_BLOOM_BITS);
  64. bl_relopt_tab[i + 1].optname = MemoryContextStrdup(TopMemoryContext,
  65. buf);
  66. bl_relopt_tab[i + 1].opttype = RELOPT_TYPE_INT;
  67. bl_relopt_tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
  68. }
  69. }
  70. /*
  71. * Construct a default set of Bloom options.
  72. */
  73. static BloomOptions *
  74. makeDefaultBloomOptions(void)
  75. {
  76. BloomOptions *opts;
  77. int i;
  78. opts = (BloomOptions *) palloc0(sizeof(BloomOptions));
  79. /* Convert DEFAULT_BLOOM_LENGTH from # of bits to # of words */
  80. opts->bloomLength = (DEFAULT_BLOOM_LENGTH + SIGNWORDBITS - 1) / SIGNWORDBITS;
  81. for (i = 0; i < INDEX_MAX_KEYS; i++)
  82. opts->bitSize[i] = DEFAULT_BLOOM_BITS;
  83. SET_VARSIZE(opts, sizeof(BloomOptions));
  84. return opts;
  85. }
  86. /*
  87. * Bloom handler function: return IndexAmRoutine with access method parameters
  88. * and callbacks.
  89. */
  90. Datum
  91. blhandler(PG_FUNCTION_ARGS)
  92. {
  93. IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
  94. amroutine->amstrategies = BLOOM_NSTRATEGIES;
  95. amroutine->amsupport = BLOOM_NPROC;
  96. amroutine->amcanorder = false;
  97. amroutine->amcanorderbyop = false;
  98. amroutine->amcanbackward = false;
  99. amroutine->amcanunique = false;
  100. amroutine->amcanmulticol = true;
  101. amroutine->amoptionalkey = true;
  102. amroutine->amsearcharray = false;
  103. amroutine->amsearchnulls = false;
  104. amroutine->amstorage = false;
  105. amroutine->amclusterable = false;
  106. amroutine->ampredlocks = false;
  107. amroutine->amcanparallel = false;
  108. amroutine->amcaninclude = false;
  109. amroutine->amkeytype = InvalidOid;
  110. amroutine->ambuild = blbuild;
  111. amroutine->ambuildempty = blbuildempty;
  112. amroutine->aminsert = blinsert;
  113. amroutine->ambulkdelete = blbulkdelete;
  114. amroutine->amvacuumcleanup = blvacuumcleanup;
  115. amroutine->amcanreturn = NULL;
  116. amroutine->amcostestimate = blcostestimate;
  117. amroutine->amoptions = bloptions;
  118. amroutine->amproperty = NULL;
  119. amroutine->amvalidate = blvalidate;
  120. amroutine->ambeginscan = blbeginscan;
  121. amroutine->amrescan = blrescan;
  122. amroutine->amgettuple = NULL;
  123. amroutine->amgetbitmap = blgetbitmap;
  124. amroutine->amendscan = blendscan;
  125. amroutine->ammarkpos = NULL;
  126. amroutine->amrestrpos = NULL;
  127. amroutine->amestimateparallelscan = NULL;
  128. amroutine->aminitparallelscan = NULL;
  129. amroutine->amparallelrescan = NULL;
  130. PG_RETURN_POINTER(amroutine);
  131. }
  132. /*
  133. * Fill BloomState structure for particular index.
  134. */
  135. void
  136. initBloomState(BloomState *state, Relation index)
  137. {
  138. int i;
  139. state->nColumns = index->rd_att->natts;
  140. /* Initialize hash function for each attribute */
  141. for (i = 0; i < index->rd_att->natts; i++)
  142. {
  143. fmgr_info_copy(&(state->hashFn[i]),
  144. index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
  145. CurrentMemoryContext);
  146. }
  147. /* Initialize amcache if needed with options from metapage */
  148. if (!index->rd_amcache)
  149. {
  150. Buffer buffer;
  151. Page page;
  152. BloomMetaPageData *meta;
  153. BloomOptions *opts;
  154. opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
  155. buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
  156. LockBuffer(buffer, BUFFER_LOCK_SHARE);
  157. page = BufferGetPage(buffer);
  158. if (!BloomPageIsMeta(page))
  159. elog(ERROR, "Relation is not a bloom index");
  160. meta = BloomPageGetMeta(BufferGetPage(buffer));
  161. if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
  162. elog(ERROR, "Relation is not a bloom index");
  163. *opts = meta->opts;
  164. UnlockReleaseBuffer(buffer);
  165. index->rd_amcache = (void *) opts;
  166. }
  167. memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
  168. state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
  169. sizeof(BloomSignatureWord) * state->opts.bloomLength;
  170. }
  171. /*
  172. * Random generator copied from FreeBSD. Using own random generator here for
  173. * two reasons:
  174. *
  175. * 1) In this case random numbers are used for on-disk storage. Usage of
  176. * PostgreSQL number generator would obstruct it from all possible changes.
  177. * 2) Changing seed of PostgreSQL random generator would be undesirable side
  178. * effect.
  179. */
  180. static int32 next;
  181. static int32
  182. myRand(void)
  183. {
  184. /*----------
  185. * Compute x = (7^5 * x) mod (2^31 - 1)
  186. * without overflowing 31 bits:
  187. * (2^31 - 1) = 127773 * (7^5) + 2836
  188. * From "Random number generators: good ones are hard to find",
  189. * Park and Miller, Communications of the ACM, vol. 31, no. 10,
  190. * October 1988, p. 1195.
  191. *----------
  192. */
  193. int32 hi,
  194. lo,
  195. x;
  196. /* Must be in [1, 0x7ffffffe] range at this point. */
  197. hi = next / 127773;
  198. lo = next % 127773;
  199. x = 16807 * lo - 2836 * hi;
  200. if (x < 0)
  201. x += 0x7fffffff;
  202. next = x;
  203. /* Transform to [0, 0x7ffffffd] range. */
  204. return (x - 1);
  205. }
  206. static void
  207. mySrand(uint32 seed)
  208. {
  209. next = seed;
  210. /* Transform to [1, 0x7ffffffe] range. */
  211. next = (next % 0x7ffffffe) + 1;
  212. }
  213. /*
  214. * Add bits of given value to the signature.
  215. */
  216. void
  217. signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
  218. {
  219. uint32 hashVal;
  220. int nBit,
  221. j;
  222. /*
  223. * init generator with "column's" number to get "hashed" seed for new
  224. * value. We don't want to map the same numbers from different columns
  225. * into the same bits!
  226. */
  227. mySrand(attno);
  228. /*
  229. * Init hash sequence to map our value into bits. the same values in
  230. * different columns will be mapped into different bits because of step
  231. * above
  232. */
  233. hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
  234. mySrand(hashVal ^ myRand());
  235. for (j = 0; j < state->opts.bitSize[attno]; j++)
  236. {
  237. /* prevent multiple evaluation in SETBIT macro */
  238. nBit = myRand() % (state->opts.bloomLength * SIGNWORDBITS);
  239. SETBIT(sign, nBit);
  240. }
  241. }
  242. /*
  243. * Make bloom tuple from values.
  244. */
  245. BloomTuple *
  246. BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
  247. {
  248. int i;
  249. BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
  250. res->heapPtr = *iptr;
  251. /* Blooming each column */
  252. for (i = 0; i < state->nColumns; i++)
  253. {
  254. /* skip nulls */
  255. if (isnull[i])
  256. continue;
  257. signValue(state, res->sign, values[i], i);
  258. }
  259. return res;
  260. }
  261. /*
  262. * Add new bloom tuple to the page. Returns true if new tuple was successfully
  263. * added to the page. Returns false if it doesn't fit on the page.
  264. */
  265. bool
  266. BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
  267. {
  268. BloomTuple *itup;
  269. BloomPageOpaque opaque;
  270. Pointer ptr;
  271. /* We shouldn't be pointed to an invalid page */
  272. Assert(!PageIsNew(page) && !BloomPageIsDeleted(page));
  273. /* Does new tuple fit on the page? */
  274. if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
  275. return false;
  276. /* Copy new tuple to the end of page */
  277. opaque = BloomPageGetOpaque(page);
  278. itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
  279. memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
  280. /* Adjust maxoff and pd_lower */
  281. opaque->maxoff++;
  282. ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
  283. ((PageHeader) page)->pd_lower = ptr - page;
  284. /* Assert we didn't overrun available space */
  285. Assert(((PageHeader) page)->pd_lower <= ((PageHeader) page)->pd_upper);
  286. return true;
  287. }
  288. /*
  289. * Allocate a new page (either by recycling, or by extending the index file)
  290. * The returned buffer is already pinned and exclusive-locked
  291. * Caller is responsible for initializing the page by calling BloomInitBuffer
  292. */
  293. Buffer
  294. BloomNewBuffer(Relation index)
  295. {
  296. Buffer buffer;
  297. bool needLock;
  298. /* First, try to get a page from FSM */
  299. for (;;)
  300. {
  301. BlockNumber blkno = GetFreeIndexPage(index);
  302. if (blkno == InvalidBlockNumber)
  303. break;
  304. buffer = ReadBuffer(index, blkno);
  305. /*
  306. * We have to guard against the possibility that someone else already
  307. * recycled this page; the buffer may be locked if so.
  308. */
  309. if (ConditionalLockBuffer(buffer))
  310. {
  311. Page page = BufferGetPage(buffer);
  312. if (PageIsNew(page))
  313. return buffer; /* OK to use, if never initialized */
  314. if (BloomPageIsDeleted(page))
  315. return buffer; /* OK to use */
  316. LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
  317. }
  318. /* Can't use it, so release buffer and try again */
  319. ReleaseBuffer(buffer);
  320. }
  321. /* Must extend the file */
  322. needLock = !RELATION_IS_LOCAL(index);
  323. if (needLock)
  324. LockRelationForExtension(index, ExclusiveLock);
  325. buffer = ReadBuffer(index, P_NEW);
  326. LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  327. if (needLock)
  328. UnlockRelationForExtension(index, ExclusiveLock);
  329. return buffer;
  330. }
  331. /*
  332. * Initialize any page of a bloom index.
  333. */
  334. void
  335. BloomInitPage(Page page, uint16 flags)
  336. {
  337. BloomPageOpaque opaque;
  338. PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
  339. opaque = BloomPageGetOpaque(page);
  340. memset(opaque, 0, sizeof(BloomPageOpaqueData));
  341. opaque->flags = flags;
  342. opaque->bloom_page_id = BLOOM_PAGE_ID;
  343. }
  344. /*
  345. * Fill in metapage for bloom index.
  346. */
  347. void
  348. BloomFillMetapage(Relation index, Page metaPage)
  349. {
  350. BloomOptions *opts;
  351. BloomMetaPageData *metadata;
  352. /*
  353. * Choose the index's options. If reloptions have been assigned, use
  354. * those, otherwise create default options.
  355. */
  356. opts = (BloomOptions *) index->rd_options;
  357. if (!opts)
  358. opts = makeDefaultBloomOptions();
  359. /*
  360. * Initialize contents of meta page, including a copy of the options,
  361. * which are now frozen for the life of the index.
  362. */
  363. BloomInitPage(metaPage, BLOOM_META);
  364. metadata = BloomPageGetMeta(metaPage);
  365. memset(metadata, 0, sizeof(BloomMetaPageData));
  366. metadata->magickNumber = BLOOM_MAGICK_NUMBER;
  367. metadata->opts = *opts;
  368. ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
  369. /* If this fails, probably FreeBlockNumberArray size calc is wrong: */
  370. Assert(((PageHeader) metaPage)->pd_lower <= ((PageHeader) metaPage)->pd_upper);
  371. }
  372. /*
  373. * Initialize metapage for bloom index.
  374. */
  375. void
  376. BloomInitMetapage(Relation index)
  377. {
  378. Buffer metaBuffer;
  379. Page metaPage;
  380. GenericXLogState *state;
  381. /*
  382. * Make a new page; since it is first page it should be associated with
  383. * block number 0 (BLOOM_METAPAGE_BLKNO).
  384. */
  385. metaBuffer = BloomNewBuffer(index);
  386. Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
  387. /* Initialize contents of meta page */
  388. state = GenericXLogStart(index);
  389. metaPage = GenericXLogRegisterBuffer(state, metaBuffer,
  390. GENERIC_XLOG_FULL_IMAGE);
  391. BloomFillMetapage(index, metaPage);
  392. GenericXLogFinish(state);
  393. UnlockReleaseBuffer(metaBuffer);
  394. }
  395. /*
  396. * Parse reloptions for bloom index, producing a BloomOptions struct.
  397. */
  398. bytea *
  399. bloptions(Datum reloptions, bool validate)
  400. {
  401. relopt_value *options;
  402. int numoptions;
  403. BloomOptions *rdopts;
  404. /* Parse the user-given reloptions */
  405. options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
  406. rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
  407. fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
  408. validate, bl_relopt_tab, lengthof(bl_relopt_tab));
  409. /* Convert signature length from # of bits to # to words, rounding up */
  410. rdopts->bloomLength = (rdopts->bloomLength + SIGNWORDBITS - 1) / SIGNWORDBITS;
  411. return (bytea *) rdopts;
  412. }