You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

brin_tuple.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. /*
  2. * brin_tuple.c
  3. * Method implementations for tuples in BRIN indexes.
  4. *
  5. * Intended usage is that code outside this file only deals with
  6. * BrinMemTuples, and converts to and from the on-disk representation through
  7. * functions in this file.
  8. *
  9. * NOTES
  10. *
  11. * A BRIN tuple is similar to a heap tuple, with a few key differences. The
  12. * first interesting difference is that the tuple header is much simpler, only
  13. * containing its total length and a small area for flags. Also, the stored
  14. * data does not match the relation tuple descriptor exactly: for each
  15. * attribute in the descriptor, the index tuple carries an arbitrary number
  16. * of values, depending on the opclass.
  17. *
  18. * Also, for each column of the index relation there are two null bits: one
  19. * (hasnulls) stores whether any tuple within the page range has that column
  20. * set to null; the other one (allnulls) stores whether the column values are
  21. * all null. If allnulls is true, then the tuple data area does not contain
  22. * values for that column at all; whereas it does if the hasnulls is set.
  23. * Note the size of the null bitmask may not be the same as that of the
  24. * datum array.
  25. *
  26. * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  27. * Portions Copyright (c) 1994, Regents of the University of California
  28. *
  29. * IDENTIFICATION
  30. * src/backend/access/brin/brin_tuple.c
  31. */
  32. #include "postgres.h"
  33. #include "access/htup_details.h"
  34. #include "access/brin_tuple.h"
  35. #include "access/tupdesc.h"
  36. #include "access/tupmacs.h"
  37. #include "utils/datum.h"
  38. #include "utils/memutils.h"
  39. static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
  40. char *tp, bits8 *nullbits, bool nulls,
  41. Datum *values, bool *allnulls, bool *hasnulls);
  42. /*
  43. * Return a tuple descriptor used for on-disk storage of BRIN tuples.
  44. */
  45. static TupleDesc
  46. brtuple_disk_tupdesc(BrinDesc *brdesc)
  47. {
  48. /* We cache these in the BrinDesc */
  49. if (brdesc->bd_disktdesc == NULL)
  50. {
  51. int i;
  52. int j;
  53. AttrNumber attno = 1;
  54. TupleDesc tupdesc;
  55. MemoryContext oldcxt;
  56. /* make sure it's in the bdesc's context */
  57. oldcxt = MemoryContextSwitchTo(brdesc->bd_context);
  58. tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored);
  59. for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
  60. {
  61. for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++)
  62. TupleDescInitEntry(tupdesc, attno++, NULL,
  63. brdesc->bd_info[i]->oi_typcache[j]->type_id,
  64. -1, 0);
  65. }
  66. MemoryContextSwitchTo(oldcxt);
  67. brdesc->bd_disktdesc = tupdesc;
  68. }
  69. return brdesc->bd_disktdesc;
  70. }
  71. /*
  72. * Generate a new on-disk tuple to be inserted in a BRIN index.
  73. *
  74. * See brin_form_placeholder_tuple if you touch this.
  75. */
  76. BrinTuple *
  77. brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
  78. Size *size)
  79. {
  80. Datum *values;
  81. bool *nulls;
  82. bool anynulls = false;
  83. BrinTuple *rettuple;
  84. int keyno;
  85. int idxattno;
  86. uint16 phony_infomask = 0;
  87. bits8 *phony_nullbitmap;
  88. Size len,
  89. hoff,
  90. data_len;
  91. Assert(brdesc->bd_totalstored > 0);
  92. values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
  93. nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored);
  94. phony_nullbitmap = (bits8 *)
  95. palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
  96. /*
  97. * Set up the values/nulls arrays for heap_fill_tuple
  98. */
  99. idxattno = 0;
  100. for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
  101. {
  102. int datumno;
  103. /*
  104. * "allnulls" is set when there's no nonnull value in any row in the
  105. * column; when this happens, there is no data to store. Thus set the
  106. * nullable bits for all data elements of this column and we're done.
  107. */
  108. if (tuple->bt_columns[keyno].bv_allnulls)
  109. {
  110. for (datumno = 0;
  111. datumno < brdesc->bd_info[keyno]->oi_nstored;
  112. datumno++)
  113. nulls[idxattno++] = true;
  114. anynulls = true;
  115. continue;
  116. }
  117. /*
  118. * The "hasnulls" bit is set when there are some null values in the
  119. * data. We still need to store a real value, but the presence of
  120. * this means we need a null bitmap.
  121. */
  122. if (tuple->bt_columns[keyno].bv_hasnulls)
  123. anynulls = true;
  124. for (datumno = 0;
  125. datumno < brdesc->bd_info[keyno]->oi_nstored;
  126. datumno++)
  127. values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno];
  128. }
  129. /* Assert we did not overrun temp arrays */
  130. Assert(idxattno <= brdesc->bd_totalstored);
  131. /* compute total space needed */
  132. len = SizeOfBrinTuple;
  133. if (anynulls)
  134. {
  135. /*
  136. * We need a double-length bitmap on an on-disk BRIN index tuple; the
  137. * first half stores the "allnulls" bits, the second stores
  138. * "hasnulls".
  139. */
  140. len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
  141. }
  142. len = hoff = MAXALIGN(len);
  143. data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc),
  144. values, nulls);
  145. len += data_len;
  146. len = MAXALIGN(len);
  147. rettuple = palloc0(len);
  148. rettuple->bt_blkno = blkno;
  149. rettuple->bt_info = hoff;
  150. /* Assert that hoff fits in the space available */
  151. Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff);
  152. /*
  153. * The infomask and null bitmap as computed by heap_fill_tuple are useless
  154. * to us. However, that function will not accept a null infomask; and we
  155. * need to pass a valid null bitmap so that it will correctly skip
  156. * outputting null attributes in the data area.
  157. */
  158. heap_fill_tuple(brtuple_disk_tupdesc(brdesc),
  159. values,
  160. nulls,
  161. (char *) rettuple + hoff,
  162. data_len,
  163. &phony_infomask,
  164. phony_nullbitmap);
  165. /* done with these */
  166. pfree(values);
  167. pfree(nulls);
  168. pfree(phony_nullbitmap);
  169. /*
  170. * Now fill in the real null bitmasks. allnulls first.
  171. */
  172. if (anynulls)
  173. {
  174. bits8 *bitP;
  175. int bitmask;
  176. rettuple->bt_info |= BRIN_NULLS_MASK;
  177. /*
  178. * Note that we reverse the sense of null bits in this module: we
  179. * store a 1 for a null attribute rather than a 0. So we must reverse
  180. * the sense of the att_isnull test in br_deconstruct_tuple as well.
  181. */
  182. bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
  183. bitmask = HIGHBIT;
  184. for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
  185. {
  186. if (bitmask != HIGHBIT)
  187. bitmask <<= 1;
  188. else
  189. {
  190. bitP += 1;
  191. *bitP = 0x0;
  192. bitmask = 1;
  193. }
  194. if (!tuple->bt_columns[keyno].bv_allnulls)
  195. continue;
  196. *bitP |= bitmask;
  197. }
  198. /* hasnulls bits follow */
  199. for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
  200. {
  201. if (bitmask != HIGHBIT)
  202. bitmask <<= 1;
  203. else
  204. {
  205. bitP += 1;
  206. *bitP = 0x0;
  207. bitmask = 1;
  208. }
  209. if (!tuple->bt_columns[keyno].bv_hasnulls)
  210. continue;
  211. *bitP |= bitmask;
  212. }
  213. bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1;
  214. }
  215. if (tuple->bt_placeholder)
  216. rettuple->bt_info |= BRIN_PLACEHOLDER_MASK;
  217. *size = len;
  218. return rettuple;
  219. }
  220. /*
  221. * Generate a new on-disk tuple with no data values, marked as placeholder.
  222. *
  223. * This is a cut-down version of brin_form_tuple.
  224. */
  225. BrinTuple *
  226. brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
  227. {
  228. Size len;
  229. Size hoff;
  230. BrinTuple *rettuple;
  231. int keyno;
  232. bits8 *bitP;
  233. int bitmask;
  234. /* compute total space needed: always add nulls */
  235. len = SizeOfBrinTuple;
  236. len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
  237. len = hoff = MAXALIGN(len);
  238. rettuple = palloc0(len);
  239. rettuple->bt_blkno = blkno;
  240. rettuple->bt_info = hoff;
  241. rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK;
  242. bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
  243. bitmask = HIGHBIT;
  244. /* set allnulls true for all attributes */
  245. for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
  246. {
  247. if (bitmask != HIGHBIT)
  248. bitmask <<= 1;
  249. else
  250. {
  251. bitP += 1;
  252. *bitP = 0x0;
  253. bitmask = 1;
  254. }
  255. *bitP |= bitmask;
  256. }
  257. /* no need to set hasnulls */
  258. *size = len;
  259. return rettuple;
  260. }
  261. /*
  262. * Free a tuple created by brin_form_tuple
  263. */
  264. void
  265. brin_free_tuple(BrinTuple *tuple)
  266. {
  267. pfree(tuple);
  268. }
  269. /*
  270. * Given a brin tuple of size len, create a copy of it. If 'dest' is not
  271. * NULL, its size is destsz, and can be used as output buffer; if the tuple
  272. * to be copied does not fit, it is enlarged by repalloc, and the size is
  273. * updated to match. This avoids palloc/free cycles when many brin tuples
  274. * are being processed in loops.
  275. */
  276. BrinTuple *
  277. brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
  278. {
  279. if (!destsz || *destsz == 0)
  280. dest = palloc(len);
  281. else if (len > *destsz)
  282. {
  283. dest = repalloc(dest, len);
  284. *destsz = len;
  285. }
  286. memcpy(dest, tuple, len);
  287. return dest;
  288. }
  289. /*
  290. * Return whether two BrinTuples are bitwise identical.
  291. */
  292. bool
  293. brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
  294. {
  295. if (alen != blen)
  296. return false;
  297. if (memcmp(a, b, alen) != 0)
  298. return false;
  299. return true;
  300. }
  301. /*
  302. * Create a new BrinMemTuple from scratch, and initialize it to an empty
  303. * state.
  304. *
  305. * Note: we don't provide any means to free a deformed tuple, so make sure to
  306. * use a temporary memory context.
  307. */
  308. BrinMemTuple *
  309. brin_new_memtuple(BrinDesc *brdesc)
  310. {
  311. BrinMemTuple *dtup;
  312. long basesize;
  313. basesize = MAXALIGN(sizeof(BrinMemTuple) +
  314. sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
  315. dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
  316. dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
  317. dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
  318. dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
  319. dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
  320. "brin dtuple",
  321. ALLOCSET_DEFAULT_SIZES);
  322. brin_memtuple_initialize(dtup, brdesc);
  323. return dtup;
  324. }
  325. /*
  326. * Reset a BrinMemTuple to initial state. We return the same tuple, for
  327. * notational convenience.
  328. */
  329. BrinMemTuple *
  330. brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
  331. {
  332. int i;
  333. char *currdatum;
  334. MemoryContextReset(dtuple->bt_context);
  335. currdatum = (char *) dtuple +
  336. MAXALIGN(sizeof(BrinMemTuple) +
  337. sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
  338. for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
  339. {
  340. dtuple->bt_columns[i].bv_allnulls = true;
  341. dtuple->bt_columns[i].bv_hasnulls = false;
  342. dtuple->bt_columns[i].bv_attno = i + 1;
  343. dtuple->bt_columns[i].bv_allnulls = true;
  344. dtuple->bt_columns[i].bv_hasnulls = false;
  345. dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
  346. currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
  347. }
  348. return dtuple;
  349. }
  350. /*
  351. * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
  352. * brin_form_tuple.
  353. *
  354. * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
  355. * This avoids having to allocate it here, which can be useful when this
  356. * function is called many times in a loop. It is caller's responsibility
  357. * that the given BrinMemTuple matches what we need here.
  358. *
  359. * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
  360. * deconstruct the tuple from the on-disk format.
  361. */
  362. BrinMemTuple *
  363. brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
  364. {
  365. BrinMemTuple *dtup;
  366. Datum *values;
  367. bool *allnulls;
  368. bool *hasnulls;
  369. char *tp;
  370. bits8 *nullbits;
  371. int keyno;
  372. int valueno;
  373. MemoryContext oldcxt;
  374. dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
  375. brin_new_memtuple(brdesc);
  376. if (BrinTupleIsPlaceholder(tuple))
  377. dtup->bt_placeholder = true;
  378. dtup->bt_blkno = tuple->bt_blkno;
  379. values = dtup->bt_values;
  380. allnulls = dtup->bt_allnulls;
  381. hasnulls = dtup->bt_hasnulls;
  382. tp = (char *) tuple + BrinTupleDataOffset(tuple);
  383. if (BrinTupleHasNulls(tuple))
  384. nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple);
  385. else
  386. nullbits = NULL;
  387. brin_deconstruct_tuple(brdesc,
  388. tp, nullbits, BrinTupleHasNulls(tuple),
  389. values, allnulls, hasnulls);
  390. /*
  391. * Iterate to assign each of the values to the corresponding item in the
  392. * values array of each column. The copies occur in the tuple's context.
  393. */
  394. oldcxt = MemoryContextSwitchTo(dtup->bt_context);
  395. for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
  396. {
  397. int i;
  398. if (allnulls[keyno])
  399. {
  400. valueno += brdesc->bd_info[keyno]->oi_nstored;
  401. continue;
  402. }
  403. /*
  404. * We would like to skip datumCopy'ing the values datum in some cases,
  405. * caller permitting ...
  406. */
  407. for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++)
  408. dtup->bt_columns[keyno].bv_values[i] =
  409. datumCopy(values[valueno++],
  410. brdesc->bd_info[keyno]->oi_typcache[i]->typbyval,
  411. brdesc->bd_info[keyno]->oi_typcache[i]->typlen);
  412. dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
  413. dtup->bt_columns[keyno].bv_allnulls = false;
  414. }
  415. MemoryContextSwitchTo(oldcxt);
  416. return dtup;
  417. }
  418. /*
  419. * brin_deconstruct_tuple
  420. * Guts of attribute extraction from an on-disk BRIN tuple.
  421. *
  422. * Its arguments are:
  423. * brdesc BRIN descriptor for the stored tuple
  424. * tp pointer to the tuple data area
  425. * nullbits pointer to the tuple nulls bitmask
  426. * nulls "has nulls" bit in tuple infomask
  427. * values output values, array of size brdesc->bd_totalstored
  428. * allnulls output "allnulls", size brdesc->bd_tupdesc->natts
  429. * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts
  430. *
  431. * Output arrays must have been allocated by caller.
  432. */
  433. static inline void
  434. brin_deconstruct_tuple(BrinDesc *brdesc,
  435. char *tp, bits8 *nullbits, bool nulls,
  436. Datum *values, bool *allnulls, bool *hasnulls)
  437. {
  438. int attnum;
  439. int stored;
  440. TupleDesc diskdsc;
  441. long off;
  442. /*
  443. * First iterate to natts to obtain both null flags for each attribute.
  444. * Note that we reverse the sense of the att_isnull test, because we store
  445. * 1 for a null value (rather than a 1 for a not null value as is the
  446. * att_isnull convention used elsewhere.) See brin_form_tuple.
  447. */
  448. for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
  449. {
  450. /*
  451. * the "all nulls" bit means that all values in the page range for
  452. * this column are nulls. Therefore there are no values in the tuple
  453. * data area.
  454. */
  455. allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
  456. /*
  457. * the "has nulls" bit means that some tuples have nulls, but others
  458. * have not-null values. Therefore we know the tuple contains data
  459. * for this column.
  460. *
  461. * The hasnulls bits follow the allnulls bits in the same bitmask.
  462. */
  463. hasnulls[attnum] =
  464. nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits);
  465. }
  466. /*
  467. * Iterate to obtain each attribute's stored values. Note that since we
  468. * may reuse attribute entries for more than one column, we cannot cache
  469. * offsets here.
  470. */
  471. diskdsc = brtuple_disk_tupdesc(brdesc);
  472. stored = 0;
  473. off = 0;
  474. for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
  475. {
  476. int datumno;
  477. if (allnulls[attnum])
  478. {
  479. stored += brdesc->bd_info[attnum]->oi_nstored;
  480. continue;
  481. }
  482. for (datumno = 0;
  483. datumno < brdesc->bd_info[attnum]->oi_nstored;
  484. datumno++)
  485. {
  486. Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored);
  487. if (thisatt->attlen == -1)
  488. {
  489. off = att_align_pointer(off, thisatt->attalign, -1,
  490. tp + off);
  491. }
  492. else
  493. {
  494. /* not varlena, so safe to use att_align_nominal */
  495. off = att_align_nominal(off, thisatt->attalign);
  496. }
  497. values[stored++] = fetchatt(thisatt, tp + off);
  498. off = att_addlength_pointer(off, thisatt->attlen, tp + off);
  499. }
  500. }
  501. }