You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

hashfuncs.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. /*
  2. * hashfuncs.c
  3. * Functions to investigate the content of HASH indexes
  4. *
  5. * Copyright (c) 2017-2019, PostgreSQL Global Development Group
  6. *
  7. * IDENTIFICATION
  8. * contrib/pageinspect/hashfuncs.c
  9. */
  10. #include "postgres.h"
  11. #include "pageinspect.h"
  12. #include "access/hash.h"
  13. #include "access/htup_details.h"
  14. #include "catalog/pg_type.h"
  15. #include "catalog/pg_am.h"
  16. #include "funcapi.h"
  17. #include "miscadmin.h"
  18. #include "utils/builtins.h"
  19. #include "utils/rel.h"
  20. PG_FUNCTION_INFO_V1(hash_page_type);
  21. PG_FUNCTION_INFO_V1(hash_page_stats);
  22. PG_FUNCTION_INFO_V1(hash_page_items);
  23. PG_FUNCTION_INFO_V1(hash_bitmap_info);
  24. PG_FUNCTION_INFO_V1(hash_metapage_info);
  25. #define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
  26. /* ------------------------------------------------
  27. * structure for single hash page statistics
  28. * ------------------------------------------------
  29. */
  30. typedef struct HashPageStat
  31. {
  32. int live_items;
  33. int dead_items;
  34. int page_size;
  35. int free_size;
  36. /* opaque data */
  37. BlockNumber hasho_prevblkno;
  38. BlockNumber hasho_nextblkno;
  39. Bucket hasho_bucket;
  40. uint16 hasho_flag;
  41. uint16 hasho_page_id;
  42. } HashPageStat;
  43. /*
  44. * Verify that the given bytea contains a HASH page, or die in the attempt.
  45. * A pointer to a palloc'd, properly aligned copy of the page is returned.
  46. */
  47. static Page
  48. verify_hash_page(bytea *raw_page, int flags)
  49. {
  50. Page page = get_page_from_raw(raw_page);
  51. int pagetype = LH_UNUSED_PAGE;
  52. /* Treat new pages as unused. */
  53. if (!PageIsNew(page))
  54. {
  55. HashPageOpaque pageopaque;
  56. if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
  57. ereport(ERROR,
  58. (errcode(ERRCODE_INDEX_CORRUPTED),
  59. errmsg("index table contains corrupted page")));
  60. pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
  61. if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
  62. ereport(ERROR,
  63. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  64. errmsg("page is not a hash page"),
  65. errdetail("Expected %08x, got %08x.",
  66. HASHO_PAGE_ID, pageopaque->hasho_page_id)));
  67. pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
  68. }
  69. /* Check that page type is sane. */
  70. if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
  71. pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE &&
  72. pagetype != LH_UNUSED_PAGE)
  73. ereport(ERROR,
  74. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  75. errmsg("invalid hash page type %08x", pagetype)));
  76. /* If requested, verify page type. */
  77. if (flags != 0 && (pagetype & flags) == 0)
  78. {
  79. switch (flags)
  80. {
  81. case LH_META_PAGE:
  82. ereport(ERROR,
  83. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  84. errmsg("page is not a hash meta page")));
  85. break;
  86. case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
  87. ereport(ERROR,
  88. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  89. errmsg("page is not a hash bucket or overflow page")));
  90. break;
  91. case LH_OVERFLOW_PAGE:
  92. ereport(ERROR,
  93. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  94. errmsg("page is not a hash overflow page")));
  95. break;
  96. default:
  97. elog(ERROR,
  98. "hash page of type %08x not in mask %08x",
  99. pagetype, flags);
  100. break;
  101. }
  102. }
  103. /*
  104. * If it is the metapage, also verify magic number and version.
  105. */
  106. if (pagetype == LH_META_PAGE)
  107. {
  108. HashMetaPage metap = HashPageGetMeta(page);
  109. if (metap->hashm_magic != HASH_MAGIC)
  110. ereport(ERROR,
  111. (errcode(ERRCODE_INDEX_CORRUPTED),
  112. errmsg("invalid magic number for metadata"),
  113. errdetail("Expected 0x%08x, got 0x%08x.",
  114. HASH_MAGIC, metap->hashm_magic)));
  115. if (metap->hashm_version != HASH_VERSION)
  116. ereport(ERROR,
  117. (errcode(ERRCODE_INDEX_CORRUPTED),
  118. errmsg("invalid version for metadata"),
  119. errdetail("Expected %d, got %d",
  120. HASH_VERSION, metap->hashm_version)));
  121. }
  122. return page;
  123. }
  124. /* -------------------------------------------------
  125. * GetHashPageStatistics()
  126. *
  127. * Collect statistics of single hash page
  128. * -------------------------------------------------
  129. */
  130. static void
  131. GetHashPageStatistics(Page page, HashPageStat *stat)
  132. {
  133. OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
  134. HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
  135. int off;
  136. stat->dead_items = stat->live_items = 0;
  137. stat->page_size = PageGetPageSize(page);
  138. /* hash page opaque data */
  139. stat->hasho_prevblkno = opaque->hasho_prevblkno;
  140. stat->hasho_nextblkno = opaque->hasho_nextblkno;
  141. stat->hasho_bucket = opaque->hasho_bucket;
  142. stat->hasho_flag = opaque->hasho_flag;
  143. stat->hasho_page_id = opaque->hasho_page_id;
  144. /* count live and dead tuples, and free space */
  145. for (off = FirstOffsetNumber; off <= maxoff; off++)
  146. {
  147. ItemId id = PageGetItemId(page, off);
  148. if (!ItemIdIsDead(id))
  149. stat->live_items++;
  150. else
  151. stat->dead_items++;
  152. }
  153. stat->free_size = PageGetFreeSpace(page);
  154. }
  155. /* ---------------------------------------------------
  156. * hash_page_type()
  157. *
  158. * Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
  159. * ---------------------------------------------------
  160. */
  161. Datum
  162. hash_page_type(PG_FUNCTION_ARGS)
  163. {
  164. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  165. Page page;
  166. HashPageOpaque opaque;
  167. int pagetype;
  168. const char *type;
  169. if (!superuser())
  170. ereport(ERROR,
  171. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  172. (errmsg("must be superuser to use raw page functions"))));
  173. page = verify_hash_page(raw_page, 0);
  174. if (PageIsNew(page))
  175. type = "unused";
  176. else
  177. {
  178. opaque = (HashPageOpaque) PageGetSpecialPointer(page);
  179. /* page type (flags) */
  180. pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
  181. if (pagetype == LH_META_PAGE)
  182. type = "metapage";
  183. else if (pagetype == LH_OVERFLOW_PAGE)
  184. type = "overflow";
  185. else if (pagetype == LH_BUCKET_PAGE)
  186. type = "bucket";
  187. else if (pagetype == LH_BITMAP_PAGE)
  188. type = "bitmap";
  189. else
  190. type = "unused";
  191. }
  192. PG_RETURN_TEXT_P(cstring_to_text(type));
  193. }
  194. /* ---------------------------------------------------
  195. * hash_page_stats()
  196. *
  197. * Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
  198. * ---------------------------------------------------
  199. */
  200. Datum
  201. hash_page_stats(PG_FUNCTION_ARGS)
  202. {
  203. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  204. Page page;
  205. int j;
  206. Datum values[9];
  207. bool nulls[9];
  208. HashPageStat stat;
  209. HeapTuple tuple;
  210. TupleDesc tupleDesc;
  211. if (!superuser())
  212. ereport(ERROR,
  213. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  214. (errmsg("must be superuser to use raw page functions"))));
  215. page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
  216. /* keep compiler quiet */
  217. stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
  218. stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
  219. GetHashPageStatistics(page, &stat);
  220. /* Build a tuple descriptor for our result type */
  221. if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
  222. elog(ERROR, "return type must be a row type");
  223. tupleDesc = BlessTupleDesc(tupleDesc);
  224. MemSet(nulls, 0, sizeof(nulls));
  225. j = 0;
  226. values[j++] = Int32GetDatum(stat.live_items);
  227. values[j++] = Int32GetDatum(stat.dead_items);
  228. values[j++] = Int32GetDatum(stat.page_size);
  229. values[j++] = Int32GetDatum(stat.free_size);
  230. values[j++] = Int64GetDatum((int64) stat.hasho_prevblkno);
  231. values[j++] = Int64GetDatum((int64) stat.hasho_nextblkno);
  232. values[j++] = Int64GetDatum((int64) stat.hasho_bucket);
  233. values[j++] = Int32GetDatum((int32) stat.hasho_flag);
  234. values[j++] = Int32GetDatum((int32) stat.hasho_page_id);
  235. tuple = heap_form_tuple(tupleDesc, values, nulls);
  236. PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
  237. }
  238. /*
  239. * cross-call data structure for SRF
  240. */
  241. struct user_args
  242. {
  243. Page page;
  244. OffsetNumber offset;
  245. };
  246. /*-------------------------------------------------------
  247. * hash_page_items()
  248. *
  249. * Get IndexTupleData set in a hash page
  250. *
  251. * Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
  252. *-------------------------------------------------------
  253. */
  254. Datum
  255. hash_page_items(PG_FUNCTION_ARGS)
  256. {
  257. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  258. Page page;
  259. Datum result;
  260. Datum values[3];
  261. bool nulls[3];
  262. uint32 hashkey;
  263. HeapTuple tuple;
  264. FuncCallContext *fctx;
  265. MemoryContext mctx;
  266. struct user_args *uargs;
  267. if (!superuser())
  268. ereport(ERROR,
  269. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  270. (errmsg("must be superuser to use raw page functions"))));
  271. if (SRF_IS_FIRSTCALL())
  272. {
  273. TupleDesc tupleDesc;
  274. fctx = SRF_FIRSTCALL_INIT();
  275. mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
  276. page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
  277. uargs = palloc(sizeof(struct user_args));
  278. uargs->page = page;
  279. uargs->offset = FirstOffsetNumber;
  280. fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
  281. /* Build a tuple descriptor for our result type */
  282. if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
  283. elog(ERROR, "return type must be a row type");
  284. tupleDesc = BlessTupleDesc(tupleDesc);
  285. fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
  286. fctx->user_fctx = uargs;
  287. MemoryContextSwitchTo(mctx);
  288. }
  289. fctx = SRF_PERCALL_SETUP();
  290. uargs = fctx->user_fctx;
  291. if (fctx->call_cntr < fctx->max_calls)
  292. {
  293. ItemId id;
  294. IndexTuple itup;
  295. int j;
  296. id = PageGetItemId(uargs->page, uargs->offset);
  297. if (!ItemIdIsValid(id))
  298. elog(ERROR, "invalid ItemId");
  299. itup = (IndexTuple) PageGetItem(uargs->page, id);
  300. MemSet(nulls, 0, sizeof(nulls));
  301. j = 0;
  302. values[j++] = Int32GetDatum((int32) uargs->offset);
  303. values[j++] = PointerGetDatum(&itup->t_tid);
  304. hashkey = _hash_get_indextuple_hashkey(itup);
  305. values[j] = Int64GetDatum((int64) hashkey);
  306. tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
  307. result = HeapTupleGetDatum(tuple);
  308. uargs->offset = uargs->offset + 1;
  309. SRF_RETURN_NEXT(fctx, result);
  310. }
  311. else
  312. {
  313. pfree(uargs);
  314. SRF_RETURN_DONE(fctx);
  315. }
  316. }
  317. /* ------------------------------------------------
  318. * hash_bitmap_info()
  319. *
  320. * Get bitmap information for a particular overflow page
  321. *
  322. * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
  323. * ------------------------------------------------
  324. */
  325. Datum
  326. hash_bitmap_info(PG_FUNCTION_ARGS)
  327. {
  328. Oid indexRelid = PG_GETARG_OID(0);
  329. uint64 ovflblkno = PG_GETARG_INT64(1);
  330. HashMetaPage metap;
  331. Buffer metabuf,
  332. mapbuf;
  333. BlockNumber bitmapblkno;
  334. Page mappage;
  335. bool bit = false;
  336. TupleDesc tupleDesc;
  337. Relation indexRel;
  338. uint32 ovflbitno;
  339. int32 bitmappage,
  340. bitmapbit;
  341. HeapTuple tuple;
  342. int i,
  343. j;
  344. Datum values[3];
  345. bool nulls[3];
  346. uint32 *freep;
  347. if (!superuser())
  348. ereport(ERROR,
  349. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  350. (errmsg("must be superuser to use raw page functions"))));
  351. indexRel = index_open(indexRelid, AccessShareLock);
  352. if (!IS_HASH(indexRel))
  353. elog(ERROR, "relation \"%s\" is not a hash index",
  354. RelationGetRelationName(indexRel));
  355. if (RELATION_IS_OTHER_TEMP(indexRel))
  356. ereport(ERROR,
  357. (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  358. errmsg("cannot access temporary tables of other sessions")));
  359. if (ovflblkno >= RelationGetNumberOfBlocks(indexRel))
  360. ereport(ERROR,
  361. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  362. errmsg("block number " UINT64_FORMAT " is out of range for relation \"%s\"",
  363. ovflblkno, RelationGetRelationName(indexRel))));
  364. /* Read the metapage so we can determine which bitmap page to use */
  365. metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
  366. metap = HashPageGetMeta(BufferGetPage(metabuf));
  367. /*
  368. * Reject attempt to read the bit for a metapage or bitmap page; this is
  369. * only meaningful for overflow pages.
  370. */
  371. if (ovflblkno == 0)
  372. ereport(ERROR,
  373. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  374. errmsg("invalid overflow block number %u",
  375. (BlockNumber) ovflblkno)));
  376. for (i = 0; i < metap->hashm_nmaps; i++)
  377. if (metap->hashm_mapp[i] == ovflblkno)
  378. ereport(ERROR,
  379. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  380. errmsg("invalid overflow block number %u",
  381. (BlockNumber) ovflblkno)));
  382. /*
  383. * Identify overflow bit number. This will error out for primary bucket
  384. * pages, and we've already rejected the metapage and bitmap pages above.
  385. */
  386. ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno);
  387. bitmappage = ovflbitno >> BMPG_SHIFT(metap);
  388. bitmapbit = ovflbitno & BMPG_MASK(metap);
  389. if (bitmappage >= metap->hashm_nmaps)
  390. ereport(ERROR,
  391. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  392. errmsg("invalid overflow block number %u",
  393. (BlockNumber) ovflblkno)));
  394. bitmapblkno = metap->hashm_mapp[bitmappage];
  395. _hash_relbuf(indexRel, metabuf);
  396. /* Check the status of bitmap bit for overflow page */
  397. mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE);
  398. mappage = BufferGetPage(mapbuf);
  399. freep = HashPageGetBitmap(mappage);
  400. bit = ISSET(freep, bitmapbit) != 0;
  401. _hash_relbuf(indexRel, mapbuf);
  402. index_close(indexRel, AccessShareLock);
  403. /* Build a tuple descriptor for our result type */
  404. if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
  405. elog(ERROR, "return type must be a row type");
  406. tupleDesc = BlessTupleDesc(tupleDesc);
  407. MemSet(nulls, 0, sizeof(nulls));
  408. j = 0;
  409. values[j++] = Int64GetDatum((int64) bitmapblkno);
  410. values[j++] = Int32GetDatum(bitmapbit);
  411. values[j++] = BoolGetDatum(bit);
  412. tuple = heap_form_tuple(tupleDesc, values, nulls);
  413. PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
  414. }
  415. /* ------------------------------------------------
  416. * hash_metapage_info()
  417. *
  418. * Get the meta-page information for a hash index
  419. *
  420. * Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
  421. * ------------------------------------------------
  422. */
  423. Datum
  424. hash_metapage_info(PG_FUNCTION_ARGS)
  425. {
  426. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  427. Page page;
  428. HashMetaPageData *metad;
  429. TupleDesc tupleDesc;
  430. HeapTuple tuple;
  431. int i,
  432. j;
  433. Datum values[16];
  434. bool nulls[16];
  435. Datum spares[HASH_MAX_SPLITPOINTS];
  436. Datum mapp[HASH_MAX_BITMAPS];
  437. if (!superuser())
  438. ereport(ERROR,
  439. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  440. (errmsg("must be superuser to use raw page functions"))));
  441. page = verify_hash_page(raw_page, LH_META_PAGE);
  442. /* Build a tuple descriptor for our result type */
  443. if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
  444. elog(ERROR, "return type must be a row type");
  445. tupleDesc = BlessTupleDesc(tupleDesc);
  446. metad = HashPageGetMeta(page);
  447. MemSet(nulls, 0, sizeof(nulls));
  448. j = 0;
  449. values[j++] = Int64GetDatum((int64) metad->hashm_magic);
  450. values[j++] = Int64GetDatum((int64) metad->hashm_version);
  451. values[j++] = Float8GetDatum(metad->hashm_ntuples);
  452. values[j++] = Int32GetDatum((int32) metad->hashm_ffactor);
  453. values[j++] = Int32GetDatum((int32) metad->hashm_bsize);
  454. values[j++] = Int32GetDatum((int32) metad->hashm_bmsize);
  455. values[j++] = Int32GetDatum((int32) metad->hashm_bmshift);
  456. values[j++] = Int64GetDatum((int64) metad->hashm_maxbucket);
  457. values[j++] = Int64GetDatum((int64) metad->hashm_highmask);
  458. values[j++] = Int64GetDatum((int64) metad->hashm_lowmask);
  459. values[j++] = Int64GetDatum((int64) metad->hashm_ovflpoint);
  460. values[j++] = Int64GetDatum((int64) metad->hashm_firstfree);
  461. values[j++] = Int64GetDatum((int64) metad->hashm_nmaps);
  462. values[j++] = ObjectIdGetDatum((Oid) metad->hashm_procid);
  463. for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
  464. spares[i] = Int64GetDatum((int64) metad->hashm_spares[i]);
  465. values[j++] = PointerGetDatum(construct_array(spares,
  466. HASH_MAX_SPLITPOINTS,
  467. INT8OID,
  468. 8, FLOAT8PASSBYVAL, 'd'));
  469. for (i = 0; i < HASH_MAX_BITMAPS; i++)
  470. mapp[i] = Int64GetDatum((int64) metad->hashm_mapp[i]);
  471. values[j++] = PointerGetDatum(construct_array(mapp,
  472. HASH_MAX_BITMAPS,
  473. INT8OID,
  474. 8, FLOAT8PASSBYVAL, 'd'));
  475. tuple = heap_form_tuple(tupleDesc, values, nulls);
  476. PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
  477. }