You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

brinfuncs.c 10KB


  1. /*
  2. * brinfuncs.c
  3. * Functions to investigate BRIN indexes
  4. *
  5. * Copyright (c) 2014-2019, PostgreSQL Global Development Group
  6. *
  7. * IDENTIFICATION
  8. * contrib/pageinspect/brinfuncs.c
  9. */
  10. #include "postgres.h"
  11. #include "pageinspect.h"
  12. #include "access/htup_details.h"
  13. #include "access/brin.h"
  14. #include "access/brin_internal.h"
  15. #include "access/brin_page.h"
  16. #include "access/brin_revmap.h"
  17. #include "access/brin_tuple.h"
  18. #include "catalog/index.h"
  19. #include "catalog/pg_type.h"
  20. #include "funcapi.h"
  21. #include "lib/stringinfo.h"
  22. #include "utils/array.h"
  23. #include "utils/builtins.h"
  24. #include "utils/lsyscache.h"
  25. #include "utils/rel.h"
  26. #include "miscadmin.h"
  /*
   * Function-manager (V1) declarations for the SQL-callable entry points
   * defined in this file.
   */
  PG_FUNCTION_INFO_V1(brin_page_type);
  PG_FUNCTION_INFO_V1(brin_page_items);
  PG_FUNCTION_INFO_V1(brin_metapage_info);
  PG_FUNCTION_INFO_V1(brin_revmap_data);
  31. typedef struct brin_column_state
  32. {
  33. int nstored;
  34. FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER];
  35. } brin_column_state;
  36. static Page verify_brin_page(bytea *raw_page, uint16 type,
  37. const char *strtype);
  38. Datum
  39. brin_page_type(PG_FUNCTION_ARGS)
  40. {
  41. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  42. Page page = VARDATA(raw_page);
  43. int raw_page_size;
  44. char *type;
  45. if (!superuser())
  46. ereport(ERROR,
  47. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  48. (errmsg("must be superuser to use raw page functions"))));
  49. raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
  50. if (raw_page_size != BLCKSZ)
  51. ereport(ERROR,
  52. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  53. errmsg("input page too small"),
  54. errdetail("Expected size %d, got %d",
  55. BLCKSZ, raw_page_size)));
  56. switch (BrinPageType(page))
  57. {
  58. case BRIN_PAGETYPE_META:
  59. type = "meta";
  60. break;
  61. case BRIN_PAGETYPE_REVMAP:
  62. type = "revmap";
  63. break;
  64. case BRIN_PAGETYPE_REGULAR:
  65. type = "regular";
  66. break;
  67. default:
  68. type = psprintf("unknown (%02x)", BrinPageType(page));
  69. break;
  70. }
  71. PG_RETURN_TEXT_P(cstring_to_text(type));
  72. }
  73. /*
  74. * Verify that the given bytea contains a BRIN page of the indicated page
  75. * type, or die in the attempt. A pointer to the page is returned.
  76. */
  77. static Page
  78. verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
  79. {
  80. Page page;
  81. int raw_page_size;
  82. raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
  83. if (raw_page_size != BLCKSZ)
  84. ereport(ERROR,
  85. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  86. errmsg("input page too small"),
  87. errdetail("Expected size %d, got %d",
  88. BLCKSZ, raw_page_size)));
  89. page = VARDATA(raw_page);
  90. /* verify the special space says this page is what we want */
  91. if (BrinPageType(page) != type)
  92. ereport(ERROR,
  93. (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  94. errmsg("page is not a BRIN page of type \"%s\"", strtype),
  95. errdetail("Expected special type %08x, got %08x.",
  96. type, BrinPageType(page))));
  97. return page;
  98. }
  99. /*
  100. * Extract all item values from a BRIN index page
  101. *
  102. * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
  103. */
  104. Datum
  105. brin_page_items(PG_FUNCTION_ARGS)
  106. {
  107. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  108. Oid indexRelid = PG_GETARG_OID(1);
  109. ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
  110. TupleDesc tupdesc;
  111. MemoryContext oldcontext;
  112. Tuplestorestate *tupstore;
  113. Relation indexRel;
  114. brin_column_state **columns;
  115. BrinDesc *bdesc;
  116. BrinMemTuple *dtup;
  117. Page page;
  118. OffsetNumber offset;
  119. AttrNumber attno;
  120. bool unusedItem;
  121. if (!superuser())
  122. ereport(ERROR,
  123. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  124. (errmsg("must be superuser to use raw page functions"))));
  125. /* check to see if caller supports us returning a tuplestore */
  126. if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
  127. ereport(ERROR,
  128. (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  129. errmsg("set-valued function called in context that cannot accept a set")));
  130. if (!(rsinfo->allowedModes & SFRM_Materialize) ||
  131. rsinfo->expectedDesc == NULL)
  132. ereport(ERROR,
  133. (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  134. errmsg("materialize mode required, but it is not allowed in this context")));
  135. /* Build a tuple descriptor for our result type */
  136. if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
  137. elog(ERROR, "return type must be a row type");
  138. /* Build tuplestore to hold the result rows */
  139. oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
  140. tupstore = tuplestore_begin_heap(true, false, work_mem);
  141. rsinfo->returnMode = SFRM_Materialize;
  142. rsinfo->setResult = tupstore;
  143. rsinfo->setDesc = tupdesc;
  144. MemoryContextSwitchTo(oldcontext);
  145. indexRel = index_open(indexRelid, AccessShareLock);
  146. bdesc = brin_build_desc(indexRel);
  147. /* minimally verify the page we got */
  148. page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
  149. /*
  150. * Initialize output functions for all indexed datatypes; simplifies
  151. * calling them later.
  152. */
  153. columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
  154. for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
  155. {
  156. Oid output;
  157. bool isVarlena;
  158. BrinOpcInfo *opcinfo;
  159. int i;
  160. brin_column_state *column;
  161. opcinfo = bdesc->bd_info[attno - 1];
  162. column = palloc(offsetof(brin_column_state, outputFn) +
  163. sizeof(FmgrInfo) * opcinfo->oi_nstored);
  164. column->nstored = opcinfo->oi_nstored;
  165. for (i = 0; i < opcinfo->oi_nstored; i++)
  166. {
  167. getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
  168. fmgr_info(output, &column->outputFn[i]);
  169. }
  170. columns[attno - 1] = column;
  171. }
  172. offset = FirstOffsetNumber;
  173. unusedItem = false;
  174. dtup = NULL;
  175. for (;;)
  176. {
  177. Datum values[7];
  178. bool nulls[7];
  179. /*
  180. * This loop is called once for every attribute of every tuple in the
  181. * page. At the start of a tuple, we get a NULL dtup; that's our
  182. * signal for obtaining and decoding the next one. If that's not the
  183. * case, we output the next attribute.
  184. */
  185. if (dtup == NULL)
  186. {
  187. ItemId itemId;
  188. /* verify item status: if there's no data, we can't decode */
  189. itemId = PageGetItemId(page, offset);
  190. if (ItemIdIsUsed(itemId))
  191. {
  192. dtup = brin_deform_tuple(bdesc,
  193. (BrinTuple *) PageGetItem(page, itemId),
  194. NULL);
  195. attno = 1;
  196. unusedItem = false;
  197. }
  198. else
  199. unusedItem = true;
  200. }
  201. else
  202. attno++;
  203. MemSet(nulls, 0, sizeof(nulls));
  204. if (unusedItem)
  205. {
  206. values[0] = UInt16GetDatum(offset);
  207. nulls[1] = true;
  208. nulls[2] = true;
  209. nulls[3] = true;
  210. nulls[4] = true;
  211. nulls[5] = true;
  212. nulls[6] = true;
  213. }
  214. else
  215. {
  216. int att = attno - 1;
  217. values[0] = UInt16GetDatum(offset);
  218. values[1] = UInt32GetDatum(dtup->bt_blkno);
  219. values[2] = UInt16GetDatum(attno);
  220. values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
  221. values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
  222. values[5] = BoolGetDatum(dtup->bt_placeholder);
  223. if (!dtup->bt_columns[att].bv_allnulls)
  224. {
  225. BrinValues *bvalues = &dtup->bt_columns[att];
  226. StringInfoData s;
  227. bool first;
  228. int i;
  229. initStringInfo(&s);
  230. appendStringInfoChar(&s, '{');
  231. first = true;
  232. for (i = 0; i < columns[att]->nstored; i++)
  233. {
  234. char *val;
  235. if (!first)
  236. appendStringInfoString(&s, " .. ");
  237. first = false;
  238. val = OutputFunctionCall(&columns[att]->outputFn[i],
  239. bvalues->bv_values[i]);
  240. appendStringInfoString(&s, val);
  241. pfree(val);
  242. }
  243. appendStringInfoChar(&s, '}');
  244. values[6] = CStringGetTextDatum(s.data);
  245. pfree(s.data);
  246. }
  247. else
  248. {
  249. nulls[6] = true;
  250. }
  251. }
  252. tuplestore_putvalues(tupstore, tupdesc, values, nulls);
  253. /*
  254. * If the item was unused, jump straight to the next one; otherwise,
  255. * the only cleanup needed here is to set our signal to go to the next
  256. * tuple in the following iteration, by freeing the current one.
  257. */
  258. if (unusedItem)
  259. offset = OffsetNumberNext(offset);
  260. else if (attno >= bdesc->bd_tupdesc->natts)
  261. {
  262. pfree(dtup);
  263. dtup = NULL;
  264. offset = OffsetNumberNext(offset);
  265. }
  266. /*
  267. * If we're beyond the end of the page, we're done.
  268. */
  269. if (offset > PageGetMaxOffsetNumber(page))
  270. break;
  271. }
  272. /* clean up and return the tuplestore */
  273. brin_free_desc(bdesc);
  274. tuplestore_donestoring(tupstore);
  275. index_close(indexRel, AccessShareLock);
  276. return (Datum) 0;
  277. }
  278. Datum
  279. brin_metapage_info(PG_FUNCTION_ARGS)
  280. {
  281. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  282. Page page;
  283. BrinMetaPageData *meta;
  284. TupleDesc tupdesc;
  285. Datum values[4];
  286. bool nulls[4];
  287. HeapTuple htup;
  288. if (!superuser())
  289. ereport(ERROR,
  290. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  291. (errmsg("must be superuser to use raw page functions"))));
  292. page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
  293. /* Build a tuple descriptor for our result type */
  294. if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
  295. elog(ERROR, "return type must be a row type");
  296. tupdesc = BlessTupleDesc(tupdesc);
  297. /* Extract values from the metapage */
  298. meta = (BrinMetaPageData *) PageGetContents(page);
  299. MemSet(nulls, 0, sizeof(nulls));
  300. values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
  301. values[1] = Int32GetDatum(meta->brinVersion);
  302. values[2] = Int32GetDatum(meta->pagesPerRange);
  303. values[3] = Int64GetDatum(meta->lastRevmapPage);
  304. htup = heap_form_tuple(tupdesc, values, nulls);
  305. PG_RETURN_DATUM(HeapTupleGetDatum(htup));
  306. }
  307. /*
  308. * Return the TID array stored in a BRIN revmap page
  309. */
  310. Datum
  311. brin_revmap_data(PG_FUNCTION_ARGS)
  312. {
  313. struct
  314. {
  315. ItemPointerData *tids;
  316. int idx;
  317. } *state;
  318. FuncCallContext *fctx;
  319. if (!superuser())
  320. ereport(ERROR,
  321. (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  322. (errmsg("must be superuser to use raw page functions"))));
  323. if (SRF_IS_FIRSTCALL())
  324. {
  325. bytea *raw_page = PG_GETARG_BYTEA_P(0);
  326. MemoryContext mctx;
  327. Page page;
  328. /* minimally verify the page we got */
  329. page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
  330. /* create a function context for cross-call persistence */
  331. fctx = SRF_FIRSTCALL_INIT();
  332. /* switch to memory context appropriate for multiple function calls */
  333. mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
  334. state = palloc(sizeof(*state));
  335. state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
  336. state->idx = 0;
  337. fctx->user_fctx = state;
  338. MemoryContextSwitchTo(mctx);
  339. }
  340. fctx = SRF_PERCALL_SETUP();
  341. state = fctx->user_fctx;
  342. if (state->idx < REVMAP_PAGE_MAXITEMS)
  343. SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
  344. SRF_RETURN_DONE(fctx);
  345. }