You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

_int_selfuncs.c 8.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /*-------------------------------------------------------------------------
  2. *
  3. * _int_selfuncs.c
  4. * Functions for selectivity estimation of intarray operators
  5. *
  6. * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  7. * Portions Copyright (c) 1994, Regents of the University of California
  8. *
  9. *
  10. * IDENTIFICATION
  11. * contrib/intarray/_int_selfuncs.c
  12. *
  13. *-------------------------------------------------------------------------
  14. */
  15. #include "postgres.h"
  16. #include "_int.h"
  17. #include "access/htup_details.h"
  18. #include "catalog/pg_operator.h"
  19. #include "catalog/pg_statistic.h"
  20. #include "catalog/pg_type.h"
  21. #include "utils/builtins.h"
  22. #include "utils/selfuncs.h"
  23. #include "utils/syscache.h"
  24. #include "utils/lsyscache.h"
  25. #include "miscadmin.h"
  26. PG_FUNCTION_INFO_V1(_int_overlap_sel);
  27. PG_FUNCTION_INFO_V1(_int_contains_sel);
  28. PG_FUNCTION_INFO_V1(_int_contained_sel);
  29. PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
  30. PG_FUNCTION_INFO_V1(_int_contains_joinsel);
  31. PG_FUNCTION_INFO_V1(_int_contained_joinsel);
  32. PG_FUNCTION_INFO_V1(_int_matchsel);
  33. static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs,
  34. int nmncelems, float4 minfreq);
  35. static int compare_val_int4(const void *a, const void *b);
  36. /*
  37. * Wrappers around the default array selectivity estimation functions.
  38. *
  39. * The default array selectivity estimators for the @>, && and <@ operators
  40. * work fine for integer arrays. However, if we tried to just use arraycontsel
  41. * and arraycontjoinsel directly as the cost estimator functions for our
  42. * operators, they would not work as intended, because they look at the
  43. * operator's OID. Our operators behave exactly like the built-in anyarray
  44. * versions, but we must tell the cost estimator functions which built-in
  45. * operators they correspond to. These wrappers just replace the operator
  46. * OID with the corresponding built-in operator's OID, and call the built-in
  47. * function.
  48. */
  49. Datum
  50. _int_overlap_sel(PG_FUNCTION_ARGS)
  51. {
  52. PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
  53. PG_GETARG_DATUM(0),
  54. ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
  55. PG_GETARG_DATUM(2),
  56. PG_GETARG_DATUM(3)));
  57. }
  58. Datum
  59. _int_contains_sel(PG_FUNCTION_ARGS)
  60. {
  61. PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
  62. PG_GETARG_DATUM(0),
  63. ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
  64. PG_GETARG_DATUM(2),
  65. PG_GETARG_DATUM(3)));
  66. }
  67. Datum
  68. _int_contained_sel(PG_FUNCTION_ARGS)
  69. {
  70. PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
  71. PG_GETARG_DATUM(0),
  72. ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
  73. PG_GETARG_DATUM(2),
  74. PG_GETARG_DATUM(3)));
  75. }
  76. Datum
  77. _int_overlap_joinsel(PG_FUNCTION_ARGS)
  78. {
  79. PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
  80. PG_GETARG_DATUM(0),
  81. ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
  82. PG_GETARG_DATUM(2),
  83. PG_GETARG_DATUM(3),
  84. PG_GETARG_DATUM(4)));
  85. }
  86. Datum
  87. _int_contains_joinsel(PG_FUNCTION_ARGS)
  88. {
  89. PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
  90. PG_GETARG_DATUM(0),
  91. ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
  92. PG_GETARG_DATUM(2),
  93. PG_GETARG_DATUM(3),
  94. PG_GETARG_DATUM(4)));
  95. }
  96. Datum
  97. _int_contained_joinsel(PG_FUNCTION_ARGS)
  98. {
  99. PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
  100. PG_GETARG_DATUM(0),
  101. ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
  102. PG_GETARG_DATUM(2),
  103. PG_GETARG_DATUM(3),
  104. PG_GETARG_DATUM(4)));
  105. }
  106. /*
  107. * _int_matchsel -- restriction selectivity function for intarray @@ query_int
  108. */
  109. Datum
  110. _int_matchsel(PG_FUNCTION_ARGS)
  111. {
  112. PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
  113. List *args = (List *) PG_GETARG_POINTER(2);
  114. int varRelid = PG_GETARG_INT32(3);
  115. VariableStatData vardata;
  116. Node *other;
  117. bool varonleft;
  118. Selectivity selec;
  119. QUERYTYPE *query;
  120. Datum *mcelems = NULL;
  121. float4 *mcefreqs = NULL;
  122. int nmcelems = 0;
  123. float4 minfreq = 0.0;
  124. float4 nullfrac = 0.0;
  125. AttStatsSlot sslot;
  126. /*
  127. * If expression is not "variable @@ something" or "something @@ variable"
  128. * then punt and return a default estimate.
  129. */
  130. if (!get_restriction_variable(root, args, varRelid,
  131. &vardata, &other, &varonleft))
  132. PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
  133. /*
  134. * Variable should be int[]. We don't support cases where variable is
  135. * query_int.
  136. */
  137. if (vardata.vartype != INT4ARRAYOID)
  138. PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
  139. /*
  140. * Can't do anything useful if the something is not a constant, either.
  141. */
  142. if (!IsA(other, Const))
  143. {
  144. ReleaseVariableStats(vardata);
  145. PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
  146. }
  147. /*
  148. * The "@@" operator is strict, so we can cope with NULL right away.
  149. */
  150. if (((Const *) other)->constisnull)
  151. {
  152. ReleaseVariableStats(vardata);
  153. PG_RETURN_FLOAT8(0.0);
  154. }
  155. /* The caller made sure the const is a query, so get it now */
  156. query = DatumGetQueryTypeP(((Const *) other)->constvalue);
  157. /* Empty query matches nothing */
  158. if (query->size == 0)
  159. {
  160. ReleaseVariableStats(vardata);
  161. return (Selectivity) 0.0;
  162. }
  163. /*
  164. * Get the statistics for the intarray column.
  165. *
  166. * We're interested in the Most-Common-Elements list, and the NULL
  167. * fraction.
  168. */
  169. if (HeapTupleIsValid(vardata.statsTuple))
  170. {
  171. Form_pg_statistic stats;
  172. stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
  173. nullfrac = stats->stanullfrac;
  174. /*
  175. * For an int4 array, the default array type analyze function will
  176. * collect a Most Common Elements list, which is an array of int4s.
  177. */
  178. if (get_attstatsslot(&sslot, vardata.statsTuple,
  179. STATISTIC_KIND_MCELEM, InvalidOid,
  180. ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
  181. {
  182. Assert(sslot.valuetype == INT4OID);
  183. /*
  184. * There should be three more Numbers than Values, because the
  185. * last three (for intarray) cells are taken for minimal, maximal
  186. * and nulls frequency. Punt if not.
  187. */
  188. if (sslot.nnumbers == sslot.nvalues + 3)
  189. {
  190. /* Grab the lowest frequency. */
  191. minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
  192. mcelems = sslot.values;
  193. mcefreqs = sslot.numbers;
  194. nmcelems = sslot.nvalues;
  195. }
  196. }
  197. }
  198. else
  199. memset(&sslot, 0, sizeof(sslot));
  200. /* Process the logical expression in the query, using the stats */
  201. selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
  202. mcelems, mcefreqs, nmcelems, minfreq);
  203. /* MCE stats count only non-null rows, so adjust for null rows. */
  204. selec *= (1.0 - nullfrac);
  205. free_attstatsslot(&sslot);
  206. ReleaseVariableStats(vardata);
  207. CLAMP_PROBABILITY(selec);
  208. PG_RETURN_FLOAT8((float8) selec);
  209. }
  210. /*
  211. * Estimate selectivity of single intquery operator
  212. */
  213. static Selectivity
  214. int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
  215. int nmcelems, float4 minfreq)
  216. {
  217. Selectivity selec;
  218. /* since this function recurses, it could be driven to stack overflow */
  219. check_stack_depth();
  220. if (item->type == VAL)
  221. {
  222. Datum *searchres;
  223. if (mcelems == NULL)
  224. return (Selectivity) DEFAULT_EQ_SEL;
  225. searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
  226. sizeof(Datum), compare_val_int4);
  227. if (searchres)
  228. {
  229. /*
  230. * The element is in MCELEM. Return precise selectivity (or at
  231. * least as precise as ANALYZE could find out).
  232. */
  233. selec = mcefreqs[searchres - mcelems];
  234. }
  235. else
  236. {
  237. /*
  238. * The element is not in MCELEM. Punt, but assume that the
  239. * selectivity cannot be more than minfreq / 2.
  240. */
  241. selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
  242. }
  243. }
  244. else if (item->type == OPR)
  245. {
  246. /* Current query node is an operator */
  247. Selectivity s1,
  248. s2;
  249. s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
  250. minfreq);
  251. switch (item->val)
  252. {
  253. case (int32) '!':
  254. selec = 1.0 - s1;
  255. break;
  256. case (int32) '&':
  257. s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
  258. nmcelems, minfreq);
  259. selec = s1 * s2;
  260. break;
  261. case (int32) '|':
  262. s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
  263. nmcelems, minfreq);
  264. selec = s1 + s2 - s1 * s2;
  265. break;
  266. default:
  267. elog(ERROR, "unrecognized operator: %d", item->val);
  268. selec = 0; /* keep compiler quiet */
  269. break;
  270. }
  271. }
  272. else
  273. {
  274. elog(ERROR, "unrecognized int query item type: %u", item->type);
  275. selec = 0; /* keep compiler quiet */
  276. }
  277. /* Clamp intermediate results to stay sane despite roundoff error */
  278. CLAMP_PROBABILITY(selec);
  279. return selec;
  280. }
  281. /*
  282. * Comparison function for binary search in mcelem array.
  283. */
  284. static int
  285. compare_val_int4(const void *a, const void *b)
  286. {
  287. int32 key = *(int32 *) a;
  288. const Datum *t = (const Datum *) b;
  289. return key - DatumGetInt32(*t);
  290. }