Browse Source

postgres_fdw: Fix costing of pre-sorted foreign paths with local stats.

Commit aa09cd242 modified estimate_path_cost_size() so that it reuses
cached costs of a basic foreign path for a given foreign-base/join
relation when costing pre-sorted foreign paths for that relation, but it
incorrectly re-computed retrieved_rows, an estimated number of rows
fetched from the remote side, which is needed for costing both the basic
and pre-sorted foreign paths.  To fix, handle retrieved_rows the same way
as the cached costs: store in that relation's fpinfo the retrieved_rows
estimate computed for costing the basic foreign path, and reuse it when
costing the pre-sorted foreign paths.  Also, reuse the rows/width
estimates stored in that relation's fpinfo when costing the pre-sorted
foreign paths, to make the code consistent.

In commit ffab494a4, to extend the costing mentioned above to the
foreign-grouping case, I changed add_foreign_grouping_paths() to store
the rows estimate for a given foreign-grouped relation in that relation's
RelOptInfo for reuse.  This patch makes that change unnecessary, because
the rows estimate is already stored in that relation's fpinfo, which this
patch reuses when costing a foreign path for that relation with the
sortClause ordering; so remove that change.

In passing, fix a thinko in commit 7012b132d: in estimate_path_cost_size(),
the width estimate for a given foreign-grouped relation, which is to be
stored in that relation's fpinfo, was incorrectly reset when costing a
basic foreign path for that relation with local stats.

Apply the patch to HEAD only to avoid destabilizing existing plan choices.

Author: Etsuro Fujita
Discussion: https://postgr.es/m/CAPmGK17jaJLPDEkgnP2VmkOg=5wT8YQ1CqssU8JRpZ_NSE+dqQ@mail.gmail.com
tags/REL_12_BETA2
Etsuro Fujita 1 month ago
parent
commit
08d2d58a2a
2 changed files with 63 additions and 41 deletions
  1. contrib/postgres_fdw/postgres_fdw.c (+56, -39)
  2. contrib/postgres_fdw/postgres_fdw.h (+7, -2)

+ 56
- 39
contrib/postgres_fdw/postgres_fdw.c View File

@@ -661,10 +661,11 @@ postgresGetForeignRelSize(PlannerInfo *root,
661 661
 	cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
662 662
 
663 663
 	/*
664
-	 * Set cached relation costs to some negative value, so that we can detect
665
-	 * when they are set to some sensible costs during one (usually the first)
666
-	 * of the calls to estimate_path_cost_size().
664
+	 * Set # of retrieved rows and cached relation costs to some negative
665
+	 * value, so that we can detect when they are set to some sensible values,
666
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
667 667
 	 */
668
+	fpinfo->retrieved_rows = -1;
668 669
 	fpinfo->rel_startup_cost = -1;
669 670
 	fpinfo->rel_total_cost = -1;
670 671
 
@@ -2623,7 +2624,6 @@ estimate_path_cost_size(PlannerInfo *root,
2623 2624
 	int			width;
2624 2625
 	Cost		startup_cost;
2625 2626
 	Cost		total_cost;
2626
-	Cost		cpu_per_tuple;
2627 2627
 
2628 2628
 	/* Make sure the core code has set up the relation's reltarget */
2629 2629
 	Assert(foreignrel->reltarget);
@@ -2736,26 +2736,20 @@ estimate_path_cost_size(PlannerInfo *root,
2736 2736
 		 */
2737 2737
 		Assert(param_join_conds == NIL);
2738 2738
 
2739
-		/*
2740
-		 * Use rows/width estimates made by set_baserel_size_estimates() for
2741
-		 * base foreign relations and set_joinrel_size_estimates() for join
2742
-		 * between foreign relations.
2743
-		 */
2744
-		rows = foreignrel->rows;
2745
-		width = foreignrel->reltarget->width;
2746
-
2747
-		/* Back into an estimate of the number of retrieved rows. */
2748
-		retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
2749
-
2750 2739
 		/*
2751 2740
 		 * We will come here again and again with different set of pathkeys or
2752 2741
 		 * additional post-scan/join-processing steps that caller wants to
2753
-		 * cost.  We don't need to calculate the costs of the underlying scan,
2754
-		 * join, or grouping each time.  Instead, use the costs if we have
2755
-		 * cached them already.
2742
+		 * cost.  We don't need to calculate the cost/size estimates for the
2743
+		 * underlying scan, join, or grouping each time.  Instead, use those
2744
+		 * estimates if we have cached them already.
2756 2745
 		 */
2757 2746
 		if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
2758 2747
 		{
2748
+			Assert(fpinfo->retrieved_rows >= 1);
2749
+
2750
+			rows = fpinfo->rows;
2751
+			retrieved_rows = fpinfo->retrieved_rows;
2752
+			width = fpinfo->width;
2759 2753
 			startup_cost = fpinfo->rel_startup_cost;
2760 2754
 			run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
2761 2755
 
@@ -2785,6 +2779,10 @@ estimate_path_cost_size(PlannerInfo *root,
2785 2779
 			QualCost	remote_conds_cost;
2786 2780
 			double		nrows;
2787 2781
 
2782
+			/* Use rows/width estimates made by the core code. */
2783
+			rows = foreignrel->rows;
2784
+			width = foreignrel->reltarget->width;
2785
+
2788 2786
 			/* For join we expect inner and outer relations set */
2789 2787
 			Assert(fpinfo->innerrel && fpinfo->outerrel);
2790 2788
 
@@ -2793,7 +2791,12 @@ estimate_path_cost_size(PlannerInfo *root,
2793 2791
 
2794 2792
 			/* Estimate of number of rows in cross product */
2795 2793
 			nrows = fpinfo_i->rows * fpinfo_o->rows;
2796
-			/* Clamp retrieved rows estimate to at most size of cross product */
2794
+
2795
+			/*
2796
+			 * Back into an estimate of the number of retrieved rows.  Just in
2797
+			 * case this is nuts, clamp to at most nrow.
2798
+			 */
2799
+			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
2797 2800
 			retrieved_rows = Min(retrieved_rows, nrows);
2798 2801
 
2799 2802
 			/*
@@ -2871,9 +2874,8 @@ estimate_path_cost_size(PlannerInfo *root,
2871 2874
 
2872 2875
 			ofpinfo = (PgFdwRelationInfo *) outerrel->fdw_private;
2873 2876
 
2874
-			/* Get rows and width from input rel */
2877
+			/* Get rows from input rel */
2875 2878
 			input_rows = ofpinfo->rows;
2876
-			width = ofpinfo->width;
2877 2879
 
2878 2880
 			/* Collect statistics about aggregates for estimating costs. */
2879 2881
 			MemSet(&aggcosts, 0, sizeof(AggClauseCosts));
@@ -2920,6 +2922,9 @@ estimate_path_cost_size(PlannerInfo *root,
2920 2922
 				rows = retrieved_rows = numGroups;
2921 2923
 			}
2922 2924
 
2925
+			/* Use width estimate made by the core code. */
2926
+			width = foreignrel->reltarget->width;
2927
+
2923 2928
 			/*-----
2924 2929
 			 * Startup cost includes:
2925 2930
 			 *	  1. Startup cost for underneath input relation, adjusted for
@@ -2966,7 +2971,17 @@ estimate_path_cost_size(PlannerInfo *root,
2966 2971
 		}
2967 2972
 		else
2968 2973
 		{
2969
-			/* Clamp retrieved rows estimates to at most foreignrel->tuples. */
2974
+			Cost		cpu_per_tuple;
2975
+
2976
+			/* Use rows/width estimates made by set_baserel_size_estimates. */
2977
+			rows = foreignrel->rows;
2978
+			width = foreignrel->reltarget->width;
2979
+
2980
+			/*
2981
+			 * Back into an estimate of the number of retrieved rows.  Just in
2982
+			 * case this is nuts, clamp to at most foreignrel->tuples.
2983
+			 */
2984
+			retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
2970 2985
 			retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
2971 2986
 
2972 2987
 			/*
@@ -3043,18 +3058,20 @@ estimate_path_cost_size(PlannerInfo *root,
3043 3058
 	}
3044 3059
 
3045 3060
 	/*
3046
-	 * Cache the costs for scans, joins, or groupings without any
3047
-	 * parameterization, pathkeys, or additional post-scan/join-processing
3048
-	 * steps, before adding the costs for transferring data from the foreign
3049
-	 * server.  These costs are useful for costing remote joins involving this
3050
-	 * relation or costing other remote operations for this relation such as
3051
-	 * remote sorts and remote LIMIT restrictions, when the costs can not be
3052
-	 * obtained from the foreign server.  This function will be called at
3053
-	 * least once for every foreign relation without any parameterization,
3054
-	 * pathkeys, or additional post-scan/join-processing steps.
3061
+	 * Cache the retrieved rows and cost estimates for scans, joins, or
3062
+	 * groupings without any parameterization, pathkeys, or additional
3063
+	 * post-scan/join-processing steps, before adding the costs for
3064
+	 * transferring data from the foreign server.  These estimates are useful
3065
+	 * for costing remote joins involving this relation or costing other
3066
+	 * remote operations on this relation such as remote sorts and remote
3067
+	 * LIMIT restrictions, when the costs can not be obtained from the foreign
3068
+	 * server.  This function will be called at least once for every foreign
3069
+	 * relation without any parameterization, pathkeys, or additional
3070
+	 * post-scan/join-processing steps.
3055 3071
 	 */
3056 3072
 	if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
3057 3073
 	{
3074
+		fpinfo->retrieved_rows = retrieved_rows;
3058 3075
 		fpinfo->rel_startup_cost = startup_cost;
3059 3076
 		fpinfo->rel_total_cost = total_cost;
3060 3077
 	}
@@ -5157,10 +5174,11 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
5157 5174
 		fpinfo->user = NULL;
5158 5175
 
5159 5176
 	/*
5160
-	 * Set cached relation costs to some negative value, so that we can detect
5161
-	 * when they are set to some sensible costs, during one (usually the
5162
-	 * first) of the calls to estimate_path_cost_size().
5177
+	 * Set # of retrieved rows and cached relation costs to some negative
5178
+	 * value, so that we can detect when they are set to some sensible values,
5179
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
5163 5180
 	 */
5181
+	fpinfo->retrieved_rows = -1;
5164 5182
 	fpinfo->rel_startup_cost = -1;
5165 5183
 	fpinfo->rel_total_cost = -1;
5166 5184
 
@@ -5708,10 +5726,11 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
5708 5726
 	fpinfo->pushdown_safe = true;
5709 5727
 
5710 5728
 	/*
5711
-	 * Set cached relation costs to some negative value, so that we can detect
5712
-	 * when they are set to some sensible costs, during one (usually the
5713
-	 * first) of the calls to estimate_path_cost_size().
5729
+	 * Set # of retrieved rows and cached relation costs to some negative
5730
+	 * value, so that we can detect when they are set to some sensible values,
5731
+	 * during one (usually the first) of the calls to estimate_path_cost_size.
5714 5732
 	 */
5733
+	fpinfo->retrieved_rows = -1;
5715 5734
 	fpinfo->rel_startup_cost = -1;
5716 5735
 	fpinfo->rel_total_cost = -1;
5717 5736
 
@@ -5853,8 +5872,6 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
5853 5872
 	fpinfo->startup_cost = startup_cost;
5854 5873
 	fpinfo->total_cost = total_cost;
5855 5874
 
5856
-	grouped_rel->rows = fpinfo->rows;
5857
-
5858 5875
 	/* Create and add foreign path to the grouping relation. */
5859 5876
 	grouppath = create_foreign_upper_path(root,
5860 5877
 										  grouped_rel,

+ 7
- 2
contrib/postgres_fdw/postgres_fdw.h View File

@@ -59,12 +59,17 @@ typedef struct PgFdwRelationInfo
59 59
 	/* Selectivity of join conditions */
60 60
 	Selectivity joinclause_sel;
61 61
 
62
-	/* Estimated size and cost for a scan or join. */
62
+	/* Estimated size and cost for a scan, join, or grouping/aggregation. */
63 63
 	double		rows;
64 64
 	int			width;
65 65
 	Cost		startup_cost;
66 66
 	Cost		total_cost;
67
-	/* Costs excluding costs for transferring data from the foreign server */
67
+	/*
68
+	 * Estimated number of rows fetched from the foreign server, and costs
69
+	 * excluding costs for transferring those rows from the foreign server.
70
+	 * These are only used by estimate_path_cost_size().
71
+	 */
72
+	double		retrieved_rows;
68 73
 	Cost		rel_startup_cost;
69 74
 	Cost		rel_total_cost;
70 75
 

Loading…
Cancel
Save