@@ -244,6 +244,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
244244 bool varonleft )
245245{
246246 double selec ;
247+ bool isdefault ;
247248
248249 /*
249250 * If the constant is NULL, assume operator is strict and return zero, ie,
@@ -344,7 +345,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
344345 * all the not-common values share this remaining fraction
345346 * equally, so we divide by the number of other distinct values.
346347 */
347- otherdistinct = get_variable_numdistinct (vardata ) - nnumbers ;
348+ otherdistinct = get_variable_numdistinct (vardata , & isdefault ) - nnumbers ;
348349 if (otherdistinct > 1 )
349350 selec /= otherdistinct ;
350351
@@ -366,7 +367,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
366367 * of distinct values and assuming they are equally common. (The guess
367368 * is unlikely to be very good, but we do know a few special cases.)
368369 */
369- selec = 1.0 / get_variable_numdistinct (vardata );
370+ selec = 1.0 / get_variable_numdistinct (vardata , & isdefault );
370371 }
371372
372373 /* result should be in range, but make sure... */
@@ -384,6 +385,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
384385 bool varonleft )
385386{
386387 double selec ;
388+ bool isdefault ;
387389
388390 /*
389391 * If we matched the var to a unique index, assume there is exactly one
@@ -414,7 +416,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
414416 * idea?)
415417 */
416418 selec = 1.0 - stats -> stanullfrac ;
417- ndistinct = get_variable_numdistinct (vardata );
419+ ndistinct = get_variable_numdistinct (vardata , & isdefault );
418420 if (ndistinct > 1 )
419421 selec /= ndistinct ;
420422
@@ -441,7 +443,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
441443 * of distinct values and assuming they are equally common. (The guess
442444 * is unlikely to be very good, but we do know a few special cases.)
443445 */
444- selec = 1.0 / get_variable_numdistinct (vardata );
446+ selec = 1.0 / get_variable_numdistinct (vardata , & isdefault );
445447 }
446448
447449 /* result should be in range, but make sure... */
@@ -2071,6 +2073,8 @@ eqjoinsel_inner(Oid operator,
20712073 double selec ;
20722074 double nd1 ;
20732075 double nd2 ;
2076+ bool isdefault1 ;
2077+ bool isdefault2 ;
20742078 Form_pg_statistic stats1 = NULL ;
20752079 Form_pg_statistic stats2 = NULL ;
20762080 bool have_mcvs1 = false;
@@ -2084,8 +2088,8 @@ eqjoinsel_inner(Oid operator,
20842088 float4 * numbers2 = NULL ;
20852089 int nnumbers2 = 0 ;
20862090
2087- nd1 = get_variable_numdistinct (vardata1 );
2088- nd2 = get_variable_numdistinct (vardata2 );
2091+ nd1 = get_variable_numdistinct (vardata1 , & isdefault1 );
2092+ nd2 = get_variable_numdistinct (vardata2 , & isdefault2 );
20892093
20902094 if (HeapTupleIsValid (vardata1 -> statsTuple ))
20912095 {
@@ -2296,6 +2300,8 @@ eqjoinsel_semi(Oid operator,
22962300 double selec ;
22972301 double nd1 ;
22982302 double nd2 ;
2303+ bool isdefault1 ;
2304+ bool isdefault2 ;
22992305 Form_pg_statistic stats1 = NULL ;
23002306 bool have_mcvs1 = false;
23012307 Datum * values1 = NULL ;
@@ -2308,8 +2314,8 @@ eqjoinsel_semi(Oid operator,
23082314 float4 * numbers2 = NULL ;
23092315 int nnumbers2 = 0 ;
23102316
2311- nd1 = get_variable_numdistinct (vardata1 );
2312- nd2 = get_variable_numdistinct (vardata2 );
2317+ nd1 = get_variable_numdistinct (vardata1 , & isdefault1 );
2318+ nd2 = get_variable_numdistinct (vardata2 , & isdefault2 );
23132319
23142320 /*
23152321 * We clamp nd2 to be not more than what we estimate the inner relation's
@@ -2441,7 +2447,7 @@ eqjoinsel_semi(Oid operator,
24412447 * nd2 is default, punt and assume half of the uncertain rows have
24422448 * join partners.
24432449 */
2444- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT )
2450+ if (! isdefault1 && ! isdefault2 )
24452451 {
24462452 nd1 -= nmatches ;
24472453 nd2 -= nmatches ;
@@ -2464,7 +2470,7 @@ eqjoinsel_semi(Oid operator,
24642470 */
24652471 double nullfrac1 = stats1 ? stats1 -> stanullfrac : 0.0 ;
24662472
2467- if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT )
2473+ if (! isdefault1 && ! isdefault2 )
24682474 {
24692475 if (nd1 <= nd2 || nd2 < 0 )
24702476 selec = 1.0 - nullfrac1 ;
@@ -2955,9 +2961,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
29552961{
29562962 GroupVarInfo * varinfo ;
29572963 double ndistinct ;
2964+ bool isdefault ;
29582965 ListCell * lc ;
29592966
2960- ndistinct = get_variable_numdistinct (vardata );
2967+ ndistinct = get_variable_numdistinct (vardata , & isdefault );
29612968
29622969 /* cannot use foreach here because of possible list_delete */
29632970 lc = list_head (varinfos );
@@ -3292,14 +3299,23 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
32923299 stanullfrac ,
32933300 mcvfreq ,
32943301 avgfreq ;
3302+ bool isdefault ;
32953303 float4 * numbers ;
32963304 int nnumbers ;
32973305
32983306 examine_variable (root , hashkey , 0 , & vardata );
32993307
3300- /* Get number of distinct values and fraction that are null */
3301- ndistinct = get_variable_numdistinct (& vardata );
3308+ /* Get number of distinct values */
3309+ ndistinct = get_variable_numdistinct (& vardata , & isdefault );
33023310
3311+ /* If ndistinct isn't real, punt and return 0.1, per comments above */
3312+ if (isdefault )
3313+ {
3314+ ReleaseVariableStats (vardata );
3315+ return (Selectivity ) 0.1 ;
3316+ }
3317+
3318+ /* Get fraction that are null */
33033319 if (HeapTupleIsValid (vardata .statsTuple ))
33043320 {
33053321 Form_pg_statistic stats ;
@@ -3308,19 +3324,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
33083324 stanullfrac = stats -> stanullfrac ;
33093325 }
33103326 else
3311- {
3312- /*
3313- * Believe a default ndistinct only if it came from stats. Otherwise
3314- * punt and return 0.1, per comments above.
3315- */
3316- if (ndistinct == DEFAULT_NUM_DISTINCT )
3317- {
3318- ReleaseVariableStats (vardata );
3319- return (Selectivity ) 0.1 ;
3320- }
3321-
33223327 stanullfrac = 0.0 ;
3323- }
33243328
33253329 /* Compute avg freq of all distinct data values in raw relation */
33263330 avgfreq = (1.0 - stanullfrac ) / ndistinct ;
@@ -4414,16 +4418,20 @@ examine_simple_variable(PlannerInfo *root, Var *var,
44144418 * Estimate the number of distinct values of a variable.
44154419 *
44164420 * vardata: results of examine_variable
4421+ * *isdefault: set to TRUE if the result is a default rather than based on
4422+ * anything meaningful.
44174423 *
44184424 * NB: be careful to produce an integral result, since callers may compare
44194425 * the result to exact integer counts.
44204426 */
44214427double
4422- get_variable_numdistinct (VariableStatData * vardata )
4428+ get_variable_numdistinct (VariableStatData * vardata , bool * isdefault )
44234429{
44244430 double stadistinct ;
44254431 double ntuples ;
44264432
4433+ * isdefault = false;
4434+
44274435 /*
44284436 * Determine the stadistinct value to use. There are cases where we can
44294437 * get an estimate even without a pg_statistic entry, or can get a better
@@ -4496,10 +4504,16 @@ get_variable_numdistinct(VariableStatData *vardata)
44964504 * Otherwise we need to get the relation size; punt if not available.
44974505 */
44984506 if (vardata -> rel == NULL )
4507+ {
4508+ * isdefault = true;
44994509 return DEFAULT_NUM_DISTINCT ;
4510+ }
45004511 ntuples = vardata -> rel -> tuples ;
45014512 if (ntuples <= 0.0 )
4513+ {
4514+ * isdefault = true;
45024515 return DEFAULT_NUM_DISTINCT ;
4516+ }
45034517
45044518 /*
45054519 * If we had a relative estimate, use that.
@@ -4509,11 +4523,13 @@ get_variable_numdistinct(VariableStatData *vardata)
45094523
45104524 /*
45114525 * With no data, estimate ndistinct = ntuples if the table is small, else
4512- * use default.
4526+ * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small"
4527+ * so that the behavior isn't discontinuous.
45134528 */
45144529 if (ntuples < DEFAULT_NUM_DISTINCT )
45154530 return ntuples ;
45164531
4532+ * isdefault = true;
45174533 return DEFAULT_NUM_DISTINCT ;
45184534}
45194535
0 commit comments