@@ -46,7 +46,8 @@ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
4646static bool _bt_readfirstpage (IndexScanDesc scan , OffsetNumber offnum ,
4747 ScanDirection dir );
4848static bool _bt_readnextpage (IndexScanDesc scan , BlockNumber blkno ,
49- BlockNumber lastcurrblkno , ScanDirection dir );
49+ BlockNumber lastcurrblkno , ScanDirection dir ,
50+ bool seized );
5051static Buffer _bt_lock_and_validate_left (Relation rel , BlockNumber * blkno ,
5152 BlockNumber lastcurrblkno );
5253static bool _bt_endpoint (IndexScanDesc scan , ScanDirection dir );
@@ -888,7 +889,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
888889 ScanKey startKeys [INDEX_MAX_KEYS ];
889890 ScanKeyData notnullkeys [INDEX_MAX_KEYS ];
890891 int keysz = 0 ;
891- int i ;
892892 StrategyNumber strat_total ;
893893 BTScanPosItem * currItem ;
894894
@@ -924,33 +924,31 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
924924 {
925925 BlockNumber blkno ,
926926 lastcurrblkno ;
927- bool status ;
928927
929- status = _bt_parallel_seize (scan , & blkno , & lastcurrblkno , true);
928+ if (!_bt_parallel_seize (scan , & blkno , & lastcurrblkno , true))
929+ return false;
930930
931931 /*
932+ * Successfully seized the scan, which _bt_readfirstpage or possibly
933+ * _bt_readnextpage will release (unless the scan ends right away, in
934+ * which case we'll call _bt_parallel_done directly).
935+ *
932936 * Initialize arrays (when _bt_parallel_seize didn't already set up
933- * the next primitive index scan)
937+ * the next primitive index scan).
934938 */
935939 if (so -> numArrayKeys && !so -> needPrimScan )
936940 _bt_start_array_keys (scan , dir );
937941
938- if (!status )
939- return false;
940- else if (blkno == P_NONE )
941- {
942- _bt_parallel_done (scan );
943- return false;
944- }
945- else if (blkno != InvalidBlockNumber )
942+ Assert (blkno != P_NONE );
943+ if (blkno != InvalidBlockNumber )
946944 {
947945 Assert (!so -> needPrimScan );
948946
949947 /*
950948 * We anticipated starting another primitive scan, but some other
951949 * worker bet us to it
952950 */
953- if (!_bt_readnextpage (scan , blkno , lastcurrblkno , dir ))
951+ if (!_bt_readnextpage (scan , blkno , lastcurrblkno , dir , true ))
954952 return false;
955953 goto readcomplete ;
956954 }
@@ -1043,6 +1041,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
10431041 * We don't cast the decision in stone until we reach keys for the
10441042 * next attribute.
10451043 */
1044+ cur = so -> keyData ;
10461045 curattr = 1 ;
10471046 chosen = NULL ;
10481047 /* Also remember any scankey that implies a NOT NULL constraint */
@@ -1053,7 +1052,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
10531052 * pass to handle after-last-key processing. Actual exit from the
10541053 * loop is at one of the "break" statements below.
10551054 */
1056- for (cur = so -> keyData , i = 0 ;; cur ++ , i ++ )
1055+ for (int i = 0 ;; cur ++ , i ++ )
10571056 {
10581057 if (i >= so -> numberOfKeys || cur -> sk_attno != curattr )
10591058 {
@@ -1168,7 +1167,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
11681167 * initialized after initial-positioning scan keys are finalized.)
11691168 */
11701169 Assert (keysz <= INDEX_MAX_KEYS );
1171- for (i = 0 ; i < keysz ; i ++ )
1170+ for (int i = 0 ; i < keysz ; i ++ )
11721171 {
11731172 ScanKey cur = startKeys [i ];
11741173
@@ -2006,18 +2005,12 @@ _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
20062005/*
20072006 * _bt_steppage() -- Step to next page containing valid data for scan
20082007 *
2008+ * Wrapper on _bt_readnextpage that performs final steps for the current page.
2009+ *
20092010 * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
20102011 * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
20112012 * the pin eagerly earlier on. The scan must have so->currPos.currPage set to
20122013 * a valid block, in any case.
2013- *
2014- * This is a wrapper on _bt_readnextpage that performs final steps for the
2015- * current page. It sets up the _bt_readnextpage call using either local
2016- * state saved in so->currPos by the most recent _bt_readpage call, or using
2017- * shared parallel scan state (obtained by seizing the parallel scan here).
2018- *
2019- * Parallel scan callers that have already seized the scan should directly
2020- * call _bt_readnextpage, rather than calling here.
20212014 */
20222015static bool
20232016_bt_steppage (IndexScanDesc scan , ScanDirection dir )
@@ -2081,37 +2074,22 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
20812074 BTScanPosUnpinIfPinned (so -> currPos );
20822075
20832076 /* Walk to the next page with data */
2084- if (!scan -> parallel_scan )
2085- {
2086- /* Not parallel, so use local state set by the last _bt_readpage */
2087- if (ScanDirectionIsForward (dir ))
2088- blkno = so -> currPos .nextPage ;
2089- else
2090- blkno = so -> currPos .prevPage ;
2091- lastcurrblkno = so -> currPos .currPage ;
2092-
2093- /*
2094- * Cancel primitive index scans that were scheduled when the call to
2095- * _bt_readpage for currPos happened to use the opposite direction to
2096- * the one that we're stepping in now. (It's okay to leave the scan's
2097- * array keys as-is, since the next _bt_readpage will advance them.)
2098- */
2099- if (so -> currPos .dir != dir )
2100- so -> needPrimScan = false;
2101- }
2077+ if (ScanDirectionIsForward (dir ))
2078+ blkno = so -> currPos .nextPage ;
21022079 else
2103- {
2104- /*
2105- * Seize the scan to get the nextPage and currPage from shared
2106- * parallel state (saved from parallel scan's last _bt_readpage)
2107- */
2108- if (!_bt_parallel_seize (scan , & blkno , & lastcurrblkno , false))
2109- return false;
2080+ blkno = so -> currPos .prevPage ;
2081+ lastcurrblkno = so -> currPos .currPage ;
21102082
2111- Assert (!so -> needPrimScan );
2112- }
2083+ /*
2084+ * Cancel primitive index scans that were scheduled when the call to
2085+ * _bt_readpage for currPos happened to use the opposite direction to the
2086+ * one that we're stepping in now. (It's okay to leave the scan's array
2087+ * keys as-is, since the next _bt_readpage will advance them.)
2088+ */
2089+ if (so -> currPos .dir != dir )
2090+ so -> needPrimScan = false;
21132091
2114- return _bt_readnextpage (scan , blkno , lastcurrblkno , dir );
2092+ return _bt_readnextpage (scan , blkno , lastcurrblkno , dir , false );
21152093}
21162094
21172095/*
@@ -2203,14 +2181,19 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
22032181 *
22042182 * On entry, caller shouldn't hold any locks or pins on any page (we work
22052183 * directly off of blkno and lastcurrblkno instead). Parallel scan callers
2206- * must have seized the scan before calling here (blkno and lastcurrblkno
2207- * arguments should come from the seized scan).
2184+ * that seized the scan before calling here should pass seized=true; such a
2185+ * caller's blkno and lastcurrblkno arguments come from the seized scan.
2186+ * seized=false callers just pass us the blkno/lastcurrblkno taken from their
2187+ * so->currPos, which (along with so->currPos itself) can be used to end the
2188+ * scan. A seized=false caller's blkno can never be assumed to be the page
2189+ * that must be read next during a parallel scan, though. We must figure that
2190+ * part out for ourselves by seizing the scan (the correct page to read might
2191+ * already be beyond the seized=false caller's blkno during a parallel scan).
22082192 *
22092193 * On success exit, so->currPos is updated to contain data from the next
2210- * interesting page, and we return true (parallel scan callers should not use
2211- * so->currPos to determine which page to scan next, though). We hold a pin
2212- * on the buffer on success exit, except when _bt_drop_lock_and_maybe_pin
2213- * decided it was safe to eagerly drop the pin (to avoid blocking VACUUM).
2194+ * interesting page, and we return true. We hold a pin on the buffer on
2195+ * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
2196+ * to eagerly drop the pin (to avoid blocking VACUUM).
22142197 *
22152198 * If there are no more matching records in the given direction, we drop all
22162199 * locks and pins, invalidate so->currPos, and return false.
@@ -2220,12 +2203,12 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
22202203 */
22212204static bool
22222205_bt_readnextpage (IndexScanDesc scan , BlockNumber blkno ,
2223- BlockNumber lastcurrblkno , ScanDirection dir )
2206+ BlockNumber lastcurrblkno , ScanDirection dir , bool seized )
22242207{
22252208 Relation rel = scan -> indexRelation ;
22262209 BTScanOpaque so = (BTScanOpaque ) scan -> opaque ;
22272210
2228- Assert (so -> currPos .currPage == lastcurrblkno || scan -> parallel_scan != NULL );
2211+ Assert (so -> currPos .currPage == lastcurrblkno || seized );
22292212 Assert (!BTScanPosIsPinned (so -> currPos ));
22302213
22312214 /*
@@ -2254,6 +2237,15 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
22542237
22552238 Assert (!so -> needPrimScan );
22562239
2240+ /* parallel scan must never actually visit so->currPos blkno */
2241+ if (!seized && scan -> parallel_scan != NULL &&
2242+ !_bt_parallel_seize (scan , & blkno , & lastcurrblkno , false))
2243+ {
2244+ /* whole scan is now done (or another primitive scan required) */
2245+ BTScanPosInvalidate (so -> currPos );
2246+ return false;
2247+ }
2248+
22572249 if (ScanDirectionIsForward (dir ))
22582250 {
22592251 /* read blkno, but check for interrupts first */
@@ -2308,14 +2300,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
23082300
23092301 /* no matching tuples on this page */
23102302 _bt_relbuf (rel , so -> currPos .buf );
2311-
2312- /* parallel scan seizes another page (won't use so->currPos blkno) */
2313- if (scan -> parallel_scan != NULL &&
2314- !_bt_parallel_seize (scan , & blkno , & lastcurrblkno , false))
2315- {
2316- BTScanPosInvalidate (so -> currPos );
2317- return false;
2318- }
2303+ seized = false; /* released by _bt_readpage (or by us) */
23192304 }
23202305
23212306 /*
0 commit comments