@@ -157,6 +157,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
157157 bool rootdescend , bool checkunique );
158158static BtreeLevel bt_check_level_from_leftmost (BtreeCheckState * state ,
159159 BtreeLevel level );
160+ static bool bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
161+ BlockNumber start ,
162+ BTPageOpaque start_opaque );
160163static void bt_recheck_sibling_links (BtreeCheckState * state ,
161164 BlockNumber btpo_prev_from_target ,
162165 BlockNumber leftcurrent );
@@ -826,7 +829,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
826829 */
827830 if (state -> readonly )
828831 {
829- if (!P_LEFTMOST ( opaque ))
832+ if (!bt_leftmost_ignoring_half_dead ( state , current , opaque ))
830833 ereport (ERROR ,
831834 (errcode (ERRCODE_INDEX_CORRUPTED ),
832835 errmsg ("block %u is not leftmost in index \"%s\"" ,
@@ -880,8 +883,16 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
880883 */
881884 }
882885
883- /* Sibling links should be in mutual agreement */
884- if (opaque -> btpo_prev != leftcurrent )
886+ /*
887+ * Sibling links should be in mutual agreement. There arises
888+ * leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
889+ * of the parent's low-key downlink is half-dead. (A half-dead page
890+ * has no downlink from its parent.) Under heavyweight locking, the
891+ * last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
892+ * Without heavyweight locking, validation of the P_NONE case remains
893+ * unimplemented.
894+ */
895+ if (opaque -> btpo_prev != leftcurrent && leftcurrent != P_NONE )
885896 bt_recheck_sibling_links (state , opaque -> btpo_prev , leftcurrent );
886897
887898 /* Check level */
@@ -1117,6 +1128,66 @@ bt_entry_unique_check(BtreeCheckState *state, IndexTuple itup,
11171128 }
11181129}
11191130
1131+ /*
1132+ * Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
1133+ * half-dead, sibling-linked pages to the left. If a half-dead page appears
1134+ * under state->readonly, the database exited recovery between the first-stage
1135+ * and second-stage WAL records of a deletion.
1136+ */
1137+ static bool
1138+ bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
1139+ BlockNumber start ,
1140+ BTPageOpaque start_opaque )
1141+ {
1142+ BlockNumber reached = start_opaque -> btpo_prev ,
1143+ reached_from = start ;
1144+ bool all_half_dead = true;
1145+
1146+ /*
1147+ * To handle the !readonly case, we'd need to accept BTP_DELETED pages and
1148+ * potentially observe nbtree/README "Page deletion and backwards scans".
1149+ */
1150+ Assert (state -> readonly );
1151+
1152+ while (reached != P_NONE && all_half_dead )
1153+ {
1154+ Page page = palloc_btree_page (state , reached );
1155+ BTPageOpaque reached_opaque = BTPageGetOpaque (page );
1156+
1157+ CHECK_FOR_INTERRUPTS ();
1158+
1159+ /*
1160+ * Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
1161+ * writes that side-links will continue to point to the siblings.
1162+ * Check btpo_next for that property.
1163+ */
1164+ all_half_dead = P_ISHALFDEAD (reached_opaque ) &&
1165+ reached != start &&
1166+ reached != reached_from &&
1167+ reached_opaque -> btpo_next == reached_from ;
1168+ if (all_half_dead )
1169+ {
1170+ XLogRecPtr pagelsn = PageGetLSN (page );
1171+
1172+ /* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
1173+ ereport (DEBUG1 ,
1174+ (errcode (ERRCODE_NO_DATA ),
1175+ errmsg_internal ("harmless interrupted page deletion detected in index \"%s\"" ,
1176+ RelationGetRelationName (state -> rel )),
1177+ errdetail_internal ("Block=%u right block=%u page lsn=%X/%X." ,
1178+ reached , reached_from ,
1179+ LSN_FORMAT_ARGS (pagelsn ))));
1180+
1181+ reached_from = reached ;
1182+ reached = reached_opaque -> btpo_prev ;
1183+ }
1184+
1185+ pfree (page );
1186+ }
1187+
1188+ return all_half_dead ;
1189+ }
1190+
11201191/*
11211192 * Raise an error when target page's left link does not point back to the
11221193 * previous target page, called leftcurrent here. The leftcurrent page's
@@ -1157,6 +1228,9 @@ bt_recheck_sibling_links(BtreeCheckState *state,
11571228 BlockNumber btpo_prev_from_target ,
11581229 BlockNumber leftcurrent )
11591230{
1231+ /* passing metapage to BTPageGetOpaque() would give irrelevant findings */
1232+ Assert (leftcurrent != P_NONE );
1233+
11601234 if (!state -> readonly )
11611235 {
11621236 Buffer lbuf ;
@@ -2235,7 +2309,8 @@ bt_child_highkey_check(BtreeCheckState *state,
22352309 opaque = BTPageGetOpaque (page );
22362310
22372311 /* The first page we visit at the level should be leftmost */
2238- if (first && !BlockNumberIsValid (state -> prevrightlink ) && !P_LEFTMOST (opaque ))
2312+ if (first && !BlockNumberIsValid (state -> prevrightlink ) &&
2313+ !bt_leftmost_ignoring_half_dead (state , blkno , opaque ))
22392314 ereport (ERROR ,
22402315 (errcode (ERRCODE_INDEX_CORRUPTED ),
22412316 errmsg ("the first child of leftmost target page is not leftmost of its level in index \"%s\"" ,
0 commit comments