@@ -6326,6 +6326,8 @@ heap_inplace_update_and_unlock(Relation relation,
63266326 HeapTupleHeader htup = oldtup -> t_data ;
63276327 uint32 oldlen ;
63286328 uint32 newlen ;
6329+ char * dst ;
6330+ char * src ;
63296331 int nmsgs = 0 ;
63306332 SharedInvalidationMessage * invalMessages = NULL ;
63316333 bool RelcacheInitFileInval = false;
@@ -6336,6 +6338,9 @@ heap_inplace_update_and_unlock(Relation relation,
63366338 if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
63376339 elog (ERROR , "wrong tuple length" );
63386340
6341+ dst = (char * ) htup + htup -> t_hoff ;
6342+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6343+
63396344 /*
63406345 * Construct shared cache inval if necessary. Note that because we only
63416346 * pass the new version of the tuple, this mustn't be used for any
@@ -6359,15 +6364,15 @@ heap_inplace_update_and_unlock(Relation relation,
63596364 */
63606365 PreInplace_Inval ();
63616366
6362- /* NO EREPORT(ERROR) from here till changes are logged */
6363- START_CRIT_SECTION ();
6364-
6365- memcpy ((char * ) htup + htup -> t_hoff ,
6366- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6367- newlen );
6368-
63696367 /*----------
6370- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6368+ * NO EREPORT(ERROR) from here till changes are complete
6369+ *
6370+ * Our buffer lock won't stop a reader having already pinned and checked
6371+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6372+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6373+ * checkpoint delay makes that acceptable. With the usual order of
6374+ * changes, a crash after memcpy() and before XLogInsert() could allow
6375+ * datfrozenxid to overtake relfrozenxid:
63716376 *
63726377 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
63736378 * ["R" is a VACUUM tbl]
@@ -6377,14 +6382,28 @@ heap_inplace_update_and_unlock(Relation relation,
63776382 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
63786383 * [crash]
63796384 * [recovery restores datfrozenxid w/o relfrozenxid]
6385+ *
6386+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6387+ * the buffer to the stack before logging. Here, that facilitates a FPI
6388+ * of the post-mutation block before we accept other sessions seeing it.
63806389 */
6381-
6382- MarkBufferDirty (buffer );
6390+ Assert ((MyProc -> delayChkptFlags & DELAY_CHKPT_START ) == 0 );
6391+ START_CRIT_SECTION ();
6392+ MyProc -> delayChkptFlags |= DELAY_CHKPT_START ;
63836393
63846394 /* XLOG stuff */
63856395 if (RelationNeedsWAL (relation ))
63866396 {
63876397 xl_heap_inplace xlrec ;
6398+ PGAlignedBlock copied_buffer ;
6399+ char * origdata = (char * ) BufferGetBlock (buffer );
6400+ Page page = BufferGetPage (buffer );
6401+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6402+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6403+ uintptr_t dst_offset_in_block ;
6404+ RelFileLocator rlocator ;
6405+ ForkNumber forkno ;
6406+ BlockNumber blkno ;
63886407 XLogRecPtr recptr ;
63896408
63906409 xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
@@ -6399,16 +6418,28 @@ heap_inplace_update_and_unlock(Relation relation,
63996418 XLogRegisterData ((char * ) invalMessages ,
64006419 nmsgs * sizeof (SharedInvalidationMessage ));
64016420
6402- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6403- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6421+ /* register block matching what buffer will look like after changes */
6422+ memcpy (copied_buffer .data , origdata , lower );
6423+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6424+ dst_offset_in_block = dst - origdata ;
6425+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6426+ BufferGetTag (buffer , & rlocator , & forkno , & blkno );
6427+ Assert (forkno == MAIN_FORKNUM );
6428+ XLogRegisterBlock (0 , & rlocator , forkno , blkno , copied_buffer .data ,
6429+ REGBUF_STANDARD );
6430+ XLogRegisterBufData (0 , src , newlen );
64046431
64056432 /* inplace updates aren't decoded atm, don't log the origin */
64066433
64076434 recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
64086435
6409- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6436+ PageSetLSN (page , recptr );
64106437 }
64116438
6439+ memcpy (dst , src , newlen );
6440+
6441+ MarkBufferDirty (buffer );
6442+
64126443 LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
64136444
64146445 /*
@@ -6421,6 +6452,7 @@ heap_inplace_update_and_unlock(Relation relation,
64216452 */
64226453 AtInplace_Inval ();
64236454
6455+ MyProc -> delayChkptFlags &= ~DELAY_CHKPT_START ;
64246456 END_CRIT_SECTION ();
64256457 UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
64266458
0 commit comments