@@ -162,9 +162,12 @@ static StringInfoData output_message;
162162static StringInfoData reply_message ;
163163static StringInfoData tmpbuf ;
164164
165+ /* Timestamp of last ProcessRepliesIfAny(). */
166+ static TimestampTz last_processing = 0 ;
167+
165168/*
166- * Timestamp of the last receipt of the reply from the standby. Set to 0 if
167- * wal_sender_timeout doesn't need to be active.
169+ * Timestamp of last ProcessRepliesIfAny() that saw a reply from the
170+ * standby. Set to 0 if wal_sender_timeout doesn't need to be active.
168171 */
169172static TimestampTz last_reply_timestamp = 0 ;
170173
@@ -241,8 +244,8 @@ static void ProcessStandbyReplyMessage(void);
241244static void ProcessStandbyHSFeedbackMessage (void );
242245static void ProcessRepliesIfAny (void );
243246static void WalSndKeepalive (bool requestReply );
244- static void WalSndKeepaliveIfNecessary (TimestampTz now );
245- static void WalSndCheckTimeOut (TimestampTz now );
247+ static void WalSndKeepaliveIfNecessary (void );
248+ static void WalSndCheckTimeOut (void );
246249static long WalSndComputeSleeptime (TimestampTz now );
247250static void WalSndPrepareWrite (LogicalDecodingContext * ctx , XLogRecPtr lsn , TransactionId xid , bool last_write );
248251static void WalSndWriteData (LogicalDecodingContext * ctx , XLogRecPtr lsn , TransactionId xid , bool last_write );
@@ -1195,18 +1198,16 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
11951198 /* Check for input from the client */
11961199 ProcessRepliesIfAny ();
11971200
1198- now = GetCurrentTimestamp ();
1199-
12001201 /* die if timeout was reached */
1201- WalSndCheckTimeOut (now );
1202+ WalSndCheckTimeOut ();
12021203
12031204 /* Send keepalive if the time has come */
1204- WalSndKeepaliveIfNecessary (now );
1205+ WalSndKeepaliveIfNecessary ();
12051206
12061207 if (!pq_is_send_pending ())
12071208 break ;
12081209
1209- sleeptime = WalSndComputeSleeptime (now );
1210+ sleeptime = WalSndComputeSleeptime (GetCurrentTimestamp () );
12101211
12111212 wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
12121213 WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE | WL_TIMEOUT ;
@@ -1301,7 +1302,6 @@ WalSndWaitForWal(XLogRecPtr loc)
13011302 for (;;)
13021303 {
13031304 long sleeptime ;
1304- TimestampTz now ;
13051305
13061306 /*
13071307 * Emergency bailout if postmaster has died. This is to avoid the
@@ -1386,13 +1386,11 @@ WalSndWaitForWal(XLogRecPtr loc)
13861386 !pq_is_send_pending ())
13871387 break ;
13881388
1389- now = GetCurrentTimestamp ();
1390-
13911389 /* die if timeout was reached */
1392- WalSndCheckTimeOut (now );
1390+ WalSndCheckTimeOut ();
13931391
13941392 /* Send keepalive if the time has come */
1395- WalSndKeepaliveIfNecessary (now );
1393+ WalSndKeepaliveIfNecessary ();
13961394
13971395 /*
13981396 * Sleep until something happens or we time out. Also wait for the
@@ -1401,7 +1399,7 @@ WalSndWaitForWal(XLogRecPtr loc)
14011399 * new WAL to be generated. (But if we have nothing to send, we don't
14021400 * want to wake on socket-writable.)
14031401 */
1404- sleeptime = WalSndComputeSleeptime (now );
1402+ sleeptime = WalSndComputeSleeptime (GetCurrentTimestamp () );
14051403
14061404 wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
14071405 WL_SOCKET_READABLE | WL_TIMEOUT ;
@@ -1598,6 +1596,8 @@ ProcessRepliesIfAny(void)
15981596 int r ;
15991597 bool received = false;
16001598
1599+ last_processing = GetCurrentTimestamp ();
1600+
16011601 for (;;)
16021602 {
16031603 pq_startmsgread ();
@@ -1685,7 +1685,7 @@ ProcessRepliesIfAny(void)
16851685 */
16861686 if (received )
16871687 {
1688- last_reply_timestamp = GetCurrentTimestamp () ;
1688+ last_reply_timestamp = last_processing ;
16891689 waiting_for_ping_response = false;
16901690 }
16911691}
@@ -2064,10 +2064,18 @@ WalSndComputeSleeptime(TimestampTz now)
20642064
20652065/*
20662066 * Check whether there have been responses by the client within
2067- * wal_sender_timeout and shutdown if not.
2067+ * wal_sender_timeout and shutdown if not. Using last_processing as the
2068+ * reference point avoids counting server-side stalls against the client.
2069+ * However, a long server-side stall can make WalSndKeepaliveIfNecessary()
2070+ * postdate last_processing by more than wal_sender_timeout. If that happens,
2071+ * the client must reply almost immediately to avoid a timeout. This rarely
2072+ * affects the default configuration, under which clients spontaneously send a
2073+ * message every standby_message_timeout = wal_sender_timeout/6 = 10s. We
2074+ * could eliminate that problem by recognizing timeout expiration at
2075+ * wal_sender_timeout/2 after the keepalive.
20682076 */
20692077static void
2070- WalSndCheckTimeOut (TimestampTz now )
2078+ WalSndCheckTimeOut (void )
20712079{
20722080 TimestampTz timeout ;
20732081
@@ -2078,7 +2086,7 @@ WalSndCheckTimeOut(TimestampTz now)
20782086 timeout = TimestampTzPlusMilliseconds (last_reply_timestamp ,
20792087 wal_sender_timeout );
20802088
2081- if (wal_sender_timeout > 0 && now >= timeout )
2089+ if (wal_sender_timeout > 0 && last_processing >= timeout )
20822090 {
20832091 /*
20842092 * Since typically expiration of replication timeout means
@@ -2109,8 +2117,6 @@ WalSndLoop(WalSndSendDataCallback send_data)
21092117 */
21102118 for (;;)
21112119 {
2112- TimestampTz now ;
2113-
21142120 /*
21152121 * Emergency bailout if postmaster has died. This is to avoid the
21162122 * necessity for manual cleanup of all postmaster children.
@@ -2188,13 +2194,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
21882194 WalSndDone (send_data );
21892195 }
21902196
2191- now = GetCurrentTimestamp ();
2192-
21932197 /* Check for replication timeout. */
2194- WalSndCheckTimeOut (now );
2198+ WalSndCheckTimeOut ();
21952199
21962200 /* Send keepalive if the time has come */
2197- WalSndKeepaliveIfNecessary (now );
2201+ WalSndKeepaliveIfNecessary ();
21982202
21992203 /*
22002204 * We don't block if not caught up, unless there is unsent data
@@ -2212,7 +2216,11 @@ WalSndLoop(WalSndSendDataCallback send_data)
22122216 wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT |
22132217 WL_SOCKET_READABLE ;
22142218
2215- sleeptime = WalSndComputeSleeptime (now );
2219+ /*
2220+ * Use fresh timestamp, not last_processed, to reduce the chance
2221+ * of reaching wal_sender_timeout before sending a keepalive.
2222+ */
2223+ sleeptime = WalSndComputeSleeptime (GetCurrentTimestamp ());
22162224
22172225 if (pq_is_send_pending ())
22182226 wakeEvents |= WL_SOCKET_WRITEABLE ;
@@ -3364,7 +3372,7 @@ WalSndKeepalive(bool requestReply)
33643372 * Send keepalive message if too much time has elapsed.
33653373 */
33663374static void
3367- WalSndKeepaliveIfNecessary (TimestampTz now )
3375+ WalSndKeepaliveIfNecessary (void )
33683376{
33693377 TimestampTz ping_time ;
33703378
@@ -3385,7 +3393,7 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
33853393 */
33863394 ping_time = TimestampTzPlusMilliseconds (last_reply_timestamp ,
33873395 wal_sender_timeout / 2 );
3388- if (now >= ping_time )
3396+ if (last_processing >= ping_time )
33893397 {
33903398 WalSndKeepalive (true);
33913399 waiting_for_ping_response = true;
0 commit comments