Fix performance regression with flush of pending fixed-numbered stats
author: Michael Paquier <michael@paquier.xyz>
Sun, 27 Jul 2025 23:15:11 +0000 (08:15 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Sun, 27 Jul 2025 23:15:11 +0000 (08:15 +0900)
The callback added in fc415edf8ca8, used to check if there is any pending
data to flush for fixed-numbered statistics by looping across all the
builtin and custom stats kinds with a call to have_static_pending_cb,
is proving to be able to show up in workloads that do not report any
stats (read-only, no function calls, no WAL, no IO, etc).  The code used
in v17 was cheaper than what HEAD has introduced, relying on three
boolean checks for WAL, SLRU and IO stats.

This commit switches the code to use a more efficient approach than
fc415edf8ca8, with a single boolean flag that can be switched to "true"
by any fixed-numbered stats kinds to force pgstat_report_stat() to go
through one round of reports.  The flag is reset by pgstat_report_stat()
once a full round of reports is done.  The flag being false means that
fixed-numbered stats kinds saw no activity, and that there is no pending
data to flush.

ac000fca743e took one step in improving the performance by reducing the
number of stats kinds that the backend can hold.  This commit takes a
more drastic step by bringing back the code efficiency to what it was
before v18 with a cheap check at the beginning of pgstat_report_stat()
for its fast-exit path.

The callback have_static_pending_cb is removed as an effect of all that.

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Discussion: http://postgr.es/m/eb224uegsga2hgq7dfq3ps5cduhpqej7ir2hjxzzozjthrekx5@dysei6buqthe
Backpatch-through: 18

src/backend/access/transam/xlog.c
src/backend/utils/activity/pgstat.c
src/backend/utils/activity/pgstat_backend.c
src/backend/utils/activity/pgstat_io.c
src/backend/utils/activity/pgstat_slru.c
src/backend/utils/activity/pgstat_wal.c
src/include/utils/pgstat_internal.h

index eefffc4277a1a9c61a58295068a31c415ef29867..b0891998b243f4524f63fabbcc45835a04cae639 100644 (file)
@@ -96,6 +96,7 @@
 #include "utils/guc_hooks.h"
 #include "utils/guc_tables.h"
 #include "utils/injection_point.h"
+#include "utils/pgstat_internal.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
 #include "utils/snapmgr.h"
@@ -1091,6 +1092,9 @@ XLogInsertRecord(XLogRecData *rdata,
        pgWalUsage.wal_bytes += rechdr->xl_tot_len;
        pgWalUsage.wal_records++;
        pgWalUsage.wal_fpi += num_fpi;
+
+       /* Required for the flush of pending stats WAL data */
+       pgstat_report_fixed = true;
    }
 
    return EndPos;
@@ -2108,6 +2112,12 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
                    LWLockRelease(WALWriteLock);
                    pgWalUsage.wal_buffers_full++;
                    TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
+
+                   /*
+                    * Required for the flush of pending stats WAL data, per
+                    * update of pgWalUsage.
+                    */
+                   pgstat_report_fixed = true;
                }
            }
        }
index 8b57845e8709f8b478b43017dc09a40dd5b5199e..6bc91ce0daddac461bbee3c456591e04b652fcd0 100644 (file)
@@ -212,6 +212,11 @@ int            pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE;
 
 PgStat_LocalState pgStatLocal;
 
+/*
+ * Track pending reports for fixed-numbered stats, used by
+ * pgstat_report_stat().
+ */
+bool       pgstat_report_fixed = false;
 
 /* ----------
  * Local data
@@ -370,7 +375,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
        .shared_data_off = offsetof(PgStatShared_Backend, stats),
        .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats),
 
-       .have_static_pending_cb = pgstat_backend_have_pending_cb,
        .flush_static_cb = pgstat_backend_flush_cb,
        .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb,
    },
@@ -437,7 +441,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
        .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats),
 
        .flush_static_cb = pgstat_io_flush_cb,
-       .have_static_pending_cb = pgstat_io_have_pending_cb,
        .init_shmem_cb = pgstat_io_init_shmem_cb,
        .reset_all_cb = pgstat_io_reset_all_cb,
        .snapshot_cb = pgstat_io_snapshot_cb,
@@ -455,7 +458,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
        .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats),
 
        .flush_static_cb = pgstat_slru_flush_cb,
-       .have_static_pending_cb = pgstat_slru_have_pending_cb,
        .init_shmem_cb = pgstat_slru_init_shmem_cb,
        .reset_all_cb = pgstat_slru_reset_all_cb,
        .snapshot_cb = pgstat_slru_snapshot_cb,
@@ -474,7 +476,6 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE]
 
        .init_backend_cb = pgstat_wal_init_backend_cb,
        .flush_static_cb = pgstat_wal_flush_cb,
-       .have_static_pending_cb = pgstat_wal_have_pending_cb,
        .init_shmem_cb = pgstat_wal_init_shmem_cb,
        .reset_all_cb = pgstat_wal_reset_all_cb,
        .snapshot_cb = pgstat_wal_snapshot_cb,
@@ -708,29 +709,10 @@ pgstat_report_stat(bool force)
    }
 
    /* Don't expend a clock check if nothing to do */
-   if (dlist_is_empty(&pgStatPending))
+   if (dlist_is_empty(&pgStatPending) &&
+       !pgstat_report_fixed)
    {
-       bool        do_flush = false;
-
-       /* Check for pending stats */
-       for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
-       {
-           const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
-
-           if (!kind_info)
-               continue;
-           if (!kind_info->have_static_pending_cb)
-               continue;
-
-           if (kind_info->have_static_pending_cb())
-           {
-               do_flush = true;
-               break;
-           }
-       }
-
-       if (!do_flush)
-           return 0;
+       return 0;
    }
 
    /*
@@ -784,16 +766,19 @@ pgstat_report_stat(bool force)
    partial_flush |= pgstat_flush_pending_entries(nowait);
 
    /* flush of other stats kinds */
-   for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
+   if (pgstat_report_fixed)
    {
-       const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+       for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++)
+       {
+           const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
 
-       if (!kind_info)
-           continue;
-       if (!kind_info->flush_static_cb)
-           continue;
+           if (!kind_info)
+               continue;
+           if (!kind_info->flush_static_cb)
+               continue;
 
-       partial_flush |= kind_info->flush_static_cb(nowait);
+           partial_flush |= kind_info->flush_static_cb(nowait);
+       }
    }
 
    last_flush = now;
@@ -815,6 +800,7 @@ pgstat_report_stat(bool force)
    }
 
    pending_since = 0;
+   pgstat_report_fixed = false;
 
    return 0;
 }
index 51256277e8d37ff06f38a0d1f9af6b5aaf7bbe7e..8714a85e2d9366acc904812240e80f3b0bbb9419 100644 (file)
@@ -66,6 +66,7 @@ pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context,
                   io_time);
 
    backend_has_iostats = true;
+   pgstat_report_fixed = true;
 }
 
 void
@@ -81,6 +82,7 @@ pgstat_count_backend_io_op(IOObject io_object, IOContext io_context,
    PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes;
 
    backend_has_iostats = true;
+   pgstat_report_fixed = true;
 }
 
 /*
@@ -301,18 +303,6 @@ pgstat_flush_backend(bool nowait, bits32 flags)
    return false;
 }
 
-/*
- * Check if there are any backend stats waiting for flush.
- */
-bool
-pgstat_backend_have_pending_cb(void)
-{
-   if (!pgstat_tracks_backend_bktype(MyBackendType))
-       return false;
-
-   return (backend_has_iostats || pgstat_backend_wal_have_pending());
-}
-
 /*
  * Callback to flush out locally pending backend statistics.
  *
index d8d26379a571e7d0288852fb168592e09c36e7b9..13ae57ed6498d9f4bb1fa028d3c94fc7b50769b9 100644 (file)
@@ -80,6 +80,7 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
    pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes);
 
    have_iostats = true;
+   pgstat_report_fixed = true;
 }
 
 /*
@@ -167,15 +168,6 @@ pgstat_fetch_stat_io(void)
    return &pgStatLocal.snapshot.io;
 }
 
-/*
- * Check if there any IO stats waiting for flush.
- */
-bool
-pgstat_io_have_pending_cb(void)
-{
-   return have_iostats;
-}
-
 /*
  * Simpler wrapper of pgstat_io_flush_cb()
  */
index b9e940dde45b671eca120b2876e8af8d8c5d4fdf..7bd8744accb0e2dd19abdfb0da7741d399c5621c 100644 (file)
@@ -143,15 +143,6 @@ pgstat_get_slru_index(const char *name)
    return (SLRU_NUM_ELEMENTS - 1);
 }
 
-/*
- * Check if there are any SLRU stats entries waiting for flush.
- */
-bool
-pgstat_slru_have_pending_cb(void)
-{
-   return have_slrustats;
-}
-
 /*
  * Flush out locally pending SLRU stats entries
  *
@@ -247,6 +238,7 @@ get_slru_entry(int slru_idx)
    Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS));
 
    have_slrustats = true;
+   pgstat_report_fixed = true;
 
    return &pending_SLRUStats[slru_idx];
 }
index 16a1ecb4d90d23680d90fbab1985b3d4c0d3038f..0d04480d2f6d0f1450d9f8bad2cb004b75953187 100644 (file)
@@ -71,6 +71,15 @@ pgstat_fetch_stat_wal(void)
    return &pgStatLocal.snapshot.wal;
 }
 
+/*
+ * To determine whether WAL usage happened.
+ */
+static inline bool
+pgstat_wal_have_pending(void)
+{
+   return pgWalUsage.wal_records != prevWalUsage.wal_records;
+}
+
 /*
  * Calculate how much WAL usage counters have increased by subtracting the
  * previous counters from the current ones.
@@ -92,7 +101,7 @@ pgstat_wal_flush_cb(bool nowait)
     * This function can be called even if nothing at all has happened. Avoid
     * taking lock for nothing in that case.
     */
-   if (!pgstat_wal_have_pending_cb())
+   if (!pgstat_wal_have_pending())
        return false;
 
    /*
@@ -136,15 +145,6 @@ pgstat_wal_init_backend_cb(void)
    prevWalUsage = pgWalUsage;
 }
 
-/*
- * To determine whether WAL usage happened.
- */
-bool
-pgstat_wal_have_pending_cb(void)
-{
-   return pgWalUsage.wal_records != prevWalUsage.wal_records;
-}
-
 void
 pgstat_wal_init_shmem_cb(void *stats)
 {
index d5557e6e998cdaf237fe9a161f479ff632c058c8..6cf00008f6333575f3f160fab6f32c61f7c2d4c7 100644 (file)
@@ -295,18 +295,11 @@ typedef struct PgStat_KindInfo
     *
     * Returns true if some of the stats could not be flushed, due to lock
     * contention for example. Optional.
-    */
-   bool        (*flush_static_cb) (bool nowait);
-
-   /*
-    * For fixed-numbered or variable-numbered statistics: Check for pending
-    * stats in need of flush with flush_static_cb, when these do not use
-    * PgStat_EntryRef->pending.
     *
-    * Returns true if there are any stats pending for flush, triggering
-    * flush_static_cb. Optional.
+    * "pgstat_report_fixed" needs to be set to trigger the flush of pending
+    * stats.
     */
-   bool        (*have_static_pending_cb) (void);
+   bool        (*flush_static_cb) (bool nowait);
 
    /*
     * For fixed-numbered statistics: Reset All.
@@ -627,7 +620,6 @@ extern void pgstat_archiver_snapshot_cb(void);
 
 extern bool pgstat_flush_backend(bool nowait, bits32 flags);
 extern bool pgstat_backend_flush_cb(bool nowait);
-extern bool pgstat_backend_have_pending_cb(void);
 extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header,
                                              TimestampTz ts);
 
@@ -676,7 +668,6 @@ extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
 
 extern void pgstat_flush_io(bool nowait);
 
-extern bool pgstat_io_have_pending_cb(void);
 extern bool pgstat_io_flush_cb(bool nowait);
 extern void pgstat_io_init_shmem_cb(void *stats);
 extern void pgstat_io_reset_all_cb(TimestampTz ts);
@@ -738,7 +729,6 @@ extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind,
  * Functions in pgstat_slru.c
  */
 
-extern bool pgstat_slru_have_pending_cb(void);
 extern bool pgstat_slru_flush_cb(bool nowait);
 extern void pgstat_slru_init_shmem_cb(void *stats);
 extern void pgstat_slru_reset_all_cb(TimestampTz ts);
@@ -750,7 +740,6 @@ extern void pgstat_slru_snapshot_cb(void);
  */
 
 extern void pgstat_wal_init_backend_cb(void);
-extern bool pgstat_wal_have_pending_cb(void);
 extern bool pgstat_wal_flush_cb(bool nowait);
 extern void pgstat_wal_init_shmem_cb(void *stats);
 extern void pgstat_wal_reset_all_cb(TimestampTz ts);
@@ -778,8 +767,23 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, uint64 obji
  * Variables in pgstat.c
  */
 
-extern PGDLLIMPORT PgStat_LocalState pgStatLocal;
+/*
+ * Track if *any* pending fixed-numbered statistics should be flushed to
+ * shared memory.
+ *
+ * This flag can be switched to true by fixed-numbered statistics to let
+ * pgstat_report_stat() know if it needs to go through one round of
+ * reports, calling flush_static_cb for each fixed-numbered statistics
+ * kind.  When this flag is not set, pgstat_report_stat() is able to do
+ * a fast exit, knowing that there are no pending fixed-numbered statistics.
+ *
+ * Statistics callbacks should never reset this flag; pgstat_report_stat()
+ * is in charge of doing that.
+ */
+extern PGDLLIMPORT bool pgstat_report_fixed;
 
+/* Backend-local stats state */
+extern PGDLLIMPORT PgStat_LocalState pgStatLocal;
 
 /*
  * Implementation of inline functions declared above.