From 216d268f480c2ce47c26e7fd806496c78277260a Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Sat, 7 Sep 2024 23:11:16 +0100 Subject: [PATCH] Fix GH-13437: FPM: ERROR: scoreboard: failed to lock (already locked) This changes locking for scoreboard to reduce contention between readers and adds retries for acquiring scoreboard for read. --- sapi/fpm/fpm/fpm_atomic.h | 19 ++++++++++ sapi/fpm/fpm/fpm_scoreboard.c | 68 ++++++++++++++++++++++++++++++----- sapi/fpm/fpm/fpm_scoreboard.h | 4 +++ 3 files changed, 83 insertions(+), 8 deletions(-) diff --git a/sapi/fpm/fpm/fpm_atomic.h b/sapi/fpm/fpm/fpm_atomic.h index e3926e708c542..02009f6af9118 100644 --- a/sapi/fpm/fpm/fpm_atomic.h +++ b/sapi/fpm/fpm/fpm_atomic.h @@ -156,6 +156,25 @@ static inline int fpm_spinlock(atomic_t *lock, int try_once) /* {{{ */ } /* }}} */ +static inline int fpm_spinlock_with_max_retries(atomic_t *lock, unsigned int max_retries) +{ + unsigned int retries = 0; + + for (;;) { + if (atomic_cmp_set(lock, 0, 1)) { + return 1; + } + + sched_yield(); + + if (++retries > max_retries) { + return 0; + } + } + + return 1; +} + #define fpm_unlock(lock) lock = 0 #endif diff --git a/sapi/fpm/fpm/fpm_scoreboard.c b/sapi/fpm/fpm/fpm_scoreboard.c index 52d10a0416832..75b3adde27e33 100644 --- a/sapi/fpm/fpm/fpm_scoreboard.c +++ b/sapi/fpm/fpm/fpm_scoreboard.c @@ -73,6 +73,17 @@ int fpm_scoreboard_init_main(void) return 0; } +static inline void fpm_scoreboard_readers_decrement(struct fpm_scoreboard_s *scoreboard) +{ + fpm_spinlock(&scoreboard->lock, 1); + if (scoreboard->reader_count > 0) { + scoreboard->reader_count -= 1; + } + unsigned int current_reader_count = (unsigned) scoreboard->reader_count; + fpm_unlock(scoreboard->lock); + zlog(ZLOG_DEBUG, "scoreboard: for proc %d reader decremented to value %u", getpid(), current_reader_count); +} + static struct fpm_scoreboard_s *fpm_scoreboard_get_for_update(struct fpm_scoreboard_s *scoreboard) /* {{{ */ { if (!scoreboard) { @@ -93,7 +104,30 @@ void fpm_scoreboard_update_begin(struct fpm_scoreboard_s *scoreboard) /* {{{ */ return; } - fpm_spinlock(&scoreboard->lock, 0); + int retries = 0; + while (1) { + fpm_spinlock(&scoreboard->lock, 1); + if (scoreboard->reader_count == 0) { + if (!fpm_spinlock_with_max_retries(&scoreboard->writer_active, FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES)) { + /* in this case the writer might have crashed so just warn and continue as the lock was acquired */ + zlog(ZLOG_WARNING, "scoreboard: writer %d waited too long for another writer to release lock.", getpid()); + } else { + zlog(ZLOG_DEBUG, "scoreboard: writer lock acquired by writer %d", getpid()); + } + fpm_unlock(scoreboard->lock); + break; + } + fpm_unlock(scoreboard->lock); + + if (++retries > FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES) { + /* decrement reader count by 1 (assuming a killed or crashed reader) */ + fpm_scoreboard_readers_decrement(scoreboard); + zlog(ZLOG_WARNING, "scoreboard: writer detected a potential crashed reader, decrementing reader count."); + retries = 0; + } + + sched_yield(); + } } /* }}} */ @@ -170,7 +204,8 @@ void fpm_scoreboard_update_commit( scoreboard->active_max = scoreboard->active; } - fpm_unlock(scoreboard->lock); + fpm_unlock(scoreboard->writer_active); + zlog(ZLOG_DEBUG, "scoreboard: writer lock released by writer %d", getpid()); } /* }}} */ @@ -234,16 +269,33 @@ struct fpm_scoreboard_proc_s *fpm_scoreboard_proc_get_from_child(struct fpm_chil struct fpm_scoreboard_s *fpm_scoreboard_acquire(struct fpm_scoreboard_s *scoreboard, int nohang) /* {{{ */ { - struct fpm_scoreboard_s *s; - - s = scoreboard ? scoreboard : fpm_scoreboard; + struct fpm_scoreboard_s *s = scoreboard ? scoreboard : fpm_scoreboard; if (!s) { return NULL; } - if (!fpm_spinlock(&s->lock, nohang)) { - return NULL; + int retries = 0; + while (1) { + /* increment reader if no writer active */ + fpm_spinlock(&scoreboard->lock, 1); + if (!s->writer_active) { + scoreboard->reader_count += 1; + unsigned int current_reader_count = (unsigned) scoreboard->reader_count; + fpm_unlock(scoreboard->lock); + zlog(ZLOG_DEBUG, "scoreboard: for proc %d reader incremented to value %u", getpid(), current_reader_count); + break; + } + fpm_unlock(scoreboard->lock); + + sched_yield(); + + if (++retries > FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES) { + zlog(ZLOG_WARNING, "scoreboard: reader waited too long for writer to release lock."); + fpm_scoreboard_readers_decrement(s); + return NULL; + } } + return s; } /* }}} */ @@ -253,7 +305,7 @@ void fpm_scoreboard_release(struct fpm_scoreboard_s *scoreboard) { return; } - scoreboard->lock = 0; + fpm_scoreboard_readers_decrement(scoreboard); } struct fpm_scoreboard_s *fpm_scoreboard_copy(struct fpm_scoreboard_s *scoreboard, int copy_procs) diff --git a/sapi/fpm/fpm/fpm_scoreboard.h b/sapi/fpm/fpm/fpm_scoreboard.h index c488c64bfefc4..bd61eedd11767 100644 --- a/sapi/fpm/fpm/fpm_scoreboard.h +++ b/sapi/fpm/fpm/fpm_scoreboard.h @@ -18,6 +18,8 @@ #define FPM_SCOREBOARD_LOCK_HANG 0 #define FPM_SCOREBOARD_LOCK_NOHANG 1 +#define FPM_SCOREBOARD_SPINLOCK_MAX_RETRIES 50000 + struct fpm_scoreboard_proc_s { union { atomic_t lock; @@ -52,6 +54,8 @@ struct fpm_scoreboard_s { atomic_t lock; char dummy[16]; }; + atomic_t writer_active; + atomic_t reader_count; char pool[32]; int pm; time_t start_epoch;