From 953a5d82b85cf9bd8a96c1a385e9781c78acb2b5 Mon Sep 17 00:00:00 2001 From: Sergio Cazzolato Date: Fri, 24 Jan 2025 11:07:29 -0300 Subject: [PATCH] overlord/state, tests: measure state lock (#14874) * spike to measure locking time os snapd state file * Changes required to collect the state locks for the whole run * fix shellcheck error * measure the lock time after the lock is done * Save unlock times before the unlock This will prevent raises. * Added wait time and several improvements * update unit test a minor fix on comments * fix flock unit test * osutil: tweak env vars for state lock tracing, improve tracing Signed-off-by: Maciej Borzecki * tests: update for new env varibles names Signed-off-by: Maciej Borzecki * osutil: simplify init, tweak naming and lock tracking API Signed-off-by: Maciej Borzecki * overlord/state: tweak naming and match lock tracing API Signed-off-by: Maciej Borzecki * overlord/state: move state lock tracing, unexport functions, change build tag Move state lock tracing from osutil into overlord/state. Tweak the code to not export functions since there aren't any external consumers. Signed-off-by: Maciej Borzecki * many: update state lock trace build tag use locations Signed-off-by: Maciej Borzecki * overlord/state: take timestamp before unlocking Signed-off-by: Maciej Borzecki * github: do not collect coverage for statelocktrace tag The code is already executed in spread tests. Signed-off-by: Maciej Borzecki --------- Signed-off-by: Maciej Borzecki Co-authored-by: Maciej Borzecki --- .github/workflows/test.yaml | 1 + build-aux/snap/snapcraft.yaml | 4 +- osutil/flock.go | 6 ++ osutil/flock_test.go | 13 +++ overlord/state/locktrace.go | 121 ++++++++++++++++++++++++++ overlord/state/locktrace_fake.go | 28 ++++++ overlord/state/state.go | 10 +++ packaging/ubuntu-16.04/rules | 4 +- spread.yaml | 2 + tests/lib/prepare-restore.sh | 4 +- tests/lib/prepare.sh | 35 ++++++++ tests/main/snapd-state-lock/task.yaml | 16 ++++ 12 files changed, 239 insertions(+), 5 deletions(-) create mode 100644 overlord/state/locktrace.go create mode 100644 overlord/state/locktrace_fake.go create mode 100644 tests/main/snapd-state-lock/task.yaml diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 02140993a49..72b3cf6f444 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -203,6 +203,7 @@ jobs: - { go-build-tags: withbootassetstesting, skip-coverage: false, snapd-debug: false, go-test-race: false} - { go-build-tags: nosecboot, skip-coverage: false, snapd-debug: false, go-test-race: false} - { go-build-tags: faultinject, skip-coverage: false, snapd-debug: false, go-test-race: false} + - { go-build-tags: statelocktrace, skip-coverage: true, snapd-debug: false, go-test-race: false} - { go-build-tags: snapdusergo, skip-coverage: false, snapd-debug: false, go-test-race: false} - { go-build-tags: "", skip-coverage: true, snapd-debug: false, go-test-race: true } diff --git a/build-aux/snap/snapcraft.yaml b/build-aux/snap/snapcraft.yaml index 9d53c2c7417..316d7e52d2f 100644 --- a/build-aux/snap/snapcraft.yaml +++ b/build-aux/snap/snapcraft.yaml @@ -366,14 +366,14 @@ parts: TAGS+=(nomanagers) case "${VERSION}" in 1337.*) - TAGS+=(withtestkeys faultinject) + TAGS+=(withtestkeys faultinject statelocktrace) ;; esac ;; *) case "${VERSION}" in 1337.*) - TAGS+=(withtestkeys withbootassetstesting faultinject) + TAGS+=(withtestkeys withbootassetstesting faultinject statelocktrace) ;; esac ;; diff --git a/osutil/flock.go b/osutil/flock.go index b3a3fd74e64..ac81eb1c5c8 100644 --- a/osutil/flock.go +++ b/osutil/flock.go @@ -55,6 +55,12 @@ func NewFileLockWithMode(path string, mode os.FileMode) (*FileLock, error) { return l, nil } +// NewFileLockWithFile wraps an open file with a file lock which can be used for +// locking. +func NewFileLockWithFile(f *os.File) *FileLock { + return &FileLock{file: f} +} + // NewFileLock creates and opens the lock file given by "path" with mode 0600. func NewFileLock(path string) (*FileLock, error) { return NewFileLockWithMode(path, 0600) diff --git a/osutil/flock_test.go b/osutil/flock_test.go index 47826c4c6f9..3ad63a64d76 100644 --- a/osutil/flock_test.go +++ b/osutil/flock_test.go @@ -83,6 +83,19 @@ func (s *flockSuite) TestNewFileLock(c *C) { c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600)) } +// Test that opening and closing a lock works as expected, and that the mode is right. +func (s *flockSuite) TestNewFileLockWithFile(c *C) { + myfile, err := os.OpenFile(filepath.Join(c.MkDir(), "name"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600) + c.Assert(err, IsNil) + + lock := osutil.NewFileLockWithFile(myfile) + defer lock.Close() + + fi, err := os.Stat(lock.Path()) + c.Assert(err, IsNil) + c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600)) +} + // Test that we can access the underlying open file. func (s *flockSuite) TestFile(c *C) { fname := filepath.Join(c.MkDir(), "name") diff --git a/overlord/state/locktrace.go b/overlord/state/locktrace.go new file mode 100644 index 00000000000..9c2877d2603 --- /dev/null +++ b/overlord/state/locktrace.go @@ -0,0 +1,121 @@ +// -*- Mode: Go; indent-tabs-mode: t -*- +//go:build statelocktrace + +/* + * Copyright (C) 2021 Canonical Ltd + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +package state + +import ( + "fmt" + "os" + "runtime" + "time" + + "github.com/snapcore/snapd/osutil" +) + +var ( + traceStateLock = false + + traceThreshold = int64(0) + traceFilePath = "" +) + +func init() { + if !osutil.GetenvBool("SNAPPY_TESTING") { + return + } + + threshold := osutil.GetenvInt64("SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS") + logFilePath := os.Getenv("SNAPD_STATE_LOCK_TRACE_FILE") + + if threshold <= 0 || logFilePath == "" { + return + } + + traceThreshold = threshold + traceFilePath = logFilePath + traceStateLock = true +} + +func traceCallers(ts, heldMs, waitMs int64) error { + if traceFilePath == "" { + return fmt.Errorf("internal error: trace file path unset") + } + + logFile, err := os.OpenFile(traceFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600) + if err != nil { + return fmt.Errorf("cannot not open/create log trace file: %v", err) + } + lockFile := osutil.NewFileLockWithFile(logFile) + defer lockFile.Close() + + if err := lockFile.Lock(); err != nil { + return fmt.Errorf("cannot take file lock: %v", err) + } + + pc := make([]uintptr, 10) + // avoid 3 first callers on the stack: runtime.Callers(), this function and the parent + n := runtime.Callers(3, pc) + frames := runtime.CallersFrames(pc[:n]) + + _, err = fmt.Fprintf(logFile, "### %s lock: held: %d ms wait %d ms\n", + time.UnixMilli(ts), + heldMs, waitMs) + if err != nil { + return err + } + + for { + frame, more := frames.Next() + _, err := fmt.Fprintf(logFile, "%s:%d %s\n", frame.File, frame.Line, frame.Function) + if err != nil { + return err + } + + if !more { + break + } + } + + return nil +} + +func lockTimestamp() int64 { + if !traceStateLock { + return 0 + } + + return time.Now().UnixMilli() +} + +// maybeSaveLockTime allows to save lock times when this overpass the threshold +// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings. +func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) { + if !traceStateLock { + return + } + + heldMs := now - lockHoldStart + waitMs := lockHoldStart - lockWaitStart + if heldMs > traceThreshold || waitMs > traceThreshold { + if err := traceCallers(now, heldMs, waitMs); err != nil { + fmt.Fprintf(os.Stderr, "could write state lock trace: %v\n", err) + } + } +} diff --git a/overlord/state/locktrace_fake.go b/overlord/state/locktrace_fake.go new file mode 100644 index 00000000000..577ad0bc2d2 --- /dev/null +++ b/overlord/state/locktrace_fake.go @@ -0,0 +1,28 @@ +// -*- Mode: Go; indent-tabs-mode: t -*- +//go:build !statelocktrace + +/* + * Copyright (C) 2021 Canonical Ltd + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +package state + +func lockTimestamp() int64 { + return int64(0) +} + +func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) { +} diff --git a/overlord/state/state.go b/overlord/state/state.go index 21e0f3cbeb6..f56ff27735d 100644 --- a/overlord/state/state.go +++ b/overlord/state/state.go @@ -111,6 +111,9 @@ type State struct { // task/changes observing taskHandlers map[int]func(t *Task, old, new Status) (remove bool) changeHandlers map[int]func(chg *Change, old, new Status) + + lockWaitStart int64 + lockHoldStart int64 } // New returns a new empty state. @@ -139,8 +142,11 @@ func (s *State) Modified() bool { // Lock acquires the state lock. func (s *State) Lock() { + lockWait := lockTimestamp() s.mu.Lock() atomic.AddInt32(&s.muC, 1) + s.lockWaitStart = lockWait + s.lockHoldStart = lockTimestamp() } func (s *State) reading() { @@ -158,7 +164,11 @@ func (s *State) writing() { func (s *State) unlock() { atomic.AddInt32(&s.muC, -1) + lockWaitStart, lockHoldStart := s.lockWaitStart, s.lockHoldStart + s.lockWaitStart, s.lockHoldStart = 0, 0 + lockHoldEnd := lockTimestamp() s.mu.Unlock() + maybeSaveLockTime(lockWaitStart, lockHoldStart, lockHoldEnd) } type marshalledState struct { diff --git a/packaging/ubuntu-16.04/rules b/packaging/ubuntu-16.04/rules index ea874f5aee9..eda1b9df7ee 100755 --- a/packaging/ubuntu-16.04/rules +++ b/packaging/ubuntu-16.04/rules @@ -103,8 +103,8 @@ _SNAP_TAGS= # check if we need to include the testkeys in the binary ifneq (,$(filter testkeys,$(DEB_BUILD_OPTIONS))) # if enabled also enable bootloader assets testing and fault injection - _TAGS := withtestkeys,withbootassetstesting,faultinject - _SNAP_TAGS := nomanagers,withtestkeys,faultinject + _TAGS := withtestkeys,withbootassetstesting,faultinject,statelocktrace + _SNAP_TAGS := nomanagers,withtestkeys,faultinject,statelocktrace else _SNAP_TAGS=nomanagers endif diff --git a/spread.yaml b/spread.yaml index cc1e95265a5..8f2da5ba95a 100644 --- a/spread.yaml +++ b/spread.yaml @@ -84,6 +84,8 @@ environment: SNAPD_DEB_FROM_REPO: '$(HOST: echo "${SPREAD_SNAPD_DEB_FROM_REPO:-true}")' # Directory where the built snapd snaps and other assets are stored SNAPD_WORK_DIR: '$(HOST: echo "${SPREAD_SNAPD_WORK_DIR:-/var/tmp/work-dir}")' + # Threshold used to determine which stake locks have to be collected during runtime + SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS: '$(HOST: echo "${SPREAD_SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS:-0}")' # Directory where the nested images and test assets are stored NESTED_WORK_DIR: '$(HOST: echo "${NESTED_WORK_DIR:-/var/tmp/work-dir}")' diff --git a/tests/lib/prepare-restore.sh b/tests/lib/prepare-restore.sh index 65cff46b36d..8c0dedd95e5 100755 --- a/tests/lib/prepare-restore.sh +++ b/tests/lib/prepare-restore.sh @@ -707,6 +707,8 @@ prepare_suite_each() { fi if [[ "$variant" = full ]]; then + # shellcheck source=tests/lib/prepare.sh + . "$TESTSLIB"/prepare.sh # shellcheck source=tests/lib/prepare.sh . "$TESTSLIB"/prepare.sh if os.query is-classic; then @@ -714,7 +716,7 @@ prepare_suite_each() { else prepare_each_core fi - + prepare_state_lock "$SPREAD_JOB" fi case "$SPREAD_SYSTEM" in diff --git a/tests/lib/prepare.sh b/tests/lib/prepare.sh index e114642a4be..4b58e299d72 100755 --- a/tests/lib/prepare.sh +++ b/tests/lib/prepare.sh @@ -462,6 +462,7 @@ prepare_classic() { fi prepare_reexec_override + prepare_state_lock "SNAPD PROJECT" prepare_memory_limit_override disable_refreshes @@ -1524,6 +1525,39 @@ EOF rm -rf "$UNPACK_DIR" } +prepare_state_lock(){ + TAG=$1 + CONF_FILE="/etc/systemd/system/snapd.service.d/state-lock.conf" + LOCKS_FILE="$TESTSTMP"/snapd_lock_traces + RESTART=false + + if [ "$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" -gt 0 ]; then + echo "###START: $TAG" >> "$LOCKS_FILE" + + # Generate the config file when it does not exist and when the threshold has changed different + if ! [ -f "$CONF_FILE" ] || ! grep -q "SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS=$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" < "$CONF_FILE"; then + echo "Prepare snapd for getting state lock time" + cat < "$CONF_FILE" +[Service] +Environment=SNAPPY_TESTING=1 +Environment=SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS="$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" +Environment=SNAPD_STATE_LOCK_TRACE_FILE="$LOCKS_FILE" +EOF + RESTART=true + fi + elif [ -f "$CONF_FILE" ]; then + rm -f "$CONF_FILE" + RESTART=true + fi + + if [ "$RESTART" = "true" ]; then + # the service setting may have changed in the service so we need + # to ensure snapd is reloaded + systemctl daemon-reload + systemctl restart snapd + fi +} + # prepare_ubuntu_core will prepare ubuntu-core 16+ prepare_ubuntu_core() { # we are still a "classic" image, prepare the surgery @@ -1637,6 +1671,7 @@ prepare_ubuntu_core() { # or restore will break remove_disabled_snaps prepare_memory_limit_override + prepare_state_lock "SNAPD PROJECT" setup_experimental_features systemctl stop snapd.service snapd.socket save_snapd_state diff --git a/tests/main/snapd-state-lock/task.yaml b/tests/main/snapd-state-lock/task.yaml new file mode 100644 index 00000000000..9a76eff8b3c --- /dev/null +++ b/tests/main/snapd-state-lock/task.yaml @@ -0,0 +1,16 @@ +summary: smoke test used to retrieve the lock state times + +details: | + Test used to collect artifacts + +priority: -1 + +artifacts: + - snapd_lock_traces + +execute: | + if [ -f "$TESTSTMP"/snapd_lock_traces ]; then + cp -f "$TESTSTMP"/snapd_lock_traces . + else + touch snapd_lock_traces + fi \ No newline at end of file