Skip to content

Commit

Permalink
overlord/state, tests: measure state lock (canonical#14874)
Browse files Browse the repository at this point in the history
* spike to measure locking time os snapd state file

* Changes required to collect the state locks for the whole run

* fix shellcheck error

* measure the lock time after the lock is done

* Save unlock times before the unlock

This will prevent raises.

* Added wait time and several improvements

* update unit test a minor fix on comments

* fix flock unit test

* osutil: tweak env vars for state lock tracing, improve tracing

Signed-off-by: Maciej Borzecki <[email protected]>

* tests: update for new env varibles names

Signed-off-by: Maciej Borzecki <[email protected]>

* osutil: simplify init, tweak naming and lock tracking API

Signed-off-by: Maciej Borzecki <[email protected]>

* overlord/state: tweak naming and match lock tracing API

Signed-off-by: Maciej Borzecki <[email protected]>

* overlord/state: move state lock tracing, unexport functions, change build tag

Move state lock tracing from osutil into overlord/state. Tweak the code
to not export functions since there aren't any external consumers.

Signed-off-by: Maciej Borzecki <[email protected]>

* many: update state lock trace build tag use locations

Signed-off-by: Maciej Borzecki <[email protected]>

* overlord/state: take timestamp before unlocking

Signed-off-by: Maciej Borzecki <[email protected]>

* github: do not collect coverage for statelocktrace tag

The code is already executed in spread tests.

Signed-off-by: Maciej Borzecki <[email protected]>

---------

Signed-off-by: Maciej Borzecki <[email protected]>
Co-authored-by: Maciej Borzecki <[email protected]>
  • Loading branch information
sergiocazzolato and bboozzoo authored Jan 24, 2025
1 parent eb821e9 commit 953a5d8
Show file tree
Hide file tree
Showing 12 changed files with 239 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ jobs:
- { go-build-tags: withbootassetstesting, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: nosecboot, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: faultinject, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: statelocktrace, skip-coverage: true, snapd-debug: false, go-test-race: false}
- { go-build-tags: snapdusergo, skip-coverage: false, snapd-debug: false, go-test-race: false}
- { go-build-tags: "", skip-coverage: true, snapd-debug: false, go-test-race: true }

Expand Down
4 changes: 2 additions & 2 deletions build-aux/snap/snapcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,14 @@ parts:
TAGS+=(nomanagers)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys faultinject)
TAGS+=(withtestkeys faultinject statelocktrace)
;;
esac
;;
*)
case "${VERSION}" in
1337.*)
TAGS+=(withtestkeys withbootassetstesting faultinject)
TAGS+=(withtestkeys withbootassetstesting faultinject statelocktrace)
;;
esac
;;
Expand Down
6 changes: 6 additions & 0 deletions osutil/flock.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func NewFileLockWithMode(path string, mode os.FileMode) (*FileLock, error) {
return l, nil
}

// NewFileLockWithFile wraps an open file with a file lock which can be used for
// locking.
func NewFileLockWithFile(f *os.File) *FileLock {
return &FileLock{file: f}
}

// NewFileLock creates and opens the lock file given by "path" with mode 0600.
func NewFileLock(path string) (*FileLock, error) {
return NewFileLockWithMode(path, 0600)
Expand Down
13 changes: 13 additions & 0 deletions osutil/flock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ func (s *flockSuite) TestNewFileLock(c *C) {
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that opening and closing a lock works as expected, and that the mode is right.
func (s *flockSuite) TestNewFileLockWithFile(c *C) {
myfile, err := os.OpenFile(filepath.Join(c.MkDir(), "name"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
c.Assert(err, IsNil)

lock := osutil.NewFileLockWithFile(myfile)
defer lock.Close()

fi, err := os.Stat(lock.Path())
c.Assert(err, IsNil)
c.Assert(fi.Mode().Perm(), Equals, os.FileMode(0600))
}

// Test that we can access the underlying open file.
func (s *flockSuite) TestFile(c *C) {
fname := filepath.Join(c.MkDir(), "name")
Expand Down
121 changes: 121 additions & 0 deletions overlord/state/locktrace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

import (
"fmt"
"os"
"runtime"
"time"

"github.com/snapcore/snapd/osutil"
)

var (
traceStateLock = false

traceThreshold = int64(0)
traceFilePath = ""
)

func init() {
if !osutil.GetenvBool("SNAPPY_TESTING") {
return
}

threshold := osutil.GetenvInt64("SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS")
logFilePath := os.Getenv("SNAPD_STATE_LOCK_TRACE_FILE")

if threshold <= 0 || logFilePath == "" {
return
}

traceThreshold = threshold
traceFilePath = logFilePath
traceStateLock = true
}

func traceCallers(ts, heldMs, waitMs int64) error {
if traceFilePath == "" {
return fmt.Errorf("internal error: trace file path unset")
}

logFile, err := os.OpenFile(traceFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
if err != nil {
return fmt.Errorf("cannot not open/create log trace file: %v", err)
}
lockFile := osutil.NewFileLockWithFile(logFile)
defer lockFile.Close()

if err := lockFile.Lock(); err != nil {
return fmt.Errorf("cannot take file lock: %v", err)
}

pc := make([]uintptr, 10)
// avoid 3 first callers on the stack: runtime.Callers(), this function and the parent
n := runtime.Callers(3, pc)
frames := runtime.CallersFrames(pc[:n])

_, err = fmt.Fprintf(logFile, "### %s lock: held: %d ms wait %d ms\n",
time.UnixMilli(ts),
heldMs, waitMs)
if err != nil {
return err
}

for {
frame, more := frames.Next()
_, err := fmt.Fprintf(logFile, "%s:%d %s\n", frame.File, frame.Line, frame.Function)
if err != nil {
return err
}

if !more {
break
}
}

return nil
}

func lockTimestamp() int64 {
if !traceStateLock {
return 0
}

return time.Now().UnixMilli()
}

// maybeSaveLockTime allows to save lock times when this overpass the threshold
// defined by through the SNAPD_STATE_LOCK_THRESHOLD_MS environment settings.
func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
if !traceStateLock {
return
}

heldMs := now - lockHoldStart
waitMs := lockHoldStart - lockWaitStart
if heldMs > traceThreshold || waitMs > traceThreshold {
if err := traceCallers(now, heldMs, waitMs); err != nil {
fmt.Fprintf(os.Stderr, "could write state lock trace: %v\n", err)
}
}
}
28 changes: 28 additions & 0 deletions overlord/state/locktrace_fake.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// -*- Mode: Go; indent-tabs-mode: t -*-
//go:build !statelocktrace

/*
* Copyright (C) 2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

package state

func lockTimestamp() int64 {
return int64(0)
}

func maybeSaveLockTime(lockWaitStart, lockHoldStart, now int64) {
}
10 changes: 10 additions & 0 deletions overlord/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ type State struct {
// task/changes observing
taskHandlers map[int]func(t *Task, old, new Status) (remove bool)
changeHandlers map[int]func(chg *Change, old, new Status)

lockWaitStart int64
lockHoldStart int64
}

// New returns a new empty state.
Expand Down Expand Up @@ -139,8 +142,11 @@ func (s *State) Modified() bool {

// Lock acquires the state lock.
func (s *State) Lock() {
lockWait := lockTimestamp()
s.mu.Lock()
atomic.AddInt32(&s.muC, 1)
s.lockWaitStart = lockWait
s.lockHoldStart = lockTimestamp()
}

func (s *State) reading() {
Expand All @@ -158,7 +164,11 @@ func (s *State) writing() {

func (s *State) unlock() {
atomic.AddInt32(&s.muC, -1)
lockWaitStart, lockHoldStart := s.lockWaitStart, s.lockHoldStart
s.lockWaitStart, s.lockHoldStart = 0, 0
lockHoldEnd := lockTimestamp()
s.mu.Unlock()
maybeSaveLockTime(lockWaitStart, lockHoldStart, lockHoldEnd)
}

type marshalledState struct {
Expand Down
4 changes: 2 additions & 2 deletions packaging/ubuntu-16.04/rules
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ _SNAP_TAGS=
# check if we need to include the testkeys in the binary
ifneq (,$(filter testkeys,$(DEB_BUILD_OPTIONS)))
# if enabled also enable bootloader assets testing and fault injection
_TAGS := withtestkeys,withbootassetstesting,faultinject
_SNAP_TAGS := nomanagers,withtestkeys,faultinject
_TAGS := withtestkeys,withbootassetstesting,faultinject,statelocktrace
_SNAP_TAGS := nomanagers,withtestkeys,faultinject,statelocktrace
else
_SNAP_TAGS=nomanagers
endif
Expand Down
2 changes: 2 additions & 0 deletions spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ environment:
SNAPD_DEB_FROM_REPO: '$(HOST: echo "${SPREAD_SNAPD_DEB_FROM_REPO:-true}")'
# Directory where the built snapd snaps and other assets are stored
SNAPD_WORK_DIR: '$(HOST: echo "${SPREAD_SNAPD_WORK_DIR:-/var/tmp/work-dir}")'
# Threshold used to determine which stake locks have to be collected during runtime
SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS: '$(HOST: echo "${SPREAD_SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS:-0}")'

# Directory where the nested images and test assets are stored
NESTED_WORK_DIR: '$(HOST: echo "${NESTED_WORK_DIR:-/var/tmp/work-dir}")'
Expand Down
4 changes: 3 additions & 1 deletion tests/lib/prepare-restore.sh
Original file line number Diff line number Diff line change
Expand Up @@ -707,14 +707,16 @@ prepare_suite_each() {
fi

if [[ "$variant" = full ]]; then
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
# shellcheck source=tests/lib/prepare.sh
. "$TESTSLIB"/prepare.sh
if os.query is-classic; then
prepare_each_classic
else
prepare_each_core
fi

prepare_state_lock "$SPREAD_JOB"
fi

case "$SPREAD_SYSTEM" in
Expand Down
35 changes: 35 additions & 0 deletions tests/lib/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ prepare_classic() {
fi

prepare_reexec_override
prepare_state_lock "SNAPD PROJECT"
prepare_memory_limit_override
disable_refreshes

Expand Down Expand Up @@ -1524,6 +1525,39 @@ EOF
rm -rf "$UNPACK_DIR"
}

prepare_state_lock(){
TAG=$1
CONF_FILE="/etc/systemd/system/snapd.service.d/state-lock.conf"
LOCKS_FILE="$TESTSTMP"/snapd_lock_traces
RESTART=false

if [ "$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" -gt 0 ]; then
echo "###START: $TAG" >> "$LOCKS_FILE"

# Generate the config file when it does not exist and when the threshold has changed different
if ! [ -f "$CONF_FILE" ] || ! grep -q "SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS=$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS" < "$CONF_FILE"; then
echo "Prepare snapd for getting state lock time"
cat <<EOF > "$CONF_FILE"
[Service]
Environment=SNAPPY_TESTING=1
Environment=SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS="$SNAPD_STATE_LOCK_TRACE_THRESHOLD_MS"
Environment=SNAPD_STATE_LOCK_TRACE_FILE="$LOCKS_FILE"
EOF
RESTART=true
fi
elif [ -f "$CONF_FILE" ]; then
rm -f "$CONF_FILE"
RESTART=true
fi

if [ "$RESTART" = "true" ]; then
# the service setting may have changed in the service so we need
# to ensure snapd is reloaded
systemctl daemon-reload
systemctl restart snapd
fi
}

# prepare_ubuntu_core will prepare ubuntu-core 16+
prepare_ubuntu_core() {
# we are still a "classic" image, prepare the surgery
Expand Down Expand Up @@ -1637,6 +1671,7 @@ prepare_ubuntu_core() {
# or restore will break
remove_disabled_snaps
prepare_memory_limit_override
prepare_state_lock "SNAPD PROJECT"
setup_experimental_features
systemctl stop snapd.service snapd.socket
save_snapd_state
Expand Down
16 changes: 16 additions & 0 deletions tests/main/snapd-state-lock/task.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
summary: smoke test used to retrieve the lock state times

details: |
Test used to collect artifacts
priority: -1

artifacts:
- snapd_lock_traces

execute: |
if [ -f "$TESTSTMP"/snapd_lock_traces ]; then
cp -f "$TESTSTMP"/snapd_lock_traces .
else
touch snapd_lock_traces
fi

0 comments on commit 953a5d8

Please sign in to comment.