Skip to content

Commit 027cfd8

Browse files
authored
Merge pull request #184 from Scalingo/fix/183/cpu-monitoring-stop
fix(cpu/monitoring) Correctly handle error when CPU monitoring fails because cgroup has been deleted (container stopped)
2 parents a644ca3 + da109d4 commit 027cfd8

File tree

5 files changed

+56
-5
lines changed

5 files changed

+56
-5
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## To Be Released
44

5+
## v2.0.2 - 2025-09-25
6+
7+
* Correctly handle error when CPU monitoring fails because cgroup has been deleted (container stopped)
8+
59
## v2.0.1 - 2025-09-09
610

711
* dist(cgo) Disable CGO to build binaries

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Acadock Monitoring - Docker container monitoring v2.0.1
1+
# Acadock Monitoring - Docker container monitoring v2.0.2
22

33
This webservice provides live data on Docker containers. It takes
44
data from the Linux kernel control groups and from the namespace of
@@ -88,7 +88,7 @@ Bump new version number in:
8888
Commit, tag and create a new release:
8989

9090
```sh
91-
version="2.0.1"
91+
version="2.0.2"
9292

9393
git switch --create release/${version}
9494
git add CHANGELOG.md README.md

cgroup/stats.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,14 @@ func (e StatsReaderError) Unwrap() error {
3939
return e.err
4040
}
4141

42+
func NewStatsReaderError(err error) StatsReaderError {
43+
return StatsReaderError{err: err}
44+
}
45+
4246
func (r *StatsReaderImpl) GetStats(ctx context.Context, containerID string) (Stats, error) {
4347
manager, err := NewManager(ctx, containerID)
4448
if err != nil {
45-
return Stats{}, errors.Wrap(ctx, err, "create cgroup manager")
49+
return Stats{}, NewStatsReaderError(errors.Wrap(ctx, err, "create cgroup manager"))
4650
}
4751
var stats Stats
4852
if manager.IsV2() {
@@ -51,7 +55,7 @@ func (r *StatsReaderImpl) GetStats(ctx context.Context, containerID string) (Sta
5155
stats, err = r.getCgroupV1Stats(ctx, manager)
5256
}
5357
if err != nil {
54-
return Stats{}, StatsReaderError{err: errors.Wrap(ctx, err, "get cgroup stats")}
58+
return Stats{}, NewStatsReaderError(errors.Wrap(ctx, err, "get cgroup stats"))
5559
}
5660
return stats, nil
5761
}

cpu/cpu.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func (m *CPUUsageMonitor) Start(ctx context.Context) {
5656
case docker.ContainerActionStart:
5757
ctx, cancel := context.WithCancel(ctx)
5858
cancels[event.ContainerID] = cancel
59-
log.Infof("Start monitoring CPU")
59+
log.Info("Start monitoring CPU")
6060
go m.monitorContainerCPU(ctx, event.ContainerID)
6161
case docker.ContainerActionStop:
6262
log.Info("Stop monitoring CPU")
@@ -122,6 +122,7 @@ func (m *CPUUsageMonitor) monitorContainerCPU(ctx context.Context, id string) {
122122
if errors.As(err, &cgroupStatsErr) {
123123
log.WithError(err).Infof("Stop monitoring CPU with error")
124124
m.cleanMonitoringData(id)
125+
return
125126
} else if err != nil {
126127
// No Error logging to prevent spamming
127128
log.WithError(err).Info("Fail to update container CPU usage")

cpu/cpu_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package cpu
22

33
import (
44
"context"
5+
"errors"
56
"sync"
67
"testing"
78
"time"
@@ -68,3 +69,44 @@ func TestCPUUsageMonitor_Start(t *testing.T) {
6869
cancel()
6970
wg.Wait()
7071
}
72+
73+
func TestCPUUsageMonitor_Start_Shutdown(t *testing.T) {
74+
ctrl := gomock.NewController(t)
75+
76+
cgroupStatsReader := cgroupmock.NewMockStatsReader(ctrl)
77+
cpuStatsReader := procfs.NewMockCPUStat(ctrl)
78+
containerRepository := dockermock.NewMockContainerRepository(ctrl)
79+
dockerID := "1"
80+
config.RefreshTime = 100 * time.Millisecond
81+
82+
cpuStatsReader.EXPECT().Read(gomock.Any()).Return(procfs.CPUStats{}, nil).AnyTimes()
83+
err := errors.New("cgroup error")
84+
cgroupStatsReader.EXPECT().GetStats(gomock.Any(), dockerID).Return(cgroup.Stats{}, cgroup.NewStatsReaderError(err)).MaxTimes(1)
85+
86+
eventsChan := make(chan docker.ContainerEvent)
87+
containerRepository.EXPECT().RegisterToContainersStream(gomock.Any()).Return(eventsChan)
88+
go func() {
89+
eventsChan <- docker.ContainerEvent{ContainerID: dockerID, Action: docker.ContainerActionStart}
90+
close(eventsChan)
91+
}()
92+
93+
monitor := NewCPUUsageMonitor(containerRepository, cpuStatsReader, cgroupStatsReader)
94+
95+
ctx, cancel := context.WithCancel(t.Context())
96+
wg := &sync.WaitGroup{}
97+
wg.Add(1)
98+
go func() {
99+
monitor.Start(ctx)
100+
wg.Done()
101+
}()
102+
103+
// After 2 cycles it must have accurate CPU information
104+
time.Sleep(2 * config.RefreshTime)
105+
usage, err := monitor.GetContainerUsage(dockerID)
106+
require.NoError(t, err)
107+
require.Zero(t, usage)
108+
109+
// Check that the context does stop the Monitor
110+
cancel()
111+
wg.Wait()
112+
}

0 commit comments

Comments
 (0)