Skip to content

Commit da9c939

Browse files
committed
feat: add repro test for gmp:matchstuck
Signed-off-by: bwplotka <[email protected]>
1 parent f1eb448 commit da9c939

File tree

4 files changed

+383
-69
lines changed

4 files changed

+383
-69
lines changed

e2e/collector_filter_test.go

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package e2e
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"testing"
21+
22+
"github.com/GoogleCloudPlatform/prometheus-engine/e2e/deploy"
23+
"github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator"
24+
monitoringv1 "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator/apis/monitoring/v1"
25+
corev1 "k8s.io/api/core/v1"
26+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27+
"k8s.io/apimachinery/pkg/labels"
28+
"k8s.io/apimachinery/pkg/util/intstr"
29+
"sigs.k8s.io/controller-runtime/pkg/client"
30+
)
31+
32+
var collectorPodMonitoring = &monitoringv1.PodMonitoring{
33+
ObjectMeta: metav1.ObjectMeta{
34+
Name: "collector-podmon",
35+
Namespace: operator.DefaultOperatorNamespace,
36+
},
37+
Spec: monitoringv1.PodMonitoringSpec{
38+
Selector: metav1.LabelSelector{
39+
MatchLabels: map[string]string{
40+
operator.LabelAppName: operator.NameCollector,
41+
},
42+
},
43+
Endpoints: []monitoringv1.ScrapeEndpoint{
44+
{
45+
Port: intstr.FromString(operator.CollectorPrometheusContainerPortName),
46+
Interval: "5s",
47+
},
48+
{
49+
Port: intstr.FromString(operator.CollectorConfigReloaderContainerPortName),
50+
Interval: "5s",
51+
},
52+
},
53+
},
54+
}
55+
56+
// filterState identifies one of the collector filter configurations exercised
// by the tests in this file. Values are comparable, and the methods below
// switch over the known states (stateEmpty, stateA, stateB) only.
type filterState struct {
	// match is the single series selector of the state; empty for stateEmpty.
	match string
}

// Known filter states. Any other filterState value is rejected by the methods
// on filterState with a fatal test failure.
var (
	stateEmpty = filterState{}
	stateA     = filterState{match: "{__name__='go_goroutines',container='prometheus'}"}
	stateB     = filterState{match: "{__name__='go_goroutines',container='config-reloader'}"}
)

// expectedForkConfigEntry returns the YAML "match:" snippet expected for f:
// an empty string for stateEmpty, otherwise a one-element list holding the
// selector in single quotes with its inner single quotes doubled.
// An unknown state fails the test via t.Fatalf.
//
// NOTE(review): whitespace inside the raw string literals is part of the
// returned value; confirm the indentation matches the rendered config the
// caller compares against.
func (f filterState) expectedForkConfigEntry(t testing.TB) string {
	// Attribute failures to the caller rather than to this helper.
	t.Helper()

	switch f {
	case stateEmpty:
		return ""
	case stateA:
		return `
match:
- '{__name__=''go_goroutines'',container=''prometheus''}'`
	case stateB:
		return `
match:
- '{__name__=''go_goroutines'',container=''config-reloader''}'`
	default:
		// %q makes empty or whitespace-only selectors visible in the message.
		t.Fatalf("invalid filter state: %q", f)
		return ""
	}
}

// filters returns the selectors of f as a slice: nil for stateEmpty and a
// single-element slice holding f.match for stateA and stateB.
// An unknown state fails the test via t.Fatalf.
func (f filterState) filters(t testing.TB) []string {
	// Attribute failures to the caller rather than to this helper.
	t.Helper()

	switch f {
	case stateEmpty:
		return nil
	case stateA, stateB:
		return []string{f.match}
	default:
		// %q makes empty or whitespace-only selectors visible in the message.
		t.Fatalf("invalid filter state: %q", f)
		return nil
	}
}
95+
96+
// testValidateApplied fails the test if the current filtering state is not applied to "f"
97+
// within the context deadline. This test assumes:
98+
// * collectors are running.
99+
// * collectorPodMonitoring is applied.
100+
// * prometheus and config-reloader expose 'go_goroutines' metric.
101+
// * OperatorConfig as "external_key"=$externalKey label configured (as well as default ones like project, etc.).
102+
func (f filterState) testValidateApplied(ctx context.Context, kubeClient client.Client, externalKey string) func(*testing.T) {
103+
return func(t *testing.T) {
104+
metricClient, err := newMetricClient(ctx)
105+
if err != nil {
106+
t.Fatalf("create metric client: %s", err)
107+
}
108+
defer metricClient.Close()
109+
110+
nodes := corev1.NodeList{}
111+
if err := kubeClient.List(ctx, &nodes); err != nil {
112+
t.Fatalf("list nodes: %s", err)
113+
}
114+
if len(nodes.Items) == 0 {
115+
t.Fatal("expected more than 0 nodes in the cluster")
116+
}
117+
118+
pods := corev1.PodList{}
119+
if err = kubeClient.List(ctx, &pods, client.InNamespace(operator.DefaultOperatorNamespace), &client.ListOptions{
120+
LabelSelector: labels.SelectorFromSet(map[string]string{
121+
operator.LabelAppName: operator.NameCollector,
122+
}),
123+
}); err != nil {
124+
t.Fatalf("list collector pods: %s", err)
125+
}
126+
if got, want := len(pods.Items), len(nodes.Items); got != want {
127+
t.Fatalf("expected %v collector pods, got %v", want, got)
128+
}
129+
130+
for _, pod := range pods.Items {
131+
t.Run(pod.Name, func(t *testing.T) {
132+
var promMatch, configReloaderMatch bool
133+
switch f {
134+
case stateEmpty:
135+
promMatch = true
136+
configReloaderMatch = true
137+
case stateA:
138+
promMatch = true
139+
configReloaderMatch = false
140+
case stateB:
141+
promMatch = false
142+
configReloaderMatch = true
143+
default:
144+
t.Fatalf("invalid filter state: %s", f)
145+
}
146+
147+
t.Run("prometheus", testValidateGCMMetric(ctx, metricClient, listTimeSeriesFilter{
148+
metricType: "prometheus.googleapis.com/go_goroutines/gauge",
149+
job: collectorPodMonitoring.Name,
150+
instance: fmt.Sprintf("%s:%s", pod.Spec.NodeName, operator.CollectorPrometheusContainerPortName),
151+
pod: pod.Name,
152+
container: "prometheus",
153+
externalKey: externalKey,
154+
namespace: operator.DefaultOperatorNamespace,
155+
}, metricExpectation{isQueryable: promMatch}))
156+
157+
t.Run("config-reloader", testValidateGCMMetric(ctx, metricClient, listTimeSeriesFilter{
158+
metricType: "prometheus.googleapis.com/go_goroutines/gauge",
159+
job: collectorPodMonitoring.Name,
160+
instance: fmt.Sprintf("%s:%s", pod.Spec.NodeName, operator.CollectorConfigReloaderContainerPortName),
161+
pod: pod.Name,
162+
container: "config-reloader",
163+
externalKey: externalKey,
164+
namespace: operator.DefaultOperatorNamespace,
165+
}, metricExpectation{isQueryable: configReloaderMatch}))
166+
})
167+
}
168+
}
169+
}
170+
171+
type filterCase struct {
172+
filter filterState
173+
174+
expectedFilter filterState // What we expect to be applied.
175+
}
176+
177+
// TestCollectorFilterMatchOneOf tests various edge cases around filter.matchOneOf settings.
178+
// NOTE: This test does not intend to test collector match filtering cases, those should
179+
// be tested on collector side. What we test here is if filtering is applied correctly
180+
// in general and in the event of the orphaned extra args.
181+
//
182+
// Regression tests against go/gmp:matchstuck.
183+
func TestCollectorMatch(t *testing.T) {
184+
if skipGCM {
185+
t.Skip("this test requires GCM integration")
186+
}
187+
// See go/gmp:matchstuck for 0, A, B, C case definition.
188+
for _, tcase := range []struct {
189+
name string
190+
explicitFilter filterState
191+
filterCases []filterCase
192+
}{
193+
{
194+
name: "0 to A",
195+
explicitFilter: stateEmpty,
196+
filterCases: []filterCase{
197+
// 0
198+
{
199+
filter: stateEmpty,
200+
expectedFilter: stateEmpty,
201+
},
202+
// A
203+
{
204+
filter: stateA,
205+
// Given the go/gmp:matchstuck we expect the noop behaviour.
206+
expectedFilter: stateEmpty, // TODO: Add fix, so it's stateA (when forced).
207+
},
208+
{
209+
filter: stateB,
210+
// Given the go/gmp:matchstuck we expect the noop behaviour.
211+
expectedFilter: stateEmpty, // TODO: Add fix, so it's stateB (when forced).
212+
},
213+
{
214+
filter: stateEmpty,
215+
expectedFilter: stateEmpty,
216+
},
217+
},
218+
},
219+
{
220+
name: "B to C",
221+
explicitFilter: stateB,
222+
filterCases: []filterCase{
223+
{
224+
filter: stateA,
225+
// Given the go/gmp:matchstuck we expect the orphaned setting applied.
226+
expectedFilter: stateB, // TODO: Add fix, so it's stateA (when forced).
227+
},
228+
// B-2
229+
{
230+
filter: stateEmpty,
231+
// Given the go/gmp:matchstuck we expect the orphaned setting applied.
232+
expectedFilter: stateB, // TODO: Add fix, `so it's stateEmpty (when forced).
233+
},
234+
// C
235+
{
236+
filter: stateB,
237+
expectedFilter: stateB,
238+
},
239+
},
240+
},
241+
} {
242+
t.Run(tcase.name, func(t *testing.T) {
243+
ctx := contextWithDeadline(t)
244+
245+
var dOpts []deploy.DeployOption
246+
if tcase.explicitFilter != stateEmpty {
247+
dOpts = append(dOpts, deploy.WithExplicitCollectorFilter(tcase.explicitFilter.match))
248+
}
249+
kubeClient, restConfig, err := setupCluster(ctx, t, dOpts...)
250+
if err != nil {
251+
t.Fatalf("error instantiating clients. err: %s", err)
252+
}
253+
254+
t.Run("collector-deployed", testCollectorDeployed(ctx, restConfig, kubeClient))
255+
t.Run("enable-target-status", testEnableTargetStatus(ctx, kubeClient))
256+
t.Run("self-podmonitoring-ready", testEnsurePodMonitoringReady(ctx, kubeClient, collectorPodMonitoring))
257+
258+
for i, fcase := range tcase.filterCases {
259+
// Ensure a unique external label value so we are sure the existence checks are accurate.
260+
externalKey := fmt.Sprintf("filter%d", i)
261+
262+
// Setup OperatorConfig with an intput filtering state (filter.matchOneOf).
263+
t.Run("collector-operatorconfig", testCollectorOperatorConfigWithParams(
264+
ctx,
265+
kubeClient,
266+
externalKey,
267+
fcase.filter,
268+
true, // Trim scrapeConfigs from diff chceck.
269+
))
270+
t.Run("filter-applied-gcm", fcase.expectedFilter.testValidateApplied(ctx, kubeClient, externalKey))
271+
}
272+
})
273+
}
274+
}
275+
276+
/*
277+
resource.type = "prometheus_target" AND resource.labels.project_id = "gpe-test-1" AND resource.labels.location = "us-central1-c" AND resource.labels.cluster = "gmp-20251410_0932" AND resource.labels.job = "collector-podmon" AND resource.labels.instance = "test-collector-match-f207-control-plane:prom-metrics" AND metric.type = "prometheus.googleapis.com/go_goroutines/gauge"
278+
279+
AND metric.labels.pod = "collector-5gxst" AND metric.labels.container = "config-reloader" AND resource.labels.namespace = "gmp-system" AND metric.labels.external_key = "filter0"
280+
*/

0 commit comments

Comments
 (0)