From 49637ef7fb61047be8338e3658017630a2c33e03 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Mon, 15 Jun 2026 09:59:43 +0200 Subject: [PATCH 01/17] add generic metrics for external API --- pkg/metrics/http.go | 8 ++++++-- pkg/metrics/metrics.go | 13 ++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 047032e6..10693936 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -3,6 +3,7 @@ package metrics import ( "fmt" "net/http" + "strconv" "strings" "time" @@ -33,8 +34,11 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp With(prometheus.Labels{operationLabel: operation}). Inc() - if response != nil && response.StatusCode >= http.StatusInternalServerError { - LoadBalancerErrorCount.Inc() + if response != nil && response.StatusCode >= 400 { + HTTPErrorCount.With(prometheus.Labels{ + "method": request.Method, + "code": strconv.Itoa(response.StatusCode), + }).Inc() } return response, err diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index dd21347f..aa6dca6c 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -19,13 +19,12 @@ var ( ConstLabels: nil, }, []string{operationLabel}) - LoadBalancerErrorCount = prometheus.NewCounter(prometheus.CounterOpts{ + HTTPErrorCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "errors_total", - Help: "the number of server errors reported when calling the load balancer API", + Name: "http_errors_total", + Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }) + }, []string{"method", "code"}) LoadBalancerResponseTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, @@ -56,12 +55,12 @@ func (e *Exporter) Collect(metrics chan<- prometheus.Metric) { func (e *Exporter) describeCloudProvider(descs chan<- *prometheus.Desc) { 
LoadBalancerRequestCount.Describe(descs) - LoadBalancerErrorCount.Describe(descs) + HTTPErrorCount.Describe(descs) LoadBalancerResponseTimeHistogram.Describe(descs) } func (e *Exporter) collectCloudProvider(metrics chan<- prometheus.Metric) { LoadBalancerRequestCount.Collect(metrics) - LoadBalancerErrorCount.Collect(metrics) + HTTPErrorCount.Collect(metrics) LoadBalancerResponseTimeHistogram.Collect(metrics) } From be36e6f2c4d070e22c9dd151d7656309db8fde55 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Mon, 15 Jun 2026 10:04:52 +0200 Subject: [PATCH 02/17] make fmt --- pkg/metrics/http.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 10693936..9c005146 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -36,8 +36,8 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ - "method": request.Method, - "code": strconv.Itoa(response.StatusCode), + "method": request.Method, + "code": strconv.Itoa(response.StatusCode), }).Inc() } From 99d7a65bae89782337112cbd2de4647126992b1d Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 16:38:00 +0200 Subject: [PATCH 03/17] add tests for the new http error metrics --- pkg/metrics/http_test.go | 65 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 1ffcc08f..9a12616e 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -2,10 +2,13 @@ package metrics import ( "net/http" + "net/http/httptest" "net/url" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" ) var _ = Describe("Metrics", func() { @@ -22,4 +25,66 @@ var _ = Describe("Metrics", func() { Entry("get load-balancers", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers", "get_load-balancers"), Entry("get load-balancers instance", "GET", "/v2/projects/6-a-4-8-c/regions/eu01/load-balancers/id", "get_load-balancers_instance"), ) + + Describe("InstrumentedRoundTripper", func() { + It("increments HTTPErrorCount for 400 responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodGet, + "code": "400", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("increments HTTPErrorCount for 500 responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodPost, + "code": "500", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Post(server.URL, "application/json", nil) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("does not increment HTTPErrorCount for successful responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + 
defer server.Close() + + labels := prometheus.Labels{ + "method": http.MethodGet, + "code": "200", + } + before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL) + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPErrorCount.With(labels)) + Expect(after - before).To(Equal(float64(0))) + }) + }) }) From ee0176b49fdab3d9c69bebd4c0f24bf8d801d6f2 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 16:51:07 +0200 Subject: [PATCH 04/17] remove loadbalancer prefix from metrics names --- pkg/metrics/http.go | 4 ++-- pkg/metrics/metrics.go | 23 ++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 9c005146..2782c7fc 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -27,10 +27,10 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp response, err := rt.base.RoundTrip(request) duration := time.Since(startTime) - LoadBalancerResponseTimeHistogram. + HTTPRequestDurationHistogram. With(prometheus.Labels{operationLabel: operation}). Observe(float64(duration.Seconds())) - LoadBalancerRequestCount. + HTTPRequestCount. With(prometheus.Labels{operationLabel: operation}). 
Inc() diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index aa6dca6c..963e3417 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -6,16 +6,14 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" - loadBalancerSubSystem = "lb" operationLabel = "op" ) var ( - LoadBalancerRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + HTTPRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "requests_total", - Help: "the number of requests to the load balancer API", + Name: "http_requests_total", + Help: "The number of requests to external APIs", ConstLabels: nil, }, []string{operationLabel}) @@ -26,11 +24,10 @@ var ( ConstLabels: nil, }, []string{"method", "code"}) - LoadBalancerResponseTimeHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, - Subsystem: loadBalancerSubSystem, - Name: "request_duration_seconds", - Help: "the response times of the load balancer API", + Name: "http_request_duration_seconds", + Help: "The response times of external API requests", ConstLabels: nil, Buckets: nil, }, []string{operationLabel}) @@ -54,13 +51,13 @@ func (e *Exporter) Collect(metrics chan<- prometheus.Metric) { } func (e *Exporter) describeCloudProvider(descs chan<- *prometheus.Desc) { - LoadBalancerRequestCount.Describe(descs) + HTTPRequestCount.Describe(descs) HTTPErrorCount.Describe(descs) - LoadBalancerResponseTimeHistogram.Describe(descs) + HTTPRequestDurationHistogram.Describe(descs) } func (e *Exporter) collectCloudProvider(metrics chan<- prometheus.Metric) { - LoadBalancerRequestCount.Collect(metrics) + HTTPRequestCount.Collect(metrics) HTTPErrorCount.Collect(metrics) - LoadBalancerResponseTimeHistogram.Collect(metrics) + HTTPRequestDurationHistogram.Collect(metrics) } From 
45ed26bd17e7f752835c0852cff017be5609d326 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Tue, 16 Jun 2026 17:11:06 +0200 Subject: [PATCH 05/17] add tests for HTTPRequestDurationHistogram and HTTPRequestCount --- pkg/metrics/http_test.go | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 9a12616e..93a81802 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -9,6 +9,7 @@ import ( . "github.com/onsi/gomega" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" + dto "github.com/prometheus/client_model/go" ) var _ = Describe("Metrics", func() { @@ -27,6 +28,44 @@ var _ = Describe("Metrics", func() { ) Describe("InstrumentedRoundTripper", func() { + It("increments HTTPRequestCount for responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + labels := prometheus.Labels{ + operationLabel: "get_request-count-test", + } + before := testutil.ToFloat64(HTTPRequestCount.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-count-test") + Expect(err).NotTo(HaveOccurred()) + defer response.Body.Close() + + after := testutil.ToFloat64(HTTPRequestCount.With(labels)) + Expect(after - before).To(Equal(float64(1))) + }) + + It("records HTTPRequestDurationHistogram observations for responses", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + labels := prometheus.Labels{ + operationLabel: "get_request-duration-test", + } + before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) + + response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-duration-test") + Expect(err).NotTo(HaveOccurred()) + defer 
response.Body.Close() + + after := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) + Expect(after - before).To(Equal(uint64(1))) + }) + It("increments HTTPErrorCount for 400 responses", func() { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusBadRequest) @@ -88,3 +127,13 @@ var _ = Describe("Metrics", func() { }) }) }) + +func histogramSampleCount(observer prometheus.Observer) uint64 { + metric, ok := observer.(prometheus.Metric) + Expect(ok).To(BeTrue()) + + dtoMetric := &dto.Metric{} + Expect(metric.Write(dtoMetric)).To(Succeed()) + + return dtoMetric.GetHistogram().GetSampleCount() +} From b718e18dffaeadd8543e05855a4680fc20f27600 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 09:40:47 +0200 Subject: [PATCH 06/17] add api label from client creation --- pkg/ccm/stackit.go | 12 ++++++++++-- pkg/metrics/http.go | 26 +++++++++++++++++++++----- pkg/metrics/http_test.go | 36 +++++++++++++++++++++++++++++++----- pkg/metrics/metrics.go | 10 +++++++--- 4 files changed, 69 insertions(+), 15 deletions(-) diff --git a/pkg/ccm/stackit.go b/pkg/ccm/stackit.go index 950bd933..03c6dcb9 100644 --- a/pkg/ccm/stackit.go +++ b/pkg/ccm/stackit.go @@ -117,8 +117,12 @@ func BuildObservability() (*MetricsRemoteWrite, error) { // NewCloudControllerManager creates a new instance of the stackit struct from a stackitconfig struct func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteWrite) (*CloudControllerManager, error) { + lbHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameLoadBalancer) + if err != nil { + return nil, fmt.Errorf("create load balancer metrics HTTP client: %w", err) + } lbOpts := []sdkconfig.ConfigurationOption{ - sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()), + sdkconfig.WithHTTPClient(lbHTTPClient), } if cfg.Global.APIEndpoints.LoadBalancerAPI != "" { @@ -138,8 +142,12 @@ func 
NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteW return nil, fmt.Errorf("failed to create lb client: %v", err) } + iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) + if err != nil { + return nil, fmt.Errorf("create IaaS metrics HTTP client: %w", err) + } iaasOpts := []sdkconfig.ConfigurationOption{ - sdkconfig.WithHTTPClient(metrics.NewInstrumentedHTTPClient()), + sdkconfig.WithHTTPClient(iaasHTTPClient), } if cfg.Global.APIEndpoints.IaasAPI != "" { diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 2782c7fc..1af6f787 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -1,6 +1,7 @@ package metrics import ( + "errors" "fmt" "net/http" "strconv" @@ -10,13 +11,21 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func NewInstrumentedHTTPClient() *http.Client { - return &http.Client{ - Transport: &InstrumentedRoundTripper{http.DefaultTransport}, +func NewInstrumentedHTTPClient(api string) (*http.Client, error) { + if api == "" { + return nil, errors.New("api name is required") } + + return &http.Client{ + Transport: &InstrumentedRoundTripper{ + api: api, + base: http.DefaultTransport, + }, + }, nil } type InstrumentedRoundTripper struct { + api string base http.RoundTripper } @@ -28,14 +37,21 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp duration := time.Since(startTime) HTTPRequestDurationHistogram. - With(prometheus.Labels{operationLabel: operation}). + With(prometheus.Labels{ + apiLabel: rt.api, + operationLabel: operation, + }). Observe(float64(duration.Seconds())) HTTPRequestCount. - With(prometheus.Labels{operationLabel: operation}). + With(prometheus.Labels{ + apiLabel: rt.api, + operationLabel: operation, + }). 
Inc() if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ + apiLabel: rt.api, "method": request.Method, "code": strconv.Itoa(response.StatusCode), }).Inc() diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 93a81802..0a6308bd 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -28,6 +28,12 @@ var _ = Describe("Metrics", func() { ) Describe("InstrumentedRoundTripper", func() { + It("requires an API name", func() { + client, err := NewInstrumentedHTTPClient("") + Expect(err).To(MatchError("api name is required")) + Expect(client).To(BeNil()) + }) + It("increments HTTPRequestCount for responses", func() { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) @@ -35,11 +41,15 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", operationLabel: "get_request-count-test", } before := testutil.ToFloat64(HTTPRequestCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-count-test") + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL + "/request-count-test") Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -54,11 +64,15 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", operationLabel: "get_request-duration-test", } before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL + "/request-duration-test") + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL + "/request-duration-test") Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -73,12 +87,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := 
prometheus.Labels{ + apiLabel: "test", "method": http.MethodGet, "code": "400", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -93,12 +111,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", "method": http.MethodPost, "code": "500", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Post(server.URL, "application/json", nil) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Post(server.URL, "application/json", nil) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() @@ -113,12 +135,16 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ + apiLabel: "test", "method": http.MethodGet, "code": "200", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - response, err := NewInstrumentedHTTPClient().Get(server.URL) + client, err := NewInstrumentedHTTPClient("test") + Expect(err).NotTo(HaveOccurred()) + + response, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) defer response.Body.Close() diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 963e3417..6eb20520 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -6,7 +6,11 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" + apiLabel = "api" operationLabel = "op" + + APINameLoadBalancer = "loadbalancer" + APINameIaaS = "iaas" ) var ( @@ -15,14 +19,14 @@ var ( Name: "http_requests_total", Help: "The number of requests to external APIs", ConstLabels: nil, - }, []string{operationLabel}) + }, []string{apiLabel, operationLabel}) HTTPErrorCount = 
prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, Name: "http_errors_total", Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }, []string{"method", "code"}) + }, []string{apiLabel, "method", "code"}) HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, @@ -30,7 +34,7 @@ var ( Help: "The response times of external API requests", ConstLabels: nil, Buckets: nil, - }, []string{operationLabel}) + }, []string{apiLabel, operationLabel}) ) type Exporter struct { From b56b30a28beef1029dd8947eb1e133b27253a057 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 09:50:25 +0200 Subject: [PATCH 07/17] make mod tidy --- go.mod | 2 +- pkg/metrics/http.go | 6 +++--- pkg/metrics/http_test.go | 28 ++++++++++++++-------------- pkg/metrics/metrics.go | 4 +++- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/go.mod b/go.mod index 42e3844a..16b93b19 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/onsi/ginkgo/v2 v2.32.0 github.com/onsi/gomega v1.42.1 github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/client_model v0.6.2 github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 github.com/stackitcloud/stackit-sdk-go/core v0.26.0 @@ -88,7 +89,6 @@ require ( github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.67.5 // indirect github.com/prometheus/procfs v0.19.2 // indirect github.com/stackitcloud/stackit-sdk-go/services/resourcemanager v0.24.0 // indirect diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 1af6f787..27f7028f 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -51,9 +51,9 @@ func (rt 
*InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ - apiLabel: rt.api, - "method": request.Method, - "code": strconv.Itoa(response.StatusCode), + apiLabel: rt.api, + methodLabel: request.Method, + codeLabel: strconv.Itoa(response.StatusCode), }).Inc() } diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 0a6308bd..62f3bdb2 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -35,7 +35,7 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPRequestCount for responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() @@ -58,7 +58,7 @@ var _ = Describe("Metrics", func() { }) It("records HTTPRequestDurationHistogram observations for responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() @@ -81,15 +81,15 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPErrorCount for 400 responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusBadRequest) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodGet, - "code": "400", + apiLabel: "test", + methodLabel: http.MethodGet, + codeLabel: "400", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) @@ -105,15 +105,15 @@ var _ = Describe("Metrics", func() { }) It("increments HTTPErrorCount for 500 responses", func() { - server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusInternalServerError) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodPost, - "code": "500", + apiLabel: "test", + methodLabel: http.MethodPost, + codeLabel: "500", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) @@ -129,15 +129,15 @@ var _ = Describe("Metrics", func() { }) It("does not increment HTTPErrorCount for successful responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - "method": http.MethodGet, - "code": "200", + apiLabel: "test", + methodLabel: http.MethodGet, + codeLabel: "200", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 6eb20520..f15ef09a 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -7,6 +7,8 @@ import ( const ( cloudProviderMetricPrefix = "cloud_provider_stackit" apiLabel = "api" + methodLabel = "method" + codeLabel = "code" operationLabel = "op" APINameLoadBalancer = "loadbalancer" @@ -26,7 +28,7 @@ var ( Name: "http_errors_total", Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }, []string{apiLabel, "method", "code"}) + }, []string{apiLabel, methodLabel, codeLabel}) HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, From fcbef024682898aefb531afa3dd420ecbbafacbf Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 10:35:30 +0200 Subject: [PATCH 08/17] add metrics for csi --- cmd/stackit-csi-plugin/main.go | 12 
+++++++++++- pkg/stackit/client.go | 3 ++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index 6a06fecc..fc976b99 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -6,6 +6,8 @@ import ( "github.com/spf13/cobra" "github.com/spf13/pflag" + "github.com/stackitcloud/cloud-provider-stackit/pkg/metrics" + sdkconfig "github.com/stackitcloud/stackit-sdk-go/core/config" "k8s.io/component-base/cli" "k8s.io/klog/v2" @@ -109,7 +111,15 @@ func handle() { klog.Fatal(err) } - iaasClient, err := stackit.CreateIaaSClient(&cfg) + iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) + if err != nil { + klog.Fatalf("create IaaS metrics HTTP client: %v", err) + } + iaasOpts := []sdkconfig.ConfigurationOption{ + sdkconfig.WithHTTPClient(iaasHTTPClient), + } + + iaasClient, err := stackit.CreateIaaSClient(&cfg, iaasOpts...) if err != nil { klog.Fatalf("Failed to create IaaS client: %v", err) } diff --git a/pkg/stackit/client.go b/pkg/stackit/client.go index 12960832..f676b6d4 100644 --- a/pkg/stackit/client.go +++ b/pkg/stackit/client.go @@ -171,7 +171,7 @@ func CreateSTACKITProvider(client iaas.DefaultAPI, cfg *stackitconfig.CSIConfig) return instance, nil } -func CreateIaaSClient(cfg *stackitconfig.CSIConfig) (iaas.DefaultAPI, error) { +func CreateIaaSClient(cfg *stackitconfig.CSIConfig, clientOpts ...sdkconfig.ConfigurationOption) (iaas.DefaultAPI, error) { var userAgent []string var opts []sdkconfig.ConfigurationOption userAgent = append(userAgent, fmt.Sprintf("%s/%s", "block-storage-csi-driver", version.Version)) @@ -186,6 +186,7 @@ func CreateIaaSClient(cfg *stackitconfig.CSIConfig) (iaas.DefaultAPI, error) { } opts = append(opts, sdkconfig.WithUserAgent(strings.Join(userAgent, " "))) + opts = append(opts, clientOpts...) client, err := iaas.NewAPIClient(opts...) 
if err != nil { From bd0311b80e7e1e104838ececc21dbdff939aaea1 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 17 Jun 2026 16:00:00 +0200 Subject: [PATCH 09/17] call metric server from csi/main.go --- cmd/stackit-csi-plugin/main.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index fc976b99..c3bf6045 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -1,8 +1,11 @@ package main import ( + "context" "fmt" "os" + "os/signal" + "syscall" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -23,7 +26,7 @@ var ( endpoint string cloudConfig string cluster string - httpEndpoint string + metricsAddress string provideControllerService bool provideNodeService bool legacyStorageMode bool @@ -35,7 +38,10 @@ func main() { Use: "stackit-csi-plugin", Short: "STACKIT block-storage CSI plugin", Run: func(_ *cobra.Command, _ []string) { - handle() + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer cancel() + + handle(ctx) }, PersistentPreRunE: func(cmd *cobra.Command, _ []string) error { f := cmd.Flags() @@ -68,8 +74,8 @@ func main() { cmd.Flags().StringVar(&cloudConfig, "cloud-config", "", "CSI driver cloud config. 
This option can be given multiple times") cmd.PersistentFlags().StringVar(&cluster, "cluster", "", "The identifier of the cluster that the plugin is running in.") - cmd.PersistentFlags().StringVar(&httpEndpoint, "http-endpoint", "", - "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:8080`)."+ + cmd.PersistentFlags().StringVar(&metricsAddress, "metrics-address", "", + "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:9090`)."+ "The default is empty string, which means the server is disabled.") cmd.PersistentFlags().BoolVar(&provideControllerService, "provide-controller-service", true, @@ -86,7 +92,14 @@ func main() { os.Exit(code) } -func handle() { +func handle(ctx context.Context) { + if metricsAddress != "" { + go func() { + if err := metrics.Run(ctx, metricsAddress); err != nil { + klog.Fatalf("Run metrics returned an error: %v", err) + } + }() + } // Initialize cloud driverOpts := &blockstorage.DriverOpts{ Endpoint: endpoint, From 6a613936181641e2851478e596608d0a063be576 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Thu, 18 Jun 2026 11:08:07 +0200 Subject: [PATCH 10/17] create prometheus metrics exporter --- cmd/stackit-csi-plugin/main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index c3bf6045..e0ca99ce 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -7,6 +7,7 @@ import ( "os/signal" "syscall" + "github.com/prometheus/client_golang/prometheus" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/stackitcloud/cloud-provider-stackit/pkg/metrics" @@ -75,7 +76,7 @@ func main() { cmd.PersistentFlags().StringVar(&cluster, "cluster", "", "The identifier of the cluster that the plugin is running in.") cmd.PersistentFlags().StringVar(&metricsAddress, "metrics-address", "", - "The TCP network address where the 
HTTP server for providing metrics for diagnostics, will listen (example: `:9090`)."+ + "The TCP network address where the HTTP server for providing metrics for diagnostics, will listen (example: `:8080`)."+ "The default is empty string, which means the server is disabled.") cmd.PersistentFlags().BoolVar(&provideControllerService, "provide-controller-service", true, @@ -94,6 +95,8 @@ func main() { func handle(ctx context.Context) { if metricsAddress != "" { + metricsExporter := metrics.NewExporter() + prometheus.MustRegister(metricsExporter) go func() { if err := metrics.Run(ctx, metricsAddress); err != nil { klog.Fatalf("Run metrics returned an error: %v", err) From 71321c4067e60c06cf60eb890c82c3c9d76b953f Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 24 Jun 2026 15:09:45 +0200 Subject: [PATCH 11/17] remove api name required check and update tests --- pkg/metrics/http.go | 5 --- pkg/metrics/http_test.go | 72 +++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 27f7028f..7fa6cf02 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -1,7 +1,6 @@ package metrics import ( - "errors" "fmt" "net/http" "strconv" @@ -12,10 +11,6 @@ import ( ) func NewInstrumentedHTTPClient(api string) (*http.Client, error) { - if api == "" { - return nil, errors.New("api name is required") - } - return &http.Client{ Transport: &InstrumentedRoundTripper{ api: api, diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 62f3bdb2..4cbd4c18 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -28,12 +28,6 @@ var _ = Describe("Metrics", func() { ) Describe("InstrumentedRoundTripper", func() { - It("requires an API name", func() { - client, err := NewInstrumentedHTTPClient("") - Expect(err).To(MatchError("api name is required")) - Expect(client).To(BeNil()) - }) - It("increments HTTPRequestCount for responses", func() { server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) @@ -80,52 +74,62 @@ var _ = Describe("Metrics", func() { Expect(after - before).To(Equal(uint64(1))) }) - It("increments HTTPErrorCount for 400 responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + It("increments HTTPErrorCount for error responses (400, 404, 500)", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodPost { + w.WriteHeader(http.StatusInternalServerError) + return + } + if r.URL.Path == "/404" { + w.WriteHeader(http.StatusNotFound) + return + } w.WriteHeader(http.StatusBadRequest) })) defer server.Close() - labels := prometheus.Labels{ + labels400 := prometheus.Labels{ apiLabel: "test", methodLabel: http.MethodGet, codeLabel: "400", } - before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - - client, err := NewInstrumentedHTTPClient("test") - Expect(err).NotTo(HaveOccurred()) - - response, err := client.Get(server.URL) - Expect(err).NotTo(HaveOccurred()) - defer response.Body.Close() - - after := testutil.ToFloat64(HTTPErrorCount.With(labels)) - Expect(after - before).To(Equal(float64(1))) - }) - - It("increments HTTPErrorCount for 500 responses", func() { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusInternalServerError) - })) - defer server.Close() - - labels := prometheus.Labels{ + labels404 := prometheus.Labels{ + apiLabel: "test", + methodLabel: http.MethodGet, + codeLabel: "404", + } + labels500 := prometheus.Labels{ apiLabel: "test", methodLabel: http.MethodPost, codeLabel: "500", } - before := testutil.ToFloat64(HTTPErrorCount.With(labels)) + before400 := testutil.ToFloat64(HTTPErrorCount.With(labels400)) + before404 := testutil.ToFloat64(HTTPErrorCount.With(labels404)) + before500 := 
testutil.ToFloat64(HTTPErrorCount.With(labels500)) client, err := NewInstrumentedHTTPClient("test") Expect(err).NotTo(HaveOccurred()) - response, err := client.Post(server.URL, "application/json", nil) + response1, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) - defer response.Body.Close() + defer response1.Body.Close() - after := testutil.ToFloat64(HTTPErrorCount.With(labels)) - Expect(after - before).To(Equal(float64(1))) + response2, err := client.Get(server.URL + "/404") + Expect(err).NotTo(HaveOccurred()) + defer response2.Body.Close() + + response3, err := client.Post(server.URL, "application/json", nil) + Expect(err).NotTo(HaveOccurred()) + defer response3.Body.Close() + + after400 := testutil.ToFloat64(HTTPErrorCount.With(labels400)) + after404 := testutil.ToFloat64(HTTPErrorCount.With(labels404)) + after500 := testutil.ToFloat64(HTTPErrorCount.With(labels500)) + + Expect(after400 - before400).To(Equal(float64(1))) + Expect(after404 - before404).To(Equal(float64(1))) + Expect(after500 - before500).To(Equal(float64(1))) + Expect((after400 - before400) + (after404 - before404) + (after500 - before500)).To(Equal(float64(3))) }) It("does not increment HTTPErrorCount for successful responses", func() { From 3c50421a71190e3683ebbc46fd171af41e9ddc05 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Thu, 25 Jun 2026 10:11:46 +0200 Subject: [PATCH 12/17] remove error from func --- cmd/stackit-csi-plugin/main.go | 5 +---- pkg/ccm/stackit.go | 10 ++-------- pkg/metrics/http.go | 4 ++-- pkg/metrics/http_test.go | 12 ++++-------- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/cmd/stackit-csi-plugin/main.go b/cmd/stackit-csi-plugin/main.go index e0ca99ce..8330499b 100644 --- a/cmd/stackit-csi-plugin/main.go +++ b/cmd/stackit-csi-plugin/main.go @@ -127,10 +127,7 @@ func handle(ctx context.Context) { klog.Fatal(err) } - iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) - if err != nil { - klog.Fatalf("create 
IaaS metrics HTTP client: %v", err) - } + iaasHTTPClient := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) iaasOpts := []sdkconfig.ConfigurationOption{ sdkconfig.WithHTTPClient(iaasHTTPClient), } diff --git a/pkg/ccm/stackit.go b/pkg/ccm/stackit.go index 03c6dcb9..e4453f73 100644 --- a/pkg/ccm/stackit.go +++ b/pkg/ccm/stackit.go @@ -117,10 +117,7 @@ func BuildObservability() (*MetricsRemoteWrite, error) { // NewCloudControllerManager creates a new instance of the stackit struct from a stackitconfig struct func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteWrite) (*CloudControllerManager, error) { - lbHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameLoadBalancer) - if err != nil { - return nil, fmt.Errorf("create load balancer metrics HTTP client: %w", err) - } + lbHTTPClient := metrics.NewInstrumentedHTTPClient(metrics.APINameLoadBalancer) lbOpts := []sdkconfig.ConfigurationOption{ sdkconfig.WithHTTPClient(lbHTTPClient), } @@ -142,10 +139,7 @@ func NewCloudControllerManager(cfg *stackitconfig.CCMConfig, obs *MetricsRemoteW return nil, fmt.Errorf("failed to create lb client: %v", err) } - iaasHTTPClient, err := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) - if err != nil { - return nil, fmt.Errorf("create IaaS metrics HTTP client: %w", err) - } + iaasHTTPClient := metrics.NewInstrumentedHTTPClient(metrics.APINameIaaS) iaasOpts := []sdkconfig.ConfigurationOption{ sdkconfig.WithHTTPClient(iaasHTTPClient), } diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 7fa6cf02..567314e2 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -10,13 +10,13 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -func NewInstrumentedHTTPClient(api string) (*http.Client, error) { +func NewInstrumentedHTTPClient(api string) *http.Client { return &http.Client{ Transport: &InstrumentedRoundTripper{ api: api, base: http.DefaultTransport, }, - }, nil + } } type InstrumentedRoundTripper struct { diff 
--git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 4cbd4c18..9a42eb61 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -40,8 +40,7 @@ var _ = Describe("Metrics", func() { } before := testutil.ToFloat64(HTTPRequestCount.With(labels)) - client, err := NewInstrumentedHTTPClient("test") - Expect(err).NotTo(HaveOccurred()) + client := NewInstrumentedHTTPClient("test") response, err := client.Get(server.URL + "/request-count-test") Expect(err).NotTo(HaveOccurred()) @@ -63,8 +62,7 @@ var _ = Describe("Metrics", func() { } before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) - client, err := NewInstrumentedHTTPClient("test") - Expect(err).NotTo(HaveOccurred()) + client := NewInstrumentedHTTPClient("test") response, err := client.Get(server.URL + "/request-duration-test") Expect(err).NotTo(HaveOccurred()) @@ -107,8 +105,7 @@ var _ = Describe("Metrics", func() { before404 := testutil.ToFloat64(HTTPErrorCount.With(labels404)) before500 := testutil.ToFloat64(HTTPErrorCount.With(labels500)) - client, err := NewInstrumentedHTTPClient("test") - Expect(err).NotTo(HaveOccurred()) + client := NewInstrumentedHTTPClient("test") response1, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) @@ -145,8 +142,7 @@ var _ = Describe("Metrics", func() { } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) - client, err := NewInstrumentedHTTPClient("test") - Expect(err).NotTo(HaveOccurred()) + client := NewInstrumentedHTTPClient("test") response, err := client.Get(server.URL) Expect(err).NotTo(HaveOccurred()) From a10ee877520ae91e00c6c28dabbf87e5878eead0 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 1 Jul 2026 10:46:01 +0200 Subject: [PATCH 13/17] make status_code --- pkg/metrics/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index f15ef09a..f3be3e71 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -8,7 +8,7 
@@ const ( cloudProviderMetricPrefix = "cloud_provider_stackit" apiLabel = "api" methodLabel = "method" - codeLabel = "code" + codeLabel = "status_code" operationLabel = "op" APINameLoadBalancer = "loadbalancer" From 9119506c70c65db901efe94596c9ac35b3cfc593 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 1 Jul 2026 14:17:34 +0200 Subject: [PATCH 14/17] make add more labels in new metrics --- pkg/metrics/http.go | 11 ++++++++--- pkg/metrics/metrics.go | 6 +++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index 567314e2..bb7f9682 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -34,21 +34,26 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp HTTPRequestDurationHistogram. With(prometheus.Labels{ apiLabel: rt.api, + methodLabel: request.Method, operationLabel: operation, + codeLabel: strconv.Itoa(response.StatusCode), }). Observe(float64(duration.Seconds())) HTTPRequestCount. With(prometheus.Labels{ apiLabel: rt.api, + methodLabel: request.Method, operationLabel: operation, + codeLabel: strconv.Itoa(response.StatusCode), }). 
Inc() if response != nil && response.StatusCode >= 400 { HTTPErrorCount.With(prometheus.Labels{ - apiLabel: rt.api, - methodLabel: request.Method, - codeLabel: strconv.Itoa(response.StatusCode), + apiLabel: rt.api, + methodLabel: request.Method, + operationLabel: operation, + codeLabel: strconv.Itoa(response.StatusCode), }).Inc() } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index f3be3e71..9655c5e0 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -21,14 +21,14 @@ var ( Name: "http_requests_total", Help: "The number of requests to external APIs", ConstLabels: nil, - }, []string{apiLabel, operationLabel}) + }, []string{apiLabel, methodLabel, operationLabel, codeLabel}) HTTPErrorCount = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: cloudProviderMetricPrefix, Name: "http_errors_total", Help: "Number of HTTP errors returned by external APIs", ConstLabels: nil, - }, []string{apiLabel, methodLabel, codeLabel}) + }, []string{apiLabel, methodLabel, operationLabel, codeLabel}) HTTPRequestDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: cloudProviderMetricPrefix, @@ -36,7 +36,7 @@ var ( Help: "The response times of external API requests", ConstLabels: nil, Buckets: nil, - }, []string{apiLabel, operationLabel}) + }, []string{apiLabel, methodLabel, operationLabel, codeLabel}) ) type Exporter struct { From 5e4328c8c2f7a4633e4ffe7398f7dd6d8e351ee5 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 1 Jul 2026 14:31:52 +0200 Subject: [PATCH 15/17] make lint --- pkg/metrics/http.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index bb7f9682..b85b9484 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -31,12 +31,17 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp response, err := rt.base.RoundTrip(request) duration := time.Since(startTime) + statusCode := "" + if response 
!= nil { + statusCode = strconv.Itoa(response.StatusCode) + } + HTTPRequestDurationHistogram. With(prometheus.Labels{ apiLabel: rt.api, methodLabel: request.Method, operationLabel: operation, - codeLabel: strconv.Itoa(response.StatusCode), + codeLabel: statusCode, }). Observe(float64(duration.Seconds())) HTTPRequestCount. @@ -44,7 +49,7 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp apiLabel: rt.api, methodLabel: request.Method, operationLabel: operation, - codeLabel: strconv.Itoa(response.StatusCode), + codeLabel: statusCode, }). Inc() @@ -53,7 +58,7 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp apiLabel: rt.api, methodLabel: request.Method, operationLabel: operation, - codeLabel: strconv.Itoa(response.StatusCode), + codeLabel: statusCode, }).Inc() } From 9342a398a421423baf4b9399e76c955ec29e0e90 Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Wed, 1 Jul 2026 14:44:00 +0200 Subject: [PATCH 16/17] fix test --- pkg/metrics/http_test.go | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/pkg/metrics/http_test.go b/pkg/metrics/http_test.go index 9a42eb61..1bf3fccc 100644 --- a/pkg/metrics/http_test.go +++ b/pkg/metrics/http_test.go @@ -36,7 +36,9 @@ var _ = Describe("Metrics", func() { labels := prometheus.Labels{ apiLabel: "test", + methodLabel: "GET", operationLabel: "get_request-count-test", + codeLabel: "200", } before := testutil.ToFloat64(HTTPRequestCount.With(labels)) @@ -58,7 +60,9 @@ var _ = Describe("Metrics", func() { labels := prometheus.Labels{ apiLabel: "test", + methodLabel: "GET", operationLabel: "get_request-duration-test", + codeLabel: "200", } before := histogramSampleCount(HTTPRequestDurationHistogram.With(labels)) @@ -87,19 +91,22 @@ var _ = Describe("Metrics", func() { defer server.Close() labels400 := prometheus.Labels{ - apiLabel: "test", - methodLabel: http.MethodGet, - codeLabel: "400", + apiLabel: "test", + 
methodLabel: http.MethodGet, + operationLabel: "get_", + codeLabel: "400", } labels404 := prometheus.Labels{ - apiLabel: "test", - methodLabel: http.MethodGet, - codeLabel: "404", + apiLabel: "test", + methodLabel: http.MethodGet, + operationLabel: "get_404", + codeLabel: "404", } labels500 := prometheus.Labels{ - apiLabel: "test", - methodLabel: http.MethodPost, - codeLabel: "500", + apiLabel: "test", + methodLabel: http.MethodPost, + operationLabel: "post_", + codeLabel: "500", } before400 := testutil.ToFloat64(HTTPErrorCount.With(labels400)) before404 := testutil.ToFloat64(HTTPErrorCount.With(labels404)) @@ -136,9 +143,10 @@ var _ = Describe("Metrics", func() { defer server.Close() labels := prometheus.Labels{ - apiLabel: "test", - methodLabel: http.MethodGet, - codeLabel: "200", + apiLabel: "test", + methodLabel: http.MethodGet, + operationLabel: "get_", + codeLabel: "200", } before := testutil.ToFloat64(HTTPErrorCount.With(labels)) From 430e9c6510e37755ef7b5a789da75b00c43a77ec Mon Sep 17 00:00:00 2001 From: Aniruddha Basak Date: Thu, 2 Jul 2026 10:29:23 +0200 Subject: [PATCH 17/17] optimize code --- pkg/metrics/http.go | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/pkg/metrics/http.go b/pkg/metrics/http.go index b85b9484..8fba2dc2 100644 --- a/pkg/metrics/http.go +++ b/pkg/metrics/http.go @@ -31,35 +31,26 @@ func (rt *InstrumentedRoundTripper) RoundTrip(request *http.Request) (*http.Resp response, err := rt.base.RoundTrip(request) duration := time.Since(startTime) - statusCode := "" + statusCode := "network_error" if response != nil { statusCode = strconv.Itoa(response.StatusCode) } - HTTPRequestDurationHistogram. - With(prometheus.Labels{ - apiLabel: rt.api, - methodLabel: request.Method, - operationLabel: operation, - codeLabel: statusCode, - }). - Observe(float64(duration.Seconds())) - HTTPRequestCount. 
- With(prometheus.Labels{ - apiLabel: rt.api, - methodLabel: request.Method, - operationLabel: operation, - codeLabel: statusCode, - }). - Inc() - - if response != nil && response.StatusCode >= 400 { - HTTPErrorCount.With(prometheus.Labels{ - apiLabel: rt.api, - methodLabel: request.Method, - operationLabel: operation, - codeLabel: statusCode, - }).Inc() + labels := prometheus.Labels{ + apiLabel: rt.api, + methodLabel: request.Method, + operationLabel: operation, + codeLabel: statusCode, + } + + HTTPRequestDurationHistogram.With(labels).Observe(duration.Seconds()) + HTTPRequestCount.With(labels).Inc() + + isHTTPError := response != nil && response.StatusCode >= 400 + isNetworkError := err != nil + + if isHTTPError || isNetworkError { + HTTPErrorCount.With(labels).Inc() } return response, err