Fix Grafana dashboards for Broker with honorLabels, remove unnecessary *_created metrics and improve docs (#593)

* Drop _created metrics for broker and proxy

* Enable all metrics by default for broker

* change default dashboard

* Remove messy dashboards

* Enable default dashboards in Grafana

* Add testing values with more aggressive disk cleanup

* Add VictoriaMetrics debugging instructions

* Set honorLabels to true

* Document disabling monitoring

* Set password in testing values

* Fix linting issue detected by kubeconform
This commit is contained in:
Lari Hotari 2025-03-11 01:46:28 +02:00 committed by GitHub
parent e6f05809bd
commit 637cf11d1a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 243 additions and 14 deletions

View File

@ -280,23 +280,53 @@ You can also check out the example values file for different deployments.
In order to disable the victoria-metrics-k8s-stack, you can add the following to your `values.yaml`.
Victoria Metrics components can also be disabled and enabled individually if you only need specific monitoring features.
Please refer to the default [`values.yaml`](charts/pulsar/values.yaml).
```yaml
# disable VictoriaMetrics and related components
# (each sub-component is nested under the victoria-metrics-k8s-stack key)
victoria-metrics-k8s-stack:
  enabled: false
  victoria-metrics-operator:
    enabled: false
  vmsingle:
    enabled: false
  vmagent:
    enabled: false
  kube-state-metrics:
    enabled: false
  prometheus-node-exporter:
    enabled: false
  grafana:
    enabled: false
```
Additionally, you'll need to set each component's `podMonitor.enabled` property to `false`.
```yaml
# disable pod monitors
# (oxia has two components, server and coordinator, each with its own podMonitor)
autorecovery:
  podMonitor:
    enabled: false
bookkeeper:
  podMonitor:
    enabled: false
oxia:
  server:
    podMonitor:
      enabled: false
  coordinator:
    podMonitor:
      enabled: false
broker:
  podMonitor:
    enabled: false
proxy:
  podMonitor:
    enabled: false
zookeeper:
  podMonitor:
    enabled: false
```
Additionally, you'll need to set each component's `podMonitor` property to `false`. This is shown in some [examples](./examples) and is
verified in some [tests](./.ci/clusters).
This is shown in [examples/values-disable-monitoring.yaml](examples/values-disable-monitoring.yaml).
## Pulsar Manager
@ -497,6 +527,36 @@ We've done our best to make these charts as seamless as possible,
but occasionally troubles do surface that are outside of our control. We've collected
tips and tricks for troubleshooting common issues. Please examine these first before raising an [issue](https://github.com/apache/pulsar-helm-chart/issues/new/choose), and feel free to add to them by raising a [Pull Request](https://github.com/apache/pulsar-helm-chart/compare)!
### VictoriaMetrics Troubleshooting
In the example commands, the Kubernetes namespace is `pulsar`; replace it with your deployment namespace.
#### VictoriaMetrics Web UI
Connect to the `vmsingle` pod to access the web UI:
```shell
kubectl port-forward -n pulsar $(kubectl get pods -n pulsar -l app.kubernetes.io/name=vmsingle -o jsonpath='{.items[0].metadata.name}') 8429:8429
```
Now you can access the UI at http://localhost:8429 and http://localhost:8429/vmui (a UI similar to the one in Prometheus).
#### VictoriaMetrics Scraping debugging UI - Active Targets
Connect to the `vmagent` pod to debug scrape targets:
```shell
kubectl port-forward -n pulsar $(kubectl get pods -n pulsar -l app.kubernetes.io/name=vmagent -o jsonpath='{.items[0].metadata.name}') 8429:8429
```
Now you can access the UI at http://localhost:8429
Active Targets UI
- http://localhost:8429/targets
Scraping Configuration
- http://localhost:8429/config
## Release Process
See [RELEASE.md](RELEASE.md)
See [RELEASE.md](RELEASE.md)

View File

@ -55,6 +55,8 @@ spec:
scheme: http
interval: {{ $valuesPath.podMonitor.interval }}
scrapeTimeout: {{ $valuesPath.podMonitor.scrapeTimeout }}
# Set honor labels to true to allow overriding namespace label with Pulsar's namespace label
honorLabels: true
{{- if index $root.Values "victoria-metrics-k8s-stack" "enabled" }}
relabelConfigs:
{{- else }}
@ -71,14 +73,26 @@ spec:
- sourceLabels: [__meta_kubernetes_pod_name]
action: replace
targetLabel: kubernetes_pod_name
{{- with $valuesPath.podMonitor.metricRelabelings }}
{{- /*
Metric relabeling rules for this pod monitor endpoint.

Rendered when custom podMonitor.metricRelabelings are configured or when
podMonitor.dropUnderscoreCreatedMetrics.enabled is set. The key is named
metricRelabelConfigs when victoria-metrics-k8s-stack is enabled and
metricRelabelings otherwise.

Relabeling regexes are evaluated with RE2, which has no lookahead support, so
the exclusion of specific *_created metrics is implemented as a sequence of
rules (tag, drop untagged, remove tag) instead of a "(?!...)" pattern.
*/ -}}
{{- if or $valuesPath.podMonitor.metricRelabelings (and $valuesPath.podMonitor.dropUnderscoreCreatedMetrics (index $valuesPath.podMonitor.dropUnderscoreCreatedMetrics "enabled")) }}
{{- if index $root.Values "victoria-metrics-k8s-stack" "enabled" }}
      metricRelabelConfigs:
{{- else }}
      metricRelabelings:
{{- end }}
{{- if and $valuesPath.podMonitor.dropUnderscoreCreatedMetrics (index $valuesPath.podMonitor.dropUnderscoreCreatedMetrics "enabled") }}
{{- if and (hasKey $valuesPath.podMonitor.dropUnderscoreCreatedMetrics "excludePatterns") $valuesPath.podMonitor.dropUnderscoreCreatedMetrics.excludePatterns }}
        # Tag metrics matching one of the excludePatterns so the drop rule below keeps them
        - sourceLabels: [__name__]
          regex: "({{ $valuesPath.podMonitor.dropUnderscoreCreatedMetrics.excludePatterns | join "|" }})"
          targetLabel: __tmp_keep_created
          replacement: "true"
          action: replace
        # Drop metrics that end with _created, auto-created by metrics library to match OpenMetrics format.
        # The regex only matches when the temporary tag label is empty, i.e. the metric is not excluded.
        - sourceLabels: [__name__, __tmp_keep_created]
          separator: ";"
          regex: ".*_created;"
          action: drop
        # Remove the temporary tag label so it is never stored
        - regex: "__tmp_keep_created"
          action: labeldrop
{{- else }}
        # Drop metrics that end with _created, auto-created by metrics library to match OpenMetrics format
        - sourceLabels: [__name__]
          regex: ".*_created$"
          action: drop
{{- end }}
{{- end }}
{{- with $valuesPath.podMonitor.metricRelabelings }}
{{ toYaml . | indent 8 }}
{{- end }}
{{- end }}
selector:
matchLabels:
{{- include "pulsar.matchLabels" $root | nindent 6 }}

View File

@ -76,7 +76,21 @@ data:
# Broker settings
clusterName: {{ template "pulsar.cluster.name" . }}
# Enable all metrics by default
exposeTopicLevelMetricsInPrometheus: "true"
exposeConsumerLevelMetricsInPrometheus: "true"
exposeProducerLevelMetricsInPrometheus: "true"
exposeManagedLedgerMetricsInPrometheus: "true"
exposeManagedCursorMetricsInPrometheus: "true"
exposeBundlesMetricsInPrometheus: "true"
exposePublisherStats: "true"
exposePreciseBacklogInPrometheus: "true"
replicationMetricsEnabled: "true"
splitTopicAndPartitionLabelInPrometheus: "true"
aggregatePublisherStatsByProducerName: "true"
bookkeeperClientExposeStatsToPrometheus: "true"
numHttpServerThreads: "8"
statusFilePath: "{{ template "pulsar.home" . }}/logs/status"

View File

@ -975,6 +975,16 @@ broker:
enabled: true
interval: 60s
scrapeTimeout: 60s
# Removes metrics that end with _created suffix
# These metrics are automatically generated by the Prometheus client library to comply with OpenMetrics format
# and aren't currently used. Disable this if you need to use these metrics or add an exclusion pattern when
# a specific metric is needed.
dropUnderscoreCreatedMetrics:
enabled: true
# Optional regex pattern to exclude specific metrics from being dropped
# excludePatterns:
# - pulsar_topic_load_times_created
# Custom metric relabelings to apply to all metrics
metricRelabelings:
# - action: labeldrop
# regex: cluster
@ -1222,6 +1232,16 @@ proxy:
enabled: true
interval: 60s
scrapeTimeout: 60s
# Removes metrics that end with _created suffix
# These metrics are automatically generated by the Prometheus client library to comply with OpenMetrics format
# and aren't currently used. Disable this if you need to use these metrics or add an exclusion pattern when
# a specific metric is needed.
dropUnderscoreCreatedMetrics:
enabled: true
# Optional regex pattern to exclude specific metrics from being dropped
# excludePatterns:
# - pulsar_proxy_new_connections_created
# Custom metric relabelings to apply to all metrics
metricRelabelings:
# - action: labeldrop
# regex: cluster
@ -1534,16 +1554,32 @@ victoria-metrics-k8s-stack:
sidecar:
dashboards:
enabled: false
# grafana.ini settings
grafana.ini:
analytics:
check_for_updates: false
dashboards:
default_home_dashboard_path: /var/lib/grafana/dashboards/pulsar/overview.json
## Configure Pulsar dashboards for Grafana
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: true
editable: true
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards/default
- name: oxia
orgId: 1
folder: Oxia
type: file
disableDeletion: false
disableDeletion: true
editable: true
allowUiUpdates: true
options:
@ -1552,12 +1588,20 @@ victoria-metrics-k8s-stack:
orgId: 1
folder: Pulsar
type: file
disableDeletion: false
disableDeletion: true
editable: true
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards/pulsar
dashboards:
default:
victoriametrics:
gnetId: 10229
revision: 38
datasource: VictoriaMetrics
kubernetes:
gnetId: 14205
datasource: VictoriaMetrics
oxia:
oxia-containers:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/oxia/oxia-containers.json
@ -1576,10 +1620,6 @@ victoria-metrics-k8s-stack:
pulsar:
bookkeeper-compaction:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/bookkeeper-compaction.json
bookkeeper-read-cache:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/bookkeeper-read-cache.json
bookkeeper-read-use:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/bookkeeper-read-use.json
bookkeeper:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/bookkeeper.json
broker-cache-by-broker:
@ -1612,13 +1652,10 @@ victoria-metrics-k8s-stack:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/proxy.json
sockets:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/sockets.json
tenant:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/tenant.json
topic:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/topic.json
zookeeper:
url: https://raw.githubusercontent.com/lhotari/pulsar-grafana-dashboards/master/pulsar/zookeeper.json
## Node exporter component
prometheus-node-exporter:
enabled: true

View File

@ -0,0 +1,58 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# disable monitoring
# Turns off the victoria-metrics-k8s-stack and each of its sub-components.
victoria-metrics-k8s-stack:
  enabled: false
  victoria-metrics-operator:
    enabled: false
  vmsingle:
    enabled: false
  vmagent:
    enabled: false
  kube-state-metrics:
    enabled: false
  prometheus-node-exporter:
    enabled: false
  grafana:
    enabled: false

# disable pod monitors
# Each component's podMonitor is switched off individually.
autorecovery:
  podMonitor:
    enabled: false
bookkeeper:
  podMonitor:
    enabled: false
oxia:
  server:
    podMonitor:
      enabled: false
  coordinator:
    podMonitor:
      enabled: false
broker:
  podMonitor:
    enabled: false
proxy:
  podMonitor:
    enabled: false
zookeeper:
  podMonitor:
    enabled: false

View File

@ -0,0 +1,46 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# disabled AntiAffinity
affinity:
  anti_affinity: false

victoria-metrics-k8s-stack:
  grafana:
    # Fixed password for local testing only; do not reuse in a real deployment
    adminPassword: verysecureword123

bookkeeper:
  configData:
    # more aggressive disk cleanup
    journalMaxSizeMB: "256"
    majorCompactionInterval: "600"
    minorCompactionInterval: "300"
    compactionRateByEntries: "5000"
    gcWaitTime: "60000"

broker:
  configData:
    # more aggressive disk cleanup
    managedLedgerMinLedgerRolloverTimeMinutes: "1"
    managedLedgerMaxLedgerRolloverTimeMinutes: "5"
    # configure deletion of inactive topics
    brokerDeleteInactiveTopicsMaxInactiveDurationSeconds: "86400"

proxy:
  replicaCount: 1