Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions embed/examples/cluster/topology.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ monitored:
# tiflash:
# tiflash-learner:
# kvcdc:
# ng_monitoring:
# storage.type: "sqlite"
# continuous_profiling.enable: true
# continuous_profiling.profile_seconds: 5
# continuous_profiling.interval_seconds: 15

# # Server configs are used to specify the configuration of PD Servers.
pd_servers:
Expand Down Expand Up @@ -363,6 +368,10 @@ monitoring_servers:
# rule_dir: /home/tidb/prometheus_rule
# scrape_interval: 15s
# scrape_timeout: 10s
# # The following configs are used to overwrite the `server_configs.ng_monitoring` values.
# ng_monitoring_config:
# storage.path: "/tidb-data/prometheus-8249/docdb"
# continuous_profiling.data_retention_seconds: 259200
# # Server configs are used to specify the configuration of Grafana Servers.
grafana_servers:
# # The ip address of the Grafana Server.
Expand Down
44 changes: 31 additions & 13 deletions pkg/cluster/spec/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ type PrometheusSpec struct {
ScrapeInterval string `yaml:"scrape_interval,omitempty" validate:"scrape_interval:editable"`
ScrapeTimeout string `yaml:"scrape_timeout,omitempty" validate:"scrape_timeout:editable"`

AdditionalArgs []string `yaml:"additional_args,omitempty" validate:"additional_args:ignore"`
AdditionalArgs []string `yaml:"additional_args,omitempty" validate:"additional_args:ignore"`
NgMonitoringConfig map[string]any `yaml:"ng_monitoring_config,omitempty" validate:"ng_monitoring_config:ignore"`
}

// Remote prometheus remote config
Expand Down Expand Up @@ -486,24 +487,37 @@ func (i *MonitorInstance) InitConfig(
}

if spec.NgPort > 0 {
pds := []string{}
pdAddrs := []string{}
if servers, found := topoHasField("PDServers"); found {
for i := 0; i < servers.Len(); i++ {
pd := servers.Index(i).Interface().(*PDSpec)
pds = append(pds, fmt.Sprintf("\"%s\"", utils.JoinHostPort(pd.Host, pd.ClientPort)))
pdAddrs = append(pdAddrs, utils.JoinHostPort(pd.Host, pd.ClientPort))
Comment on lines -489 to +494
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this renames pds to pdAddrs and adds quotes around it? Is that needed?

}
}
ngcfg := &config.NgMonitoringConfig{
ClusterName: clusterName,
Address: utils.JoinHostPort(i.GetListenHost(), spec.NgPort),
AdvertiseAddress: utils.JoinHostPort(i.GetHost(), spec.NgPort),
PDAddrs: strings.Join(pds, ","),
TLSEnabled: enableTLS,

DeployDir: paths.Deploy,
DataDir: paths.Data[0],
LogDir: paths.Log,
// Build base ng-monitoring config as a map so user overrides via
// server_configs.ng_monitoring and per-instance ng_monitoring_config
// are merged on top (same pattern as PD/TiKV/TiDB).
baseConfig := map[string]any{
"address": utils.JoinHostPort(i.GetListenHost(), spec.NgPort),
"advertise-address": utils.JoinHostPort(i.GetHost(), spec.NgPort),
"log.path": paths.Log,
"log.level": "INFO",
"pd.endpoints": pdAddrs,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"pd.endpoints": pdAddrs,
"pd.endpoints": strconv.Quote(pdAddrs),

Adding quotes here with strconv.Quote() might be better.

"storage.path": paths.Data[0],
}
if enableTLS {
baseConfig["security.ca-path"] = fmt.Sprintf("%s/tls/ca.crt", paths.Deploy)
baseConfig["security.cert-path"] = fmt.Sprintf("%s/tls/prometheus.crt", paths.Deploy)
baseConfig["security.key-path"] = fmt.Sprintf("%s/tls/prometheus.pem", paths.Deploy)
}

// Gather global and per-instance ng-monitoring user config.
var globalNgConfig map[string]any
if s, ok := i.topo.(*Specification); ok {
globalNgConfig = s.ServerConfigs.NGMonitoring
}
userConfig := MergeConfig(globalNgConfig, spec.NgMonitoringConfig)

if servers, found := topoHasField("Monitors"); found {
for idx := 0; idx < servers.Len(); idx++ {
Expand All @@ -514,7 +528,11 @@ func (i *MonitorInstance) InitConfig(
}

fp = filepath.Join(paths.Cache, fmt.Sprintf("ngmonitoring_%s_%d.toml", i.GetHost(), i.GetPort()))
if err := ngcfg.ConfigToFile(fp); err != nil {
ngConf, err := Merge2Toml("ng_monitoring", baseConfig, userConfig)
if err != nil {
return err
}
if err := utils.WriteFile(fp, ngConf, 0755); err != nil {
return err
}
dst = filepath.Join(paths.Deploy, "conf", "ngmonitoring.toml")
Expand Down
60 changes: 60 additions & 0 deletions pkg/cluster/spec/server_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,66 @@ server_configs:
require.True(t, decimal)
}

// TestNGMonitoringServerConfig checks that ng_monitoring settings given under
// server_configs and under a monitoring server's ng_monitoring_config are
// parsed from YAML, and that merging them over a base config with MergeConfig
// and Merge2Toml produces TOML in which the user-supplied values replace the
// base defaults while untouched base keys are kept.
//
// NOTE(review): the YAML literal appears without indentation in this view; the
// nested keys presumably need to be indented under their parents for the
// assertions below to hold — confirm against the real file.
func TestNGMonitoringServerConfig(t *testing.T) {
yamlData := []byte(`
server_configs:
ng_monitoring:
storage.type: "sqlite"
log.level: "WARN"
continuous_profiling.enable: true
continuous_profiling.profile_seconds: 5
continuous_profiling.interval_seconds: 15

monitoring_servers:
- host: 10.0.1.21
ng_port: 12020
ng_monitoring_config:
storage.path: "/custom/data/path"
continuous_profiling.data_retention_seconds: 259200
`)

// Decode the topology snippet into a fresh Specification.
topo := new(Specification)
err := yaml.Unmarshal(yamlData, topo)
require.NoError(t, err)

// Verify global config parsed
require.Equal(t, "sqlite", topo.ServerConfigs.NGMonitoring["storage.type"])
require.Equal(t, "WARN", topo.ServerConfigs.NGMonitoring["log.level"])
require.Equal(t, true, topo.ServerConfigs.NGMonitoring["continuous_profiling.enable"])

// Verify per-instance config parsed
require.Len(t, topo.Monitors, 1)
require.Equal(t, "/custom/data/path", topo.Monitors[0].NgMonitoringConfig["storage.path"])

// Build base config (simulating what InitConfig does)
// The base deliberately sets log.level and storage.path so the assertions
// below can show them being replaced by the user-supplied values.
baseConfig := map[string]any{
"address": "0.0.0.0:12020",
"advertise-address": "10.0.1.21:12020",
"log.path": "/tidb-deploy/prometheus-9090/log",
"log.level": "INFO",
"pd.endpoints": []string{"10.0.1.10:2379", "10.0.1.11:2379"},
"storage.path": "/tidb-data/prometheus-9090",
}

// Merge: base + global + per-instance (same as InitConfig logic)
userConfig := MergeConfig(topo.ServerConfigs.NGMonitoring, topo.Monitors[0].NgMonitoringConfig)
got, err := Merge2Toml("ng_monitoring", baseConfig, userConfig)
require.NoError(t, err)

// Assertions are substring checks against the rendered TOML text.
tomlStr := string(got)

// User overrides should take precedence
require.Contains(t, tomlStr, `type = "sqlite"`) // from global server_configs
require.Contains(t, tomlStr, `path = "/custom/data/path"`) // per-instance overrides base
require.Contains(t, tomlStr, `level = "WARN"`) // global overrides default
require.Contains(t, tomlStr, `enable = true`) // from global
require.Contains(t, tomlStr, `data_retention_seconds = 259200`) // from per-instance
require.Contains(t, tomlStr, `address = "0.0.0.0:12020"`) // from base

// pd.endpoints must be a TOML array of quoted strings (matches old template behavior)
require.Contains(t, tomlStr, `endpoints = ["10.0.1.10:2379", "10.0.1.11:2379"]`)
}

func TestGetValueFromPath(t *testing.T) {
yamlData := []byte(`
server_configs:
Expand Down
1 change: 1 addition & 0 deletions pkg/cluster/spec/spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ type (
CDC map[string]any `yaml:"cdc"`
TiKVCDC map[string]any `yaml:"kvcdc"`
Grafana map[string]string `yaml:"grafana"`
NGMonitoring map[string]any `yaml:"ng_monitoring"`
}

// ComponentVersions represents the versions of components
Expand Down