Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions client/src/api/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import axios from 'axios'

// 创建 axios 实例
const api = axios.create({
// baseURL: (import.meta as any).env?.VITE_API_BASE_URL || 'http://10.210.10.33:8080',
timeout: 10000,
headers: {
'Content-Type': 'application/json'
Expand Down
25 changes: 3 additions & 22 deletions client/src/mock/services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1845,27 +1845,9 @@ loadServiceAlertStatus()
*/
export const serviceVersionAlertStatusMap: Record<string, Record<string, ServiceAlertStatus>> = {}

const saveServiceVersionAlertStatus = () => {
try {
localStorage.setItem('serviceVersionAlertStatusMap', JSON.stringify(serviceVersionAlertStatusMap))
console.log('服务版本告警状态已保存到 localStorage')
} catch (error) {
console.error('保存服务版本告警状态失败:', error)
}
}
const saveServiceVersionAlertStatus = () => {}

const loadServiceVersionAlertStatus = () => {
try {
const data = localStorage.getItem('serviceVersionAlertStatusMap')
if (data) {
const parsed = JSON.parse(data)
Object.assign(serviceVersionAlertStatusMap, parsed)
console.log('已从 localStorage 加载服务版本告警状态')
}
} catch (error) {
console.error('从 localStorage 加载服务版本告警状态失败:', error)
}
}
const loadServiceVersionAlertStatus = () => {}

/**
* 根据告警状态更新服务版本状态
Expand Down Expand Up @@ -1919,8 +1901,7 @@ export const clearServiceVersionAlertStatus = (serviceName: string, version?: st
console.log(`已清除服务 ${serviceName} ${version ? '版本 ' + version : '所有版本'} 的告警状态`)
}

// 页面加载时恢复服务版本告警状态
loadServiceVersionAlertStatus()
// 页面加载时不再从 localStorage 恢复服务版本告警状态(禁用持久化)

// ==================== 发布任务状态管理 ====================
// 管理服务的发布任务状态,用于显示发布指示器
Expand Down
34 changes: 27 additions & 7 deletions client/src/views/ChangeLogView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,31 @@
import { ref, computed, onMounted, watch } from 'vue'
import { useAppStore, type ChangeItem, type AlarmChangeItem } from '@/stores/app'
import { mockApi } from '@/mock/api'
import type { DeploymentChangelogResponse, DeploymentChangelogItem, AlertRuleChangelogResponse, AlertRuleChangeItem } from '@/mock/services'
import { apiService } from '@/api'
import type { DeploymentChangelogResponse } from '@/mock/services'
import ChangeCard from '@/components/ChangeCard.vue'
import AlarmChangeCard from '@/components/AlarmChangeCard.vue'
import { ArrowLeft, Loading } from '@element-plus/icons-vue'

/**
 * One changed field inside an alert-rule changelog entry.
 *
 * The view renders each value as `${name}: ${old} -> ${new}` when building
 * the human-readable change description.
 */
interface AlertRuleChangeValue {
// Name of the field that changed (the API doc example uses "threshold").
name: string
// Value before the change, carried as a string.
old: string
// Value after the change, carried as a string.
new: string
}

/**
 * A single alert-rule change record, i.e. one element of `items` in the
 * response of `GET /v1/changelog/alertrules`.
 */
interface AlertRuleChangeItem {
// Name of the alert rule that was edited.
name: string
// When the edit happened; the API doc example uses an ISO 8601 UTC string.
editTime: string
// Scope of the rule. The view strips a leading "service:" to obtain the
// service name and treats any other value (including "") as global.
scope: string
// Individual field changes that make up this edit.
values: AlertRuleChangeValue[]
// Reason recorded for the change (the API doc example uses "Update").
reason: string
}

/**
 * One page of alert-rule changelog records as returned by
 * `GET /v1/changelog/alertrules`.
 */
interface AlertRuleChangelogResponse {
// Change records contained in this page.
items: AlertRuleChangeItem[]
// Cursor for the following page; per the API doc it is the `editTime` of the
// last item in this page and is passed back as the `start` query parameter.
next?: string
}

const appStore = useAppStore()

const activeTab = ref('service')
Expand Down Expand Up @@ -160,7 +180,7 @@ const transformAlertRuleChangelogToAlarmChangeItems = (changelogData: AlertRuleC
const serviceName = item.scope?.startsWith('service:') ? item.scope.slice('service:'.length) + '服务' : '全局服务'

// 构建变更描述
const changeDescription = item.values.map(value => {
const changeDescription = item.values.map((value) => {
return `${value.name}: ${value.old} -> ${value.new}`
}).join(', ')

Expand Down Expand Up @@ -200,21 +220,21 @@ const loadDeploymentChangelog = async (start?: string, limit?: number) => {
}
}

// 加载告警规则变更记录
// 加载告警规则变更记录(使用真实 API)
const loadAlertRuleChangelog = async (start?: string, limit?: number) => {
if (alertRuleLoading.value) return // 防止重复加载

try {
alertRuleLoading.value = true
error.value = null

const response = await mockApi.getAlertRuleChangelog(start, limit)
alertRuleChangelog.value = response
const response = await apiService.getAlertRuleChangelog(start, limit ?? 10)
alertRuleChangelog.value = response.data

// 转换数据格式
alarmChangeItems.value = transformAlertRuleChangelogToAlarmChangeItems(response.items)
alarmChangeItems.value = transformAlertRuleChangelogToAlarmChangeItems(response.data.items)

console.log('告警规则变更记录加载成功:', response)
console.log('告警规则变更记录加载成功:', response.data)
} catch (err) {
error.value = '加载告警规则变更记录失败'
console.error('加载告警规则变更记录失败:', err)
Expand Down
2 changes: 1 addition & 1 deletion client/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export default defineConfig({
server: {
proxy: {
'/v1': {
target: 'http://127.0.0.1:8080',
target: 'http://10.210.10.33:8080',
changeOrigin: true,
secure: false,
}
Expand Down
4 changes: 2 additions & 2 deletions cmd/zeroops/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM golang:1.24-alpine AS builder
FROM docker.m.daocloud.io/library/golang:1.24-alpine AS builder
WORKDIR /src
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/zeroops ./cmd/zeroops

FROM gcr.io/distroless/base-debian12
FROM gcr.m.daocloud.io/distroless/base-debian12:nonroot
WORKDIR /app
COPY --from=builder /out/zeroops /app/zeroops
# 复制配置文件目录
Expand Down
24 changes: 13 additions & 11 deletions cmd/zeroops/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"strconv"
"time"

"github.com/fox-gonic/fox"
"github.com/gin-gonic/gin"
alertapi "github.com/qiniu/zeroops/internal/alerting/api"
adb "github.com/qiniu/zeroops/internal/alerting/database"
"github.com/qiniu/zeroops/internal/alerting/service/healthcheck"
Expand Down Expand Up @@ -96,22 +96,24 @@ func main() {
go rem.Start(ctx, alertCh)

// start Prometheus anomaly detection scheduler
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 6*time.Hour)
promInterval := parseDuration(cfg.Alerting.Prometheus.SchedulerInterval, 5*time.Minute)
promStep := parseDuration(cfg.Alerting.Prometheus.QueryStep, time.Minute)
promRange := parseDuration(cfg.Alerting.Prometheus.QueryRange, 6*time.Hour)
promCfg := healthcheck.NewPrometheusConfigFromApp(&cfg.Alerting.Prometheus)
promClient := healthcheck.NewPrometheusClient(promCfg)
anomalyDetectClient := healthcheck.NewAnomalyDetectClient(promCfg)
go healthcheck.StartPrometheusScheduler(ctx, healthcheck.PrometheusDeps{
DB: alertDB,
PrometheusClient: promClient,
Interval: promInterval,
QueryStep: promStep,
QueryRange: promRange,
RulesetBase: cfg.Alerting.Ruleset.APIBase,
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
DB: alertDB,
AnomalyDetectClient: anomalyDetectClient,
Interval: promInterval,
QueryStep: promStep,
QueryRange: promRange,
RulesetBase: cfg.Alerting.Ruleset.APIBase,
RulesetTimeout: parseDuration(cfg.Alerting.Ruleset.APITimeout, 10*time.Second),
})

router := fox.New()
router := gin.New()
router.Use(gin.Logger())
router.Use(gin.Recovery())
router.Use(middleware.Authentication)
alertapi.NewApiWithConfig(router, cfg)
if err := serviceManagerSrv.UseApi(router); err != nil {
Expand Down
8 changes: 4 additions & 4 deletions configs/alerting/rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
"op": ">",
"severity": "P0",
"watch_time": "5 minutes",
"watch_time": "5m",
"metas": [
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 1000 },
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 1000 },
Expand All @@ -19,7 +19,7 @@
"expr": "histogram_quantile(0.98, sum(rate(http_latency_seconds_bucket{}[2m])) by (service, service_version, le))",
"op": ">",
"severity": "P1",
"watch_time": "4 minutes",
"watch_time": "4m",
"metas": [
{ "labels": { "service": "storage-service", "service_version": "1.0.0" }, "threshold": 500 },
{ "labels": { "service": "queue-service", "service_version": "1.0.0" }, "threshold": 500 },
Expand All @@ -31,7 +31,7 @@
"description":"HTTP error rate by service P0",
"op":">",
"severity":"P0",
"watch_time":"5 minutes",
"watch_time":"5m",
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
"metas":[
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":5},
Expand All @@ -44,7 +44,7 @@
"description":"HTTP error rate by service P1",
"op":">",
"severity":"P1",
"watch_time":"5 minutes",
"watch_time":"5m",
"expr":"sum(rate(http_latency_seconds_count{\"http.status_code\"=~\"4..|5..\", \"http.route\"!=\"/metrics\"}[2m])) by (service, service_version) / sum(rate(http_latency_seconds_count{\"http.route\"!=\"/metrics\"}[2m])) by (service, service_version)",
"metas":[
{"labels":{"service":"storage-service","service_version":"1.0.0"},"threshold":3},
Expand Down
File renamed without changes.
43 changes: 43 additions & 0 deletions docs/alerting/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,49 @@ curl -X POST http://localhost:8080/v1/integrations/alertmanager/webhook \
}'
```

### 4. 获取告警规则变更记录

用于查询统一化告警规则的变更记录(阈值、观察窗口等),支持按时间游标分页。

**请求:**
```http
GET /v1/changelog/alertrules?start={start}&limit={limit}
```

**查询参数:**

| 参数名 | 类型 | 必填 | 说明 |
|--------|------|------|------|
| start | string | 否 | 游标时间(ISO 8601)。第一页可不传;翻页使用上次响应的 `next` |
| limit | integer | 是 | 返回数量,范围 1-100 |

分页说明:按 `change_time` 倒序返回,`start` 为上界(`<= start`)。响应中的 `next` 为当前页最后一条的 `editTime`。注意:由于上界为闭区间,使用 `next` 翻页时,下一页的首条记录可能与当前页最后一条重复,客户端应按需去重。

**响应示例:**
```json
{
"items": [
{
"name": "http_request_latency_p98_seconds_P1",
"editTime": "2024-01-03T03:00:00Z",
"scope": "",
"values": [
{"name": "threshold", "old": "10", "new": "15"}
],
"reason": "Update"
}
],
"next": "2024-01-03T03:00:00Z"
}
```

**状态码:**
- `200 OK`: 成功
- `400 Bad Request`: 参数错误
- `401 Unauthorized`: 认证失败
- `500 Internal Server Error`: 服务器内部错误

## 版本历史

- **v1.1** (2025-10-07): 新增 `GET /v1/changelog/alertrules`
- **v1.0** (2025-09-11): 初始版本,支持基础的告警列表和详情查询
6 changes: 2 additions & 4 deletions docs/alerting/database-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

## 数据表设计

### 1) talert_issues(告警问题表)
### 1) alert_issues(告警问题表)

存储告警问题的主要信息。

Expand All @@ -23,7 +23,7 @@
| state | enum(Closed, Open) | 问题状态 |
| level | varchar(32) | 告警等级:如 P0/P1/Px |
| alert_state | enum(Pending, Restored, AutoRestored, InProcessing) | 处理状态 |
| title | varchar(255) | 告警标题 |
| title | varchar(255) | 告警标题 |
| labels | json | 标签,格式:[{key, value}] |
| alert_since | TIMESTAMP(6) | 告警发生时间 |
| resolved_at | TIMESTAMP(6) | 告警结束时间 |
Expand Down Expand Up @@ -64,8 +64,6 @@
| labels | text | labels 的 JSON 字符串表示(规范化后) |
| old_threshold | numeric | 旧阈值(可空) |
| new_threshold | numeric | 新阈值(可空) |
| old_watch | interval | 旧观察窗口(可空) |
| new_watch | interval | 新观察窗口(可空) |


**索引建议:**
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/qiniu/zeroops
go 1.24

require (
github.com/fox-gonic/fox v0.0.6
github.com/gin-gonic/gin v1.10.1
github.com/google/uuid v1.6.0
github.com/jackc/pgx/v5 v5.5.5
github.com/lib/pq v1.10.9
Expand Down
7 changes: 4 additions & 3 deletions internal/alerting/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,9 @@ docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c \
"CREATE TABLE IF NOT EXISTS alert_issue_comments (issue_id text, create_at timestamp, content text, PRIMARY KEY(issue_id, create_at));"
```

### 2) 初始化/重置规则表(alert_rules / alert_rule_metas)
### 2) 初始化/重置规则表(alert_rules / alert_rule_metas / alert_meta_change_logs)

注意:该脚本会 DROP 并重建 `alert_rules` 与 `alert_rule_metas`,仅用于本地/开发环境。
注意:该脚本会 DROP 并重建 `alert_rules`、`alert_rule_metas` 和 `alert_meta_change_logs`,仅用于本地/开发环境。

脚本位置:`scripts/sql/alert_rules_bootstrap.sql`

Expand All @@ -191,12 +191,13 @@ psql -U postgres -d zeroops -f scripts/sql/alert_rules_bootstrap.sql
```bash
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT name, severity FROM alert_rules;"
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT alert_name, labels, threshold FROM alert_rule_metas;"
docker exec -i zeroops-postgres-1 psql -U postgres -d zeroops -c "SELECT alert_name, change_type, change_time FROM alert_meta_change_logs;"
```

### 2) 清空数据库与缓存(可选,保证从空开始)

```bash
docker exec -i zeroops-pg psql -U postgres -d zeroops -c "TRUNCATE TABLE alert_issue_comments, service_states, alert_issues;"
docker exec -i zeroops-pg psql -U postgres -d zeroops -c "TRUNCATE TABLE alert_issue_comments, service_states, alert_issues, alert_meta_change_logs;"
docker exec -i zeroops-redis redis-cli --raw DEL $(docker exec -i zeroops-redis redis-cli --raw KEYS 'alert:*' | tr '\n' ' ') 2>/dev/null || true
docker exec -i zeroops-redis redis-cli --raw DEL $(docker exec -i zeroops-redis redis-cli --raw KEYS 'service_state:*' | tr '\n' ' ') 2>/dev/null || true
```
Expand Down
8 changes: 4 additions & 4 deletions internal/alerting/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package api
import (
"fmt"

"github.com/fox-gonic/fox"
"github.com/gin-gonic/gin"
adb "github.com/qiniu/zeroops/internal/alerting/database"
"github.com/qiniu/zeroops/internal/alerting/service/healthcheck"
receiver "github.com/qiniu/zeroops/internal/alerting/service/receiver"
Expand All @@ -12,15 +12,15 @@ import (

type Api struct{}

func NewApi(router *fox.Engine) *Api { return NewApiWithConfig(router, nil) }
func NewApi(router *gin.Engine) *Api { return NewApiWithConfig(router, nil) }

func NewApiWithConfig(router *fox.Engine, cfg *config.Config) *Api {
func NewApiWithConfig(router *gin.Engine, cfg *config.Config) *Api {
api := &Api{}
api.setupRouters(router, cfg)
return api
}

func (api *Api) setupRouters(router *fox.Engine, cfg *config.Config) {
func (api *Api) setupRouters(router *gin.Engine, cfg *config.Config) {
var h *receiver.Handler
var alertDB *adb.Database
if cfg != nil {
Expand Down
Loading