Skip to content

Commit a5a7e53

Browse files
committed
Expose rule false positive ratio to rule hit table.
The false positive ratio must be computed from the latest outcome of closed cases in which a rule appears. This required adding the org ID to case events, ingesting some of the case events, and performing a separate query.
1 parent 37c94b2 commit a5a7e53

15 files changed

+288
-15
lines changed

models/analytics/results.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,18 @@ type DecisionsScoreDistribution struct {
2323
}
2424

2525
type RuleHitTable struct {
26-
RuleName string `json:"rule_name"`
27-
HitCount int `json:"hit_count"`
28-
HitRatio float64 `json:"hit_ratio"`
29-
DistinctPivots int `json:"distinct_pivots"`
30-
RepeatRatio float64 `json:"repeat_ratio"`
26+
RuleId uuid.UUID `json:"-"`
27+
RuleName string `json:"rule_name"`
28+
HitCount int `json:"hit_count"`
29+
HitRatio float64 `json:"hit_ratio"`
30+
FalsePositiveRatio float64 `json:"false_positive_ratio"`
31+
DistinctPivots int `json:"distinct_pivots"`
32+
RepeatRatio float64 `json:"repeat_ratio"`
33+
}
34+
35+
type FalsePositiveRatio struct {
36+
RuleId uuid.UUID
37+
FalsePositiveRatio float64
3138
}
3239

3340
type RuleVsDecisionOutcome struct {

models/case_event.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@ package models
33
import (
44
"time"
55

6+
"github.com/google/uuid"
67
"github.com/guregu/null/v5"
78
)
89

910
type CaseEvent struct {
1011
Id string
12+
OrgId uuid.UUID
1113
CaseId string
1214
UserId null.String
1315
CreatedAt time.Time
@@ -59,6 +61,7 @@ const (
5961
)
6062

6163
type CreateCaseEventAttributes struct {
64+
OrgId uuid.UUID
6265
CaseId string
6366
UserId *string
6467
EventType CaseEventType

repositories/analytics_export_repository.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/Masterminds/squirrel"
1111
"github.com/checkmarble/marble-backend/models"
1212
"github.com/checkmarble/marble-backend/models/analytics"
13+
"github.com/checkmarble/marble-backend/repositories/dbmodels"
1314
"github.com/cockroachdb/errors"
1415
"github.com/google/uuid"
1516
)
@@ -263,6 +264,88 @@ func AnalyticsCopyScreenings(ctx context.Context, exec AnalyticsExecutor, req An
263264
return int(nRows), nil
264265
}
265266

267+
func AnalyticsCopyCaseEvents(ctx context.Context, exec AnalyticsExecutor, req AnalyticsCopyRequest) (int, error) {
268+
cte := WithCtesRaw("q", func(b squirrel.StatementBuilderType) squirrel.SelectBuilder {
269+
q := b.Select(
270+
"ce.id",
271+
"ce.org_id",
272+
"d.scenario_id",
273+
"dr.rule_id as rule_id",
274+
"ce.case_id as case_id",
275+
"ce.new_value as outcome",
276+
"ce.created_at as created_at",
277+
"d.trigger_object_type",
278+
"row_number() over (partition by ce.case_id order by ce.created_at desc) rnk",
279+
).
280+
From(dbmodels.TABLE_CASE_EVENTS+" ce").
281+
InnerJoin(dbmodels.TABLE_CASES+" c on c.id = ce.case_id").
282+
InnerJoin(dbmodels.TABLE_DECISIONS+" d on d.case_id = c.id").
283+
InnerJoin(dbmodels.TABLE_DECISION_RULES+" dr on dr.decision_id = d.id").
284+
Where("ce.org_id = ?", req.OrgId).
285+
Where("ce.event_type = 'outcome_updated'").
286+
Where("c.status = 'closed'").
287+
Where("d.trigger_object_type = ?", req.TriggerObject).
288+
Where("ce.created_at < ?", req.EndTime).
289+
OrderBy("ce.created_at, ce.id").
290+
Limit(uint64(req.Limit))
291+
292+
if req.Watermark != nil {
293+
q = q.Where("(ce.created_at, ce.id) > (?::timestamp with time zone, ?)",
294+
req.Watermark.WatermarkTime, req.Watermark.WatermarkId)
295+
}
296+
297+
return q
298+
})
299+
300+
inner := squirrel.
301+
Select(
302+
"q.id",
303+
"q.scenario_id",
304+
"q.rule_id",
305+
"q.case_id",
306+
"q.outcome",
307+
"q.created_at",
308+
"q.org_id",
309+
"extract(year from q.created_at)::int as year",
310+
"extract(month from q.created_at)::int as month",
311+
"q.trigger_object_type",
312+
).
313+
From("q").
314+
PrefixExpr(cte).
315+
InnerJoin(dbmodels.TABLE_DECISIONS + " d on d.case_id = q.case_id").
316+
Where("q.rnk = 1")
317+
318+
for _, f := range req.TriggerObjectFields {
319+
inner = analyticsAddTriggerObjectField(inner, f, false)
320+
}
321+
for _, f := range req.ExtraDbFields {
322+
inner = analyticsAddExtraField(inner, f, false)
323+
}
324+
325+
innerSql, args, err := inner.ToSql()
326+
if err != nil {
327+
return 0, err
328+
}
329+
330+
unsafeQuery, err := unsafeBuildSqlQuery(innerSql, args)
331+
if err != nil {
332+
return 0, err
333+
}
334+
335+
query := fmt.Sprintf(`copy ( select * from postgres_query(?, ?) ) to '%s' (format parquet, compression zstd, partition_by (org_id, year, month, trigger_object_type), append)`, req.Table)
336+
337+
result, err := exec.ExecContext(ctx, query, "pg", unsafeQuery)
338+
if err != nil {
339+
return 0, err
340+
}
341+
nRows, err := result.RowsAffected()
342+
if err != nil {
343+
return 0, err
344+
}
345+
346+
return int(nRows), nil
347+
}
348+
266349
func analyticsAddTriggerObjectField(b squirrel.SelectBuilder, field models.Field, anyValue bool) squirrel.SelectBuilder {
267350
sqlType := "text"
268351

repositories/case_event_repository.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ func (repo *MarbleDbRepository) BatchCreateCaseEvents(ctx context.Context, exec
8686

8787
query := NewQueryBuilder().Insert(dbmodels.TABLE_CASE_EVENTS).
8888
Columns(
89+
"org_id",
8990
"case_id",
9091
"user_id",
9192
"event_type",
@@ -105,6 +106,7 @@ func (repo *MarbleDbRepository) BatchCreateCaseEvents(ctx context.Context, exec
105106
userId = pgtype.Text{Valid: false}
106107
}
107108
query = query.Values(
109+
createCaseEventAttribute.OrgId,
108110
createCaseEventAttribute.CaseId,
109111
userId,
110112
createCaseEventAttribute.EventType,

repositories/dbmodels/db_case_event.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ import (
55

66
"github.com/checkmarble/marble-backend/models"
77
"github.com/checkmarble/marble-backend/utils"
8+
"github.com/google/uuid"
89
"github.com/guregu/null/v5"
910
)
1011

1112
type DBCaseEvent struct {
1213
Id string `db:"id"`
14+
OrgId uuid.UUID `db:"org_id"`
1315
CaseId string `db:"case_id"`
1416
UserId null.String `db:"user_id"`
1517
CreatedAt time.Time `db:"created_at"`
@@ -44,6 +46,7 @@ func AdaptCaseEvent(caseEvent DBCaseEvent) (models.CaseEvent, error) {
4446
}
4547
return models.CaseEvent{
4648
Id: caseEvent.Id,
49+
OrgId: caseEvent.OrgId,
4750
CaseId: caseEvent.CaseId,
4851
UserId: caseEvent.UserId,
4952
CreatedAt: caseEvent.CreatedAt,
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
-- +goose Up
2+
-- +goose NO TRANSACTION
3+
-- +goose StatementBegin
4+
5+
alter table case_events
6+
add column org_id uuid;
7+
8+
update case_events
9+
set org_id = c.org_id
10+
from cases c
11+
where c.id = case_events.case_id;
12+
13+
alter table case_events
14+
alter column org_id set not null;
15+
16+
-- +goose StatementEnd
17+
18+
create index concurrently if not exists idx_case_events_by_org
19+
on case_events (org_id, event_type, created_at)
20+
where event_type in ('outcome_updated');
21+
22+
-- +goose Down
23+
24+
drop index idx_case_events_by_org;
25+
alter table case_events drop column org_id;

repositories/sql_utils.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,32 @@ type cte struct {
1313
}
1414

1515
type QueryCte struct {
16+
builder func() squirrel.StatementBuilderType
1617
queries []cte
1718
}
1819

1920
func WithCtes(name string, cb func(b squirrel.StatementBuilderType) squirrel.SelectBuilder) *QueryCte {
20-
ctes := &QueryCte{}
21+
ctes := &QueryCte{
22+
builder: NewQueryBuilder,
23+
}
24+
25+
return ctes.With(name, cb)
26+
}
27+
28+
func WithCtesRaw(name string, cb func(b squirrel.StatementBuilderType) squirrel.SelectBuilder) *QueryCte {
29+
ctes := &QueryCte{
30+
builder: func() squirrel.StatementBuilderType {
31+
return squirrel.StatementBuilder.PlaceholderFormat(squirrel.Question)
32+
},
33+
}
2134

2235
return ctes.With(name, cb)
2336
}
2437

2538
func (q *QueryCte) With(name string, cb func(b squirrel.StatementBuilderType) squirrel.SelectBuilder) *QueryCte {
2639
q.queries = append(q.queries, cte{
2740
name: pgx.Identifier.Sanitize([]string{name}),
28-
query: cb(NewQueryBuilder()),
41+
query: cb(q.builder()),
2942
})
3043

3144
return q

usecases/analytics_query_usecase.go

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,11 @@ func (uc AnalyticsQueryUsecase) RuleHitTable(ctx context.Context, filters dto.An
9797

9898
query := squirrel.
9999
Select(
100+
"rule_id",
100101
"rule_name",
101102
"count() filter (outcome = 'hit') as hit_count",
102103
"((count() filter (outcome = 'hit')) / count()) * 100 as hit_ratio",
104+
"0 as false_positive_ratio", // Determined in a separate query below
103105
"count(distinct pivot_value) filter (outcome = 'hit') as distinct_pivots",
104106
"IFNULL(100 - (count(distinct pivot_value) filter (outcome = 'hit') / NULLIF(count() filter (outcome = 'hit'), 0) ) * 100, 0) as repeat_ratio",
105107
).
@@ -114,7 +116,60 @@ func (uc AnalyticsQueryUsecase) RuleHitTable(ctx context.Context, filters dto.An
114116
return nil, err
115117
}
116118

117-
return repositories.AnalyticsScanStruct[analytics.RuleHitTable](ctx, exec, query)
119+
ruleHits, err := repositories.AnalyticsScanStruct[analytics.RuleHitTable](ctx, exec, query)
120+
if err != nil {
121+
return nil, err
122+
}
123+
124+
// Fetch false positives from another query.
125+
//
126+
// In case of error (which could happen, for example, because of the
126+
// separate ingestion between decision rules and case events), return the
127+
// initial query's results, which will have a false positive ratio of 0.
129+
{
130+
var cteErr error
131+
132+
cte := repositories.WithCtes("data", func(b squirrel.StatementBuilderType) squirrel.SelectBuilder {
133+
q := b.Select("*").
134+
From(uc.analyticsFactory.BuildTarget("rule_hit_outcomes")).
135+
Suffix("qualify row_number() over (partition by case_id order by created_at desc) = 1")
136+
137+
q, cteErr = uc.analyticsFactory.ApplyFilters(q, scenario, filters)
138+
if err != nil {
139+
return q
140+
}
141+
142+
return q
143+
})
144+
145+
if cteErr != nil {
146+
return ruleHits, nil
147+
}
148+
149+
query := squirrel.
150+
Select(
151+
"rule_id",
152+
"ifnull(count() filter (where outcome = 'false_positive') / nullif(count(), 0) * 100, 0) as false_positive_ratio",
153+
).
154+
PrefixExpr(cte).
155+
From("data").
156+
GroupBy("rule_id")
157+
158+
falsePositives, err := repositories.AnalyticsScanStruct[analytics.FalsePositiveRatio](ctx, exec, query)
159+
if err != nil {
160+
return ruleHits, nil
161+
}
162+
163+
for _, ratio := range falsePositives {
164+
for idx, rule := range ruleHits {
165+
if ratio.RuleId == rule.RuleId {
166+
ruleHits[idx].FalsePositiveRatio = ratio.FalsePositiveRatio
167+
}
168+
}
169+
}
170+
}
171+
172+
return ruleHits, nil
118173
}
119174

120175
// TODO: could maybe be optimized by storing (d.outcome) denormalized alongside the decision rule.

usecases/auto_assignment_usecase.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ func (uc AutoAssignmentUsecase) assignCase(ctx context.Context, c models.Case, u
9898
}
9999

100100
if err := uc.caseRepository.CreateCaseEvent(ctx, tx, models.CreateCaseEventAttributes{
101+
OrgId: uuid.MustParse(c.OrganizationId),
101102
CaseId: c.Id,
102103
UserId: nil,
103104
EventType: models.CaseAssigned,

0 commit comments

Comments
 (0)