-
Notifications
You must be signed in to change notification settings - Fork 178
Fix timechart OTHER category aggregation for non-cumulative functions #4594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
4d3875d
6a4fa9d
955c713
3903d36
eb493cf
edb721a
acf5a4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1926,7 +1926,7 @@ public RelNode visitFlatten(Flatten node, CalcitePlanContext context) { | |
| } | ||
|
|
||
| /** Helper method to get the function name for proper column naming */ | ||
| private String getValueFunctionName(UnresolvedExpression aggregateFunction) { | ||
| private String getAggFieldAlias(UnresolvedExpression aggregateFunction) { | ||
| if (aggregateFunction instanceof Alias) { | ||
| return ((Alias) aggregateFunction).getName(); | ||
| } | ||
|
|
@@ -1976,15 +1976,15 @@ public RelNode visitTimechart( | |
|
|
||
| // Handle no by field case | ||
| if (node.getByField() == null) { | ||
| String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); | ||
| String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); | ||
|
|
||
| // Create group expression list with just the timestamp span but use a different alias | ||
| // to avoid @timestamp naming conflict | ||
| List<UnresolvedExpression> simpleGroupExprList = new ArrayList<>(); | ||
| simpleGroupExprList.add(new Alias("timestamp", spanExpr)); | ||
| // Create agg expression list with the aggregate function | ||
| List<UnresolvedExpression> simpleAggExprList = | ||
| List.of(new Alias(valueFunctionName, node.getAggregateFunction())); | ||
| List.of(new Alias(aggFieldAlias, node.getAggregateFunction())); | ||
| // Create an Aggregation object | ||
| Aggregation aggregation = | ||
| new Aggregation( | ||
|
|
@@ -1999,9 +1999,9 @@ public RelNode visitTimechart( | |
| context.relBuilder.push(result); | ||
| // Reorder fields: timestamp first, then count | ||
| context.relBuilder.project( | ||
| context.relBuilder.field("timestamp"), context.relBuilder.field(valueFunctionName)); | ||
| context.relBuilder.field("timestamp"), context.relBuilder.field(aggFieldAlias)); | ||
| // Rename timestamp to @timestamp | ||
| context.relBuilder.rename(List.of("@timestamp", valueFunctionName)); | ||
| context.relBuilder.rename(List.of("@timestamp", aggFieldAlias)); | ||
|
|
||
| context.relBuilder.sort(context.relBuilder.field(0)); | ||
| return context.relBuilder.peek(); | ||
|
|
@@ -2010,7 +2010,7 @@ public RelNode visitTimechart( | |
| // Extract parameters for byField case | ||
| UnresolvedExpression byField = node.getByField(); | ||
| String byFieldName = ((Field) byField).getField().toString(); | ||
| String valueFunctionName = getValueFunctionName(node.getAggregateFunction()); | ||
| String aggFieldAlias = getAggFieldAlias(node.getAggregateFunction()); | ||
|
|
||
| int limit = Optional.ofNullable(node.getLimit()).orElse(10); | ||
| boolean useOther = Optional.ofNullable(node.getUseOther()).orElse(true); | ||
|
|
@@ -2037,11 +2037,11 @@ public RelNode visitTimechart( | |
|
|
||
| // Handle no limit case - just sort and return with proper field aliases | ||
| if (limit == 0) { | ||
| // Add final projection with proper aliases: [@timestamp, byField, valueFunctionName] | ||
| // Add final projection with proper aliases: [@timestamp, byField, aggFieldAlias] | ||
| context.relBuilder.project( | ||
| context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), | ||
| context.relBuilder.alias(context.relBuilder.field(1), byFieldName), | ||
| context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); | ||
| context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); | ||
| context.relBuilder.sort(context.relBuilder.field(0), context.relBuilder.field(1)); | ||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
@@ -2051,32 +2051,61 @@ public RelNode visitTimechart( | |
|
|
||
| // Step 2: Find top N categories using window function approach (more efficient than separate | ||
| // aggregation) | ||
| RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, context); | ||
| String aggFunctionName = getAggFunctionName(node.getAggregateFunction()); | ||
| Optional<BuiltinFunctionName> aggFuncNameOptional = BuiltinFunctionName.of(aggFunctionName); | ||
| if (aggFuncNameOptional.isEmpty()) { | ||
| throw new IllegalArgumentException( | ||
| StringUtils.format("Unrecognized aggregation function: %s", aggFunctionName)); | ||
| } | ||
| BuiltinFunctionName aggFunction = aggFuncNameOptional.get(); | ||
| RelNode topCategories = buildTopCategoriesQuery(completeResults, limit, aggFunction, context); | ||
|
|
||
| // Step 3: Apply OTHER logic with single pass | ||
| return buildFinalResultWithOther( | ||
| completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); | ||
| completeResults, | ||
| topCategories, | ||
| byFieldName, | ||
| aggFunction, | ||
| aggFieldAlias, | ||
| useOther, | ||
| limit, | ||
| context); | ||
|
|
||
| } catch (Exception e) { | ||
| throw new RuntimeException("Error in visitTimechart: " + e.getMessage(), e); | ||
| } | ||
| } | ||
|
|
||
| private String getAggFunctionName(UnresolvedExpression aggregateFunction) { | ||
| if (aggregateFunction instanceof Alias alias) { | ||
| return getAggFunctionName(alias.getDelegated()); | ||
| } | ||
| return ((AggregateFunction) aggregateFunction).getFuncName(); | ||
| } | ||
|
|
||
| /** Build top categories query - simpler approach that works better with OTHER handling */ | ||
| private RelNode buildTopCategoriesQuery( | ||
| RelNode completeResults, int limit, CalcitePlanContext context) { | ||
| RelNode completeResults, | ||
| int limit, | ||
| BuiltinFunctionName aggFunction, | ||
| CalcitePlanContext context) { | ||
| context.relBuilder.push(completeResults); | ||
|
|
||
| // Filter out null values when determining top categories - null should not count towards limit | ||
| context.relBuilder.filter(context.relBuilder.isNotNull(context.relBuilder.field(1))); | ||
|
|
||
| // Get totals for non-null categories - field positions: 0=@timestamp, 1=byField, 2=value | ||
| RexInputRef valueField = context.relBuilder.field(2); | ||
| AggCall call = buildAggCall(context.relBuilder, aggFunction, valueField); | ||
|
|
||
| context.relBuilder.aggregate( | ||
| context.relBuilder.groupKey(context.relBuilder.field(1)), | ||
| context.relBuilder.sum(context.relBuilder.field(2)).as("grand_total")); | ||
| context.relBuilder.groupKey(context.relBuilder.field(1)), call.as("grand_total")); | ||
|
|
||
| // Apply sorting and limit to non-null categories only | ||
| context.relBuilder.sort(context.relBuilder.desc(context.relBuilder.field("grand_total"))); | ||
| RexNode sortField = context.relBuilder.field("grand_total"); | ||
| sortField = | ||
| aggFunction == BuiltinFunctionName.MIN ? sortField : context.relBuilder.desc(sortField); | ||
|
||
| context.relBuilder.sort(sortField); | ||
| if (limit > 0) { | ||
| context.relBuilder.limit(0, limit); | ||
| } | ||
|
|
@@ -2089,18 +2118,25 @@ private RelNode buildFinalResultWithOther( | |
| RelNode completeResults, | ||
| RelNode topCategories, | ||
| String byFieldName, | ||
| String valueFunctionName, | ||
| BuiltinFunctionName aggFunction, | ||
| String aggFieldAlias, | ||
| boolean useOther, | ||
| int limit, | ||
| CalcitePlanContext context) { | ||
|
|
||
| // Use zero-filling for count aggregations, standard result for others | ||
| if (valueFunctionName.equals("count")) { | ||
| if (aggFieldAlias.equals("count")) { | ||
| return buildZeroFilledResult( | ||
| completeResults, topCategories, byFieldName, valueFunctionName, useOther, limit, context); | ||
| completeResults, topCategories, byFieldName, aggFieldAlias, useOther, limit, context); | ||
| } else { | ||
| return buildStandardResult( | ||
| completeResults, topCategories, byFieldName, valueFunctionName, useOther, context); | ||
| completeResults, | ||
| topCategories, | ||
| byFieldName, | ||
| aggFunction, | ||
| aggFieldAlias, | ||
| useOther, | ||
| context); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -2109,7 +2145,8 @@ private RelNode buildStandardResult( | |
| RelNode completeResults, | ||
| RelNode topCategories, | ||
| String byFieldName, | ||
| String valueFunctionName, | ||
| BuiltinFunctionName aggFunctionName, | ||
| String aggFieldAlias, | ||
| boolean useOther, | ||
| CalcitePlanContext context) { | ||
|
|
||
|
|
@@ -2132,11 +2169,13 @@ private RelNode buildStandardResult( | |
| context.relBuilder.project( | ||
| context.relBuilder.alias(context.relBuilder.field(0), "@timestamp"), | ||
| context.relBuilder.alias(categoryExpr, byFieldName), | ||
| context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); | ||
| context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); | ||
|
|
||
| RexInputRef valueField = context.relBuilder.field(2); | ||
| AggCall aggCall = buildAggCall(context.relBuilder, aggFunctionName, valueField); | ||
| context.relBuilder.aggregate( | ||
| context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), | ||
| context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); | ||
| aggCall.as(aggFieldAlias)); | ||
|
|
||
| applyFiltersAndSort(useOther, context); | ||
| return context.relBuilder.peek(); | ||
|
|
@@ -2171,7 +2210,7 @@ private RelNode buildZeroFilledResult( | |
| RelNode completeResults, | ||
| RelNode topCategories, | ||
| String byFieldName, | ||
| String valueFunctionName, | ||
| String aggFieldAlias, | ||
| boolean useOther, | ||
| int limit, | ||
| CalcitePlanContext context) { | ||
|
|
@@ -2210,7 +2249,7 @@ private RelNode buildZeroFilledResult( | |
| context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), | ||
| "@timestamp"), | ||
| context.relBuilder.alias(context.relBuilder.field(1), byFieldName), | ||
| context.relBuilder.alias(context.relBuilder.literal(0), valueFunctionName)); | ||
| context.relBuilder.alias(context.relBuilder.literal(0), aggFieldAlias)); | ||
| RelNode zeroFilledCombinations = context.relBuilder.build(); | ||
|
|
||
| // Get actual results with OTHER logic applied | ||
|
|
@@ -2232,7 +2271,7 @@ private RelNode buildZeroFilledResult( | |
| context.relBuilder.cast(context.relBuilder.field(0), SqlTypeName.TIMESTAMP), | ||
| "@timestamp"), | ||
| context.relBuilder.alias(actualCategoryExpr, byFieldName), | ||
| context.relBuilder.alias(context.relBuilder.field(2), valueFunctionName)); | ||
| context.relBuilder.alias(context.relBuilder.field(2), aggFieldAlias)); | ||
|
|
||
| context.relBuilder.aggregate( | ||
| context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), | ||
|
|
@@ -2247,12 +2286,30 @@ private RelNode buildZeroFilledResult( | |
| // Aggregate to combine actual and zero-filled data | ||
| context.relBuilder.aggregate( | ||
| context.relBuilder.groupKey(context.relBuilder.field(0), context.relBuilder.field(1)), | ||
| context.relBuilder.sum(context.relBuilder.field(2)).as(valueFunctionName)); | ||
| context.relBuilder.sum(context.relBuilder.field(2)).as(aggFieldAlias)); | ||
|
|
||
| applyFiltersAndSort(useOther, context); | ||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
||
| /** | ||
| * Aggregate a field based on a given built-in aggregation function name. | ||
| * | ||
| * <p>It is intended for secondary aggregations in timechart and chart commands. Using it | ||
| * elsewhere may lead to unintended results. It handles explicitly only MIN, MAX, AVG, COUNT, | ||
| * DISTINCT_COUNT, EARLIEST, and LATEST. It sums the results for the rest aggregation types, | ||
| * assuming them to be accumulative. | ||
| */ | ||
| private AggCall buildAggCall( | ||
| RelBuilder relBuilder, BuiltinFunctionName aggFunction, RexNode node) { | ||
| return switch (aggFunction) { | ||
| case MIN, EARLIEST -> relBuilder.min(node); | ||
| case MAX, LATEST -> relBuilder.max(node); | ||
| case AVG -> relBuilder.avg(node); | ||
| default -> relBuilder.sum(node); | ||
| }; | ||
| } | ||
|
|
||
| @Override | ||
| public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { | ||
| visitChildren(node, context); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getAggFieldAliaslacks precision. How about change togetMeasureAlias?balance: aggregate Field
count(): aggregate function
count(balance): measure
cnt: alias of measure
Do not forget to update the java doc either.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for pointing out! I updated the name as
getMetricAliasas metric conforms to how it is named in other parts of the codebase.