Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright 2025 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.openmetadata.service.search;

import java.io.IOException;
import java.util.List;
import org.openmetadata.schema.api.data.ColumnGridResponse;

public interface ColumnAggregator {

/**
 * Runs a column aggregation described by {@code request} and returns the resulting grid.
 *
 * <p>This interface only fixes the signature; how the request's size, cursor and filter fields
 * are interpreted is up to the implementing class. NOTE(review): presumably implementations
 * query the search backend, which would explain the checked {@link IOException} — confirm
 * against the implementations.
 *
 * @param request the aggregation parameters
 * @return the aggregation result as a {@link ColumnGridResponse}
 * @throws IOException declared so implementations can report I/O failures
 */
ColumnGridResponse aggregateColumns(ColumnAggregationRequest request) throws IOException;

/**
 * Mutable parameter holder for a column aggregation call.
 *
 * <p>A fresh instance has {@code size} set to 1000 and every other property {@code null}.
 * Setters store the given value as-is; no validation or copying is performed.
 */
class ColumnAggregationRequest {
  /** Value of {@code size} on a freshly constructed request. */
  private static final int DEFAULT_SIZE = 1000;

  // --- paging -------------------------------------------------------------
  private int size = DEFAULT_SIZE;
  private String cursor;

  // --- filters (null until set) --------------------------------------------
  private String columnNamePattern;
  private List<String> entityTypes;
  private String serviceName;
  private String databaseName;
  private String schemaName;
  private String domainId;
  private Boolean hasConflicts;
  private Boolean hasMissingMetadata;

  /** @return the requested size; 1000 unless changed via {@link #setSize(int)} */
  public int getSize() {
    return this.size;
  }

  /** @param size the new size, stored without range checks */
  public void setSize(int size) {
    this.size = size;
  }

  /** @return the cursor, or {@code null} if none was set */
  public String getCursor() {
    return this.cursor;
  }

  /** @param cursor the cursor to store; may be {@code null} */
  public void setCursor(String cursor) {
    this.cursor = cursor;
  }

  /** @return the column name pattern, or {@code null} if none was set */
  public String getColumnNamePattern() {
    return this.columnNamePattern;
  }

  /** @param columnNamePattern the column name pattern to store; may be {@code null} */
  public void setColumnNamePattern(String columnNamePattern) {
    this.columnNamePattern = columnNamePattern;
  }

  /** @return the very list instance passed to the setter, or {@code null} if none was set */
  public List<String> getEntityTypes() {
    return this.entityTypes;
  }

  /** @param entityTypes the entity types to store; the list is kept by reference, not copied */
  public void setEntityTypes(List<String> entityTypes) {
    this.entityTypes = entityTypes;
  }

  /** @return the service name, or {@code null} if none was set */
  public String getServiceName() {
    return this.serviceName;
  }

  /** @param serviceName the service name to store; may be {@code null} */
  public void setServiceName(String serviceName) {
    this.serviceName = serviceName;
  }

  /** @return the database name, or {@code null} if none was set */
  public String getDatabaseName() {
    return this.databaseName;
  }

  /** @param databaseName the database name to store; may be {@code null} */
  public void setDatabaseName(String databaseName) {
    this.databaseName = databaseName;
  }

  /** @return the schema name, or {@code null} if none was set */
  public String getSchemaName() {
    return this.schemaName;
  }

  /** @param schemaName the schema name to store; may be {@code null} */
  public void setSchemaName(String schemaName) {
    this.schemaName = schemaName;
  }

  /** @return the domain id, or {@code null} if none was set */
  public String getDomainId() {
    return this.domainId;
  }

  /** @param domainId the domain id to store; may be {@code null} */
  public void setDomainId(String domainId) {
    this.domainId = domainId;
  }

  /** @return the conflicts flag; {@code null} until explicitly set */
  public Boolean getHasConflicts() {
    return this.hasConflicts;
  }

  /** @param hasConflicts the conflicts flag to store; may be {@code null} */
  public void setHasConflicts(Boolean hasConflicts) {
    this.hasConflicts = hasConflicts;
  }

  /** @return the missing-metadata flag; {@code null} until explicitly set */
  public Boolean getHasMissingMetadata() {
    return this.hasMissingMetadata;
  }

  /** @param hasMissingMetadata the missing-metadata flag to store; may be {@code null} */
  public void setHasMissingMetadata(Boolean hasMissingMetadata) {
    this.hasMissingMetadata = hasMissingMetadata;
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2025 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.openmetadata.service.search;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.binary.Hex;
import org.openmetadata.schema.api.data.ColumnGridItem;
import org.openmetadata.schema.api.data.ColumnMetadataGroup;
import org.openmetadata.schema.api.data.ColumnOccurrenceRef;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.TagLabel;

@Slf4j
public class ColumnMetadataGrouper {

/**
 * Builds one {@link ColumnGridItem} per column name, splitting that name's occurrences into
 * groups that share the same metadata hash (see {@code generateMetadataHash}).
 *
 * <p>For every group, the first occurrence encountered supplies the display name, description,
 * tags and data type; each later occurrence with the same hash only adds an occurrence reference
 * and bumps the count. Groups are returned in the order they were first seen, so the output
 * order follows the input order rather than hash-bucket order.
 *
 * @param columnsByName occurrences keyed by column name; {@code null} or empty yields an empty
 *     list
 * @return a new mutable list with one grid item per map entry, in the map's iteration order
 */
public static List<ColumnGridItem> groupColumns(
    Map<String, List<ColumnWithContext>> columnsByName) {
  List<ColumnGridItem> gridItems = new ArrayList<>();
  if (columnsByName == null || columnsByName.isEmpty()) {
    return gridItems;
  }

  for (Map.Entry<String, List<ColumnWithContext>> entry : columnsByName.entrySet()) {
    List<ColumnWithContext> occurrences = entry.getValue();

    // LinkedHashMap: keep groups in first-seen order instead of the arbitrary order a HashMap
    // keyed by hex digests would produce.
    Map<String, ColumnMetadataGroup> groupsById = new LinkedHashMap<>();
    for (ColumnWithContext ctx : occurrences) {
      String groupId = generateMetadataHash(ctx.column);
      ColumnMetadataGroup group =
          groupsById.computeIfAbsent(groupId, id -> createGroup(id, ctx.column));
      group.getOccurrences().add(toOccurrenceRef(ctx));
      group.setOccurrenceCount(group.getOccurrenceCount() + 1);
    }

    ColumnGridItem gridItem = new ColumnGridItem();
    gridItem.setColumnName(entry.getKey());
    gridItem.setTotalOccurrences(occurrences.size());
    // More than one distinct metadata hash means the same column name is described differently
    // somewhere.
    gridItem.setHasVariations(groupsById.size() > 1);
    gridItem.setGroups(new ArrayList<>(groupsById.values()));
    gridItems.add(gridItem);
  }

  return gridItems;
}

/** Creates an empty group whose descriptive fields are taken from {@code column}. */
private static ColumnMetadataGroup createGroup(String groupId, Column column) {
  ColumnMetadataGroup group = new ColumnMetadataGroup();
  group.setGroupId(groupId);
  group.setDisplayName(column.getDisplayName());
  group.setDescription(column.getDescription());
  group.setTags(column.getTags());
  group.setDataType(column.getDataType() != null ? column.getDataType().toString() : null);
  group.setOccurrenceCount(0);
  group.setOccurrences(new ArrayList<>());
  return group;
}

/** Copies the column FQN and the owning-entity context of {@code ctx} into an occurrence ref. */
private static ColumnOccurrenceRef toOccurrenceRef(ColumnWithContext ctx) {
  ColumnOccurrenceRef occurrence = new ColumnOccurrenceRef();
  occurrence.setColumnFQN(ctx.column.getFullyQualifiedName());
  occurrence.setEntityType(ctx.entityType);
  occurrence.setEntityFQN(ctx.entityFQN);
  occurrence.setEntityDisplayName(ctx.entityDisplayName);
  occurrence.setServiceName(ctx.serviceName);
  occurrence.setDatabaseName(ctx.databaseName);
  occurrence.setSchemaName(ctx.schemaName);
  return occurrence;
}

/**
 * Computes the identifier used to bucket columns that carry identical metadata.
 *
 * <p>The signature is {@code displayName|description|tagFQNs}: a null display name or
 * description contributes an empty string, and tag FQNs are sorted and comma-joined so tag order
 * does not influence the result. The column's data type is not part of the signature. The
 * signature is encoded as UTF-8, MD5-hashed and hex-encoded; MD5 serves only as a compact
 * fingerprint here, not as a security measure.
 *
 * @param column the column whose display name, description and tags are fingerprinted
 * @return the hex MD5 of the signature, or the hex {@link String#hashCode()} of the signature
 *     when no MD5 provider is available
 */
private static String generateMetadataHash(Column column) {
  StringBuilder signature = new StringBuilder();

  signature.append(column.getDisplayName() != null ? column.getDisplayName() : "");
  signature.append("|");
  signature.append(column.getDescription() != null ? column.getDescription() : "");
  signature.append("|");

  if (column.getTags() != null) {
    String tagString =
        column.getTags().stream()
            .map(TagLabel::getTagFQN)
            .sorted()
            .collect(Collectors.joining(","));
    signature.append(tagString);
  }

  String canonical = signature.toString();
  try {
    MessageDigest md = MessageDigest.getInstance("MD5");
    // Explicit UTF-8: the no-arg getBytes() uses the platform default charset, which would make
    // the id of non-ASCII metadata depend on the JVM's configuration.
    byte[] digest = md.digest(canonical.getBytes(StandardCharsets.UTF_8));
    return Hex.encodeHexString(digest);
  } catch (NoSuchAlgorithmException e) {
    LOG.error("Failed to generate metadata hash", e);
    // Still derive the id from the signature: a constant fallback would put every column into
    // one group and hide all metadata variations.
    return Integer.toHexString(canonical.hashCode());
  }
}

/**
 * A column together with details of the entity it was found in.
 *
 * <p>Plain data carrier with public, mutable fields; the constructor stores every argument as-is
 * without validation. {@code groupColumns} reads the column's metadata for grouping and copies
 * the remaining fields onto each occurrence reference.
 */
public static class ColumnWithContext {
  /** The column itself. */
  public Column column;

  /** Type of the entity that owns the column. */
  public String entityType;

  /** Fully qualified name of the owning entity. */
  public String entityFQN;

  /** Display name of the owning entity. */
  public String entityDisplayName;

  /** Name of the service associated with the owning entity. */
  public String serviceName;

  /** Name of the database associated with the owning entity. */
  public String databaseName;

  /** Name of the schema associated with the owning entity. */
  public String schemaName;

  /**
   * Creates a context record; all arguments are stored by reference and may be {@code null} as
   * far as this class is concerned.
   */
  public ColumnWithContext(
      Column column,
      String entityType,
      String entityFQN,
      String entityDisplayName,
      String serviceName,
      String databaseName,
      String schemaName) {
    // Location of the owning entity.
    this.serviceName = serviceName;
    this.databaseName = databaseName;
    this.schemaName = schemaName;

    // Identity of the owning entity.
    this.entityType = entityType;
    this.entityFQN = entityFQN;
    this.entityDisplayName = entityDisplayName;

    this.column = column;
  }
}
}
Loading
Loading