Skip to content

Commit 248722d

Browse files
committed
Use the UNIX 'cut' tool for minimizing graphs
1 parent c6a561d commit 248722d

File tree

1 file changed

+18
-25
lines changed

1 file changed

+18
-25
lines changed

graphalytics-core/src/main/java/science/atlarge/graphalytics/util/GraphFileManager.java

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,13 @@
1919

2020
import org.apache.logging.log4j.LogManager;
2121
import org.apache.logging.log4j.Logger;
22-
import org.duckdb.DuckDBConnection;
2322
import science.atlarge.graphalytics.domain.graph.FormattedGraph;
24-
import science.atlarge.graphalytics.domain.graph.PropertyList;
25-
import science.atlarge.graphalytics.util.io.VertexListInputStreamReader;
26-
import science.atlarge.graphalytics.util.io.VertexListPropertyFilter;
27-
import science.atlarge.graphalytics.util.io.VertexListStreamWriter;
2823

29-
import java.io.File;
30-
import java.io.FileInputStream;
31-
import java.io.FileOutputStream;
3224
import java.io.IOException;
3325
import java.nio.file.Files;
3426
import java.nio.file.Paths;
35-
import java.sql.DriverManager;
36-
import java.sql.SQLException;
37-
import java.sql.Statement;
27+
import java.util.ArrayList;
28+
import java.util.List;
3829

3930
/**
4031
* Utility class for managing graph files. Responsible for generating additional graph files from a source dataset
@@ -59,11 +50,11 @@ private GraphFileManager() {
5950
* @param formattedGraph the graph to check the vertex and edge file for
6051
* @throws IOException iff the vertex or edge file cannot be generated
6152
*/
62-
public static void ensureGraphFilesExist(FormattedGraph formattedGraph) throws IOException, SQLException {
53+
public static void ensureGraphFilesExist(FormattedGraph formattedGraph) throws IOException, InterruptedException {
6354
ensureEdgeFileExists(formattedGraph);
6455
}
6556

66-
private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws IOException, SQLException {
57+
private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws IOException, InterruptedException {
6758
if (Paths.get(formattedGraph.getEdgeFilePath()).toFile().exists()) {
6859
LOG.info("Found edge file for graph \"{}\" at \"{}\".", formattedGraph.getName(), formattedGraph.getEdgeFilePath());
6960
return;
@@ -80,22 +71,24 @@ private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws I
8071
LOG.info("Done generating edge file for graph \"{}\".", formattedGraph.getGraph().getName());
8172
}
8273

83-
private static void generateEdgeFile(FormattedGraph formattedGraph) throws IOException, SQLException {
74+
private static void generateEdgeFile(FormattedGraph formattedGraph) throws IOException, InterruptedException {
8475
// Ensure that the output directory exists
8576
Files.createDirectories(Paths.get(formattedGraph.getEdgeFilePath()).getParent());
8677

87-
String dbFile = String.format("%s/edge_file.duckdb", Paths.get(formattedGraph.getEdgeFilePath()).toFile().getParent());
88-
new File(dbFile).delete();
78+
List<String> command = new ArrayList<>();
79+
command.add("/bin/bash");
80+
command.add("-c");
81+
command.add(String.format("cut -d' ' -f1,2 %s > %s",
82+
formattedGraph.getGraph().getSourceGraph().getEdgeFilePath(),
83+
formattedGraph.getEdgeFilePath()
84+
));
8985

90-
try (DuckDBConnection conn = (DuckDBConnection) DriverManager.getConnection(
91-
String.format("jdbc:duckdb:%s", dbFile)
92-
)) {
93-
Statement stmt = conn.createStatement();
94-
stmt.execute("SET experimental_parallel_csv=true;");
95-
stmt.execute("CREATE OR REPLACE TABLE e(source BIGINT NOT NULL, target BIGINT NOT NULL, weight DOUBLE);");
96-
stmt.execute(String.format("COPY e FROM '%s' (DELIMITER ' ', FORMAT csv)", formattedGraph.getGraph().getSourceGraph().getEdgeFilePath()));
97-
// Drop a lot of weight with this one weird trick
98-
stmt.execute(String.format("COPY e (source, target) TO '%s' (DELIMITER ' ', FORMAT csv)", formattedGraph.getEdgeFilePath()));
86+
ProcessBuilder processBuilder = new ProcessBuilder(command);
87+
processBuilder.redirectErrorStream(true);
88+
Process process = processBuilder.start();
89+
int returnCode = process.waitFor();
90+
if (returnCode != 0) {
91+
throw new IOException("Creating minimized edge file failed");
9992
}
10093
}
10194

0 commit comments

Comments
 (0)