19
19
20
20
import org .apache .logging .log4j .LogManager ;
21
21
import org .apache .logging .log4j .Logger ;
22
- import org .duckdb .DuckDBConnection ;
23
22
import science .atlarge .graphalytics .domain .graph .FormattedGraph ;
24
- import science .atlarge .graphalytics .domain .graph .PropertyList ;
25
- import science .atlarge .graphalytics .util .io .VertexListInputStreamReader ;
26
- import science .atlarge .graphalytics .util .io .VertexListPropertyFilter ;
27
- import science .atlarge .graphalytics .util .io .VertexListStreamWriter ;
28
23
29
- import java .io .File ;
30
- import java .io .FileInputStream ;
31
- import java .io .FileOutputStream ;
32
24
import java .io .IOException ;
33
25
import java .nio .file .Files ;
34
26
import java .nio .file .Paths ;
35
- import java .sql .DriverManager ;
36
- import java .sql .SQLException ;
37
- import java .sql .Statement ;
27
+ import java .util .ArrayList ;
28
+ import java .util .List ;
38
29
39
30
/**
40
31
* Utility class for managing graph files. Responsible for generating additional graph files from a source dataset
@@ -59,11 +50,11 @@ private GraphFileManager() {
59
50
* @param formattedGraph the graph to check the vertex and edge file for
60
51
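* @throws IOException iff the vertex or edge file cannot be generated
* @throws InterruptedException if the calling thread is interrupted while waiting for the edge file to be generated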
61
52
*/
62
- public static void ensureGraphFilesExist (FormattedGraph formattedGraph ) throws IOException , SQLException {
53
+ public static void ensureGraphFilesExist (FormattedGraph formattedGraph ) throws IOException , InterruptedException {
63
54
// Only the edge file is checked (and generated if missing) here; this method performs no vertex file handling itself.
ensureEdgeFileExists (formattedGraph );
64
55
}
65
56
66
- private static void ensureEdgeFileExists (FormattedGraph formattedGraph ) throws IOException , SQLException {
57
+ private static void ensureEdgeFileExists (FormattedGraph formattedGraph ) throws IOException , InterruptedException {
67
58
if (Paths .get (formattedGraph .getEdgeFilePath ()).toFile ().exists ()) {
68
59
LOG .info ("Found edge file for graph \" {}\" at \" {}\" ." , formattedGraph .getName (), formattedGraph .getEdgeFilePath ());
69
60
return ;
@@ -80,22 +71,24 @@ private static void ensureEdgeFileExists(FormattedGraph formattedGraph) throws I
80
71
LOG .info ("Done generating edge file for graph \" {}\" ." , formattedGraph .getGraph ().getName ());
81
72
}
82
73
83
- private static void generateEdgeFile (FormattedGraph formattedGraph ) throws IOException , SQLException {
74
+ private static void generateEdgeFile (FormattedGraph formattedGraph ) throws IOException , InterruptedException {
84
75
// Ensure that the output directory exists
85
76
Files .createDirectories (Paths .get (formattedGraph .getEdgeFilePath ()).getParent ());
86
77
87
- String dbFile = String .format ("%s/edge_file.duckdb" , Paths .get (formattedGraph .getEdgeFilePath ()).toFile ().getParent ());
88
- new File (dbFile ).delete ();
78
+ List <String > command = new ArrayList <>();
79
+ command .add ("/bin/bash" );
80
+ command .add ("-c" );
81
+ command .add (String .format ("cut -d' ' -f1,2 %s > %s" ,
82
+ formattedGraph .getGraph ().getSourceGraph ().getEdgeFilePath (),
83
+ formattedGraph .getEdgeFilePath ()
84
+ ));
89
85
90
- try (DuckDBConnection conn = (DuckDBConnection ) DriverManager .getConnection (
91
- String .format ("jdbc:duckdb:%s" , dbFile )
92
- )) {
93
- Statement stmt = conn .createStatement ();
94
- stmt .execute ("SET experimental_parallel_csv=true;" );
95
- stmt .execute ("CREATE OR REPLACE TABLE e(source BIGINT NOT NULL, target BIGINT NOT NULL, weight DOUBLE);" );
96
- stmt .execute (String .format ("COPY e FROM '%s' (DELIMITER ' ', FORMAT csv)" , formattedGraph .getGraph ().getSourceGraph ().getEdgeFilePath ()));
97
- // Drop a lot of weight with this one weird trick
98
- stmt .execute (String .format ("COPY e (source, target) TO '%s' (DELIMITER ' ', FORMAT csv)" , formattedGraph .getEdgeFilePath ()));
86
+ ProcessBuilder processBuilder = new ProcessBuilder (command );
87
+ processBuilder .redirectErrorStream (true );
88
+ Process process = processBuilder .start ();
89
+ int returnCode = process .waitFor ();
90
+ if (returnCode != 0 ) {
91
+ throw new IOException ("Creating minimized edge file failed" );
99
92
}
100
93
}
101
94
0 commit comments