Skip to content

Commit 02e7470

Browse files
authored
Merge pull request #2234 from jplag/feature/parallel-process
Further parallelization of the JPlag pipeline
2 parents 377b63b + 055b720 commit 02e7470

File tree

25 files changed

+233
-288
lines changed

25 files changed

+233
-288
lines changed

core/src/main/java/de/jplag/SubmissionSet.java

Lines changed: 53 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
import java.util.ArrayList;
66
import java.util.List;
7+
import java.util.concurrent.ExecutorService;
8+
import java.util.concurrent.Executors;
9+
import java.util.concurrent.TimeUnit;
10+
import java.util.concurrent.atomic.AtomicInteger;
711
import java.util.stream.Collectors;
812

913
import org.slf4j.Logger;
@@ -39,11 +43,11 @@ public class SubmissionSet {
3943
private final Submission baseCodeSubmission;
4044

4145
private final JPlagOptions options;
42-
private int errors = 0;
43-
private String currentSubmissionName;
46+
private final AtomicInteger errors = new AtomicInteger(0);
4447

4548
/**
46-
* @param submissions Submissions to check for plagiarism.
49+
* Creates a submissions set and parses all submissions.
50+
* @param submissions list of submissions to check for plagiarism.
4751
* @param baseCode Base code submission if it exists or {@code null}.
4852
* @param options The JPlag options
4953
* @throws ExitException if the submissions cannot be parsed.
@@ -52,7 +56,10 @@ public SubmissionSet(List<Submission> submissions, Submission baseCode, JPlagOpt
5256
this.allSubmissions = submissions;
5357
this.baseCodeSubmission = baseCode;
5458
this.options = options;
55-
parseAllSubmissions();
59+
parseSubmissions(allSubmissions);
60+
if (baseCodeSubmission != null) {
61+
parseBaseCodeSubmission(baseCodeSubmission);
62+
}
5663
this.submissions = filterValidSubmissions();
5764
invalidSubmissions = filterInvalidSubmissions();
5865
}
@@ -111,7 +118,12 @@ public void normalizeSubmissions() {
111118
if (baseCodeSubmission != null) {
112119
baseCodeSubmission.normalize();
113120
}
114-
ProgressBarLogger.iterate(ProgressBarType.TOKEN_STRING_NORMALIZATION, submissions, Submission::normalize);
121+
ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.TOKEN_SEQUENCE_NORMALIZATION, submissions.size());
122+
submissions.parallelStream().forEach(submission -> {
123+
submission.normalize();
124+
progressBar.step();
125+
});
126+
progressBar.dispose();
115127
}
116128

117129
private List<Submission> filterValidSubmissions() {
@@ -122,17 +134,6 @@ private List<Submission> filterInvalidSubmissions() {
122134
return allSubmissions.stream().filter(it -> it.getState() != VALID).toList();
123135
}
124136

125-
private void parseAllSubmissions() throws ExitException {
126-
try {
127-
parseSubmissions(allSubmissions);
128-
if (baseCodeSubmission != null) {
129-
parseBaseCodeSubmission(baseCodeSubmission);
130-
}
131-
} catch (OutOfMemoryError exception) {
132-
throw new SubmissionException("Out of memory during parsing of submission \"" + currentSubmissionName + "\"", exception);
133-
}
134-
}
135-
136137
/**
137138
* Parse the given base code submission.
138139
*/
@@ -152,30 +153,54 @@ private void parseBaseCodeSubmission(Submission baseCode) throws BasecodeExcepti
152153
* Parse all given submissions.
153154
* @param submissions The list of submissions
154155
*/
155-
private void parseSubmissions(List<Submission> submissions) throws LanguageException {
156+
private void parseSubmissions(List<Submission> submissions) throws ExitException {
156157
if (submissions.isEmpty()) {
157158
logger.error("No submissions to parse!");
158159
return;
159160
}
160161

161162
ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.PARSING, submissions.size());
162-
for (Submission submission : submissions) {
163163

164-
logger.trace("------ Parsing submission: " + submission.getName());
165-
currentSubmissionName = submission.getName();
166-
167-
boolean successful = submission.parse(options.debugParser(), options.normalize(), options.minimumTokenMatch());
168-
if (!successful) {
169-
errors++;
170-
logger.debug("ERROR -> Submission {} removed with reason {}", currentSubmissionName, submission.getState());
164+
if (options.language().expectsSubmissionOrder()) {
165+
for (Submission submission : submissions) {
166+
parseSingleSubmission(progressBar, submission);
171167
}
172-
progressBar.step();
168+
} else {
169+
parseSubmissionsInParallel(submissions, progressBar);
173170
}
171+
174172
progressBar.dispose();
175173

176-
int validSubmissions = submissions.size() - errors;
174+
int validSubmissions = submissions.size() - errors.get();
177175
logger.debug("{} submissions parsed successfully!", validSubmissions);
178-
logger.debug("{} parser error{}!", errors, errors != 1 ? "s" : "");
176+
logger.debug("{} parser error{}!", errors, errors.get() != 1 ? "s" : "");
177+
}
178+
179+
private void parseSubmissionsInParallel(List<Submission> submissions, ProgressBar progressBar) throws SubmissionException {
180+
try (ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor()) {
181+
for (Submission submission : submissions) {
182+
executor.submit(() -> {
183+
parseSingleSubmission(progressBar, submission);
184+
return null; // Ensure the lambda is a Callable for exception handling
185+
});
186+
}
187+
executor.shutdown();
188+
executor.awaitTermination(24, TimeUnit.HOURS); // Maximum time all processing can take.
189+
} catch (InterruptedException exception) {
190+
throw new SubmissionException("Error while parsing the submissions.", exception);
191+
}
192+
}
193+
194+
/**
195+
* Parses a single submission (thread safe).
196+
*/
197+
private void parseSingleSubmission(ProgressBar progressBar, Submission submission) throws LanguageException {
198+
boolean successful = submission.parse(options.debugParser(), options.normalize(), options.minimumTokenMatch());
199+
if (!successful) {
200+
errors.incrementAndGet();
201+
logger.debug("ERROR -> Submission {} removed with reason {}", submission.getName(), submission.getState());
202+
}
203+
progressBar.step();
179204
}
180205

181206
}

core/src/main/java/de/jplag/logging/ProgressBarType.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
* The available processes. Used as a hint for the ui, which step JPlag is currently performing.
55
*/
66
public enum ProgressBarType {
7-
LOADING("Loading Submissions ", false),
8-
PARSING("Parsing Submissions ", false),
7+
LOADING("Loading Submissions", false),
8+
PARSING("Parsing Submissions", false),
99
TOKEN_VALUE_CREATION("Preparing Submissions", false),
10-
COMPARING("Comparing Submissions", false),
11-
MATCH_MERGING("Merging matched subsequences ", false),
12-
TOKEN_STRING_NORMALIZATION("Normalizing Token Sequence", false),
13-
CLUSTERING("Finding clusters ", true);
10+
COMPARING("Comparing Submission Pairs", false),
11+
MATCH_MERGING("Merging Matched Subsequences ", false),
12+
TOKEN_SEQUENCE_NORMALIZATION("Normalizing Token Sequences", false),
13+
CLUSTERING("Finding Clusters ", true);
1414

1515
private final String defaultText;
1616
private final boolean isIdleBar;

core/src/main/java/de/jplag/merging/MatchMerging.java

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import de.jplag.SharedTokenType;
1111
import de.jplag.Submission;
1212
import de.jplag.Token;
13+
import de.jplag.logging.ProgressBar;
1314
import de.jplag.logging.ProgressBarLogger;
1415
import de.jplag.logging.ProgressBarType;
1516
import de.jplag.options.JPlagOptions;
@@ -43,29 +44,31 @@ public MatchMerging(JPlagOptions options) {
4344
*/
4445
public JPlagResult mergeMatchesOf(JPlagResult result) {
4546
long timeBeforeStartInMillis = System.currentTimeMillis();
46-
4747
List<JPlagComparison> comparisons = new ArrayList<>(result.getAllComparisons());
48-
List<JPlagComparison> comparisonsMerged = new ArrayList<>();
49-
50-
ProgressBarLogger.iterate(ProgressBarType.MATCH_MERGING, comparisons, comparison -> {
51-
numberOfMerges = 0;
52-
Submission leftSubmission = comparison.firstSubmission().copy();
53-
Submission rightSubmission = comparison.secondSubmission().copy();
54-
List<Match> globalMatches = new ArrayList<>(comparison.matches());
55-
globalMatches.addAll(comparison.ignoredMatches());
56-
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
57-
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
58-
if (numberOfMerges >= options.mergingOptions().minimumRequiredMerges()) {
59-
comparisonsMerged.add(new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>()));
60-
} else {
61-
comparisonsMerged.add(comparison);
62-
}
63-
});
48+
49+
ProgressBar progressBar = ProgressBarLogger.createProgressBar(ProgressBarType.MATCH_MERGING, comparisons.size());
50+
List<JPlagComparison> comparisonsMerged = comparisons.parallelStream().map(it -> mergeMatchesOf(it, progressBar)).toList();
51+
progressBar.dispose();
6452

6553
long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
6654
return new JPlagResult(comparisonsMerged, result.getSubmissions(), result.getDuration() + durationInMillis, options);
6755
}
6856

57+
private JPlagComparison mergeMatchesOf(JPlagComparison comparison, ProgressBar progressBar) {
58+
numberOfMerges = 0;
59+
Submission leftSubmission = comparison.firstSubmission().copy();
60+
Submission rightSubmission = comparison.secondSubmission().copy();
61+
List<Match> globalMatches = new ArrayList<>(comparison.matches());
62+
globalMatches.addAll(comparison.ignoredMatches());
63+
globalMatches = mergeNeighbors(globalMatches, leftSubmission, rightSubmission);
64+
globalMatches = globalMatches.stream().filter(it -> it.length() >= options.minimumTokenMatch()).toList();
65+
progressBar.step();
66+
if (numberOfMerges >= options.mergingOptions().minimumRequiredMerges()) {
67+
return new JPlagComparison(leftSubmission, rightSubmission, globalMatches, new ArrayList<>());
68+
}
69+
return comparison;
70+
}
71+
6972
/**
7073
* Computes neighbors by sorting based on order of matches in the left and right submissions and then checking which are
7174
* next to each other in both.

language-antlr-utils/src/main/java/de/jplag/antlr/AbstractAntlrLanguage.java

Lines changed: 0 additions & 53 deletions
This file was deleted.

language-antlr-utils/src/test/java/de/jplag/antlr/LanguageTest.java

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,21 @@
11
package de.jplag.antlr;
22

3-
import static org.junit.jupiter.api.Assertions.assertThrows;
4-
5-
import java.io.File;
63
import java.util.Set;
74

85
import org.junit.jupiter.api.Assertions;
96
import org.junit.jupiter.api.Test;
107

118
import de.jplag.ParsingException;
129
import de.jplag.antlr.testLanguage.TestLanguage;
13-
import de.jplag.antlr.testLanguage.TestParserAdapter;
1410

1511
/**
1612
* Some tests for the abstract antlr language
1713
*/
1814
class LanguageTest {
19-
@Test
20-
void testExceptionForNoDefinedParser() {
21-
LanguageWithoutParser lang = new LanguageWithoutParser();
22-
Set<File> emptySet = Set.of();
23-
assertThrows(UnsupportedOperationException.class, () -> lang.parse(emptySet, false));
24-
}
2515

2616
@Test
2717
void testLanguageWithStaticParser() throws ParsingException {
2818
TestLanguage lang = new TestLanguage();
2919
Assertions.assertEquals(0, lang.parse(Set.of(), false).size());
3020
}
31-
32-
@Test
33-
void testLanguageWithLazyParser() throws ParsingException {
34-
LanguageWithLazyParser lang = new LanguageWithLazyParser();
35-
Assertions.assertEquals(0, lang.parse(Set.of(), false).size());
36-
}
37-
38-
private static class LanguageWithoutParser extends AbstractAntlrLanguage {
39-
@Override
40-
public String[] suffixes() {
41-
return new String[0];
42-
}
43-
44-
@Override
45-
public String getName() {
46-
return null;
47-
}
48-
49-
@Override
50-
public String getIdentifier() {
51-
return null;
52-
}
53-
54-
@Override
55-
public int minimumTokenMatch() {
56-
return 0;
57-
}
58-
}
59-
60-
private static class LanguageWithLazyParser extends LanguageWithoutParser {
61-
@Override
62-
protected AbstractAntlrParserAdapter<?> initializeParser(boolean normalize) {
63-
return new TestParserAdapter();
64-
}
65-
}
6621
}

language-antlr-utils/src/test/java/de/jplag/antlr/testLanguage/TestLanguage.java

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
package de.jplag.antlr.testLanguage;
22

3-
import de.jplag.antlr.AbstractAntlrLanguage;
4-
5-
public class TestLanguage extends AbstractAntlrLanguage {
6-
/**
7-
* New instance
8-
*/
9-
public TestLanguage() {
10-
super(new TestParserAdapter());
11-
}
3+
import java.io.File;
4+
import java.util.List;
5+
import java.util.Set;
6+
7+
import de.jplag.Language;
8+
import de.jplag.ParsingException;
9+
import de.jplag.Token;
10+
11+
public class TestLanguage implements Language {
1212

1313
@Override
1414
public String[] suffixes() {
@@ -29,4 +29,9 @@ public String getIdentifier() {
2929
public int minimumTokenMatch() {
3030
return 8;
3131
}
32+
33+
@Override
34+
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
35+
return new TestParserAdapter().parse(files);
36+
}
3237
}

languages/c/src/main/java/de/jplag/c/CLanguage.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,6 @@ public class CLanguage implements Language {
1515
private static final String NAME = "C";
1616
private static final String IDENTIFIER = "c";
1717

18-
private final Scanner scanner; // c code is scanned not parsed
19-
20-
public CLanguage() {
21-
scanner = new Scanner();
22-
}
23-
2418
@Override
2519
public String[] suffixes() {
2620
return new String[] {".cpp", ".CPP", ".cxx", ".CXX", ".c++", ".C++", ".c", ".C", ".cc", ".CC", ".h", ".H", ".hpp", ".HPP", ".hh", ".HH"};
@@ -43,6 +37,6 @@ public int minimumTokenMatch() {
4337

4438
@Override
4539
public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
46-
return this.scanner.scan(files);
40+
return new Scanner().scan(files);
4741
}
4842
}

0 commit comments

Comments
 (0)