Skip to content

Commit cf86ef9

Browse files
[clang][analyzer] Make per-entry-point metric rows uniquely identifiable
Also remove the gratuitous spaces after "," that break strict CSV compliance. As the debug function name does not uniquely identify an entry point, add the main-TU name and the USR values for each entry point snapshot to reduce the likelihood of collisions between declarations across large projects. While adding a filename to each row increases the file size substantially, the difference in size for the compressed is acceptable. I evaluated it on our set of 200+ open source C and C++ projects with 3M entry points, and got the following results when adding these two columns: - Raw CSV file increased from 530MB to 1.1GB - Compressed file (XZ) increased from 54 MB to 78 MB -- CPP-7098 --------- Co-authored-by: Balazs Benics <[email protected]>
1 parent ef4471e commit cf86ef9

File tree

4 files changed

+42
-11
lines changed

4 files changed

+42
-11
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class EntryPointStat {
2525
public:
2626
llvm::StringLiteral name() const { return Name; }
2727

28-
static void lockRegistry();
28+
static void lockRegistry(llvm::StringRef CPPFileName);
2929

3030
static void takeSnapshot(const Decl *EntryPoint);
3131
static void dumpStatsAsCSV(llvm::raw_ostream &OS);

clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
#include "clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h"
1010
#include "clang/AST/DeclBase.h"
1111
#include "clang/Analysis/AnalysisDeclContext.h"
12+
#include "clang/Index/USRGeneration.h"
1213
#include "llvm/ADT/STLExtras.h"
14+
#include "llvm/ADT/SmallVector.h"
1315
#include "llvm/ADT/StringExtras.h"
1416
#include "llvm/ADT/StringRef.h"
1517
#include "llvm/Support/FileSystem.h"
@@ -38,6 +40,7 @@ struct Registry {
3840
};
3941

4042
std::vector<Snapshot> Snapshots;
43+
std::string EscapedCPPFileName;
4144
};
4245
} // namespace
4346

@@ -69,7 +72,7 @@ static void checkStatName(const EntryPointStat *M) {
6972
}
7073
}
7174

72-
void EntryPointStat::lockRegistry() {
75+
void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) {
7376
auto CmpByNames = [](const EntryPointStat *L, const EntryPointStat *R) {
7477
return L->name() < R->name();
7578
};
@@ -78,6 +81,8 @@ void EntryPointStat::lockRegistry() {
7881
enumerateStatVectors(
7982
[](const auto &Stats) { llvm::for_each(Stats, checkStatName); });
8083
StatsRegistry->IsLocked = true;
84+
llvm::raw_string_ostream OS(StatsRegistry->EscapedCPPFileName);
85+
llvm::printEscapedString(CPPFileName, OS);
8186
}
8287

8388
[[maybe_unused]] static bool isRegistered(llvm::StringLiteral Name) {
@@ -144,15 +149,27 @@ static std::vector<llvm::StringLiteral> getStatNames() {
144149
return Ret;
145150
}
146151

152+
static std::string getUSR(const Decl *D) {
153+
llvm::SmallVector<char> Buf;
154+
if (index::generateUSRForDecl(D, Buf)) {
155+
assert(false && "This should never fail");
156+
return AnalysisDeclContext::getFunctionName(D);
157+
}
158+
return llvm::toStringRef(Buf).str();
159+
}
160+
147161
void Registry::Snapshot::dumpAsCSV(llvm::raw_ostream &OS) const {
148162
OS << '"';
163+
llvm::printEscapedString(getUSR(EntryPoint), OS);
164+
OS << "\",\"";
165+
OS << StatsRegistry->EscapedCPPFileName << "\",\"";
149166
llvm::printEscapedString(
150167
clang::AnalysisDeclContext::getFunctionName(EntryPoint), OS);
151-
OS << "\", ";
168+
OS << "\",";
152169
auto PrintAsBool = [&OS](bool B) { OS << (B ? "true" : "false"); };
153-
llvm::interleaveComma(BoolStatValues, OS, PrintAsBool);
154-
OS << ((BoolStatValues.empty() || UnsignedStatValues.empty()) ? "" : ", ");
155-
llvm::interleaveComma(UnsignedStatValues, OS);
170+
llvm::interleave(BoolStatValues, OS, PrintAsBool, ",");
171+
OS << ((BoolStatValues.empty() || UnsignedStatValues.empty()) ? "" : ",");
172+
llvm::interleave(UnsignedStatValues, OS, [&OS](unsigned U) { OS << U; }, ",");
156173
}
157174

158175
static std::vector<bool> consumeBoolStats() {
@@ -181,8 +198,8 @@ void EntryPointStat::dumpStatsAsCSV(llvm::StringRef FileName) {
181198
}
182199

183200
void EntryPointStat::dumpStatsAsCSV(llvm::raw_ostream &OS) {
184-
OS << "EntryPoint, ";
185-
llvm::interleaveComma(getStatNames(), OS);
201+
OS << "USR,File,DebugName,";
202+
llvm::interleave(getStatNames(), OS, [&OS](const auto &a) { OS << a; }, ",");
186203
OS << "\n";
187204

188205
std::vector<std::string> Rows;

clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ STAT_MAX(MaxCFGSize, "The maximum number of basic blocks in a function.");
6565

6666
namespace {
6767

68+
StringRef getMainFileName(const CompilerInvocation &Invocation) {
69+
if (!Invocation.getFrontendOpts().Inputs.empty()) {
70+
const FrontendInputFile &Input = Invocation.getFrontendOpts().Inputs[0];
71+
return Input.isFile() ? Input.getFile()
72+
: Input.getBuffer().getBufferIdentifier();
73+
}
74+
return "<no input>";
75+
}
76+
6877
class AnalysisConsumer : public AnalysisASTConsumer,
6978
public DynamicRecursiveASTVisitor {
7079
enum {
@@ -125,7 +134,8 @@ class AnalysisConsumer : public AnalysisASTConsumer,
125134
PP(CI.getPreprocessor()), OutDir(outdir), Opts(opts), Plugins(plugins),
126135
Injector(std::move(injector)), CTU(CI),
127136
MacroExpansions(CI.getLangOpts()) {
128-
EntryPointStat::lockRegistry();
137+
138+
EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation()));
129139
DigestAnalyzerOptions();
130140

131141
if (Opts.AnalyzerDisplayProgress || Opts.PrintStats ||

clang/test/Analysis/analyzer-stats/entry-point-stats.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
// RUN: %csv2json "%t.csv" | FileCheck --check-prefix=CHECK %s
66
//
77
// CHECK: {
8-
// CHECK-NEXT: "fib(unsigned int)": {
8+
// CHECK-NEXT: "c:@F@fib#i#": {
9+
// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
10+
// CHECK-NEXT: "DebugName": "fib(unsigned int)",
911
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
1012
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
1113
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",
@@ -40,7 +42,9 @@
4042
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
4143
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}"
4244
// CHECK-NEXT: },
43-
// CHECK-NEXT: "main(int, char **)": {
45+
// CHECK-NEXT: "c:@F@main#I#**C#": {
46+
// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
47+
// CHECK-NEXT: "DebugName": "main(int, char **)",
4448
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
4549
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
4650
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",

0 commit comments

Comments
 (0)