Skip to content

Commit 3893589

Browse files
feat: Minimize allocations in SCIP symbol parsing.
See PR for benchmarks.
1 parent 79fb777 commit 3893589

File tree

26 files changed

+3632
-2410
lines changed

26 files changed

+3632
-2410
lines changed

.github/workflows/golang.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ jobs:
1818
- uses: ./.github/actions/asdf
1919
with:
2020
golang: true
21-
- run: go test ./... -v
21+
- run: go test ./... -v -tags asserts

.prettierignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ reprolang/target
22
reprolang/src/grammar.json
33
reprolang/src/node-types.json
44
bindings/typescript
5+
docs/scip.md
56
.bin

.tool-versions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
golang 1.20.14
1+
golang 1.22.0
22
nodejs 16.20.2
33
shellcheck 0.7.1
44
yarn 1.22.22

Development.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
- [Project structure](#project-structure)
44
- [Code generation](#code-generation)
55
- [Debugging](#debugging)
6+
- [Benchmarking](#benchmarking)
67
- [Testing and adding new SCIP semantics](#testing-and-adding-new-scip-semantics)
78
- [Release a new version](#release-a-new-version)
89

@@ -69,6 +70,24 @@ and is not recommended for use in other settings.
6970
scip lint /path/to/index.scip
7071
```
7172

73+
## Benchmarking
74+
75+
For benchmarks, one can put test SCIP indexes under `dev/sample_indexes`.
76+
77+
Sourcegraph teammates can download several large indexes
78+
from this [Google drive folder](https://drive.google.com/drive/folders/1z62Se7eHaa5T89a16-y7s0Z1qbRY4VCg).
79+
80+
After that you can run:
81+
82+
```bash
83+
go run ./bindings/go/scip/speedtest
84+
```
85+
86+
to see the results.
87+
88+
Make sure to share benchmark results when making changes to
89+
the symbol parsing logic.
90+
7291
## Testing and adding new SCIP semantics
7392

7493
It is helpful to use reprolang to check the existing code navigation behavior,

bindings/go/scip/assertions.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
//go:build asserts
2+
3+
package scip
4+
5+
func assert(cond bool, msg string) {
6+
if !cond {
7+
panic(msg)
8+
}
9+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
//go:build !asserts
2+
3+
package scip
4+
5+
// assert is a noop in release builds - the implementation is in assertions.go
6+
func assert(cond bool, msg string) {}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package internal
2+
3+
import (
4+
"io"
5+
"os"
6+
"path/filepath"
7+
"sync/atomic"
8+
"testing"
9+
10+
"github.com/sourcegraph/beaut"
11+
"github.com/sourcegraph/beaut/lib/knownwf"
12+
conciter "github.com/sourcegraph/conc/iter"
13+
"github.com/sourcegraph/scip/bindings/go/scip"
14+
"github.com/sourcegraph/scip/bindings/go/scip/internal/shared"
15+
"github.com/stretchr/testify/require"
16+
"google.golang.org/protobuf/proto"
17+
)
18+
19+
func TestParseCompat(t *testing.T) {
20+
for _, path := range shared.SampleIndexes() {
21+
t.Run(filepath.Base(path), func(t *testing.T) {
22+
t.Parallel()
23+
scipReader, err := os.Open(path)
24+
require.Nil(t, err)
25+
scipBytes, err := io.ReadAll(scipReader)
26+
require.Nil(t, err)
27+
scipIndex := scip.Index{}
28+
require.NoError(t, proto.Unmarshal(scipBytes, &scipIndex))
29+
var total atomic.Int64
30+
conciter.ForEach(scipIndex.Documents, func(docPtr **scip.Document) {
31+
document := *docPtr
32+
if total.Load() > 1000*1000 {
33+
return
34+
}
35+
total.Add(int64(len(document.Occurrences)))
36+
var newSym scip.Symbol
37+
for i := 0; i < len(document.Occurrences); i++ {
38+
occ := document.Occurrences[i]
39+
oldSym, oldErr := ParsePartialSymbolV1ToBeDeleted(occ.Symbol, true)
40+
var newErr error
41+
require.NotPanics(t, func() {
42+
str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString)
43+
newErr = scip.ParseSymbolUTF8With(str, scip.ParseSymbolOptions{
44+
IncludeDescriptors: true,
45+
RecordOutput: &newSym,
46+
})
47+
}, "panic for symbol: %q", occ.Symbol)
48+
if oldErr != nil {
49+
require.Error(t, newErr,
50+
"old parser gave error %v but parse was successful with new parser (symbol: %q)",
51+
oldErr.Error(), occ.Symbol)
52+
continue
53+
} else if newErr != nil {
54+
require.NoError(t, newErr,
55+
"new parser gave error %v but parse was successful with old parser (symbol: %q)",
56+
newErr.Error(), occ.Symbol)
57+
}
58+
require.Equal(t, oldSym.Scheme, newSym.Scheme)
59+
require.Equal(t, oldSym.Package, newSym.Package)
60+
require.Equalf(t, len(oldSym.Descriptors), len(newSym.Descriptors), "symbol: %v, d1: %+v, d2: %+v", occ.Symbol,
61+
oldSym.Descriptors, newSym.Descriptors)
62+
for i, d := range oldSym.Descriptors {
63+
dnew := newSym.Descriptors[i]
64+
require.Equal(t, d.Name, dnew.Name, "symbol: %v", occ.Symbol)
65+
require.Equal(t, d.Suffix, dnew.Suffix, "symbol: %v", occ.Symbol)
66+
require.Equal(t, d.Disambiguator, dnew.Disambiguator, "symbol: %v", occ.Symbol)
67+
}
68+
}
69+
})
70+
})
71+
}
72+
}
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
package internal
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"github.com/cockroachdb/errors"
8+
"github.com/sourcegraph/scip/bindings/go/scip"
9+
"github.com/sourcegraph/scip/bindings/go/scip/internal/shared"
10+
)
11+
12+
func tryParseLocalSymbol(symbol string) (string, error) {
13+
if !strings.HasPrefix(symbol, "local ") {
14+
return "", nil
15+
}
16+
suffix := symbol[6:]
17+
if len(suffix) > 0 && shared.IsSimpleIdentifier(suffix) {
18+
return suffix, nil
19+
}
20+
return "", errors.Newf("expected format 'local <simple-identifier>' but got: %v", symbol)
21+
}
22+
23+
// ParsePartialSymbolV1ToBeDeleted parses an SCIP string into the Symbol message
24+
// with the option to exclude the `.Descriptor` field.
25+
//
26+
// Nov 30 2024: This is currently only present for benchmarking + compatibility
27+
// reasons. We can remove this in the future once we're confident that the new
28+
// parser handles everything correctly.
29+
func ParsePartialSymbolV1ToBeDeleted(symbol string, includeDescriptors bool) (*scip.Symbol, error) {
30+
// TODO: Rip out this, and call
31+
local, err := tryParseLocalSymbol(symbol)
32+
if err != nil {
33+
return nil, err
34+
}
35+
if local != "" {
36+
return &scip.Symbol{
37+
Scheme: "local",
38+
Descriptors: []*scip.Descriptor{
39+
{Name: local, Suffix: scip.Descriptor_Local},
40+
},
41+
}, nil
42+
}
43+
s := newSymbolParser(symbol)
44+
scheme, err := s.acceptSpaceEscapedIdentifier("scheme")
45+
if err != nil {
46+
return nil, err
47+
}
48+
manager, err := s.acceptSpaceEscapedIdentifier("package manager")
49+
if err != nil {
50+
return nil, err
51+
}
52+
if manager == "." {
53+
manager = ""
54+
}
55+
packageName, err := s.acceptSpaceEscapedIdentifier("package name")
56+
if err != nil {
57+
return nil, err
58+
}
59+
if packageName == "." {
60+
packageName = ""
61+
}
62+
packageVersion, err := s.acceptSpaceEscapedIdentifier("package version")
63+
if err != nil {
64+
return nil, err
65+
}
66+
if packageVersion == "." {
67+
packageVersion = ""
68+
}
69+
var descriptors []*scip.Descriptor
70+
if includeDescriptors {
71+
descriptors, err = s.parseDescriptors()
72+
}
73+
return &scip.Symbol{
74+
Scheme: scheme,
75+
Package: &scip.Package{
76+
Manager: manager,
77+
Name: packageName,
78+
Version: packageVersion,
79+
},
80+
Descriptors: descriptors,
81+
}, err
82+
}
83+
84+
type symbolParser struct {
85+
Symbol []rune
86+
index int
87+
SymbolString string
88+
}
89+
90+
func newSymbolParser(symbol string) *symbolParser {
91+
return &symbolParser{
92+
SymbolString: symbol,
93+
Symbol: []rune(symbol),
94+
index: 0,
95+
}
96+
}
97+
98+
func (s *symbolParser) error(message string) error {
99+
return errors.Newf("%s\n%s\n%s^", message, s.SymbolString, strings.Repeat("_", s.index))
100+
}
101+
102+
func (s *symbolParser) current() rune {
103+
if s.index < len(s.Symbol) {
104+
return s.Symbol[s.index]
105+
}
106+
return '\x00'
107+
}
108+
109+
func (s *symbolParser) peekNext() rune {
110+
if s.index+1 < len(s.Symbol) {
111+
return s.Symbol[s.index]
112+
}
113+
return 0
114+
}
115+
116+
func (s *symbolParser) parseDescriptors() ([]*scip.Descriptor, error) {
117+
var result []*scip.Descriptor
118+
for s.index < len(s.Symbol) {
119+
descriptor, err := s.parseDescriptor()
120+
if err != nil {
121+
return nil, err
122+
}
123+
result = append(result, descriptor)
124+
}
125+
return result, nil
126+
}
127+
128+
func (s *symbolParser) parseDescriptor() (*scip.Descriptor, error) {
129+
start := s.index
130+
switch s.peekNext() {
131+
case '(':
132+
s.index++
133+
name, err := s.acceptIdentifier("parameter name")
134+
if err != nil {
135+
return nil, err
136+
}
137+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Parameter}, s.acceptCharacter(')', "closing parameter name")
138+
case '[':
139+
s.index++
140+
name, err := s.acceptIdentifier("type parameter name")
141+
if err != nil {
142+
return nil, err
143+
}
144+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_TypeParameter}, s.acceptCharacter(']', "closing type parameter name")
145+
default:
146+
name, err := s.acceptIdentifier("descriptor name")
147+
if err != nil {
148+
return nil, err
149+
}
150+
suffix := s.current()
151+
s.index++
152+
switch suffix {
153+
case '(':
154+
disambiguator := ""
155+
if s.peekNext() != ')' {
156+
disambiguator, err = s.acceptIdentifier("method disambiguator")
157+
if err != nil {
158+
return nil, err
159+
}
160+
}
161+
err = s.acceptCharacter(')', "closing method")
162+
if err != nil {
163+
return nil, err
164+
}
165+
return &scip.Descriptor{Name: name, Disambiguator: disambiguator, Suffix: scip.Descriptor_Method}, s.acceptCharacter('.', "closing method")
166+
case '/':
167+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Namespace}, nil
168+
case '.':
169+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Term}, nil
170+
case '#':
171+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Type}, nil
172+
case ':':
173+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Meta}, nil
174+
case '!':
175+
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Macro}, nil
176+
default:
177+
}
178+
}
179+
180+
end := s.index
181+
if s.index > len(s.Symbol) {
182+
end = len(s.Symbol)
183+
}
184+
return nil, errors.Newf("unrecognized descriptor %q", string(s.Symbol[start:end]))
185+
}
186+
187+
func (s *symbolParser) acceptIdentifier(what string) (string, error) {
188+
if s.current() == '`' {
189+
s.index++
190+
return s.acceptBacktickEscapedIdentifier(what)
191+
}
192+
start := s.index
193+
for s.index < len(s.Symbol) && shared.IsSimpleIdentifierCharacter(s.current()) {
194+
s.index++
195+
}
196+
if start == s.index {
197+
return "", s.error("empty identifier")
198+
}
199+
return string(s.Symbol[start:s.index]), nil
200+
}
201+
202+
func (s *symbolParser) acceptSpaceEscapedIdentifier(what string) (string, error) {
203+
return s.acceptEscapedIdentifier(what, ' ')
204+
}
205+
206+
func (s *symbolParser) acceptBacktickEscapedIdentifier(what string) (string, error) {
207+
return s.acceptEscapedIdentifier(what, '`')
208+
}
209+
210+
func (s *symbolParser) acceptEscapedIdentifier(what string, escapeCharacter rune) (string, error) {
211+
builder := strings.Builder{}
212+
for s.index < len(s.Symbol) {
213+
ch := s.current()
214+
if ch == escapeCharacter {
215+
s.index++
216+
if s.index >= len(s.Symbol) {
217+
break
218+
}
219+
if s.current() == escapeCharacter {
220+
// Escaped space character.
221+
builder.WriteRune(ch)
222+
} else {
223+
return builder.String(), nil
224+
}
225+
} else {
226+
builder.WriteRune(ch)
227+
}
228+
s.index++
229+
}
230+
return "", s.error(fmt.Sprintf("reached end of symbol while parsing <%s>, expected a '%v' character", what, string(escapeCharacter)))
231+
}
232+
233+
func (s *symbolParser) acceptCharacter(r rune, what string) error {
234+
if s.current() == r {
235+
s.index++
236+
return nil
237+
}
238+
return s.error(fmt.Sprintf("expected '%v', obtained '%v', while parsing %v", string(r), string(s.current()), what))
239+
}

0 commit comments

Comments
 (0)