Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/golang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ jobs:
- uses: ./.github/actions/asdf
with:
golang: true
- run: go test ./... -v
- run: go test ./... -v -tags asserts
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ reprolang/target
reprolang/src/grammar.json
reprolang/src/node-types.json
bindings/typescript
docs/scip.md
.bin
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
golang 1.20.14
golang 1.22.0

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we go to 1.23.x (3 at the moment) while were at it?

Copy link
Contributor Author

@varungandhi-src varungandhi-src Dec 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's potentially too restrictive since we're providing a library. The Go toolchain typically provides support for ~2 major versions, and a bunch of OSS libraries do the same.

In principle, we could split the code into different modules so that we can aggressively bump the version for the CLI and leave the version bound for the bindings lower, but that would make things more complicated, so not doing that for now.

nodejs 16.20.2
shellcheck 0.7.1
yarn 1.22.22
Expand Down
19 changes: 19 additions & 0 deletions Development.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- [Project structure](#project-structure)
- [Code generation](#code-generation)
- [Debugging](#debugging)
- [Benchmarking](#benchmarking)
- [Testing and adding new SCIP semantics](#testing-and-adding-new-scip-semantics)
- [Release a new version](#release-a-new-version)

Expand Down Expand Up @@ -69,6 +70,24 @@ and is not recommended for use in other settings.
scip lint /path/to/index.scip
```

## Benchmarking

For benchmarks, one can put test SCIP indexes under `dev/sample_indexes`.

Sourcegraph teammates can download several large indexes
from this [Google drive folder](https://drive.google.com/drive/folders/1z62Se7eHaa5T89a16-y7s0Z1qbRY4VCg).

After that you can run:

```bash
go run ./bindings/go/scip/speedtest
```

to see the results.

Make sure to share benchmark results when making changes to
the symbol parsing logic.

## Testing and adding new SCIP semantics

It is helpful to use reprolang to check the existing code navigation behavior,
Expand Down
9 changes: 9 additions & 0 deletions bindings/go/scip/assertions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//go:build asserts

package scip

func assert(cond bool, msg string) {
if !cond {
panic(msg)
}
}
6 changes: 6 additions & 0 deletions bindings/go/scip/assertions_noop.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//go:build !asserts

package scip

// assert is a noop in release builds - the implementation is in assertions.go
func assert(cond bool, msg string) {}
72 changes: 72 additions & 0 deletions bindings/go/scip/internal/compat_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package internal

import (
"io"
"os"
"path/filepath"
"sync/atomic"
"testing"

"github.com/sourcegraph/beaut"
"github.com/sourcegraph/beaut/lib/knownwf"
conciter "github.com/sourcegraph/conc/iter"
"github.com/sourcegraph/scip/bindings/go/scip"
"github.com/sourcegraph/scip/bindings/go/scip/internal/shared"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
)

func TestParseCompat(t *testing.T) {
for _, path := range shared.SampleIndexes() {
t.Run(filepath.Base(path), func(t *testing.T) {
t.Parallel()
scipReader, err := os.Open(path)
require.Nil(t, err)
scipBytes, err := io.ReadAll(scipReader)
require.Nil(t, err)
scipIndex := scip.Index{}
require.NoError(t, proto.Unmarshal(scipBytes, &scipIndex))
var total atomic.Int64
conciter.ForEach(scipIndex.Documents, func(docPtr **scip.Document) {
document := *docPtr
if total.Load() > 1000*1000 {
return
}
total.Add(int64(len(document.Occurrences)))
var newSym scip.Symbol
for i := 0; i < len(document.Occurrences); i++ {
occ := document.Occurrences[i]
oldSym, oldErr := ParsePartialSymbolV1ToBeDeleted(occ.Symbol, true)
var newErr error
require.NotPanics(t, func() {
str := beaut.NewUTF8StringUnchecked(occ.Symbol, knownwf.UTF8DeserializedFromProtobufString)
newErr = scip.ParseSymbolUTF8With(str, scip.ParseSymbolOptions{
IncludeDescriptors: true,
RecordOutput: &newSym,
})
}, "panic for symbol: %q", occ.Symbol)
if oldErr != nil {
require.Error(t, newErr,
"old parser gave error %v but parse was successful with new parser (symbol: %q)",
oldErr.Error(), occ.Symbol)
continue
} else if newErr != nil {
require.NoError(t, newErr,
"new parser gave error %v but parse was successful with old parser (symbol: %q)",
newErr.Error(), occ.Symbol)
}
require.Equal(t, oldSym.Scheme, newSym.Scheme)
require.Equal(t, oldSym.Package, newSym.Package)
require.Equalf(t, len(oldSym.Descriptors), len(newSym.Descriptors), "symbol: %v, d1: %+v, d2: %+v", occ.Symbol,
oldSym.Descriptors, newSym.Descriptors)
for i, d := range oldSym.Descriptors {
dnew := newSym.Descriptors[i]
require.Equal(t, d.Name, dnew.Name, "symbol: %v", occ.Symbol)
require.Equal(t, d.Suffix, dnew.Suffix, "symbol: %v", occ.Symbol)
require.Equal(t, d.Disambiguator, dnew.Disambiguator, "symbol: %v", occ.Symbol)
}
}
})
})
}
}
238 changes: 238 additions & 0 deletions bindings/go/scip/internal/old_symbol_parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
package internal
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can remove this code after the new symbol parser doesn't show any problems in practice.


import (
"fmt"
"strings"

"github.com/cockroachdb/errors"
"github.com/sourcegraph/scip/bindings/go/scip"
"github.com/sourcegraph/scip/bindings/go/scip/internal/shared"
)

func tryParseLocalSymbol(symbol string) (string, error) {
if !strings.HasPrefix(symbol, "local ") {
return "", nil
}
suffix := symbol[6:]
if len(suffix) > 0 && shared.IsSimpleIdentifier(suffix) {
return suffix, nil
}
return "", errors.Newf("expected format 'local <simple-identifier>' but got: %v", symbol)
}

// ParsePartialSymbolV1ToBeDeleted parses an SCIP string into the Symbol message
// with the option to exclude the `.Descriptor` field.
//
// Nov 30 2024: This is currently only present for benchmarking + compatibility
// reasons. We can remove this in the future once we're confident that the new
// parser handles everything correctly.
func ParsePartialSymbolV1ToBeDeleted(symbol string, includeDescriptors bool) (*scip.Symbol, error) {
local, err := tryParseLocalSymbol(symbol)
if err != nil {
return nil, err
}
if local != "" {
return &scip.Symbol{
Scheme: "local",
Descriptors: []*scip.Descriptor{
{Name: local, Suffix: scip.Descriptor_Local},
},
}, nil
}
s := newSymbolParser(symbol)
scheme, err := s.acceptSpaceEscapedIdentifier("scheme")
if err != nil {
return nil, err
}
manager, err := s.acceptSpaceEscapedIdentifier("package manager")
if err != nil {
return nil, err
}
if manager == "." {
manager = ""
}
packageName, err := s.acceptSpaceEscapedIdentifier("package name")
if err != nil {
return nil, err
}
if packageName == "." {
packageName = ""
}
packageVersion, err := s.acceptSpaceEscapedIdentifier("package version")
if err != nil {
return nil, err
}
if packageVersion == "." {
packageVersion = ""
}
var descriptors []*scip.Descriptor
if includeDescriptors {
descriptors, err = s.parseDescriptors()
}
return &scip.Symbol{
Scheme: scheme,
Package: &scip.Package{
Manager: manager,
Name: packageName,
Version: packageVersion,
},
Descriptors: descriptors,
}, err
}

type symbolParser struct {
Symbol []rune
index int
SymbolString string
}

func newSymbolParser(symbol string) *symbolParser {
return &symbolParser{
SymbolString: symbol,
Symbol: []rune(symbol),
index: 0,
}
}

func (s *symbolParser) error(message string) error {
return errors.Newf("%s\n%s\n%s^", message, s.SymbolString, strings.Repeat("_", s.index))
}

func (s *symbolParser) current() rune {
if s.index < len(s.Symbol) {
return s.Symbol[s.index]
}
return '\x00'
}

func (s *symbolParser) peekNext() rune {
if s.index+1 < len(s.Symbol) {
return s.Symbol[s.index]
}
return 0
}

func (s *symbolParser) parseDescriptors() ([]*scip.Descriptor, error) {
var result []*scip.Descriptor
for s.index < len(s.Symbol) {
descriptor, err := s.parseDescriptor()
if err != nil {
return nil, err
}
result = append(result, descriptor)
}
return result, nil
}

func (s *symbolParser) parseDescriptor() (*scip.Descriptor, error) {
start := s.index
switch s.peekNext() {
case '(':
s.index++
name, err := s.acceptIdentifier("parameter name")
if err != nil {
return nil, err
}
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Parameter}, s.acceptCharacter(')', "closing parameter name")
case '[':
s.index++
name, err := s.acceptIdentifier("type parameter name")
if err != nil {
return nil, err
}
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_TypeParameter}, s.acceptCharacter(']', "closing type parameter name")
default:
name, err := s.acceptIdentifier("descriptor name")
if err != nil {
return nil, err
}
suffix := s.current()
s.index++
switch suffix {
case '(':
disambiguator := ""
if s.peekNext() != ')' {
disambiguator, err = s.acceptIdentifier("method disambiguator")
if err != nil {
return nil, err
}
}
err = s.acceptCharacter(')', "closing method")
if err != nil {
return nil, err
}
return &scip.Descriptor{Name: name, Disambiguator: disambiguator, Suffix: scip.Descriptor_Method}, s.acceptCharacter('.', "closing method")
case '/':
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Namespace}, nil
case '.':
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Term}, nil
case '#':
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Type}, nil
case ':':
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Meta}, nil
case '!':
return &scip.Descriptor{Name: name, Suffix: scip.Descriptor_Macro}, nil
default:
}
}

end := s.index
if s.index > len(s.Symbol) {
end = len(s.Symbol)
}
return nil, errors.Newf("unrecognized descriptor %q", string(s.Symbol[start:end]))
}

func (s *symbolParser) acceptIdentifier(what string) (string, error) {
if s.current() == '`' {
s.index++
return s.acceptBacktickEscapedIdentifier(what)
}
start := s.index
for s.index < len(s.Symbol) && shared.IsSimpleIdentifierCharacter(s.current()) {
s.index++
}
if start == s.index {
return "", s.error("empty identifier")
}
return string(s.Symbol[start:s.index]), nil
}

func (s *symbolParser) acceptSpaceEscapedIdentifier(what string) (string, error) {
return s.acceptEscapedIdentifier(what, ' ')
}

func (s *symbolParser) acceptBacktickEscapedIdentifier(what string) (string, error) {
return s.acceptEscapedIdentifier(what, '`')
}

func (s *symbolParser) acceptEscapedIdentifier(what string, escapeCharacter rune) (string, error) {
builder := strings.Builder{}
for s.index < len(s.Symbol) {
ch := s.current()
if ch == escapeCharacter {
s.index++
if s.index >= len(s.Symbol) {
break
}
if s.current() == escapeCharacter {
// Escaped space character.
builder.WriteRune(ch)
} else {
return builder.String(), nil
}
} else {
builder.WriteRune(ch)
}
s.index++
}
return "", s.error(fmt.Sprintf("reached end of symbol while parsing <%s>, expected a '%v' character", what, string(escapeCharacter)))
}

func (s *symbolParser) acceptCharacter(r rune, what string) error {
if s.current() == r {
s.index++
return nil
}
return s.error(fmt.Sprintf("expected '%v', obtained '%v', while parsing %v", string(r), string(s.current()), what))
}
Loading
Loading