Skip to content

Commit

Permalink
Add ChunkMatch.BestLineMatch to return the best-scoring line (#884)
Browse files Browse the repository at this point in the history
This PR adds a new field `ChunkMatch.BestLineMatch` with the line number of the top-scoring line in the chunk. This will let us address a long-standing issue with our new flexible keyword search, where chunk matches can become very large. Since our search results UX only shows the start of a chunk, the most relevant line may not even be visible. With this information on the best line match, we can adjust the search results UX to center the chunk on the most relevant line.

Relates to [SPLF-188](https://linear.app/sourcegraph/issue/SPLF-188/ensure-the-best-scoring-line-match-is-shown-in-search-results)
  • Loading branch information
jtibshirani authored Jan 8, 2025
1 parent d0990e0 commit b51a233
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 104 deletions.
6 changes: 6 additions & 0 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,13 @@ type ChunkMatch struct {
// beginning of a line (Column will always be 1).
ContentStart Location

// Score is the overall relevance score of this chunk.
Score float64

// BestLineMatch is the line number of the highest-scoring line match in this chunk.
// The line number is 1-based and refers to the line's position in the full file.
// If FileName is true, this number will be 0.
BestLineMatch uint32
}

func (cm *ChunkMatch) sizeBytes() (sz uint64) {
Expand Down
30 changes: 16 additions & 14 deletions api_proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ func ChunkMatchFromProto(p *proto.ChunkMatch) ChunkMatch {
}

return ChunkMatch{
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
DebugScore: p.GetDebugScore(),
Content: p.GetContent(),
ContentStart: LocationFromProto(p.GetContentStart()),
FileName: p.GetFileName(),
Ranges: ranges,
SymbolInfo: symbols,
Score: p.GetScore(),
BestLineMatch: p.GetBestLineMatch(),
DebugScore: p.GetDebugScore(),
}
}

Expand All @@ -118,13 +119,14 @@ func (cm *ChunkMatch) ToProto() *proto.ChunkMatch {
}

return &proto.ChunkMatch{
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
DebugScore: cm.DebugScore,
Content: cm.Content,
ContentStart: cm.ContentStart.ToProto(),
FileName: cm.FileName,
Ranges: ranges,
SymbolInfo: symbolInfo,
Score: cm.Score,
BestLineMatch: cm.BestLineMatch,
DebugScore: cm.DebugScore,
}
}

Expand Down
2 changes: 1 addition & 1 deletion api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func TestMatchSize(t *testing.T) {
size: 256,
}, {
v: ChunkMatch{},
size: 112,
size: 120,
}, {
v: candidateMatch{},
size: 80,
Expand Down
53 changes: 48 additions & 5 deletions build/scoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ import (
)

// scoreCase is a single table-driven scoring test case. checkScoring receives
// one of these and compares its want* fields and language against the first
// file of the search result.
//
// NOTE(review): this listing appears to be a rendered diff, so the first five
// fields look like the removed lines, repeated below in re-aligned form
// together with the newly added wantBestLineMatch — confirm against the file.
type scoreCase struct {
fileName string
content []byte
query query.Q
language string
wantScore float64
// fileName and content are presumably the name and contents of the document
// under test — their use is not visible in this view; verify in checkScoring.
fileName string
content []byte
// query is presumably the query run against the document — TODO confirm.
query query.Q
// language is the expected language of the first returned file.
language string
// wantScore is the expected score; a mismatch fails the test with a
// "score: want ..., got ..." message.
wantScore float64
// wantBestLineMatch is the expected BestLineMatch of the first chunk match of
// the first returned file. A value of 0 skips the check.
wantBestLineMatch uint32
}

func TestFileNameMatch(t *testing.T) {
Expand Down Expand Up @@ -79,6 +80,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 0.58 <- sum-termFrequencyScore: 14.00, length-ratio: 1.00
wantScore: 0.58,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
}, {
// Matches only on content
fileName: "example.java",
Expand All @@ -91,6 +94,8 @@ func TestBM25(t *testing.T) {
language: "Java",
// bm25-score: 1.81 <- sum-termFrequencyScore: 116.00, length-ratio: 1.00
wantScore: 1.81,
// line 3: public class InnerClasses {
wantBestLineMatch: 3,
},
{
// Matches only on filename
Expand Down Expand Up @@ -130,6 +135,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word)
wantScore: 6550,
// line 37: public class InnerClass implements InnerInterface<Integer, Integer> {
wantBestLineMatch: 37,
},
{
fileName: "example.java",
Expand All @@ -138,6 +145,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word)
wantScore: 7000,
// line 32: public static class InnerStaticClass {
wantBestLineMatch: 32,
},
{
fileName: "example.java",
Expand All @@ -146,6 +155,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -154,6 +165,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 800 (Java interface) + 500 (word)
wantScore: 8300,
// line 22: public interface InnerInterface<A, B> {
wantBestLineMatch: 22,
},
{
fileName: "example.java",
Expand All @@ -162,6 +175,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word)
wantScore: 8200,
// line 44: public void innerMethod() {
wantBestLineMatch: 44,
},
{
fileName: "example.java",
Expand All @@ -170,6 +185,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 600 (Java field) + 500 (word)
wantScore: 8100,
// line 38: private final int field;
wantBestLineMatch: 38,
},
{
fileName: "example.java",
Expand All @@ -178,6 +195,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 500 (Java enum constant) + 500 (word)
wantScore: 8000,
// line 18: B,
wantBestLineMatch: 18,
},
// 2 Atoms (1x content and 1x filename)
{
Expand All @@ -187,6 +206,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom)
wantScore: 6800,
// line 5: private final int exampleField;
wantBestLineMatch: 5,
},
// 3 Atoms (2x content, 1x filename)
{
Expand All @@ -199,6 +220,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom)
wantScore: 8466,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
// 4 Atoms (4x content)
{
Expand All @@ -213,6 +236,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom)
wantScore: 8700,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -221,6 +246,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word)
wantScore: 4750,
// line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) {
wantBestLineMatch: 54,
},
{
fileName: "example.java",
Expand All @@ -229,6 +256,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 7000 (Symbol) + 900 (Java enum) + 500 (word)
wantScore: 8400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -237,6 +266,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 5500 (edge Symbol) + 900 (Java enum) + 500 (word)
wantScore: 6900,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
{
fileName: "example.java",
Expand All @@ -245,6 +276,8 @@ func TestJava(t *testing.T) {
language: "Java",
// 4000 (overlap Symbol) + 900 (Java enum) + 500 (word)
wantScore: 5400,
// line 16: public enum InnerEnum {
wantBestLineMatch: 16,
},
}

Expand Down Expand Up @@ -640,6 +673,16 @@ func checkScoring(t *testing.T, c scoreCase, useBM25 bool, parserType ctags.CTag
t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore)
}

if c.wantBestLineMatch != 0 {
if len(srs.Files[0].ChunkMatches) == 0 {
t.Fatalf("want BestLineMatch %d, but no chunk matches were returned", c.wantBestLineMatch)
}
chunkMatch := srs.Files[0].ChunkMatches[0]
if chunkMatch.BestLineMatch != c.wantBestLineMatch {
t.Fatalf("want BestLineMatch %d, got %d", c.wantBestLineMatch, chunkMatch.BestLineMatch)
}
}

if got := srs.Files[0].Language; got != c.language {
t.Fatalf("want %s, got %s", c.language, got)
}
Expand Down
Loading

0 comments on commit b51a233

Please sign in to comment.