Skip to content

Commit 6256c4e

Browse files
authored
rule: make commit_files squashable with blobs (#873)
rule: make commit_files squashable with blobs
2 parents 54df6ad + 1bcf36b commit 6256c4e

File tree

6 files changed

+189
-5
lines changed

6 files changed

+189
-5
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
- Added `commit_file_stats` function.
1515
- Added documentation about `commit_stats`.
1616
- Add metrics (engine, analyzer, regex, pilosa) based on go-kit interface. ([#744](https://github.com/src-d/go-mysql-server/pull/744))
17+
- `commit_files` is now squashable with `blobs`.
1718

1819
### Changed
1920

docs/using-gitbase/optimize-queries.md

+5
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ You can find the full list of conditions that need to be met for the squash to b
234234
```
235235
repositories -> refs -> ref_commits -> commits -> commit_trees -> tree_entries -> blobs
236236
repositories -> refs -> ref_commits -> commits -> commit_blobs -> blobs
237+
repositories -> refs -> ref_commits -> commits -> commit_files -> blobs
237238
repositories -> refs -> ref_commits -> commits -> commit_files -> files
238239
repositories -> remotes -> refs -> (any of the other hierarchies)
239240
```
@@ -324,6 +325,10 @@ This will pretty-print the analyzed tree of your query. If you see a node named
324325

325326
- `tree_entries.blob_hash = blobs.blob_hash`
326327

328+
### `commit_files` with `blobs`
329+
330+
- `commit_files.blob_hash = blobs.blob_hash`
331+
327332
### `commit_files` with `files`
328333

329334
- `commit_files.file_path = files.file_path`

internal/rule/squashjoins.go

+20-2
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ import (
55
"reflect"
66

77
"github.com/src-d/gitbase"
8-
errors "gopkg.in/src-d/go-errors.v1"
98
"github.com/src-d/go-mysql-server/sql"
109
"github.com/src-d/go-mysql-server/sql/analyzer"
1110
"github.com/src-d/go-mysql-server/sql/expression"
1211
"github.com/src-d/go-mysql-server/sql/plan"
12+
errors "gopkg.in/src-d/go-errors.v1"
1313
)
1414

1515
// SquashJoinsRule name.
@@ -764,6 +764,19 @@ func buildSquashedTable(
764764
}
765765

766766
iter = gitbase.NewTreeEntryBlobsIter(it, f, readContent)
767+
case gitbase.FilesIter:
768+
var f sql.Expression
769+
f, filters, err = filtersForJoin(
770+
gitbase.CommitFilesTableName,
771+
gitbase.BlobsTableName,
772+
filters,
773+
append(it.Schema(), gitbase.BlobsSchema...),
774+
)
775+
if err != nil {
776+
return nil, err
777+
}
778+
779+
iter = gitbase.NewCommitFileBlobsIter(it, f, readContent)
767780
default:
768781
addUnsquashable(gitbase.BlobsTableName)
769782
continue
@@ -985,8 +998,8 @@ var tableHierarchy = []string{
985998
gitbase.CommitTreesTableName,
986999
gitbase.TreeEntriesTableName,
9871000
gitbase.CommitBlobsTableName,
988-
gitbase.BlobsTableName,
9891001
gitbase.CommitFilesTableName,
1002+
gitbase.BlobsTableName,
9901003
gitbase.FilesTableName,
9911004
}
9921005

@@ -1540,6 +1553,11 @@ func isRedundantFilter(f sql.Expression, t1, t2 string) bool {
15401553
isCol(gitbase.ReferencesTableName, "commit_hash"),
15411554
isCol(gitbase.CommitFilesTableName, "commit_hash"),
15421555
)(f)
1556+
case t1 == gitbase.CommitFilesTableName && t2 == gitbase.BlobsTableName:
1557+
return isEq(
1558+
isCol(gitbase.CommitFilesTableName, "blob_hash"),
1559+
isCol(gitbase.BlobsTableName, "blob_hash"),
1560+
)(f)
15431561
}
15441562
return false
15451563
}

internal/rule/squashjoins_test.go

+48-2
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ import (
55
"testing"
66

77
"github.com/src-d/gitbase"
8-
"github.com/stretchr/testify/require"
9-
errors "gopkg.in/src-d/go-errors.v1"
108
"github.com/src-d/go-mysql-server/sql"
119
"github.com/src-d/go-mysql-server/sql/analyzer"
1210
"github.com/src-d/go-mysql-server/sql/expression"
1311
"github.com/src-d/go-mysql-server/sql/parse"
1412
"github.com/src-d/go-mysql-server/sql/plan"
13+
"github.com/stretchr/testify/require"
14+
errors "gopkg.in/src-d/go-errors.v1"
1515
)
1616

1717
func TestAnalyzeSquashJoinsExchange(t *testing.T) {
@@ -407,6 +407,7 @@ func TestBuildSquashedTable(t *testing.T) {
407407
refsCommitFilesSchema := append(gitbase.RefsSchema, gitbase.CommitFilesSchema...)
408408
commitsCommitFilesSchema := append(gitbase.CommitsSchema, gitbase.CommitFilesSchema...)
409409
commitFilesFilesSchema := append(gitbase.CommitFilesSchema, gitbase.FilesSchema...)
410+
commitFilesBlobsSchema := append(gitbase.CommitFilesSchema, gitbase.BlobsSchema...)
410411

411412
repoFilter := eq(
412413
col(0, gitbase.RepositoriesTableName, "repository_id"),
@@ -713,6 +714,16 @@ func TestBuildSquashedTable(t *testing.T) {
713714
col(0, gitbase.FilesTableName, "file_path"),
714715
)
715716

717+
commitFilesBlobsRedundantFilter := eq(
718+
col(0, gitbase.CommitFilesTableName, "blob_hash"),
719+
col(0, gitbase.BlobsTableName, "blob_hash"),
720+
)
721+
722+
commitFilesBlobsFilter := eq(
723+
col(0, gitbase.CommitFilesTableName, "commit_hash"),
724+
col(0, gitbase.BlobsTableName, "blob_size"),
725+
)
726+
716727
idx1, idx2 := &dummyLookup{1}, &dummyLookup{2}
717728

718729
testCases := []struct {
@@ -2085,6 +2096,41 @@ func TestBuildSquashedTable(t *testing.T) {
20852096
gitbase.FilesTableName,
20862097
)),
20872098
},
2099+
{
2100+
"commit_files with blobs",
2101+
[]sql.Table{commitFiles, blobs},
2102+
[]sql.Expression{
2103+
blobFilter,
2104+
commitFilesFilter,
2105+
commitFilesBlobsFilter,
2106+
commitFilesBlobsRedundantFilter,
2107+
},
2108+
nil,
2109+
nil,
2110+
nil,
2111+
plan.NewResolvedTable(gitbase.NewSquashedTable(
2112+
gitbase.NewCommitFileBlobsIter(
2113+
gitbase.NewAllCommitFilesIter(
2114+
fixIdx(t, commitFilesFilter, gitbase.CommitFilesSchema),
2115+
),
2116+
and(
2117+
fixIdx(t, blobFilter, commitFilesBlobsSchema),
2118+
fixIdx(t, commitFilesBlobsFilter, commitFilesBlobsSchema),
2119+
),
2120+
false,
2121+
),
2122+
nil,
2123+
[]sql.Expression{
2124+
blobFilter,
2125+
commitFilesFilter,
2126+
commitFilesBlobsFilter,
2127+
commitFilesBlobsRedundantFilter,
2128+
},
2129+
nil,
2130+
gitbase.CommitFilesTableName,
2131+
gitbase.BlobsTableName,
2132+
)),
2133+
},
20882134
{
20892135
"commit_files with indexes",
20902136
[]sql.Table{commitFiles, files},

squash_iterator.go

+76-1
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/src-d/go-mysql-server/sql"
78
errors "gopkg.in/src-d/go-errors.v1"
89
git "gopkg.in/src-d/go-git.v4"
910
"gopkg.in/src-d/go-git.v4/plumbing"
1011
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
1112
"gopkg.in/src-d/go-git.v4/plumbing/object"
1213
"gopkg.in/src-d/go-git.v4/plumbing/storer"
13-
"github.com/src-d/go-mysql-server/sql"
1414

1515
"github.com/sirupsen/logrus"
1616
)
@@ -3410,6 +3410,81 @@ func (i *squashCommitFileFilesIter) Close() error {
34103410
return i.files.Close()
34113411
}
34123412

3413+
type squashCommitFileBlobsIter struct {
3414+
files FilesIter
3415+
readContent bool
3416+
row sql.Row
3417+
filters sql.Expression
3418+
ctx *sql.Context
3419+
}
3420+
3421+
// NewCommitFileBlobsIter returns all blobs for the commit files in the given
3422+
// iterator.
3423+
func NewCommitFileBlobsIter(
3424+
files FilesIter,
3425+
filters sql.Expression,
3426+
readContent bool,
3427+
) ChainableIter {
3428+
return &squashCommitFileBlobsIter{
3429+
files: files,
3430+
filters: filters,
3431+
readContent: readContent,
3432+
}
3433+
}
3434+
3435+
func (i *squashCommitFileBlobsIter) New(ctx *sql.Context, repo *Repository) (ChainableIter, error) {
3436+
iter, err := i.files.New(ctx, repo)
3437+
if err != nil {
3438+
return nil, err
3439+
}
3440+
3441+
return &squashCommitFileBlobsIter{
3442+
files: iter.(FilesIter),
3443+
ctx: ctx,
3444+
filters: i.filters,
3445+
readContent: i.readContent,
3446+
}, nil
3447+
}
3448+
3449+
func (i *squashCommitFileBlobsIter) Advance() error {
3450+
for {
3451+
err := i.files.Advance()
3452+
if err != nil {
3453+
return err
3454+
}
3455+
3456+
f := i.files.File()
3457+
row, err := blobToRow(i.Repository().ID, &f.Blob, i.readContent)
3458+
if err != nil {
3459+
return err
3460+
}
3461+
3462+
i.row = append(i.files.Row(), row...)
3463+
3464+
if i.filters != nil {
3465+
ok, err := evalFilters(i.ctx, i.row, i.filters)
3466+
if err != nil {
3467+
return err
3468+
}
3469+
3470+
if !ok {
3471+
continue
3472+
}
3473+
}
3474+
3475+
return nil
3476+
}
3477+
}
3478+
3479+
// Repository returns the repository reported by the underlying commit files
// iterator.
func (i *squashCommitFileBlobsIter) Repository() *Repository {
	return i.files.Repository()
}
3480+
// Row returns the combined row built by the most recent call to Advance.
func (i *squashCommitFileBlobsIter) Row() sql.Row {
	return i.row
}
3481+
func (i *squashCommitFileBlobsIter) Schema() sql.Schema {
3482+
return append(i.files.Schema(), BlobsSchema...)
3483+
}
3484+
func (i *squashCommitFileBlobsIter) Close() error {
3485+
return i.files.Close()
3486+
}
3487+
34133488
func evalFilters(ctx *sql.Context, row sql.Row, filters sql.Expression) (bool, error) {
34143489
v, err := filters.Eval(ctx, row)
34153490
if err != nil {

squash_iterator_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,45 @@ func TestCommitBlobsIter(t *testing.T) {
699699
require.Len(rows, 52)
700700
}
701701

702+
func TestCommitFileBlobsIter(t *testing.T) {
703+
require := require.New(t)
704+
ctx, cleanup := setupIter(t)
705+
defer cleanup()
706+
707+
rows := chainableIterRows(
708+
t, ctx,
709+
NewCommitFileBlobsIter(
710+
NewAllCommitFilesIter(nil).(FilesIter),
711+
nil,
712+
true,
713+
),
714+
)
715+
716+
expected := chainableIterRows(
717+
t, ctx,
718+
NewCommitFileFilesIter(
719+
NewAllCommitFilesIter(nil).(FilesIter),
720+
nil,
721+
true,
722+
),
723+
)
724+
725+
// transform the result of the files table into the expected for blobs
726+
offset := len(CommitFilesSchema)
727+
size := offset + len(BlobsSchema)
728+
for i, e := range expected {
729+
var newRow = make(sql.Row, size)
730+
copy(newRow[:offset], e[:offset])
731+
newRow[offset] = e[offset] // repository_id
732+
newRow[offset+1] = e[offset+2] // blob_hash
733+
newRow[offset+2] = e[offset+6] // blob_size
734+
newRow[offset+3] = e[offset+5] // blob_content
735+
expected[i] = newRow
736+
}
737+
738+
require.ElementsMatch(expected, rows)
739+
}
740+
702741
func chainableIterRowsError(t *testing.T, ctx *sql.Context, iter ChainableIter) {
703742
table := newSquashTable(iter)
704743
_, err := tableToRows(ctx, table)

0 commit comments

Comments
 (0)