Skip to content

Commit 2e03a50

Browse files
committed
gitbase: update benchmarks with indexes
Signed-off-by: Miguel Molina <[email protected]>
1 parent 1df4314 commit 2e03a50

File tree

2 files changed: 160 additions and 81 deletions.

integration_test.go

Lines changed: 151 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -320,6 +320,22 @@ func BenchmarkQueries(b *testing.B) {
320320
INNER JOIN refs rr
321321
ON r.repository_id = rr.repository_id`,
322322
},
323+
{
324+
"select by specific id",
325+
`SELECT * FROM ref_commits r
326+
INNER JOIN commits c
327+
ON c.commit_hash = r.commit_hash
328+
WHERE c.commit_hash = '6ecf0ef2c2dffb796033e5a02219af86ec6584e5'
329+
AND r.ref_name = 'refs/heads/master'`,
330+
},
331+
{
332+
"select file by name",
333+
`SELECT * FROM files WHERE file_path = 'LICENSE'`,
334+
},
335+
{
336+
"select files by language",
337+
`SELECT * FROM files WHERE language(file_path, blob_content) = 'Go'`,
338+
},
323339
{
324340
"query with commit_blobs",
325341
`SELECT COUNT(c.commit_hash), c.commit_hash
@@ -336,9 +352,9 @@ func BenchmarkQueries(b *testing.B) {
336352
FROM (
337353
SELECT YEAR(c.commit_author_when) AS first_commit_year
338354
FROM ref_commits r
339-
INNER JOIN commits c
355+
INNER JOIN commits c
340356
ON r.commit_hash = c.commit_hash
341-
ORDER BY c.commit_author_when
357+
ORDER BY c.commit_author_when
342358
LIMIT 1
343359
) repo_years
344360
GROUP BY first_commit_year`,
@@ -369,50 +385,72 @@ func BenchmarkQueries(b *testing.B) {
369385
{
370386
"join refs and blobs",
371387
`SELECT * FROM refs r
372-
INNER JOIN commit_blobs cb
388+
INNER JOIN commit_blobs cb
373389
ON r.commit_hash = cb.commit_hash
374390
INNER JOIN blobs b
375391
ON cb.blob_hash = b.blob_hash`,
376392
},
377393
{
378394
"join refs and blobs with filters",
379395
`SELECT * FROM refs r
380-
INNER JOIN commit_blobs cb
396+
INNER JOIN commit_blobs cb
381397
ON r.commit_hash = cb.commit_hash
382398
INNER JOIN blobs b
383399
ON cb.blob_hash = b.blob_hash
384400
WHERE r.ref_name = 'refs/heads/master'`,
385401
},
386402
}
387403

404+
indexesEngine, pool, cleanup := setup(b)
405+
defer cleanup()
406+
407+
tmpDir, err := ioutil.TempDir(os.TempDir(), "pilosa-idx-gitbase")
408+
require.NoError(b, err)
409+
defer os.RemoveAll(tmpDir)
410+
indexesEngine.Catalog.RegisterIndexDriver(pilosa.NewIndexDriver(tmpDir))
411+
412+
ctx := sql.NewContext(
413+
context.TODO(),
414+
sql.WithSession(gitbase.NewSession(pool)),
415+
)
416+
417+
engine := sqle.New()
418+
engine.AddDatabase(gitbase.NewDatabase("foo"))
419+
engine.Catalog.RegisterFunctions(function.Functions)
420+
421+
squashEngine := sqle.New()
422+
squashEngine.AddDatabase(gitbase.NewDatabase("foo"))
423+
squashEngine.Catalog.RegisterFunctions(function.Functions)
424+
squashEngine.Analyzer.AddRule(rule.SquashJoinsRule, rule.SquashJoins)
425+
426+
cleanupIndexes := createTestIndexes(b, indexesEngine, ctx)
427+
defer cleanupIndexes()
428+
388429
for _, qq := range queries {
389430
b.Run(qq.name, func(b *testing.B) {
390-
benchmarkQuery(b, qq.query)
431+
b.Run("base", func(b *testing.B) {
432+
benchmarkQuery(b, qq.query, engine, ctx)
433+
})
434+
435+
b.Run("indexes", func(b *testing.B) {
436+
benchmarkQuery(b, qq.query, indexesEngine, ctx)
437+
})
438+
439+
b.Run("squash", func(b *testing.B) {
440+
benchmarkQuery(b, qq.query, squashEngine, ctx)
441+
})
391442
})
392443
}
393444
}
394445

395-
func benchmarkQuery(b *testing.B, query string) {
396-
engine, pool, cleanup := setup(b)
397-
defer cleanup()
398-
399-
session := gitbase.NewSession(pool)
400-
ctx := sql.NewContext(context.TODO(), sql.WithSession(session))
401-
402-
run := func(b *testing.B) {
403-
for i := 0; i < b.N; i++ {
404-
_, rows, err := engine.Query(ctx, query)
405-
require.NoError(b, err)
446+
func benchmarkQuery(b *testing.B, query string, engine *sqle.Engine, ctx *sql.Context) {
447+
for i := 0; i < b.N; i++ {
448+
_, rows, err := engine.Query(ctx, query)
449+
require.NoError(b, err)
406450

407-
_, err = sql.RowIterToRows(rows)
408-
require.NoError(b, err)
409-
}
451+
_, err = sql.RowIterToRows(rows)
452+
require.NoError(b, err)
410453
}
411-
412-
b.Run("no squash", run)
413-
414-
engine.Analyzer.AddRule(rule.SquashJoinsRule, rule.SquashJoins)
415-
b.Run("squash", run)
416454
}
417455

418456
func TestIndexes(t *testing.T) {
@@ -429,133 +467,167 @@ func TestIndexes(t *testing.T) {
429467
sql.WithSession(gitbase.NewSession(pool)),
430468
)
431469

432-
db, err := engine.Catalog.Database("foo")
433-
require.NoError(t, err)
434-
tables := db.Tables()
435-
436470
baseEngine := sqle.New()
437471
baseEngine.AddDatabase(gitbase.NewDatabase("foo"))
438472
baseEngine.Catalog.RegisterFunctions(function.Functions)
439473

474+
cleanupIndexes := createTestIndexes(t, engine, ctx)
475+
defer cleanupIndexes()
476+
477+
testCases := []string{
478+
`SELECT ref_name, commit_hash FROM refs WHERE ref_name = 'refs/heads/master'`,
479+
`SELECT remote_name, remote_push_url FROM remotes WHERE remote_name = 'origin'`,
480+
`SELECT commit_hash, commit_author_email FROM commits WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'`,
481+
`SELECT commit_hash, ref_name FROM ref_commits WHERE ref_name = 'refs/heads/master'`,
482+
`SELECT commit_hash, tree_hash FROM commit_trees WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'`,
483+
`SELECT commit_hash, blob_hash FROM commit_blobs WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'`,
484+
`SELECT tree_entry_name, blob_hash FROM tree_entries WHERE tree_entry_name = 'LICENSE'`,
485+
`SELECT blob_hash, blob_size FROM blobs WHERE blob_hash = 'd5c0f4ab811897cadf03aec358ae60d21f91c50d'`,
486+
`SELECT file_path, blob_hash FROM files WHERE file_path = 'LICENSE'`,
487+
}
488+
489+
for _, tt := range testCases {
490+
t.Run(tt, func(t *testing.T) {
491+
require := require.New(t)
492+
493+
_, iter, err := engine.Query(ctx, tt)
494+
require.NoError(err)
495+
496+
rows, err := sql.RowIterToRows(iter)
497+
require.NoError(err)
498+
499+
_, iter, err = baseEngine.Query(ctx, tt)
500+
require.NoError(err)
501+
502+
expected, err := sql.RowIterToRows(iter)
503+
require.NoError(err)
504+
505+
require.ElementsMatch(expected, rows)
506+
})
507+
}
508+
}
509+
510+
func col(t testing.TB, schema sql.Schema, name string) sql.Expression {
511+
for i, col := range schema {
512+
if col.Name == name {
513+
return expression.NewGetFieldWithTable(i, col.Type, col.Source, col.Name, col.Nullable)
514+
}
515+
}
516+
517+
t.Fatalf("unknown column %s in schema", name)
518+
return nil
519+
}
520+
521+
type indexData struct {
522+
id string
523+
expressions []sql.Expression
524+
table sql.Table
525+
columns []string
526+
}
527+
528+
func createTestIndexes(t testing.TB, engine *sqle.Engine, ctx *sql.Context) func() {
529+
db, err := engine.Catalog.Database("foo")
530+
require.NoError(t, err)
531+
tables := db.Tables()
532+
440533
var indexes = []indexData{
441534
{
535+
id: "refs_idx",
442536
table: tables[gitbase.ReferencesTableName],
443537
columns: []string{"ref_name"},
444538
expressions: []sql.Expression{
445539
col(t, gitbase.RefsSchema, "ref_name"),
446540
},
447541
},
448542
{
543+
id: "remotes_idx",
449544
table: tables[gitbase.RemotesTableName],
450545
columns: []string{"remote_name"},
451546
expressions: []sql.Expression{
452547
col(t, gitbase.RemotesSchema, "remote_name"),
453548
},
454549
},
455550
{
551+
id: "ref_commits_idx",
456552
table: tables[gitbase.RefCommitsTableName],
457553
columns: []string{"ref_name"},
458554
expressions: []sql.Expression{
459555
col(t, gitbase.RefCommitsSchema, "ref_name"),
460556
},
461557
},
462558
{
559+
id: "commits_idx",
463560
table: tables[gitbase.CommitsTableName],
464-
columns: []string{"commit_author_email"},
561+
columns: []string{"commit_hash"},
465562
expressions: []sql.Expression{
466-
col(t, gitbase.CommitsSchema, "commit_author_email"),
563+
col(t, gitbase.CommitsSchema, "commit_hash"),
467564
},
468565
},
469566
{
567+
id: "commit_trees_idx",
470568
table: tables[gitbase.CommitTreesTableName],
471569
columns: []string{"commit_hash"},
472570
expressions: []sql.Expression{
473571
col(t, gitbase.CommitTreesSchema, "commit_hash"),
474572
},
475573
},
476574
{
575+
id: "commit_blobs_idx",
477576
table: tables[gitbase.CommitBlobsTableName],
478577
columns: []string{"commit_hash"},
479578
expressions: []sql.Expression{
480579
col(t, gitbase.CommitBlobsSchema, "commit_hash"),
481580
},
482581
},
483582
{
583+
id: "tree_entries_idx",
484584
table: tables[gitbase.TreeEntriesTableName],
485585
columns: []string{"tree_entry_name"},
486586
expressions: []sql.Expression{
487587
col(t, gitbase.TreeEntriesSchema, "tree_entry_name"),
488588
},
489589
},
490590
{
591+
id: "blobs_idx",
491592
table: tables[gitbase.BlobsTableName],
492593
columns: []string{"blob_hash"},
493594
expressions: []sql.Expression{
494595
col(t, gitbase.BlobsSchema, "blob_hash"),
495596
},
496597
},
497598
{
599+
id: "files_idx",
498600
table: tables[gitbase.FilesTableName],
499601
columns: []string{"file_path"},
500602
expressions: []sql.Expression{
501603
col(t, gitbase.FilesSchema, "file_path"),
502604
},
503605
},
606+
{
607+
id: "files_lang_idx",
608+
table: tables[gitbase.FilesTableName],
609+
columns: []string{"file_path"},
610+
expressions: []sql.Expression{
611+
func() sql.Expression {
612+
f, _ := function.NewLanguage(
613+
col(t, gitbase.FilesSchema, "file_path"),
614+
col(t, gitbase.FilesSchema, "blob_content"),
615+
)
616+
return f
617+
}(),
618+
},
619+
},
504620
}
505621

506622
for _, idx := range indexes {
507623
createIndex(t, engine, idx, ctx)
508-
defer deleteIndex(t, engine, idx)
509624
}
510625

511-
testCases := []string{
512-
`SELECT ref_name, commit_hash FROM refs WHERE ref_name = 'refs/heads/master'`,
513-
`SELECT remote_name, remote_push_url FROM remotes WHERE remote_name = 'origin'`,
514-
`SELECT commit_hash, commit_author_email FROM commits WHERE commit_author_email = '[email protected]'`,
515-
`SELECT commit_hash, ref_name FROM ref_commits WHERE ref_name = 'refs/heads/master'`,
516-
`SELECT commit_hash, tree_hash FROM commit_trees WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'`,
517-
`SELECT commit_hash, blob_hash FROM commit_blobs WHERE commit_hash = '918c48b83bd081e863dbe1b80f8998f058cd8294'`,
518-
`SELECT tree_entry_name, blob_hash FROM tree_entries WHERE tree_entry_name = 'LICENSE'`,
519-
`SELECT blob_hash, blob_size FROM blobs WHERE blob_hash = 'd5c0f4ab811897cadf03aec358ae60d21f91c50d'`,
520-
`SELECT file_path, blob_hash FROM files WHERE file_path = 'LICENSE'`,
521-
}
522-
523-
for _, tt := range testCases {
524-
t.Run(tt, func(t *testing.T) {
525-
require := require.New(t)
526-
527-
_, iter, err := engine.Query(ctx, tt)
528-
require.NoError(err)
529-
530-
rows, err := sql.RowIterToRows(iter)
531-
require.NoError(err)
532-
533-
_, iter, err = baseEngine.Query(ctx, tt)
534-
require.NoError(err)
535-
536-
expected, err := sql.RowIterToRows(iter)
537-
require.NoError(err)
538-
539-
require.ElementsMatch(expected, rows)
540-
})
541-
}
542-
}
543-
544-
func col(t *testing.T, schema sql.Schema, name string) sql.Expression {
545-
for i, col := range schema {
546-
if col.Name == name {
547-
return expression.NewGetFieldWithTable(i, col.Type, col.Source, col.Name, col.Nullable)
626+
return func() {
627+
for _, idx := range indexes {
628+
defer deleteIndex(t, engine, idx)
548629
}
549630
}
550-
551-
t.Fatalf("unknown column %s in schema", name)
552-
return nil
553-
}
554-
555-
type indexData struct {
556-
expressions []sql.Expression
557-
table sql.Table
558-
columns []string
559631
}
560632

561633
func createIndex(
@@ -576,7 +648,7 @@ func createIndex(
576648

577649
idx, err := driver.Create(
578650
"foo", data.table.Name(),
579-
data.table.Name()+"_idx", hashes,
651+
data.id, hashes,
580652
make(map[string]string),
581653
)
582654
require.NoError(err)
@@ -598,7 +670,7 @@ func deleteIndex(
598670
data indexData,
599671
) {
600672
t.Helper()
601-
done, err := e.Catalog.DeleteIndex("foo", data.table.Name()+"_idx")
673+
done, err := e.Catalog.DeleteIndex("foo", data.id)
602674
require.NoError(t, err)
603675
<-done
604676
}

packfiles.go

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,9 @@ func (d *repoObjectDecoder) get(offset int64) (object.Object, error) {
271271
return object.DecodeObject(d.storage, encodedObj)
272272
}
273273

274-
func (d *repoObjectDecoder) Close() error { return d.decoder.Close() }
274+
func (d *repoObjectDecoder) Close() error {
275+
return d.decoder.Close()
276+
}
275277

276278
type objectDecoder struct {
277279
pool *RepositoryPool
@@ -309,4 +311,9 @@ func (d *objectDecoder) decode(
309311
return getUnpackedObject(d.pool.repositories[repository], hash)
310312
}
311313

312-
func (d *objectDecoder) Close() error { return d.decoder.Close() }
314+
func (d *objectDecoder) Close() error {
315+
if d.decoder != nil {
316+
return d.decoder.Close()
317+
}
318+
return nil
319+
}

0 commit comments

Comments
 (0)