Skip to content

Migrate LDBC benchmark queries to syntax V3 #231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Graph schema: page 19 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
// BI 1 query description: page 89 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi1: summarises the Comment/Post vertices created before `date`.
// Non-empty messages are grouped by (creation year, isComment, lengthCategory).
// Each group reports its message count, its summed and average length, and its
// share of ALL messages created before `date` (empty ones included in the total).
CREATE OR REPLACE DISTRIBUTED QUERY bi1(DATETIME date) SYNTAX v3{

  // Output row. Field names are part of the printed result.
  TYPEDEF TUPLE <INT year, BOOL isComment, INT lengthCategory, INT messageCount,
                 DOUBLE averageMessageLength, INT sumMessageLength, DOUBLE percentageOfMessages> RESULT;

  // Per-message scratch values filled in by the ACCUM clause below.
  SumAccum<INT> @lengthCategory;
  OrAccum @isComment;

  // Running message count and length sum for every (year, isComment, lengthCategory) key.
  GroupByAccum<INT year, BOOL isComment, INT lengthCategory,
               SumAccum<INT> messageCount, SumAccum<INT> sumMessageLength> @@groups;
  // Declared with capacity 0; resized once the number of groups is known.
  HeapAccum<RESULT>(0, year DESC, isComment ASC, lengthCategory ASC) @@result;

  UINT totalBefore;
  // creationDate is compared against seconds * 1000 (presumably epoch milliseconds - confirm against the schema).
  INT cutoff = datetime_to_epoch(date) * 1000;

  beforeCutoff = SELECT msg FROM (msg:Comment:Post) WHERE msg.creationDate < cutoff;
  // Denominator of percentageOfMessages: taken before the length filter below.
  totalBefore = beforeCutoff.size();

  nonEmpty =
    SELECT msg FROM (msg:beforeCutoff)
    WHERE msg.length > 0
    ACCUM
      // Length buckets: [1,40) -> 0, [40,80) -> 1, [80,160) -> 2, 160 and above -> 3.
      CASE
        WHEN msg.length < 40 THEN msg.@lengthCategory = 0
        WHEN msg.length < 80 THEN msg.@lengthCategory = 1
        WHEN msg.length < 160 THEN msg.@lengthCategory = 2
        ELSE msg.@lengthCategory = 3
      END,
      msg.@isComment = (msg.type == "Comment")
    POST-ACCUM (msg)
      @@groups += (year(epoch_to_datetime(msg.creationDate/1000)), msg.@isComment, msg.@lengthCategory -> 1, msg.length);

  // Turn every group into an output row; the heap applies the final ordering.
  @@result.resize(@@groups.size());
  FOREACH grp IN @@groups DO
    @@result += RESULT(grp.year, grp.isComment, grp.lengthCategory, grp.messageCount,
                       (grp.sumMessageLength * 1.0 / grp.messageCount), grp.sumMessageLength,
                       (grp.messageCount * 1.0 / totalBefore));
  END;

  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Graph schema: page 19 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
// BI 10 query description: page 98 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi10: starting from `personId`, finds Persons first reached after 3 or 4 KNOWS
// hops that are located in `country`, takes their messages carrying a Tag of
// `tagClass`, and counts, per (person, tag), how many of those messages carry the
// tag. All tags of a qualifying message are counted, not only tags of `tagClass`.
// Prints the top 100 rows by messageCount DESC, tagName ASC, personId ASC.
CREATE OR REPLACE DISTRIBUTED QUERY bi10(VERTEX<Person> personId, STRING country, STRING tagClass) SYNTAX v3 {
  // Output row. Field names are part of the printed result.
  TYPEDEF TUPLE <UINT personId, STRING tagName, UINT messageCount> RESULT;
  HeapAccum<RESULT> (100, messageCount DESC, tagName ASC, personId ASC) @@result;
  // Per-person: tag -> number of qualifying messages carrying it.
  MapAccum<VERTEX<Tag>, SumAccum<UINT>> @messageCount;
  // Per-message: every tag attached to the message.
  SetAccum<VERTEX<Tag>> @tags;
  // @visited marks Persons already reached by the traversal; @selected marks
  // messages written by one of the chosen Persons.
  OrAccum @visited, @selected;

  UINT hops = 0;

  // Persons located in a place that is part of `country`.
  inCountry = SELECT pers FROM (cn:Country {name:country}) <-[:IS_PART_OF]-()<-[:IS_LOCATED_IN]- (pers:Person);

  // Level-by-level expansion over KNOWS, three rounds, never revisiting a Person.
  hopSet = {personId};
  hopSet = SELECT st FROM (st:hopSet) ACCUM st.@visited = TRUE;
  WHILE hops < 3 DO
    hopSet =
      SELECT nb
      FROM (cur:hopSet) -[:KNOWS]- (nb:Person)
      WHERE NOT nb.@visited
      POST-ACCUM (nb) nb.@visited = TRUE;
    hops += 1;
  END;

  // hopSet now holds the Persons first reached in the third round; one more
  // expansion yields those first reached in the fourth.
  fourthHop =
    SELECT nb
    FROM (cur:hopSet) -[:KNOWS]- (nb:Person)
    WHERE NOT nb.@visited;
  experts = (fourthHop UNION hopSet) INTERSECT inCountry;

  // Flag every message written by a chosen Person.
  msgs =
    SELECT m
    FROM (pers:experts) <-[:HAS_CREATOR]- (m:Comment:Post)
    ACCUM m.@selected += TRUE;

  // Keep the flagged messages that carry at least one Tag of the requested class.
  msgs =
    SELECT m
    FROM (tc:TagClass {name:tagClass}) <-[:HAS_TYPE]- (tg:Tag) <-[:HAS_TAG]- (m:Comment:Post)
    WHERE m.@selected;

  // Collect all tags of each remaining message.
  msgs =
    SELECT m
    FROM (m:msgs) -[:HAS_TAG]-> (tg:Tag)
    ACCUM m.@tags += tg;

  // Roll the tags up to the message creator and emit one row per (person, tag).
  creators =
    SELECT pers
    FROM (m:msgs) -[:HAS_CREATOR]-> (pers:Person)
    ACCUM
      FOREACH tg IN m.@tags DO
        pers.@messageCount += (tg -> 1)
      END
    POST-ACCUM (pers)
      FOREACH (tg, cnt) IN pers.@messageCount DO
        @@result += RESULT(pers.id, tg.name, cnt)
      END;

  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Graph schema: page 19 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
// BI 11 query description: page 99 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi11: counts triangles of Persons located in `country` whose three KNOWS edges
// all have creationDate inside [startDate, endDate] (both ends inclusive).
// Each triangle is counted once by only walking edges in ascending person id order.
CREATE OR REPLACE DISTRIBUTED QUERY bi11(STRING country, DATETIME startDate, DATETIME endDate) SYNTAX v3 {
  // Number of triangles found.
  SumAccum<UINT> @@result;
  // True for Persons located in `country`.
  OrAccum<BOOL> @selected;
  // For a Person b: the lower-id neighbours a with a qualifying KNOWS edge a-b.
  SetAccum<VERTEX<Person>> @oneHopNeighbor;

  // Edge creationDate is compared against seconds * 1000 (presumably epoch milliseconds - confirm against the schema).
  INT fromMs = datetime_to_epoch(startDate) * 1000;
  INT toMs = datetime_to_epoch(endDate) * 1000;

  countryPersons =
    SELECT pa
    FROM (cn:Country {name:country}) <-[:IS_PART_OF]-()<-[:IS_LOCATED_IN]- (pa:Person)
    ACCUM pa.@selected += TRUE;

  // First edge a-b with a.id < b.id: remember a on b.
  secondCorner =
    SELECT pb
    FROM (pa:countryPersons) -[e:KNOWS]- (pb:Person)
    WHERE pa.id < pb.id
      AND pb.@selected
      AND (e.creationDate >= fromMs AND e.creationDate <= toMs)
    ACCUM pb.@oneHopNeighbor += pa;

  // Second edge b-c with b.id < c.id: the triangle closes when a lower-id
  // neighbour of b is also a lower-id neighbour of c.
  thirdCorner =
    SELECT pc
    FROM (pb:secondCorner) -[e:KNOWS]- (pc:Person)
    WHERE pb.id < pc.id
      AND pc.@selected
      AND (e.creationDate >= fromMs AND e.creationDate <= toMs)
    ACCUM
      FOREACH pa IN pb.@oneHopNeighbor DO
        IF pa IN pc.@oneHopNeighbor THEN
          @@result += 1
        END
      END;

  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Graph schema: page 19 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
// BI 12 query description: page 100 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi12: builds a histogram "number of qualifying messages -> number of Persons
// who wrote exactly that many". A message qualifies when it is non-empty, shorter
// than `lengthThreshold`, created after `startDate`, and is either a Post in one of
// `languages` or linked via ROOT_POST to such a Post.
// Persons without any qualifying message are reported under messageCount 0.
CREATE OR REPLACE DISTRIBUTED QUERY bi12(DATETIME startDate, INT lengthThreshold, SET<STRING> languages) SYNTAX v3 {

  // Output row. Field names are part of the printed result.
  TYPEDEF TUPLE <UINT messageCount, UINT personCount> RESULT;
  // Declared with capacity 0; resized once the number of histogram buckets is known.
  HeapAccum<RESULT>(0, personCount DESC, messageCount DESC) @@result;
  // Histogram: messageCount -> personCount.
  MapAccum<UINT, UINT> @@personCount;
  // Per-person number of qualifying messages.
  SumAccum<UINT> @messageCount;

  // creationDate is compared against seconds * 1000 (presumably epoch milliseconds - confirm against the schema).
  INT sinceMs = datetime_to_epoch(startDate) * 1000;
  INT silentPersons;

  // Posts in a requested language, plus everything that has one of them as root post.
  langPosts = SELECT pst FROM (pst:Post) WHERE pst.language IN languages;
  msgSet = SELECT m FROM (pst:langPosts)<-[:ROOT_POST]- (m:Comment:Post);
  msgSet = msgSet UNION langPosts;

  // Apply the creation-date and length filters.
  msgSet =
    SELECT m FROM (m:msgSet)
    WHERE m.creationDate > sinceMs
      AND m.length < lengthThreshold
      AND m.length > 0;

  // Count qualifying messages per creator, then bucket creators by that count.
  authors =
    SELECT pers
    FROM (:msgSet) -[:HAS_CREATOR]-> (pers:Person)
    ACCUM pers.@messageCount += 1
    POST-ACCUM (pers) @@personCount += (pers.@messageCount -> 1);

  // Every Person who authored nothing that qualifies goes into the 0 bucket.
  everyone = {Person.*};
  silentPersons = everyone.size() - authors.size();
  IF silentPersons > 0 THEN
    @@personCount += (0 -> silentPersons);
  END;

  @@result.resize(@@personCount.size());
  FOREACH (msgCnt, persCnt) IN @@personCount DO
    @@result += RESULT(msgCnt, persCnt);
  END;

  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Graph schema: page 19 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
// BI 13 query description: page 101 of https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi13: finds "zombies" in `country` - Persons created before `endDate` whose
// number of messages created before `endDate` is lower than the number of
// calendar months between their creation and `endDate` (inclusive of both months).
// For every zombie it reports the likes received on the zombie's messages from
// Persons created before `endDate`, how many of those likes came from zombies,
// and the ratio of the two. Prints the top 100 by zombieScore DESC, zombieId ASC.
CREATE OR REPLACE DISTRIBUTED QUERY bi13(STRING country, DATETIME endDate) SYNTAX v3 {

  // Output row. Field names are part of the printed result.
  TYPEDEF TUPLE <UINT zombieId, UINT zombieLikeCount, UINT totalLikeCount, DOUBLE zombieScore> RESULT;
  HeapAccum<RESULT>(100, zombieScore DESC, zombieId ASC) @@result;
  // Like counters; accumulated on messages first, then summed onto their creators.
  SumAccum<UINT> @zombieLikeCount;
  SumAccum<UINT> @totalLikeCount;
  // Per-person number of messages created before `endDate`.
  SumAccum<UINT> @messageCount;
  // True for Persons classified as zombies.
  OrAccum<BOOL> @selected;
  // creationDate is compared against seconds * 1000 (presumably epoch milliseconds - confirm against the schema).
  INT endEpoch = datetime_to_epoch(endDate) * 1000;

  // Persons of the country that already existed at `endDate`.
  candidates =
    SELECT pers
    FROM (cn:Country {name:country}) <-[:IS_PART_OF]-()<-[:IS_LOCATED_IN]- (pers:Person)
    WHERE pers.creationDate < endEpoch;

  // Count each candidate's messages created before `endDate`.
  withMessages =
    SELECT pers
    FROM (pers:candidates) <-[:HAS_CREATOR]- (m:Comment:Post)
    WHERE m.creationDate < endEpoch
    ACCUM pers.@messageCount += 1;

  // Zombie test: fewer messages than months of existence.
  zombies =
    SELECT pers
    FROM (pers:candidates)
    WHERE pers.@messageCount < (year(endDate) - year(epoch_to_datetime(pers.creationDate/1000))) * 12 +
                               (month(endDate) - month(epoch_to_datetime(pers.creationDate/1000))) + 1
    ACCUM pers.@selected += true;

  // All messages written by zombies (no creation-date filter is applied here).
  zombieMsgs = SELECT m FROM (z:zombies) <-[:HAS_CREATOR]- (m:Comment:Post);

  // Count likes per message, separating out the likes that come from zombies.
  likers =
    SELECT lk
    FROM (m:zombieMsgs) <-[:LIKES]- (lk:Person)
    WHERE lk.creationDate < endEpoch
    ACCUM
      m.@totalLikeCount += 1,
      IF lk.@selected THEN
        m.@zombieLikeCount += 1
      END;

  // Sum the per-message counters onto each message's creator.
  likedMsgs =
    SELECT m
    FROM (m:zombieMsgs) -[:HAS_CREATOR]-> (z:Person)
    ACCUM
      z.@totalLikeCount += m.@totalLikeCount,
      z.@zombieLikeCount += m.@zombieLikeCount;

  // One row per zombie; a zombie without likes scores 0.0 (avoids dividing by zero).
  zombies =
    SELECT z FROM (z:zombies)
    POST-ACCUM (z)
      IF z.@totalLikeCount == 0 THEN
        @@result += RESULT(z.id, 0, 0, 0.0)
      ELSE
        @@result += RESULT(z.id, z.@zombieLikeCount, z.@totalLikeCount, 1.0 * z.@zombieLikeCount / z.@totalLikeCount)
      END;

  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
//graph schema is on page 19 https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//BI 14 query description is on page 102 https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi14: for every City that hosts a Person p1 of `country1` who knows at least one
// Person of `country2`, picks the best-scoring (p1, p2) pair of that City and
// prints the top 100 pairs over all Cities (score DESC, person1Id ASC, person2Id ASC).
// Score contributions towards a Person p2 of `country2`:
//   Case 1: REPLY_COUNT edge p1 -> p2 with cnt > 0        : 4
//   Case 2: REPLY_COUNT edge p2 -> p1 with cnt > 0        : 1
//   Case 3: p1 likes a message created by a p2 it knows   : 10
//   Case 4: a p2 that p1 knows likes a message by p1      : 1
CREATE OR REPLACE DISTRIBUTED QUERY bi14(STRING country1, STRING country2) SYNTAX v3 {
  // Output row. Field names are part of the printed result.
  TYPEDEF TUPLE<UINT person1Id, UINT person2Id, STRING city1Name, INT score> pairScore;
  // (partner id, score) candidate kept per p1.
  TYPEDEF TUPLE <UINT pid, UINT score> ps;

  // Per p1: the single best partner (highest score, then lowest id).
  HeapAccum<ps>(1, score DESC, pid ASC) @pScore;
  // Per p1: partner -> score.
  MapAccum<VERTEX<Person>, UINT> @score;
  // @knows: partners of `country2` that p1 knows.
  // @p2s3 / @p2s4: on a message, the partners collected for case 3 / case 4;
  //                on p1, the partners that earn the case 3 / case 4 bonus.
  SetAccum<VERTEX<Person>> @knows, @p2s3, @p2s4;
  // @valid: p1 knows at least one partner. @selected: Person is located in `country2`.
  OrAccum @valid, @selected;
  // Per City: the single best pair among its residents.
  HeapAccum<pairScore>(1, score DESC, person1Id ASC, person2Id ASC) @cityresults;
  HeapAccum<pairScore>(100, score DESC, person1Id ASC, person2Id ASC) @@result;
  // Per p1: lowest id among the partners it knows; used when p1 has no scored partner.
  MinAccum<UINT> @min_pid2;

  P1candidate = SELECT p
    FROM (cn:Country {name:country1}) <-[:IS_PART_OF]-()<-[:IS_LOCATED_IN]-(p:Person);

  P2candidate =
    SELECT p2
    FROM (cn:Country {name:country2}) <-[:IS_PART_OF]-()<-[:IS_LOCATED_IN]-(p2:Person)
    ACCUM p2.@selected += true;

  // Partners known by some p1; along the way record, on p1, whom it knows.
  P2 =
    SELECT p2 FROM (p1:P1candidate) -[:KNOWS]- (p2:Person)
    WHERE p2.@selected
    ACCUM p1.@valid += True,
      p1.@knows += p2,
      p1.@min_pid2 += p2.id;

  // @valid is only ever set on members of P1candidate, so filtering that set is
  // enough; there is no need to scan every Person.
  P1 = SELECT p FROM (p:P1candidate) WHERE p.@valid;

  # Case 1
  tmp = SELECT p1 FROM (p1:P1) -[e:REPLY_COUNT]-> (p2:Person)
    WHERE p2.@selected AND e.cnt > 0
    ACCUM p1.@score += (p2 -> 4);

  # Case 2
  tmp = SELECT p1 FROM (p1:P1) <-[e:REPLY_COUNT]- (p2:Person)
    WHERE p2.@selected AND e.cnt > 0
    ACCUM p1.@score += (p2 -> 1);

  # Case 3
  // Tag every message with its P2 creator, push those creators to the Persons who
  // like the message, then keep only the creators that p1 actually knows.
  M2 = SELECT m2 FROM (p:P2) <-[:HAS_CREATOR]- (m2)
    ACCUM m2.@p2s3 += p;
  tmp = SELECT p1 FROM (m2:M2) <-[:LIKES]- (p1:Person)
    WHERE p1.@valid
    ACCUM p1.@p2s3 += m2.@p2s3
    POST-ACCUM (p1) p1.@p2s3 = (p1.@p2s3 INTERSECT p1.@knows);

  # Case 4
  // Tag every message with the P2 members who like it, push those likers to the
  // message's creator, then keep only the likers that p1 actually knows.
  M2 = SELECT m2 FROM (p2:P2) -[:LIKES]-> (m2)
    ACCUM m2.@p2s4 += p2;
  tmp = SELECT p1 FROM (m2:M2) -[:HAS_CREATOR]-> (p1:Person)
    WHERE p1.@valid
    ACCUM p1.@p2s4 += m2.@p2s4
    POST-ACCUM (p1) p1.@p2s4 = (p1.@p2s4 INTERSECT p1.@knows);

  // Add the case 3 / case 4 contributions, then reduce the score map to p1's best partner.
  P1 = SELECT p1 FROM (p1:P1)
    ACCUM
      FOREACH p2 IN p1.@p2s3 DO
        p1.@score += (p2 -> 10)
      END,
      FOREACH p2 IN p1.@p2s4 DO
        p1.@score += (p2 -> 1)
      END
    POST-ACCUM (p1)
      FOREACH (p2,score) IN p1.@score DO
        p1.@pScore += ps(p2.id,score)
      END;

  // Offer each p1's best pair to its City. A p1 without any scored partner is
  // paired with the lowest-id partner it knows, at score 0.
  City1 = SELECT c1 FROM (p1:P1) -[:IS_LOCATED_IN]-> (c1:City)
    ACCUM
      IF p1.@pScore.size() > 0 THEN
        c1.@cityresults += pairScore(p1.id, p1.@pScore.top().pid, c1.name, p1.@pScore.top().score)
      ELSE
        c1.@cityresults += pairScore(p1.id, p1.@min_pid2, c1.name, 0)
      END;

  // Every City in City1 received at least one pair above, so top() is defined.
  City1 = SELECT c1 FROM (c1:City1)
    ACCUM @@result += c1.@cityresults.top();
  PRINT @@result as result;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//graph schema is on page 19 https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//BI 15 query description is on page 103 https://ldbc.github.io/ldbc_snb_docs/ldbc-snb-specification.pdf
//
// bi15: weighted shortest path over KNOWS from `person1Id` to `person2Id`.
// The cost of a KNOWS edge s-t is 2.0 / (2 + replies(s,t) + replies(t,s)), where the
// reply weights are gathered from Comment threads of Forums created inside
// [startDate, endDate]: a reply to a Post weighs 2, a reply to a Comment weighs 1.
// Prints the path cost, or -1 when `person2Id` was never reached.
CREATE OR REPLACE DISTRIBUTED QUERY bi15(VERTEX<Person> person1Id, VERTEX<Person> person2Id, DATETIME startDate, DATETIME endDate) syntax v3 {
  // @@result: best cost found so far for person2Id; @dis: best cost found so far per Person.
  MinAccum<DOUBLE> @@result, @dis;
  // True for Persons whose @dis improved in the current round.
  OrAccum @next;
  // Per-message creator (a set, although each message is expected to have one creator).
  SetAccum<VERTEX<Person>> @creator;
  // Per-person: replying person -> accumulated reply weight.
  MapAccum<VERTEX<Person>, SumAccum<UINT>> @replyCount;
  // True for Comments belonging to a thread of a selected Forum.
  OrAccum @visited;
  // creationDate is compared against seconds * 1000 (presumably epoch milliseconds - confirm against the schema).
  INT startEpoch = datetime_to_epoch(startDate) * 1000;
  INT endEpoch = datetime_to_epoch(endDate) * 1000;

  // Forums created in the window, their Posts, and the Comments rooted at those Posts.
  F = SELECT f FROM (f:Forum) WHERE f.creationDate BETWEEN startEpoch AND endEpoch;
  P = SELECT p FROM (:F) -[:CONTAINER_OF]-> (p:Post);
  M = SELECT m FROM (:P) <-[:ROOT_POST]- (m:Comment) ACCUM m.@visited += true;
  M = M UNION P;
  tmp = SELECT p FROM (m:M) -[:HAS_CREATOR]-> (p:Person) ACCUM m.@creator += p;

  // For every reply m2 -> m1 inside those threads, credit the creator of m1 with
  // a weighted reply from the creator of m2.
  M1 = SELECT m1 FROM (m1:M) <-[:REPLY_OF]- (m2:Comment)
    WHERE m2.@visited
    ACCUM
      UINT w = 1,
      IF m1.type == "Post" THEN w = 2 END,
      FOREACH p1 IN m1.@creator DO
        FOREACH p2 IN m2.@creator DO
          p1.@replyCount += (p2 -> w)
        END
      END;

  // The number of Persons serves as the initial upper bound / "not reached" sentinel
  // for @@result (every edge costs at most 1.0).
  P = {Person.*};
  @@result = P.size();
  S = {person1Id};
  S = SELECT s FROM (s:S) ACCUM s.@dis = 0;

  // Repeated relaxation: each round expands only the Persons whose distance just
  // improved, and drops candidates that cannot beat the best known cost to person2Id.
  WHILE S.size()>0 DO
    S = SELECT t FROM (s:S)-[e:KNOWS]-(t:Person)
      ACCUM
        DOUBLE t_dis = s.@dis + 2.0/(2 + s.@replyCount.get(t) + t.@replyCount.get(s)),
        IF t_dis < @@result AND t_dis < t.@dis THEN
          t.@dis += t_dis,
          t.@next += true,
          IF t == person2Id THEN
            @@result += t_dis
          END
        END
      HAVING t.@next;
    S = SELECT s FROM (s:S) ACCUM s.@next = false;
  END;

  // @@result still at the sentinel means person2Id was never reached.
  IF ABS(@@result - P.size()) < 1 THEN
    PRINT -1 as result;
  ELSE
    PRINT @@result as result;
  END;
}
Loading
Loading