You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am using a pandas DataFrame to populate node and rel tables. Node tables are populated correctly, but rel tables are not.
Attaching a Python file that demonstrates the problem.
Are there known steps to reproduce?
from kuzu import Database, Connection
import pandas as pd
import shutil
# Graph schema: three node tables plus two relationship tables, each of which
# points from a VisitTime node to the City or Person it is associated with.
schema = """
CREATE NODE TABLE IF NOT EXISTS Person (name STRING PRIMARY KEY);
CREATE NODE TABLE IF NOT EXISTS City (name STRING PRIMARY KEY);
CREATE NODE TABLE IF NOT EXISTS VisitTime (value TIMESTAMP PRIMARY KEY);
CREATE REL TABLE IF NOT EXISTS VISIT_LOCATION (FROM VisitTime TO City);
CREATE REL TABLE IF NOT EXISTS VISITED_BY (FROM VisitTime TO Person);
"""
db_path = "/tmp/kuzu"
# Remove whatever a previous run left behind so the reproduction starts clean.
try:
    shutil.rmtree(db_path)
except FileNotFoundError:
    # Nothing to delete on the very first run; that is not an error.
    pass
db = Database(db_path)
conn = Connection(db)
conn.execute(schema)
person = "Alice"
city = "New York"
# Create the single Person and City nodes that the relationships will target.
conn.execute(f"""MERGE (:Person {{name: "{person}"}})""")
conn.execute(f"""MERGE (f:City {{name: "{city}"}})""")
# Add visit times
# Timestamp strings with a UTC offset; each one is loaded as a VisitTime node.
values = [
    "2020-02-03T19:43:01.572071+00:00",
    "2024-10-12T12:43:01.572039+00:00",
    "2025-04-03T20:52:37.745175+00:00",
]
# Single-column frame. The variable name matters: the query text below
# says `LOAD FROM df`.
df = pd.DataFrame({"visit_time": values})
# Copy only the rows whose timestamp has no matching VisitTime node yet.
query = """
COPY VisitTime FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime) WHERE v.value = visit_time}
RETURN CAST(visit_time AS TIMESTAMP)
);
"""
conn.execute(query)
# values -> person
# One row per visit time, each paired with the person defined above. Reusing
# `person` (rather than repeating the literal name) keeps this frame in step
# with the Person node that was merged earlier.
pairs = {"time": values, "person": [person] * len(values)}
df = pd.DataFrame(pairs)
# Plain string, not an f-string: nothing is interpolated, so the Cypher
# braces need no escaping. Inside the query, `time` and `person` are the
# column names of `df`.
query = """
COPY VISITED_BY FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime)-[:VISITED_BY]->(n:Person) WHERE v.value = time AND n.name = person}
RETURN CAST(time AS TIMESTAMP), person
)
(from='VisitTime', to='Person');
"""
conn.execute(query)
# values -> city
# One row per visit time, each paired with the same city.
pairs = dict(time=values, city=[city] * len(values))
df = pd.DataFrame(pairs)
# Copy a VISIT_LOCATION relationship for every (time, city) row that does not
# already have one. Inside the query, `time` and `city` are columns of `df`.
query = """
COPY VISIT_LOCATION FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime)-[:VISIT_LOCATION]->(n:City) WHERE v.value = time AND n.name = city}
RETURN CAST(time AS TIMESTAMP), city
)
(from='VisitTime', to='City');
"""
conn.execute(query)
# For every visit time, look for the City reached via VISIT_LOCATION from the
# same VisitTime node that is linked to the person via VISITED_BY.
for v in values:
    print(f"Looking for City visited by {person} at time={v}")
    # The query text stays at column 0 so the string sent to the database is
    # unchanged.
    query = f"""
MATCH (c:City)<-[:VISIT_LOCATION]-(v:VisitTime)-[:VISITED_BY]->(p:Person)
WHERE p.name = "{person}" and v.value=timestamp("{v}")
RETURN DISTINCT c.name;
"""
    res = conn.execute(query)
    found_cities = set()
    # Drain the result; the values of each returned row are added to the set.
    while res.has_next():
        found_cities.update(res.get_next())
    print(f"{found_cities=}")
db.close()
The text was updated successfully, but these errors were encountered:
Kuzu version
v0.9.0
What operating system are you using?
MacOS Sequoia 15.4
What happened?
I am using a pandas DataFrame to populate node and rel tables. Node tables are populated correctly, but rel tables are not.
Attaching a Python file that demonstrates the problem.
Are there known steps to reproduce?
from kuzu import Database, Connection
import pandas as pd
import shutil
# Graph schema: three node tables plus two relationship tables, each of which
# points from a VisitTime node to the City or Person it is associated with.
schema = """
CREATE NODE TABLE IF NOT EXISTS Person (name STRING PRIMARY KEY);
CREATE NODE TABLE IF NOT EXISTS City (name STRING PRIMARY KEY);
CREATE NODE TABLE IF NOT EXISTS VisitTime (value TIMESTAMP PRIMARY KEY);
CREATE REL TABLE IF NOT EXISTS VISIT_LOCATION (FROM VisitTime TO City);
CREATE REL TABLE IF NOT EXISTS VISITED_BY (FROM VisitTime TO Person);
"""
db_path = "/tmp/kuzu"
# Remove whatever a previous run left behind so the reproduction starts clean.
try:
    shutil.rmtree(db_path)
except FileNotFoundError:
    # Nothing to delete on the very first run; that is not an error.
    pass
db = Database(db_path)
conn = Connection(db)
conn.execute(schema)
person = "Alice"
city = "New York"
# Create the single Person and City nodes that the relationships will target.
conn.execute(f"""MERGE (:Person {{name: "{person}"}})""")
conn.execute(f"""MERGE (f:City {{name: "{city}"}})""")
# Add visit times
# Timestamp strings with a UTC offset; each one is loaded as a VisitTime node.
values = [
    "2020-02-03T19:43:01.572071+00:00",
    "2024-10-12T12:43:01.572039+00:00",
    "2025-04-03T20:52:37.745175+00:00",
]
# Single-column frame. The variable name matters: the query text below
# says `LOAD FROM df`.
df = pd.DataFrame({"visit_time": values})
# Copy only the rows whose timestamp has no matching VisitTime node yet.
query = """
COPY VisitTime FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime) WHERE v.value = visit_time}
RETURN CAST(visit_time AS TIMESTAMP)
);
"""
conn.execute(query)
# values -> person
# One row per visit time, each paired with the person defined above. Reusing
# `person` (rather than repeating the literal name) keeps this frame in step
# with the Person node that was merged earlier.
pairs = {"time": values, "person": [person] * len(values)}
df = pd.DataFrame(pairs)
# Plain string, not an f-string: nothing is interpolated, so the Cypher
# braces need no escaping. Inside the query, `time` and `person` are the
# column names of `df`.
query = """
COPY VISITED_BY FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime)-[:VISITED_BY]->(n:Person) WHERE v.value = time AND n.name = person}
RETURN CAST(time AS TIMESTAMP), person
)
(from='VisitTime', to='Person');
"""
conn.execute(query)
# values -> city
# One row per visit time, each paired with the same city.
pairs = dict(time=values, city=[city] * len(values))
df = pd.DataFrame(pairs)
# Copy a VISIT_LOCATION relationship for every (time, city) row that does not
# already have one. Inside the query, `time` and `city` are columns of `df`.
query = """
COPY VISIT_LOCATION FROM (
LOAD FROM df WHERE NOT EXISTS {MATCH (v:VisitTime)-[:VISIT_LOCATION]->(n:City) WHERE v.value = time AND n.name = city}
RETURN CAST(time AS TIMESTAMP), city
)
(from='VisitTime', to='City');
"""
conn.execute(query)
# For every visit time, look for the City reached via VISIT_LOCATION from the
# same VisitTime node that is linked to the person via VISITED_BY.
for v in values:
    print(f"Looking for City visited by {person} at time={v}")
    # The query text stays at column 0 so the string sent to the database is
    # unchanged.
    query = f"""
MATCH (c:City)<-[:VISIT_LOCATION]-(v:VisitTime)-[:VISITED_BY]->(p:Person)
WHERE p.name = "{person}" and v.value=timestamp("{v}")
RETURN DISTINCT c.name;
"""
    res = conn.execute(query)
    # Read and print the result so the run actually shows what was found;
    # without this the query outcome is silently discarded.
    found_cities = set()
    while res.has_next():
        found_cities.update(res.get_next())
    print(f"{found_cities=}")
db.close()
The text was updated successfully, but these errors were encountered: