ablations.py
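"""Ablation evaluation for the synthetic high/low classification task.

Loads trained KGE models from the ablation experiment folders, scores the
/m/is_a test triples, derives a high/low prediction per head entity, and
compares it against the numerical literals (thresholded at 0.5). Accuracies
for the original and random ablation runs are collected into a single table
and written to Experiments/Ablations/scores.csv.
"""
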
import os

import pandas as pd
import torch

from src.utils import load_model_components


def calculate_abalation_scores(exp_path: str):
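    """Score one experiment folder on the synthetic high/low classification task.

    Loads the trained KGE model (plus its configs and entity/relation index
    maps) from `exp_path`, applies a sigmoid to the model scores of every
    /m/is_a test triple, and predicts "high" or "low" for each head entity by
    comparing its scores for the /m/high and /m/low tails. Predictions are
    checked against the ground-truth class derived from
    KGs/Synthetic/numerical_literals.txt (literal value > 0.5 counts as "high").

    Returns a (model_name, accuracy) tuple, with model_name read from the
    experiment configs.
    """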
    kge_model, configs, entity_to_idx, relation_to_idx = load_model_components(
        kge_path=exp_path
    )
    test_kg_dir = configs["dataset_dir"] + "/test.txt"

    # process test set and map to idx
    test_df = pd.read_csv(
        test_kg_dir,
        sep="\t",
        header=None,
        names=["head", "relation", "tail"],
    )
    test_df_isa = test_df[test_df["relation"] == "/m/is_a"].reset_index(drop=True)
    test_df_isa["head_idx"] = test_df_isa["head"].map(entity_to_idx)
    test_df_isa["rel_idx"] = test_df_isa["relation"].map(relation_to_idx)
    test_df_isa["tail_idx"] = test_df_isa["tail"].map(entity_to_idx)

    # Create tensor from DataFrame
    triples = torch.tensor(
        test_df_isa[["head_idx", "rel_idx", "tail_idx"]].values, dtype=torch.long
    )

    # Forward pass with no gradient computation
    with torch.no_grad():
        test_df_isa["ranks"] = torch.sigmoid(kge_model(triples)).tolist()

    test_df_literals = pd.read_csv(
        "KGs/Synthetic/numerical_literals.txt",
        sep="\t",
        header=None,
        names=["head", "relation", "tail"],
    )

    # Filter and merge data
    high_df = test_df_isa[test_df_isa["tail"] == "/m/high"][["head", "ranks"]]
    low_df = test_df_isa[test_df_isa["tail"] == "/m/low"][["head", "ranks"]]
    merged_df = pd.merge(high_df, low_df, on="head", suffixes=("_high", "_low"))

    # Create predicted class column
    merged_df["predicted"] = (merged_df["ranks_high"] > merged_df["ranks_low"]).map(
        {True: "high", False: "low"}
    )

    # Prepare test literals data
    test_df_literals["class"] = (test_df_literals["tail"].astype(float) > 0.5).map(
        {True: "high", False: "low"}
    )

    # Merge with test literals data
    merged = merged_df.merge(test_df_literals[["head", "class"]], on="head", how="left")

    # Calculate accuracy
    true_high = (merged["predicted"] == "high") & (merged["class"] == "high")
    true_low = (merged["predicted"] == "low") & (merged["class"] == "low")
    accuracy = (true_high.sum() + true_low.sum()) / len(merged)
    return configs["model"], accuracy


def evaluate_ablations(return_df=False):
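    """Compare original and random ablation runs across all trained models.

    Each sub-directory of the two experiment paths is treated as one trained
    model; its accuracy is computed with calculate_abalation_scores and the
    results are merged into a DataFrame with columns model, acc_org and
    acc_rand. The table is printed, written to Experiments/Ablations/scores.csv
    (tab-separated), and returned only when return_df=True.
    """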
    original_path = "Experiments/new_ablations/Synthetic_0.05_combined"
    random_path = "Experiments/new_ablations/Synthetic_random_0.05_combined"

    # Step 1: Loop through all folders under actual and random paths
    original_scores = {}
    for folder in os.listdir(original_path):
        model_dir = os.path.join(original_path, folder)
        if os.path.isdir(model_dir):
            model_name, score = calculate_abalation_scores(model_dir)
            original_scores[folder] = score

    random_scores = {}
    for folder in os.listdir(random_path):
        model_dir = os.path.join(random_path, folder)
        if os.path.isdir(model_dir):
            model_name, score = calculate_abalation_scores(model_dir)
            random_scores[folder] = score

    # Step 2: Create DataFrame
    # Merge both sets using model name as the key
    all_models = set(original_scores.keys()).union(random_scores.keys())
    rows = []
    for model in all_models:
        rows.append(
            {
                "model": model,
                "acc_org": original_scores.get(model),
                "acc_rand": random_scores.get(model),
            }
        )
    df = pd.DataFrame(rows)

    # Optional: sort by accuracy or model name
    df = df.sort_values(by="model").reset_index(drop=True)

    storage_path = "Experiments/Ablations"
    os.makedirs(storage_path, exist_ok=True)
    df.to_csv(f"{storage_path}/scores.csv", sep="\t", index=False)
    print(df)
    return df if return_df else None


if __name__ == "__main__":
    evaluate_ablations()
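# Usage sketch (assumption: the module is importable as `ablations` and the
# script is run from the repository root so that the relative Experiments/ and
# KGs/Synthetic/ paths resolve):
#
#     python ablations.py
#
# or, to keep the merged scores table in memory:
#
#     from ablations import evaluate_ablations
#     scores_df = evaluate_ablations(return_df=True)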