Skip to content

Commit 51b9df1

Browse files
committed
Add YAML parser
1 parent a4e2f6c commit 51b9df1

File tree

3 files changed

+23
-28
lines changed

3 files changed

+23
-28
lines changed

custom-action/action.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ runs:
88
with:
99
python-version: '3.10'
1010
- name: Install Dependencies
11-
run: pip install pandas beautifulsoup4
11+
run: pip install pandas beautifulsoup4 pyyaml
1212
shell: bash
1313
- name: Transform the csvs to html
1414
id: consolidate-csv

projects/alpaca.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ license:
4747
code:
4848
class: closed
4949
link:
50-
code_notes:
50+
notes:
5151

5252
architecture:
5353
class: open

scripts/consolidate_csv.py

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,24 @@
1-
import csv
1+
import yaml
22
import glob
33
import pandas as pd
44
from bs4 import BeautifulSoup
55
import datetime
66

77

88
def create_dataframe(files):
9-
# initialize list of rows
10-
lrows = []
11-
# Read the input CSV file, transpose the rows and columns and save to dictionary
12-
for i, fname in enumerate(files):
9+
# Read each input YAML file, flatten its nested keys into dotted column names and collect the rows in a dataframe
10+
df = pd.DataFrame()
11+
source_file = []
12+
for fname in files:
1313
with open(fname, 'r') as file:
14-
file.readline()
15-
reader = csv.reader(file)
16-
rows = list(reader)
17-
transposed = list(zip(*rows))
18-
# get column names
19-
if i == 0:
20-
column_names = transposed[0] + ("source_file",)
21-
# append transposed row to list of tuples
22-
lrows.append(tuple(transposed[1] + (fname[1:],)))
23-
df = pd.DataFrame(lrows, columns = column_names)
14+
file_df = pd.json_normalize(yaml.safe_load(file))
15+
# record the source path and append this file's flattened row to df
16+
source_file.append(fname[1:])
17+
df = pd.concat([df, file_df], axis=0)
2418
# get rid of rows without a project name
25-
df = df[df["project_name"] != ""]
26-
df.set_index("project_name", inplace = True)
19+
df = df[df["project.name"] != ""]
20+
df.set_index("project.name", inplace = True)
21+
df["source.file"] = source_file
2722
return df
2823

2924

@@ -54,7 +49,7 @@ def calculate_openness(df):
5449
for p in projects:
5550
cumul_openness = 0
5651
for v, w in openness_weights.items():
57-
vclass = df.loc[p, v + "_class"]
52+
vclass = df.loc[p, v + ".class"]
5853
vvalue = class_values[vclass] if vclass in class_values else 0
5954
cumul_openness += w * vvalue
6055
openness.append(cumul_openness)
@@ -77,19 +72,19 @@ def write_html(df):
7772
# also add classes to the <td> elements for colour coding and links to source of the class judgement: https://github.com/liesenf/awesome-open-chatgpt/issues/12
7873
cells = ["opencode", "llmdata", "llmweights", "rldata", "rlweights", "license", "code", "architecture", "preprint", "paper", "modelcard", "datasheet", "package", "api"]
7974
# first row
80-
r1_html = '<tr class="row-a"><td class="name-cell"><a target="_blank" href="{}" title="{}">{}</a></td>'.format(df.loc[p, "project_link"], df.loc[p, "project_notes"], p)
75+
r1_html = '<tr class="row-a"><td class="name-cell"><a target="_blank" href="{}" title="{}">{}</a></td>'.format(df.loc[p, "project.link"], df.loc[p, "project.notes"], p)
8176
for c in cells:
82-
cl = df.loc[p, c + "_class"]
83-
link = df.loc[p, c + "_link"]
84-
notes = df.loc[p, c + "_notes"]
77+
cl = df.loc[p, c + ".class"]
78+
link = df.loc[p, c + ".link"]
79+
notes = df.loc[p, c + ".notes"]
8580
symbol = "&#10004;&#xFE0E" if cl == "open" else "~" if cl == "partial" else "&#10008;" if cl == "closed" else "?"
8681
r1_html += '<td class="{} data-cell"><a target="_blank" href="{}" title="{}">{}</a></td>'.format(cl, link, notes, symbol)
8782
r1_html += "</tr>\n"
8883
html_table += r1_html
8984
# second row
90-
r2_html = '<tr class="row-b"><td class="org"><a target="_blank" href="{}" title="{}">{}</a></td>'.format(df.loc[p, "org_link"], df.loc[p, "org_name"], df.loc[p, "org_name"])
91-
r2_html += '<td colspan="3" class="llmbase">LLM base: {}</td><td colspan="3" class="rlbase">RL base: {}</td>'.format(df.loc[p, "project_llmbase"], df.loc[p, "project_rlbase"])
92-
source_link = "https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/blob/main" + df.loc[p, "source_file"]
85+
r2_html = '<tr class="row-b"><td class="org"><a target="_blank" href="{}" title="{}">{}</a></td>'.format(df.loc[p, "org.link"], df.loc[p, "org.name"], df.loc[p, "org.name"])
86+
r2_html += '<td colspan="3" class="llmbase">LLM base: {}</td><td colspan="3" class="rlbase">RL base: {}</td>'.format(df.loc[p, "project.llmbase"], df.loc[p, "project.rlbase"])
87+
source_link = "https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/blob/main" + df.loc[p, "source.file"]
9388
source_file = source_link.split("/")[-1]
9489
r2_html += '<td colspan="7"></td><td class="source-link"><a href="{}" title="{}" target="_blank">&sect;</a></td></tr>\n'.format(source_link, source_file)
9590
html_table += r2_html
@@ -120,7 +115,7 @@ def create_index(table):
120115

121116
# the directory containing the project files to combine
122117
path = r'./projects'
123-
all_files = glob.glob(path + "/*.csv")
118+
all_files = glob.glob(path + "/*.yaml")
124119

125120
df = create_dataframe(all_files)
126121
df = calculate_openness(df)

0 commit comments

Comments
 (0)