1
- import csv
1
+ import yaml
2
2
import glob
3
3
import pandas as pd
4
4
from bs4 import BeautifulSoup
5
5
import datetime
6
6
7
7
8
8
def create_dataframe(files):
    """Load the project YAML files into one DataFrame indexed by project name.

    Each file is parsed with ``yaml.safe_load`` and flattened with
    ``pandas.json_normalize``, so nested mappings become dotted column names
    (for example ``project.name``).

    Args:
        files: Iterable of path strings pointing at the YAML files to read.

    Returns:
        A DataFrame indexed by ``project.name``. Its last column,
        ``source.file``, holds the path each row was read from with the
        first character removed. Rows whose project name is missing or an
        empty string are dropped. If ``files`` is empty, an empty DataFrame
        with the same index name and the ``source.file`` column is returned.

    Raises:
        KeyError: If files were read but none of them defines
            ``project.name``.
    """
    frames = []
    for fname in files:
        with open(fname, "r", encoding="utf-8") as file:
            file_df = pd.json_normalize(yaml.safe_load(file))
        # Tag the rows with their origin *before* combining and filtering, so
        # the path stays attached to the right row even when rows are dropped
        # later or a file yields more than one row. The first character is
        # stripped (the leading "." of a "./projects/..." path) so the value
        # can be appended to a repository URL.
        file_df["source.file"] = fname[1:]
        frames.append(file_df)

    if not frames:
        # Nothing to read: return an empty frame with the expected layout
        # instead of failing on the missing "project.name" column.
        return pd.DataFrame(columns=["source.file"]).rename_axis("project.name")

    # Concatenate once; growing a DataFrame inside the loop is quadratic.
    df = pd.concat(frames, axis=0, ignore_index=True)

    # get rid of rows without a project name (empty string or missing value)
    names = df["project.name"]
    df = df[names.notna() & (names != "")]
    df = df.set_index("project.name")

    # Keep "source.file" as the last column, as it was when it was appended
    # after all other columns had been collected.
    ordered = [col for col in df.columns if col != "source.file"]
    ordered.append("source.file")
    return df[ordered]
28
23
29
24
@@ -54,7 +49,7 @@ def calculate_openness(df):
54
49
for p in projects :
55
50
cumul_openness = 0
56
51
for v , w in openness_weights .items ():
57
- vclass = df .loc [p , v + "_class " ]
52
+ vclass = df .loc [p , v + ".class " ]
58
53
vvalue = class_values [vclass ] if vclass in class_values else 0
59
54
cumul_openness += w * vvalue
60
55
openness .append (cumul_openness )
@@ -77,19 +72,19 @@ def write_html(df):
77
72
# also add classes to the <td> elements for colour coding and links to source of the class judgement: https://github.com/liesenf/awesome-open-chatgpt/issues/12
78
73
cells = ["opencode" , "llmdata" , "llmweights" , "rldata" , "rlweights" , "license" , "code" , "architecture" , "preprint" , "paper" , "modelcard" , "datasheet" , "package" , "api" ]
79
74
# first row
80
- r1_html = '<tr class="row-a"><td class="name-cell"><a target="_blank" href="{}" title="{}">{}</a></td>' .format (df .loc [p , "project_link " ], df .loc [p , "project_notes " ], p )
75
+ r1_html = '<tr class="row-a"><td class="name-cell"><a target="_blank" href="{}" title="{}">{}</a></td>' .format (df .loc [p , "project.link " ], df .loc [p , "project.notes " ], p )
81
76
for c in cells :
82
- cl = df .loc [p , c + "_class " ]
83
- link = df .loc [p , c + "_link " ]
84
- notes = df .loc [p , c + "_notes " ]
77
+ cl = df .loc [p , c + ".class " ]
78
+ link = df .loc [p , c + ".link " ]
79
+ notes = df .loc [p , c + ".notes " ]
85
80
symbol = "✔︎" if cl == "open" else "~" if cl == "partial" else "✘" if cl == "closed" else "?"
86
81
r1_html += '<td class="{} data-cell"><a target="_blank" href="{}" title="{}">{}</a></td>' .format (cl , link , notes , symbol )
87
82
r1_html += "</tr>\n "
88
83
html_table += r1_html
89
84
# second row
90
- r2_html = '<tr class="row-b"><td class="org"><a target="_blank" href="{}" title="{}">{}</a></td>' .format (df .loc [p , "org_link " ], df .loc [p , "org_name " ], df .loc [p , "org_name " ])
91
- r2_html += '<td colspan="3" class="llmbase">LLM base: {}</td><td colspan="3" class="rlbase">RL base: {}</td>' .format (df .loc [p , "project_llmbase " ], df .loc [p , "project_rlbase " ])
92
- source_link = "https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/blob/main" + df .loc [p , "source_file " ]
85
+ r2_html = '<tr class="row-b"><td class="org"><a target="_blank" href="{}" title="{}">{}</a></td>' .format (df .loc [p , "org.link " ], df .loc [p , "org.name " ], df .loc [p , "org.name " ])
86
+ r2_html += '<td colspan="3" class="llmbase">LLM base: {}</td><td colspan="3" class="rlbase">RL base: {}</td>' .format (df .loc [p , "project.llmbase " ], df .loc [p , "project.rlbase " ])
87
+ source_link = "https://github.com/opening-up-chatgpt/opening-up-chatgpt.github.io/blob/main" + df .loc [p , "source.file " ]
93
88
source_file = source_link .split ("/" )[- 1 ]
94
89
r2_html += '<td colspan="7"></td><td class="source-link"><a href="{}" title="{}" target="_blank">§</a></td></tr>\n ' .format (source_link , source_file )
95
90
html_table += r2_html
@@ -120,7 +115,7 @@ def create_index(table):
120
115
121
116
# the path of the YAML files to combine
122
117
path = r'./projects'
123
- all_files = glob .glob (path + "/*.csv " )
118
+ all_files = glob .glob (path + "/*.yaml " )
124
119
125
120
df = create_dataframe (all_files )
126
121
df = calculate_openness (df )
0 commit comments