diff --git a/pdf/create_index.py b/pdf/create_index.py index cd6440113..6656e7f9f 100755 --- a/pdf/create_index.py +++ b/pdf/create_index.py @@ -13,6 +13,13 @@ def main(): + """ + This function reads the file index.html and extracts all headings from it. + It then creates a dictionary with the first letter of each heading as key, + and for each key, a dictionary containing all headings starting with that letter as value. + The second-level dictionaries map the actual heading + text to lists of the headings' IDs. + """ html = read_file('index.html') doc = BeautifulSoup(''.join(html), 'html.parser') hhh = defaultdict(lambda: defaultdict(list)) @@ -26,6 +33,14 @@ def print_hhh(hhh): + """ + Prints a table of contents for the commands in the given dictionary. + + The keys of `hhh` are first letters and each letter is mapped to another dictionary + that maps heading texts to lists of IDs. + Letters are printed in sorted order, + not in their insertion order in `hhh`. + """ letters = hhh.keys() for letter in sorted(letters): hh = hhh[letter] diff --git a/pdf/remove_links.py b/pdf/remove_links.py index 86f333539..f1da70e13 100755 --- a/pdf/remove_links.py +++ b/pdf/remove_links.py @@ -25,6 +25,18 @@ def main(): + """ + Strips links from the file '../index.html'. + + This function takes no arguments: the path of the file it modifies is + hard-coded as Path('..', 'index.html'). + + The file is read with read_file() and its lines are joined into a single + string, on which the link-removing substitutions are then performed. + + NOTE(review): assumes the script is run from the pdf/ directory, since + the path to index.html is relative; confirm against the invocation.
+ """ index_path = Path('..', 'index.html') lines = read_file(index_path) out = ''.join(lines) diff --git a/web/convert_table.py b/web/convert_table.py index 0f21d789c..a3302c301 100755 --- a/web/convert_table.py +++ b/web/convert_table.py @@ -1,7 +1,20 @@ #!/usr/bin/env python3 def convert_table(lines): + """ + Convert a table from ASCII art to Unicode box drawing characters or vice versa. + + :param lines: A list of strings representing the lines of the table. + :type lines: list(str) + """ def from_ascii(): + """ + Convert the table from ASCII to Unicode box-drawing characters. + + Reads the enclosing `lines` list (this nested function takes no + parameters of its own). + :returns: A string containing the equivalent Unicode box-drawing table. + """ out = [] first, header, third, *body, last = lines first = first.translate(str.maketrans({'-': '━', '+': '┯'})) @@ -18,6 +31,13 @@ def from_ascii(): out.append(f'┗{last[1:-1]}┛') return '\n'.join(out) def from_unicode(): + """ + Convert a Unicode box-drawing table to ASCII. + + Reads the enclosing `lines` list (no parameters of its own). + :returns str out: The same + text with all the Unicode box drawing characters replaced by ASCII ones. + """ out = [] for line in lines: line = line.translate(str.maketrans('┏┓┗┛┠┼┨┯┷━─┃│', '+++++++++--||')) diff --git a/web/update_plots.py b/web/update_plots.py index 050ad5f14..f6dd22b67 100755 --- a/web/update_plots.py +++ b/web/update_plots.py @@ -12,6 +12,10 @@ def main(): + """ + This function updates the covid deaths plot and then the covid cases plot, + printing progress along the way. 
+ """ print('Updating covid deaths...') update_covid_deaths() print('Updating covid cases...') @@ -19,6 +23,11 @@ def main(): def update_covid_deaths(): + """ + Update the plot of global COVID-19 deaths over time. + + :param df: A pandas DataFrame with columns 'Continent', 'Date', and 'Total Deaths per Million'. + """ covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', usecols=['iso_code', 'date', 'total_deaths', 'population']) continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff' @@ -41,7 +50,15 @@ def update_covid_deaths(): def update_confirmed_cases(): + """ + Update the file covid_cases.js with a plot of total cases, gold price, bitcoin price and Dow Jones index. + """ def main(): + """ + This function scrapes the data from the web and wrangles it into a pandas DataFrame. + It then creates an interactive plotly line graph of covid cases + in New York State. + """ df = wrangle_data(*scrape_data()) f = get_figure(df) update_file('covid_cases.js', f) @@ -49,11 +66,29 @@ def main(): write_to_png_file('covid_cases.png', f, width=960, height=315) def scrape_data(): + """ + This function scrapes data from the following sources: + 1. Our World in Data (Total Cases) + 2. Yahoo Finance (Bitcoin, Gold, Dow Jones) + The + function returns a list of pandas Series objects containing the scraped data. + """ def scrape_covid(): + """ + This function scrapes the total number of covid cases from a csv file on the internet. + """ url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv' df = pd.read_csv(url, usecols=['location', 'date', 'total_cases']) return df[df.location == 'World'].set_index('date').total_cases def scrape_yahoo(slug): + """ + Downloads historical stock price data from Yahoo Finance. + + :param str slug: The ticker symbol of the desired security. Expected to be a valid argument + for the `yfinance` function `Ticker()`. 
+ :returns pd.Series(float): A pandas Series with dates as indices and closing prices as values, + in the order provided by the download. + """ url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \ '?period1=1579651200&period2=9999999999&interval=1d&events=history' df = pd.read_csv(url, usecols=['Date', 'Close']) @@ -63,6 +98,14 @@ def scrape_yahoo(slug): return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']) def wrangle_data(covid, bitcoin, gold, dow): + """ + This function joins the Dow Jones, Gold and Bitcoin series into a single dataframe. + It then sorts them by date and interpolates missing values. It + discards rows before '2020-02-23'. + Finally it calculates percentages relative to day 1 of each series (Dow Jones, Gold, Bitcoin) + and adds a column + with covid cases. The columns of the result are sorted by their value on the last day. + """ df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates. df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s. yesterday = str(datetime.date.today() - datetime.timedelta(1)) @@ -72,6 +115,11 @@ def wrangle_data(covid, bitcoin, gold, dow): return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value. def get_figure(df): + """ + This function returns a plotly figure that shows worldwide COVID-19 total cases together with economic + indicators. It expects the dataframe produced by wrangle_data(); the + underlying data comes from Our World in Data and Yahoo Finance. + """ figure = go.Figure() for col_name in reversed(df.columns): yaxis = 'y1' if col_name == 'Total Cases' else 'y2' @@ -97,6 +145,12 @@ def get_figure(df): # def update_file(filename, figure): + """ + Updates the file at `filename` with the plotly figure `figure`. + + :param filename: The path to the JavaScript file that embeds the figure. + :param figure: The Plotly figure. 
+ """ lines = read_file(filename) f_json = figure.to_json(pretty=True).replace('\n', '\n ') out = lines[:6] + [f' {f_json}\n', ' )\n', '};\n']