Skip to content

Commit 1fd6774

Browse files
committed
auto update data sources if they're stale. still needs a refactor
1 parent 98f0d8a commit 1fd6774

File tree

4 files changed

+43
-25
lines changed

4 files changed

+43
-25
lines changed

docs/notebook.margot.data.ipynb

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@
5555
{
5656
"output_type": "execute_result",
5757
"data": {
58-
"text/plain": " SPY VTWO \\\n adj_close log_returns realised_vol adj_close \ndate \n2020-06-15 00:00:00+00:00 305.6968 0.009292 0.260969 113.92 \n2020-06-16 00:00:00+00:00 311.5808 0.019065 0.265222 116.76 \n2020-06-17 00:00:00+00:00 310.2865 -0.004163 0.265461 114.60 \n2020-06-18 00:00:00+00:00 310.4060 0.000385 0.264006 114.65 \n2020-06-19 00:00:00+00:00 308.6400 -0.005706 0.263797 113.85 \n\n margot \n log_returns realised_vol spy_russ_ratio \ndate \n2020-06-15 00:00:00+00:00 0.022998 0.433523 2.683434 \n2020-06-16 00:00:00+00:00 0.024624 0.437526 2.668558 \n2020-06-17 00:00:00+00:00 -0.018673 0.442664 2.707561 \n2020-06-18 00:00:00+00:00 0.000436 0.441551 2.707423 \n2020-06-19 00:00:00+00:00 -0.007002 0.441814 2.710935 ",
59-
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead tr th {\n text-align: left;\n }\n\n .dataframe thead tr:last-of-type th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr>\n <th></th>\n <th colspan=\"3\" halign=\"left\">SPY</th>\n <th colspan=\"3\" halign=\"left\">VTWO</th>\n <th>margot</th>\n </tr>\n <tr>\n <th></th>\n <th>adj_close</th>\n <th>log_returns</th>\n <th>realised_vol</th>\n <th>adj_close</th>\n <th>log_returns</th>\n <th>realised_vol</th>\n <th>spy_russ_ratio</th>\n </tr>\n <tr>\n <th>date</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-06-15 00:00:00+00:00</th>\n <td>305.6968</td>\n <td>0.009292</td>\n <td>0.260969</td>\n <td>113.92</td>\n <td>0.022998</td>\n <td>0.433523</td>\n <td>2.683434</td>\n </tr>\n <tr>\n <th>2020-06-16 00:00:00+00:00</th>\n <td>311.5808</td>\n <td>0.019065</td>\n <td>0.265222</td>\n <td>116.76</td>\n <td>0.024624</td>\n <td>0.437526</td>\n <td>2.668558</td>\n </tr>\n <tr>\n <th>2020-06-17 00:00:00+00:00</th>\n <td>310.2865</td>\n <td>-0.004163</td>\n <td>0.265461</td>\n <td>114.60</td>\n <td>-0.018673</td>\n <td>0.442664</td>\n <td>2.707561</td>\n </tr>\n <tr>\n <th>2020-06-18 00:00:00+00:00</th>\n <td>310.4060</td>\n <td>0.000385</td>\n <td>0.264006</td>\n <td>114.65</td>\n <td>0.000436</td>\n <td>0.441551</td>\n <td>2.707423</td>\n </tr>\n <tr>\n <th>2020-06-19 00:00:00+00:00</th>\n <td>308.6400</td>\n <td>-0.005706</td>\n <td>0.263797</td>\n <td>113.85</td>\n <td>-0.007002</td>\n <td>0.441814</td>\n <td>2.710935</td>\n </tr>\n </tbody>\n</table>\n</div>"
58+
"text/plain": " SPY VTWO \\\n adj_close log_returns realised_vol adj_close \ndate \n2020-06-23 00:00:00+00:00 312.05 0.004593 0.260809 115.2159 \n2020-06-24 00:00:00+00:00 304.09 -0.025840 0.265557 111.4867 \n2020-06-25 00:00:00+00:00 307.35 0.010663 0.259729 113.0700 \n2020-06-26 00:00:00+00:00 300.05 -0.024038 0.269796 110.4600 \n2020-06-29 00:00:00+00:00 304.46 0.014591 0.272220 113.9400 \n\n margot \n log_returns realised_vol spy_russ_ratio \ndate \n2020-06-23 00:00:00+00:00 0.004250 0.429186 2.708394 \n2020-06-24 00:00:00+00:00 -0.032902 0.426147 2.727590 \n2020-06-25 00:00:00+00:00 0.014102 0.411958 2.718228 \n2020-06-26 00:00:00+00:00 -0.023354 0.419855 2.716368 \n2020-06-29 00:00:00+00:00 0.031019 0.425613 2.672108 ",
59+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead tr th {\n text-align: left;\n }\n\n .dataframe thead tr:last-of-type th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr>\n <th></th>\n <th colspan=\"3\" halign=\"left\">SPY</th>\n <th colspan=\"3\" halign=\"left\">VTWO</th>\n <th>margot</th>\n </tr>\n <tr>\n <th></th>\n <th>adj_close</th>\n <th>log_returns</th>\n <th>realised_vol</th>\n <th>adj_close</th>\n <th>log_returns</th>\n <th>realised_vol</th>\n <th>spy_russ_ratio</th>\n </tr>\n <tr>\n <th>date</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2020-06-23 00:00:00+00:00</th>\n <td>312.05</td>\n <td>0.004593</td>\n <td>0.260809</td>\n <td>115.2159</td>\n <td>0.004250</td>\n <td>0.429186</td>\n <td>2.708394</td>\n </tr>\n <tr>\n <th>2020-06-24 00:00:00+00:00</th>\n <td>304.09</td>\n <td>-0.025840</td>\n <td>0.265557</td>\n <td>111.4867</td>\n <td>-0.032902</td>\n <td>0.426147</td>\n <td>2.727590</td>\n </tr>\n <tr>\n <th>2020-06-25 00:00:00+00:00</th>\n <td>307.35</td>\n <td>0.010663</td>\n <td>0.259729</td>\n <td>113.0700</td>\n <td>0.014102</td>\n <td>0.411958</td>\n <td>2.718228</td>\n </tr>\n <tr>\n <th>2020-06-26 00:00:00+00:00</th>\n <td>300.05</td>\n <td>-0.024038</td>\n <td>0.269796</td>\n <td>110.4600</td>\n <td>-0.023354</td>\n <td>0.419855</td>\n <td>2.716368</td>\n </tr>\n <tr>\n <th>2020-06-29 00:00:00+00:00</th>\n <td>304.46</td>\n <td>0.014591</td>\n <td>0.272220</td>\n <td>113.9400</td>\n <td>0.031019</td>\n <td>0.425613</td>\n <td>2.672108</td>\n </tr>\n </tbody>\n</table>\n</div>"
6060
},
6161
"metadata": {},
6262
"execution_count": 4

margot/data/alphavantage.py

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,34 @@
11
import os
22
import logging
3-
from datetime import datetime
4-
import pytz
53

64
from alpha_vantage.timeseries import TimeSeries
75

8-
from margot.data.columns import BaseColumn
6+
from margot.data.columns import BaseColumn, DailyMixin
97

108
logger = logging.getLogger(__name__)
119

1210

13-
class DailyAdjusted(BaseColumn):
14-
"""A single Symbol time series from AlphaVantage.
11+
class DailyAdjusted(BaseColumn, DailyMixin):
12+
"""A daily time series from AlphaVantage.
1513
1614
Example::
1715
18-
from margot.data.column import alphavantage as av
16+
from margot import alphavantage as av
1917
20-
volume = av.Column(time_series='adjusted_close')
18+
volume = av.DailyAdjusted(time_series='adjusted_close')
2119
2220
Args:
23-
time_series (str): the name of the time-series that will be returned
21+
time_series (str): the name of the time-series that will be returned.
22+
Can be one of: 'open', 'high', 'low', 'close', 'adjusted_close',
23+
'volume', 'dividend_amount' or 'split_coefficient'.
2424
"""
2525

2626
def clean(self, df):
27-
"""Clean the df."""
28-
# Standardise the column names
27+
"""
28+
Clean the dataframe.
29+
30+
Alphavantage has odd column names, so we'll fix those.
31+
"""
2932
df = df.rename(mapper={
3033
'1. open': 'open',
3134
'2. high': 'high',
@@ -38,12 +41,14 @@ def clean(self, df):
3841
}, axis='columns')
3942
return super().clean(df)
4043

41-
def stale(self):
42-
now = datetime.now(tz=pytz.UTC)
43-
last_complete_trading_day = self.trading_calendar.previous_close()
44-
4544
def fetch(self, symbol: str):
46-
"""Fetch from remote - this could be the only service specific thing."""
45+
"""
46+
Fetch from remote - this could be the only service specific thing.
47+
48+
Args:
49+
symbol (str): the name of the symbol to fetch
50+
51+
"""
4752
logger.info('fetching ({}) from alphavantage'.format(symbol))
4853
ts = TimeSeries(
4954
key=self.env.get(

margot/data/cboe.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22

33
import pandas as pd
44

5-
from margot.data.columns import BaseColumn
6-
5+
from margot.data.columns import BaseColumn, DailyMixin
76
logger = logging.getLogger(__name__)
87

98

10-
class Column(BaseColumn):
9+
class Column(BaseColumn, DailyMixin):
1110
"""A single OHLC timeiseries from CBOE.
1211
1312
Currently supports the symbols, 'VIX' and 'VIX3M'.

margot/data/columns.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,25 @@
11
import os
22
import logging
3+
from datetime import datetime
34
from pathlib import Path
5+
46
import pytz
57
import pandas as pd
68

79
logger = logging.getLogger(__name__)
810

911

12+
class DailyMixin(object):
    """Mixin that adds a staleness check for columns holding daily data.

    The host class is expected to provide ``_full_series`` (a pandas object
    with a datetime index) and ``trading_calendar`` (an object exposing
    ``previous_close(timestamp)``), because ``stale`` reads both.
    """

    @property
    def stale(self):
        """Report whether the cached series may be missing recent data.

        This only performs the check; it does not refresh anything itself.

        Returns:
            bool: True when there is no cached data at all, or when the newest
            cached row is dated before the trading calendar's previous close.
            False otherwise.
        """
        full_series = getattr(self, '_full_series', None)

        # Nothing cached (or an empty frame) can never be up to date, and
        # index.max() on an empty index yields NaT, which has no .date().
        if full_series is None or full_series.empty:
            return True

        latest = full_series.index.max()
        if pd.isna(latest):
            return True

        # Compare calendar dates only: the cached index and the calendar's
        # close time may carry different times of day.
        now = pd.Timestamp.now(tz='UTC')
        previous_close = self.trading_calendar.previous_close(now)
        return latest.date() < previous_close.date()
21+
22+
1023
class BaseColumn(object):
1124
"""
1225
BaseColumn is the super class for implementing Columns.
@@ -75,15 +88,15 @@ def clone(self):
7588
def setup(self, symbol: str, trading_calendar, env: dict):
7689
"""
7790
Called by Symbol after it is instantiated.
78-
79-
Ensures that this column knows the context in which it operates,
91+
92+
Ensures that this column knows the context in which it operates,
8093
including what Symbol we are using and what the trading_calendar is.
8194
"""
8295
self.symbol = symbol
8396
self.env = env
8497
self.trading_calendar = trading_calendar
8598

86-
# TODO File names should be managed in a central configuration
99+
# TODO: File names should be managed in a central configuration
87100
data_cache = env.get('DATA_CACHE', os.environ.get('DATA_CACHE'))
88101
Path(data_cache).mkdir(parents=True, exist_ok=True)
89102

@@ -107,8 +120,6 @@ def load_or_fetch_series(self, symbol: str):
107120
In order to return the time-series, first determine if we
108121
have it and can return it, or if we need to fetch it.
109122
110-
TODO: Test for up-to-dateness (or maybe that happens in Symbol)?
111-
112123
Args:
113124
symbol (str): the name of the symbol to fetch.
114125
@@ -157,6 +168,9 @@ def series(self):
157168
"""
158169
if self._full_series is None:
159170
self._full_series = self.load_or_fetch_series(self.symbol)
171+
if self.stale:
172+
self.refresh()
173+
self.load(self.symbol)
160174
self._series = self._full_series.copy()
161175

162176
self.INITED = True

0 commit comments

Comments
 (0)