REF: Save only df.index in _Array._opts instead of the full df

kernc · kernc · commit 2c4721233225 · 2021-03-31T21:37:34.000+02:00
This should somewhat decrease the serialized size of `_Array` objects
when they are pickled for multiprocessing.
diff --git a/backtesting/_plotting.py b/backtesting/_plotting.py
@@ -110,8 +110,8 @@ def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
     indicators = [_Indicator(i.df.resample(freq, label='right').mean()
                              .dropna().reindex(df.index).values.T,
                              **dict(i._opts, name=i.name,
-                                    # HACK: override `data` for its index
-                                    data=pd.Series(np.nan, index=df.index)))
+                                    # Replace saved index with the resampled one
+                                    index=df.index))
                   for i in indicators]
     assert not indicators or indicators[0].df.index.equals(df.index)
 
diff --git a/backtesting/_util.py b/backtesting/_util.py
@@ -85,13 +85,14 @@ def to_series(self):
     @property
     def s(self) -> pd.Series:
         values = np.atleast_2d(self)
-        return pd.Series(values[0], index=self._opts['data'].index, name=self.name)
+        index = self._opts['index'][:values.shape[1]]
+        return pd.Series(values[0], index=index, name=self.name)
 
     @property
     def df(self) -> pd.DataFrame:
         values = np.atleast_2d(np.asarray(self))
-        df = pd.DataFrame(values.T, index=self._opts['data'].index,
-                          columns=[self.name] * len(values))
+        index = self._opts['index'][:values.shape[1]]
+        df = pd.DataFrame(values.T, index=index, columns=[self.name] * len(values))
         return df
 
 
@@ -128,10 +129,11 @@ def _set_length(self, i):
         self.__cache.clear()
 
     def _update(self):
-        self.__arrays = {col: _Array(arr, data=self)
+        index = self.__df.index.copy()
+        self.__arrays = {col: _Array(arr, index=index)
                          for col, arr in self.__df.items()}
         # Leave index as Series because pd.Timestamp nicer API to work with
-        self.__arrays['__index'] = self.__df.index.copy()
+        self.__arrays['__index'] = index
 
     def __repr__(self):
         i = min(self.__i, len(self.__df) - 1)
diff --git a/backtesting/backtesting.py b/backtesting/backtesting.py
@@ -153,7 +153,7 @@ def init():
         value = _Indicator(value, name=name, plot=plot, overlay=overlay,
                            color=color, scatter=scatter,
                            # _Indicator.s Series accessor uses this:
-                           data=self.data)
+                           index=self.data.index)
         self._indicators.append(value)
         return value
 
diff --git a/backtesting/test/_test.py b/backtesting/test/_test.py
@@ -901,9 +901,11 @@ def next(self):
         Backtest(GOOG.iloc[:20], S).run()
 
     def test_indicators_picklable(self):
+        bt = Backtest(SHORT_DATA, SmaCross)
         with ProcessPoolExecutor() as executor:
-            stats = executor.submit(Backtest.run, Backtest(SHORT_DATA, SmaCross)).result()
+            stats = executor.submit(Backtest.run, bt).result()
         assert stats._strategy._indicators[0]._opts, '._opts and .name were not unpickled'
+        bt.plot(results=stats, resample='2d', open_browser=False)
 
 
 class TestDocs(TestCase):