Skip to content

Commit 4a23838

Browse files
authored
Merge pull request #87 from pymc-labs/quickstart-fixes-and-general-improvement
Quickstart fixes and general improvement
2 parents cce4909 + 960c8f2 commit 4a23838

17 files changed

+302
-249
lines changed

CONTRIBUTING.md

+22-1
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,31 @@ If there are autodoc issues/errors in remote builds of the docs, we need to add
7272

7373
## New releases [work in progress]
7474

75+
### Test release to `test.pypi.org` (manual)
76+
7577
1. Bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
78+
2. Upload the build to test.pypi.org. _Note that this requires a username and password for test.pypi.org_. In the root directory, type the following:
79+
```bash
80+
rm -rf dist
81+
python setup.py sdist
82+
twine upload --repository testpypi dist/*
83+
```
84+
3. At this point the updated build is available on test.pypi.org. We can test that it works as expected by installing it (into a test environment) from test.pypi.org with:
85+
86+
```bash
87+
conda create -n causalpy-test python
88+
conda activate causalpy-test
89+
python3 -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ causalpy
90+
```
91+
92+
4. Now start a Python or IPython session and follow the quickstart instructions to confirm that things work.
93+
94+
### Actual release to `pypi.org` (manual)
95+
96+
1. If not done in the previous step, bump the release version in `causalpy/version.py`. This is automatically read by `setup.py` and `docs/config.py`.
7697
2. Update on pypi.org. In the root directory:
7798
- `python setup.py sdist`
78-
- update to pypi.org with `twine upload dist/*`
99+
- update to pypi.org with `twine upload dist/*`. Note that this requires a username and password for pypi.org.
79100
3. Readthedocs:
80101
- Docs should be built remotely every time there is a pull request
81102
- See here https://docs.readthedocs.io/en/stable/tutorial/#versioning-documentation for versioning the docs

README.md

+4-8
Original file line numberDiff line numberDiff line change
@@ -37,27 +37,23 @@ pip install git+https://github.com/pymc-labs/CausalPy.git
3737
## Quickstart
3838

3939
```python
40-
from causalpy.pymc_experiments import RegressionDiscontinuity
41-
from causalpy.pymc_models import LinearRegression
42-
import pandas as pd
43-
import pathlib
40+
import causalpy as cp
4441

4542

4643
# Import and process data
47-
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
4844
df = (
49-
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
45+
cp.load_data("drinking")
5046
.rename(columns={"agecell": "age"})
5147
.assign(treated=lambda df_: df_.age > 21)
5248
.dropna(axis=0)
5349
)
5450

5551
# Run the analysis
56-
result = RegressionDiscontinuity(
52+
result = cp.pymc_experiments.RegressionDiscontinuity(
5753
df,
5854
formula="all ~ 1 + age + treated",
5955
running_variable_name="age",
60-
prediction_model=LinearRegression(),
56+
prediction_model=cp.pymc_models.LinearRegression(),
6157
treatment_threshold=21,
6258
)
6359

causalpy/__init__.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import causalpy.pymc_experiments
2+
import causalpy.pymc_models
3+
import causalpy.skl_experiments
4+
import causalpy.skl_models
5+
6+
from .data import load_data

causalpy/data/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Code for loading datasets."""
2+
from .datasets import load_data
3+
4+
__all__ = ["load_data"]

causalpy/data/datasets.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
2+
import pathlib
3+
4+
import pandas as pd
5+
6+
import causalpy as cp
7+
8+
# Registry of the example datasets that ship with the package. Each key is the
# short name accepted by ``load_data``; the value records the CSV file name
# inside the data directory.
DATASETS = {
    "banks": {"filename": "banks.csv"},
    "did": {"filename": "did.csv"},
    "drinking": {"filename": "drinking.csv"},
    "its": {"filename": "its.csv"},
    "its simple": {"filename": "its_simple.csv"},
    "rd": {"filename": "regression_discontinuity.csv"},
    "sc": {"filename": "synthetic_control.csv"},
}


def get_data_home() -> pathlib.Path:
    """Return the path of the data directory.

    This module lives in the same directory as the bundled CSV files, so the
    directory is derived from this module's own ``__file__``. That avoids
    reaching back into the top-level package (which imports this module while
    it is still initialising) and avoids hard-coding the package name.
    """
    return pathlib.Path(__file__).parent


def load_data(dataset: str = None) -> pd.DataFrame:
    """Load one of the bundled example datasets as a ``pandas.DataFrame``.

    Parameters
    ----------
    dataset
        Short name of the dataset; must be one of the keys of ``DATASETS``.

    Returns
    -------
    pandas.DataFrame
        The contents of the corresponding CSV file, read with
        ``pandas.read_csv`` using its default options.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known dataset name (including the default
        ``None``). The message lists the valid names.
    """
    try:
        datafile = DATASETS[dataset]
    except KeyError:
        # Keep the original message prefix, but tell the caller what is valid.
        raise ValueError(
            f"Dataset {dataset} not found! "
            f"Available datasets: {sorted(DATASETS)}"
        ) from None
    return pd.read_csv(get_data_home() / datafile["filename"])

causalpy/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.2"
1+
__version__ = "0.0.3"

docs/index.rst

+4-8
Original file line numberDiff line numberDiff line change
@@ -29,27 +29,23 @@ Quickstart
2929

3030
.. code-block:: python
3131
32-
from causalpy.pymc_experiments import RegressionDiscontinuity
33-
from causalpy.pymc_models import LinearRegression
34-
import pandas as pd
35-
import pathlib
32+
import causalpy as cp
3633
3734
3835
# Import and process data
39-
rd_data_path = pathlib.Path.cwd().parents[1] / "causalpy" / "data" / "drinking.csv"
4036
df = (
41-
pd.read_csv(rd_data_path)[["agecell", "all", "mva", "suicide"]]
37+
cp.load_data("drinking")
4238
.rename(columns={"agecell": "age"})
4339
.assign(treated=lambda df_: df_.age > 21)
4440
.dropna(axis=0)
4541
)
4642
4743
# Run the analysis
48-
result = RegressionDiscontinuity(
44+
result = cp.pymc_experiments.RegressionDiscontinuity(
4945
df,
5046
formula="all ~ 1 + age + treated",
5147
running_variable_name="age",
52-
prediction_model=LinearRegression(),
48+
prediction_model=cp.pymc_models.LinearRegression(),
5349
treatment_threshold=21,
5450
)
5551

docs/notebooks/did_pymc.ipynb

+11-17
Large diffs are not rendered by default.

docs/notebooks/did_pymc_banks.ipynb

+30-20
Large diffs are not rendered by default.

docs/notebooks/did_skl.ipynb

+3-28
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
16-
"import pandas as pd\n",
17-
"import pathlib\n",
16+
"import causalpy as cp\n",
1817
"import arviz as az"
1918
]
2019
},
@@ -27,28 +26,13 @@
2726
"az.style.use(\"arviz-darkgrid\")"
2827
]
2928
},
30-
{
31-
"cell_type": "markdown",
32-
"metadata": {},
33-
"source": [
34-
"## Load data"
35-
]
36-
},
3729
{
3830
"cell_type": "code",
3931
"execution_count": 3,
4032
"metadata": {},
4133
"outputs": [],
4234
"source": [
43-
"did_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"did.csv\"\n",
44-
"data = pd.read_csv(did_data_path)"
45-
]
46-
},
47-
{
48-
"cell_type": "markdown",
49-
"metadata": {},
50-
"source": [
51-
"## Run the analysis"
35+
"data = cp.load_data(\"did\")"
5236
]
5337
},
5438
{
@@ -57,26 +41,17 @@
5741
"metadata": {},
5842
"outputs": [],
5943
"source": [
60-
"from causalpy.skl_experiments import DifferenceInDifferences\n",
6144
"from sklearn.linear_model import LinearRegression\n",
6245
"\n",
63-
"# NOTE: `treated` is a deterministic function of `t` and `group`. So add this function into the formula.\n",
6446
"\n",
65-
"result = DifferenceInDifferences(\n",
47+
"result = cp.skl_experiments.DifferenceInDifferences(\n",
6648
" data,\n",
6749
" formula=\"y ~ 1 + group + t + treated:group\",\n",
6850
" time_variable_name=\"t\",\n",
6951
" prediction_model=LinearRegression(),\n",
7052
")"
7153
]
7254
},
73-
{
74-
"cell_type": "markdown",
75-
"metadata": {},
76-
"source": [
77-
"## Examine the results"
78-
]
79-
},
8055
{
8156
"cell_type": "code",
8257
"execution_count": 5,

docs/notebooks/rd_pymc.ipynb

+12-33
Large diffs are not rendered by default.

docs/notebooks/rd_pymc_drinking.ipynb

+41-39
Large diffs are not rendered by default.

docs/notebooks/rd_skl.ipynb

+88-19
Large diffs are not rendered by default.

docs/notebooks/rd_skl_drinking.ipynb

+4-21
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
"metadata": {},
1818
"outputs": [],
1919
"source": [
20-
"import pandas as pd\n",
21-
"import pathlib\n",
22-
"import arviz as az"
20+
"import arviz as az\n",
21+
"import causalpy as cp"
2322
]
2423
},
2524
{
@@ -31,45 +30,29 @@
3130
"az.style.use(\"arviz-darkgrid\")"
3231
]
3332
},
34-
{
35-
"cell_type": "markdown",
36-
"metadata": {},
37-
"source": [
38-
"## Load data"
39-
]
40-
},
4133
{
4234
"cell_type": "code",
4335
"execution_count": 3,
4436
"metadata": {},
4537
"outputs": [],
4638
"source": [
47-
"rd_data_path = pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"drinking.csv\"\n",
4839
"df = (\n",
49-
" pd.read_csv(rd_data_path)[[\"agecell\", \"all\", \"mva\", \"suicide\"]]\n",
40+
" cp.load_data(\"drinking\")\n",
5041
" .rename(columns={\"agecell\": \"age\"})\n",
5142
" .assign(treated=lambda df_: df_.age > 21)\n",
5243
" .dropna(axis=0)\n",
5344
")"
5445
]
5546
},
56-
{
57-
"cell_type": "markdown",
58-
"metadata": {},
59-
"source": [
60-
"## Linear model"
61-
]
62-
},
6347
{
6448
"cell_type": "code",
6549
"execution_count": 4,
6650
"metadata": {},
6751
"outputs": [],
6852
"source": [
69-
"from causalpy.skl_experiments import RegressionDiscontinuity\n",
7053
"from sklearn.linear_model import LinearRegression\n",
7154
"\n",
72-
"result = RegressionDiscontinuity(\n",
55+
"result = cp.skl_experiments.RegressionDiscontinuity(\n",
7356
" df,\n",
7457
" formula=\"all ~ 1 + age + treated\",\n",
7558
" running_variable_name=\"age\",\n",

docs/notebooks/sc_pymc.ipynb

+33-40
Large diffs are not rendered by default.

docs/notebooks/sc_skl.ipynb

+5-13
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
16-
"import pandas as pd\n",
17-
"import pathlib\n",
16+
"import causalpy as cp\n",
1817
"import arviz as az"
1918
]
2019
},
@@ -40,10 +39,7 @@
4039
"metadata": {},
4140
"outputs": [],
4241
"source": [
43-
"sc_data_path = (\n",
44-
" pathlib.Path.cwd().parents[1] / \"causalpy\" / \"data\" / \"synthetic_control.csv\"\n",
45-
")\n",
46-
"df = pd.read_csv(sc_data_path)\n",
42+
"df = cp.load_data(\"sc\")\n",
4743
"treatment_time = 70"
4844
]
4945
},
@@ -60,15 +56,12 @@
6056
"metadata": {},
6157
"outputs": [],
6258
"source": [
63-
"from causalpy.skl_models import WeightedProportion\n",
64-
"from causalpy.skl_experiments import SyntheticControl\n",
65-
"\n",
6659
"# Note, we do not want an intercept in this model\n",
67-
"result = SyntheticControl(\n",
60+
"result = cp.skl_experiments.SyntheticControl(\n",
6861
" df,\n",
6962
" treatment_time,\n",
7063
" formula=\"actual ~ 0 + a + b + c + d + e + f + g\",\n",
71-
" prediction_model=WeightedProportion(),\n",
64+
" prediction_model=cp.skl_models.WeightedProportion(),\n",
7265
")"
7366
]
7467
},
@@ -132,11 +125,10 @@
132125
"metadata": {},
133126
"outputs": [],
134127
"source": [
135-
"from causalpy.skl_experiments import SyntheticControl\n",
136128
"from sklearn.linear_model import LinearRegression\n",
137129
"\n",
138130
"# Note, we do not want an intercept in this model\n",
139-
"result = SyntheticControl(\n",
131+
"result = cp.skl_experiments.SyntheticControl(\n",
140132
" df,\n",
141133
" treatment_time,\n",
142134
" formula=\"actual ~ 0 + a + b + c + d + e + f + g\",\n",

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
arviz>=0.13.0rc1
1+
arviz>=0.14.0
22
graphviz
33
matplotlib>=3.5.3
44
numpy

0 commit comments

Comments
 (0)