Skip to content

Commit 4c4e4a3

Browse files
authored
[ci] use pre-commit, update actions (#61)
1 parent 6967528 commit 4c4e4a3

11 files changed

+129
-71
lines changed

.github/workflows/main.yml

+4-4
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,17 @@ jobs:
1414
- task: linting
1515
steps:
1616
- name: Checkout repository
17-
uses: actions/checkout@v2
17+
uses: actions/checkout@v4
1818
- name: Set up Python
19-
uses: conda-incubator/setup-miniconda@v2
19+
uses: conda-incubator/setup-miniconda@v3
2020
with:
2121
python-version: 3.11
2222
- name: linting
2323
if: matrix.task == 'linting'
2424
shell: bash
2525
run: |
26-
pip install --upgrade black flake8 isort nbqa
27-
make lint
26+
pip install --upgrade pre-commit
27+
pre-commit run --all-files
2828
all-tests-successful:
2929
if: always()
3030
runs-on: ubuntu-latest

.pre-commit-config.yaml

+49
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
---
2+
exclude: |
3+
(?x)^(
4+
LightGBM
5+
)$
6+
7+
repos:
8+
- repo: https://github.com/pre-commit/pre-commit-hooks
9+
rev: v4.6.0
10+
hooks:
11+
- id: check-toml
12+
- id: end-of-file-fixer
13+
- id: trailing-whitespace
14+
- repo: https://github.com/pycqa/isort
15+
rev: 5.13.2
16+
hooks:
17+
- id: isort
18+
name: isort (python)
19+
args: ["--settings-path", "pyproject.toml"]
20+
- repo: https://github.com/pre-commit/mirrors-mypy
21+
rev: v1.10.0
22+
hooks:
23+
- id: mypy
24+
args: ["--config-file", "pyproject.toml"]
25+
exclude: "tests"
26+
additional_dependencies:
27+
- types-requests
28+
- repo: https://github.com/astral-sh/ruff-pre-commit
29+
# Ruff version.
30+
rev: v0.4.10
31+
hooks:
32+
# Run the linter.
33+
- id: ruff
34+
args: ["--config", "pyproject.toml"]
35+
types_or: [jupyter, python]
36+
# Run the formatter.
37+
- id: ruff-format
38+
args: ["--config", "pyproject.toml"]
39+
types_or: [python, jupyter]
40+
- repo: https://github.com/maxwinterstein/shfmt-py
41+
rev: v3.7.0.1
42+
hooks:
43+
- id: shfmt
44+
args: ["--indent=4", "--space-redirects", "--write"]
45+
- repo: https://github.com/shellcheck-py/shellcheck-py
46+
rev: v0.10.0.1
47+
hooks:
48+
- id: shellcheck
49+
args: ["--exclude=SC2002"]

Makefile

-16
Original file line number | Diff line number | Diff line change
@@ -68,13 +68,6 @@ ecr-details.json:
6868
--repository-name ${CLUSTER_IMAGE_NAME} \
6969
> ./ecr-details.json
7070

71-
.PHONY: format
72-
format:
73-
black .
74-
isort .
75-
nbqa isort .
76-
nbqa black .
77-
7871
$(LIGHTGBM_REPO):
7972
git clone --recursive https://github.com/microsoft/LightGBM.git
8073

@@ -100,15 +93,6 @@ lightgbm-unit-tests:
10093
/bin/bash -cex \
10194
"sh ./build-python.sh install --precompile && pip install pytest && pytest -vv -rA tests/python_package_test/test_dask.py"
10295

103-
.PHONY: lint
104-
lint: lint-dockerfiles
105-
isort --check .
106-
black --check --diff .
107-
flake8 --count .
108-
nbqa black --check --diff .
109-
nbqa flake8 .
110-
nbqa isort --check .
111-
11296
.PHONY: lint-dockerfiles
11397
lint-dockerfiles:
11498
for dockerfile in $$(ls | grep -E '^Dockerfile'); do \

bin/profile-example-memory-usage.sh

+8-6
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,19 @@ set -e -u -o pipefail
88

99
echo "profiling examples"
1010
mkdir -p "${PROFILING_OUTPUT_DIR}/bin"
11+
12+
# shellcheck disable=SC2044
1113
for py_script in $(find "${LIGHTGBM_HOME}/examples/python-guide" -name '*.py'); do
1214
base_filename=$(basename "${py_script}")
13-
prof_file=$(echo "${base_filename}" | sed -e 's/\.py/\.bin/g')
14-
table_file=$(echo "${base_filename}" | sed -e 's/\.py/-table\.html/g')
15-
leak_table_file=$(echo "${base_filename}" | sed -e 's/\.py/-leak-table\.html/g')
16-
flamegraph_file=$(echo "${base_filename}" | sed -e 's/\.py/-flamegraph\.html/g')
15+
prof_file="${base_filename/.py/.bin}"
16+
table_file="${base_filename/.py/-table.html}"
17+
leak_table_file="${base_filename/.py/-leak-table.html}"
18+
flamegraph_file="${base_filename/.py/-flamegraph.html}"
1719
echo " - ${base_filename}"
1820
memray run \
1921
-o "${PROFILING_OUTPUT_DIR}/bin/${prof_file}" \
20-
"${py_script}" 2>&1 > /dev/null \
21-
|| true
22+
"${py_script}" > /dev/null 2>&1 ||
23+
true
2224
memray table \
2325
-o "${PROFILING_OUTPUT_DIR}/${table_file}" \
2426
--force \

bin/profile-examples.sh

+4-3
Original file line number | Diff line number | Diff line change
@@ -7,15 +7,16 @@
77
set -e -u -o pipefail
88

99
echo "profiling examples"
10+
# shellcheck disable=SC2044
1011
for py_script in $(find "${LIGHTGBM_HOME}/examples/python-guide" -name '*.py'); do
1112
base_filename=$(basename "${py_script}")
12-
prof_file=$(echo "${base_filename}" | sed -e 's/\.py/\.prof/g')
13+
prof_file="${base_filename/.py/.prof}"
1314
echo " - ${base_filename}"
1415
python \
1516
-Wignore \
1617
-m cProfile \
1718
-o "${PROFILING_OUTPUT_DIR}/${prof_file}" \
18-
"${py_script}" 2>&1 > /dev/null \
19-
|| true
19+
"${py_script}" > /dev/null 2>&1 ||
20+
true
2021
done
2122
echo "Done profiling examples. See '${PROFILING_OUTPUT_DIR}' for results."

jupyter_notebook_config.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
# mypy: disable-error-code="name-defined"
12
c.ServerApp.token = ""
23
c.ServerApp.password = ""
34
c.ServerApp.open_browser = False

notebooks/_img/dask-horizontal.svg

+4-4
Loading

notebooks/demo-aws.ipynb

+3-1
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,9 @@
5151
"with open(\"../ecr-details.json\", \"r\") as f:\n",
5252
" ecr_details = json.loads(f.read())\n",
5353
"\n",
54-
"CONTAINER_IMAGE = ecr_details[\"repository\"][\"repositoryUri\"] + \":\" + os.environ[\"DASK_VERSION\"]\n",
54+
"CONTAINER_IMAGE = (\n",
55+
" ecr_details[\"repository\"][\"repositoryUri\"] + \":\" + os.environ[\"DASK_VERSION\"]\n",
56+
")\n",
5557
"print(f\"scheduler and worker image: {CONTAINER_IMAGE}\")"
5658
]
5759
},

notebooks/testing/ranker-local.ipynb

+24-11
Original file line number | Diff line number | Diff line change
@@ -57,9 +57,12 @@
5757
" avg_gs=10,\n",
5858
" random_state=0,\n",
5959
"):\n",
60-
" \"\"\"Generate a learning-to-rank dataset - feature vectors grouped together with\n",
61-
" integer-valued graded relevance scores. Replace this with a sklearn.datasets function\n",
62-
" if ranking objective becomes supported in sklearn.datasets module.\"\"\"\n",
60+
" \"\"\"\n",
61+
" Generate a learning-to-rank dataset - feature vectors grouped\n",
62+
" together with integer-valued graded relevance scores. Replace this\n",
63+
" with a sklearn.datasets function if ranking objective becomes\n",
64+
" supported in sklearn.datasets module.\n",
65+
" \"\"\"\n",
6366
" rnd_generator = check_random_state(random_state)\n",
6467
"\n",
6568
" y_vec, group_vec = np.empty((0,), dtype=int), np.empty((0,), dtype=int)\n",
@@ -84,7 +87,8 @@
8487
" x_grid = np.linspace(0, stop=1, num=gmax + 2)\n",
8588
" X = rnd_generator.uniform(size=(n_samples, n_features))\n",
8689
"\n",
87-
" # make first n_informative features values bucketed according to relevance scores.\n",
90+
" # make first n_informative features values\n",
91+
" # bucketed according to relevance scores.\n",
8892
" def bucket_fn(z):\n",
8993
" return rnd_generator.uniform(x_grid[z], high=x_grid[z + 1])\n",
9094
"\n",
@@ -102,12 +106,14 @@
102106
" g_rle = np.array([sum([1 for _ in grp]) for _, grp in itertools.groupby(g)])\n",
103107
"\n",
104108
" if output == \"dataframe\":\n",
105-
" # add target, weight, and group to DataFrame so that partitions abide by group boundaries.\n",
109+
" # add target, weight, and group to DataFrame so that\n",
110+
" # partitions abide by group boundaries.\n",
106111
" X_df = pd.DataFrame(X, columns=[f\"feature_{i}\" for i in range(X.shape[1])])\n",
107112
" X = X_df.copy()\n",
108113
" X_df = X_df.assign(y=y, g=g, w=w)\n",
109114
"\n",
110-
" # set_index ensures partitions are based on group id. See https://bit.ly/3pAWyNw.\n",
115+
" # set_index ensures partitions are based on group id.\n",
116+
" # See https://bit.ly/3pAWyNw.\n",
111117
" X_df.set_index(\"g\", inplace=True)\n",
112118
" dX = dd.from_pandas(X_df, chunksize=chunk_size)\n",
113119
"\n",
@@ -117,12 +123,16 @@
117123
" dX = dX.drop(columns=[\"y\", \"w\"])\n",
118124
" dg = dX.index.to_series()\n",
119125
"\n",
120-
" # encode group identifiers into run-length encoding, the format LightGBMRanker is expecting\n",
126+
" # encode group identifiers into run-length encoding,\n",
127+
" # the format LightGBMRanker is expecting\n",
121128
" # so that within each partition, sum(g) = n_samples.\n",
122-
" dg = dg.map_partitions(lambda p: p.groupby(\"g\", sort=False).apply(lambda z: z.shape[0]))\n",
129+
" dg = dg.map_partitions(\n",
130+
" lambda p: p.groupby(\"g\", sort=False).apply(lambda z: z.shape[0])\n",
131+
" )\n",
123132
"\n",
124133
" elif output == \"array\":\n",
125-
" # ranking arrays: one chunk per group. Each chunk must include all columns.\n",
134+
" # ranking arrays: one chunk per group.\n",
135+
" # Each chunk must include all columns.\n",
126136
" p = X.shape[1]\n",
127137
" dX, dy, dw, dg = list(), list(), list(), list()\n",
128138
" for g_idx, rhs in enumerate(np.cumsum(g_rle)):\n",
@@ -138,7 +148,9 @@
138148
" dg = da.concatenate(dg, axis=0)\n",
139149
"\n",
140150
" else:\n",
141-
" raise ValueError(\"ranking data creation only supported for Dask arrays and dataframes\")\n",
151+
" raise ValueError(\n",
152+
" \"ranking data creation only supported for Dask arrays and dataframes\"\n",
153+
" )\n",
142154
"\n",
143155
" return X, y, w, g_rle, dX, dy, dw, dg"
144156
]
@@ -219,7 +231,8 @@
219231
"metadata": {},
220232
"outputs": [],
221233
"source": [
222-
"# relative difference between distributed ranker and local ranker spearman corr should be small.\n",
234+
"# relative difference between distributed ranker\n",
235+
"# and local ranker spearman corr should be small.\n",
223236
"lcor = spearmanr(rnkvec_local, y).correlation\n",
224237
"print(np.abs(dcor - lcor))\n",
225238
"assert np.abs(dcor - lcor) < 0.003"

pyproject.toml

+32-11
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,33 @@
1-
[tool.black]
2-
line-length = 100
3-
exclude = '''
4-
/(
5-
| LightGBM
6-
)/
7-
'''
1+
[tool.ruff.lint]
2+
select = [
3+
# flake8-bugbear
4+
"B",
5+
# flake8-comprehensions
6+
"C4",
7+
# pycodestyle
8+
"E",
9+
# pyflakes
10+
"F",
11+
# NumPy-specific rules
12+
"NPY",
13+
# pylint
14+
"PL",
15+
# flake8-return: unnecessary assignment before return
16+
"RET504",
17+
# flake8-simplify: use dict.get() instead of an if-else block
18+
"SIM401",
19+
]
820

9-
[tool.nbqa.exclude]
10-
black = "LightGBM/"
11-
flake8 = "LightGBM/"
12-
isort = "LightGBM/"
21+
[tool.ruff.lint.per-file-ignores]
22+
"*.ipynb" = [
23+
# (flake8-comprehensions) Unnecessary list() call
24+
"C408",
25+
# (pylint) too many arguments in function definition
26+
"PLR0913",
27+
# (pylint) Magic value used in comparison
28+
"PLR2004",
29+
]
30+
"jupyter_notebook_config.py" = [
31+
# (pyflakes) undefined name
32+
"F821",
33+
]

setup.cfg

-15
This file was deleted.

0 commit comments

Comments
 (0)