Skip to content

Commit 19af952

Browse files
authored
Add Python script for generating changelog content (#6391)
1 parent aba4340 commit 19af952

File tree

2 files changed

+141
-6
lines changed

2 files changed

+141
-6
lines changed

dev/release/README.md

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,35 @@ PyPI.
7676

7777
### Change Log
7878

79-
We maintain a `CHANGELOG.md` so our users know what has been
80-
changed between releases.
79+
We maintain a `CHANGELOG.md` so our users know what has been changed between releases.
8180

82-
The CHANGELOG is managed automatically using
83-
[update_change_log.sh](https://github.com/apache/arrow-datafusion/blob/main/dev/release/update_change_log.sh)
81+
The changelog is generated using a Python script:
8482

85-
This script creates a changelog using GitHub PRs and issues based on the labels
86-
associated with them.
83+
```bash
84+
$ GITHUB_TOKEN=<TOKEN> ./dev/release/generate-changelog.py apache/arrow-datafusion 24.0.0 HEAD > dev/changelog/25.0.0.md
85+
```
86+
87+
This script creates a changelog from GitHub PRs based on the labels associated with them as well as looking for
88+
titles starting with `feat:`, `fix:`, or `docs:`. The script will produce output similar to:
89+
90+
```
91+
Fetching list of commits between 24.0.0 and HEAD
92+
Fetching pull requests
93+
Categorizing pull requests
94+
Generating changelog content
95+
```
96+
97+
This process is not fully automated, so there are some additional manual steps:
98+
99+
- Add the ASF header to the generated file
100+
- Add a link to this changelog from the top-level `/datafusion/CHANGELOG.md`
101+
- Add the following content (copy from the previous version's changelog and update as appropriate):
102+
103+
```
104+
## [24.0.0](https://github.com/apache/arrow-datafusion/tree/24.0.0) (2023-05-06)
105+
106+
[Full Changelog](https://github.com/apache/arrow-datafusion/compare/23.0.0...24.0.0)
107+
```
87108

88109
## Prepare release commits and PR
89110

dev/release/generate-changelog.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
#!/usr/bin/env python
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance with
8+
# the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
import argparse
19+
import sys
20+
from github import Github
21+
import os
22+
import re
23+
24+
25+
def print_pulls(repo_name, title, pulls):
    """Print one Markdown changelog section to stdout.

    The section is a bold heading followed by one bullet per pull request,
    each linking to the PR and crediting a GitHub login.

    Args:
        repo_name: Repository in ``owner/name`` form; used to build PR URLs.
        title: Heading text for the section (rendered as ``**title:**``).
        pulls: Sequence of ``(pull, commit)`` pairs. ``pull`` must expose
            ``number``, ``title`` and ``user.login``; ``commit`` must expose
            ``author`` (an object with ``login``, or ``None``).

    Nothing at all is printed when ``pulls`` is empty.
    """
    if not pulls:
        return

    print("**{}:**".format(title))
    print()
    for pull, commit in pulls:
        url = "https://github.com/{}/pull/{}".format(repo_name, pull.number)
        # GitHub reports no account (author is None) for commits whose author
        # e-mail is not linked to a user; credit the PR opener in that case
        # instead of crashing on ``None.login``.
        author = commit.author
        login = author.login if author is not None else pull.user.login
        print("- {} [#{}]({}) ({})".format(pull.title, pull.number, url, login))
    print()
33+
34+
35+
def generate_changelog(repo, repo_name, tag1, tag2):
    """Write a Markdown changelog for the commits between two refs to stdout.

    Progress messages are written to stderr so that stdout contains only the
    changelog and can be redirected straight into a file.

    Args:
        repo: Repository object providing ``compare(base, head)``; the result
            must expose ``commits``, and each commit ``get_pulls()``.
        repo_name: Repository in ``owner/name`` form; used for PR links.
        tag1: Base ref of the comparison (the previous release tag).
        tag2: Head ref of the comparison (the current release tag or ``HEAD``).
    """
    # get a list of commits between two tags
    print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr)
    comparison = repo.compare(tag1, tag2)

    # get the pull requests for these commits
    print("Fetching pull requests", file=sys.stderr)
    seen_numbers = set()
    all_pulls = []
    for commit in comparison.commits:
        for pull in commit.get_pulls():
            # there can be multiple commits per PR if squash merge is not being
            # used and in this case we should get all the author names, but for
            # now just keep the first commit seen for each PR
            if pull.number not in seen_numbers:
                seen_numbers.add(pull.number)
                all_pulls.append((pull, commit))

    # we split the pulls into categories
    # TODO: make categories configurable
    breaking = []
    bugs = []
    docs = []
    enhancements = []

    # Conventional Commits prefix: type, optional "(scope)", optional "!".
    # The scope accepts anything except parentheses so that scopes such as
    # "data-source" or "sql2" are recognised too.
    title_pattern = re.compile(r"^([a-z]+)(\([^()]+\))?(!)?:")

    # categorize the pull requests based on GitHub labels and title prefix
    print("Categorizing pull requests", file=sys.stderr)
    for pull, commit in all_pulls:
        cc_type = ""  # fix, feat, docs, chore, ...
        cc_breaking = False
        match = title_pattern.match(pull.title)
        if match:
            cc_type = match.group(1)
            cc_breaking = match.group(3) == "!"

        labels = {label.name for label in pull.labels}
        # A PR lands in the first matching category only; PRs matching none
        # still appear in the "Merged pull requests" section below.
        if "api change" in labels or cc_breaking:
            breaking.append((pull, commit))
        elif "bug" in labels or cc_type == "fix":
            bugs.append((pull, commit))
        elif "enhancement" in labels or cc_type == "feat":
            enhancements.append((pull, commit))
        elif "documentation" in labels or cc_type == "docs":
            docs.append((pull, commit))

    # produce the changelog content
    print("Generating changelog content", file=sys.stderr)
    print_pulls(repo_name, "Breaking changes", breaking)
    print_pulls(repo_name, "Implemented enhancements", enhancements)
    print_pulls(repo_name, "Fixed bugs", bugs)
    print_pulls(repo_name, "Documentation updates", docs)
    print_pulls(repo_name, "Merged pull requests", all_pulls)
94+
95+
96+
def cli(args=None):
    """Process command line arguments and generate the changelog.

    Args:
        args: Optional list of argument strings
            (``[project, tag1, tag2]``). When omitted or empty,
            ``sys.argv[1:]`` is used.

    The GitHub token is read from the ``GITHUB_TOKEN`` environment variable
    and passed to ``Github`` as-is (``None`` when the variable is unset).
    """
    if not args:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("project", help="The project name e.g. apache/arrow-datafusion")
    parser.add_argument("tag1", help="The previous release tag")
    parser.add_argument("tag2", help="The current release tag")
    # Parse the explicit list so that arguments passed to cli() are honoured
    # rather than silently replaced by sys.argv.
    parsed = parser.parse_args(args)

    token = os.getenv("GITHUB_TOKEN")

    g = Github(token)
    repo = g.get_repo(parsed.project)
    generate_changelog(repo, parsed.project, parsed.tag1, parsed.tag2)
112+
113+
if __name__ == "__main__":
114+
cli()

0 commit comments

Comments
 (0)