2020-10-30 03:50:54 -04:00
|
|
|
#
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
|
|
# this work for additional information regarding copyright ownership.
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
# (the "License"); you may not use this file except in compliance with
|
|
|
|
# the License. You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2020-12-05 03:55:31 -05:00
|
|
|
import csv as lib_csv
|
2020-10-30 03:50:54 -04:00
|
|
|
import os
|
|
|
|
import re
|
2020-12-05 03:55:31 -05:00
|
|
|
import sys
|
2020-10-30 03:50:54 -04:00
|
|
|
from dataclasses import dataclass
|
2020-12-05 03:55:31 -05:00
|
|
|
from typing import Any, Dict, Iterator, List, Optional, Union
|
2020-10-30 03:50:54 -04:00
|
|
|
|
|
|
|
import click
|
2021-07-21 14:46:43 -04:00
|
|
|
from click.core import Context
|
2020-10-30 03:50:54 -04:00
|
|
|
|
2021-02-02 04:24:40 -05:00
|
|
|
# PyGithub is a third-party dependency; when it is missing, print a readable
# hint and stop instead of surfacing an import traceback.
try:
    from github import BadCredentialsException, Github, PullRequest, Repository
except ModuleNotFoundError:
    print("PyGithub is a required package for this script")
    # Use sys.exit like the rest of this file: the bare ``exit`` helper is
    # injected by the ``site`` module and is not guaranteed to be available.
    sys.exit(1)
|
|
|
|
|
|
|
|
# GitHub repository ("owner/name") in which pull requests are looked up.
SUPERSET_REPO = "apache/superset"
# Type prefixes recognised at the very start of a pull request title.
# The former "/gmi" alternative was a leftover of JavaScript regex flags pasted
# into the pattern; it could only ever match a title literally starting with
# "/gmi", so it has been dropped.
SUPERSET_PULL_REQUEST_TYPES = r"^(fix|feat|chore|refactor|docs|build|ci)"
# Label-name prefixes that flag a pull request as risky.
SUPERSET_RISKY_LABELS = r"^(blocking|risk|hold|revert|security vulnerability)"
|
2021-02-02 04:24:40 -05:00
|
|
|
|
2020-10-30 03:50:54 -04:00
|
|
|
|
|
|
|
@dataclass
class GitLog:
    """
    One entry of a git log
    """

    sha: str
    author: str
    time: str
    message: str
    pr_number: Union[int, None] = None
    author_email: str = ""

    def __eq__(self, other: object) -> bool:
        """Two log entries compare equal when they carry the same PR number"""
        # Anything that is not a GitLog is never equal to one.
        if not isinstance(other, self.__class__):
            return False
        return other.pr_number == self.pr_number

    def __repr__(self) -> str:
        # Rendered as "[<pr number>]: <message> <time> <author>".
        return "[{}]: {} {} {}".format(
            self.pr_number, self.message, self.time, self.author
        )
|
|
|
|
|
|
|
|
|
|
|
|
class GitChangeLog:
    """
    Helper class to output a list of log entries in the Superset changelog format

    We want to map a git author to a github login, for that we call github's API
    """

    def __init__(
        self,
        version: str,
        logs: List[GitLog],
        access_token: Optional[str] = None,
        risk: Optional[bool] = False,
    ) -> None:
        """
        :param version: version label printed in the changelog header
        :param logs: log entries to render
        :param access_token: github token; falls back to the GITHUB_TOKEN
            environment variable when not given
        :param risk: when truthy, ``repr`` lists only the risky pull requests
        """
        self._version = version
        self._logs = logs
        # Per-PR caches so that each pull request is only looked up once.
        self._pr_logs_with_details: Dict[int, Dict[str, Any]] = {}
        self._github_login_cache: Dict[str, Optional[str]] = {}
        self._github_prs: Dict[int, Any] = {}
        self._wait = 10
        github_token = access_token or os.environ.get("GITHUB_TOKEN")
        self._github = Github(github_token)
        self._show_risk = risk
        # Resolved lazily by _get_superset_repo().
        self._superset_repo: Repository = None

    @staticmethod
    def _report_bad_credentials() -> None:
        """Prints the hint shown when github rejects the credentials"""
        print(
            "Bad credentials to github provided"
            " use access_token parameter or set GITHUB_TOKEN"
        )

    def _get_superset_repo(self) -> Repository:
        """
        Returns the github repository object, fetching it on first use only

        Exits the process when github rejects the credentials.
        """
        if self._superset_repo is None:
            try:
                self._superset_repo = self._github.get_repo(SUPERSET_REPO)
            except BadCredentialsException:
                self._report_bad_credentials()
                sys.exit(1)
        return self._superset_repo

    def _fetch_github_pr(self, pr_number: int) -> PullRequest:
        """
        Fetches a github PR info, served from the local cache when possible

        Exits the process when github rejects the credentials.
        """
        cached_pr = self._github_prs.get(pr_number)
        if cached_pr:
            # Cache hit: no API round trip at all (the repository used to be
            # re-fetched on every call, even for cached pull requests).
            return cached_pr
        github_repo = self._get_superset_repo()
        try:
            pull_request = github_repo.get_pull(pr_number)
        except BadCredentialsException:
            self._report_bad_credentials()
            sys.exit(1)
        self._github_prs[pr_number] = pull_request
        return pull_request

    def _get_github_login(self, git_log: GitLog) -> Optional[str]:
        """
        Tries to fetch a github login (username) from a git author

        Falls back to the git author name when the entry has no PR number or
        the PR could not be resolved.
        """
        author_name = git_log.author
        github_login = self._github_login_cache.get(author_name)
        if github_login:
            return github_login
        if git_log.pr_number:
            pr_info = self._fetch_github_pr(git_log.pr_number)
            if pr_info:
                github_login = pr_info.user.login
        if github_login:
            # Only real logins are cached: caching the author-name fallback
            # would hide the login of a later entry by the same author that
            # does have a pull request.
            self._github_login_cache[author_name] = github_login
            return github_login
        return author_name

    def _has_commit_migrations(self, git_sha: str) -> bool:
        """Tells whether the commit touches a file under the migrations folder"""
        # Go through the lazy accessor: the repository may not have been
        # fetched yet when the first log entry has no PR number.
        commit = self._get_superset_repo().get_commit(sha=git_sha)
        return any(
            "superset/migrations/versions/" in file.filename for file in commit.files
        )

    def _get_pull_request_details(self, git_log: GitLog) -> Dict[str, Any]:
        """
        Builds (and caches by PR number) the details used to render an entry

        The returned dict has the keys ``id``, ``has_migrations``, ``labels``,
        ``title``, ``type`` and ``is_risky``. Entries without a PR number are
        described from the commit itself: the commit message is the title and
        there are no labels.
        """
        pr_number = git_log.pr_number
        # Must be bound even without a PR number; it used to be referenced
        # while unbound in that case.
        pr_info = None
        if pr_number:
            detail = self._pr_logs_with_details.get(pr_number)
            if detail:
                return detail
            pr_info = self._fetch_github_pr(pr_number)

        has_migrations = self._has_commit_migrations(git_log.sha)
        title = pr_info.title if pr_info else git_log.message
        type_match = re.match(SUPERSET_PULL_REQUEST_TYPES, title)
        pr_type = type_match.group().strip('"') if type_match else None

        pr_labels = pr_info.labels if pr_info else []
        labels = " | ".join(label.name for label in pr_labels)
        is_risky = self._is_risk_pull_request(pr_labels)
        detail = {
            "id": pr_number,
            "has_migrations": has_migrations,
            "labels": labels,
            "title": title,
            "type": pr_type,
            # A database migration is always treated as risky.
            "is_risky": is_risky or has_migrations,
        }

        if pr_number:
            self._pr_logs_with_details[pr_number] = detail

        return detail

    def _is_risk_pull_request(self, labels: List[Any]) -> bool:
        """Tells whether any label name starts with one of the risky prefixes"""
        return any(
            re.match(SUPERSET_RISKY_LABELS, label.name) is not None
            for label in labels
        )

    def _get_changelog_version_head(self) -> str:
        """Returns the changelog header: the version and the first entry's time"""
        if not self._logs:
            # Nothing to date the release with; avoid an IndexError.
            return f"### {self._version}"
        return f"### {self._version} ({self._logs[0].time})"

    def _parse_change_log(
        self, changelog: Dict[str, str], pr_info: Dict[str, Any], github_login: str,
    ) -> None:
        """
        Appends the formatted entry to the matching section of ``changelog``

        Migrations take precedence over the PR type; anything that is neither
        a migration, a fix nor a feature goes to "Others".
        """
        formatted_pr = (
            f"- [#{pr_info.get('id')}]"
            f"(https://github.com/{SUPERSET_REPO}/pull/{pr_info.get('id')}) "
            f"{pr_info.get('title')} (@{github_login})\n"
        )
        if pr_info.get("has_migrations"):
            section = "Database Migrations"
        elif pr_info.get("type") == "fix":
            section = "Fixes"
        elif pr_info.get("type") == "feat":
            section = "Features"
        else:
            section = "Others"
        changelog[section] += formatted_pr

    def __repr__(self) -> str:
        """
        Renders the changelog

        Note that this resolves every entry through the github API and prints
        a progress counter to stdout while doing so. In risk mode only the
        risky pull requests are listed; otherwise entries are grouped into
        sections.
        """
        result = f"\n{self._get_changelog_version_head()}\n"
        changelog = {
            "Database Migrations": "\n",
            "Features": "\n",
            "Fixes": "\n",
            "Others": "\n",
        }
        total = len(self._logs)
        # Count from 1 so the progress reads "N/N" once the last entry is done.
        for position, log in enumerate(self._logs, start=1):
            github_login = self._get_github_login(log)
            pr_info = self._get_pull_request_details(log)

            if not github_login:
                github_login = log.author

            if self._show_risk:
                if pr_info.get("is_risky"):
                    result += (
                        f"- [#{log.pr_number}]"
                        f"(https://github.com/{SUPERSET_REPO}/pull/{log.pr_number}) "
                        f"{pr_info.get('title')} (@{github_login}) "
                        f"{pr_info.get('labels')} \n"
                    )
            else:
                self._parse_change_log(changelog, pr_info, github_login)

            print(f"\r {position}/{total}", end="", flush=True)

        if self._show_risk:
            return result

        for key, entries in changelog.items():
            result += f"**{key}** {entries}\n"
        return result

    def __iter__(self) -> Iterator[Dict[str, Any]]:
        """Yields one flat dict per log entry (no github lookups involved)"""
        for log in self._logs:
            yield {
                "pr_number": log.pr_number,
                "pr_link": f"https://github.com/{SUPERSET_REPO}/pull/"
                f"{log.pr_number}",
                "message": log.message,
                "time": log.time,
                "author": log.author,
                "email": log.author_email,
                "sha": log.sha,
            }
|
|
|
|
|
2020-10-30 03:50:54 -04:00
|
|
|
|
|
|
|
class GitLogs:
    """
    Manages git log entries from a specific branch/tag

    Can compare git log entries by PR number
    """

    def __init__(self, git_ref: str) -> None:
        self._git_ref = git_ref
        self._logs: List[GitLog] = []

    @property
    def git_ref(self) -> str:
        """The branch/tag this instance reads its log from"""
        return self._git_ref

    @property
    def logs(self) -> List[GitLog]:
        """The parsed log entries (empty until ``fetch`` is called)"""
        return self._logs

    def fetch(self) -> None:
        """Reads and parses the git log of the ref, stored in reverse order"""
        parsed = [
            self._parse_log(line)
            for line in self._git_logs()
            # An empty log yields a single blank line; skip it instead of
            # failing while parsing.
            if line.strip()
        ]
        self._logs = parsed[::-1]

    def diff(self, git_logs: "GitLogs") -> List[GitLog]:
        """Returns the entries of ``git_logs`` whose PR number is unknown here"""
        # GitLog equality is defined by PR number alone, so membership in a set
        # of PR numbers is equivalent to ``log not in self._logs`` while being
        # linear rather than quadratic in the number of commits.
        known_pr_numbers = {log.pr_number for log in self._logs}
        return [
            log for log in git_logs.logs if log.pr_number not in known_pr_numbers
        ]

    def __repr__(self) -> str:
        return f"{self._git_ref}, Log count:{len(self._logs)}"

    @staticmethod
    def _git_get_current_head() -> str:
        """Returns the checked out branch/ref name, or "" when it is not found"""
        with os.popen("git status | head -1") as pipe:
            output = pipe.read()
        match = re.match("(?:HEAD detached at|On branch) (.*)", output)
        if not match:
            return ""
        return match.group(1)

    def _git_checkout(self, git_ref: str) -> None:
        """Checks out ``git_ref``; exits the process if HEAD does not match it"""
        # NOTE(review): git_ref is interpolated into a shell command; it comes
        # from the command line options of this script.
        with os.popen(f"git checkout {git_ref}") as pipe:
            pipe.read()
        current_head = self._git_get_current_head()
        if current_head != git_ref:
            print(f"Could not checkout {git_ref}")
            sys.exit(1)

    def _git_logs(self) -> List[str]:
        """Returns the raw log lines of the ref, one "|" separated line per commit"""
        # let's get current git ref so we can revert it back
        current_git_ref = self._git_get_current_head()
        self._git_checkout(self._git_ref)
        with os.popen(
            'git --no-pager log --pretty=format:"%h|%an|%ae|%ad|%s|"'
        ) as pipe:
            output = pipe.read().split("\n")
        # revert to git ref, let's be nice
        self._git_checkout(current_git_ref)
        return output

    @staticmethod
    def _parse_log(log_item: str) -> GitLog:
        """
        Parses one "sha|author|email|date|subject|" line into a GitLog

        :raises ValueError: when the line has fewer than five fields
        """
        # Split off the first four fields only, so a "|" inside the subject no
        # longer truncates the message (and with it the PR number).
        fields = log_item.split("|", 4)
        if len(fields) < 5:
            raise ValueError(f"Unexpected git log line: {log_item!r}")
        sha, author, author_email, commit_time, message = fields
        # Drop the trailing separator that the log format appends.
        if message.endswith("|"):
            message = message[:-1]

        # parse the PR number from the log message; at least one digit is
        # required so that a literal "(#)" is not fed to int().
        pr_number = None
        match = re.match(r".*\(\#(\d+)\)", message)
        if match:
            pr_number = int(match.group(1))
        return GitLog(
            sha=sha,
            author=author,
            author_email=author_email,
            time=commit_time,
            message=message,
            pr_number=pr_number,
        )
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class BaseParameters:
    """Pair of git logs that ``cli`` stores on the click context for the sub-commands"""

    # log entries of the ref passed as --previous_version
    previous_logs: GitLogs
    # log entries of the ref passed as --current_version
    current_logs: GitLogs
|
|
|
|
|
|
|
|
|
|
|
|
def print_title(message: str) -> None:
    """Prints ``message`` between two 50-character dashed rules"""
    rule = "-" * 50
    print(rule, message, rule, sep="\n")
|
|
|
|
|
|
|
|
|
|
|
|
@click.group()
@click.pass_context
@click.option("--previous_version", help="The previous release version", required=True)
@click.option("--current_version", help="The current release version", required=True)
def cli(ctx: Context, previous_version: str, current_version: str) -> None:
    """Welcome to change log generator"""
    # The docstring above doubles as the command's help text, keep it as is.
    previous_logs, current_logs = GitLogs(previous_version), GitLogs(current_version)
    # Read the previous ref first, then the current one.
    for git_logs in (previous_logs, current_logs):
        git_logs.fetch()
    # Hand both logs over to the sub-commands through the click context.
    ctx.obj = BaseParameters(previous_logs, current_logs)
|
|
|
|
|
|
|
|
|
|
|
|
@cli.command("compare")
@click.pass_obj
def compare(base_parameters: BaseParameters) -> None:
    """Compares both versions (by PR)"""
    # The docstring above doubles as the command's help text, keep it as is.
    previous = base_parameters.previous_logs
    current = base_parameters.current_logs
    # First what the current ref has that the previous one lacks, then the
    # other way around.
    for source, target in ((current, previous), (previous, current)):
        print_title(f"Pull requests from {source.git_ref} not in {target.git_ref}")
        for missing_log in target.diff(source):
            print(f"{missing_log}")
|
|
|
|
|
|
|
|
|
|
|
|
@cli.command("changelog")
@click.option(
    "--csv", help="The csv filename to export the changelog to",
)
@click.option(
    "--access_token",
    help="The github access token,"
    " if not provided will try to fetch from GITHUB_TOKEN env var",
)
@click.option("--risk", is_flag=True, help="show all pull requests with risky labels")
@click.pass_obj
def change_log(
    base_parameters: BaseParameters,
    csv: Optional[str],
    access_token: Optional[str],
    risk: bool,
) -> None:
    """Outputs a changelog (by PR)"""
    # The docstring above doubles as the command's help text, keep it as is.
    previous_logs = base_parameters.previous_logs
    current_logs = base_parameters.current_logs
    previous_diff_logs = previous_logs.diff(current_logs)
    logs = GitChangeLog(
        current_logs.git_ref,
        previous_diff_logs[::-1],
        access_token=access_token,
        risk=risk,
    )
    if not csv:
        print("Fetching github usernames, this may take a while:")
        # Rendering the changelog is what triggers the github lookups.
        print(logs)
        return

    # Materialise the rows once; they provide the header and the content.
    log_items = list(logs)
    if not log_items:
        # Without rows there is no header to derive; say so instead of
        # failing on log_items[0].
        print(f"No pull requests to export, {csv} was not written")
        return
    # newline="" is what the csv module requires of files it writes to
    # (otherwise rows get an extra blank line on Windows); the explicit
    # encoding keeps non-ASCII author names from depending on the locale.
    with open(csv, "w", newline="", encoding="utf-8") as csv_file:
        writer = lib_csv.DictWriter(
            csv_file,
            delimiter=",",
            quotechar='"',
            quoting=lib_csv.QUOTE_ALL,
            fieldnames=list(log_items[0]),
        )
        writer.writeheader()
        writer.writerows(log_items)
|
2020-10-30 03:50:54 -04:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: hands control to the click command group.
# NOTE(review): there is no ``if __name__ == "__main__"`` guard, so this also
# runs when the module is imported.
cli()
|