"""
Parses commit messages using `scipy tags <scipy-style>`_ of the form::

    <tag>: <scope>: <subject>

    <body>

The elements <tag>, <scope> and <body> are optional. If no tag is present, the
commit will be added to the changelog section "None" and no version increment
will be performed.

While <scope> is supported here it isn't actually part of the scipy style.
If it is missing, the colon that follows it is too. The commit should then be
of the form::

    <tag>: <subject>

    <body>
To communicate a breaking change, use a tag that results in a major version
bump (``API`` or ``DEP`` by default). Fill the paragraphs of the body with
information on how to migrate from the broken behavior to the new behavior.
Those paragraphs are reported as the breaking descriptions of the commit, which
is added to the "breaking" section of the changelog.
Supported Tags::

    API, DEP, ENH, REV, BUG, MAINT, BENCH, BLD,
    DEV, DOC, STY, TST, REL, FEAT, TEST

Supported Changelog Sections::

    breaking, feature, fix, documentation, other, none
.. _`scipy-style`: https://docs.scipy.org/doc/scipy/reference/dev/contributor/development_workflow.html#writing-the-commit-message
"""
from __future__ import annotations
import re
from functools import reduce
from itertools import zip_longest
from re import compile as regexp
from textwrap import dedent
from typing import TYPE_CHECKING, Tuple
from git.objects.commit import Commit
from pydantic.dataclasses import dataclass
from semantic_release.commit_parser._base import CommitParser, ParserOptions
from semantic_release.commit_parser.token import (
ParsedCommit,
ParsedMessageResult,
ParseError,
ParseResult,
)
from semantic_release.commit_parser.util import (
deep_copy_commit,
force_str,
parse_paragraphs,
)
from semantic_release.enums import LevelBump
from semantic_release.errors import InvalidParserOptions
from semantic_release.globals import logger
from semantic_release.helpers import sort_numerically, text_reducer
if TYPE_CHECKING: # pragma: no cover
from git.objects.commit import Commit
def _logged_parse_error(commit: Commit, error: str) -> ParseError:
    """
    Log ``error`` at debug level and wrap it in a ``ParseError`` for ``commit``.

    :param commit: the commit that could not be handled
    :param error: explanation that is logged and passed to the ``ParseError``
    :return: the newly created ``ParseError``
    """
    logger.debug(error)
    failure = ParseError(commit, error=error)
    return failure
# Changelog category reported for each known commit tag. Read by
# ``ScipyCommitParser.parse_message``, which falls back to the category
# "None" for a tag that has no entry here.
tag_to_section = {
"API": "breaking",
"BENCH": "none",
"BLD": "fix",
"BUG": "fix",
"DEP": "breaking",
"DEV": "none",
"DOC": "documentation",
"ENH": "feature",
"MAINT": "fix",
"REV": "other",
"STY": "none",
"TST": "none",
"REL": "none",
# strictly speaking not part of the standard
"FEAT": "feature",
"TEST": "none",
}
[docs]
@dataclass
class ScipyParserOptions(ParserOptions):
    """
    Configuration values for ``ScipyCommitParser``.

    Scipy-style commit messages follow the same format as Angular-style commit
    just with different tag names.
    """

    major_tags: Tuple[str, ...] = ("API", "DEP")
    """Tags whose commits should trigger a major release bump."""

    minor_tags: Tuple[str, ...] = ("ENH", "FEAT")
    """Tags whose commits should trigger a minor release bump."""

    patch_tags: Tuple[str, ...] = ("BLD", "BUG", "MAINT")
    """Tags whose commits should trigger a patch release bump."""

    other_allowed_tags: Tuple[str, ...] = (
        # "REV", # Revert commits are NOT Currently Supported
        "DEV",
        "BENCH",
        "DOC",
        "STY",
        "TST",
        "REL",
        "TEST",
    )
    """Tags that are accepted but do not trigger a version bump."""

    allowed_tags: Tuple[str, ...] = (
        major_tags + minor_tags + patch_tags + other_allowed_tags
    )
    """
    Every tag that is accepted at the start of a commit message.

    These are used to identify a valid commit message. If a commit message does
    not start with one of these prefixes, it will not be considered a valid
    commit message.
    """

    # TODO: breaking v11, make consistent with AngularParserOptions
    default_level_bump: LevelBump = LevelBump.NO_RELEASE
    """The minimum bump level to apply to valid commit message."""

    parse_squash_commits: bool = True
    """Toggle flag for whether or not to parse squash commits"""

    ignore_merge_commits: bool = True
    """Toggle flag for whether or not to ignore merge commits"""

    def __post_init__(self) -> None:
        """Derive the tag-to-bump-level lookup table from the configured tag groups."""
        # TODO: breaking v11, remove as the name is now consistent
        self.default_bump_level = self.default_level_bump

        # Groups are applied in order, so a tag that appears in several groups
        # ends up with the level of the last group that lists it.
        level_groups = (
            (self.allowed_tags, self.default_bump_level),
            (self.patch_tags, LevelBump.PATCH),
            (self.minor_tags, LevelBump.MINOR),
            (self.major_tags, LevelBump.MAJOR),
        )

        levels_by_tag: dict[str, LevelBump] = {}
        for tags, level in level_groups:
            for tag in tags:
                tag_name = str(tag)
                # tags containing the regex alternation character are left out
                if "|" in tag_name:
                    continue
                levels_by_tag[tag_name] = level

        self._tag_to_level: dict[str, LevelBump] = levels_by_tag

    @property
    def tag_to_level(self) -> dict[str, LevelBump]:
        """A mapping of commit tags to the level bump they should result in."""
        return self._tag_to_level
[docs]
class ScipyCommitParser(CommitParser[ParseResult, ScipyParserOptions]):
"""Parser for scipy-style commit messages"""
# TODO: Deprecate in lieu of get_default_options()
parser_options = ScipyParserOptions
def __init__(self, options: ScipyParserOptions | None = None) -> None:
    """
    Initialise the parser and pre-compile the regular expressions it uses.

    :param options: parser configuration, forwarded to the base class
        initializer; ``self.options`` is read afterwards
    :raises InvalidParserOptions: if the configured tags cannot be combined
        into a valid regular expression
    """
    super().__init__(options)

    tag_alternatives = "|".join(self.options.allowed_tags)
    try:
        commit_type_pattern = regexp(rf"(?P<type>{tag_alternatives})")
    except re.error as err:
        error_lines = [
            f"Invalid options for {self.__class__.__name__}",
            "Unable to create regular expression from configured commit-types.",
            "Please check the configured commit-types and remove or escape any regular expression characters.",
        ]
        raise InvalidParserOptions("\n".join(error_lines)) from err

    type_pattern = commit_type_pattern.pattern

    # "<tag>: " optionally with a scope in between: "<tag>: <scope>: "
    self.commit_prefix = regexp(
        f"^{type_pattern}" + r"(?::[\t ]*(?P<scope>[^:\n]+))?" + r":[\t ]+"
    )

    # prefix, then a one-line subject, then an optional body after a blank line
    self.commit_msg_pattern = regexp(
        self.commit_prefix.pattern
        + r"(?P<subject>[^\n]+)"
        + r"(?:\n\n(?P<text>.+))?",  # commit body
        flags=re.DOTALL,
    )

    # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123)
    self.mr_selector = regexp(
        r"[\t ]+\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
    )

    issue_keyword = r"^(?:clos(?:e|es|ed|ing)|fix(?:es|ed|ing)?|resolv(?:e|es|ed|ing)|implement(?:s|ed|ing)?):"
    issue_predicate = r"[\t ]+(?P<issue_predicate>.+)[\t ]*$"
    self.issue_selector = regexp(
        issue_keyword + issue_predicate,
        flags=re.MULTILINE | re.IGNORECASE,
    )

    self.notice_selector = regexp(r"^NOTICE: (?P<notice>.+)$")

    # (pattern, replacement) pairs; they are applied in this insertion order
    filters = {}
    filters["typo-extra-spaces"] = (regexp(r"(\S) +(\S)"), r"\1 \2")
    filters["git-header-commit"] = (
        regexp(r"^[\t ]*commit [0-9a-f]+$\n?", flags=re.MULTILINE),
        "",
    )
    filters["git-header-author"] = (
        regexp(r"^[\t ]*Author: .+$\n?", flags=re.MULTILINE),
        "",
    )
    filters["git-header-date"] = (
        regexp(r"^[\t ]*Date: .+$\n?", flags=re.MULTILINE),
        "",
    )
    filters["git-squash-heading"] = (
        regexp(
            r"^[\t ]*Squashed commit of the following:.*$\n?",
            flags=re.MULTILINE,
        ),
        "",
    )
    # bullet points or indentation in front of a commit type are removed by
    # replacing the whole match with the captured commit type
    bullet_or_indent = r"^(?:[\t ]*[*-][\t ]+|[\t ]+)?"
    filters["git-squash-commit-prefix"] = (
        regexp(bullet_or_indent + type_pattern + r"\b", flags=re.MULTILINE),
        # move commit type to the start of the line
        r"\1",
    )
    self.filters = filters
[docs]
@staticmethod
def get_default_options() -> ScipyParserOptions:
    """Return a ``ScipyParserOptions`` instance created without arguments."""
    default_options = ScipyParserOptions()
    return default_options
[docs]
def commit_body_components_separator(
    self, accumulator: dict[str, list[str]], text: str
) -> dict[str, list[str]]:
    """
    File one paragraph of a commit message into the right bucket.

    Written as a reducer step: it mutates and returns ``accumulator``.

    :param accumulator: dict with the list entries ``"descriptions"``,
        ``"notices"`` and ``"linked_issues"``
    :param text: the paragraph to classify
    :return: the same ``accumulator`` object, updated in place
    """
    # 1. release notices ("NOTICE: ...")
    notice_match = self.notice_selector.match(text)
    if notice_match and notice_match.group("notice"):
        accumulator["notices"].append(notice_match.group("notice"))
        return accumulator

    # 2. issue references ("closes: #1, #2 and #3")
    issue_match = self.issue_selector.search(text)
    if issue_match:
        raw_predicate = issue_match.group("issue_predicate") or ""
        # normalize every supported separator into a single comma
        candidate_refs = (
            regexp(r",? and | *[,;/& ] *").sub(",", raw_predicate).split(",")
        )

        # Almost all issue trackers use a number to reference an issue so
        # we use a simple regexp to validate the existence of a number which
        # helps filter out any non-issue references that don't fit our
        # expected format
        has_number = regexp(r"\d+")
        new_issue_refs: set[str] = {
            issue_ref for issue_ref in candidate_refs if has_number.search(issue_ref)
        }

        if new_issue_refs:
            accumulator["linked_issues"] = sort_numerically(
                set(accumulator["linked_issues"]).union(new_issue_refs)
            )
            return accumulator
        # no usable reference found: treat the paragraph as a description

    # 3. everything else is a description (duplicates are skipped)
    descriptions = accumulator["descriptions"]
    if text not in descriptions:
        descriptions.append(text)

    return accumulator
[docs]
def parse_message(self, message: str) -> ParsedMessageResult | None:
    """
    Break a commit message down into tag, scope, subject and body details.

    :param message: the full commit message text
    :return: ``None`` when the message does not match the commit message
        pattern, otherwise a ``ParsedMessageResult`` describing it
    """
    parsed = self.commit_msg_pattern.match(message)
    if parsed is None:
        return None

    parsed_type = parsed.group("type")
    parsed_scope = parsed.group("scope") or ""
    parsed_subject = parsed.group("subject")
    parsed_text = parsed.group("text") or ""

    # Pull a trailing merge request reference, e.g. "(#123)", off the subject
    linked_merge_request = ""
    mr_match = self.mr_selector.search(parsed_subject)
    if mr_match:
        linked_merge_request = mr_match.group("mr_number")
        parsed_subject = self.mr_selector.sub("", parsed_subject).strip()

    # Classify the subject first and then every paragraph of the body
    body_components: dict[str, list[str]] = {
        "descriptions": [],
        "notices": [],
        "linked_issues": [],
    }
    for paragraph in (parsed_subject, *parse_paragraphs(parsed_text)):
        body_components = self.commit_body_components_separator(
            body_components, paragraph
        )

    level_bump = self.options.tag_to_level.get(
        parsed_type, self.options.default_bump_level
    )

    if level_bump == LevelBump.MAJOR:
        # For a major bump only the subject stays a regular description; the
        # remaining descriptions are reported as breaking descriptions.
        # NOTE(review): the [1:] slice assumes the subject is the first
        # description. A subject that was filed as a notice or issue
        # reference would shift it - confirm whether that case matters.
        descriptions = [parsed_subject]
        breaking_descriptions = body_components["descriptions"][1:]
    else:
        descriptions = body_components["descriptions"]
        breaking_descriptions = []

    return ParsedMessageResult(
        bump=level_bump,
        type=parsed_type,
        category=tag_to_section.get(parsed_type, "None"),
        scope=parsed_scope,
        descriptions=tuple(descriptions),
        breaking_descriptions=tuple(breaking_descriptions),
        release_notices=tuple(body_components["notices"]),
        linked_issues=tuple(body_components["linked_issues"]),
        linked_merge_request=linked_merge_request,
    )
[docs]
@staticmethod
def is_merge_commit(commit: Commit) -> bool:
return len(commit.parents) > 1
[docs]
def parse_commit(self, commit: Commit) -> ParseResult:
    """
    Parse the message of a single commit.

    :param commit: the commit whose message is parsed
    :return: a ``ParsedCommit`` built from the parsed message, or a logged
        ``ParseError`` when ``parse_message`` yields no result
    """
    parsed_msg_result = self.parse_message(force_str(commit.message))
    if parsed_msg_result:
        return ParsedCommit.from_parsed_message_result(commit, parsed_msg_result)

    return _logged_parse_error(
        commit,
        f"Unable to parse commit message: {commit.message!r}",
    )
[docs]
def parse(self, commit: Commit) -> ParseResult | list[ParseResult]:
    """
    Parse a commit message

    If the commit message is a squashed merge commit, it will be split into
    multiple commits, each of which will be parsed separately. Single commits
    will be returned as a list of a single ParseResult.

    When ``ignore_merge_commits`` is enabled and the commit has more than one
    parent, a single ``ParseError`` is returned instead of a list.
    """
    if self.options.ignore_merge_commits and self.is_merge_commit(commit):
        return _logged_parse_error(
            commit, f"Ignoring merge commit: {commit.hexsha[:8]}"
        )

    separate_commits: list[Commit]
    if self.options.parse_squash_commits:
        separate_commits = self.unsquash_commit(commit)
    else:
        separate_commits = [commit]

    # Parse each commit individually if there were more than one
    parsed_commits: list[ParseResult] = [
        self.parse_commit(separate_commit) for separate_commit in separate_commits
    ]

    def with_merge_request(parsed_result: ParseResult, mr_number: str) -> ParseResult:
        # only successfully parsed commits can carry a merge request number
        if not isinstance(parsed_result, ParsedCommit):
            return parsed_result
        updated_fields = {
            **parsed_result._asdict(),
            "linked_merge_request": mr_number,
        }
        return ParsedCommit(**updated_fields)

    # TODO: improve this for other VCS systems other than GitHub & BitBucket
    # Github works as the first commit in a squash merge commit has the PR number
    # appended to the first line of the commit message
    lead_commit = next(iter(parsed_commits))

    if isinstance(lead_commit, ParsedCommit) and lead_commit.linked_merge_request:
        # If the first commit has linked merge requests, assume all commits
        # are part of the same PR and add the linked merge requests to all
        # parsed commits
        lead_mr_number = lead_commit.linked_merge_request
        parsed_commits = [
            lead_commit,
            *(
                with_merge_request(parsed_result, lead_mr_number)
                for parsed_result in parsed_commits[1:]
            ),
        ]
    elif isinstance(lead_commit, ParseError):
        # Handle BitBucket Squash Merge Commits (see #1085), whose lead message
        # does not follow the parsed commit format but includes the PR number
        # in the commit subject that we want to extract
        mr_match = self.mr_selector.search(force_str(lead_commit.message))
        if mr_match:
            linked_merge_request = mr_match.group("mr_number")
            # apply the linked MR to all commits
            parsed_commits = [
                with_merge_request(parsed_result, linked_merge_request)
                for parsed_result in parsed_commits
            ]

    return parsed_commits
[docs]
def unsquash_commit(self, commit: Commit) -> list[Commit]:
    """
    Expand a (possibly squashed) commit into one artificial commit per message.

    :param commit: the commit whose message may contain several commit messages
    :return: copies of ``commit`` that each carry one of the separated
        messages, or ``[commit]`` when no message could be separated
    """
    # GitHub EXAMPLE:
    # feat(changelog): add autofit_text_width filter to template environment (#1062)
    #
    # This change adds an equivalent style formatter that can apply a text alignment
    # to a maximum width and also maintain an indent over paragraphs of text
    #
    # * docs(changelog-templates): add definition & usage of autofit_text_width template filter
    #
    # * test(changelog-context): add test cases to check autofit_text_width filter use
    #
    # `git merge --squash` EXAMPLE:
    # Squashed commit of the following:
    #
    # commit 63ec09b9e844e616dcaa7bae35a0b66671b59fbb
    # Author: codejedi365 <codejedi365@gmail.com>
    # Date: Sun Oct 13 12:05:23 2024 -0600
    #
    # feat(release-config): some commit subject
    #
    artificial_commits: list[Commit] = []
    for commit_msg in self.unsquash_commit_message(force_str(commit.message)):
        # a fresh copy of the original commit for every message, with only
        # the message replaced
        commit_fields = {
            **deep_copy_commit(commit),
            "message": commit_msg,
        }
        artificial_commits.append(Commit(**commit_fields))

    if not artificial_commits:
        return [commit]
    return artificial_commits
[docs]
def unsquash_commit_message(self, message: str) -> list[str]:
    """
    Split a commit message into the individual commit messages it contains.

    Carriage returns are removed and surrounding whitespace is stripped, the
    text is split on ``commit <hash>`` header lines and every resulting chunk
    is handed to ``_find_squashed_commits_in_str``.

    :param message: the raw commit message
    :return: all non-empty commit messages found, in order of appearance
    """
    normalized_message = message.replace("\r", "").strip()

    # split by obvious separate commits (applies to manual git squash merges)
    commit_header_pattern = self.filters["git-header-commit"][0]
    obvious_squashed_commits = commit_header_pattern.split(normalized_message)

    # Flatten in a single pass; repeatedly concatenating lists would copy the
    # accumulated result once per chunk.
    return [
        commit_msg
        for chunk in obvious_squashed_commits
        for commit_msg in self._find_squashed_commits_in_str(chunk)
        if commit_msg
    ]
def _find_squashed_commits_in_str(self, message: str) -> list[str]:
    """
    Group the paragraphs of ``message`` into separate commit messages.

    A paragraph that starts with a commit prefix begins a new message; any
    other paragraph is attached to the message currently being built.

    :param message: text that may hold several commit messages
    :return: the completed messages followed by the last one being built,
        which is an empty string when nothing was collected
    """
    finished_msgs: list[str] = []
    pending_msg = ""

    for paragraph in message.strip().split("\n\n"):
        if not paragraph:
            continue

        # Apply filters to normalize the paragraph
        clean_paragraph = reduce(text_reducer, self.filters.values(), paragraph)

        # remove any filtered (and now empty) paragraphs (ie. the git headers)
        if not clean_paragraph.strip():
            continue

        if self.commit_prefix.search(clean_paragraph):
            # The paragraph starts a new scipy-style commit: store the message
            # collected so far and begin the next one
            if pending_msg:
                finished_msgs.append(pending_msg)
            pending_msg = clean_paragraph
        elif pending_msg:
            # continuation of the message that is currently being built
            pending_msg += f"\n\n{dedent(clean_paragraph)}"
        elif not finished_msgs:
            # nothing collected yet, so this is the first commit message
            pending_msg = dedent(clean_paragraph)
        # else: drop the paragraph

    return [*finished_msgs, pending_msg]