Coverage for src / python_commitlint / parser.py: 93%

68 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-28 02:54 +0000

1"""Parses raw commit message text into structured :class:`CommitMessage` values.""" 

2 

3import re 

4 

5from python_commitlint.core.models import CommitMessage 

6 

7 

class ConventionalCommitParser:
    """Parses Conventional Commits messages into :class:`CommitMessage`.

    The parser extracts type, optional scope, breaking-change marker, and
    subject from the header line, and separates the body from the footer
    block when present. When the header does not match the conventional
    pattern, only the raw text, header, body, and footer are passed to
    :class:`CommitMessage`; every other field keeps that class's default.
    """

    # Header grammar: ``type(scope)!: subject`` where the scope and the
    # ``!`` breaking marker are both optional.
    HEADER_PATTERN = re.compile(
        r"^(?P<type>\w+)"
        r"(?:\((?P<scope>[^\)]+)\))?"
        r"(?P<breaking>!)?"
        r":\s*"
        r"(?P<subject>.+?)$"
    )
    # Matches a ``BREAKING CHANGE:`` / ``BREAKING-CHANGE:`` footer at the
    # start of any line of the footer text.
    BREAKING_FOOTER_PATTERN = re.compile(
        r"^BREAKING[- ]CHANGE:\s*(.+)", re.MULTILINE
    )
    # A line counts as a footer line when it starts with a breaking-change
    # token, a ``token: value`` pair, or a ``token #123`` reference.
    _FOOTER_LINE_PATTERNS: tuple[re.Pattern[str], ...] = (
        re.compile(r"^BREAKING[- ]CHANGE:"),
        re.compile(r"^[\w-]+:\s+"),
        re.compile(r"^[\w-]+\s+#\d+"),
    )

    def parse(self, message: str) -> CommitMessage:
        """Parse ``message`` into a :class:`CommitMessage`.

        Args:
            message: Raw commit message text. May be a single line or
                contain a body and footer separated by blank lines.

        Returns:
            A :class:`CommitMessage` populated with whichever fields
            could be extracted. When the header does not match
            :attr:`HEADER_PATTERN`, only ``raw``, ``header``, ``body`` and
            ``footer`` are supplied.
        """
        lines = message.split("\n")
        header = self._extract_header(lines)
        # Split the post-header lines once and reuse both halves instead of
        # recomputing the same split for the body and again for the footer.
        body, footer = self._extract_body_and_footer(lines)

        match = self.HEADER_PATTERN.match(header)
        if not match:
            return CommitMessage(
                raw=message,
                header=header,
                body=body,
                footer=footer,
            )

        type_ = match.group("type") or ""
        scope = match.group("scope") or ""
        subject = match.group("subject") or ""
        breaking = match.group("breaking") is not None

        # A breaking change may be flagged by ``!`` in the header or by a
        # BREAKING CHANGE footer; either one is sufficient.
        if not breaking and self.BREAKING_FOOTER_PATTERN.search(footer):
            breaking = True

        return CommitMessage(
            raw=message,
            header=header,
            type=type_,
            scope=scope,
            subject=subject,
            body=body,
            footer=footer,
            breaking=breaking,
            is_conventional=True,
        )

    def _extract_header(self, lines: list[str]) -> str:
        """Return the first line stripped of whitespace, or ``""`` if none."""
        if not lines:
            return ""
        return lines[0].strip()

    def _extract_body_and_footer(self, lines: list[str]) -> tuple[str, str]:
        """Return ``(body, footer)`` text computed from a single split.

        Each part is its lines joined with ``"\\n"`` and stripped of
        surrounding whitespace; a missing part is the empty string.
        """
        body_lines, footer_lines = self._split_body_footer(lines)
        return "\n".join(body_lines).strip(), "\n".join(footer_lines).strip()

    def _extract_body(self, lines: list[str]) -> str:
        """Return the body text (possibly empty) of the message lines."""
        body, _ = self._split_body_footer(lines)
        return "\n".join(body).strip()

    def _extract_footer(self, lines: list[str]) -> str:
        """Return the footer text (possibly empty) of the message lines."""
        _, footer = self._split_body_footer(lines)
        return "\n".join(footer).strip()

    def _split_body_footer(
        self, lines: list[str]
    ) -> tuple[list[str], list[str]]:
        """Split the lines after the header into body and footer lines.

        Args:
            lines: All message lines, header first.

        Returns:
            A ``(body_lines, footer_lines)`` tuple. Both are empty when
            nothing but blank lines follows the header.
        """
        # The Conventional Commits spec requires a blank-line separator
        # between body and footer. A `token: value` line buried in the body
        # is body content, not a footer — only a footer-token line that
        # follows a blank line marks the start of the footer block.
        if len(lines) <= 1:
            return [], []

        post_header = lines[1:]
        body_start = self._first_non_blank(post_header)
        if body_start is None:
            return [], []

        # No body at all: the first content after the header is a footer.
        if self._is_footer_line(post_header[body_start]):
            return [], post_header[body_start:]

        footer_split = self._find_footer_split(post_header, body_start)
        if footer_split is not None:
            blank_idx, footer_idx = footer_split
            return (
                self._rstrip_blank(post_header[body_start:blank_idx]),
                post_header[footer_idx:],
            )

        return self._rstrip_blank(post_header[body_start:]), []

    @staticmethod
    def _first_non_blank(lines: list[str]) -> int | None:
        """Return the index of the first non-blank line, or ``None``."""
        for i, line in enumerate(lines):
            if line.strip():
                return i
        return None

    def _find_footer_split(
        self, post_header: list[str], body_start: int
    ) -> tuple[int, int] | None:
        """Locate the first blank line that is directly followed by a footer.

        Args:
            post_header: Message lines with the header removed.
            body_start: Index in ``post_header`` where the body begins.

        Returns:
            ``(blank_index, footer_index)`` for the first such pair at or
            after ``body_start``, or ``None`` when there is no footer.
        """
        for i in range(body_start, len(post_header) - 1):
            if not post_header[i].strip() and self._is_footer_line(
                post_header[i + 1]
            ):
                return i, i + 1
        return None

    @staticmethod
    def _rstrip_blank(lines: list[str]) -> list[str]:
        """Return a copy of ``lines`` without trailing blank lines."""
        result = list(lines)
        while result and not result[-1].strip():
            result.pop()
        return result

    def _is_footer_line(self, line: str) -> bool:
        """Return whether ``line`` starts with a recognised footer token."""
        return any(
            pattern.match(line) for pattern in self._FOOTER_LINE_PATTERNS
        )

149 

150 

class CommitParserFactory:
    """Factory that hands out :class:`ConventionalCommitParser` objects."""

    @staticmethod
    def create() -> ConventionalCommitParser:
        """Build a new :class:`ConventionalCommitParser` and return it."""
        parser = ConventionalCommitParser()
        return parser