#!/usr/bin/env python3
# pylint: disable=invalid-name
"""
Convert a log to CheckStyle format.

Url: https://github.com/mdeweerd/LogToCheckStyle

The log can then be used for generating annotations in a github action.

Note: this script is very young and "quick and dirty".
      Patterns can be added to "PATTERNS" to match more messages.

# Examples

Assumes that logToCs.py is available as .github/logToCs.py.

## Example 1:

```yaml
      - run: |
          pre-commit run --all-files | tee pre-commit.log
          .github/logToCs.py pre-commit.log pre-commit.xml
      - uses: staabm/annotate-pull-request-from-checkstyle-action@v1
        with:
          files: pre-commit.xml
          notices-as-warnings: true # optional
```

## Example 2:

```yaml
      - run: |
          pre-commit run --all-files | tee pre-commit.log
      - name: Add results to PR
        if: ${{ always() }}
        run: |
          .github/logToCs.py pre-commit.log | cs2pr
```

Author(s):
  - https://github.com/mdeweerd

License: MIT License

"""

import argparse
import os
import re
import sys
import xml.etree.ElementTree as ET  # nosec


def remove_prefix(string, prefix):
    """
    Remove prefix from string

    Provided for backward compatibility (same result as str.removeprefix).
    """
    if prefix and string.startswith(prefix):
        return string[len(prefix) :]
    return string


def convert_to_checkstyle(messages, root_path=None):
    """
    Convert provided messages to CheckStyle format.

    Each item of `messages` is matched on its own against PATTERNS
    (standard `re` module only), so multi-line patterns can not match here.

    `root_path`, when given, is removed from the start of the file names.

    Returns the CheckStyle XML document as a string.
    """
    root = ET.Element("checkstyle")
    file_group = None  # The file name for the group (if any)
    for message in messages:
        fields = parse_message(message)
        if not fields:
            continue
        # Group markers carry no error themselves; they only provide the
        # file name for the following messages that do not have one.
        file_group, is_entry = _track_file_group(fields, file_group)
        if is_entry:
            add_error_entry(root, **fields, root_path=root_path)
    return ET.tostring(root, encoding="utf_8").decode("utf_8")


def convert_text_to_checkstyle(text, root_path=None):
    """
    Convert provided text to CheckStyle format.

    The complete text is scanned at once using parse_file, which
    raises ImportError when the 'regex' module is not installed.

    `root_path`, when given, is removed from the start of the file names.

    Returns the CheckStyle XML document as a string.
    """
    root = ET.Element("checkstyle")
    for fields in parse_file(text):
        if fields:
            add_error_entry(root, **fields, root_path=root_path)
    return ET.tostring(root, encoding="utf_8").decode("utf_8")


# The group names below are the keys of the dicts returned by the parsers;
# 'severity', 'file_name', 'line', 'column' and 'message' are passed as
# keyword arguments to add_error_entry.
ANY_REGEX = r".*?"
FILE_REGEX = r"\s*(?P<file_name>\S.*?)\s*?"
FILEGROUP_REGEX = r"\s*(?P<file_group>\S.*?)\s*?"
EOL_REGEX = r"[\r\n]"
# Digits are matched greedily: when the message immediately follows the
# number (yamllint pattern), a lazy match would stop after the first digit
# and push the remaining digits into the message.
LINE_REGEX = r"\s*(?P<line>\d+)\s*?"
COLUMN_REGEX = r"\s*(?P<column>\d+)\s*?"
SEVERITY_REGEX = r"\s*(?P<severity>error|warning|notice|style|info)\s*?"
MSG_REGEX = r"\s*(?P<message>.+?)\s*?"
MULTILINE_MSG_REGEX = r"\s*(?P<message>(?:.|.[\r\n])+)"
# cpplint confidence index
CONFIDENCE_REGEX = r"\s*\[(?P<confidence>\d+)\]\s*?"


# List of message patterns, add more specific patterns earlier in the list
# Creating patterns by using constants makes them easier to define and read.
PATTERNS = [
    # beautysh
    #  File ftp.sh: error: "esac" before "case" in line 90.
    re.compile(
        f"^File {FILE_REGEX}:{SEVERITY_REGEX}:"
        f" {MSG_REGEX} in line {LINE_REGEX}.$"
    ),
    # beautysh
    #  File socks4echo.sh: error: indent/outdent mismatch: -2.
    re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"),
    # yamllint
    # ##[group].pre-commit-config.yaml
    # ##[error]97:14 [trailing-spaces] trailing spaces
    # ##[endgroup]
    re.compile(rf"^##\[group\]{FILEGROUP_REGEX}$"),  # Start file group
    re.compile(
        rf"^##\[{SEVERITY_REGEX}\]{LINE_REGEX}:{COLUMN_REGEX}{MSG_REGEX}$"
    ),  # Msg
    re.compile(r"^##(?P<file_endgroup>\[endgroup\])$"),  # End file group
    # ESLint (JavaScript Linter), RoboCop, shellcheck
    #  path/to/file.js:10:2: Some linting issue
    #  path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected
    #  path/to/script.sh:10:1: SC2034: Some shell script issue
    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"),
    # Cpplint default output:
    #           '%s:%s:  %s  [%s] [%d]\n'
    #   % (filename, linenum, message, category, confidence)
    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"),
    # MSVC
    # file.cpp(10): error C1234: Some error message
    re.compile(
        f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$"
    ),
    # Java compiler
    # File.java:10: error: Some error message
    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"),
    # Python
    # File ".../logToCs.py", line 90 (note: code line follows)
    re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'),
    # Pylint, others
    # path/to/file.py:10: [C0111] Missing docstring
    # others
    re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"),
    # Shellcheck:
    # In script.sh line 76:
    re.compile(
        f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?"
        f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}"
    ),
    # eslint:
    #  /path/to/filename
    #    14:5  error  Unexpected trailing comma  comma-dangle
    re.compile(
        f"^{FILE_REGEX}{EOL_REGEX}"
        rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$"
    ),
]

# Severities available in CodeSniffer report format
SEVERITY_NOTICE = "notice"
SEVERITY_WARNING = "warning"
SEVERITY_ERROR = "error"


def strip_ansi(text: str) -> str:
    """
    Strip ANSI escape sequences from string (colors, etc)
    """
    return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)


def _normalize_severity(severity, confidence=None):
    """
    Return the report severity for a matched message.

    A cpplint confidence level, when present, decides the severity:
    <= 1 is a notice, >= 5 an error, anything in between a warning.
    Otherwise a missing severity is an error, and "info"/"style" are
    reported as notices; other values are returned in lower case.
    """
    if confidence is not None:
        level = int(confidence)
        if level <= 1:
            return SEVERITY_NOTICE
        if level >= 5:
            return SEVERITY_ERROR
        return SEVERITY_WARNING

    if severity is None:
        return SEVERITY_ERROR

    severity = severity.lower()
    if severity in ("info", "style"):
        return SEVERITY_NOTICE
    return severity


def _track_file_group(result, file_group):
    """
    Apply the "##[group]file" / "##[endgroup]" logic to a parsed result.

    Removes the 'file_group' and 'file_endgroup' keys from `result` and,
    if the result has no file name, fills it in from `file_group`.

    Returns a tuple (file_group, is_entry) with the file group that is
    active after this result, and whether the result is an entry to
    report (False for group markers and results without any file name).
    """
    new_file_group = result.pop("file_group", None)
    file_endgroup = result.pop("file_endgroup", None)

    if new_file_group is not None:
        # Start of file_group, just store file
        return new_file_group, False

    if file_endgroup is not None:
        return None, False

    if result.get("file_name") is None:
        if file_group is None:
            # No filename, skip
            return file_group, False
        result["file_name"] = file_group

    return file_group, True


def parse_file(text):
    """
    Parse all messages in a file

    All PATTERNS are combined into a single alternation that is applied
    to the complete text (after removing ANSI escape sequences), which
    allows multi-line messages to be matched.

    Returns a list of dicts with the fields of each message.

    Raises ImportError if the 'regex' module is not installed.
    """
    # regex required to allow same group names
    try:
        import regex  # pylint: disable=import-outside-toplevel
    except ImportError as exc:
        raise ImportError(
            "The 'parse_file' method requires 'python -m pip install regex'"
        ) from exc

    patterns = [pattern.pattern for pattern in PATTERNS]

    file_group = None  # The file name for the group (if any)
    full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))"
    results = []

    for match in regex.finditer(
        full_regex, strip_ansi(text), regex.MULTILINE
    ):
        result = match.groupdict()
        if not result:
            continue

        # 'confidence' is not a CheckStyle field, it only sets the severity
        confidence = result.pop("confidence", None)

        file_group, is_entry = _track_file_group(result, file_group)
        if not is_entry:
            continue

        result["severity"] = _normalize_severity(
            result.get("severity"), confidence
        )
        results.append(result)

    return results


def parse_message(message):
    """
    Parse message until it matches a pattern.

    ANSI escape sequences are removed before matching.

    Returns the fields in a dict, or None if no pattern matches.
    The dict always has a 'severity'; for group markers it holds
    'file_group' or 'file_endgroup' instead of message fields.
    """
    message = strip_ansi(message)
    for pattern in PATTERNS:
        match = pattern.match(message)
        if not match:
            continue
        result = match.groupdict()
        if not result:
            continue

        # Convert confidence level of cpplint to warning, etc.
        confidence = result.pop("confidence", None)
        result["severity"] = _normalize_severity(
            result.get("severity"), confidence
        )
        return result

    # Nothing matched
    return None


def add_error_entry(  # pylint: disable=too-many-arguments
    root,
    severity,
    file_name,
    line=None,
    column=None,
    message=None,
    source=None,
    root_path=None,
):
    """
    Add error information to the CheckStyle output being created.

    The error is added below the <file> element for `file_name`, which is
    created when needed. Only 'severity' is always set; the other
    attributes are set when a non-empty value is provided.
    """
    file_element = find_or_create_file_element(
        root, file_name, root_path=root_path
    )
    error_element = ET.SubElement(file_element, "error")
    error_element.set("severity", severity)
    if line:
        error_element.set("line", line)
    if column:
        error_element.set("column", column)
    if message:
        error_element.set("message", message)
    if source:
        # To verify if this is a valid attribute
        error_element.set("source", source)


def find_or_create_file_element(root, file_name: str, root_path=None):
    """
    Find/create file element in XML document tree.

    `root_path`, when not None, is removed from the start of `file_name`
    before looking up the <file> element by its 'name' attribute.
    """
    if root_path is not None:
        file_name = remove_prefix(file_name, root_path)
    for file_element in root.findall("file"):
        if file_element.get("name") == file_name:
            return file_element
    file_element = ET.SubElement(root, "file")
    file_element.set("name", file_name)
    return file_element


def main():
    """
    Parse the script arguments and get the conversion done.
    """
    parser = argparse.ArgumentParser(
        description="Convert messages to Checkstyle XML format."
    )
    parser.add_argument(
        "input", help="Input file. Use '-' for stdin.", nargs="?", default="-"
    )
    parser.add_argument(
        "output",
        help="Output file. Use '-' for stdout.",
        nargs="?",
        default="-",
    )
    parser.add_argument(
        "-i",
        "--in",
        dest="input_named",
        help="Input filename. Overrides positional input.",
    )
    parser.add_argument(
        "-o",
        "--out",
        dest="output_named",
        help="Output filename. Overrides positional output.",
    )
    parser.add_argument(
        "--root",
        metavar="ROOT_PATH",
        help="Root directory to remove from file paths."
        " Defaults to working directory.",
        default=os.getcwd(),
    )

    args = parser.parse_args()

    # As documented in the help, the named options take precedence
    # over the positional arguments.
    input_path = args.input_named or args.input
    output_path = args.output_named or args.output

    if input_path != "-":
        with open(
            input_path, encoding="utf_8", errors="surrogateescape"
        ) as input_file:
            text = input_file.read()
    else:
        text = sys.stdin.read()

    # Make sure the root ends with a path separator
    root_path = os.path.join(args.root, "")

    try:
        checkstyle_xml = convert_text_to_checkstyle(text, root_path=root_path)
    except ImportError:
        # 'regex' module not available: fall back to line by line matching
        checkstyle_xml = convert_to_checkstyle(
            re.split(r"[\r\n]+", text), root_path=root_path
        )

    if output_path != "-":
        with open(output_path, "w", encoding="utf_8") as output_file:
            output_file.write(checkstyle_xml)
    else:
        print(checkstyle_xml)


if __name__ == "__main__":
    main()