From a72a8f518352e696832a2cbc9cd1ecf27a36394e Mon Sep 17 00:00:00 2001 From: MDW Date: Sun, 7 Jan 2024 15:29:16 +0100 Subject: [PATCH] Add pre-commit workflow + update hooks --- .github/logToCs.py | 362 +++++++++++++++++++++++++++++++ .github/workflows/pre-commit.yml | 61 ++++++ .pre-commit-config.yaml | 17 +- 3 files changed, 433 insertions(+), 7 deletions(-) create mode 100755 .github/logToCs.py create mode 100644 .github/workflows/pre-commit.yml diff --git a/.github/logToCs.py b/.github/logToCs.py new file mode 100755 index 0000000..8131c06 --- /dev/null +++ b/.github/logToCs.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python3 +# pylint: disable=invalid-name +""" +Convert a log to CheckStyle format. + +Url: https://github.com/mdeweerd/LogToCheckStyle + +The log can then be used for generating annotations in a github action. + +Note: this script is very young and "quick and dirty". + Patterns can be added to "PATTERNS" to match more messages. + +# Examples + +Assumes that logToCs.py is available as .github/logToCs.py. + +## Example 1: + + +```yaml + - run: | + pre-commit run -all-files | tee pre-commit.log + .github/logToCs.py pre-commit.log pre-commit.xml + - uses: staabm/annotate-pull-request-from-checkstyle-action@v1 + with: + files: pre-commit.xml + notices-as-warnings: true # optional +``` + +## Example 2: + + +```yaml + - run: | + pre-commit run --all-files | tee pre-commit.log + - name: Add results to PR + if: ${{ always() }} + run: | + .github/logToCs.py pre-commit.log | cs2pr +``` + +Author(s): + - https://github.com/mdeweerd + +License: MIT License + +""" + +import argparse +import os +import re +import sys +import xml.etree.ElementTree as ET # nosec + + +def convert_to_checkstyle(messages, root_path=None): + """ + Convert provided message to CheckStyle format. + """ + root = ET.Element("checkstyle") + for message in messages: + fields = parse_message(message) + if fields: + add_error_entry(root, **fields, root_path=root_path) + return ET.tostring(root, encoding="utf-8").decode("utf-8") + + +def convert_text_to_checkstyle(text, root_path=None): + """ + Convert provided message to CheckStyle format. + """ + root = ET.Element("checkstyle") + for fields in parse_file(text): + if fields: + add_error_entry(root, **fields, root_path=root_path) + return ET.tostring(root, encoding="utf-8").decode("utf-8") + + +ANY_REGEX = r".*?" +FILE_REGEX = r"\s*(?P\S.*?)\s*?" +EOL_REGEX = r"[\r\n]" +LINE_REGEX = r"\s*(?P\d+?)\s*?" +COLUMN_REGEX = r"\s*(?P\d+?)\s*?" +SEVERITY_REGEX = r"\s*(?Perror|warning|notice|style|info)\s*?" +MSG_REGEX = r"\s*(?P.+?)\s*?" +MULTILINE_MSG_REGEX = r"\s*(?P(?:.|.[\r\n])+)" +# cpplint confidence index +CONFIDENCE_REGEX = r"\s*\[(?P\d+)\]\s*?" + + +# List of message patterns, add more specific patterns earlier in the list +# Creating patterns by using constants makes them easier to define and read. +PATTERNS = [ + # beautysh + # File ftp.sh: error: "esac" before "case" in line 90. + re.compile( + f"^File {FILE_REGEX}:{SEVERITY_REGEX}:" + f" {MSG_REGEX} in line {LINE_REGEX}.$" + ), + # beautysh + # File socks4echo.sh: error: indent/outdent mismatch: -2. + re.compile(f"^File {FILE_REGEX}:{SEVERITY_REGEX}: {MSG_REGEX}$"), + # ESLint (JavaScript Linter), RoboCop, shellcheck + # path/to/file.js:10:2: Some linting issue + # path/to/file.rb:10:5: Style/Indentation: Incorrect indentation detected + # path/to/script.sh:10:1: SC2034: Some shell script issue + re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{COLUMN_REGEX}: {MSG_REGEX}$"), + # Cpplint default output: + # '%s:%s: %s [%s] [%d]\n' + # % (filename, linenum, message, category, confidence) + re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{MSG_REGEX}{CONFIDENCE_REGEX}$"), + # MSVC + # file.cpp(10): error C1234: Some error message + re.compile( + f"^{FILE_REGEX}\\({LINE_REGEX}\\):{SEVERITY_REGEX}{MSG_REGEX}$" + ), + # Java compiler + # File.java:10: error: Some error message + re.compile(f"^{FILE_REGEX}:{LINE_REGEX}:{SEVERITY_REGEX}:{MSG_REGEX}$"), + # Python + # File ".../logToCs.py", line 90 (note: code line follows) + re.compile(f'^File "{FILE_REGEX}", line {LINE_REGEX}$'), + # Pylint, others + # path/to/file.py:10: [C0111] Missing docstring + # others + re.compile(f"^{FILE_REGEX}:{LINE_REGEX}: {MSG_REGEX}$"), + # Shellcheck: + # In script.sh line 76: + re.compile( + f"^In {FILE_REGEX} line {LINE_REGEX}:{EOL_REGEX}?" + f"({MULTILINE_MSG_REGEX})?{EOL_REGEX}{EOL_REGEX}" + ), + # eslint: + # /path/to/filename + # 14:5 error Unexpected trailing comma comma-dangle + re.compile( + f"^{FILE_REGEX}{EOL_REGEX}" + rf"\s+{LINE_REGEX}:{COLUMN_REGEX}\s+{SEVERITY_REGEX}\s+{MSG_REGEX}$" + ), +] + +# Severities available in CodeSniffer report format +SEVERITY_NOTICE = "notice" +SEVERITY_WARNING = "warning" +SEVERITY_ERROR = "error" + + +def strip_ansi(text: str): + """ + Strip ANSI escape sequences from string (colors, etc) + """ + return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text) + + +def parse_file(text): + """ + Parse all messages in a file + + Returns the fields in a dict. + """ + # regex required to allow same group names + import regex # pylint: disable=import-outside-toplevel + + patterns = [pattern.pattern for pattern in PATTERNS] + # patterns = [PATTERNS[0].pattern] + + full_regex = "(?:(?:" + (")|(?:".join(patterns)) + "))" + results = [] + + for fields in regex.finditer( + full_regex, strip_ansi(text), regex.MULTILINE + ): + if not fields: + continue + result = fields.groupdict() + + if len(result) == 0: + continue + severity = result.get("severity", None) + confidence = result.pop("confidence", None) + + if confidence is not None: + # Convert confidence level of cpplint + # to warning, etc. + confidence = int(confidence) + + if confidence <= 1: + severity = SEVERITY_NOTICE + elif confidence >= 5: + severity = SEVERITY_ERROR + else: + severity = SEVERITY_WARNING + + if severity is None: + severity = SEVERITY_ERROR + else: + severity = severity.lower() + + if severity in ["info", "style"]: + severity = SEVERITY_NOTICE + + result["severity"] = severity + + results.append(result) + + return results + + +def parse_message(message): + """ + Parse message until it matches a pattern. + + Returns the fields in a dict. + """ + for pattern in PATTERNS: + fields = pattern.match(message) + if not fields: + continue + result = fields.groupdict() + if len(result) == 0: + continue + + if "confidence" in result: + # Convert confidence level of cpplint + # to warning, etc. + confidence = int(result["confidence"]) + del result["confidence"] + + if confidence <= 1: + severity = SEVERITY_NOTICE + elif confidence >= 5: + severity = SEVERITY_ERROR + else: + severity = SEVERITY_WARNING + result["severity"] = severity + + if "severity" not in result: + result["severity"] = SEVERITY_ERROR + else: + result["severity"] = result["severity"].lower() + + if result["severity"] in ["info", "style"]: + result["severity"] = SEVERITY_NOTICE + + return result + + # Nothing matched + return None + + +def add_error_entry( # pylint: disable=too-many-arguments + root, + severity, + file_name, + line=None, + column=None, + message=None, + source=None, + root_path=None, +): + """ + Add error information to the CheckStyle output being created. + """ + file_element = find_or_create_file_element( + root, file_name, root_path=root_path + ) + error_element = ET.SubElement(file_element, "error") + error_element.set("severity", severity) + if line: + error_element.set("line", line) + if column: + error_element.set("column", column) + if message: + error_element.set("message", message) + if source: + # To verify if this is a valid attribute + error_element.set("source", source) + + +def find_or_create_file_element(root, file_name: str, root_path=None): + """ + Find/create file element in XML document tree. + """ + + if root_path is not None: + file_name = file_name.removeprefix(root_path) + for file_element in root.findall("file"): + if file_element.get("name") == file_name: + return file_element + file_element = ET.SubElement(root, "file") + file_element.set("name", file_name) + return file_element + + +def main(): + """ + Parse the script arguments and get the conversion done. + """ + parser = argparse.ArgumentParser( + description="Convert messages to Checkstyle XML format." + ) + parser.add_argument( + "input", help="Input file. Use '-' for stdin.", nargs="?", default="-" + ) + parser.add_argument( + "output", + help="Output file. Use '-' for stdout.", + nargs="?", + default="-", + ) + parser.add_argument( + "-i", + "--in", + dest="input_named", + help="Input filename. Overrides positional input.", + ) + parser.add_argument( + "-o", + "--out", + dest="output_named", + help="Output filename. Overrides positional output.", + ) + parser.add_argument( + "--root", + metavar="ROOT_PATH", + help="Root directory to remove from file paths." + " Defaults to working directory.", + default=os.getcwd(), + ) + + args = parser.parse_args() + + if args.input == "-" and args.input_named: + with open(args.input_named, encoding="utf_8") as input_file: + text = input_file.read() + elif args.input != "-": + with open(args.input, encoding="utf_8") as input_file: + text = input_file.read() + else: + text = sys.stdin.read() + + root_path = os.path.join(args.root, "") + + try: + checkstyle_xml = convert_text_to_checkstyle(text, root_path=root_path) + except ImportError: + checkstyle_xml = convert_to_checkstyle( + re.split(r"[\r\n]+", text), root_path=root_path + ) + + if args.output == "-" and args.output_named: + with open(args.output_named, "w", encoding="utf_8") as output_file: + output_file.write(checkstyle_xml) + elif args.output != "-": + with open(args.output, "w", encoding="utf_8") as output_file: + output_file.write(checkstyle_xml) + else: + print(checkstyle_xml) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..a7a5fd0 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,61 @@ +--- +name: pre-commit +on: + pull_request: + push: +jobs: + pre-commit: + runs-on: ubuntu-latest + env: + LOG_TO_CS: .github/logToCs.py + RAW_LOG: pre-commit.log + CS_XML: pre-commit.xml + steps: + - name: Install required tools + run: sudo apt-get update && sudo apt-get install cppcheck + if: false + - uses: actions/checkout@v4 + - name: Create requirements.txt if no requirements.txt or pyproject.toml + run: |- + [ -r requirements.txt ] || [ -r pyproject.toml ] || touch requirements.txt + - uses: actions/setup-python@v4 + with: + cache: pip + python-version: '3.11' + - run: python -m pip install pre-commit regex + - uses: actions/cache/restore@v3 + with: + path: ~/.cache/pre-commit/ + key: pre-commit-4|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') + }} + - name: Run pre-commit hooks + run: | + set -o pipefail + pre-commit gc + pre-commit run --show-diff-on-failure --color=always --all-files | tee ${RAW_LOG} + - name: Convert Raw Log to CheckStyle format + if: ${{ failure() }} + run: | + python ${LOG_TO_CS} ${RAW_LOG} ${CS_XML} + - name: Annotate Source Code with Messages + uses: staabm/annotate-pull-request-from-checkstyle-action@v1 + if: ${{ failure() }} + with: + files: ${{ env.CS_XML }} + notices-as-warnings: true # optional + prepend-filename: true # optional + - uses: actions/cache/save@v3 + if: ${{ always() }} + with: + path: ~/.cache/pre-commit/ + key: pre-commit-4|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') + }} + - name: Provide log as artifact + uses: actions/upload-artifact@v3 + if: ${{ always() }} + with: + name: precommit-logs + path: | + ${{ env.RAW_LOG }} + ${{ env.CS_XML }} + retention-days: 2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da6cf83..d110e1b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ exclude: ^(\.[^/]*cache/.*|.*/_user.py)$ repos: - repo: https://github.com/executablebooks/mdformat # Do this before other tools "fixing" the line endings - rev: 0.7.14 + rev: 0.7.17 hooks: - id: mdformat name: Format Markdown @@ -21,8 +21,9 @@ repos: - mdformat-black - mdformat-web - mdformat-gfm + - setuptools - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.5.0 hooks: - id: no-commit-to-branch args: [--branch, main] @@ -43,7 +44,7 @@ repos: - id: check-case-conflict # - id: check-toml - repo: https://github.com/adrienverge/yamllint.git - rev: v1.26.3 + rev: v1.33.0 hooks: - id: yamllint args: @@ -54,8 +55,10 @@ repos: rev: v6.2.1 hooks: - id: beautysh + additional_dependencies: + - setuptools - repo: https://github.com/asottile/pyupgrade - rev: v2.34.0 + rev: v3.15.0 hooks: - id: pyupgrade # - repo: https://github.com/psf/black @@ -94,7 +97,7 @@ repos: # hooks: # - id: isort - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 + rev: v2.2.6 hooks: - id: codespell args: @@ -103,8 +106,8 @@ repos: - --ignore-words-list=hass,master - --skip="./.*" - --quiet-level=2 -# - repo: https://github.com/pre-commit/mirrors-pylint -# rev: v3.0.0a5 +# - repo: https://github.com/pylint-dev/pylint +# rev: v3.0.3 # hooks: # - id: pylint # args: