From e9c16fc2ea939fc964c7f1e0f3e9348a3ca3c0a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tibor=20=C5=A0imko?= Date: Fri, 10 Jan 2025 10:42:38 +0100 Subject: [PATCH] ci(check-fixtures): parallelise fixture checking commands Introduces several independent `run-tests.sh` fixture-checking commands in order to speed up fixture checking by parallelisation. Renames `run-tests.sh` script options and CI rules to better separate data checks, formatting checks and linting checks. Adds `shfmt` formatting checks, `flake8` and `yamllint` linting checks. Removes `pycodestyle` formatting checks since we moved to `black` code formatter. Introduces `./run-tests.sh --help` explaining all the checking options. Updates CI environment to Ubuntu 24.04 and latest actions (`actions/checkout@v4`, `actions/setup-node@v4`, `actions/setup-python@v5`). Amends `.editorconfig` to add rules for shell scripts and remove rules for ReST files that are no longer needed after the switch to Markdown. BREAKING CHANGE: Refactors `run-tests.sh` script options. --- .editorconfig | 6 +- .flake8 | 19 ++++ .github/workflows/ci.yml | 196 ++++++++++++++++++++++++---------- .yamllint.yaml | 8 ++ docker-compose.yml | 6 +- run-tests.sh | 165 ++++++++++++++++++---------- scripts/split_json_file.py | 4 +- scripts/start-server-debug.sh | 13 +-- 8 files changed, 288 insertions(+), 129 deletions(-) create mode 100644 .flake8 create mode 100644 .yamllint.yaml diff --git a/.editorconfig b/.editorconfig index 54923eea5a..4c0343b2c5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of CERN Open Data Portal. -# Copyright (C) 2016 CERN. +# Copyright (C) 2016, 2025 CERN. 
# # CERN Open Data Portal is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License as @@ -40,8 +40,8 @@ known_first_party = cernopendata multi_line_output = 2 default_section = THIRDPARTY -# RST files (used by sphinx) -[*.rst] +# Shell script files +[*.sh] indent_size = 4 # CSS, HTML, JS, JSON, YML diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000..089b8114c2 --- /dev/null +++ b/.flake8 @@ -0,0 +1,19 @@ +[flake8] +max-line-length = 89 + +exclude = + build + dist + docs + coverage.xml + cernopendata_client.egg-info + .*/ + env/ + .git + __pycache__ + +ignore = E203, E231, E266, E501, W503, F403, F401, W605 + +max-complexity = 18 + +select = B,C,E,F,W,T4,B9 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 209cf1e8ba..7acc198156 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,5 @@ # This file is part of CERN Open Data Portal. -# Copyright (C) 2020, 2023, 2024 CERN. +# Copyright (C) 2020, 2023, 2024, 2025 CERN. # # CERN Open Data Portal is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License as @@ -20,117 +20,199 @@ # waive the privileges and immunities granted to it by virtue of its status # as an Intergovernmental Organization or submit itself to any jurisdiction. 
-name: CI +name: ci on: [push, pull_request] jobs: - lint-shellcheck: - runs-on: ubuntu-20.04 + data-dois: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - - name: Runs shell script static analysis - run: | - sudo apt-get install shellcheck - ./run-tests.sh --check-shellscript + - name: Setup node + uses: actions/setup-node@v4 - lint-black: - runs-on: ubuntu-20.04 + - name: Check data DOIs + run: ./run-tests.sh --data-dois + + data-licenses: + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.9 - - name: Check Python code formatting + - name: Check data licenses run: | - pip install black - ./run-tests.sh --check-black + ./run-tests.sh --data-licenses - lint-pycodestyle: - runs-on: ubuntu-20.04 + data-recids: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + + - name: Check data record IDs + run: ./run-tests.sh --data-recids + + data-slugs: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + + - name: Check data slugs + run: ./run-tests.sh --data-slugs + + data-types: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + + - name: Check data types + run: ./run-tests.sh --data-types + + format-black: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.9 - - name: Check compliance with Python coding style conventions + - name: Check formatting of Python code run: | - pip install --upgrade pip - pip install pycodestyle - ./run-tests.sh 
--check-pycodestyle + pip install black + ./run-tests.sh --format-black - lint-pydocstyle: - runs-on: ubuntu-20.04 + format-isort: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.9 - - name: Check compliance with Python docstring conventions + - name: Check formatting of Python imports run: | - pip install --upgrade pip - pip install pydocstyle - ./run-tests.sh --check-pydocstyle + pip install isort + ./run-tests.sh --format-isort - check-fixtures: - runs-on: ubuntu-20.04 + format-shfmt: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + + - name: Check formatting of shell scripts + run: | + sudo apt-get install shfmt + ./run-tests.sh --format-shfmt + + format-whitespace: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 - name: Setup node - uses: actions/setup-node@v1 + uses: actions/setup-node@v4 + + - name: Check formatting of whitespace + run: ./run-tests.sh --format-whitespace + + lint-flake8: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 with: - node-version: "14" + python-version: 3.9 - - name: Install Node dependencies - run: npm install -g jsonlint + - name: Check linting of Python code + run: | + pip install flake8 + ./run-tests.sh --lint-flake8 + + lint-jsonlint: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 - - name: Check fixtures - run: ./run-tests.sh --check-fixtures + - name: Setup Node + uses: actions/setup-node@v4 - check-isort: - runs-on: ubuntu-20.04 + - name: Check linting of JSON files + run: | + npm install jsonlint --global + ./run-tests.sh --lint-jsonlint + + lint-pydocstyle: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: 
Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: 3.9 - - name: Check isort + - name: Check linting of Python docstrings run: | - pip install --upgrade pip - pip install isort - ./run-tests.sh --check-isort + pip install pydocstyle + ./run-tests.sh --lint-pydocstyle + + lint-shellcheck: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 - check-licenses: - runs-on: ubuntu-20.04 + - name: Check linting of shell scripts + run: | + sudo apt-get install shellcheck + ./run-tests.sh --lint-shellcheck + + lint-yamllint: + runs-on: ubuntu-24.04 steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: "3.12" - - name: Check licenses + - name: Lint YAML files run: | - pip install --upgrade pip - ./run-tests.sh --check-licenses + pip install yamllint + ./run-tests.sh --lint-yamllint diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 0000000000..e98607e5fa --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,8 @@ +extends: default + +rules: + comments: + min-spaces-from-content: 1 + document-start: disable + line-length: disable + truthy: disable diff --git a/docker-compose.yml b/docker-compose.yml index eda781ad1e..2d506f49cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ include: - - path: - - ../cernopendata-portal/docker-compose.yml - - docker-compose-override.yml + - path: + - ../cernopendata-portal/docker-compose.yml + - docker-compose-override.yml diff --git a/run-tests.sh b/run-tests.sh index 98985fec35..d674b2f5da 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # # This file is part of CERN Open Data Portal. -# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024 CERN. +# Copyright (C) 2015, 2016, 2017, 2018, 2019, 2020, 2024, 2025 CERN. 
# # CERN Open Data Portal is free software; you can redistribute it # and/or modify it under the terms of the GNU General Public License as @@ -22,40 +22,34 @@ # waive the privileges and immunities granted to it by virtue of its status # as an Intergovernmental Organization or submit itself to any jurisdiction. -# quit on errors and potentially unbound symbols: set -o errexit set -o nounset -check_script () { - shellcheck run-tests.sh +data_dois() { + dupes=$(jq '.[].doi' data/{records,skeletons}/*.json | sort | grep -v null | uniq -d) + if [ "x${dupes}" != "x" ]; then + echo "[ERROR] Found duplicate record DOIs:" + echo "${dupes}" + exit 1 + fi + echo "Record DOIs are unique." } -check_black () { - black --check . +data_licenses() { + scripts/check_licenses.py } - -check_fixtures () { - # check for possibly incorrect JSON files: - find data/ -name "*.json" -exec jsonlint -q {} \; - - # check record ID uniqueness: +data_recids() { dupes=$(jq '.[].recid' data/{records,skeletons}/*.json | sort | uniq -d) if [ "x${dupes}" != "x" ]; then echo "[ERROR] Found duplicate record IDs:" echo "${dupes}" exit 1 fi + echo "Record IDs are unique." +} - # check DOI uniqueness: - dupes=$(jq '.[].doi' data/{records,skeletons}/*.json | sort | grep -v null | uniq -d) - if [ "x${dupes}" != "x" ]; then - echo "[ERROR] Found duplicate record DOIs:" - echo "${dupes}" - exit 1 - fi - - # check docs slug uniqueness: +data_slugs() { # shellcheck disable=SC2044 dupes=$(for file in $(find data/docs -name "*.json"); do jq '.[].slug' "$file"; done | sort | grep -v null | uniq -d) if [ "x${dupes}" != "x" ]; then @@ -63,8 +57,32 @@ check_fixtures () { echo "${dupes}" exit 1 fi + echo "Docs slugs are unique." 
+} + +data_types() { + # shellcheck disable=SC2044 + for file in $(find data/{records,docs}/ -name "*.json"); do + secondaries=$(jq '.[].type.secondary' "$file" -c | sort | uniq) + if echo "$secondaries" | grep -q -e '\[\]' -e "null"; then + echo "[Warning] empty type.secondary field in $file" + fi + done +} + +format_black() { + black --check . +} + +format_isort() { + isort -rc -c -df --profile black -- **/*.py +} + +format_shfmt() { + shfmt -d . +} - # check trailing whitespace: +format_whitespace() { whitespace_found_p=0 for file in $(git ls-files | grep -E '.(py|html|css|json|md|sh|txt|yml)$'); do if grep -q ' $' "$file"; then @@ -76,57 +94,88 @@ check_fixtures () { if [ "${whitespace_found_p}" != "0" ]; then exit 1 fi +} - # check for empty secondary type in fixtures - # shellcheck disable=SC2044 - for file in $(find data/{records,docs}/ -name "*.json"); do - secondaries=$(jq '.[].type.secondary' "$file" -c | sort | uniq) - if echo "$secondaries" | grep -q -e '\[\]' -e "null"; then - echo "[Warning] empty type.secondary field in $file" - fi - done +lint_flake8() { + flake8 . } -check_pycodestyle () { - pycodestyle --max-line-length=120 scripts +lint_jsonlint() { + find . -name "*.json" -exec jsonlint -q {} \+ } -check_pydocstyle () { +lint_pydocstyle() { pydocstyle scripts } -check_isort () { - isort -rc -c -df --profile black -- **/*.py +lint_shellcheck() { + find . -name "*.sh" -exec shellcheck {} \+ } -check_licenses () { - scripts/check_licenses.py +lint_yamllint() { + yamllint . 
+} + +all() { + data_dois + data_licenses + data_recids + data_slugs + data_types + format_black + format_isort + format_shfmt + format_whitespace + lint_flake8 + lint_jsonlint + lint_pydocstyle + lint_shellcheck + lint_yamllint } -check_all () { - check_script - check_fixtures - check_pycodestyle - check_black - check_pydocstyle - check_isort - check_licenses +help() { + echo "Usage: $0 [options]" + echo "Options:" + echo " --all Perform all checks [default]" + echo " --data-dois Check data DOIs" + echo " --data-licenses Check data licenses" + echo " --data-recids Check data record IDs " + echo " --data-slugs Check data slugs" + echo " --data-types Check data types" + echo " --help Display this help message" + echo " --format-black Check formatting of Python code" + echo " --format-isort Check formatting of Python imports" + echo " --format-shfmt Check formatting of shell scripts" + echo " --format-whitespace Check formatting of whitespace" + echo " --lint-flake8 Check linting of Python code" + echo " --lint-jsonlint Check linting of JSON files" + echo " --lint-pydocstyle Check linting of Python docstrings" + echo " --lint-shellcheck Check linting of shell scripts" + echo " --lint-yamllint Check linting of YAML files" } if [ $# -eq 0 ]; then - check_all + all exit 0 fi -for arg in "$@" -do - case $arg in - --check-shellscript) check_script;; - --check-fixtures) check_fixtures;; - --check-pycodestyle) check_pycodestyle;; - --check-pydocstyle) check_pydocstyle;; - --check-isort) check_isort;; - --check-licenses) check_licenses;; - *) - esac -done +arg="$1" +case $arg in +--all) all ;; +--help) help ;; +--data-dois) data_dois ;; +--data-licenses) data_licenses ;; +--data-recids) data_recids ;; +--data-slugs) data_slugs ;; +--data-types) data_types ;; +--format-black) format_black ;; +--format-isort) format_isort ;; +--format-shfmt) format_shfmt ;; +--format-whitespace) format_whitespace ;; +--lint-flake8) lint_flake8 ;; +--lint-jsonlint) lint_jsonlint ;; 
+--lint-pydocstyle) lint_pydocstyle ;; +--lint-shellcheck) lint_shellcheck ;; +--lint-yamllint) lint_yamllint ;; +*) echo "[ERROR] Invalid argument '$arg'. Exiting." && help && exit 1 ;; +esac diff --git a/scripts/split_json_file.py b/scripts/split_json_file.py index 62f1537994..f8b66b4e73 100755 --- a/scripts/split_json_file.py +++ b/scripts/split_json_file.py @@ -24,11 +24,11 @@ def split_json_file(filename, split): for i in range(0, math.ceil(len(records) / split)): filenamepart = filename.replace( - ".json", f"-part_{i+1:0{len(str(num_output_files))}}.json" + ".json", f"-part_{i + 1:0{len(str(num_output_files))}}.json" ) print("[INFO] Creating file %s..." % filenamepart) split_content = json.dumps( - records[split * i: split * (i + 1)], + records[split * i : split * (i + 1)], indent=2, sort_keys=True, ensure_ascii=False, diff --git a/scripts/start-server-debug.sh b/scripts/start-server-debug.sh index 8693d6944b..4997e90812 100755 --- a/scripts/start-server-debug.sh +++ b/scripts/start-server-debug.sh @@ -1,12 +1,13 @@ #!/bin/bash echo "Ready to start the web server in debug mode" -cd /code -export INVENIO_SECRET_KEY=$(head -c 500 /dev/urandom | tr -dc 'a-zA-Z0-9~!@#$%^&*_-' | fold -w 10 | head -n 1) +cd /code || exit +INVENIO_SECRET_KEY=$(head -c 500 /dev/urandom | tr -dc 'a-zA-Z0-9~!@#$%^&*_-' | fold -w 10 | head -n 1) +export INVENIO_SECRET_KEY if [ -d "/opt/invenio/var/instance/python/lib/python3.9/site-packages/cernopendata" ]; then - echo "The installation directory is still there... let's overwrite it" - pip install -e . + echo "The installation directory is still there... let's overwrite it" + pip install -e . fi -cernopendata run -h 0.0.0.0 --reload; +cernopendata run -h 0.0.0.0 --reload echo "THE WEB SERVICE DIED!!! Let's sleep for a bit to give some time to debug" -sleep 60 \ No newline at end of file +sleep 60