From 47b9dcdc111a5ec96e270837848e4305166d897c Mon Sep 17 00:00:00 2001 From: Pankaj Thorat Date: Tue, 1 Oct 2024 00:38:27 +0530 Subject: [PATCH] test errors fixes Signed-off-by: Pankaj Thorat --- .../test-code-syntactic_concept_extractor.yml | 124 ++++++++++++++++++ .make.versions | 4 + transforms/code/Makefile | 16 --- .../syntactic_concept_extractor_transform.py | 30 ++--- ...t_syntactic_concept_extractor.py_python.py | 47 ------- .../syntactic_concept_extractor/ray/Makefile | 2 - 6 files changed, 140 insertions(+), 83 deletions(-) create mode 100644 .github/workflows/test-code-syntactic_concept_extractor.yml delete mode 100644 transforms/code/syntactic_concept_extractor/python/test/test_syntactic_concept_extractor.py_python.py diff --git a/.github/workflows/test-code-syntactic_concept_extractor.yml b/.github/workflows/test-code-syntactic_concept_extractor.yml new file mode 100644 index 0000000000..7f95b90a89 --- /dev/null +++ b/.github/workflows/test-code-syntactic_concept_extractor.yml @@ -0,0 +1,124 @@ +# +# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files +# +name: Test - transforms/code/syntactic_concept_extractor + +on: + workflow_dispatch: + push: + branches: + - "dev" + - "releases/**" + tags: + - "*" + paths: + - "transforms/code/syntactic_concept_extractor/**" + - "data-processing-lib/**" + - "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + pull_request: + branches: + - "dev" + - "releases/**" + paths: + - "transforms/code/syntactic_concept_extractor/**" + - "data-processing-lib/**" + - "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow + - "!data-processing-lib/**/test/**" + - "!data-processing-lib/**/test-data/**" + - "!**.md" + - "!**/doc/**" + - "!**/images/**" + - "!**.gitignore" + +jobs: + check_if_push_image: + # check whether the Docker images should be pushed to the remote repository + # The images are pushed if it is a merge to dev branch or a new tag is created. + # The latter being part of the release process. + # The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file. + runs-on: ubuntu-22.04 + outputs: + publish_images: ${{ steps.version.outputs.publish_images }} + steps: + - id: version + run: | + publish_images='false' + if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ; + then + publish_images='true' + fi + echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" + test-src: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform source in transforms/code/syntactic_concept_extractor + run: | + if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then + make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-src + else + echo "transforms/code/syntactic_concept_extractor/Makefile not found - source testing disabled for this transform." + fi + test-image: + needs: [check_if_push_image] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + env: + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Free up space in github runner + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup + sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true + df -h + - name: Test transform image in transforms/code/syntactic_concept_extractor + run: | + if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then + if [ -d "transforms/code/syntactic_concept_extractor/spark" ]; then + make -C data-processing-lib/spark DOCKER=docker image + fi + make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-image + else + echo "transforms/code/syntactic_concept_extractor/Makefile not found - testing disabled for this transform." + fi + - name: Print space + # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 + run: | + df -h + docker images + - name: Publish images + if: needs.check_if_push_image.outputs.publish_images == 'true' + run: | + if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then + make -C transforms/code/syntactic_concept_extractor publish + else + echo "transforms/code/syntactic_concept_extractor/Makefile not found - publishing disabled for this transform." + fi diff --git a/.make.versions b/.make.versions index 6c9bbc08de..73eec85426 100644 --- a/.make.versions +++ b/.make.versions @@ -109,6 +109,10 @@ HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION) DPK_TRANSFORMS_VERSION=$(DPK_VERSION) +SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION=$(DPK_VERSION) +SYNTACTIC_CONCEPT_EXTRACTOR_RAY_VERSION=$(DPK_VERSION) + + ################## ################## ################## ################## ################## ################## # Begin versions that the repo depends on. diff --git a/transforms/code/Makefile b/transforms/code/Makefile index 17afe2785a..b5d5c7bbe5 100644 --- a/transforms/code/Makefile +++ b/transforms/code/Makefile @@ -27,26 +27,10 @@ image:: @# Help: Recursively make $@ in all subdirs @$(MAKE) RULE=$@ .recurse -test-image:: - @# Help: Recursively make $@ in all subdirs - @$(MAKE) RULE=$@ .recurse - publish:: @# Help: Recursively make $@ in all subdirs @$(MAKE) RULE=$@ .recurse -kind-load-image:: - @# Help: Recursively make $@ in all subdirs - @$(MAKE) RULE=$@ .recurse - -docker-load-image:: - @# Help: Recursively make $@ in all subdirs - $(MAKE) RULE=$@ .recurse - -docker-save-image:: - @# Help: Recursively make $@ in all subdirs - $(MAKE) RULE=$@ .recurse - set-versions: @# Help: Recursively $@ in all subdirs @$(MAKE) RULE=$@ .recurse diff --git a/transforms/code/syntactic_concept_extractor/python/src/syntactic_concept_extractor_transform.py b/transforms/code/syntactic_concept_extractor/python/src/syntactic_concept_extractor_transform.py index 1d75473fc2..4b760fef27 100644 --- a/transforms/code/syntactic_concept_extractor/python/src/syntactic_concept_extractor_transform.py +++ b/transforms/code/syntactic_concept_extractor/python/src/syntactic_concept_extractor_transform.py @@ -10,36 +10,25 @@ # limitations under the License. ################################################################################ -import functools import os -import time from argparse import ArgumentParser, Namespace from typing import Any from data_processing.utils import get_logger -import numpy as np -import pandas as pd + import pyarrow as pa -import pyarrow.parquet as pq -import requests from data_processing.transform import AbstractTableTransform from tree_sitter import Language, Parser as TSParser -from tree_sitter_languages import get_language, get_parser +from tree_sitter_languages import get_language + -from collections import Counter -from UAST import UAST from UAST_parser import UASTParser -from concurrent.futures import ThreadPoolExecutor import json from data_processing.transform import AbstractBinaryTransform, TransformConfiguration from data_processing.utils import ( - GB, CLIArgumentProvider, - TransformUtils, - UnrecoverableException, get_logger, - str2bool, ) short_name = "SyntacticConceptExtractor" @@ -64,11 +53,16 @@ def __init__(self, config: dict[str, Any]): self.contents = self.config.get("contents") self.language = self.config.get("language") - # Compute the absolute path to the tree-sitter-bindings directory - script_dir = os.path.dirname(os.path.abspath(__file__)) - bindings_path = os.path.join(script_dir, '..', '..', 'input', 'tree-sitter-bindings') + # Get the project root from an environment variable (set by the Makefile) + repo_root = os.environ.get('REPOROOT', os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', '..', '..'))) + + # Construct the path to the 'tree-sitter-bindings' directory + bindings_path = os.path.join(repo_root, 'transforms', 'code', 'syntactic_concept_extractor', 'input', 'tree-sitter-bindings') + + # Debugging: Print the computed path to ensure it's correct + print(f"Computed bindings path: {bindings_path}") - # Verify that the bindings_path exists + # Check if the directory exists if not os.path.exists(bindings_path): raise FileNotFoundError(f"Bindings path does not exist: {bindings_path}") diff --git a/transforms/code/syntactic_concept_extractor/python/test/test_syntactic_concept_extractor.py_python.py b/transforms/code/syntactic_concept_extractor/python/test/test_syntactic_concept_extractor.py_python.py deleted file mode 100644 index e56f09b65a..0000000000 --- a/transforms/code/syntactic_concept_extractor/python/test/test_syntactic_concept_extractor.py_python.py +++ /dev/null @@ -1,47 +0,0 @@ -# (C) Copyright IBM Corp. 2024. -# Licensed under the Apache License, Version 2.0 (the “License”); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an “AS IS” BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -import os - -from data_processing.runtime.pure_python import PythonTransformLauncher -from data_processing.test_support.launch.transform_test import ( - AbstractTransformLauncherTest, -) -from noop_transform import sleep_cli_param -from noop_transform_python import NOOPPythonTransformConfiguration - - -class TestPythonNOOPTransform(AbstractTransformLauncherTest): - """ - Extends the super-class to define the test data for the tests defined there. - The name of this class MUST begin with the word Test so that pytest recognizes it as a test class. - """ - - def get_test_transform_fixtures(self) -> list[tuple]: - src_file_dir = os.path.abspath(os.path.dirname(__file__)) - fixtures = [] - - launcher = PythonTransformLauncher(NOOPPythonTransformConfiguration()) - input_dir = os.path.join(src_file_dir, "../test-data/input") - expected_dir = os.path.join(src_file_dir, "../test-data/expected") - transform_config = {sleep_cli_param: 0} - fixtures.append( - ( - launcher, - transform_config, - input_dir, - expected_dir, - [], # optional list of column names to ignore in comparing test-generated with expected. - ) - ) - - return fixtures diff --git a/transforms/code/syntactic_concept_extractor/ray/Makefile b/transforms/code/syntactic_concept_extractor/ray/Makefile index 301b0c9af0..737da22eb6 100644 --- a/transforms/code/syntactic_concept_extractor/ray/Makefile +++ b/transforms/code/syntactic_concept_extractor/ray/Makefile @@ -23,8 +23,6 @@ test-src:: .transforms.test-src setup:: .transforms.setup -test-image:: .transforms.ray-test-image - build:: build-dist image publish: publish-image