Commit a269d1d
fixed parsing errors
karacolada committed Feb 16, 2024
1 parent e3f36a9 commit a269d1d
Showing 5 changed files with 94 additions and 30 deletions.
2 changes: 1 addition & 1 deletion fuji_server/data/software_file.json
@@ -23,7 +23,7 @@
       ],
       "parse": "file_name",
       "pattern": [
-        "(\\w*/)*docs/"
+        "(\\w*/)*docs(/\\w*\\.\\w*)*"
       ]
     },
     "github_actions": {
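The old pattern only matched paths ending in docs/, so individual files inside a docs tree fell through when the pattern is anchored against the whole path. A minimal sketch of the difference (assuming fullmatch-style anchoring; the matching call itself is outside this diff):

import re

OLD = r"(\w*/)*docs/"
NEW = r"(\w*/)*docs(/\w*\.\w*)*"

# Paths to documentation files now match; before, only "docs/" directory prefixes did.
for path in ["docs", "docs/index.md", "src/docs/api.md"]:
    print(path, bool(re.fullmatch(OLD, path)), bool(re.fullmatch(NEW, path)))
# docs            False True
# docs/index.md   False True
# src/docs/api.md False True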
28 changes: 13 additions & 15 deletions fuji_server/evaluators/fair_evaluator_license_file.py
@@ -368,30 +368,28 @@ def testBuildScriptChecksLicenseHeader(self):
if "maven_pom" in required_build_scripts: # check Maven POM for plugin
mvn_pom = self.fuji.github_data.get("maven_pom")
if mvn_pom is not None:
content = mvn_pom[0]["content"]
# Check whether pom.xml uses license:check-file-header to validate license headers.
# See https://www.mojohaus.org/license-maven-plugin/check-file-header-mojo.html for more info.
root = ET.fromstring(mvn_pom)
root = ET.fromstring(content)
namespaces = root.nsmap
# look for plugin with artifactID license-maven-plugin
found_license_plugin = False
for plugin in root.iterfind(".//plugin", namespaces):
artifact_id = plugin.find("artifactId", namespaces)
if artifact_id is not None and artifact_id.text == "license-maven-plugin":
found_license_plugin = True
fail_on_missing_header = plugin.find("configuration/failOnMissingHeader", namespaces)
if fail_on_missing_header is not None and fail_on_missing_header.text == "true":
test_status = True
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Maven POM checks for license headers in source files.",
)
self.maturity = self.getTestConfigMaturity(test_id)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
else:
self.logger.warning(
f"{self.metric_identifier} : Maven POM uses license-maven-plugin (license:check-file-header) but does not fail on missing header."
)
# too strict
# fail_on_missing_header = plugin.find("configuration/failOnMissingHeader", namespaces)
# if fail_on_missing_header is not None and fail_on_missing_header.text == "true":
test_status = True
self.logger.log(
self.fuji.LOG_SUCCESS,
f"{self.metric_identifier} : Maven POM checks for license headers in source files.",
)
self.maturity = self.getTestConfigMaturity(test_id)
self.setEvaluationCriteriumScore(test_id, test_score, "pass")
self.score.earned += test_score
break
if not found_license_plugin:
self.logger.warning(
Expand Down
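For illustration, a minimal sketch (hypothetical POM, not from the fuji test suite) of a pom.xml that the relaxed check now accepts: declaring license-maven-plugin is enough, and failOnMissingHeader is no longer consulted. The use of root.nsmap above implies ET is lxml.etree, which also accepts the raw bytes the harvester now stores:

from lxml import etree as ET  # root.nsmap implies lxml.etree, not the stdlib ElementTree

# Hypothetical POM content; only the artifactId matters to the check.
pom_bytes = b"""<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0">
  <build><plugins><plugin>
    <groupId>org.codehaus.mojo</groupId>
    <artifactId>license-maven-plugin</artifactId>
  </plugin></plugins></build>
</project>"""

root = ET.fromstring(pom_bytes)  # bytes input, as stored by the harvester after this commit
namespaces = root.nsmap          # {None: 'http://maven.apache.org/POM/4.0.0'}
for plugin in root.iterfind(".//plugin", namespaces):
    artifact_id = plugin.find("artifactId", namespaces)
    print(artifact_id is not None and artifact_id.text == "license-maven-plugin")  # True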
75 changes: 62 additions & 13 deletions fuji_server/evaluators/fair_evaluator_requirements.py
@@ -25,7 +25,7 @@ def __init__(self, fuji_instance):

         # Create map from metric test names to class functions. This is necessary as functions may be reused for different metrics relating to licenses.
         self.metric_test_map = {  # overall map
-            "testBuildInstructions": ["FRSM-13-R1-1"],
+            "testInstructions": ["FRSM-13-R1-1"],
             "testDependencies": ["FRSM-13-R1-2"],
             "testDependenciesBuildAutomatedChecks": ["FRSM-13-R1-CESSDA-1"],
             "testBadgeIncluded": ["FRSM-13-R1-CESSDA-2"],
@@ -83,6 +83,8 @@ def scanForKeywords(self, keywords, locations):
             for k in keys_to_check:
                 content = self.fuji.github_data.get(location)
                 if content is not None:
+                    if type(content) == bytes:
+                        content = content.decode("utf-8")
                     if type(content) == str:
                         if k in content.lower():
                             hit_dict[k] = True  # found keyword in location
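This pairs with the github_harvester.py change further down: file content is now stored as raw bytes, so the scanner normalises it to text before the case-insensitive substring check. A condensed, standalone sketch of that normalisation (not fuji API):

def contains_keyword(content, keyword):
    # Bytes are decoded first, then scanned as text, mirroring the hunk above.
    if isinstance(content, bytes):
        content = content.decode("utf-8")
    if isinstance(content, str):
        return keyword in content.lower()
    return False  # nested containers go through nestedDataContainsKeyword instead

assert contains_keyword(b"RUN pip install -r requirements.txt", "requirements.txt")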
@@ -91,13 +93,13 @@
                         hit_dict[k] = self.nestedDataContainsKeyword(content, k)
         return hit_dict
 
-    def testBuildInstructions(self):
+    def testInstructions(self):
         """The software has build, installation and/or execution instructions.
 
         Returns:
             bool: True if the test was defined and passed. False otherwise.
         """
-        agnostic_test_name = "testBuildInstructions"
+        agnostic_test_name = "testInstructions"
         test_status = False
         test_defined = False
         for test_id in self.metric_test_map[agnostic_test_name]:
@@ -149,7 +151,6 @@ def testDependencies(self):
                 test_defined = True
                 break
         if test_defined:
-            self.logger.warning(f"{self.metric_identifier} : Test for dependencies is not implemented.")
             test_score = self.getTestConfigScore(test_id)
             # Check for presence of machine-readable dependency files
             dependency_requirements = self.metric_tests[test_id].metric_test_requirements[0]
@@ -163,7 +164,7 @@
             automation_requirements = self.metric_tests[test_id].metric_test_requirements[1]
             required_automation_locations = automation_requirements["required"]["automation_file"]
             required_automation_keywords = automation_requirements["required"]["automation_keywords"]
-            self.logger.warning(
+            self.logger.info(
                 f"{self.metric_identifier} : Looking for {automation_requirements['modality']} keywords {required_automation_keywords} in {required_automation_locations}."
             )
             automation_hit_dict = self.scanForKeywords(required_automation_keywords, required_automation_locations)
@@ -183,9 +184,12 @@
                 self.setEvaluationCriteriumScore(test_id, test_score, "pass")
                 self.score.earned += test_score
             else:  # fail
-                self.logger.warning(
-                    f"{self.metric_identifier} : Did not find {automation_requirements['modality']} keywords {required_automation_keywords} in {required_automation_locations}."
-                )
+                if not dependency_present:
+                    self.logger.warning(f"{self.metric_identifier} : Did not find any of {required_dependency_files}.")
+                if not found_automation:
+                    self.logger.warning(
+                        f"{self.metric_identifier} : Did not find {automation_requirements['modality']} keywords {required_automation_keywords} in {required_automation_locations}."
+                    )
         return test_status
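Both testDependencies and the rewritten testDependenciesBuildAutomatedChecks below reduce the per-keyword hit dictionary from scanForKeywords according to the modality declared in the metric YAML: "all" requires every keyword to be found, "any" requires at least one. A standalone sketch of that reduction (the evaluator itself logs a warning instead of raising):

def reduce_hits(hit_dict, modality):
    # "all": every keyword must be found; "any": a single hit suffices.
    if modality == "all":
        return all(hit_dict.values())
    if modality == "any":
        return any(hit_dict.values())
    raise ValueError(f"Unknown modality {modality}. Choose 'all' or 'any'.")

print(reduce_hits({"build": True, "dependencies": False}, "any"))  # True
print(reduce_hits({"build": True, "dependencies": False}, "all"))  # False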

@@ -207,12 +211,57 @@ def testDependenciesBuildAutomatedChecks(self):
             self.logger.warning(
                 f"{self.metric_identifier} : Test for dependency information, build instructions and automated checks is not implemented."
             )
             test_score = self.getTestConfigScore(test_id)
-            test_requirements = self.metric_tests[test_id].metric_test_requirements[0]
+            instructions_requirements = self.metric_tests[test_id].metric_test_requirements[0]
+            required_instructions_locations = instructions_requirements["required"]["location"]
+            required_instructions_keywords = instructions_requirements["required"]["keywords"]
+            automation_requirements = self.metric_tests[test_id].metric_test_requirements[1]
+            required_automation_locations = automation_requirements["required"]["automation_file"]
+            required_automation_keywords = automation_requirements["required"]["automation_keywords"]
+            self.logger.info(
+                f"{self.metric_identifier} : Looking for {instructions_requirements['modality']} keywords {required_instructions_keywords} in {required_instructions_locations}."
+            )
             # dependency info and build instruction in README
-            first_half = self.scanForKeywords(["dependency", "dependencies", "build"], ["README"])
+            instructions_hit_dict = self.scanForKeywords(
+                required_instructions_keywords, required_instructions_locations
+            )
+            found_instructions = False
+            if instructions_requirements["modality"] == "all":
+                found_instructions = all(instructions_hit_dict.values())
+            elif instructions_requirements["modality"] == "any":
+                found_instructions = any(instructions_hit_dict.values())
+            else:
+                self.logger.warning(
+                    f"{self.metric_identifier} : Unknown modality {instructions_requirements['modality']} in test requirements. Choose 'all' or 'any'."
+                )
             # linting and other relevant checks present in automated build and test process
-            # TODO
-            print((test_score, test_requirements, first_half))  # fix linting error for now
+            self.logger.info(
+                f"{self.metric_identifier} : Looking for {automation_requirements['modality']} keywords {required_automation_keywords} in {required_automation_locations}."
+            )
+            automation_hit_dict = self.scanForKeywords(required_automation_keywords, required_automation_locations)
+            found_automation = False
+            if automation_requirements["modality"] == "all":
+                found_automation = all(automation_hit_dict.values())
+            elif automation_requirements["modality"] == "any":
+                found_automation = any(automation_hit_dict.values())
+            else:
+                self.logger.warning(
+                    f"{self.metric_identifier} : Unknown modality {automation_requirements['modality']} in test requirements. Choose 'all' or 'any'."
+                )
+            if found_instructions and found_automation:  # pass
+                test_status = True
+                self.logger.log(self.fuji.LOG_SUCCESS, f"{self.metric_identifier} : Found required keywords.")
+                self.maturity = self.getTestConfigMaturity(test_id)
+                self.setEvaluationCriteriumScore(test_id, test_score, "pass")
+                self.score.earned += test_score
+            else:  # fail
+                if not found_instructions:
+                    self.logger.warning(
+                        f"{self.metric_identifier} : Did not find {instructions_requirements['modality']} keywords {required_instructions_keywords} in {required_instructions_locations}."
+                    )
+                if not found_automation:
+                    self.logger.warning(
+                        f"{self.metric_identifier} : Did not find {automation_requirements['modality']} keywords {required_automation_keywords} in {required_automation_locations}."
+                    )
         return test_status

     def testBadgeIncluded(self):
@@ -257,7 +306,7 @@ def evaluate(self):
         )
         self.output = RequirementsOutput()
         self.result.test_status = "fail"
-        if self.testBuildInstructions():
+        if self.testInstructions():
             self.result.test_status = "pass"
         if self.testDependencies():
             self.result.test_status = "pass"
2 changes: 1 addition & 1 deletion fuji_server/harvester/github_harvester.py
@@ -109,7 +109,7 @@ def retrieve_all(self, repo):
                     file_entry = {
                         "name": content_file.name,
                         "path": content_file.path,
-                        "content": content_file.decoded_content.decode("utf-8"),
+                        "content": content_file.decoded_content,
                     }
                 elif self.files_map[k]["parse"] == "file_name":
                     file_entry = {"name": content_file.name, "path": content_file.path}
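The commit message only says "fixed parsing errors", but a plausible motivation for handing back raw bytes here is lxml's refusal to parse an already-decoded string that still carries an XML encoding declaration, as a typical pom.xml does:

from lxml import etree as ET

xml_text = '<?xml version="1.0" encoding="UTF-8"?><project/>'
try:
    ET.fromstring(xml_text)   # str input with an encoding declaration
except ValueError as err:
    print(err)                # Unicode strings with encoding declaration are not supported...

root = ET.fromstring(xml_text.encode("utf-8"))  # bytes input parses fine
print(root.tag)                                 # project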
17 changes: 17 additions & 0 deletions fuji_server/yaml/metrics_v0.7_software_cessda.yaml
@@ -307,6 +307,23 @@ metrics:
     metric_test_name: Dependency information and build instructions are included in the README file. Linting and other relevant checks are present in the automated build and test process (e.g. via the Jenkinsfile).
     metric_test_score: 1
     metric_test_maturity: 1
+    metric_test_requirements:
+    - target: https://f-uji.net/vocab/metadata/standards
+      modality: all
+      required:
+        location:
+        - README
+        keywords:
+        - build
+        - dependencies
+    - target: https://f-uji.net/vocab/metadata/standards
+      modality: all
+      required:
+        automation_file:
+        - Jenkinsfile
+        - github_actions
+        automation_keywords:
+        - lint
   - metric_test_identifier: FRSM-13-R1-CESSDA-2
     metric_test_name: The README file includes a badge that links to the automated build tool (Jenkins). Deployment to development and staging environments is automated (conditional on test results).
     metric_test_score: 1
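For reference, a sketch (hypothetical snippet, standard PyYAML) of how these two requirement blocks surface in the evaluator: index [0] drives the README keyword scan, index [1] the automation-file scan, with field names exactly as in the YAML above:

import yaml

snippet = """
metric_test_requirements:
  - target: https://f-uji.net/vocab/metadata/standards
    modality: all
    required:
      location: [README]
      keywords: [build, dependencies]
  - target: https://f-uji.net/vocab/metadata/standards
    modality: all
    required:
      automation_file: [Jenkinsfile, github_actions]
      automation_keywords: [lint]
"""

requirements = yaml.safe_load(snippet)["metric_test_requirements"]
instructions, automation = requirements[0], requirements[1]
print(instructions["required"]["keywords"])       # ['build', 'dependencies']
print(automation["required"]["automation_file"])  # ['Jenkinsfile', 'github_actions']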
