From 47e92027e491ead4103c65129a9f392eae27d7e8 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Wed, 12 Jun 2024 10:17:07 +0200 Subject: [PATCH] Update ontology summary (#2329) * enable release diff in odk config * run release simple diff and post diff in PR * fix release artefact uri and diff filename * add edges, synonyms, xrefs and cl_terms reports to odk config * update custom reports to only cl * wip ontology content report * add cxg and hra numbers * revert default report sparql queries * create custom reports for CL only * add custom CL reports to odk config * script to generate content summary * add command to generate content summary as part of release * revert changes in edges, synonyms and xrefs reports * add instructions to add summary table in release notes * use cl-base to run diff * change file name and actions version * update to commit cl-base-diff and not full release diff * add report on diff between release and previous release The report is generated by the OAK diff command using the base releases for better comparison. The report shows new terms, new relationships, obsolete terms, and changes to synonyms and definitions. The report is appended to the table with the ontology summary content and saved in the reports/summary_release.md file to be used as a release note. * update cl-release docs to reflect the new release process The release notes need to be updated and this commit explains how to fix a current issue in the OAK diff command when generating the report. * rewording explanation in readme Co-authored-by: Aleix Puig <94959119+aleixpuigb@users.noreply.github.com> * add missing dependency in prepare_content_summary goal The file is used in the rule but was not declared as a dependency, so the rule could not ensure that the file was up to date. 
Co-authored-by: Nico Matentzoglu * update the sparql queries for the custom reports Filter out the obsolete classes and the obsolete CP namespace from the queries so that they are not counted in the custom reports and, consequently, in the ontology content summary report generated for the releases. * use cl-base.obo to generate robot release base diff We need to download the cl-base.obo to generate the output for the OAK diff command, so we can use the same artefact to generate the robot diff instead of downloading another artefact. This also adds the two dependencies for the `release-base-diff` target to make sure the files are up to date. * improve the documentation about CL release workflow Update the link to the documentation about how to update the imports because the previous one linked to a non-existent page. Change the GitHub release link from a code block to inline code syntax because the code block was breaking the list and resetting its numbering. Finally, undo the change to the numbering of the last three list items that was mistakenly made in the previous commit. 
--------- Co-authored-by: Anita Caron Co-authored-by: Aleix Puig <94959119+aleixpuigb@users.noreply.github.com> Co-authored-by: Nico Matentzoglu --- .github/workflows/post-release-diff.yaml | 8 +- .gitignore | 1 + docs/cl-release.md | 8 +- src/ontology/Makefile | 6 +- src/ontology/cl-odk.yaml | 9 +- src/ontology/cl.Makefile | 22 ++- src/ontology/reports/edges.tsv | 2 +- src/ontology/reports/synonyms.tsv | 2 +- src/ontology/reports/xrefs.tsv | 2 +- src/scripts/content_summary.py | 198 +++++++++++++++++++++++ src/sparql/cl-def-xrefs.sparql | 16 ++ src/sparql/cl-edges.sparql | 21 +++ src/sparql/cl-synonyms.sparql | 29 ++++ src/sparql/cl-xrefs.sparql | 11 ++ 14 files changed, 313 insertions(+), 22 deletions(-) create mode 100644 src/scripts/content_summary.py create mode 100644 src/sparql/cl-def-xrefs.sparql create mode 100644 src/sparql/cl-edges.sparql create mode 100644 src/sparql/cl-synonyms.sparql create mode 100644 src/sparql/cl-xrefs.sparql diff --git a/.github/workflows/post-release-diff.yaml b/.github/workflows/post-release-diff.yaml index d7e299075..55ddddfdb 100644 --- a/.github/workflows/post-release-diff.yaml +++ b/.github/workflows/post-release-diff.yaml @@ -5,7 +5,7 @@ on: pull_request: branches: [ master ] paths: - - 'src/ontology/diffs/cl-diff.md' + - 'src/ontology/reports/cl-base-diff.md' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -15,15 +15,15 @@ jobs: post_diff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Prepare release comment env: GITHUB_SHA: ${{ github.sha }} - run: "echo \"[Here's a diff of how this release impacts cl.owl](https://github.com/obophenotype/cell-ontology/blob/${{ env.GITHUB_SHA }}/src/ontology/diffs/cl-diff.md)\" >comment.md" + run: "echo \"[Here's a diff of how this release impacts cl-base.owl](https://github.com/obophenotype/cell-ontology/blob/${{ env.GITHUB_SHA }}/src/ontology/reports/cl-base-diff.md)\" >comment.md" - name: Post 
reasoned comment env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: NejcZdovc/comment-pr@v1.1.1 + uses: NejcZdovc/comment-pr@v2 with: github_token: ${{ env.GITHUB_TOKEN }} file: "../../comment.md" diff --git a/.gitignore b/.gitignore index c02a3ceda..6d1b94f52 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ src/patterns/pattern_owl_seed.txt src/ontology/ontologyterms.txt src/ontology/simple_seed.txt src/ontology/reports/* +!src/ontology/reports/cl-base-diff.md src/ontology/cl-hipc.owl site/ src/ontology/cl-check.obo diff --git a/docs/cl-release.md b/docs/cl-release.md index 3d61f3963..648cceb80 100644 --- a/docs/cl-release.md +++ b/docs/cl-release.md @@ -41,7 +41,7 @@ Preparation: 1. Make sure that all changes to master are committed to Github (`git status` should say that there are no modified files) 1. Locally make sure you have the latest changes from master (`git pull`) 1. Checkout a new branch (e.g. `git checkout -b release-2021-01-01`) -1. You may or may not want to refresh your imports as part of your release strategy (see [here](UpdateImports.md))(Note: in CL we decouple our imports and releases - we hence advice that you do not update imports) +1. You may or may not want to refresh your imports as part of your release strategy (see the second section [here](Adding_classes_from_another_ontology.md)) (Note: in CL we decouple our imports and releases - we hence advise that you do not update imports) 1. Make sure you have the latest ODK installed by running `docker pull obolibrary/odkfull` To actually run the release, you: @@ -54,12 +54,10 @@ To actually run the release, you: 1. Deploy release on GitHub by running `make deploy_release GHVERSION="v2022-06-20"` on the release branch (DO NOTE CHANGE TO MAIN BRANCH!), replacing the date with the date of release (NOTE: no `sh run.sh`) Editors note: ODK 1.3.2 will have a feature to run the release from inside the docker container. For now deploy_release has to be run outside. 1. 
This should end with a GitHub release link that looks something like: -``` -https://github.com/obophenotype/cl/releases/tag/untagged-8935f3432525b27a0d84 -``` +`https://github.com/obophenotype/cl/releases/tag/untagged-8935f3432525b27a0d84` Copy the link and paste it in your browser, this should show you a draft release. 1. Click the edit button (the pencil button on the top right corner) and change the tag to the GHVERSION you entered above (eg v2022-06-20) -1. Change the `TBD.` in the main text to a summary of the main changes in the release if needed. +1. Change the `TBD.` in the main text to a summary of the main changes in the release if needed. Copy and paste the text and table from the `reports/summary_release.md` file. This file is in `.gitignore` and will only be available to those who have run the release. The section `Classes added` needs to be manually amended due to a [known issue](https://github.com/INCATools/ontology-access-kit/issues/732) in the OAK diff command. Remove the duplicated classes and update the number of new classes created. 1. Scroll down all the way and click the `update release` button. 
diff --git a/src/ontology/Makefile b/src/ontology/Makefile index 7ee8050f2..bf6ab5589 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= 8b5b779b91f8bb931caf3512d6c7fcb325ef83bafc7ba409b86058a9dae7f67f +CONFIG_HASH= b786b0d7cbd09184896d55b42fe68a40981419e0c9d848963a74348b7bb955b7 # ---------------------------------------- @@ -45,7 +45,7 @@ REPORT_LABEL = REPORT_PROFILE_OPTS = OBO_FORMAT_OPTIONS = SPARQL_VALIDATION_CHECKS = equivalent-classes owldef-self-reference nolabels pmid-not-dbxref obsolete-replaced_by obsolete-alt-id orcid-contributor illegal-annotation-property label-synonym-polysemy illegal-date -SPARQL_EXPORTS = basic-report +SPARQL_EXPORTS = cl_terms cl-edges cl-synonyms cl-xrefs cl-def-xrefs ODK_VERSION_MAKEFILE = v1.5 TODAY ?= $(shell date +%Y-%m-%d) @@ -87,7 +87,7 @@ endif all: all_odk .PHONY: all_odk -all_odk: odkversion config_check test custom_reports all_assets +all_odk: odkversion config_check test custom_reports all_assets release_diff .PHONY: test test: odkversion dosdp_validation reason_test sparql_test robot_reports $(REPORTDIR)/validate_profile_owl2dl_$(ONT).owl.txt diff --git a/src/ontology/cl-odk.yaml b/src/ontology/cl-odk.yaml index 0fe944a9c..1b4e4df5f 100644 --- a/src/ontology/cl-odk.yaml +++ b/src/ontology/cl-odk.yaml @@ -8,6 +8,7 @@ report_fail_on: None use_dosdps: TRUE use_mappings: True use_edit_file_imports: FALSE +release_diff: TRUE export_formats: - owl - obo @@ -112,8 +113,12 @@ robot_report: - illegal-annotation-property - label-synonym-polysemy - illegal-date - custom_sparql_exports : - - basic-report + custom_sparql_exports: + - cl_terms + - cl-edges + - cl-synonyms + - cl-xrefs + - cl-def-xrefs components: products: - filename: hra_subset.owl diff --git a/src/ontology/cl.Makefile b/src/ontology/cl.Makefile index 
5025db23c..52c5d1ada 100644 --- a/src/ontology/cl.Makefile +++ b/src/ontology/cl.Makefile @@ -337,13 +337,25 @@ DEPLOY_GH=true .PHONY: cl cl: - $(MAKE) prepare_release IMP=false PAT=false - $(MAKE) release-diff + $(MAKE) prepare_release IMP=false PAT=false MIR=false + $(MAKE) release-base-diff + $(MAKE) prepare_content_summary if [ $(DEPLOY_GH) = true ]; then $(MAKE) deploy_release GHVERSION="v$(TODAY)"; fi -.PHONY: release-diff -release-diff: - $(ROBOT) diff --labels True -f markdown --left-iri http://purl.obolibrary.org/obo/cl.owl --right ../../cl.owl --output diffs/$(ONT)-diff.md +CURRENT_BASE_RELEASE=$(ONTBASE)/cl-base.obo + +$(TMPDIR)/current-base-release.obo: + wget $(CURRENT_BASE_RELEASE) -O $@.tmp && mv $@.tmp $@ + +.PHONY: release-base-diff +release-base-diff: $(TMPDIR)/current-base-release.obo $(RELEASEDIR)/cl-base.obo + $(ROBOT) diff --labels True -f markdown --left $(TMPDIR)/current-base-release.obo --right $(RELEASEDIR)/cl-base.obo --output reports/$(ONT)-base-diff.md + +.PHONY: prepare_content_summary +prepare_content_summary: $(RELEASEDIR)/cl-base.owl $(RELEASEDIR)/cl-base.obo $(TMPDIR)/current-base-release.obo custom_reports + python ./$(SCRIPTSDIR)/content_summary.py --ontology_iri $< --ont_namespace "CL" > $(REPORTDIR)/ontology_content.md + runoak -i simpleobo:$(TMPDIR)/current-base-release.obo diff -X simpleobo:$(RELEASEDIR)/cl-base.obo -o $(REPORTDIR)/diff_release_oak.md --output-type md + cat $(REPORTDIR)/ontology_content.md $(REPORTDIR)/diff_release_oak.md > $(REPORTDIR)/summary_release.md FILTER_OUT=../patterns/definitions.owl ../patterns/pattern.owl reports/cl-edit.owl-obo-report.tsv MAIN_FILES_RELEASE = $(foreach n, $(filter-out $(FILTER_OUT), $(RELEASE_ASSETS)), ../../$(n)) \ diff --git a/src/ontology/reports/edges.tsv b/src/ontology/reports/edges.tsv index 1add5ca13..832ac895b 100644 --- a/src/ontology/reports/edges.tsv +++ b/src/ontology/reports/edges.tsv @@ -2775,4 +2775,4 @@ - + \ No newline at end of file diff --git a/src/ontology/reports/synonyms.tsv 
b/src/ontology/reports/synonyms.tsv index 8473d8e56..992203168 100644 --- a/src/ontology/reports/synonyms.tsv +++ b/src/ontology/reports/synonyms.tsv @@ -4598,4 +4598,4 @@ "CD8-positive, CD25-positive Treg" "pale thymic epithelial cell" "R6 cell" - "goblet cell of epithelium of pyloric gland" + "goblet cell of epithelium of pyloric gland" \ No newline at end of file diff --git a/src/ontology/reports/xrefs.tsv b/src/ontology/reports/xrefs.tsv index 2ce2a598a..34497ac9a 100644 --- a/src/ontology/reports/xrefs.tsv +++ b/src/ontology/reports/xrefs.tsv @@ -1439,4 +1439,4 @@ "FMA:263061" "BTO:0003064" "FMA:263102" - "KUPO:0001086" + "KUPO:0001086" \ No newline at end of file diff --git a/src/scripts/content_summary.py b/src/scripts/content_summary.py new file mode 100644 index 000000000..b0486d3a8 --- /dev/null +++ b/src/scripts/content_summary.py @@ -0,0 +1,198 @@ +""" Script to summarize content in an ontology """ +import argparse +from datetime import datetime + +import pandas as pd +from rdflib import Graph + + +class OntologyContentReport: + """Generic class for summarizing content in an ontology""" + + def __init__(self, ontology_iri, ont_namespace): + """ + Initialize the OntologyContentReport object. + + Args: + ontology_iri (str): The IRI or filepath of the ontology to summarize. + ont_namespace (str): The namespace of the ontology. + """ + self.ontology_iri = ontology_iri + self.ont_namespace = ont_namespace + self.g = self._init_graph(ontology_iri) + self.date = datetime.now().strftime("%Y-%m-%d") + self.nb_subclass_root = None + self.nb_annotations = None + self.nb_synonyms = None + self.nb_references = None + self.nb_def_references = None + self.nb_relationships = None + self.nb_cxg = None + self.nb_hra = None + + def _init_graph(self, ontology_iri): + """ + Load the given ontology into a Graph object. + + Args: + ontology_iri (str): The IRI or filepath of the ontology. + + Returns: + rdflib.Graph: The loaded ontology graph. 
+ """ + g = Graph() + g.parse(ontology_iri, format="xml") + return g + + def query(self, query): + """ + Execute a SPARQL query on the ontology graph. + + Args: + query (str): The SPARQL query to execute. + + Returns: + int: The value bound to ?count in the first result row. + """ + response = self.g.query(query) + return int(response.bindings[0]["count"]) + + def get_content_summary(self): + """ + Query the ontology graph to get the content summary. + """ + self.nb_subclass_root = self.query(f""" + SELECT (COUNT (DISTINCT ?class) AS ?count) + WHERE {{ + ?ont rdf:type owl:Ontology . + ?ont <http://purl.obolibrary.org/obo/IAO_0000700> ?root . + ?class rdfs:subClassOf* ?root . + FILTER (STRSTARTS(STR(?class), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_annotations = self.query(f""" + SELECT (COUNT (?annotation) AS ?count) + WHERE {{ + ?annotation rdf:type owl:AnnotationProperty . + ?class rdf:type owl:Class . + ?class ?annotation ?value . + FILTER (STRSTARTS(STR(?class), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_cxg = self.query(f""" + SELECT (COUNT (?cxg) AS ?count) + WHERE {{ + ?cxg rdf:type owl:Class . + ?cxg <http://www.geneontology.org/formats/oboInOwl#inSubset> <http://purl.obolibrary.org/obo/cl#cellxgene_subset> . + FILTER (STRSTARTS(STR(?cxg), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_hra = self.query(f""" + SELECT (COUNT (?hra) AS ?count) + WHERE {{ + ?hra rdf:type owl:Class . + ?hra <http://www.geneontology.org/formats/oboInOwl#inSubset> <http://purl.obolibrary.org/obo/cl#human_reference_atlas> . 
+ FILTER (STRSTARTS(STR(?hra), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_synonyms = self.count_report( + self.load_report(f"{self.ont_namespace.lower()}-synonyms") + ) + + self.nb_relationships = self.count_report( + self.load_report(f"{self.ont_namespace.lower()}-edges") + ) + + self.nb_references = self.count_report(self.load_report( + f"{self.ont_namespace.lower()}-xrefs")["?xref"].unique() + ) + + self.nb_def_references = self.count_report( + self.load_report( + f"{self.ont_namespace.lower()}-def-xrefs" + )["?xref"].unique() + ) + + def load_report(self, report_type): + """ + Load a report from a file. + + Args: + report_type (str): The type of report to load. + + Returns: + pandas.DataFrame: The loaded report data. + """ + return pd.read_csv(f"reports/{report_type}.tsv", sep="\t") + + def count_report(self, data): + """ + Count the number of rows or items in a report. + + Args: + data (pandas.DataFrame or array-like): The report data, or the unique values of one of its columns. + + Returns: + int: The number of rows (or items) in the data. + """ + return len(data) + + def prepare_report(self): + """ + Prepare the content summary report for printing. 
+ """ + print(f"# Release Notes {self.date}") + print("## Ontology content summary") + + summary_table = [ + { + "Metric": "Number of subclasses of root", + "Value": self.nb_subclass_root + }, + { + "Metric": f"Number of annotations on {self.ont_namespace} terms", + "Value": self.nb_annotations + }, + { + "Metric": "Number of synonyms", + "Value": self.nb_synonyms + }, + { + "Metric": "Number of unique references", + "Value": self.nb_references + }, + { + "Metric": "Number of unique references in definitions", + "Value": self.nb_def_references + }, + { + "Metric": f"Number of relationships with {self.ont_namespace} term as subject", + "Value": self.nb_relationships + }, + { + "Metric": "Number of cellxgene classes", + "Value": self.nb_cxg + }, + { + "Metric": "Number of HRA classes", + "Value": self.nb_hra + } + ] + + print(pd.DataFrame(summary_table).to_markdown(index=False)) + + +if __name__ == "__main__": + cli = argparse.ArgumentParser() + cli.add_argument("--ontology_iri", type=str, required=True, help="IRI or filepath of ontology to summarize") + cli.add_argument("--ont_namespace", type=str, required=True, help="Ontology namespace") + + args = cli.parse_args() + + report = OntologyContentReport(args.ontology_iri, args.ont_namespace) + report.get_content_summary() + report.prepare_report() diff --git a/src/sparql/cl-def-xrefs.sparql b/src/sparql/cl-def-xrefs.sparql new file mode 100644 index 000000000..daf8adb2c --- /dev/null +++ b/src/sparql/cl-def-xrefs.sparql @@ -0,0 +1,16 @@ +prefix oio: <http://www.geneontology.org/formats/oboInOwl#> +prefix owl: <http://www.w3.org/2002/07/owl#> +prefix definition: <http://purl.obolibrary.org/obo/IAO_0000115> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +SELECT ?cls ?xref WHERE +{ + ?cls definition: ?def . + ?ax a owl:Axiom; + owl:annotatedSource ?cls; + owl:annotatedProperty definition:; + owl:annotatedTarget ?def; + oio:hasDbXref ?xref . + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . 
} + FILTER(isIRI(?cls) && (STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#"))) +} diff --git a/src/sparql/cl-edges.sparql b/src/sparql/cl-edges.sparql new file mode 100644 index 000000000..b2daa35db --- /dev/null +++ b/src/sparql/cl-edges.sparql @@ -0,0 +1,21 @@ +prefix owl: <http://www.w3.org/2002/07/owl#> +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +SELECT ?x ?p ?y +WHERE { + {?x rdfs:subClassOf [ + a owl:Restriction ; + owl:onProperty ?p ; + owl:someValuesFrom ?y ] + } + UNION { + ?x rdfs:subClassOf ?y . + BIND(rdfs:subClassOf AS ?p) + } + ?x a owl:Class . + ?y a owl:Class . + FILTER NOT EXISTS { ?x owl:deprecated "true"^^xsd:boolean . } + FILTER(isIRI(?x) && (STRSTARTS(str(?x), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?x), "http://purl.obolibrary.org/obo/cl#"))) +} diff --git a/src/sparql/cl-synonyms.sparql b/src/sparql/cl-synonyms.sparql new file mode 100644 index 000000000..1c0eceaa2 --- /dev/null +++ b/src/sparql/cl-synonyms.sparql @@ -0,0 +1,29 @@ +prefix owl: <http://www.w3.org/2002/07/owl#> +prefix oboInOwl: <http://www.geneontology.org/formats/oboInOwl#> +prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +SELECT ?cls ?pred ?val ?synType +WHERE + { ?cls ?pred ?val ; + a owl:Class . + FILTER ( + ?pred = rdfs:label || + ?pred = oboInOwl:hasRelatedSynonym || + ?pred = oboInOwl:hasNarrowSynonym || + ?pred = oboInOwl:hasBroadSynonym || + ?pred = oboInOwl:hasExactSynonym + ) + + OPTIONAL { + [ + a owl:Axiom ; + owl:annotatedSource ?cls ; + owl:annotatedProperty ?pred ; + owl:annotatedTarget ?val ; + oboInOwl:hasSynonymType ?synType + ] + } + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . 
} + FILTER(isIRI(?cls) && (STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#"))) + } diff --git a/src/sparql/cl-xrefs.sparql b/src/sparql/cl-xrefs.sparql new file mode 100644 index 000000000..211d43a56 --- /dev/null +++ b/src/sparql/cl-xrefs.sparql @@ -0,0 +1,11 @@ +prefix oio: <http://www.geneontology.org/formats/oboInOwl#> +prefix owl: <http://www.w3.org/2002/07/owl#> +prefix xsd: <http://www.w3.org/2001/XMLSchema#> + +SELECT ?cls ?xref WHERE +{ + ?cls a owl:Class ; + oio:hasDbXref ?xref . + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . } + FILTER(isIRI(?cls) && (STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#"))) +}