diff --git a/.github/workflows/post-release-diff.yaml b/.github/workflows/post-release-diff.yaml index d7e299075..55ddddfdb 100644 --- a/.github/workflows/post-release-diff.yaml +++ b/.github/workflows/post-release-diff.yaml @@ -5,7 +5,7 @@ on: pull_request: branches: [ master ] paths: - - 'src/ontology/diffs/cl-diff.md' + - 'src/ontology/reports/cl-base-diff.md' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -15,15 +15,15 @@ jobs: post_diff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Prepare release comment env: GITHUB_SHA: ${{ github.sha }} - run: "echo \"[Here's a diff of how this release impacts cl.owl](https://github.com/obophenotype/cell-ontology/blob/${{ env.GITHUB_SHA }}/src/ontology/diffs/cl-diff.md)\" >comment.md" + run: "echo \"[Here's a diff of how this release impacts cl-base.owl](https://github.com/obophenotype/cell-ontology/blob/${{ env.GITHUB_SHA }}/src/ontology/reports/cl-base-diff.md)\" >comment.md" - name: Post reasoned comment env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - uses: NejcZdovc/comment-pr@v1.1.1 + uses: NejcZdovc/comment-pr@v2 with: github_token: ${{ env.GITHUB_TOKEN }} file: "../../comment.md" diff --git a/.gitignore b/.gitignore index c02a3ceda..6d1b94f52 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ src/patterns/pattern_owl_seed.txt src/ontology/ontologyterms.txt src/ontology/simple_seed.txt src/ontology/reports/* +!src/ontology/reports/cl-base-diff.md src/ontology/cl-hipc.owl site/ src/ontology/cl-check.obo diff --git a/docs/cl-release.md b/docs/cl-release.md index 3d61f3963..648cceb80 100644 --- a/docs/cl-release.md +++ b/docs/cl-release.md @@ -41,7 +41,7 @@ Preparation: 1. Make sure that all changes to master are committed to Github (`git status` should say that there are no modified files) 1. Locally make sure you have the latest changes from master (`git pull`) 1. Checkout a new branch (e.g. 
`git checkout -b release-2021-01-01`) -1. You may or may not want to refresh your imports as part of your release strategy (see [here](UpdateImports.md))(Note: in CL we decouple our imports and releases - we hence advice that you do not update imports) +1. You may or may not want to refresh your imports as part of your release strategy (see second section [here](Adding_classes_from_another_ontology.md))(Note: in CL we decouple our imports and releases - we hence advise that you do not update imports) 1. Make sure you have the latest ODK installed by running `docker pull obolibrary/odkfull` To actually run the release, you: @@ -54,12 +54,10 @@ To actually run the release, you: 1. Deploy release on GitHub by running `make deploy_release GHVERSION="v2022-06-20"` on the release branch (DO NOTE CHANGE TO MAIN BRANCH!), replacing the date with the date of release (NOTE: no `sh run.sh`) Editors note: ODK 1.3.2 will have a feature to run the release from inside the docker container. For now deploy_release has to be run outside. 1. This should end with a GitHub release link that looks something like: -``` -https://github.com/obophenotype/cl/releases/tag/untagged-8935f3432525b27a0d84 -``` +`https://github.com/obophenotype/cl/releases/tag/untagged-8935f3432525b27a0d84` Copy the link and paste it in your browser, this should show you a draft release. 1. Click the edit button (the pencil button on the top right corner) and change the tag to the GHVERSION you entered above (eg v2022-06-20) -1. Change the `TBD.` in the main text to a summary of the main changes in the release if needed. +1. Change the `TBD.` in the main text to a summary of the main changes in the release if needed. Copy and paste the text and table from the `reports/summary_release.md` file. This file is in `.gitignore` and will only be available to those who have run the release. 
The section `Classes added` needs to be manually amended due to a [known issue](https://github.com/INCATools/ontology-access-kit/issues/732) in the OAK diff command. Remove the duplicated classes and update the number of new classes created. 1. Scroll down all the way and click the `update release` button. diff --git a/src/ontology/Makefile b/src/ontology/Makefile index 7ee8050f2..bf6ab5589 100644 --- a/src/ontology/Makefile +++ b/src/ontology/Makefile @@ -10,7 +10,7 @@ # More information: https://github.com/INCATools/ontology-development-kit/ # Fingerprint of the configuration file when this Makefile was last generated -CONFIG_HASH= 8b5b779b91f8bb931caf3512d6c7fcb325ef83bafc7ba409b86058a9dae7f67f +CONFIG_HASH= b786b0d7cbd09184896d55b42fe68a40981419e0c9d848963a74348b7bb955b7 # ---------------------------------------- @@ -45,7 +45,7 @@ REPORT_LABEL = REPORT_PROFILE_OPTS = OBO_FORMAT_OPTIONS = SPARQL_VALIDATION_CHECKS = equivalent-classes owldef-self-reference nolabels pmid-not-dbxref obsolete-replaced_by obsolete-alt-id orcid-contributor illegal-annotation-property label-synonym-polysemy illegal-date -SPARQL_EXPORTS = basic-report +SPARQL_EXPORTS = cl_terms cl-edges cl-synonyms cl-xrefs cl-def-xrefs ODK_VERSION_MAKEFILE = v1.5 TODAY ?= $(shell date +%Y-%m-%d) @@ -87,7 +87,7 @@ endif all: all_odk .PHONY: all_odk -all_odk: odkversion config_check test custom_reports all_assets +all_odk: odkversion config_check test custom_reports all_assets release_diff .PHONY: test test: odkversion dosdp_validation reason_test sparql_test robot_reports $(REPORTDIR)/validate_profile_owl2dl_$(ONT).owl.txt diff --git a/src/ontology/cl-odk.yaml b/src/ontology/cl-odk.yaml index 0fe944a9c..1b4e4df5f 100644 --- a/src/ontology/cl-odk.yaml +++ b/src/ontology/cl-odk.yaml @@ -8,6 +8,7 @@ report_fail_on: None use_dosdps: TRUE use_mappings: True use_edit_file_imports: FALSE +release_diff: TRUE export_formats: - owl - obo @@ -112,8 +113,12 @@ robot_report: - illegal-annotation-property - 
label-synonym-polysemy - illegal-date - custom_sparql_exports : - - basic-report + custom_sparql_exports: + - cl_terms + - cl-edges + - cl-synonyms + - cl-xrefs + - cl-def-xrefs components: products: - filename: hra_subset.owl diff --git a/src/ontology/cl.Makefile b/src/ontology/cl.Makefile index 5025db23c..52c5d1ada 100644 --- a/src/ontology/cl.Makefile +++ b/src/ontology/cl.Makefile @@ -337,13 +337,25 @@ DEPLOY_GH=true .PHONY: cl cl: - $(MAKE) prepare_release IMP=false PAT=false - $(MAKE) release-diff + $(MAKE) prepare_release IMP=false PAT=false MIR=false + $(MAKE) release-base-diff + $(MAKE) prepare_content_summary if [ $(DEPLOY_GH) = true ]; then $(MAKE) deploy_release GHVERSION="v$(TODAY)"; fi -.PHONY: release-diff -release-diff: - $(ROBOT) diff --labels True -f markdown --left-iri http://purl.obolibrary.org/obo/cl.owl --right ../../cl.owl --output diffs/$(ONT)-diff.md +CURRENT_BASE_RELEASE=$(ONTBASE)/cl-base.obo + +$(TMPDIR)/current-base-release.obo: + wget $(CURRENT_BASE_RELEASE) -O $@ + +.PHONY: release-base-diff +release-base-diff: $(TMPDIR)/current-base-release.obo $(RELEASEDIR)/cl-base.obo + $(ROBOT) diff --labels True -f markdown --left $(TMPDIR)/current-base-release.obo --right $(RELEASEDIR)/cl-base.obo --output reports/$(ONT)-base-diff.md + +.PHONY: prepare_content_summary +prepare_content_summary: $(RELEASEDIR)/cl-base.owl $(RELEASEDIR)/cl-base.obo $(TMPDIR)/current-base-release.obo custom_reports + python ./$(SCRIPTSDIR)/content_summary.py --ontology_iri $< --ont_namespace "CL" > $(REPORTDIR)/ontology_content.md + runoak -i simpleobo:$(TMPDIR)/current-base-release.obo diff -X simpleobo:$(RELEASEDIR)/cl-base.obo -o $(REPORTDIR)/diff_release_oak.md --output-type md + cat $(REPORTDIR)/ontology_content.md $(REPORTDIR)/diff_release_oak.md > $(REPORTDIR)/summary_release.md FILTER_OUT=../patterns/definitions.owl ../patterns/pattern.owl reports/cl-edit.owl-obo-report.tsv MAIN_FILES_RELEASE = $(foreach n, $(filter-out $(FILTER_OUT), $(RELEASE_ASSETS)), 
../../$(n)) \ diff --git a/src/ontology/reports/edges.tsv b/src/ontology/reports/edges.tsv index 1add5ca13..832ac895b 100644 --- a/src/ontology/reports/edges.tsv +++ b/src/ontology/reports/edges.tsv @@ -2775,4 +2775,4 @@ - + \ No newline at end of file diff --git a/src/ontology/reports/synonyms.tsv b/src/ontology/reports/synonyms.tsv index 8473d8e56..992203168 100644 --- a/src/ontology/reports/synonyms.tsv +++ b/src/ontology/reports/synonyms.tsv @@ -4598,4 +4598,4 @@ "CD8-positive, CD25-positive Treg" "pale thymic epithelial cell" "R6 cell" - "goblet cell of epithelium of pyloric gland" + "goblet cell of epithelium of pyloric gland" \ No newline at end of file diff --git a/src/ontology/reports/xrefs.tsv b/src/ontology/reports/xrefs.tsv index 2ce2a598a..34497ac9a 100644 --- a/src/ontology/reports/xrefs.tsv +++ b/src/ontology/reports/xrefs.tsv @@ -1439,4 +1439,4 @@ "FMA:263061" "BTO:0003064" "FMA:263102" - "KUPO:0001086" + "KUPO:0001086" \ No newline at end of file diff --git a/src/scripts/content_summary.py b/src/scripts/content_summary.py new file mode 100644 index 000000000..b0486d3a8 --- /dev/null +++ b/src/scripts/content_summary.py @@ -0,0 +1,198 @@ +""" Script to summarize content in an ontology """ +import argparse +from datetime import datetime + +import pandas as pd +from rdflib import Graph + + +class OntologyContentReport: + """Generic class for summarizing content in an ontology""" + + def __init__(self, ontology_iri, ont_namespace): + """ + Initialize the OntologyContentReport object. + + Args: + ontology_iri (str): The IRI or filepath of the ontology to summarize. + ont_namespace (str): The namespace of the ontology. 
+ """ + self.ontology_iri = ontology_iri + self.ont_namespace = ont_namespace + self.g = self._init_graph(ontology_iri) + self.date = datetime.now().strftime("%Y-%m-%d") + self.nb_subclass_root = None + self.nb_annotations = None + self.nb_synonyms = None + self.nb_references = None + self.nb_def_references = None + self.nb_relationships = None + self.nb_cxg = None + self.nb_hra = None + + def _init_graph(self, ontology_iri): + """ + Load the given ontology into a Graph object. + + Args: + ontology_iri (str): The IRI or filepath of the ontology. + + Returns: + rdflib.Graph: The loaded ontology graph. + """ + g = Graph() + g.parse(ontology_iri, format="xml") + return g + + def query(self, query): + """ + Execute a SPARQL query on the ontology graph. + + Args: + query (str): The SPARQL query to execute. + + Returns: + int: The count of query results. + """ + response = self.g.query(query) + return response.bindings[0]["count"] + + def get_content_summary(self): + """ + Query the ontology graph to get the content summary. + """ + self.nb_subclass_root = self.query(f""" + SELECT (COUNT (DISTINCT ?class) AS ?count) + WHERE {{ + ?ont rdf:type owl:Ontology . + ?ont ?root . + ?class rdfs:subClassOf* ?root . + FILTER (STRSTARTS(STR(?class), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_annotations = self.query(f""" + SELECT (COUNT (?annotation) AS ?count) + WHERE {{ + ?annotation rdf:type owl:AnnotationProperty . + ?class rdf:type owl:Class . + ?class ?annotation ?value . + FILTER (STRSTARTS(STR(?class), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_cxg = self.query(f""" + SELECT (COUNT (?cxg) AS ?count) + WHERE {{ + ?cxg rdf:type owl:Class . + ?cxg . + FILTER (STRSTARTS(STR(?cxg), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_hra = self.query(f""" + SELECT (COUNT (?hra) AS ?count) + WHERE {{ + ?hra rdf:type owl:Class . + ?hra . 
+ FILTER (STRSTARTS(STR(?hra), "http://purl.obolibrary.org/obo/{self.ont_namespace}_")) + }} + """) + + self.nb_synonyms = self.count_report( + self.load_report(f"{self.ont_namespace.lower()}-synonyms") + ) + + self.nb_relationships = self.count_report( + self.load_report(f"{self.ont_namespace.lower()}-edges") + ) + + self.nb_references = self.count_report(self.load_report( + f"{self.ont_namespace.lower()}-xrefs")["?xref"].unique() + ) + + self.nb_def_references = self.count_report( + self.load_report( + f"{self.ont_namespace.lower()}-def-xrefs" + )["?xref"].unique() + ) + + def load_report(self, report_type): + """ + Load a report from a file. + + Args: + report_type (str): The type of report to load. + + Returns: + pandas.DataFrame: The loaded report data. + """ + return pd.read_csv(f"reports/{report_type}.tsv", sep="\t") + + def count_report(self, data): + """ + Count the number of rows in a report. + + Args: + data (pandas.DataFrame): The report data. + + Returns: + int: The number of rows in the report. + """ + return len(data) + + def prepare_report(self): + """ + Prepare the content summary report for printing. 
+ """ + print(f"# Release Notes {self.date}") + print("## Ontology content summary") + + summary_table = [ + { + "Metric": "Number of subclasses of root", + "Value": self.nb_subclass_root + }, + { + "Metric": f"Number of annotations on {self.ont_namespace} terms", + "Value": self.nb_annotations + }, + { + "Metric": "Number of synonyms", + "Value": self.nb_synonyms + }, + { + "Metric": "Number of unique references", + "Value": self.nb_references + }, + { + "Metric": "Number of unique references in definitions", + "Value": self.nb_def_references + }, + { + "Metric": f"Number of relationships with {self.ont_namespace} term as subject", + "Value": self.nb_relationships + }, + { + "Metric": "Number of cellxgene classes", + "Value": self.nb_cxg + }, + { + "Metric": "Number of HRA classes", + "Value": self.nb_hra + } + ] + + print(pd.DataFrame(summary_table).to_markdown(index=False)) + + +if __name__ == "__main__": + cli = argparse.ArgumentParser() + cli.add_argument("--ontology_iri", type=str, help="IRI or filepath of ontology to summarize") + cli.add_argument("--ont_namespace", type=str, help="Ontology namespace") + + args = cli.parse_args() + + report = OntologyContentReport(args.ontology_iri, args.ont_namespace) + report.get_content_summary() + report.prepare_report() diff --git a/src/sparql/cl-def-xrefs.sparql b/src/sparql/cl-def-xrefs.sparql new file mode 100644 index 000000000..daf8adb2c --- /dev/null +++ b/src/sparql/cl-def-xrefs.sparql @@ -0,0 +1,16 @@ +prefix oio: +prefix owl: +prefix definition: +prefix xsd: + +SELECT ?cls ?xref WHERE +{ + ?cls definition: ?def . + ?ax a owl:Axiom; + owl:annotatedSource ?cls; + owl:annotatedProperty definition:; + owl:annotatedTarget ?def; + oio:hasDbXref ?xref . + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . 
} + FILTER(isIRI(?cls) && STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#")) +} diff --git a/src/sparql/cl-edges.sparql b/src/sparql/cl-edges.sparql new file mode 100644 index 000000000..b2daa35db --- /dev/null +++ b/src/sparql/cl-edges.sparql @@ -0,0 +1,21 @@ +prefix owl: +prefix rdfs: +prefix rdf: +prefix xsd: + +SELECT ?x ?p ?y +WHERE { + {?x rdfs:subClassOf [ + a owl:Restriction ; + owl:onProperty ?p ; + owl:someValuesFrom ?y ] + } + UNION { + ?x rdfs:subClassOf ?y . + BIND(rdfs:subClassOf AS ?p) + } + ?x a owl:Class . + ?y a owl:Class . + FILTER NOT EXISTS { ?x owl:deprecated "true"^^xsd:boolean . } + FILTER(isIRI(?x) && STRSTARTS(str(?x), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?x), "http://purl.obolibrary.org/obo/cl#")) +} diff --git a/src/sparql/cl-synonyms.sparql b/src/sparql/cl-synonyms.sparql new file mode 100644 index 000000000..1c0eceaa2 --- /dev/null +++ b/src/sparql/cl-synonyms.sparql @@ -0,0 +1,29 @@ +prefix owl: +prefix oboInOwl: +prefix rdfs: +prefix xsd: + +SELECT ?cls ?pred ?val ?synType +WHERE + { ?cls ?pred ?val ; + a owl:Class . + FILTER ( + ?pred = rdfs:label || + ?pred = oboInOwl:hasRelatedSynonym || + ?pred = oboInOwl:hasNarrowSynonym || + ?pred = oboInOwl:hasBroadSynonym || + ?pred = oboInOwl:hasExactSynonym + ) + + OPTIONAL { + [ + a owl:Axiom ; + owl:annotatedSource ?cls ; + owl:annotatedProperty ?pred ; + owl:annotatedTarget ?val ; + oboInOwl:hasSynonymType ?synType + ] + } + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . 
} + FILTER(isIRI(?cls) && STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#")) + } diff --git a/src/sparql/cl-xrefs.sparql b/src/sparql/cl-xrefs.sparql new file mode 100644 index 000000000..211d43a56 --- /dev/null +++ b/src/sparql/cl-xrefs.sparql @@ -0,0 +1,11 @@ +prefix oio: +prefix owl: +prefix xsd: + +SELECT ?cls ?xref WHERE +{ + ?cls a owl:Class ; + oio:hasDbXref ?xref . + FILTER NOT EXISTS { ?cls owl:deprecated "true"^^xsd:boolean . } + FILTER(isIRI(?cls) && STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/CL_") || STRSTARTS(str(?cls), "http://purl.obolibrary.org/obo/cl#")) +}