Skip to content

Commit

Permalink
add script to gen json-ld and respec
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyseale committed Nov 20, 2023
1 parent fde83c2 commit 5f42e0e
Show file tree
Hide file tree
Showing 7 changed files with 314 additions and 20 deletions.
Empty file added code/__init__.py
Empty file.
98 changes: 98 additions & 0 deletions code/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import json
import re
from dataclasses import dataclass, field, asdict
from typing import List, Optional

import jinja2
import rdflib
from jinja2 import Environment, PackageLoader, select_autoescape
from rdflib import Graph, URIRef
from rdflib.namespace import OWL, RDF, RDFS, XSD, DCAT

# Read the DPROD ontology (Turtle syntax) into an in-memory RDFLib graph.
g = Graph()
g.parse('../ontology/dprod/dprod.ttl', format='ttl')

# Initial JSON-LD context: the RDF namespace is the default vocabulary and the
# other standard namespaces are registered under their usual prefixes.
# Class and property terms are added to this dict later by add_to_context.
context = {
    "@vocab": str(RDF),
    **{
        prefix: str(namespace)
        for prefix, namespace in (
            ("owl", OWL),
            ("rdfs", RDFS),
            ("xsd", XSD),
            ("dcat", DCAT),
        )
    },
}

@dataclass
class RdfProperty:
    """An RDF property attached to an :class:`RdfClass`.

    Besides the declared fields, ``add_to_context`` sets one extra attribute
    per statement about the property, named after the short name of the
    predicate, so instances usually carry more attributes than listed here.
    """

    # Short name of the property (last segment of its URI).
    name: str
    # Full URI of the property in the ontology graph.
    uri: URIRef
    # Optional descriptive text; stays None unless something assigns it.
    description: Optional[str] = None

@dataclass
class RdfClass:
    """An OWL class found in the ontology, together with its properties.

    Besides the declared fields, ``add_to_context`` sets one extra attribute
    per statement about the class, named after the short name of the
    predicate, so instances usually carry more attributes than listed here.
    """

    # Short name of the class (last segment of its URI).
    name: str
    # Full URI of the class in the ontology graph.
    uri: URIRef
    # Optional descriptive text; stays None unless something assigns it.
    description: Optional[str] = None
    # NOTE(review): nothing in this script fills this list; presumably meant
    # for the superclasses of the class - confirm before relying on it.
    inherits: list = field(default_factory=list)
    # Properties whose rdfs:domain is this class.
    properties: List[RdfProperty] = field(default_factory=list)



# Registry of RdfClass objects keyed by class URI; filled by add_to_context.
classes = {}


def short_name(uri, split_on=r'/|#'):
    """Return the last segment of *uri*.

    The URI is cut wherever the regular expression *split_on* matches (by
    default at every ``/`` or ``#``) and the final piece is returned. A URI
    that ends with a separator therefore yields an empty string. ``None`` is
    passed through unchanged.
    """
    return None if uri is None else re.split(split_on, uri)[-1]


def _copy_statements_as_attributes(target, subject):
    """Attach every statement about *subject* in ``g`` to *target*.

    Each triple becomes an attribute on *target* whose name is the short name
    of the predicate and whose value is the object term. If a predicate
    occurs more than once, the last triple iterated wins.
    """
    for _, predicate, obj in g.triples((subject, None, None)):
        setattr(target, short_name(predicate), obj)


def add_to_context(uri):
    """Register *uri* in the JSON-LD ``context`` and, for classes, in ``classes``.

    Only ``URIRef`` subjects are handled; anything else is ignored.

    * ``owl:Class``: adds an ``@id`` term to ``context``, stores an
      ``RdfClass`` in ``classes``, recurses into every subclass, and collects
      every property whose ``rdfs:domain`` is the class.
    * ``owl:ObjectProperty``: adds a term carrying ``@id`` and ``@type`` for
      each ``rdfs:range``; with several ranges the last one iterated wins and
      with none no term is added.
    * ``owl:DatatypeProperty``: adds a term with ``@id`` and, when an
      ``rdfs:range`` exists, ``@type`` set to the first range found.
    """
    if not isinstance(uri, rdflib.term.URIRef):
        return

    name = short_name(uri)
    # Query rdf:type once and reuse the result for every branch below.
    types = set(g.objects(uri, RDF.type))

    if OWL.Class in types:
        if uri in classes:
            # Already expanded: skipping avoids redundant work and stops the
            # recursion below from looping forever on a cyclic subclass chain.
            return

        context[name] = {"@id": str(uri)}
        class_obj = RdfClass(name=name, uri=uri)
        classes[uri] = class_obj
        _copy_statements_as_attributes(class_obj, uri)

        for subclass in g.subjects(RDFS.subClassOf, uri):
            add_to_context(subclass)

        for property_uri in g.subjects(RDFS.domain, uri):
            rdf_property = RdfProperty(name=short_name(property_uri), uri=property_uri)
            class_obj.properties.append(rdf_property)
            _copy_statements_as_attributes(rdf_property, property_uri)
    elif OWL.ObjectProperty in types:
        # NOTE(review): JSON-LD type coercion normally expects "@id" or a
        # datatype IRI here; the range class IRI is kept so the generated
        # context stays unchanged - confirm this is intended.
        for range_uri in g.objects(uri, RDFS.range):
            context[name] = {"@id": str(uri), "@type": str(range_uri)}
    elif OWL.DatatypeProperty in types:
        # A datatype property without rdfs:range used to raise StopIteration;
        # fall back to a term that carries only the identifier.
        range_uri = next(g.objects(uri, RDFS.range), None)
        term = {"@id": str(uri)}
        if range_uri is not None:
            term["@type"] = str(range_uri)
        context[name] = term


# Feed every subject of the graph through add_to_context so that the JSON-LD
# context and the class registry are populated.
for class_uri in g.subjects():
    add_to_context(class_uri)

json_ld = {"@context": context}
# From here on only the RdfClass objects are needed, not their URI keys.
classes = classes.values()

# Render the ReSpec document from the Jinja2 template.
env = Environment(loader=jinja2.FileSystemLoader(searchpath="../docs/respec/"))
template = env.get_template("template.html")
spec = template.render(classes=classes)

# Print the name of every class that was rendered, one per line.
for c in classes:
    print(c.name)

with open('../docs/assets/spec.html', 'w', encoding='utf-8') as f:
    f.write(spec)

with open('../docs/assets/dprod.jsonld', 'w', encoding='utf-8') as f:
    json.dump(json_ld, f, indent=4)


66 changes: 66 additions & 0 deletions docs/assets/dprod.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
{
"@context": {
"@vocab": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"owl": "http://www.w3.org/2002/07/owl#",
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
"xsd": "http://www.w3.org/2001/XMLSchema#",
"dcat": "http://www.w3.org/ns/dcat#",
"inputPort": {
"@id": "https://w3id.org/dprod/inputPort",
"@type": "http://www.w3.org/ns/dcat#DataService"
},
"DataProductLifecycleStatus": {
"@id": "https://w3id.org/dprod/DataProductLifecycleStatus"
},
"GraphQLDataService": {
"@id": "https://w3id.org/dprod/GraphQLDataService"
},
"CallbackDataService": {
"@id": "https://w3id.org/dprod/CallbackDataService"
},
"StreamingDataService": {
"@id": "https://w3id.org/dprod/StreamingDataService"
},
"ObjectDataService": {
"@id": "https://w3id.org/dprod/ObjectDataService"
},
"lifecycle": {
"@id": "https://w3id.org/dprod/lifecycle",
"@type": "http://www.w3.org/ns/dcat#DataProductLifecycle"
},
"RESTDataService": {
"@id": "https://w3id.org/dprod/RESTDataService"
},
"dataProductOwner": {
"@id": "https://w3id.org/dprod/dataProductOwner",
"@type": "http://purl.org/dc/terms/Agent"
},
"outputPort": {
"@id": "https://w3id.org/dprod/outputPort",
"@type": "http://www.w3.org/ns/dcat#DataService"
},
"offersDistribution": {
"@id": "https://w3id.org/dprod/offersDistribution",
"@type": "http://www.w3.org/ns/dcat#Distribution"
},
"QueuingDataService": {
"@id": "https://w3id.org/dprod/QueuingDataService"
},
"FileDataService": {
"@id": "https://w3id.org/dprod/FileDataService"
},
"DatabaseDataService": {
"@id": "https://w3id.org/dprod/DatabaseDataService"
},
"DataProduct": {
"@id": "https://w3id.org/dprod/DataProduct"
},
"Enumeration": {
"@id": "https://w3id.org/dprod/Enumeration"
},
"belongsToDataset": {
"@id": "https://w3id.org/dprod/belongsToDataset",
"@type": "http://www.w3.org/ns/dcat#Dataset"
}
}
}
133 changes: 119 additions & 14 deletions docs/assets/spec.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ <h2>Conformance</h2>
Everything else in this Profile is normative.</p>

<p>The key words MAY, MUST, MUST NOT, RECOMMENDED, SHOULD, and SHOULD NOT are to be interpreted as described in [[!RFC2119]].</p>

</section>
<section>
<h3>Normative namespaces</h3>
<p>Namespaces and prefixes used in normative parts of this Profile are shown in the following table.</p>
Expand Down Expand Up @@ -135,7 +135,7 @@ <h3>Normative namespaces</h3>

</section>
<section>
<h2>Data Product (DPROD) Model</h2>
<h2>Data Product (DPROD) Model</h2>
<p>Data Catalog Vocabulary (DCAT) is a W3C standard that facilitates interoperability between data catalogs published on the web. By using DCAT to declare input and output ports, Semantic Data Products can effectively describe the details of the data services they provide, including the datasets and the operations that can be performed on them.

Semantic Data Products take advantage of this by defining ports that specify not only the data format and structure but also the semantics—that is, the meaning and relationships of the data elements. This allows for the integration of data across different domains, as the shared semantics ensure that all stakeholders have a common understanding of the data, which is critical in a Data Mesh architecture.
Expand All @@ -153,7 +153,8 @@ <h2>Data Product (DPROD) Model</h2>
<figcaption>
Overview of DCAT Profile, showing the relevant classes, properties and relationships.
</figcaption>
</figure>
</figure>
</section>
<p>
The Profile consists of the following classes:
<UL>
Expand All @@ -175,6 +176,8 @@ <h2>Data Product (DPROD) Model</h2>
"@type": "dprod:DataProduct",
"dprod:title": "UK Bonds",
"dprod:description": "UK Bonds is your one-stop-shop for all ...",
"dprod:dataProductOwner" : "https://www.linkedin.com/in/tonyseale/",
"dprod:lifecycle" : "Consume",
"dprod:outputPort": {
"@type": "dprod:Port",
"dcat:endpointURL": "https://y.com/uk-10-year-bonds",
Expand All @@ -191,26 +194,128 @@ <h2>Data Product (DPROD) Model</h2>

</section>

# for each class
<section>
<h2>Describing the Dataset Series</h2>

#foreach property
<section>
<h2>Identifier</h2>


<section>
<h2>DataProductLifecycleStatus</h2>
The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ).

</section>

<section>
<h2>GraphQLDataService</h2>
Single REST endpoint, with structure given by GraphQL schema

</section>

<section>
<h2>CallbackDataService</h2>
Streams by making calls to a client-provided e.g. WebSockets

</section>

<section>
<h2>StreamingDataService</h2>
Data is continuously made available

</section>

<section>
<h2>ObjectDataService</h2>
Structured API, e.g. gRPC, CORBA, SOAP, ORM

</section>

<section>
<h2>RESTDataService</h2>
Accessed using http verbs with parameters, may be defined using OpenAPI

</section>

<section>
<h2>QueuingDataService</h2>
Streams using a queue or topic e.g. MQTT, Kafka, DDS

</section>

<section>
<h2>FileDataService</h2>
Uses file-like access methods. May or may not be streaming if the file is continuously written to

</section>

<section>
<h2>DatabaseDataService</h2>
Uses database-like access methods, including query e.g. JDBC, ODBC, SPARQL endpoint

</section>

<section>
<h2>DataProduct</h2>
A data product is a rational, managed, and governed collection of data, with purpose, value and ownership, meeting consumer needs over a planned life-cycle.

<section>
<h2>dataProductOwner</h2>
<table class="def propdef">
<tbody>
<tr><th>Identifier:</th> <td><code>dct:identifier</code></td></tr>
<tr><th>Notes:</th><td>A unique URI of the Dataset Series. Mapped to <code>@id</code> in JSON-LD serialisations.</td></tr>
<tr><th>Identifier:</th> <td><code>prod:dataProductOwner</code></td></tr>
<tr><th>Notes:</th><td>The Agent that is overall accountable for the data product. This includes managing the data product along its lifecycle ( creation, usage, versioning, deletion). This can be different from the creator or the publisher of the Data Product </td></tr>
<tr><th>Domain:</th><td>https://w3id.org/dprod/DataProduct</td></tr>
<tr><th>Range:</th><td>http://purl.org/dc/terms/Agent</td></tr>
</tbody>
</table>
</section>
</section>

<section>
<h2>inputPort</h2>
<table class="def propdef">
<tbody>
<tr><th>Identifier:</th> <td><code>prod:inputPort</code></td></tr>
<tr><th>Notes:</th><td>An input port describes a set of services exposed by a data product to collect its source data and make it available for further internal transformation. An input port can receive data from one or more upstream sources in a push (i.e. asynchronous subscription) or pull mode (i.e. synchronous query). Each data product may have one or more input ports.</td></tr>
<tr><th>Domain:</th><td>https://w3id.org/dprod/DataProduct</td></tr>
<tr><th>Range:</th><td>http://www.w3.org/ns/dcat#DataService</td></tr>
</tbody>
</table>
</section>

<section>
<h2>outputPort</h2>
<table class="def propdef">
<tbody>
<tr><th>Identifier:</th> <td><code>prod:outputPort</code></td></tr>
<tr><th>Notes:</th><td>An output port describes a set of services exposed by a data product to share the generated data in a way that can be understood and trusted. Each data product must have one or more output ports.</td></tr>
<tr><th>Domain:</th><td>https://w3id.org/dprod/DataProduct</td></tr>
<tr><th>Range:</th><td>http://www.w3.org/ns/dcat#DataService</td></tr>
</tbody>
</table>
</section>

<section>
<h2>lifecycle</h2>
<table class="def propdef">
<tbody>
<tr><th>Identifier:</th> <td><code>prod:lifecycle</code></td></tr>
<tr><th>Notes:</th><td>The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ).</td></tr>
<tr><th>Domain:</th><td>https://w3id.org/dprod/DataProduct</td></tr>
<tr><th>Range:</th><td>http://www.w3.org/ns/dcat#DataProductLifecycle</td></tr>
</tbody>
</table>
</section>

</section>

<section>
<h2>Enumeration</h2>
The superclass of enumeration lists referenced from Data Product related artifacts

</section>



<section class="appendix">
<h2>Acknowledgements</h2>
<p>The editors gratefully acknowledge the feedback and contributions made to this document by: </p>
</section>

</body>
</html>
</html>
Loading

0 comments on commit 5f42e0e

Please sign in to comment.