Skip to content

Commit

Permalink
Workflow docs - diagram and draft presentation (#54)
Browse files Browse the repository at this point in the history
* chroma error imports have changed

* tweak path for docs action re-run

* publish action to run on PR (will this work)

* backtrack, publish action to push on main

* Added plankton_app_deployment.drawio

* Update plankton_app_deployment.drawio

* add the default material from quarto-revealjs-template

* outline in presentation form to complement demos

* quick screenshots for a walkthrough

* add image links to the just-in-time presentation

* fix titles and ORCID, add notes in README
  • Loading branch information
metazool authored Dec 2, 2024
1 parent e8b1ddf commit d67820f
Show file tree
Hide file tree
Showing 14 changed files with 306 additions and 1 deletion.
96 changes: 96 additions & 0 deletions docs/diagrams/could_be/plankton_app_deployment.drawio
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
<!--
  draw.io (diagrams.net) diagram, single page "Page-1": plankton app deployment sketch.

  Nodes (vertices):
    * "NAS S:\" (cylinder)
    * "JASMIN s3 storage" (cloud)
    * "Task scheduler" (rounded box)
    * "Task runner" (rounded box)
    * "Object store API (python / fastapi)" (rounded box)
    * "Metadata storage" (cylinder)
    * "Posit / Datalabs (streamlit / fastapi)" (cloud)
    * "Tasks" text box: read TIFF images; extract data and create metadata;
      upload extracts to s3 storage

  Labelled edges (source to target: label):
    * Task scheduler to Task runner: "Triggers"
    * Task runner to NAS: "Read access"
    * Task runner to Metadata storage: "Read/write access"
    * Task runner to Object store API: "POSTS to"
    * Object store API to JASMIN s3 storage: "Write access"
    * Posit / Datalabs to Metadata storage: "Read access"
    * Posit / Datalabs to JASMIN s3 storage: "Read access"

  NOTE(review): Task scheduler, Task runner, Object store API and Metadata storage
  share fillColor=#CCFFCC; the diagram has no legend, so the meaning of the green
  fill should be confirmed with the authors.

  This file is generated by the draw.io editor; cell ids and geometry are tool-managed.
-->
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0" version="24.8.3">
<diagram name="Page-1" id="uENUqXIsOQj-Sv-_rjTT">
<mxGraphModel dx="875" dy="465" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="HB7L-xhzdcljNeKlnPmA-1" value="&lt;div&gt;NAS &lt;br&gt;&lt;/div&gt;&lt;div&gt;S:\&lt;br&gt;&lt;/div&gt;" style="shape=cylinder3;whiteSpace=wrap;html=1;boundedLbl=1;backgroundOutline=1;size=15;" parent="1" vertex="1">
<mxGeometry x="120" y="250" width="60" height="80" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-2" value="&lt;div&gt;JASMIN&lt;/div&gt;&lt;div&gt;s3 storage&lt;br&gt;&lt;/div&gt;" style="ellipse;shape=cloud;whiteSpace=wrap;html=1;" parent="1" vertex="1">
<mxGeometry x="660" y="280" width="120" height="80" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-7" value="Task scheduler" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#CCFFCC;" parent="1" vertex="1">
<mxGeometry x="310" y="80" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-9" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="HB7L-xhzdcljNeKlnPmA-7" target="HB7L-xhzdcljNeKlnPmA-8" edge="1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-11" value="Triggers" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-9" vertex="1" connectable="0">
<mxGeometry x="0.1381" y="-1" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.049;entryY=0.49;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="9WAs7ZXHk1DkFdnagY5z-1" target="HB7L-xhzdcljNeKlnPmA-2" edge="1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-16" value="&lt;div&gt;Write access&lt;/div&gt;" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-15" vertex="1" connectable="0">
<mxGeometry x="-0.0696" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-19" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="HB7L-xhzdcljNeKlnPmA-8" target="HB7L-xhzdcljNeKlnPmA-18" edge="1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-20" value="Read/write access" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-19" vertex="1" connectable="0">
<mxGeometry x="-0.2753" y="-2" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="9WAs7ZXHk1DkFdnagY5z-2" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" edge="1" parent="1" source="HB7L-xhzdcljNeKlnPmA-8" target="9WAs7ZXHk1DkFdnagY5z-1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="9WAs7ZXHk1DkFdnagY5z-3" value="POSTS to " style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="9WAs7ZXHk1DkFdnagY5z-2">
<mxGeometry x="0.0278" y="-4" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-8" value="Task runner" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#CCFFCC;" parent="1" vertex="1">
<mxGeometry x="310" y="250" width="120" height="60" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-12" value="&lt;h1 style=&quot;margin-top: 0px;&quot;&gt;Tasks&lt;/h1&gt;&lt;p&gt;* read TIFF images&lt;/p&gt;&lt;p&gt;* extract data and create metadata&lt;/p&gt;&lt;p&gt;* upload extracts to s3 storage&lt;br&gt;&lt;/p&gt;" style="text;html=1;whiteSpace=wrap;overflow=hidden;rounded=0;" parent="1" vertex="1">
<mxGeometry x="480" y="10" width="230" height="120" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-13" value="" style="endArrow=classic;html=1;rounded=0;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=0.971;entryY=0.423;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="HB7L-xhzdcljNeKlnPmA-8" target="HB7L-xhzdcljNeKlnPmA-1" edge="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="460" y="290" as="sourcePoint" />
<mxPoint x="510" y="240" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-14" value="Read access" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-13" vertex="1" connectable="0">
<mxGeometry x="-0.0066" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-18" value="Metadata storage" style="shape=cylinder3;whiteSpace=wrap;html=1;boundedLbl=1;backgroundOutline=1;size=15;fillColor=#CCFFCC;" parent="1" vertex="1">
<mxGeometry x="420" y="390" width="60" height="80" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=1;entryY=0.5;entryDx=0;entryDy=0;entryPerimeter=0;" parent="1" source="HB7L-xhzdcljNeKlnPmA-21" target="HB7L-xhzdcljNeKlnPmA-18" edge="1">
<mxGeometry relative="1" as="geometry">
<mxPoint x="500" y="450" as="targetPoint" />
<Array as="points" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-24" value="Read access" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-22" vertex="1" connectable="0">
<mxGeometry x="-0.0122" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-25" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" parent="1" source="HB7L-xhzdcljNeKlnPmA-21" target="HB7L-xhzdcljNeKlnPmA-2" edge="1">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-26" value="Read access" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="HB7L-xhzdcljNeKlnPmA-25" vertex="1" connectable="0">
<mxGeometry x="-0.0135" y="2" relative="1" as="geometry">
<mxPoint as="offset" />
</mxGeometry>
</mxCell>
<mxCell id="HB7L-xhzdcljNeKlnPmA-21" value="&lt;div&gt;Posit / Datalabs&lt;/div&gt;&lt;div&gt;(streamlit / fastapi)&lt;br&gt;&lt;/div&gt;" style="ellipse;shape=cloud;whiteSpace=wrap;html=1;" parent="1" vertex="1">
<mxGeometry x="630" y="420" width="120" height="70" as="geometry" />
</mxCell>
<mxCell id="9WAs7ZXHk1DkFdnagY5z-1" value="&lt;div&gt;Object store API &lt;br&gt;&lt;/div&gt;&lt;div&gt;(python / fastapi) &lt;br&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#CCFFCC;" vertex="1" parent="1">
<mxGeometry x="510" y="150" width="130" height="40" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>
7 changes: 7 additions & 0 deletions presentation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Overview presentation

A developing overview of the aims and state of this project.

Please see the seed project at [quarto-revealjs-template](https://github.com/ukceh-rse/quarto-revealjs-template) for detailed documentation.

In short, within this directory run `quarto preview presentation.qmd`.
1 change: 1 addition & 0 deletions presentation/images/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.svg
Binary file added presentation/images/API_list_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added presentation/images/PP_wave-01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added presentation/images/PP_wave-02.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added presentation/images/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added presentation/images/logo_white.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added presentation/images/luigi_screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
94 changes: 94 additions & 0 deletions presentation/presentation.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
---
title: Plankton ML Pipelines
subtitle: Workflows for re-use of image machine learning models
date: last-modified
date-format: long

format:
revealjs:
theme: [default, ukceh.scss]
logo: images/logo.png
footer: Slides published at <a href="https://ukceh-rse.github.io/quarto-revealjs-template">ukceh-rse.github.io/quarto-revealjs-template</a>
slide-number: c/t
show-slide-number: all
progress: true
embed-resources: true
title-slide-attributes:
data-background-image: images/PP_wave-01.png
header-includes: |
<script src="ukceh.js" type="application/javascript"></script>
affiliations:
- id: ukceh
name: UK Centre for Ecology & Hydrology
url: https://ceh.ac.uk

authors:
- name: Jo Walsh, Alba Gomez Segura
affiliation:
- ref: ukceh
email: [email protected]
url: https://github.com/metazool
orcid: 0000-0003-0268-2263

bibliography: references.bib
---

## Plankton machine learning pipelines

* Building on work done as a Turing collaboration
* Intended to build reusable workflows for other image domains
* APIs to simplify working with models and storage
* Pipelines, classification and embeddings

## Workflow - data, processing, cloud storage

![workflow, high-level](images/workflow.png)

## Luigi pipeline

* Simple Python package similar to R's targets, originally from Spotify
* Usage from AMI and other Biodiversity projects
* Great for prototyping, probably less so for production

## Object store API

<https://github.com/NERC-CEH/object_store_api/>

![](images/API_list_example.png)

## Choice of models

* Several off-the-shelf models published by Turing Inst
* One trained on CEFAS data several years ago, published through sci.vision
* Several newer ones, oriented to marine plankton, both light and heavy to run

https://github.com/alan-turing-institute/ViT-LASNet

## Image embeddings

* Image machine learning models are useful as a source of _embeddings_
* An embedding is a vector of numbers which the model uses as input to classification
* Use of embeddings for search and retrieval is a common technique for working with Large Language Models

## Embeddings visualiser

![](images/embeddings_similarity_screenshot.png)

* similarity search
* clustering (simplest K-means clustering shown)

## Image annotation

https://github.com/NERC-CEH/cyto-ML - Ezra's original work with contributions from Phil and Erica

Discussion about whether we can adapt an off-the-shelf package rather than DIY

The "Seeding success" project is using Label Studio, which looks great

## Thanks for listening! { .no-logo .no-footer .closing-slide .center background-image="images/PP_wave-02.png"}

https://github.com/NERC-CEH/plankton_ml

https://github.com/NERC-CEH/cyto-ML


## References
49 changes: 49 additions & 0 deletions presentation/ukceh.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

/**
 * Show or hide every `.slide-logo` element to suit the current slide.
 *
 * The logo is hidden when the current slide matches `#title-slide` or carries
 * the `no-logo` class; on every other slide it is displayed as a block.
 *
 * @param {Object} event - Slide event; only `event.currentSlide` is read.
 */
function toggleLogo(event) {
  const logos = document.querySelectorAll(".slide-logo");
  const slide = event.currentSlide;
  const hideLogo = slide.matches('#title-slide') || slide.classList.contains('no-logo');
  // Work out the display value once, then apply it to every logo element.
  const displayValue = hideLogo ? "none" : "block";
  logos.forEach((logo) => {
    logo.style.display = displayValue;
  });
}

/**
 * Show or hide every `div.footer-default` element to suit the current slide.
 *
 * The footer is hidden when the current slide matches `#title-slide` or
 * carries the `no-footer` class; on every other slide it is displayed as a block.
 *
 * @param {Object} event - Slide event; only `event.currentSlide` is read.
 */
function toggleFooter(event) {
  const footers = document.querySelectorAll("div.footer-default");
  const current = event.currentSlide;

  let footerDisplay = "block";
  if (current.matches('#title-slide') || current.classList.contains('no-footer')) {
    // Title slide, or a slide that opted out of the footer.
    footerDisplay = "none";
  }

  footers.forEach(function (footer) {
    footer.style.display = footerDisplay;
  });
}

/**
 * Page-load setup for logo and footer visibility.
 *
 * 1. Hides every `.slide-logo` and `div.footer-default` element.
 * 2. Re-evaluates both for each `slidechanged` event from Reveal.
 * 3. Also evaluates them once for the slide the deck starts on. Without this,
 *    a deck opened directly on a non-title slide (deep link or reload) keeps
 *    the logo and footer hidden until the user navigates.
 */
window.addEventListener("load", (event) => {
  // Hide initially; the correct state is applied below once the slide is known.
  document.querySelectorAll(".slide-logo").forEach(function(elem) {
    elem.style.display = "none";
  });
  document.querySelectorAll("div.footer-default").forEach(function(elem) {
    elem.style.display = "none";
  });

  // Apply both toggles for an event that carries `currentSlide`.
  function applyToSlide(slideEvent) {
    toggleLogo(slideEvent);
    toggleFooter(slideEvent);
  }

  Reveal.on("slidechanged", applyToSlide);

  if (Reveal.isReady()) {
    // Reveal finished initialising before "load" fired, so the "ready" event
    // has already passed; use the current slide directly.
    var startSlide = Reveal.getCurrentSlide();
    if (startSlide) {
      applyToSlide({ currentSlide: startSlide });
    }
  } else {
    // The "ready" event also exposes `currentSlide`.
    Reveal.on("ready", applyToSlide);
  }
});


58 changes: 58 additions & 0 deletions presentation/ukceh.scss
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*-- scss:defaults --*/

// Theme variable overrides. `!default` only assigns a value when the variable
// has not already been set.
$body-bg: whitesmoke !default;
$body-color: black !default;
$link-color: green !default;
$presentation-heading-color: steelblue !default;
$presentation-h1-font-size: 2.0em !default;
$presentation-title-slide-text-align: left !default;

/*-- scss:rules --*/

.reveal {
  // Logo: natural size, capped in height, offset from the left edge.
  .slide-logo {
    width: auto !important;
    height: auto !important;
    max-height: 4rem !important;
    left: 16px;
  }

  // Menu button pinned to the top-left corner.
  .slide-menu-button {
    left: 8px;
    bottom: unset;
    top: 8px;
  }

  // Slide number pinned to the bottom-right corner.
  .slide-number {
    top: unset !important;
    right: 8px !important;
    bottom: 8px !important;
    font-size: 24px !important;
  }

  // First slide: white text, including its h1.
  .slides > section:first-child {
    color: white !important;

    h1 {
      color: white !important;
    }
  }

  // Hide the slide-number link that points at the title slide.
  .slide-number > a[href*='title-slide'] {
    display: none;
  }
}

// Closing slide: white body text and white headings at every level.
.closing-slide {
  color: white;

  h1,
  h2,
  h3,
  h4,
  h5,
  h6 {
    color: white;
  }
}


2 changes: 1 addition & 1 deletion scripts/params.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cluster:
n_clusters: 5

collection: test-upload-alba
collection: untagged-images-lana

0 comments on commit d67820f

Please sign in to comment.