Skip to content

Commit

Permalink
refactor: Move data collect from client to server
Browse files Browse the repository at this point in the history
  • Loading branch information
annelhote committed Nov 16, 2023
1 parent 07eef70 commit 3a71428
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 188 deletions.
7 changes: 1 addition & 6 deletions client/.env
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
VITE_API=/api
VITE_APP_NAME="Works finder"
VITE_BSO_MAX_SIZE=10000
VITE_BSO_DATASETS_INDEX="bso3-datacite-20230413"
VITE_BSO_PUBLICATIONS_INDEX="bso-publications"
VITE_DESCRIPTION="Retrieve the scholarly works of your institution"
VITE_GIT_REPOSITORY_URL="https://github.com/dataesr/works-finder/"
VITE_HEADER_TAG="dev"
VITE_HEADER_TAG=dev
VITE_HEADER_TAG_COLOR="green-emeraude"
VITE_MINISTER_NAME="Ministère de l'enseignement supérieur et de la recherche"
VITE_OPENALEX_PER_PAGE=200
VITE_OPENALEX_SIZE=10000
VITE_VERSION=$npm_package_version
3 changes: 1 addition & 2 deletions client/.env.staging
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
VITE_BSO_MAX_SIZE=0
VITE_HEADER_TAG="staging"
VITE_HEADER_TAG=staging
43 changes: 2 additions & 41 deletions client/src/pages/home/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,54 +32,15 @@ import {
getAuthorsTooltipField,
} from '../../utils/templates';
import {
getBsoCount,
getBsoWorks,
getOpenAlexPublications,
mergePublications,
getData,
} from '../../utils/works';
import { status } from '../../config';

import 'primereact/resources/primereact.min.css';
import 'primereact/resources/themes/lara-light-indigo/theme.css';

const {
VITE_BSO_MAX_SIZE,
VITE_BSO_DATASETS_INDEX,
VITE_BSO_PUBLICATIONS_INDEX,
} = import.meta.env;

const DATASOURCES = [{ key: 'bso', label: 'French OSM' }, { key: 'openalex', label: 'OpenAlex' }];

/**
 * Collect publications (BSO + OpenAlex) and datasets (BSO), then deduplicate
 * the publications by identifier.
 *
 * @param {object} options - Search options, forwarded unchanged to the data source helpers.
 * @returns {Promise<{datasets: Array, publications: Array, total: object}>}
 *   `total` holds one count per datasource plus `dataset`, `all` (before
 *   deduplication) and `deduplicated` (after deduplication).
 */
const getData = async (options) => {
  const data = { datasets: [], publications: [], total: {} };
  // The three requests do not depend on each other, so they are issued concurrently.
  const [bsoPublications, openAlexPublications, dataset] = await Promise.all([
    getBsoWorks({ options, index: VITE_BSO_PUBLICATIONS_INDEX }),
    getOpenAlexPublications(options),
    getBsoWorks({ options, index: VITE_BSO_DATASETS_INDEX }),
  ]);
  [bsoPublications, openAlexPublications].forEach((publication) => {
    data.publications = [...data.publications, ...publication.results];
    data.total[publication.datasource] = publication.total;
  });
  data.datasets = [...data.datasets, ...dataset.results];
  data.total.dataset = dataset.total;
  // A BSO total of 0 or equal to the configured max size is replaced by the
  // value of the dedicated count endpoint (presumably because the search
  // total is capped at that size - TODO confirm).
  const bsoTotal = Number(data.total.bso);
  if (bsoTotal === 0 || bsoTotal === Number(VITE_BSO_MAX_SIZE)) {
    const { count } = await getBsoCount(options);
    data.total.bso = count;
  }
  // Deduplicate publications by DOI or by hal_id
  data.total.all = data.publications.length;
  const deduplicatedPublications = {};
  data.publications.forEach((publication) => {
    const id = publication?.doi ?? publication?.primary_location?.landing_page_url?.split('/')?.pop() ?? publication.id;
    // Own-key lookup is O(1); rebuilding Object.keys() for each publication was O(n) per item.
    const isDuplicate = Object.prototype.hasOwnProperty.call(deduplicatedPublications, id);
    deduplicatedPublications[id] = isDuplicate
      ? mergePublications(deduplicatedPublications[id], publication)
      : publication;
  });
  data.publications = Object.values(deduplicatedPublications);
  data.total.deduplicated = data.publications.length;
  return data;
};

export default function Home() {
const [affiliationsNotice, setAffiliationsNotice] = useState(true);
const [allAffiliations, setAllAffiliations] = useState([]);
Expand Down Expand Up @@ -218,7 +179,7 @@ export default function Home() {
setAllDatasets(allDatasetsTmp);
setAllPublications(allPublicationsTmp);
setFilteredPublications(allPublicationsTmp);
const allYears = [...new Set(allPublicationsTmp.map((publication) => publication?.year))];
const allYears = [...new Set(allPublicationsTmp.map((publication) => publication?.year).filter((year) => !!year))];
setYears(allYears);
setFilteredYears(allYears);
const allTypes = [...new Set(allPublicationsTmp.map((publication) => publication?.type))];
Expand Down
1 change: 0 additions & 1 deletion client/src/utils/templates.jsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/* eslint-disable react/no-danger */
/* eslint-disable react/no-array-index-key */
import { Badge } from '@dataesr/react-dsfr';
import { Dropdown } from 'primereact/dropdown';
import { Tooltip } from 'react-tooltip';

import { getIdLink } from './works';
Expand Down
141 changes: 10 additions & 131 deletions client/src/utils/works.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
const {
VITE_API,
VITE_OPENALEX_PER_PAGE,
VITE_OPENALEX_SIZE,
} = import.meta.env;

const VITE_OPENALEX_MAX_PAGE = Math.floor(VITE_OPENALEX_SIZE / VITE_OPENALEX_PER_PAGE);

const getBsoCount = (options) => {
const urlParams = new URLSearchParams(options).toString();
return fetch(`${VITE_API}/bso/count?${urlParams}`)
Expand All @@ -24,6 +20,15 @@ const getBsoWorks = async ({ options, index }) => {
});
};

/**
 * Query the `/works` endpoint of the API with the given options.
 *
 * @param {object} options - Search options, serialized as the query string.
 * @returns {Promise<object|string>} The parsed JSON body when the response is
 *   OK, otherwise an error message string.
 */
const getData = async (options) => {
  const query = new URLSearchParams(options).toString();
  const response = await fetch(`${VITE_API}/works?${query}`);
  if (!response.ok) {
    return 'Oops... BSO API request did not work';
  }
  return response.json();
};

const getIdLink = (type, id) => {
let prefix = null;
switch (type) {
Expand All @@ -49,135 +54,9 @@ const getIdLink = (type, id) => {
return (prefix !== null) ? `${prefix}${id}` : false;
};

/**
 * Strip the known resolver URL bases (DOI, OpenAlex, PubMed, PMC) from an identifier.
 *
 * @param {string} id - Identifier, possibly given as a full URL.
 * @returns {string|null} The identifier without the URL base, or null when `id` is falsy.
 */
const getIdValue = (id) => {
  if (!id) return null;
  const urlBases = [
    'https://doi.org/',
    'https://openalex.org/',
    'https://pubmed.ncbi.nlm.nih.gov/',
    'https://www.ncbi.nlm.nih.gov/pmc/articles/',
  ];
  // Only the first occurrence of each base is removed, in the order listed.
  return urlBases.reduce((value, base) => value.replace(base, ''), id);
};

/**
 * Build the flat list of affiliations of a publication from its authorships.
 *
 * An authorship with exactly one raw affiliation string yields `{ name, ror }`
 * (`ror` taken from its first institution, when present). Any other authorship
 * yields one `{ name }` per raw affiliation string.
 *
 * @param {object} publication - Publication carrying `authorships` or `affiliations`.
 * @returns {Array<object>|undefined} The affiliations, or `publication.affiliations`
 *   when the publication has no authorships.
 */
const getAffilitionsFromOpenAlex = (publication) => {
  const authorships = publication?.authorships;
  if (!authorships) return publication.affiliations;
  return authorships.flatMap((authorship) => {
    const names = authorship.raw_affiliation_strings;
    if (names.length !== 1) return names.map((name) => ({ name }));
    const single = { name: names[0] };
    const ror = authorship?.institutions?.[0]?.ror;
    if (ror) single.ror = authorship.institutions[0].ror;
    return single;
  });
};

// Normalized type -> raw OpenAlex types that collapse into it.
const OPENALEX_TYPE_GROUPS = [
  ['other', [
    'component',
    'dissertation',
    'editorial',
    'erratum',
    'grant',
    'journal',
    'journal-issue',
    'journal-volume',
    'letter',
    'paratext',
    'peer-review',
    'reference-entry',
    'report',
    'report-series',
    'standard',
  ]],
  ['book', ['book-series', 'book-set', 'monograph', 'reference-book']],
  ['proceedings', ['proceedings-article', 'proceedings-series']],
  ['journal-article', ['article']],
  ['book-chapter', ['book-part']],
  ['preprint', ['posted-content']],
];

// Flat raw type -> normalized type lookup derived from the groups above.
const OPENALEX_TYPE_LOOKUP = new Map(
  OPENALEX_TYPE_GROUPS.flatMap(([normalized, rawTypes]) => rawTypes.map((rawType) => [rawType, normalized])),
);

/**
 * Normalize an OpenAlex work type.
 *
 * @param {string} type - Raw work type.
 * @returns {string} The normalized type, or `type` itself when it has no mapping.
 */
const getTypeFromOpenAlex = (type) => (
  OPENALEX_TYPE_LOOKUP.has(type) ? OPENALEX_TYPE_LOOKUP.get(type) : type
);

// Build the OpenAlex proxy URL (without the `page` parameter) for the given options.
const buildOpenAlexUrl = (options) => {
  let url = `${VITE_API}/openalex?per_page=${Math.min(VITE_OPENALEX_SIZE, VITE_OPENALEX_PER_PAGE)}`;
  url += '&filter=is_paratext:false';
  if (options?.startYear && options?.endYear) {
    url += `,publication_year:${Number(options.startYear)}-${Number(options?.endYear)}`;
  } else if (options?.startYear) {
    url += `,publication_year:${Number(options.startYear)}-`;
  } else if (options?.endYear) {
    url += `,publication_year:-${Number(options.endYear)}`;
  }
  if (options.affiliations.length) {
    url += `,raw_affiliation_string.search:(${options.affiliations.map((aff) => `"${aff}"`).join(' OR ')})`;
  }
  url += '&select=authorships,display_name,doi,id,ids,publication_year,type';
  return url;
};

// Fetch `page` and the following ones, accumulating the raw (unformatted) results.
const fetchOpenAlexPages = async (url, page, previousResults) => {
  const response = await fetch(`${url}&page=${page}`);
  if (!response.ok) throw new Error('Oops... OpenAlex API request did not work');
  const body = await response.json();
  const results = [...previousResults, ...body.results];
  const nextPage = Number(page) + 1;
  // A full page means there may be more results; stop at the configured page limit.
  if (Number(body.results.length) === Number(VITE_OPENALEX_PER_PAGE) && nextPage <= VITE_OPENALEX_MAX_PAGE) {
    return fetchOpenAlexPages(url, nextPage, results);
  }
  return { total: body.meta.count, results };
};

// Convert one OpenAlex work into the publication shape used by the app. The
// fallbacks (`allIds`, `authors`, `title`, `year`) keep entries that are
// already in that shape unchanged.
const formatOpenAlexPublication = (result) => ({
  affiliations: getAffilitionsFromOpenAlex(result),
  allIds: result?.ids ? Object.keys(result.ids).map((key) => ({ id_type: key, id_value: getIdValue(result.ids[key]) })) : result.allIds,
  authors: result?.authorships?.map((author) => ({ ...author, full_name: author.author.display_name })) ?? result.authors,
  datasource: 'openalex',
  doi: getIdValue(result?.doi),
  id: result?.doi ? getIdValue(result.doi) : result.id,
  original: result,
  title: result?.display_name ?? result.title,
  type: getTypeFromOpenAlex(result.type),
  // Choose the source value BEFORE converting: Number() never returns null or
  // undefined, so `Number(a) ?? Number(b)` could never reach the fallback.
  year: Number(result?.publication_year ?? result.year),
});

/**
 * Retrieve the OpenAlex publications matching the options, following the
 * pagination up to the configured maximum number of pages.
 *
 * Pages are first accumulated as raw results and formatted exactly once at the
 * end. Formatting at every recursion level re-processed already formatted
 * entries, which turned `year` into NaN and nested `original` on multi-page
 * queries.
 *
 * @param {object} options - `affiliations` (array of strings) and optional `startYear` / `endYear`.
 * @param {string|number} [page='1'] - First page to request.
 * @param {Array<object>} [previousResponse=[]] - Results to prepend to the fetched ones.
 * @returns {Promise<{datasource: string, total: number, results: Array<object>}>}
 * @throws {Error} When a page request does not return an OK response.
 */
const getOpenAlexPublications = async (options, page = '1', previousResponse = []) => {
  const url = buildOpenAlexUrl(options);
  const { total, results } = await fetchOpenAlexPages(url, page, previousResponse);
  return {
    datasource: 'openalex',
    total,
    results: results.map(formatOpenAlexPublication),
  };
};

/**
 * Merge two records describing the same publication.
 *
 * Scalar fields come from the BSO record when one of the two is from BSO,
 * otherwise from `publi1`. Affiliations are concatenated, `allIds` are made
 * unique by `id_value` and authors by `full_name` (last occurrence wins).
 *
 * @param {object} publi1 - First publication.
 * @param {object} publi2 - Second publication.
 * @returns {object} The merged publication. `datasource` lists the distinct
 *   sources of both records ('bso, openalex' for a cross-source merge).
 */
const mergePublications = (publi1, publi2) => {
  const publications = [publi1, publi2];
  const priorityPublication = publications.find((publi) => publi.datasource === 'bso') ?? publi1;
  // Same key order and "last one wins" semantics as an object built key by
  // key, without copying the accumulator for every item.
  const uniqueBy = (items, key) => Object.values(Object.fromEntries(items.map((item) => [item[key], item])));
  // Reflect the real origin of the records: two records from the same source
  // must not be labelled as coming from both sources.
  const datasources = [...new Set(
    publications.flatMap((publi) => String(publi.datasource ?? '').split(', ')),
  )].filter(Boolean).sort();
  return ({
    ...priorityPublication,
    // A record may lack one of these lists; treat it as empty instead of throwing.
    affiliations: [...(publi1.affiliations ?? []), ...(publi2.affiliations ?? [])],
    // Filter allIds by unique values
    allIds: uniqueBy([...(publi1.allIds ?? []), ...(publi2.allIds ?? [])], 'id_value'),
    // Filter authors by unique full_name
    authors: uniqueBy([...(publi1.authors ?? []), ...(publi2.authors ?? [])], 'full_name'),
    datasource: datasources.join(', '),
  });
};

export {
getBsoCount,
getBsoWorks,
getData,
getIdLink,
getOpenAlexPublications,
mergePublications,
};
8 changes: 5 additions & 3 deletions server/src/router.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import express from 'express';

import openalexRouter from './routes/openalex';
import bsoRouter from './routes/bso';
import bsoRouter from './routes/bso.routes';
import openalexRouter from './routes/openalex.routes';
import worksRouter from './routes/works.routes';

const router = new express.Router();

router.use(openalexRouter);
router.use(bsoRouter);
router.use(openalexRouter);
router.use(worksRouter);

export default router;
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ router.route('/openalex')
}
const url = `https://api.openalex.org/works?${new URLSearchParams(urlParams)}`;
try {
let response = await fetch(url, { method: 'GET' });
let response = await fetch(url);
response = await response.json();
res.status(200).json(response);
} catch (err) {
Expand Down
56 changes: 56 additions & 0 deletions server/src/routes/works.routes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import express from 'express';

import {
getBsoCount,
getBsoWorks,
getOpenAlexPublications,
mergePublications,
} from '../utils';

const router = new express.Router();

/**
 * GET /works
 *
 * Collects publications (BSO + OpenAlex) and datasets (BSO) for the options
 * given in the query string, deduplicates the publications and responds with
 * `{ datasets, publications, total }`.
 *
 * Responds 400 when no `affiliations` query parameter is provided and 500 when
 * anything throws while collecting the data.
 */
router.route('/works')
  .get(async (req, res) => {
    try {
      const options = req?.query ?? {};
      if (!options?.affiliations) {
        res.status(400).json({ message: 'You must provide at least one affiliation.' });
        return;
      }
      const data = { datasets: [], publications: [], total: {} };
      // The three requests do not depend on each other, so they are issued concurrently.
      const [bsoPublications, openAlexPublications, dataset] = await Promise.all([
        getBsoWorks({ options, index: process.env.VITE_BSO_PUBLICATIONS_INDEX }),
        getOpenAlexPublications(options),
        getBsoWorks({ options, index: process.env.VITE_BSO_DATASETS_INDEX }),
      ]);
      [bsoPublications, openAlexPublications].forEach((publication) => {
        data.publications = [...data.publications, ...publication.results];
        data.total[publication.datasource] = publication.total;
      });
      data.datasets = [...data.datasets, ...dataset.results];
      data.total.dataset = dataset.total;
      // A BSO total of 0 or equal to the configured max size is replaced by the
      // value of the dedicated count helper (presumably because the search
      // total is capped at that size - TODO confirm).
      const bsoTotal = Number(data.total.bso);
      if (bsoTotal === 0 || bsoTotal === Number(process.env.VITE_BSO_MAX_SIZE)) {
        const { count } = await getBsoCount(options);
        data.total.bso = count;
      }
      // Deduplicate publications by DOI or by hal_id
      data.total.all = data.publications.length;
      const deduplicatedPublications = {};
      data.publications.forEach((publication) => {
        const id = publication?.doi ?? publication?.primary_location?.landing_page_url?.split('/')?.pop() ?? publication.id;
        // Own-key lookup is O(1); rebuilding Object.keys() for each publication was O(n) per item.
        const isDuplicate = Object.prototype.hasOwnProperty.call(deduplicatedPublications, id);
        deduplicatedPublications[id] = isDuplicate
          ? mergePublications(deduplicatedPublications[id], publication)
          : publication;
      });
      data.publications = Object.values(deduplicatedPublications);
      data.total.deduplicated = data.publications.length;
      res.status(200).json(data);
    } catch (err) {
      console.error(err);
      res.status(500).json({ message: 'Internal Server Error.' });
    }
  });

export default router;
Loading

0 comments on commit 3a71428

Please sign in to comment.