diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index b120129..59c6af8 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -11,9 +11,9 @@ jobs: permissions: id-token: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 489f0f6..3d158c5 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -15,10 +15,10 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 69e650b..0000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,10 +0,0 @@ -version: 2 -build: - os: ubuntu-22.04 - tools: - python: "3.12" -mkdocs: - configuration: mkdocs.yml -python: - install: - - requirements: requirements/requirements-docs.txt diff --git a/README.md b/README.md index dd0ac2a..3e4ca98 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/geofetch/README.html) -`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and create metadata tables in [standard PEP format](https://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/). 
You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. + +**geofetch** is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates metadata tables in [standard PEP format](https://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. + ## Key geofetch features: @@ -20,4 +22,45 @@ - Can search GEO to find relevant data - Can be used either as a command-line tool or from within Python using an API -For more information, see [complete documentation at https://pep.databio.org/geofetch](https://pep.databio.org/geofetch). + +## Docs + +--- + +**Documentation**: https://pep.databio.org/geofetch/ + +**Source Code**: https://github.com/pepkit/geofetch/ + +--- + + +## Installation +To install `geofetch` use this command: +``` +pip install geofetch +``` +or install the latest version from the GitHub repository: +``` +pip install git+https://github.com/pepkit/geofetch.git +``` + + +## How to cite: +https://doi.org/10.1093/bioinformatics/btad069 +```bibtex +@article{10.1093/bioinformatics/btad069, + author = {Khoroshevskyi, Oleksandr and LeRoy, Nathan and Reuter, Vincent P and Sheffield, Nathan C}, + title = "{GEOfetch: a command-line tool for downloading data and standardized metadata from GEO and SRA}", + journal = {Bioinformatics}, + volume = {39}, + number = {3}, + pages = {btad069}, + year = {2023}, + month = {03}, + abstract = "{The Gene Expression Omnibus has become an important source of biological data for secondary analysis. However, there is no simple, programmatic way to download data and metadata from Gene Expression Omnibus (GEO) in a standardized annotation format.To address this, we present GEOfetch—a command-line tool that downloads and organizes data and metadata from GEO and SRA. 
GEOfetch formats the downloaded metadata as a Portable Encapsulated Project, providing universal format for the reanalysis of public data.GEOfetch is available on Bioconda and the Python Package Index (PyPI).}", + issn = {1367-4811}, + doi = {10.1093/bioinformatics/btad069}, + url = {https://doi.org/10.1093/bioinformatics/btad069}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/39/3/btad069/49407404/btad069.pdf}, +} +``` diff --git a/docs/img/geofetch_logo.svg b/docs/img/geofetch_logo.svg new file mode 100644 index 0000000..372c82c --- /dev/null +++ b/docs/img/geofetch_logo.svg @@ -0,0 +1,141 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + geofetch + + + + + + + + + + + + diff --git a/geofetch/__init__.py b/geofetch/__init__.py index fcd9139..2e3620e 100644 --- a/geofetch/__init__.py +++ b/geofetch/__init__.py @@ -1,12 +1,11 @@ """ Package-level data """ -import logmuse import coloredlogs +import logmuse -from geofetch.geofetch import Geofetcher -from geofetch.finder import Finder from geofetch._version import __version__ - +from geofetch.finder import Finder +from geofetch.geofetch import Geofetcher __author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"] __all__ = ["Finder", "Geofetcher", "__version__"] diff --git a/geofetch/__main__.py b/geofetch/__main__.py index 97e5466..077dace 100644 --- a/geofetch/__main__.py +++ b/geofetch/__main__.py @@ -1,4 +1,5 @@ import sys + from geofetch.geofetch import main if __name__ == "__main__": diff --git a/geofetch/_version.py b/geofetch/_version.py index 8e2394f..6ece8ad 100644 --- a/geofetch/_version.py +++ b/geofetch/_version.py @@ -1 +1 @@ -__version__ = "0.12.6" +__version__ = "0.12.7" diff --git a/geofetch/cli.py b/geofetch/cli.py index 6bb96b7..168b2a3 100644 --- a/geofetch/cli.py +++ b/geofetch/cli.py @@ -1,6 +1,8 @@ import argparse import os + import logmuse + from geofetch._version import __version__ diff --git a/geofetch/finder.py b/geofetch/finder.py index 
e41405e..587e1ae 100644 --- a/geofetch/finder.py +++ b/geofetch/finder.py @@ -1,19 +1,20 @@ +import logging +import os +import re +from datetime import datetime, timedelta + +import coloredlogs +import requests +import xmltodict + from .const import ( - RETMAX, - ETOOLS_GEO_GSE_BASE, - ETOOLS_ENDING, - TODAY_DATE, DATE_FILTER, + ETOOLS_ENDING, + ETOOLS_GEO_GSE_BASE, + RETMAX, THREE_MONTH_FILTER, + TODAY_DATE, ) -import requests -import xmltodict -import re -import os -import logging -import coloredlogs -from datetime import datetime -from datetime import timedelta __author__ = "Oleksandr Khoroshevskyi" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index bdbd6a1..b490428 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1,65 +1,65 @@ import copy import csv +import logging import os +import re import sys +import time +from typing import Dict, List, NoReturn, Tuple, Union + +import logmuse +import pandas as pd +import peppy import requests import xmltodict import yaml -import time -import logging - from rich.progress import track -import re -import logmuse from ubiquerg import expandpath -from typing import List, Union, Dict, Tuple, NoReturn -import peppy -import pandas as pd from geofetch.cli import _parse_cmdl from geofetch.const import ( - GSE_PATTERN, - SAMPLE_SUPP_METADATA_FILE, + CONFIG_PROCESSED_TEMPLATE_NAME, + CONFIG_RAW_TEMPLATE_NAME, + CONFIG_SRA_TEMPLATE, EXP_SUPP_METADATA_FILE, - NEW_GENOME_COL_NAME, + EXPERIMENT_PATTERN, FILE_RAW_NAME_SAMPLE_PATTERN, FILE_RAW_NAME_SUBSAMPLE_PATTERN, - CONFIG_RAW_TEMPLATE_NAME, - CONFIG_SRA_TEMPLATE, - CONFIG_PROCESSED_TEMPLATE_NAME, + GSE_PATTERN, + NCBI_EFETCH, + NCBI_ESEARCH, + NEW_GENOME_COL_NAME, NUM_RETRIES, + PROJECT_PATTERN, + SAMPLE_SUPP_METADATA_FILE, SER_SUPP_FILE_PATTERN, SUPP_FILE_PATTERN, - PROJECT_PATTERN, - NCBI_EFETCH, - NCBI_ESEARCH, - EXPERIMENT_PATTERN, ) from geofetch.utils import ( Accession, - build_prefetch_command, - parse_accessions, - parse_SOFT_line, - convert_size, - 
clean_soft_files, - run_subprocess, + _check_file_existance, + _create_dot_yaml, + _dict_to_list_converter, + _filter_gsm, _get_list_of_keys, _get_value, _read_tar_filelist, - _check_file_existance, - _separate_list_of_files, - _update_columns, - _sanitize_name, _sanitize_config_string, - _create_dot_yaml, - _which, - _dict_to_list_converter, - _standardize_colnames, + _sanitize_name, _separate_file_url, - _filter_gsm, + _separate_list_of_files, + _standardize_colnames, _unify_list_keys, + _update_columns, + _which, + build_prefetch_command, + clean_soft_files, + convert_size, gse_content_to_dict, is_prefetch_callable, + parse_accessions, + parse_SOFT_line, + run_subprocess, ) _LOGGER = logging.getLogger(__name__) diff --git a/geofetch/sraconvert.py b/geofetch/sraconvert.py index d2dd3bc..6e64a9a 100755 --- a/geofetch/sraconvert.py +++ b/geofetch/sraconvert.py @@ -1,10 +1,11 @@ #!/usr/bin/env python -from argparse import ArgumentParser import os -import pypiper -import logmuse import sys +from argparse import ArgumentParser + +import logmuse +import pypiper __version__ = "0.1.0" diff --git a/geofetch/utils.py b/geofetch/utils.py index dcab44f..c006e7b 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -1,14 +1,15 @@ """ Independently-importable utilities to circumvent true scripts. 
""" +import csv import logging import os +import re import subprocess import sys -import re -import requests from io import StringIO -import csv -from typing import Union, List, NoReturn, Dict +from typing import Dict, List, NoReturn, Union + +import requests _LOGGER = logging.getLogger(__name__) diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index e8bc1b9..0000000 --- a/mkdocs.yml +++ /dev/null @@ -1,40 +0,0 @@ -site_name: Geofetch -site_url: http://code.databio.org/geofetch/ -repo_url: http://github.com/pepkit/geofetch -site_logo: img/geofetch_logo_dark.svg -pypi_name: geofetch -paper_link: https://doi.org/10.1093/bioinformatics/btad069 - -nav: - - Getting started: - - Introduction: README.md - - Install and configure: install.md - - SRA convert: sra_convert.md - - Tutorials: - - Tutorial for processed data: processed-data-downloading.md - - Tutorial for raw data: raw-data-downloading.md - - geofetch from within Python: python-usage.md - - GSE Finder: gse_finder.md - - How-to Guides: - - Specifying samples to download: file-specification.md - - Set SRA data download location: howto-location.md - - Run SRA convert: how_to_convert_fastq_from_sra.md - - Reference: - - Metadata output: metadata_output.md - - Usage: usage.md - - FAQ: faq.md - - Support: http://github.com/pepkit/geofetch/issues - - Contributing: contributing.md - - Changelog: changelog.md - -theme: databio - - -plugins: - - search - - databio: - jupyter_source: "docs_jupyter" - jupyter_build: "docs_jupyter/build" - usage_template: "docs/usage_template.md" - usage_cmds: - -"geofetch --help diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 8cc987b..f9b4326 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -4,6 +4,6 @@ ubiquerg>=0.6.2 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.5.3 -peppy>=0.40.0 +peppy>=0.40.6 rich>=12.5.1 coloredlogs>=15.0.1 diff --git a/setup.py b/setup.py index 2d180f2..4cd56f2 100644 --- 
a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ #! /usr/bin/env python import os -from setuptools import setup import sys +from setuptools import setup + PACKAGE = "geofetch" REQDIR = "requirements" @@ -49,6 +50,8 @@ def read_reqs(reqs_name): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering :: Bio-Informatics", ], keywords="project, bioinformatics, sequencing, ngs, workflow, GUI", diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index 5e5d70f..e33fe0e 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -1,11 +1,12 @@ +import os +import shutil + import peppy +import pytest import geofetch from geofetch import Geofetcher, utils from geofetch.utils import parse_accessions -import os -import pytest -import shutil INPUT_ACC_FILE = "tests/test_files/input_acc.txt" GSE_FILES = "tests/test_files/soft_files"