Skip to content

Commit

Permalink
- Generating 0.4.3 release
Browse files Browse the repository at this point in the history
  • Loading branch information
Robert Hubley committed Jan 9, 2023
1 parent c084af6 commit 20c436d
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 9 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## 0.4.3 - Unreleased
## 0.4.3 - 2023-01-09
### Added
- Most subcommands will now accept multiple arguments for a species name and
treat it as a single space-separated string instead of raising an error. For
instance, `famdb.py names homo sapiens` now works exactly the same as
`famdb.py names 'homo sapiens'`
### Changed
- Major change to HDF5 schema (now v0.5) fixes performance issues with scaling
to >>500k families. HDF5 exhibits an increasing insertion time-cost for entries
(datasets or links) within a group. In our original schema families were stored
in a single group. In v0.5 we now bin families by two character prefix bins for
Dfam and Auxiliary families. Currently 0.4.3 is not backwards compatible and
cannot read v0.4 formatted files.
- `export_dfam.py` has been refactored and extended. It subsumes the previous
functionality of `convert_hmm.py`, which has been removed.
### Fixed
Expand Down
15 changes: 10 additions & 5 deletions famdb.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
famdb.py, version 0.4.2
famdb.py, version 0.4.3
Usage: famdb.py [-h] [-l LOG_LEVEL] [-i FILE] command ...
Queries or modifies the contents of a famdb file. For more detailed help
Expand Down Expand Up @@ -743,7 +743,7 @@ def __init__(self, filename, mode="r"):
self.names_dump = json.loads(self.file["TaxaNames"][0])

def __write_metadata(self):
self.file.attrs["generator"] = "famdb.py v0.4.2"
self.file.attrs["generator"] = "famdb.py v0.4.3"
self.file.attrs["version"] = FILE_VERSION
self.file.attrs["created"] = str(datetime.datetime.now())

Expand Down Expand Up @@ -1377,11 +1377,16 @@ def __get_family(entry):
def get_family_by_accession(self, accession):
"""Returns the family with the given accession."""
path = self.__accession_bin(accession)
entry = self.file[path].get(accession)
return self.__get_family(entry)
if path in self.file:
entry = self.file[path].get(accession)
return self.__get_family(entry)
return None

def get_family_by_name(self, name):
"""Returns the family with the given name."""
# TODO: This will also suffer the performance issues seen with
# other groups that exceed 200-500k entries in a single group
# at some point. This needs to be refactored to scale appropriately.
entry = self.file["Families/ByName"].get(name)
return self.__get_family(entry)

Expand Down Expand Up @@ -1743,7 +1748,7 @@ def main():

logging.basicConfig()

parser = argparse.ArgumentParser(description="""This is famdb.py version 0.4.2.
parser = argparse.ArgumentParser(description="""This is famdb.py version 0.4.3.
example commands, including the most commonly used options:
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/family-byname-hmm.args
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
family
Test family TEST0003
TEST0003
-f
hmm
2 changes: 1 addition & 1 deletion tests/cli/family-byname.args
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
family
Test family TEST0003
TEST0003
-f
fasta_name
4 changes: 4 additions & 0 deletions tests/cli/info.out
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
File: /tmp/famdbtestfile.h5
FamDB Generator: famdb.py v0.4.3
FamDB Format Version: 0.5
FamDB Creation Date: 2023-01-09 09:57:56.026443

Database: Test
Version: V1
Date: 2020-07-15
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/usage.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
usage: famdb.py [-h] [-l LOG_LEVEL] [-i FILE]
{info,names,lineage,families,family,append} ...

This is famdb.py version 0.4.2.
This is famdb.py version 0.4.3.

example commands, including the most commonly used options:

Expand Down
6 changes: 6 additions & 0 deletions tests/doubles.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ def init_db_file(filename):
with famdb.FamDB(filename, "w") as db:
db.set_db_info("Test", "V1", "2020-07-15", "Test Database", "<copyright header>")

# Override setting of format metadata for testing
db.file.attrs['generator'] = "famdb.py v0.4.3"
db.file.attrs['version'] = "0.5"
db.file.attrs['created'] = "2023-01-09 09:57:56.026443"


families = [
make_family("TEST0001", [1], "ACGT", "<model1>"),
make_family("TEST0002", [2, 3], None, "<model2>"),
Expand Down
3 changes: 3 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ def test_one(t, filename, spec_path):
args.insert(0, "coverage")
args.insert(1, "run")

print("running: " + str(args))
result = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print("ERROR:" + str(result.stderr))
print("OUT:" + str(result.stdout))

def compare_output(actual, expected_file):
if os.environ.get("FAMDB_TEST_BLESS"):
Expand Down

0 comments on commit 20c436d

Please sign in to comment.