From 651c4af8fdb1c43212c72e4b0160bce1ff13fced Mon Sep 17 00:00:00 2001 From: Hannes Hauswedell Date: Thu, 10 Mar 2022 13:44:13 +0000 Subject: [PATCH] [doc] huge additions and fixes to the documentation --- doc/main_page.md | 9 +- doc/record_based/1_introduction.md | 50 ++++++ doc/record_based/2_record_faq.md | 156 ++++++++++++++++++ include/bio/detail/reader_base.hpp | 15 +- include/bio/misc.hpp | 2 + include/bio/record.hpp | 39 +++-- include/bio/seq_io/reader.hpp | 5 + include/bio/seq_io/reader_options.hpp | 2 + include/bio/var_io/reader.hpp | 3 +- include/bio/var_io/reader_options.hpp | 32 +--- test/documentation/DoxygenLayout.xml | 4 + test/snippet/detail/reader_base.cpp | 52 ++++++ test/snippet/record.cpp | 42 +++++ test/snippet/seq_io/seq_io_reader.cpp | 56 ++++++- test/snippet/seq_io/seq_io_reader.err | 15 ++ test/snippet/seq_io/seq_io_reader_options.cpp | 8 - test/unit/seq_io/data.hpp | 19 +++ 17 files changed, 449 insertions(+), 60 deletions(-) create mode 100644 doc/record_based/1_introduction.md create mode 100644 doc/record_based/2_record_faq.md create mode 100644 test/snippet/detail/reader_base.cpp create mode 100644 test/snippet/record.cpp diff --git a/doc/main_page.md b/doc/main_page.md index 29fe1aa..6909d4d 100644 --- a/doc/main_page.md +++ b/doc/main_page.md @@ -1,12 +1,15 @@ # Welcome {#mainpage} Welcome to the documentation of the B.I.O. library. -This web-site contains the API reference (documentation of our interfaces) and more elaborate Tutorials and -How-Tos. +This web-site contains the API reference (documentation of our interfaces) and some small Tutorials and HowTos. + +B.I.O makes use of SeqAn3 and it is recommended to have a look at [their documentation](https://docs.seqan.de) first. ## Overview +This section contains a very short overview of the most important parts of the library. + ### General IO Utilities @@ -20,7 +23,7 @@ The transparent streams can be used in place of the standard library streams. 
Th compressions such as GZip, BZip2 and BGZip. -### Readers and Writers +### Record-based I/O | Reader | Writer | Description | diff --git a/doc/record_based/1_introduction.md b/doc/record_based/1_introduction.md new file mode 100644 index 0000000..a9fe572 --- /dev/null +++ b/doc/record_based/1_introduction.md @@ -0,0 +1,50 @@ +# Introduction {#record_based_intro} + +Most files in bioinformatics are comprised of *records*, i.e. multiple, independent entries that each consist of one or +more *fields*. +For example, a FastA file contains one or more sequence records that each contain an ID field and a sequence field. + +[TOC] + +``` +>myseq1 +ACGT + +>myseq2 +GAGGA + +>myseq3 +ACTA +``` + +
+↓↓↓↓↓↓↓ +
+ + +| ID field | sequence field | +|:----------:|:--------------:| +| "myseq1" | "ACGT" | +| "myseq2" | "GAGGA" | +| "myseq3" | "ACTA" | + +Each line in this table is conceptually "a record", and each file is modeled as a series of these records. +The process of "reading a file" is transforming the on-disk representation displayed above into the "abstraction" shown below. +The process of "writing a file" is the reverse. + +Details on how records are defined are available here: \ref record_faq + +## Readers + +So-called *readers* are responsible for detecting the format and decoding a file into a series of records: + +\snippet test/snippet/seq_io/seq_io_reader.cpp simple_usage_file + +The reader is an *input range* which is C++ terminology for "something that you can iterate over (once)". +The last bit is important, it implies that once you reach the end, the reader will be "empty". To iterate over it again, you need to recreate it. + + + +## Writers + +TODO diff --git a/doc/record_based/2_record_faq.md b/doc/record_based/2_record_faq.md new file mode 100644 index 0000000..860f395 --- /dev/null +++ b/doc/record_based/2_record_faq.md @@ -0,0 +1,156 @@ +# Record FAQ {#record_faq} + +Records in B.I.O. are implemented as a specialisation of the bio::record template.¹ +This behaves very similarly to a std::tuple with the difference that a bio::field identifier is associated with every +element and a corresponding member function is provided, so you can easily access the elements without knowing the order. + +¹ With the exception of bio::plain_io which uses bio::plain_io::record. + +[TOC] + +\note This page contains details on how records are defined. It is meant to provide a better understanding of the design and performance implications. We recommend starting with the snippets shown in the API (e.g. bio::seq_io::reader, bio::var_io::reader, …) and only return to this page if you have questions or want to fine-tune things. + +## What is the full type of my record? 
{#record_type} + +Most records you interact with are produced by readers. + +\snippet test/snippet/seq_io/seq_io_reader.cpp simple_usage_file + +In this example, `rec` is the record and with each iteration of the loop, a new record is generated from the file. The exact type of the record depends on the reader. In the above example, it is: + +\snippet test/snippet/seq_io/seq_io_reader.cpp simple_usage_file_type + +That is quite long and difficult to remember (even though definitions of X* and Y* are omitted here), +so we write `auto &` instead. +But it is important to know which fields are contained in the record (in this case ID, SEQ and QUAL). +The documentation for the reader will tell you this, e.g. bio::seq_io::reader. + +## How can I access the fields? + +The easiest way to access a field is by calling the respective member function: + +\snippet test/snippet/seq_io/seq_io_reader.cpp simple_usage_file + +Here, `.id()` (bio::record#id()) and `.seq()` (bio::record#seq()) are used to access the fields. Note that the +documentation has entries for all field-accessor member functions, but it depends on the specific specialisation +(used by the reader) whether that function is available. +So, on the record defined by bio::seq_io::reader above, the members `.id()`, `.seq()`, `.qual()` are available, but +the member `.pos()` would not be. + +When the number of fields in the record is low and you know the order, you can also use +[structured bindings](https://en.cppreference.com/w/cpp/language/structured_binding) +to decompose the record into its fields: + +\snippet test/snippet/seq_io/seq_io_reader.cpp decomposed + +Note that the order of the fields is fixed (in this case it is defined by bio::seq_io::default_field_ids). +It is independent of the names you give to the bindings, so this syntax is error-prone when used with large records +(e.g. those defined by bio::var_io::reader). 
+ +In generic contexts, you can also access fields via `get<0>(rec)` (returns the 0-th field in the record) or +`get(rec)` (the same as calling `rec.id()`); but most users will never need this. + + +## Does my record own the data? (Shallow vs deep records) {#shallow_vs_deep} + +As shown above, every field has an identifier (e.g. bio::field::id) and a type (e.g. std::string_view). +You may have wondered, why std::string_view is used as a type and what these `transform_view`s are. +These imply that the record is a *shallow* data structure, i.e. the fields *appear* like strings or vectors, but they +are implemented more like references or pointers. +See the SeqAn3 documentation for an in-depth [Tutorial on Ranges and Views](http://docs.seqan.de/seqan/3-master-user/tutorial_ranges.html). + +Shallow records imply fewer memory allocations and/or copy operations during reading. This results in a **better +performance** but also in some important limitations: + +* Shallow records cannot be modified (as easily²). +* Shallow records cannot be "stored"; they depend on internal caches and buffers of the reader and become invalid +as soon as the next record is read from the file. + + +If you need to change a record in-place and/or "store" the record for longer than one iteration of the reader, you need to use *deep records* instead. +You can tell the reader that you want deep records by providing the respective options: + +\snippet test/snippet/seq_io/seq_io_reader.cpp options2 + +This snippet behaves similar to the previous one, except that the type of `rec` is now the following: + +\snippet test/snippet/seq_io/seq_io_reader.cpp options2_type + +This allows you to call std::vector's `.push_back()` member function (which is not possible in the default case). +Creating this kind of record is likely a bit slower than the shallow record. + +**Summary** + +* The records generated by readers are *shallow* by default. 
+* This setting has the best performance; but it is less flexible than a *deep* record. +* Readers can be configured to produce *deep* records via the options. + +² Some modifying operations are possible on views, too, but this depends on the exact types. + +## How can I change the field types? + +In the previous section, we showed how to change the field types from being shallow to deep. +For some readers, more options are available, e.g. bio::seq_io::reader assumes nucleotide data for the SEQ field by default, but you might want to read protein data instead. + +\snippet test/snippet/seq_io/seq_io_reader.cpp options + +The snippet above illustrates how the alphabet can be changed (and how to provide another option at the same time). + +Instead of using these pre-defined `field_types`, you can also define them completely manually. You can decide to even read only a subset of the fields by changing the `.field_ids` member: + +\snippet test/snippet/seq_io/seq_io_reader_options.cpp example_advanced2 + +This code makes FASTA the only legal format and creates records with only the sequence field as a std::string. + +But you can also use this mechanism to make some fields shallow and other fields deep. It also allows you +to choose different container types. +See the API documentation of the respective `reader_options` for more advanced use-cases and the +exact restrictions on allowed types. + +## How can I create record variables? + +There are various easy ways to create a bio::record that do not involve manually providing the template arguments: + +1. Deduce from the reader. +2. Use an alias. +3. Use bio::make_record or bio::tie_record. + +### Deduce from the reader {#record_type_from_reader} + +When iterating over a reader, it is easy to use `auto &` to deduce the record type, but sometimes you need +the record type outside of the for-loop or in a separate context. 
+ +This snippet demonstrates how to read an interleaved FastQ file and process the read pairs together (at every second iteration of the loop): + +\snippet test/snippet/detail/reader_base.cpp read_pair_processing + +To do this, you need to use deep records, because shallow records become invalid after the loop iteration. +Note how it is possible to "ask" the reader for the type of its record to create the local variable. + +### Record type aliases {#record_aliases} + +When writing a file without reading a file previously, you can use one of the predefined aliases: + +* bio::var_io::default_record + +This longer example illustrates using an alias: + +\snippet test/snippet/var_io/var_io_writer.cpp creation +\snippet test/snippet/var_io/var_io_writer.cpp simple_usage_file + +Here bio::var_io::default_record is the type that a bio::var_io::reader would generate if it is defined without any options, **except that the alias is deep by default.** +This is based on the assumption that aliases are typically used to define local variables whose values you want to change. + +### Making and tying records {#record_make_tie} + +There are convenience functions for making and tying records, similar to std::make_tuple and std::tie: +\snippet test/snippet/record.cpp make_and_tie_record + +The type of rec1 is: +\snippet test/snippet/record.cpp make_and_tie_record_type_rec1 + +The type of rec2 is: +\snippet test/snippet/record.cpp make_and_tie_record_type_rec2 + +When creating a record from existing variables, you can use bio::tie_record to avoid needless copies. +Instead of manually entering the identifiers as a bio::vtag, you can use bio::seq_io::default_field_ids (or the respective defaults of another reader/writer). 
diff --git a/include/bio/detail/reader_base.hpp b/include/bio/detail/reader_base.hpp index 489ef7a..cbd498c 100644 --- a/include/bio/detail/reader_base.hpp +++ b/include/bio/detail/reader_base.hpp @@ -37,6 +37,7 @@ namespace bio // ---------------------------------------------------------------------------- /*!\brief This is a (non-CRTP) base-class for I/O readers. + * \ingroup bio * \tparam options_t Type of the reader options. * \details * @@ -72,7 +73,19 @@ class reader_base : public std::ranges::view_base * \brief The exact type of the record depends on the options! * \{ */ - //!\brief The type of the record, a specialisation of bio::record; acts as a tuple of the selected field types. + /*!\brief The type of the record, a specialisation of bio::record. + * \details + * + * ### Example + * + * This snippet demonstrates how to read an interleaved FastQ file and process the read pairs + * together (at every second iteration of the loop): + * + * \snippet test/snippet/detail/reader_base.cpp read_pair_processing + * + * To be able to easily backup the first record of a mate-pair, you need to create a temporary + * variable (`last_record`). This type alias helps define it. + */ using record_type = record; //!\brief The iterator type of this view (an input iterator). using iterator = detail::in_file_iterator; diff --git a/include/bio/misc.hpp b/include/bio/misc.hpp index fd7140b..bcc98f1 100644 --- a/include/bio/misc.hpp +++ b/include/bio/misc.hpp @@ -34,6 +34,8 @@ namespace bio * Typically used to configure a class template to have members that are vectors/strings VS members that are views. * The "shallow" version of such a class is typically cheap to copy (no dynamic memory) while the "deep" version * is expensive to copy (holds dynamic memory). + * + * See \ref shallow_vs_deep on what this means in practice. 
*/ enum class ownership { diff --git a/include/bio/record.hpp b/include/bio/record.hpp index e628d37..464563c 100644 --- a/include/bio/record.hpp +++ b/include/bio/record.hpp @@ -107,23 +107,22 @@ namespace bio /*!\brief The class template that file records are based on; behaves like an std::tuple. * \implements seqan3::tuple_like * \ingroup bio - * \tparam field_types The types of the fields in this record as a seqan3::type_list. * \tparam field_ids A vtag_t type with bio::field IDs corresponding to field_types. + * \tparam field_types The types of the fields in this record as a seqan3::type_list. * \details * - * This class template behaves just like an std::tuple, with the exception that it provides an additional + * This class template behaves like a std::tuple, with the exception that it provides an additional * get-interface that takes a bio::field identifier. The traditional get interfaces (via index and * via type) are also supported, but discouraged, because accessing via bio::field is unambiguous and * better readable. * - * ### Example + * In addition to the get()-interfaces, member accessors are provided with the same name as the fields. * - * For input files this template is specialised automatically and provided by the file via its `record_type` member. - * For output files you my define it locally and pass instances of this to the output file's `push_back()`. + * See bio::seq_io::reader for how this data structure is used in practice. * - * This is how it works: + * See #make_record() and #tie_record() for easy ways to create stand-alone record variables. * - * \todo include test/snippet/io/record_2.cpp + * See the \ref record_faq for more details. 
*/ template struct record : seqan3::detail::transfer_template_args_onto_t @@ -372,15 +371,22 @@ auto const && get(record const && r) // make_record //------------------------------------------------------------------------------- -/*!\brief Create a bio::record and deduce type from arguments (like std::make_tuple for std::tuple). +/*!\brief Create a deep bio::record from the arguments (like std::make_tuple for std::tuple). + * \param[in] tag A tag that specifies the identifiers of the subsequent arguments. + * \param[in] fields The arguments to put into the record. + * \returns A bio::record with copies of the field arguments. * \details * + * The record will contain copies of the arguments. + * + * For more information, see \ref record_type and \ref record_make_tie + * * ### Example * - * TODO + * \snippet test/snippet/record.cpp make_and_tie_record */ template -constexpr auto make_record(vtag_t, field_type_ts &&... fields) +constexpr auto make_record(vtag_t BIO_DOXYGEN_ONLY(tag), field_type_ts &&... fields) -> record, seqan3::type_list...>> { return {std::forward(fields)...}; @@ -390,15 +396,22 @@ constexpr auto make_record(vtag_t, field_type_ts &&... fields) // tie_record //------------------------------------------------------------------------------- -/*!\brief Create a bio::record of references (like std::tie for std::tuple). +/*!\brief Create a shallow bio::record from the arguments (like std::tie for std::tuple). + * \param[in] tag A tag that specifies the identifiers of the subsequent arguments. + * \param[in] fields The arguments to represent in the record. + * \returns A bio::record with references to the field arguments. * \details * + * The record will contain references to the arguments. + * + * For more information, see \ref record_type and \ref record_make_tie + * * ### Example * - * TODO + * \snippet test/snippet/record.cpp make_and_tie_record */ template -constexpr auto tie_record(vtag_t, field_type_ts &... 
fields) +constexpr auto tie_record(vtag_t BIO_DOXYGEN_ONLY(tag), field_type_ts &... fields) -> record, seqan3::type_list> { return {fields...}; diff --git a/include/bio/seq_io/reader.hpp b/include/bio/seq_io/reader.hpp index 79a6465..419fa0a 100644 --- a/include/bio/seq_io/reader.hpp +++ b/include/bio/seq_io/reader.hpp @@ -92,6 +92,11 @@ namespace bio::seq_io * at the first whitespace: * \snippet test/snippet/seq_io/seq_io_reader.cpp options * + * If you need to modify or store the records, request *deep records* from the reader: + * \snippet test/snippet/seq_io/seq_io_reader.cpp options2 + * + * For more information on *shallow* vs *deep*, see \ref shallow_vs_deep + * * For more advanced options, see bio::seq_io::reader_options. */ template diff --git a/include/bio/seq_io/reader_options.hpp b/include/bio/seq_io/reader_options.hpp index c706e59..5acae27 100644 --- a/include/bio/seq_io/reader_options.hpp +++ b/include/bio/seq_io/reader_options.hpp @@ -104,6 +104,8 @@ inline constinit auto field_types_protein = field_types; //!\} diff --git a/include/bio/var_io/reader.hpp b/include/bio/var_io/reader.hpp index d7d6abc..50eacaa 100644 --- a/include/bio/var_io/reader.hpp +++ b/include/bio/var_io/reader.hpp @@ -55,8 +55,7 @@ namespace bio::var_io * are returned by default also correspond to VCF specification (i.e. 1-based positions, string as strings and not * as numbers) **with one exception:** the genotypes are not grouped by sample (as in the VCF format) but by * genotype field (as in the BCF format). - * This results in a notably better performance when reading BCF files. See below for information on how to change - * this. + * This results in a notably better performance when reading BCF files. 
* * This reader supports the following formats: * diff --git a/include/bio/var_io/reader_options.hpp b/include/bio/var_io/reader_options.hpp index 9f4c547..90e7ef3 100644 --- a/include/bio/var_io/reader_options.hpp +++ b/include/bio/var_io/reader_options.hpp @@ -24,7 +24,6 @@ #include #include #include - #include #include @@ -90,7 +89,7 @@ namespace bio::var_io * and all other tools that deal with VCF/BCF. * * Beyond that, a wide variety of types are supported per field (see below), but most users will be happy - * with one of the predefined sets. + * with one of the predefined tags. * * ### Pre-defined tags * @@ -103,29 +102,8 @@ namespace bio::var_io * * When reading and writing, you need to make sure that the IDX values in the output header are the same as in the * input header, otherwise your record fields might change meaning or even become invalid. * - * Both styles are "shallow" by default, but can be configured to be "deep": - * - * 1. shallow (bio::ownership::shallow) - * * The record contains light-weight data structures like views. - * * Record cannot be "stored"; it depends on internal caches and buffers, and it becomes invalid - * as soon as the next record is read from the file. - * 2. deep (bio::ownership::deep) - * * The record is self-contained; sequences and strings are stored in containers. - * * Record can be copied or stored and can "live on" independently of the reader. - * - * This example shows how to use deep records: - * - * \snippet test/snippet/var_io/var_io_reader_options.cpp field_types_deep - * - * Performance implications: - * * Shallow records imply fewer allocations and lower overhead during reading. - * * If you know that you need to copy your fields anyway, using a deep record can be faster than using a shallow - * record and copying the data "manually" out of that (because certain internal caches are re-used to create deep - * records). 
- * * field_types_bcf_style is faster than field_types, but for the shallow variants - * there is almost no difference. - * - * TODO some of this should be moved to a general documentation page on configuring records; shallow vs deep; etc + * Both styles are *shallow* by default, but can be configured to be *deep*. + * For more details, see \ref shallow_vs_deep * * ### Manual configuration * @@ -172,9 +150,7 @@ namespace bio::var_io * \snippet test/snippet/var_io/var_io_reader_options.cpp field_types_expert * * Reading fewer fields than available may provide a noticeable speed-up since only the - * requested fields are actually parsed. Any field may also be set to `std::span` which - * results in no parsing happening for that field. - * + * requested fields are actually parsed. */ template ), diff --git a/test/documentation/DoxygenLayout.xml b/test/documentation/DoxygenLayout.xml index 74d11a2..7941884 100644 --- a/test/documentation/DoxygenLayout.xml +++ b/test/documentation/DoxygenLayout.xml @@ -4,6 +4,10 @@ + + + + diff --git a/test/snippet/detail/reader_base.cpp b/test/snippet/detail/reader_base.cpp new file mode 100644 index 0000000..5bb77db --- /dev/null +++ b/test/snippet/detail/reader_base.cpp @@ -0,0 +1,52 @@ +#include + +#include +#include + +#include "../../unit/seq_io/data.hpp" + +void process_read_pair(auto&&, auto&&) {} + +int main() +{ + //================= PRE ========================== + { + std::ofstream os{"example.fastq", std::ios::binary}; + os << interleaved_fastq; + } + + //================= SNIPPETS ====================== +{ +//![read_pair_processing] +// choose deep records so they can be copied/moved +bio::seq_io::reader_options options{ .field_types = bio::seq_io::field_types }; + +// open an interleaved paired-end FastQ file +bio::seq_io::reader reader{"example.fastq", options}; + +// ask the reader for its record_type; create a variable to hold previous record +decltype(reader)::record_type last_record; + +bool is_first_of_pair = true; 
+for (auto & current_record : reader) +{ + if (is_first_of_pair) + { + // backup the current record; only possible because it is deep + std::swap(current_record, last_record); + } + else // is second of pair + { + // do something with the pair + process_read_pair(last_record, current_record); + } + + is_first_of_pair = !is_first_of_pair; +} +//![read_pair_processing] + +} + + //================= POST ========================== + std::filesystem::remove("example.fastq"); +} diff --git a/test/snippet/record.cpp b/test/snippet/record.cpp new file mode 100644 index 0000000..576957f --- /dev/null +++ b/test/snippet/record.cpp @@ -0,0 +1,42 @@ +#include +#include + +#include + +int main() +{ +//![make_and_tie_record] +using namespace seqan3::literals; + +std::string id = "seq1"; +std::vector seq = "ACGT"_dna5; +std::vector qual = "!!!!"_phred42; + +/* This creates a *deep* record; it contains a copy of the above strings/vectors */ +auto rec1 = bio::make_record(bio::vtag, // identifiers + id, seq, qual); // values + + +/* This creates a *shallow* record; it contains references to the above strings/vectors */ +auto rec2 = bio::tie_record(bio::vtag, // identifiers + id, seq, qual); // values +//![make_and_tie_record] + +static_assert(std::same_as, + seqan3::type_list, + std::vector>> +//![make_and_tie_record_type_rec1] + >); + +static_assert(std::same_as, + seqan3::type_list &, + std::vector &>> +//![make_and_tie_record_type_rec2] + >); +} diff --git a/test/snippet/seq_io/seq_io_reader.cpp b/test/snippet/seq_io/seq_io_reader.cpp index 2604e73..a4c355e 100644 --- a/test/snippet/seq_io/seq_io_reader.cpp +++ b/test/snippet/seq_io/seq_io_reader.cpp @@ -5,6 +5,10 @@ #include "../../unit/seq_io/data.hpp" + +template +std::pair get_arg_t(std::ranges::transform_view); + int main() { //================= PRE ========================== @@ -27,6 +31,21 @@ for (auto & rec : reader) seqan3::debug_stream << "Seq: " << rec.seq() << '\n'; } //![simple_usage_file] + +using X1 = 
decltype(get_arg_t(reader.begin()->seq()).first); +using X2 = decltype(get_arg_t(reader.begin()->seq()).second); +using Y1 = decltype(get_arg_t(reader.begin()->qual()).first); +using Y2 = decltype(get_arg_t(reader.begin()->qual()).second); + +static_assert(std::same_as, // identifiers of the fields + seqan3::type_list, // type of the SEQ field + std::ranges::transform_view>> // type of the QUAL field +//![simple_usage_file_type] + >); + } { @@ -45,10 +64,10 @@ for (auto & rec : reader) //![decomposed] bio::seq_io::reader reader{"example.fasta"}; -for (auto & [ id, seq, qual ] : reader) +for (auto & [ i, s, q ] : reader) { - seqan3::debug_stream << "ID: " << id << '\n'; - seqan3::debug_stream << "Seq: " << seq << '\n'; + seqan3::debug_stream << "ID: " << i << '\n'; + seqan3::debug_stream << "Seq: " << s << '\n'; } //![decomposed] } @@ -70,8 +89,8 @@ for (auto & rec : reader | std::views::filter(min_length) | std::views::take(5)) { //![options] bio::seq_io::reader reader{"example.fasta", - bio::seq_io::reader_options{ .field_types = bio::seq_io::field_types_protein, - .truncate_ids = true }}; + bio::seq_io::reader_options{.field_types = bio::seq_io::field_types_protein, + .truncate_ids = true }}; for (auto & rec : reader) { @@ -81,6 +100,33 @@ for (auto & rec : reader) //![options] } +{ +//![options2] +using namespace seqan3::literals; + +bio::seq_io::reader reader{"example.fasta", + bio::seq_io::reader_options{.field_types = bio::seq_io::field_types}}; + +for (auto & rec : reader) +{ + seqan3::debug_stream << "ID: " << rec.id() << '\n'; + seqan3::debug_stream << "Seq: " << rec.seq() << '\n'; + + rec.seq().push_back('A'_dna5); // ← this is not possible with shallow records (default) + seqan3::debug_stream << "SeqM: " << rec.seq() << '\n'; +} +//![options2] + +static_assert(std::same_as, // identifiers of the fields + seqan3::type_list, // type of the SEQ field + std::vector>> // type of the QUAL field +//![options2_type] + >); +} + //================= POST 
========================== std::filesystem::remove("example.fasta"); } diff --git a/test/snippet/seq_io/seq_io_reader.err b/test/snippet/seq_io/seq_io_reader.err index 5c05c65..f46377e 100644 --- a/test/snippet/seq_io/seq_io_reader.err +++ b/test/snippet/seq_io/seq_io_reader.err @@ -46,3 +46,18 @@ ID: ID4 Seq: ACGTTTA ID: ID5 Seq: ACGTTTAACGTTTTTTTT +ID: ID1 +Seq: ACGTTTTTTTTTTTTTTT +SeqM: ACGTTTTTTTTTTTTTTTA +ID: ID2 +Seq: ACGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +SeqM: ACGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTA +ID: ID3 lala +Seq: ACGTTTAACGTTTTTTTT +SeqM: ACGTTTAACGTTTTTTTTA +ID: ID4 +Seq: ACGTTTA +SeqM: ACGTTTAA +ID: ID5 lala +Seq: ACGTTTAACGTTTTTTTT +SeqM: ACGTTTAACGTTTTTTTTA diff --git a/test/snippet/seq_io/seq_io_reader_options.cpp b/test/snippet/seq_io/seq_io_reader_options.cpp index bb66c3f..d88ba99 100644 --- a/test/snippet/seq_io/seq_io_reader_options.cpp +++ b/test/snippet/seq_io/seq_io_reader_options.cpp @@ -4,14 +4,6 @@ int main() { -{ -//![example_custom] -bio::seq_io::reader_options options -{ - .field_types = bio::seq_io::field_types, -}; -//![example_custom] -} { //![example_simple] diff --git a/test/unit/seq_io/data.hpp b/test/unit/seq_io/data.hpp index 516badc..037ce0e 100644 --- a/test/unit/seq_io/data.hpp +++ b/test/unit/seq_io/data.hpp @@ -30,3 +30,22 @@ inline constexpr std::string_view input_bgzipped{ "\x39\xa6\xb8\xd5\x02\x00\xcd\x3b\x57\x80\xba\x00\x00\x00\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02" "\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00", 100}; + +inline constexpr std::string_view interleaved_fastq = + R"raw(@M10991:61:000000000-A7EML:1:1101:14011:1001 1:N:0:28 +NGCTCCTAGGTCGGCATGATGGGGGAAGGAGAGCATGGGAAGAAATGAGAGAGTAGCAA ++ +#8BCCGGGGGFEFECFGGGGGGGGG@;FFGGGEG@FF