BioHDF version 0.3 alpha
Scalable NGS Data Storage Based on HDF5
|
Represents NGS alignments (SAM, etc. entries). More...
Data Structures | |
struct | bioh5g_alignment_data |
Alignment data container. More... | |
Defines | |
#define | BIOH5G_READS_PATH_ATTR "READS_PATH" |
Attribute name: BioHDF path to the associated reads. | |
#define | BIOH5G_INDEX_METHOD_ATTR "INDEX_METHOD" |
Attribute name: Describes how the alignments are indexed. | |
Typedefs | |
typedef struct _bioh5g_alignments * | bioh5g_alignments |
BioHDF alignments collection handle. | |
typedef struct _bioh5g_alignments_creation_properties * | bioh5g_alignments_creation_properties |
BioHDF alignments creation properties. | |
typedef struct _bioh5g_alignments_iterator * | bioh5g_alignments_iterator |
BioHDF alignments iterator handle. | |
Enumerations | |
enum | bioh5g_alignments_format { SAM_FORMAT } |
The text file format for alignment I/O. More... | |
enum | bioh5g_alignments_index_method { UNINDEXED = 0, REF_POS_SECONDARY = 1 } |
The indexing scheme used on the alignments in this collection. More... | |
Functions | |
BIOHDF_API biohdf_error | BIOH5Gcheck_alignments_presence (const biohdf_file file, const char *path, int *presence) |
Test if an alignments collection exists. | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignments_collection (const biohdf_file file, const bioh5g_alignments_creation_properties props, const char *path, bioh5g_alignments *aligns) |
Create (and open) a new alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gopen_alignments_collection (const biohdf_file file, const char *path, biohdf_open_mode mode, bioh5g_alignments *aligns) |
Open an existing alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gclose_alignments_collection (bioh5g_alignments *aligns) |
Close an open alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gget_reads_path (const bioh5g_alignments aligns, char **reads_path) |
Get the path to the associated reads. | |
BIOHDF_API biohdf_error | BIOH5Gget_alignments_count (const bioh5g_alignments aligns, int64_t *count) |
Get the number of stored alignments in a collection. | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignments_iterator (const bioh5g_alignments aligns, bioh5g_alignments_iterator *iter) |
Create an iterator for an alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gadd_alignments_iterator_range_filter (bioh5g_alignments_iterator iter, const char *reference, int32_t start, int32_t end) |
Add a reference region filter to an alignments iterator. | |
BIOHDF_API biohdf_error | BIOH5Gadd_alignments_iterator_mapq_filter (bioh5g_alignments_iterator iter, unsigned char min_mapq) |
Add a SAM MAPQ filter to an alignments iterator. | |
BIOHDF_API biohdf_error | BIOH5Gadd_alignments_iterator_flags_filter (bioh5g_alignments_iterator iter, uint32_t mask) |
Add a SAM FLAGS filter to an alignments iterator. | |
BIOHDF_API biohdf_error | BIOH5Gdestroy_alignments_iterator (bioh5g_alignments_iterator *iter) |
Destroy an iterator for an alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gadd_alignment (const bioh5g_alignments aligns, const bioh5g_alignment_data *data) |
Add an alignment to a collection. | |
BIOHDF_API biohdf_error | BIOH5Gget_index_of_last_added_alignment (const bioh5g_alignments aligns, int64_t *index) |
Get the index of the last alignment that was added. | |
BIOHDF_API biohdf_error | BIOH5Gget_next_alignment (bioh5g_alignments_iterator iter, int64_t *index, bioh5g_alignment_data **data) |
Get the next alignment from an alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gget_alignment (const bioh5g_alignments aligns, int64_t index, bioh5g_alignment_data **data) |
Given an alignment index, get the alignment from an alignment collection. | |
BIOHDF_API biohdf_error | BIOH5Gfree_alignment_data (bioh5g_alignment_data **data) |
Free alignment data that has been obtained from the library. | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignments_index (bioh5g_alignments aligns, bioh5g_alignments_index_method method, biohdf_index_creation_properties props) |
Create an index for an alignments collection. | |
BIOHDF_API biohdf_error | BIOH5Gstore_alignment_file_header (const bioh5g_alignments aligns, bioh5g_alignments_format format, const char *header) |
Store a file header from an alignment file (e.g. SAM). | |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_file_header (const bioh5g_alignments aligns, bioh5g_alignments_format *format, char **header) |
Get a stored alignment file header. | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignment_string (const bioh5g_alignment_data *alignment, const bioh5g_read_data *read, bioh5g_alignments_format format, char **alignment_string) |
Create an alignment string in a particular format. | |
BIOHDF_API biohdf_error | BIOH5Gwrite_alignment_to_stream (const bioh5g_alignment_data *alignment, const bioh5g_read_data *read, bioh5g_alignments_format format, FILE *stream) |
Write an alignment string in a particular format to an output stream. | |
Functions: Data Accessors | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignment_data (bioh5g_alignment_data **data) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_read_index (bioh5g_alignment_data *data, int64_t *read_index) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_read_index (bioh5g_alignment_data *data, int64_t read_index) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_reference (bioh5g_alignment_data *data, char **reference) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_reference (bioh5g_alignment_data *data, char *reference) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_position (bioh5g_alignment_data *data, int32_t *position) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_position (bioh5g_alignment_data *data, int32_t position) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_length (bioh5g_alignment_data *data, int32_t *length) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_length (bioh5g_alignment_data *data, int32_t length) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_mapq (bioh5g_alignment_data *data, unsigned char *sam_mapq) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_mapq (bioh5g_alignment_data *data, unsigned char sam_mapq) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_flags (bioh5g_alignment_data *data, uint32_t *sam_flags) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_flags (bioh5g_alignment_data *data, uint32_t sam_flags) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_cigar (bioh5g_alignment_data *data, char **sam_cigar) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_cigar (bioh5g_alignment_data *data, char *sam_cigar) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_tags (bioh5g_alignment_data *data, char **sam_tags) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_tags (bioh5g_alignment_data *data, char *sam_tags) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_rnext (bioh5g_alignment_data *data, char **sam_rnext) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_rnext (bioh5g_alignment_data *data, char *sam_rnext) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_pnext (bioh5g_alignment_data *data, int32_t *sam_pnext) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_pnext (bioh5g_alignment_data *data, int32_t sam_pnext) |
BIOHDF_API biohdf_error | BIOH5Gget_alignment_sam_tlen (bioh5g_alignment_data *data, int32_t *sam_tlen) |
BIOHDF_API biohdf_error | BIOH5Gset_alignment_sam_tlen (bioh5g_alignment_data *data, int32_t sam_tlen) |
Functions: Collection Creation Properties | |
BIOHDF_API biohdf_error | BIOH5Gcreate_alignments_properties (bioh5g_alignments_creation_properties *props) |
BIOHDF_API biohdf_error | BIOH5Gdestroy_alignments_properties (bioh5g_alignments_creation_properties *props) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_reads_path (bioh5g_alignments_creation_properties props, char *reads_path) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_refs_scheme (bioh5g_alignments_creation_properties props, biohdf_string_storage_scheme scheme) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_tags_scheme (bioh5g_alignments_creation_properties props, biohdf_string_storage_scheme scheme) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_cigar_scheme (bioh5g_alignments_creation_properties props, biohdf_string_storage_scheme scheme) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_refs_length (bioh5g_alignments_creation_properties props, size_t length) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_tags_length (bioh5g_alignments_creation_properties props, size_t length) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_cigar_length (bioh5g_alignments_creation_properties props, size_t length) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_chunk_size (bioh5g_alignments_creation_properties props, int64_t chunk_size) |
BIOHDF_API biohdf_error | BIOH5Gset_alignments_properties_compression_level (bioh5g_alignments_creation_properties props, compression_level level) |
Represents NGS alignments (SAM, etc. entries).
BIOHDF_API biohdf_error BIOH5Gadd_alignment | ( | const bioh5g_alignments | aligns, |
const bioh5g_alignment_data * | data | ||
) |
Add an alignment to a collection.
aligns | A BioHDF alignments handle |
data | A BioHDF alignment |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gadd_alignments_iterator_flags_filter | ( | bioh5g_alignments_iterator | iter, |
uint32_t | mask | ||
) |
Add a SAM FLAGS filter to an alignments iterator.
Only alignments which have all the bits in the mask set (an AND mask) will be returned.
See the SAM spec for the meanings of the individual flags.
iter | An iterator for an alignments collection |
mask | A SAM flags mask. Note that adding a subsequent mask to an iterator clobbers the old one. They are NOT combined with logical OR. |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gadd_alignments_iterator_mapq_filter | ( | bioh5g_alignments_iterator | iter, |
unsigned char | min_mapq | ||
) |
Add a SAM MAPQ filter to an alignments iterator.
Set a minimum MAPQ level on an iterator. When set, only alignments which have a MAPQ score above or equal to the minimum will be returned.
NOTE: Allowable MAPQ values are from 0 to 255.
iter | An iterator for an alignments collection |
min_mapq | The minimum acceptable MAPQ value (inclusive) |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gadd_alignments_iterator_range_filter | ( | bioh5g_alignments_iterator | iter, |
const char * | reference, | ||
int32_t | start, | ||
int32_t | end | ||
) |
Add a reference region filter to an alignments iterator.
Add reference region filters one at a time. If no reference regions are specified, all alignments are returned.
iter | An iterator for an alignments collection |
reference | The reference name |
start | The start point of the region (1-based, inclusive) |
end | The end point of the region (1-based, inclusive) |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gcheck_alignments_presence | ( | const biohdf_file | file, |
const char * | path, | ||
int * | presence | ||
) |
Test if an alignments collection exists.
This function will return TRUE if a collection of the same type exists at the named location. If any other HDF5 or BioHDF object with that same name exists, TRUE will be returned as well as an error code, the assumption being that Bad Code(tm) that does not check return values will be more likely to attempt to open the object (and fail), rather than create things (which may partially succeed, making a mess).
file | A BioHDF file handle | |
path | The BioHDF path to the collection | |
[out] | presence | TRUE if the collection exists, FALSE if it does not. |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gclose_alignments_collection | ( | bioh5g_alignments * | aligns | ) |
Close an open alignments collection.
This function will set the collection handle to NULL after freeing it.
[in,out] | aligns | A BioHDF alignments handle |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gcreate_alignment_string | ( | const bioh5g_alignment_data * | alignment, |
const bioh5g_read_data * | read, | ||
bioh5g_alignments_format | format, | ||
char ** | alignment_string | ||
) |
Create an alignment string in a particular format.
Read data is normally required for correct SAM output. If no read data is supplied, the QNAME will be an arbitrary integer and SEQ and QUAL will both be '*'.
alignment | The alignment data | |
read | The read data | |
format | The format of the output string | |
[out] | alignment_string | The alignment string |
BIOHDF_API biohdf_error BIOH5Gcreate_alignments_collection | ( | const biohdf_file | file, |
const bioh5g_alignments_creation_properties | props, | ||
const char * | path, | ||
bioh5g_alignments * | aligns | ||
) |
Create (and open) a new alignments collection.
The collection handle returned by this function will be ready to accept I/O.
file | A BioHDF file handle | |
props | Collection creation properties | |
path | The BioHDF path to the collection | |
[out] | aligns | A BioHDF alignments handle |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gcreate_alignments_index | ( | bioh5g_alignments | aligns, |
bioh5g_alignments_index_method | method, | ||
biohdf_index_creation_properties | props | ||
) |
Create an index for an alignments collection.
A pre-existing index of the same method/type will be deleted.
aligns | The alignments collection |
method | The indexing method to use |
props | Index creation properties |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gcreate_alignments_iterator | ( | const bioh5g_alignments | aligns, |
bioh5g_alignments_iterator * | iter | ||
) |
Create an iterator for an alignments collection.
aligns | A BioHDF alignments handle | |
[out] | iter | An iterator for an alignments collection |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gdestroy_alignments_iterator | ( | bioh5g_alignments_iterator * | iter | ) |
Destroy an iterator for an alignments collection.
The iterator is set to NULL as a part of deletion.
[in,out] | iter | An iterator for an alignments collection |
BIOHDF_API biohdf_error BIOH5Gfree_alignment_data | ( | bioh5g_alignment_data ** | data | ) |
Free alignment data that has been obtained from the library.
The data is set to NULL as a part of deletion.
[in,out] | data | The BioHDF alignment |
BIOHDF_API biohdf_error BIOH5Gget_alignment | ( | const bioh5g_alignments | aligns, |
int64_t | index, | ||
bioh5g_alignment_data ** | data | ||
) |
Given an alignment index, get the alignment from an alignment collection.
aligns | A BioHDF alignments handle | |
index | The index of this alignment | |
[out] | data | The BioHDF alignment |
Given an alignment index, get the alignment from an alignment collection.
CODE SUCCESS FAILURE
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gget_alignment_file_header | ( | const bioh5g_alignments | aligns, |
bioh5g_alignments_format * | format, | ||
char ** | header | ||
) |
Get a stored alignment file header.
NOTE: Headers are stored verbatim and are not parsed or generated. This function can only return a previously stored header; it will not generate a header from scratch.
aligns | The alignments collection | |
[out] | format | The format of the header |
[out] | header | The header string |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gget_alignments_count | ( | const bioh5g_alignments | aligns, |
int64_t * | count | ||
) |
Get the number of stored alignments in a collection.
aligns | A BioHDF alignments handle | |
[out] | count | The number of alignments in the collection |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gget_index_of_last_added_alignment | ( | const bioh5g_alignments | aligns, |
int64_t * | index | ||
) |
Get the index of the last alignment that was added.
Useful for higher level structures where links must be created.
aligns | A BioHDF alignments handle | |
[out] | index | The index of the last alignment that was added |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gget_next_alignment | ( | bioh5g_alignments_iterator | iter, |
int64_t * | index, | ||
bioh5g_alignment_data ** | data | ||
) |
Get the next alignment from an alignments collection.
iter | An iterator for an alignments collection | |
[out] | index | The index of this alignment |
[out] | data | The BioHDF alignment |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gget_reads_path | ( | const bioh5g_alignments | aligns, |
char ** | reads_path | ||
) |
Get the path to the associated reads.
Each BioHDF alignments collection includes a link to the associated reads collection.
An empty string ("\0") is returned if the link is not present.
aligns | A BioHDF alignments handle | |
[out] | reads_path | The BioHDF path to the associated reads |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gopen_alignments_collection | ( | const biohdf_file | file, |
const char * | path, | ||
biohdf_open_mode | mode, | ||
bioh5g_alignments * | aligns | ||
) |
Open an existing alignments collection.
file | A BioHDF file handle | |
path | The BioHDF path to the collection | |
mode | The access mode (read-only or read-write) | |
[out] | aligns | A BioHDF alignments handle |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gstore_alignment_file_header | ( | const bioh5g_alignments | aligns, |
bioh5g_alignments_format | format, | ||
const char * | header | ||
) |
Store a file header from an alignment file (e.g. SAM).
NOTE: Headers are stored verbatim and are not parsed.
aligns | The alignments collection |
format | The format of the header |
header | The header string to store |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE
BIOHDF_API biohdf_error BIOH5Gwrite_alignment_to_stream | ( | const bioh5g_alignment_data * | alignment, |
const bioh5g_read_data * | read, | ||
bioh5g_alignments_format | format, | ||
FILE * | stream | ||
) |
Write an alignment string in a particular format to an output stream.
This saves you from having to create temp strings that will just be dumped to a stream.
Read data is normally required for correct SAM output. If no read data is supplied, the QNAME will be an arbitrary integer and SEQ and QUAL will both be '*'.
alignment | The alignment data |
read | The read data |
format | The format of the output string |
stream | The output stream (can be STDOUT) |
CHECK*PARAMETERS
CODE
SUCCESS
FAILURE