BioHDF version 0.3 alpha
Scalable NGS Data Storage Based on HDF5
|
00001 /***************************************************************************** 00002 * Copyright by The HDF Group * 00003 * All rights reserved. * 00004 * * 00005 * This file is part of BioHDF. The full BioHDF copyright notice, including * 00006 * terms governing use, modification, and redistribution, is contained in * 00007 * the file COPYING. COPYING can be found at the root of the source code * 00008 * distribution tree. If you do not have access to this file, you may * 00009 * request a copy from help@hdfgroup.org. * 00010 *****************************************************************************/ 00011 00020 #ifndef _BIOH5G_ALIGNMENTS_H 00021 #define _BIOH5G_ALIGNMENTS_H 00022 00023 #include <stdio.h> 00024 00025 #include "biohdf_api.h" 00026 #include "biohdf_error.h" 00027 #include "biohdf_file.h" 00028 #include "biohdf_utility.h" 00029 00030 #include "bioh5g_api.h" 00031 #include "bioh5g_reads.h" 00032 00033 00034 00035 /***************************************************************************** 00036 * Attribute Names * 00037 *****************************************************************************/ 00038 00039 00040 00042 #define BIOH5G_READS_PATH_ATTR "READS_PATH" 00043 00044 00045 00047 #define BIOH5G_INDEX_METHOD_ATTR "INDEX_METHOD" 00048 00049 00050 00051 /***************************************************************************** 00052 * Type definitions * 00053 *****************************************************************************/ 00054 00055 00056 00058 typedef struct _bioh5g_alignments *bioh5g_alignments; 00059 00060 00061 00063 typedef struct _bioh5g_alignments_creation_properties *bioh5g_alignments_creation_properties; 00064 00065 00066 00068 typedef struct _bioh5g_alignments_iterator *bioh5g_alignments_iterator; 00069 00070 00071 00072 /***************************************************************************** 00073 * Structs and enums * 00074 *****************************************************************************/ 00075 00076 00077 00079 typedef enum 00080 { 00081 SAM_FORMAT 00082 } bioh5g_alignments_format; 00083 00084 00085 00087 typedef enum 00088 { 00089 UNINDEXED = 0, 00090 REF_POS_SECONDARY = 1 00092 /* REF_POS_PRIMARY = 2, */ 00093 /* REF_POS_NCLIST_PRIMARY = 3, */ 00094 /* REF_POS_NCLIST_PRIMARY = 4, */ 00095 00096 } bioh5g_alignments_index_method; 00097 00098 00099 00100 /***************************************************************************** 00101 * Data container * 00102 *****************************************************************************/ 00103 00104 00105 00110 typedef struct 00111 { 00112 /* basic alignment data */ 00113 int64_t read_index; 00114 char *reference; 00115 int32_t position; 00116 int32_t length; 00118 /* SAM data */ 00119 unsigned char sam_mapq; 00120 int32_t sam_flags; 00121 char *sam_cigar; 00122 char *sam_tags; 00124 /* SAM template data */ 00125 char *sam_rnext; 00126 int32_t sam_pnext; 00127 int32_t sam_tlen; 00129 } bioh5g_alignment_data; 00130 00131 00132 00133 /***************************************************************************** 00134 * Create, open, close * 00135 *****************************************************************************/ 00136 00137 00138 00154 BIOHDF_API biohdf_error 00155 BIOH5Gcheck_alignments_presence(const biohdf_file file, 00156 const char *path, 00157 /*OUT*/ int *presence); 00158 00159 00160 00173 BIOHDF_API biohdf_error 00174 BIOH5Gcreate_alignments_collection(const biohdf_file file, 00175 const bioh5g_alignments_creation_properties props, 00176 const char *path, 00177 /*OUT*/ bioh5g_alignments *aligns); 00178 00179 00180 00190 BIOHDF_API biohdf_error 00191 BIOH5Gopen_alignments_collection(const biohdf_file file, 00192 const char *path, 00193 biohdf_open_mode mode, 00194 /*OUT*/ bioh5g_alignments *aligns); 00195 00196 00197 00206 BIOHDF_API biohdf_error 00207 BIOH5Gclose_alignments_collection(/*IN-OUT*/ bioh5g_alignments *aligns); 00208 00209 00210 00223 BIOHDF_API biohdf_error 00224 BIOH5Gget_reads_path(const bioh5g_alignments aligns, 00225 /*OUT*/ char **reads_path); 00226 00227 00228 00229 /***************************************************************************** 00230 * Append and read data * 00231 *****************************************************************************/ 00232 00233 00234 00242 BIOHDF_API biohdf_error 00243 BIOH5Gget_alignments_count(const bioh5g_alignments aligns, 00244 /*OUT*/ int64_t *count); 00245 00246 00247 00255 BIOHDF_API biohdf_error 00256 BIOH5Gcreate_alignments_iterator(const bioh5g_alignments aligns, 00257 /*OUT*/ bioh5g_alignments_iterator *iter); 00258 00259 00272 BIOHDF_API biohdf_error 00273 BIOH5Gadd_alignments_iterator_range_filter(bioh5g_alignments_iterator iter, 00274 const char *reference, 00275 int32_t start, 00276 int32_t end); 00277 00278 00279 00293 BIOHDF_API biohdf_error 00294 BIOH5Gadd_alignments_iterator_mapq_filter(bioh5g_alignments_iterator iter, 00295 unsigned char min_mapq); 00296 00297 00298 00313 BIOHDF_API biohdf_error 00314 BIOH5Gadd_alignments_iterator_flags_filter(bioh5g_alignments_iterator iter, 00315 uint32_t mask); 00316 00317 00318 00327 BIOHDF_API biohdf_error 00328 BIOH5Gdestroy_alignments_iterator(/*IN-OUT*/ bioh5g_alignments_iterator *iter); 00329 00330 00331 00339 BIOHDF_API biohdf_error 00340 BIOH5Gadd_alignment(const bioh5g_alignments aligns, 00341 const bioh5g_alignment_data *data); 00342 00343 00344 00354 BIOHDF_API biohdf_error 00355 BIOH5Gget_index_of_last_added_alignment(const bioh5g_alignments aligns, 00356 /*OUT*/ int64_t *index); 00357 00358 00359 00368 BIOHDF_API biohdf_error 00369 BIOH5Gget_next_alignment(bioh5g_alignments_iterator iter, 00370 /*OUT*/ int64_t *index, 00371 /*OUT*/ bioh5g_alignment_data **data); 00372 00373 00374 00383 BIOHDF_API biohdf_error 00384 BIOH5Gget_alignment(const bioh5g_alignments aligns, 00385 int64_t index, 00386 /*OUT*/ bioh5g_alignment_data **data); 00387 00388 00389 00398 BIOHDF_API biohdf_error 00399 BIOH5Gfree_alignment_data(/*IN-OUT*/ bioh5g_alignment_data **data); 00400 00401 00402 00403 /***************************************************************************** 00404 * Alignment hit index functionality * 00405 *****************************************************************************/ 00406 00407 00408 00419 BIOHDF_API biohdf_error 00420 BIOH5Gcreate_alignments_index(bioh5g_alignments aligns, 00421 bioh5g_alignments_index_method method, 00422 biohdf_index_creation_properties props); 00423 00424 00425 00426 /***************************************************************************** 00427 * External file header storage * 00428 *****************************************************************************/ 00429 00430 00441 BIOHDF_API biohdf_error 00442 BIOH5Gstore_alignment_file_header(const bioh5g_alignments aligns, 00443 bioh5g_alignments_format format, 00444 const char *header); 00445 00446 00447 00460 BIOHDF_API biohdf_error 00461 BIOH5Gget_alignment_file_header(const bioh5g_alignments aligns, 00462 /*OUT*/ bioh5g_alignments_format *format, 00463 /*OUT*/ char **header); 00464 00465 00466 00467 /***************************************************************************** 00468 * Data formats * 00469 *****************************************************************************/ 00470 00471 00485 BIOHDF_API biohdf_error 00486 BIOH5Gcreate_alignment_string(const bioh5g_alignment_data *alignment, 00487 const bioh5g_read_data *read, 00488 bioh5g_alignments_format format, 00489 /*OUT*/ char **alignment_string); 00490 00491 00492 00509 BIOHDF_API biohdf_error 00510 BIOH5Gwrite_alignment_to_stream(const bioh5g_alignment_data *alignment, 00511 const bioh5g_read_data *read, 00512 bioh5g_alignments_format format, 00513 FILE *stream); 00514 00515 00516 00517 00518 /***************************************************************************** 00519 * Accessor functions (needed for higher-language interoperation) * 00520 *****************************************************************************/ 00521 00527 BIOHDF_API biohdf_error 00528 BIOH5Gcreate_alignment_data(/*OUT*/ bioh5g_alignment_data **data); 00529 00530 00531 00532 BIOHDF_API biohdf_error 00533 BIOH5Gget_alignment_read_index(bioh5g_alignment_data *data, 00534 /*OUT*/ int64_t *read_index); 00535 00536 00537 00538 BIOHDF_API biohdf_error 00539 BIOH5Gset_alignment_read_index(bioh5g_alignment_data *data, 00540 int64_t read_index); 00541 00542 00543 00544 BIOHDF_API biohdf_error 00545 BIOH5Gget_alignment_reference(bioh5g_alignment_data *data, 00546 /*OUT*/ char **reference); 00547 00548 00549 00550 BIOHDF_API biohdf_error 00551 BIOH5Gset_alignment_reference(bioh5g_alignment_data *data, 00552 char *reference); 00553 00554 00555 00556 BIOHDF_API biohdf_error 00557 BIOH5Gget_alignment_position(bioh5g_alignment_data *data, 00558 /*OUT*/ int32_t *position); 00559 00560 00561 00562 BIOHDF_API biohdf_error 00563 BIOH5Gset_alignment_position(bioh5g_alignment_data *data, 00564 int32_t position); 00565 00566 00567 00568 BIOHDF_API biohdf_error 00569 BIOH5Gget_alignment_length(bioh5g_alignment_data *data, 00570 /*OUT*/ int32_t *length); 00571 00572 00573 00574 BIOHDF_API biohdf_error 00575 BIOH5Gset_alignment_length(bioh5g_alignment_data *data, 00576 int32_t length); 00577 00578 00579 00580 BIOHDF_API biohdf_error 00581 BIOH5Gget_alignment_sam_mapq(bioh5g_alignment_data *data, 00582 /*OUT*/ unsigned char *sam_mapq); 00583 00584 00585 00586 BIOHDF_API biohdf_error 00587 BIOH5Gset_alignment_sam_mapq(bioh5g_alignment_data *data, 00588 unsigned char sam_mapq); 00589 00590 00591 00592 BIOHDF_API biohdf_error 00593 BIOH5Gget_alignment_sam_flags(bioh5g_alignment_data *data, 00594 /*OUT*/ uint32_t *sam_flags); 00595 00596 00597 00598 BIOHDF_API biohdf_error 00599 BIOH5Gset_alignment_sam_flags(bioh5g_alignment_data *data, 00600 uint32_t sam_flags); 00601 00602 00603 00604 BIOHDF_API biohdf_error 00605 BIOH5Gget_alignment_sam_cigar(bioh5g_alignment_data *data, 00606 /*OUT*/ char **sam_cigar); 00607 00608 00609 00610 BIOHDF_API biohdf_error 00611 BIOH5Gset_alignment_sam_cigar(bioh5g_alignment_data *data, 00612 char *sam_cigar); 00613 00614 00615 00616 BIOHDF_API biohdf_error 00617 BIOH5Gget_alignment_sam_tags(bioh5g_alignment_data *data, 00618 /*OUT*/ char **sam_tags); 00619 00620 00621 00622 BIOHDF_API biohdf_error 00623 BIOH5Gset_alignment_sam_tags(bioh5g_alignment_data *data, 00624 char *sam_tags); 00625 00626 00627 00628 BIOHDF_API biohdf_error 00629 BIOH5Gget_alignment_sam_rnext(bioh5g_alignment_data *data, 00630 /*OUT*/ char **sam_rnext); 00631 00632 00633 00634 BIOHDF_API biohdf_error 00635 BIOH5Gset_alignment_sam_rnext(bioh5g_alignment_data *data, 00636 char *sam_rnext); 00637 00638 00639 00640 BIOHDF_API biohdf_error 00641 BIOH5Gget_alignment_sam_pnext(bioh5g_alignment_data *data, 00642 /*OUT*/ int32_t *sam_pnext); 00643 00644 00645 00646 BIOHDF_API biohdf_error 00647 BIOH5Gset_alignment_sam_pnext(bioh5g_alignment_data *data, 00648 int32_t sam_pnext); 00649 00650 00651 00652 BIOHDF_API biohdf_error 00653 BIOH5Gget_alignment_sam_tlen(bioh5g_alignment_data *data, 00654 /*OUT*/ int32_t *sam_tlen); 00655 00656 00657 00658 BIOHDF_API biohdf_error 00659 BIOH5Gset_alignment_sam_tlen(bioh5g_alignment_data *data, 00660 int32_t sam_tlen); 00661 00664 /***************************************************************************** 00665 * Alignments properties - create, destroy, access * 00666 *****************************************************************************/ 00667 00673 BIOHDF_API biohdf_error 00674 BIOH5Gcreate_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props); 00675 00676 00677 00678 BIOHDF_API biohdf_error 00679 BIOH5Gdestroy_alignments_properties(/*OUT*/ bioh5g_alignments_creation_properties *props); 00680 00681 00682 00683 BIOHDF_API biohdf_error 00684 BIOH5Gset_alignments_properties_reads_path(bioh5g_alignments_creation_properties props, 00685 char *reads_path); 00686 00687 00688 00689 BIOHDF_API biohdf_error 00690 BIOH5Gset_alignments_properties_refs_scheme(bioh5g_alignments_creation_properties props, 00691 biohdf_string_storage_scheme scheme); 00692 00693 00694 00695 BIOHDF_API biohdf_error 00696 BIOH5Gset_alignments_properties_tags_scheme(bioh5g_alignments_creation_properties props, 00697 biohdf_string_storage_scheme scheme); 00698 00699 00700 00701 BIOHDF_API biohdf_error 00702 BIOH5Gset_alignments_properties_cigar_scheme(bioh5g_alignments_creation_properties props, 00703 biohdf_string_storage_scheme scheme); 00704 00705 00706 00707 BIOHDF_API biohdf_error 00708 BIOH5Gset_alignments_properties_refs_length(bioh5g_alignments_creation_properties props, 00709 size_t length); 00710 00711 00712 00713 BIOHDF_API biohdf_error 00714 BIOH5Gset_alignments_properties_tags_length(bioh5g_alignments_creation_properties props, 00715 size_t length); 00716 00717 00718 00719 BIOHDF_API biohdf_error 00720 BIOH5Gset_alignments_properties_cigar_length(bioh5g_alignments_creation_properties props, 00721 size_t length); 00722 00723 00724 00725 BIOHDF_API biohdf_error 00726 BIOH5Gset_alignments_properties_chunk_size(bioh5g_alignments_creation_properties props, 00727 int64_t chunk_size); 00728 00729 00730 00731 BIOHDF_API biohdf_error 00732 BIOH5Gset_alignments_properties_compression_level(bioh5g_alignments_creation_properties props, 00733 compression_level level); 00734 00735 00739 #endif