btllib
Loading...
Searching...
No Matches
seq_reader_multiline_fastq_module.hpp
1#ifndef BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
2#define BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
3
4#include "btllib/cstring.hpp"
5#include "btllib/status.hpp"
6
7#include <cstdlib>
8
9namespace btllib {
10
12class SeqReaderMultilineFastqModule
13{
14
15private:
16 friend class SeqReader;
17
18 enum class Stage
19 {
20 HEADER,
21 SEQ,
22 TRANSITION,
23 SEP,
24 QUAL
25 };
26
27 Stage stage = Stage::HEADER;
28 CString tmp;
29
30 static bool buffer_valid(const char* buffer, size_t size);
31 template<typename ReaderType, typename RecordType>
32 bool read_buffer(ReaderType& reader, RecordType& record);
33 template<typename ReaderType, typename RecordType>
34 bool read_transition(ReaderType& reader, RecordType& record);
35 template<typename ReaderType, typename RecordType>
36 bool read_file(ReaderType& reader, RecordType& record);
37};
38
39template<typename ReaderType, typename RecordType>
40inline bool
41SeqReaderMultilineFastqModule::read_buffer(ReaderType& reader,
42 RecordType& record)
43{
44 record.header.clear();
45 record.seq.clear();
46 record.qual.clear();
47 if (reader.buffer.start < reader.buffer.end) {
48 int c;
49 for (;;) {
50 switch (stage) {
51 case Stage::HEADER: {
52 if (!reader.readline_buffer_append(record.header)) {
53 return false;
54 }
55 stage = Stage::SEQ;
56 }
57 // fall through
58 case Stage::SEQ: {
59 if (!reader.readline_buffer_append(record.seq)) {
60 return false;
61 }
62 rtrim(record.seq);
63 stage = Stage::TRANSITION;
64 }
65 // fall through
66 case Stage::TRANSITION: {
67 c = reader.getc_buffer();
68 if (c == EOF) {
69 return false;
70 }
71 reader.ungetc_buffer(c);
72 if (c == '+') {
73 stage = Stage::SEP;
74 } else {
75 stage = Stage::SEQ;
76 }
77 break;
78 }
79 case Stage::SEP: {
80 if (!reader.readline_buffer_append(tmp)) {
81 return false;
82 }
83 stage = Stage::QUAL;
84 tmp.clear();
85 }
86 // fallthrough
87 case Stage::QUAL: {
88 if (!reader.readline_buffer_append(record.qual)) {
89 return false;
90 }
91 rtrim(record.qual);
92 if (record.qual.size() == record.seq.size()) {
93 stage = Stage::HEADER;
94 return true;
95 }
96 check_error(record.qual.size() > record.seq.size(),
97 "SeqReader: Multiline FASTQ reader: Quality string is "
98 "longer than sequence string.");
99 break;
100 }
101 default: {
102 log_error("SeqReader has entered an invalid state.");
103 std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
104 }
105 }
106 }
107 }
108 return false;
109}
110
111template<typename ReaderType, typename RecordType>
112inline bool
113SeqReaderMultilineFastqModule::read_transition(ReaderType& reader,
114 RecordType& record)
115{
116 if (std::ferror(reader.source) == 0 && std::feof(reader.source) == 0) {
117 const auto p = std::fgetc(reader.source);
118 if (p != EOF) {
119 const auto ret = std::ungetc(p, reader.source);
120 check_error(ret == EOF, "SeqReaderMultilineFastqModule: ungetc failed.");
121 int c;
122 for (;;) {
123 switch (stage) {
124 case Stage::HEADER: {
125 reader.readline_file_append(record.header, reader.source);
126 stage = Stage::SEQ;
127 }
128 // fall through
129 case Stage::SEQ: {
130 reader.readline_file_append(record.seq, reader.source);
131 rtrim(record.seq);
132 stage = Stage::TRANSITION;
133 }
134 // fall through
135 case Stage::TRANSITION: {
136 c = std::fgetc(reader.source);
137 if (c == EOF) {
138 return false;
139 }
140 const auto ret = std::ungetc(c, reader.source);
141 check_error(ret == EOF,
142 "SeqReaderMultilineFastqModule: ungetc failed.");
143 if (c == '+') {
144 stage = Stage::SEP;
145 } else {
146 stage = Stage::SEQ;
147 }
148 break;
149 }
150 case Stage::SEP: {
151 reader.readline_file_append(tmp, reader.source);
152 stage = Stage::QUAL;
153 tmp.clear();
154 }
155 // fallthrough
156 case Stage::QUAL: {
157 reader.readline_file_append(record.qual, reader.source);
158 rtrim(record.qual);
159 if (record.qual.size() == record.seq.size()) {
160 stage = Stage::HEADER;
161 return true;
162 }
163 check_error(record.qual.size() > record.seq.size(),
164 "SeqReader: Multiline FASTQ reader: Quality string is "
165 "longer than sequence string.");
166 break;
167 }
168 default: {
169 log_error("SeqReader has entered an invalid state.");
170 std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
171 }
172 }
173 }
174 }
175 }
176 return false;
177}
178
179template<typename ReaderType, typename RecordType>
180inline bool
181SeqReaderMultilineFastqModule::read_file(ReaderType& reader, RecordType& record)
182{
183 if (!reader.file_at_end(reader.source)) {
184 reader.readline_file(record.header, reader.source);
185 int c;
186 reader.readline_file(record.seq, reader.source);
187 rtrim(record.seq);
188 for (;;) {
189 c = std::fgetc(reader.source);
190 check_error(c == EOF,
191 "SeqReader: Multiline FASTQ reader: Unexpected end.");
192 const auto ret = std::ungetc(c, reader.source);
193 check_error(ret == EOF, "SeqReaderMultilineFastqModule: ungetc failed.");
194 if (c == '+') {
195 reader.readline_file(tmp, reader.source);
196 reader.readline_file(record.qual, reader.source);
197 rtrim(record.qual);
198 size_t prevlen;
199 while (record.qual.size() < record.seq.size()) {
200 prevlen = record.qual.size();
201 reader.readline_file_append(record.qual, reader.source);
202 check_error(prevlen == record.qual.size(),
203 "SeqReader: Multiline FASTQ reader: Failed to read the "
204 "quality string.");
205 rtrim(record.qual);
206 }
207 check_error(record.qual.size() > record.seq.size(),
208 "SeqReader: Multiline FASTQ reader: Quality string is "
209 "longer than sequence string.");
210 return true;
211 }
212 reader.readline_file_append(record.seq, reader.source);
213 rtrim(record.seq);
214 }
215 }
216 return false;
217}
219
220} // namespace btllib
221
222#endif
Definition aahash.hpp:12
void check_error(bool condition, const std::string &msg)
void rtrim(std::string &s)
void log_error(const std::string &msg)