12#include <btllib/hashing_internals.hpp>
13#include <btllib/status.hpp>
15namespace btllib::hashing_internals {
// A spaced seed expressed as a list of unsigned values.
// NOTE(review): presumably the ignored positions of the seed -- confirm against
// parse_seeds / parsed_seeds_to_blocks, whose loop bodies are not visible here.
17using SpacedSeed = std::vector<unsigned>;
// For one seed: {start, end} position pairs. Consumers in this file iterate
// them as start <= pos < end.
18using SpacedSeedBlocks = std::vector<std::array<unsigned, 2>>;
// For one seed: individual positions that are hashed one character at a time.
19using SpacedSeedMonomers = std::vector<unsigned>;
// Splits every seed string into runs of '1' ("care") and '0' ("ignore")
// characters and appends exactly one entry per seed to `blocks` and `monomers`.
//
// seed_strings: seed patterns made of '0' and '1' characters.
// blocks:       output; receives the {start, end} runs chosen for each seed.
// monomers:     output; receives the single-position runs chosen for each seed.
//
// NOTE(review): the seed's last character is read unconditionally, so an empty
// seed string is not handled here -- confirm callers never pass one.
// NOTE(review): several lines (return type, braces, `else` lines, the updates
// of i_start) are missing from this listing; comments below describe only what
// is visible.
22get_blocks(
const std::vector<std::string>& seed_strings,
23 std::vector<SpacedSeedBlocks>& blocks,
24 std::vector<SpacedSeedMonomers>& monomers)
26 for (
const auto& seed_string : seed_strings) {
// Append the opposite of the seed's last character so that the final run is
// terminated by a '1'/'0' transition inside the scan loop below.
27 const char pad = seed_string[seed_string.length() - 1] ==
'1' ?
'0' :
'1';
28 const std::string padded_string = seed_string + pad;
29 SpacedSeedBlocks care_blocks, ignore_blocks;
30 std::vector<unsigned> care_monos, ignore_monos;
// The kind of the first run is decided by the first character.
32 bool is_care_block = padded_string[0] ==
'1';
33 for (
unsigned pos = 0; pos < padded_string.length(); pos++) {
// A care run ends at the first '0' that follows it.
34 if (is_care_block && padded_string[pos] ==
'0') {
// A run of exactly one position is recorded as a monomer; the block push
// below is presumably the `else` branch for longer runs (the `else` line is
// not visible).
35 if (pos - i_start == 1) {
36 care_monos.push_back(i_start);
38 const std::array<unsigned, 2> block{ { i_start, pos } };
39 care_blocks.push_back(block);
42 is_care_block =
false;
43 }
// An ignore run ends at the first '1' that follows it; same monomer/block
// split as above.
else if (!is_care_block && padded_string[pos] ==
'1') {
44 if (pos - i_start == 1) {
45 ignore_monos.push_back(i_start);
47 const std::array<unsigned, 2> block{ { i_start, pos } };
48 ignore_blocks.push_back(block);
// Size estimate of each representation: two units per block, one per monomer;
// the ignore side carries a fixed extra 2.
54 const unsigned num_cares = care_blocks.size() * 2 + care_monos.size();
55 const unsigned num_ignores =
56 ignore_blocks.size() * 2 + ignore_monos.size() + 2;
// When the ignore representation is smaller, a block spanning the whole seed
// {0, length} is added and the ignore runs are stored for this seed.
57 if (num_ignores < num_cares) {
58 const unsigned string_end = seed_string.length();
59 const std::array<unsigned, 2> block{ { 0, string_end } };
60 ignore_blocks.push_back(block);
61 blocks.push_back(ignore_blocks);
62 monomers.push_back(ignore_monos);
// NOTE(review): presumably the `else` branch (its line is not visible): store
// the care runs instead.
64 blocks.push_back(care_blocks);
65 monomers.push_back(care_monos);
// Converts seeds given as lists of unsigned values into seed strings and then
// delegates to get_blocks() to fill `blocks` and `monomers`.
//
// Each seed string starts as `k` copies of '1'.
// NOTE(review): the line declaring the `k` parameter and the body of the inner
// loop over `seed` are missing from this listing; presumably each listed value
// marks a position that is changed in the string -- confirm.
71parsed_seeds_to_blocks(
const std::vector<std::vector<unsigned>>& seeds,
73 std::vector<SpacedSeedBlocks>& blocks,
74 std::vector<SpacedSeedMonomers>& monomers)
76 std::vector<std::string> seed_strings;
77 for (
const auto& seed : seeds) {
78 std::string seed_string(k,
'1');
79 for (
const auto& i : seed) {
82 seed_strings.push_back(seed_string);
84 get_blocks(seed_strings, blocks, monomers);
// Validates each seed string against `k`.
//
// Two diagnostics are built per seed: one reporting that the seed length
// differs from k, and one reporting that the seed is not symmetric. A reversed
// copy of the seed is constructed for the symmetry diagnostic.
// NOTE(review): the calls that take these messages and their conditions are
// missing from this listing, so which one is an error and which a warning
// cannot be confirmed here.
88check_seeds(
const std::vector<std::string>& seeds,
unsigned k)
90 for (
const auto& seed : seeds) {
92 "SeedNtHash: Spaced seed string length (" +
93 std::to_string(seed.length()) +
") not equal to k=" +
94 std::to_string(k) +
" in " + seed);
95 const std::string reversed(seed.rbegin(), seed.rend());
98 "SeedNtHash: Seed " + seed +
99 " is not symmetric, reverse-complement hashing will be inconsistent");
// Base (non-rolling) multi-seed hash of a single k-mer.
//
// For each of the `m` seeds, the contributions of every block in
// seeds_blocks[i_seed] are XORed into a forward accumulator (fh_seed) and a
// reverse accumulator (rh_seed). Those partial values are saved in
// fh_nomonos / rh_nomonos, then the positions in seeds_monomers[i_seed] are
// XORed in and the results are written to fh_val, rh_val and h_val.
//
// NOTE(review): the return type, several parameters (k, m, m2, fh_val, rh_val,
// h_val, ...), the initialisation of fh_seed/rh_seed, the computation of `d`
// and the conditions selecting the 2-, 3- and 4-base paths are missing from
// this listing.
128ntmsm64(
const char* kmer_seq,
129 const std::vector<SpacedSeedBlocks>& seeds_blocks,
130 const std::vector<SpacedSeedMonomers>& seeds_monomers,
134 uint64_t* fh_nomonos,
135 uint64_t* rh_nomonos,
142 uint64_t fh_seed, rh_seed;
143 for (
unsigned i_seed = 0; i_seed < m; i_seed++) {
146 for (
const auto& block : seeds_blocks[i_seed]) {
147 uint8_t fh_loc, rh_loc, d;
// i / j are the start / end positions of the current block.
149 unsigned i = block[0];
150 unsigned j = block[1];
// Two-base path: the CONVERT_TAB codes of kmer_seq[i] and kmer_seq[i + 1] are
// packed (2-bit shift per base) into an index for DIMER_TAB. The reverse index
// uses RC_CONVERT_TAB with the bases in the opposite order.
153 fh_loc = (CONVERT_TAB[(
unsigned char)kmer_seq[i]] << 2) |
154 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]]);
156 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]] << 2) |
157 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i]]);
158 x = DIMER_TAB[fh_loc];
// The table value is rotated with srol(x, d) only when d > 0.
160 fh_seed ^= d > 0 ? srol(x, d) : x;
161 x = DIMER_TAB[rh_loc];
163 rh_seed ^= d > 0 ? srol(x, d) : x;
// Three-base path: same packing scheme with three codes, looked up in
// TRIMER_TAB.
167 (CONVERT_TAB[(
unsigned char)kmer_seq[i]] << 4) |
168 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]] << 2) |
169 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 2]]);
171 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 2]] << 4) |
172 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]] << 2) |
173 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i]]);
174 x = TRIMER_TAB[fh_loc];
176 fh_seed ^= d > 0 ? srol(x, d) : x;
177 x = TRIMER_TAB[rh_loc];
179 rh_seed ^= d > 0 ? srol(x, d) : x;
// Four-base path: four codes packed into an index for TETRAMER_TAB.
183 (CONVERT_TAB[(
unsigned char)kmer_seq[i]] << 6) |
184 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]] << 4) |
185 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 2]] << 2) |
186 (CONVERT_TAB[(
unsigned char)kmer_seq[i + 3]]);
188 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 3]] << 6) |
189 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 2]] << 4) |
190 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i + 1]] << 2) |
191 (RC_CONVERT_TAB[(
unsigned char)kmer_seq[i]]);
192 x = TETRAMER_TAB[fh_loc];
194 fh_seed ^= d > 0 ? srol(x, d) : x;
195 x = TETRAMER_TAB[rh_loc];
197 rh_seed ^= d > 0 ? srol(x, d) : x;
// Per-position path: walks the block one character at a time. A character
// equal to SEED_N is special-cased (the body of that branch is not visible).
// Forward uses rotation amount k - 1 - pos; reverse masks the character with
// CP_OFF and uses rotation amount pos.
200 for (
unsigned pos = block[0]; pos < block[1]; pos++) {
201 if (kmer_seq[pos] == SEED_N) {
205 fh_seed ^= srol_table((
unsigned char)kmer_seq[pos], k - 1 - pos);
206 rh_seed ^= srol_table((
unsigned char)kmer_seq[pos] & CP_OFF, pos);
// Snapshot taken before the monomer positions are mixed in.
211 fh_nomonos[i_seed] = fh_seed;
212 rh_nomonos[i_seed] = rh_seed;
// Mix in each monomer position with the same per-character formula as above.
213 for (
const auto& pos : seeds_monomers[i_seed]) {
214 fh_seed ^= srol_table((
unsigned char)kmer_seq[pos], k - 1 - pos);
215 rh_seed ^= srol_table((
unsigned char)kmer_seq[pos] & CP_OFF, pos);
217 fh_val[i_seed] = fh_seed;
218 rh_val[i_seed] = rh_seed;
// h_val stores m2 consecutive hashes per seed; slot 0 is
// canonical(fh_seed, rh_seed).
219 i_base = i_seed * m2;
220 h_val[i_base] = canonical(fh_seed, rh_seed);
// Remaining slots are derived from slot 0. Note that '*' binds tighter than
// '^', so the multiplier is i_hash ^ (k * MULTISEED).
221 for (
unsigned i_hash = 1; i_hash < m2; i_hash++) {
222 h_val[i_base + i_hash] = h_val[i_base] * (i_hash ^ k * MULTISEED);
223 h_val[i_base + i_hash] ^= h_val[i_base + i_hash] >> MULTISHIFT;
// Shared body of the rolling ntmsm64 / ntmsm64l variants. The four macro
// parameters (ROL_HANDLING, IN_HANDLING, OUT_HANDLING, ROR_HANDLING) are
// statement fragments supplied at each use site.
//
// Visible flow per seed: for every block, XOR the outgoing (char_out) and
// incoming (char_in) characters into fh_seed / rh_seed; store the result in
// fh_nomonos / rh_nomonos; XOR in each monomer character, read at
// kmer_seq[pos + 1]; then write fh_val, rh_val and the m2 hashes in h_val
// (slot 0 is canonical(fh_seed, rh_seed), the others are derived from it).
//
// NOTE(review): the lines that expand the four macro parameters and assign
// i_out / i_in are missing from this listing.
// Keep comments out of the continuation lines below: a line without a trailing
// backslash ends the macro, and a `//` comment that ends with a backslash
// swallows the following line.
229#define NTMSM64(ROL_HANDLING, IN_HANDLING, OUT_HANDLING, ROR_HANDLING) \
230 unsigned char char_out, char_in; \
231 uint64_t fh_seed, rh_seed; \
232 unsigned i_out, i_in, i_base; \
233 for (unsigned i_seed = 0; i_seed < m; i_seed++) { \
235 for (const auto& block : seeds_blocks[i_seed]) \
239 fh_seed ^= srol_table(char_out, k - i_out); \
240 fh_seed ^= srol_table(char_in, k - i_in); \
241 rh_seed ^= srol_table(char_out & CP_OFF, i_out); \
242 rh_seed ^= srol_table(char_in & CP_OFF, i_in); \
245 fh_nomonos[i_seed] = fh_seed; \
246 rh_nomonos[i_seed] = rh_seed; \
247 for (const auto& pos : seeds_monomers[i_seed]) { \
248 fh_seed ^= srol_table((unsigned char)kmer_seq[pos + 1], k - 1 - pos); \
249 rh_seed ^= srol_table((unsigned char)kmer_seq[pos + 1] & CP_OFF, pos); \
251 fh_val[i_seed] = fh_seed; \
252 rh_val[i_seed] = rh_seed; \
253 i_base = i_seed * m2; \
254 h_val[i_base] = canonical(fh_seed, rh_seed); \
255 for (unsigned i_hash = 1; i_hash < m2; i_hash++) { \
256 h_val[i_base + i_hash] = h_val[i_base] * (i_hash ^ k * MULTISEED); \
257 h_val[i_base + i_hash] ^= h_val[i_base + i_hash] >> MULTISHIFT; \
// Rolling update over a `const char*` k-mer, implemented with the NTMSM64
// macro. The leading macro argument restarts each seed from the saved
// fh_nomonos / rh_nomonos values, applying srol() to the forward one; the
// trailing argument applies sror() to rh_seed. char_in / char_out are read
// from kmer_seq at i_in / i_out.
// NOTE(review): the return type, remaining parameters and the assignments of
// i_in / i_out are missing from this listing.
283ntmsm64(
const char* kmer_seq,
284 const std::vector<SpacedSeedBlocks>& seeds_blocks,
285 const std::vector<SpacedSeedMonomers>& seeds_monomers,
289 uint64_t* fh_nomonos,
290 uint64_t* rh_nomonos,
295 NTMSM64(fh_seed = srol(fh_nomonos[i_seed]); rh_seed = rh_nomonos[i_seed];
297 char_in = (
unsigned char)kmer_seq[i_in];
299 char_out = (
unsigned char)kmer_seq[i_out];
300 , rh_seed = sror(rh_seed);)
// Rolling update over a std::deque<char> k-mer. The visible macro arguments
// are the same as in the `const char*` overload: srol() on the saved forward
// value, sror() on rh_seed, characters indexed at i_in / i_out.
// NOTE(review): the return type, remaining parameters and the assignments of
// i_in / i_out are missing from this listing.
304ntmsm64(
const std::deque<char>& kmer_seq,
305 const std::vector<SpacedSeedBlocks>& seeds_blocks,
306 const std::vector<SpacedSeedMonomers>& seeds_monomers,
310 uint64_t* fh_nomonos,
311 uint64_t* rh_nomonos,
316 NTMSM64(fh_seed = srol(fh_nomonos[i_seed]); rh_seed = rh_nomonos[i_seed];
318 char_in = (
unsigned char)kmer_seq[i_in];
320 char_out = (
unsigned char)kmer_seq[i_out];
321 , rh_seed = sror(rh_seed);)
// Rolling update in the opposite direction over a `const char*` k-mer. The
// roles of the two strands are mirrored relative to ntmsm64: srol() is applied
// to the saved reverse value (rh_nomonos) and sror() to fh_seed.
// NOTE(review): the return type, remaining parameters and the assignments of
// i_in / i_out are missing from this listing.
346ntmsm64l(
const char* kmer_seq,
347 const std::vector<SpacedSeedBlocks>& seeds_blocks,
348 const std::vector<SpacedSeedMonomers>& seeds_monomers,
352 uint64_t* fh_nomonos,
353 uint64_t* rh_nomonos,
358 NTMSM64(fh_seed = fh_nomonos[i_seed]; rh_seed = srol(rh_nomonos[i_seed]);
360 char_in = (
unsigned char)kmer_seq[i_in];
362 char_out = (
unsigned char)kmer_seq[i_out];
363 , fh_seed = sror(fh_seed);)
// std::deque<char> overload of ntmsm64l. The visible macro arguments match the
// `const char*` overload: srol() on the saved reverse value, sror() on
// fh_seed, characters indexed at i_in / i_out.
// NOTE(review): the return type, remaining parameters and the assignments of
// i_in / i_out are missing from this listing.
367ntmsm64l(
const std::deque<char>& kmer_seq,
368 const std::vector<SpacedSeedBlocks>& seeds_blocks,
369 const std::vector<SpacedSeedMonomers>& seeds_monomers,
373 uint64_t* fh_nomonos,
374 uint64_t* rh_nomonos,
379 NTMSM64(fh_seed = fh_nomonos[i_seed]; rh_seed = srol(rh_nomonos[i_seed]);
381 char_in = (
unsigned char)kmer_seq[i_in];
383 char_out = (
unsigned char)kmer_seq[i_out];
384 , fh_seed = sror(fh_seed);)
// Variant of the rolling ntmsm64 that takes the incoming character
// explicitly: when i_in is beyond the last k-mer index (i_in > k - 1) the
// value `in` is used, otherwise the character is read from kmer_seq[i_in].
// NOTE(review): the declaration of `in`, the return type, the `NTMSM64(` line
// and the assignments of i_in / i_out are missing from this listing.
409ntmsm64(
const char* kmer_seq,
411 const std::vector<SpacedSeedBlocks>& seeds_blocks,
412 const std::vector<SpacedSeedMonomers>& seeds_monomers,
416 uint64_t* fh_nomonos,
417 uint64_t* rh_nomonos,
423 fh_seed = srol(fh_nomonos[i_seed]); rh_seed = rh_nomonos[i_seed];
425 if (i_in > k - 1) { char_in = in; }
else {
426 char_in = (
unsigned char)kmer_seq[i_in];
429 char_out = (
unsigned char)kmer_seq[i_out];
430 , rh_seed = sror(rh_seed);)
// Variant of ntmsm64l that takes the incoming character explicitly: when
// i_in > k - 1 the value `in` is used, otherwise kmer_seq[i_in] is read.
// srol() is applied to the saved reverse value and sror() to fh_seed.
// NOTE(review): the declaration of `in`, the return type, the `NTMSM64(` line
// and the assignments of i_in / i_out are missing from this listing.
455ntmsm64l(
const char* kmer_seq,
457 const std::vector<SpacedSeedBlocks>& seeds_blocks,
458 const std::vector<SpacedSeedMonomers>& seeds_monomers,
462 uint64_t* fh_nomonos,
463 uint64_t* rh_nomonos,
469 fh_seed = fh_nomonos[i_seed]; rh_seed = srol(rh_nomonos[i_seed]);
471 if (i_in > k - 1) { char_in = in; }
else {
472 char_in = (
unsigned char)kmer_seq[i_in];
475 char_out = (
unsigned char)kmer_seq[i_out];
476 , fh_seed = sror(fh_seed);)
// Bring selected hashing_internals names into the current scope so the code
// below can use them unqualified.
// NOTE(review): the line opening the enclosing namespace is not visible here.
483using hashing_internals::check_seeds;
484using hashing_internals::get_blocks;
485using hashing_internals::ntmsm64;
486using hashing_internals::ntmsm64l;
487using hashing_internals::parsed_seeds_to_blocks;
488using hashing_internals::SEED_N;
489using hashing_internals::SEED_TAB;
// Converts seed strings into one std::vector<unsigned> per seed, collected in
// `seed_set` in input order.
// NOTE(review): the function name/parameter line, the body of the
// per-character loop and the return statement are missing from this listing;
// which characters contribute a value to `seed` cannot be confirmed here.
495inline std::vector<std::vector<unsigned>>
498 std::vector<std::vector<unsigned>> seed_set;
499 for (
const auto& seed_string : seed_strings) {
500 std::vector<unsigned> seed;
502 for (
const auto& c : seed_string) {
508 seed_set.push_back(seed);
// SeedNtHash constructor taking seeds as strings.
// Allocates one uint64_t per seed for each of fwd_hash_nomonos,
// rev_hash_nomonos, fwd_hash and rev_hash, and num_hashes_per_seed values per
// seed for hash_arr. The body validates the seeds with check_seeds() and
// derives blocks / monomers with get_blocks().
// NOTE(review): the leading parameters, some member initialisers and the call
// that owns the "k should be equal to seed string lengths" message are missing
// from this listing.
532 const std::vector<std::string>& seeds,
533 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed,
534 hashing_internals::K_TYPE k,
538 , num_hashes_per_seed(num_hashes_per_seed)
542 , fwd_hash_nomonos(new uint64_t[seeds.size()])
543 , rev_hash_nomonos(new uint64_t[seeds.size()])
544 , fwd_hash(new uint64_t[seeds.size()])
545 , rev_hash(new uint64_t[seeds.size()])
546 , hash_arr(new uint64_t[num_hashes_per_seed * seeds.size()])
548 check_seeds(seeds, k);
550 "SeedNtHash: k should be equal to seed string lengths");
551 get_blocks(seeds, blocks, monomers);
// Convenience constructor for a std::string sequence with string seeds:
// delegates to the pointer/length constructor using seq.data() and seq.size().
564 const std::vector<std::string>& seeds,
565 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed,
566 hashing_internals::K_TYPE k,
568 :
SeedNtHash(seq.data(), seq.size(), seeds, num_hashes_per_seed, k, pos)
// SeedNtHash constructor taking seeds that were already parsed into lists of
// unsigned values. Buffer sizes match the string-seed constructor (one value
// per seed, and num_hashes_per_seed values per seed for hash_arr). Blocks and
// monomers are derived with parsed_seeds_to_blocks(seeds, k, ...).
// NOTE(review): no check_seeds() call is visible on this path; the leading
// parameters and some member initialisers are missing from this listing.
584 const std::vector<std::vector<unsigned>>& seeds,
585 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed,
586 hashing_internals::K_TYPE k,
590 , num_hashes_per_seed(num_hashes_per_seed)
594 , fwd_hash_nomonos(new uint64_t[seeds.size()])
595 , rev_hash_nomonos(new uint64_t[seeds.size()])
596 , fwd_hash(new uint64_t[seeds.size()])
597 , rev_hash(new uint64_t[seeds.size()])
598 , hash_arr(new uint64_t[num_hashes_per_seed * seeds.size()])
600 parsed_seeds_to_blocks(seeds, k, blocks, monomers);
// Convenience constructor for a std::string sequence with parsed seeds:
// delegates to the pointer/length constructor using seq.data() and seq.size().
613 const std::vector<std::vector<unsigned>>& seeds,
614 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed,
615 hashing_internals::K_TYPE k,
617 :
SeedNtHash(seq.data(), seq.size(), seeds, num_hashes_per_seed, k, pos)
// Copy constructor: copies the scalar state and the blocks / monomers vectors,
// allocates fresh hash buffers sized from obj.blocks.size() (one block list
// per seed), and copies the buffer contents with std::memcpy.
// NOTE(review): the signature, some initialisers, the `std::memcpy(` heads for
// fwd_hash / rev_hash and the source argument of the hash_arr copy are missing
// from this listing.
623 , seq_len(obj.seq_len)
624 , num_hashes_per_seed(obj.num_hashes_per_seed)
627 , initialized(obj.initialized)
629 , monomers(obj.monomers)
630 , fwd_hash_nomonos(new uint64_t[obj.blocks.size()])
631 , rev_hash_nomonos(new uint64_t[obj.blocks.size()])
632 , fwd_hash(new uint64_t[obj.blocks.size()])
633 , rev_hash(new uint64_t[obj.blocks.size()])
634 , hash_arr(new uint64_t[obj.num_hashes_per_seed * obj.blocks.size()])
636 std::memcpy(fwd_hash_nomonos.get(),
637 obj.fwd_hash_nomonos.get(),
638 obj.blocks.size() *
sizeof(uint64_t));
639 std::memcpy(rev_hash_nomonos.get(),
640 obj.rev_hash_nomonos.get(),
641 obj.blocks.size() *
sizeof(uint64_t));
643 fwd_hash.get(), obj.fwd_hash.get(), obj.blocks.size() *
sizeof(uint64_t));
645 rev_hash.get(), obj.rev_hash.get(), obj.blocks.size() *
sizeof(uint64_t));
// hash_arr holds num_hashes_per_seed values for every seed.
646 std::memcpy(hash_arr.get(),
648 obj.num_hashes_per_seed * obj.blocks.size() *
sizeof(uint64_t));
// Fragment of roll(): guards against rolling past the last k-mer, inspects the
// incoming character seq[pos + k] for SEED_N, and passes the nomonos buffers
// to the hashing call.
// NOTE(review): seq_len is a size_t, so `seq_len - k` presumably wraps around
// when k > seq_len -- confirm that case is rejected elsewhere. The branch
// bodies and the call head are missing from this listing.
663 if (pos >= seq_len - k) {
666 if (SEED_TAB[(
unsigned char)seq[pos + k]] == SEED_N) {
676 fwd_hash_nomonos.get(),
677 rev_hash_nomonos.get(),
// Fragment of roll_back(): inspects the character just before the current
// window, seq[pos - 1], for SEED_N -- first combined with `pos >= k`, then on
// its own -- and otherwise updates the hashes with ntmsm64l starting at
// seq + pos - 1.
// NOTE(review): seq[pos - 1] is evaluated before `pos >= k`; this assumes
// pos > 0 was already checked in lines missing from this listing -- confirm.
697 if (SEED_TAB[(
unsigned char)seq[pos - 1]] == SEED_N && pos >= k) {
701 if (SEED_TAB[(
unsigned char)seq[pos - 1]] == SEED_N) {
704 ntmsm64l(seq + pos - 1,
710 fwd_hash_nomonos.get(),
711 rev_hash_nomonos.get(),
// Fragment of peek(): after the end-of-sequence guard, forwards to the
// peek(char) overload with the next sequence character seq[pos + k].
// NOTE(review): the body of the guard is missing from this listing.
726 if (pos >= seq_len - k) {
729 return peek(seq[pos + k]);
// Fragment of peek(char): makes temporary copies of the four per-seed hash
// buffers (one uint64_t per entry of `blocks`) and hands the nomonos copies to
// the hashing call, so the call works on the copies rather than the member
// buffers.
// NOTE(review): the `std::memcpy(` heads for fwd_hash_cpy / rev_hash_cpy and
// the head of the hashing call are missing from this listing.
753 const std::unique_ptr<uint64_t[]> fwd_hash_nomonos_cpy(
754 new uint64_t[blocks.size()]);
755 const std::unique_ptr<uint64_t[]> rev_hash_nomonos_cpy(
756 new uint64_t[blocks.size()]);
757 const std::unique_ptr<uint64_t[]> fwd_hash_cpy(
new uint64_t[blocks.size()]);
758 const std::unique_ptr<uint64_t[]> rev_hash_cpy(
new uint64_t[blocks.size()]);
759 std::memcpy(fwd_hash_nomonos_cpy.get(),
760 fwd_hash_nomonos.get(),
761 blocks.size() *
sizeof(uint64_t));
762 std::memcpy(rev_hash_nomonos_cpy.get(),
763 rev_hash_nomonos.get(),
764 blocks.size() *
sizeof(uint64_t));
766 fwd_hash_cpy.get(), fwd_hash.get(), blocks.size() *
sizeof(uint64_t));
768 rev_hash_cpy.get(), rev_hash.get(), blocks.size() *
sizeof(uint64_t));
776 fwd_hash_nomonos_cpy.get(),
777 rev_hash_nomonos_cpy.get(),
// Fragment of peek_back(char): makes temporary copies of the four per-seed
// hash buffers and calls ntmsm64l, starting at seq + pos - 1, with the nomonos
// copies rather than the member buffers.
// NOTE(review): the `std::memcpy(` heads for fwd_hash_cpy / rev_hash_cpy and
// the remaining ntmsm64l arguments are missing from this listing.
793 const std::unique_ptr<uint64_t[]> fwd_hash_nomonos_cpy(
794 new uint64_t[blocks.size()]);
795 const std::unique_ptr<uint64_t[]> rev_hash_nomonos_cpy(
796 new uint64_t[blocks.size()]);
797 const std::unique_ptr<uint64_t[]> fwd_hash_cpy(
new uint64_t[blocks.size()]);
798 const std::unique_ptr<uint64_t[]> rev_hash_cpy(
new uint64_t[blocks.size()]);
799 std::memcpy(fwd_hash_nomonos_cpy.get(),
800 fwd_hash_nomonos.get(),
801 blocks.size() *
sizeof(uint64_t));
802 std::memcpy(rev_hash_nomonos_cpy.get(),
803 rev_hash_nomonos.get(),
804 blocks.size() *
sizeof(uint64_t));
806 fwd_hash_cpy.get(), fwd_hash.get(), blocks.size() *
sizeof(uint64_t));
808 rev_hash_cpy.get(), rev_hash.get(), blocks.size() *
sizeof(uint64_t));
809 ntmsm64l(seq + pos - 1,
816 fwd_hash_nomonos_cpy.get(),
817 rev_hash_nomonos_cpy.get(),
// Returns a read-only pointer to the hash array owned by this object
// (num_hashes_per_seed values per seed).
828 const uint64_t*
hashes()
const {
return hash_arr.get(); }
// Returns the total number of hashes: num_hashes_per_seed multiplied by the
// number of entries in `blocks` (one entry per seed).
841 unsigned get_hash_num()
const {
return num_hashes_per_seed * blocks.size(); }
// Body of get_hash_num_per_seed(): returns the stored per-seed hash count.
849 return num_hashes_per_seed;
// Returns the stored k value.
856 hashing_internals::K_TYPE
get_k()
const {
return k; }
// Length of the sequence being hashed; fixed at construction.
872 const size_t seq_len;
// Number of hash values produced for each seed.
873 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed;
// k-mer size; validated against the seed string lengths by check_seeds().
874 hashing_internals::K_TYPE k;
// One entry per seed: the {start, end} runs and single positions to hash.
877 std::vector<hashing_internals::SpacedSeedBlocks> blocks;
878 std::vector<hashing_internals::SpacedSeedMonomers> monomers;
// Per-seed forward / reverse values saved before monomer positions are added.
879 std::unique_ptr<uint64_t[]> fwd_hash_nomonos;
880 std::unique_ptr<uint64_t[]> rev_hash_nomonos;
// Per-seed forward / reverse values.
881 std::unique_ptr<uint64_t[]> fwd_hash;
882 std::unique_ptr<uint64_t[]> rev_hash;
// num_hashes_per_seed values for every seed; exposed through hashes().
883 std::unique_ptr<uint64_t[]> hash_arr;
// Fragment of a helper whose signature is not visible in this listing.
// The loop keeps going while a full k-mer window still fits
// (pos < seq_len - k + 1) and ntmsm64() on that window returns false; the
// following check detects that pos moved past the last valid start.
// NOTE(review): the loop body, the remaining ntmsm64 arguments and the body of
// the final `if` are missing; seq_len - k is unsigned arithmetic.
892 while (pos < seq_len - k + 1 && !ntmsm64(seq + pos,
898 fwd_hash_nomonos.get(),
899 rev_hash_nomonos.get(),
906 if (pos > seq_len - k) {
// BlindSeedNtHash constructor: copies the k characters starting at seq + pos
// into the internal deque, allocates the per-seed hash buffers (one value per
// seed, num_hashes_per_seed per seed for hash_arr), validates the seeds with
// check_seeds(), derives blocks / monomers with get_blocks(), and then passes
// the nomonos buffers to a hashing call.
// NOTE(review): the leading parameter, some initialisers and the head of the
// hashing call are missing from this listing.
933 const std::vector<std::string>& seeds,
934 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed,
935 hashing_internals::K_TYPE k,
937 : seq(seq + pos, seq + pos + k)
938 , num_hashes_per_seed(num_hashes_per_seed)
941 , fwd_hash_nomonos(new uint64_t[seeds.size()])
942 , rev_hash_nomonos(new uint64_t[seeds.size()])
943 , fwd_hash(new uint64_t[seeds.size()])
944 , rev_hash(new uint64_t[seeds.size()])
945 , hash_arr(new uint64_t[num_hashes_per_seed * seeds.size()])
947 check_seeds(seeds, k);
948 get_blocks(seeds, blocks, monomers);
956 fwd_hash_nomonos.get(),
957 rev_hash_nomonos.get(),
// Copy constructor: copies the deque, scalar state, blocks and monomers,
// allocates fresh hash buffers sized from seed_nthash.blocks.size(), and
// copies their contents with std::memcpy.
// The unqualified num_hashes_per_seed used for the hash_arr size names the
// constructor's own member, which is initialised earlier in this list.
// NOTE(review): the signature, one initialiser and the end of the final memcpy
// call are missing from this listing.
965 : seq(seed_nthash.seq)
966 , num_hashes_per_seed(seed_nthash.num_hashes_per_seed)
968 , pos(seed_nthash.pos)
969 , blocks(seed_nthash.blocks)
970 , monomers(seed_nthash.monomers)
971 , fwd_hash_nomonos(new uint64_t[seed_nthash.blocks.size()])
972 , rev_hash_nomonos(new uint64_t[seed_nthash.blocks.size()])
973 , fwd_hash(new uint64_t[seed_nthash.blocks.size()])
974 , rev_hash(new uint64_t[seed_nthash.blocks.size()])
975 , hash_arr(new uint64_t[num_hashes_per_seed * seed_nthash.blocks.size()])
977 std::memcpy(fwd_hash_nomonos.get(),
978 seed_nthash.fwd_hash_nomonos.get(),
979 seed_nthash.blocks.size() *
sizeof(uint64_t));
980 std::memcpy(rev_hash_nomonos.get(),
981 seed_nthash.rev_hash_nomonos.get(),
982 seed_nthash.blocks.size() *
sizeof(uint64_t));
983 std::memcpy(fwd_hash.get(),
984 seed_nthash.fwd_hash.get(),
985 seed_nthash.blocks.size() *
sizeof(uint64_t));
986 std::memcpy(rev_hash.get(),
987 seed_nthash.rev_hash.get(),
988 seed_nthash.blocks.size() *
sizeof(uint64_t));
989 std::memcpy(hash_arr.get(),
990 seed_nthash.hash_arr.get(),
991 num_hashes_per_seed * seed_nthash.blocks.size() *
// Fragment of roll(char): appends the incoming character to the back of the
// deque and passes the member hash buffers to the hashing call.
// NOTE(review): the call head, its other arguments and any removal of the
// oldest character are missing from this listing.
1004 seq.push_back(char_in);
1010 num_hashes_per_seed,
1011 fwd_hash_nomonos.get(),
1012 rev_hash_nomonos.get(),
// Fragment of roll_back(char): prepends the incoming character to the front of
// the deque and passes the member hash buffers to the hashing call.
// NOTE(review): the call head, its other arguments and any removal of the
// last character are missing from this listing.
1025 seq.push_front(char_in);
1031 num_hashes_per_seed,
1032 fwd_hash_nomonos.get(),
1033 rev_hash_nomonos.get(),
// Returns a read-only pointer to the hash array owned by this object
// (num_hashes_per_seed values per seed).
1045 const uint64_t*
hashes()
const {
return hash_arr.get(); }
// Returns the total number of hashes: num_hashes_per_seed multiplied by the
// number of entries in `blocks` (one entry per seed).
1058 unsigned get_hash_num()
const {
return num_hashes_per_seed * blocks.size(); }
// Body of get_hash_num_per_seed(): returns the stored per-seed hash count.
1066 return num_hashes_per_seed;
// Returns the stored k value.
1073 hashing_internals::K_TYPE
get_k()
const {
return k; }
// Characters of the current k-mer; roll() pushes at the back and roll_back()
// pushes at the front.
1088 std::deque<char> seq;
// Number of hash values produced for each seed.
1089 hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed;
// k-mer size; validated against the seed string lengths by check_seeds().
1090 hashing_internals::K_TYPE k;
// One entry per seed: the {start, end} runs and single positions to hash.
1092 std::vector<hashing_internals::SpacedSeedBlocks> blocks;
1093 std::vector<hashing_internals::SpacedSeedMonomers> monomers;
// Per-seed forward / reverse values saved before monomer positions are added.
1094 std::unique_ptr<uint64_t[]> fwd_hash_nomonos;
1095 std::unique_ptr<uint64_t[]> rev_hash_nomonos;
// Per-seed forward / reverse values.
1096 std::unique_ptr<uint64_t[]> fwd_hash;
1097 std::unique_ptr<uint64_t[]> rev_hash;
// num_hashes_per_seed values for every seed; exposed through hashes().
1098 std::unique_ptr<uint64_t[]> hash_arr;
Definition nthash_seed.hpp:919
void roll_back(char char_in)
Definition nthash_seed.hpp:1023
BlindSeedNtHash(const char *seq, const std::vector< std::string > &seeds, hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed, hashing_internals::K_TYPE k, long pos=0)
Definition nthash_seed.hpp:932
void roll(char char_in)
Definition nthash_seed.hpp:1002
uint64_t * get_forward_hash() const
Definition nthash_seed.hpp:1079
hashing_internals::K_TYPE get_k() const
Definition nthash_seed.hpp:1073
long get_pos() const
Definition nthash_seed.hpp:1052
hashing_internals::NUM_HASHES_TYPE get_hash_num_per_seed() const
Definition nthash_seed.hpp:1064
unsigned get_hash_num() const
Definition nthash_seed.hpp:1058
uint64_t * get_reverse_hash() const
Definition nthash_seed.hpp:1085
const uint64_t * hashes() const
Definition nthash_seed.hpp:1045
Definition nthash_seed.hpp:517
bool peek_back(char char_in)
Definition nthash_seed.hpp:788
uint64_t * get_forward_hash() const
Definition nthash_seed.hpp:862
bool roll_back()
Definition nthash_seed.hpp:689
uint64_t * get_reverse_hash() const
Definition nthash_seed.hpp:868
bool peek_back()
Definition nthash_seed.hpp:736
size_t get_pos() const
Definition nthash_seed.hpp:835
SeedNtHash(const std::string &seq, const std::vector< std::string > &seeds, hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed, hashing_internals::K_TYPE k, size_t pos=0)
Definition nthash_seed.hpp:563
SeedNtHash(const char *seq, size_t seq_len, const std::vector< std::vector< unsigned > > &seeds, hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed, hashing_internals::K_TYPE k, size_t pos=0)
Definition nthash_seed.hpp:582
SeedNtHash(const std::string &seq, const std::vector< std::vector< unsigned > > &seeds, hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed, hashing_internals::K_TYPE k, size_t pos=0)
Definition nthash_seed.hpp:612
bool peek()
Definition nthash_seed.hpp:724
bool roll()
Definition nthash_seed.hpp:658
unsigned get_hash_num() const
Definition nthash_seed.hpp:841
bool peek(char char_in)
Definition nthash_seed.hpp:748
SeedNtHash(const char *seq, size_t seq_len, const std::vector< std::string > &seeds, hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed, hashing_internals::K_TYPE k, size_t pos=0)
Definition nthash_seed.hpp:530
hashing_internals::K_TYPE get_k() const
Definition nthash_seed.hpp:856
const uint64_t * hashes() const
Definition nthash_seed.hpp:828
hashing_internals::NUM_HASHES_TYPE get_hash_num_per_seed() const
Definition nthash_seed.hpp:847
void check_error(bool condition, const std::string &msg)
void check_warning(bool condition, const std::string &msg)
std::vector< std::vector< unsigned > > parse_seeds(const std::vector< std::string > &seed_strings)
Definition nthash_seed.hpp:496