btllib
Loading...
Searching...
No Matches
aahash.hpp
1#ifndef BTLLIB_AAHASH_HPP
2#define BTLLIB_AAHASH_HPP
3
4#include <cstdint>
5#include <cstring>
6#include <limits>
7#include <memory>
8#include <string>
9#include <string_view>
10#include <vector>
11
12namespace btllib {
13
/** A spaced seed stored as one unsigned value per seed position. */
using SpacedSeed = std::vector<unsigned>;

/**
 * Convert textual seeds into their numeric SpacedSeed form.
 *
 * Every character `c` of a seed string is mapped to `c - '0'`, so the
 * digit characters '0'..'9' become the values 0..9. No validation is
 * performed: a non-digit character is converted with the same arithmetic
 * and yields whatever `c - '0'` evaluates to once cast to unsigned.
 *
 * @param seeds Seed strings, one string per seed.
 * @return One SpacedSeed per input string, in the same order; an empty
 * input string yields an empty SpacedSeed.
 */
inline std::vector<SpacedSeed>
aa_parse_seeds(const std::vector<std::string>& seeds)
{
  std::vector<SpacedSeed> seed_vec;
  // Reserving up front keeps the references returned by emplace_back()
  // valid and avoids repeated reallocation.
  seed_vec.reserve(seeds.size());
  for (const auto& seed : seeds) {
    // Build each parsed seed directly inside the result vector instead of
    // filling a temporary and copying it in afterwards.
    SpacedSeed& parsed = seed_vec.emplace_back();
    parsed.reserve(seed.size());
    for (const char c : seed) {
      parsed.push_back(static_cast<unsigned>(c - '0'));
    }
  }
  return seed_vec;
}
29
30class AAHash;
31
32class SeedAAHash;
33
/**
 * Hashing state over a character sequence.
 *
 * The object does not own the sequence: it keeps the raw pointer and
 * length taken from the `std::string_view` passed to the constructor, so
 * the viewed characters must outlive this object (and any copy of it).
 *
 * Because `hash_num` and `k` are const members, instances can be copy- or
 * move-constructed but not assigned.
 */
class AAHash
{
  static constexpr const char* HASH_FN_NAME = "aahash1";

private:
  friend class SeedAAHash;

  // Defined out of line. NOTE(review): presumably computes the first set
  // of hashes and sets `initialized` -- confirm against the implementation.
  bool init();

  const char* seq;        // Borrowed pointer to the first character.
  size_t seq_len;         // Number of characters viewed through `seq`.
  const uint8_t hash_num; // Number of entries in `hashes_array`.
  const uint16_t k;
  unsigned level;

  size_t pos;
  bool initialized = false;
  std::unique_ptr<uint64_t[]> hashes_array; // Owns `hash_num` hash slots.

public:
  /**
   * Construct the hashing state. No hashing is performed here; the hash
   * slots are zero-initialized so that hashes() and get_forward_hash()
   * never read indeterminate memory on a freshly constructed object.
   *
   * @param seq Sequence to hash; only borrowed, must outlive this object.
   * @param hash_num Number of hash values stored per position.
   * @param k Value reported by get_k() (presumably the k-mer size --
   * confirm against the implementation).
   * @param level Value reported by get_level().
   * @param pos Initial value reported by get_pos().
   */
  AAHash(std::string_view seq,
         uint8_t hash_num,
         uint16_t k,
         unsigned level,
         size_t pos = 0)
    : seq(seq.data())
    , seq_len(seq.size())
    , hash_num(hash_num)
    , k(k)
    , level(level)
    , pos(pos)
    , hashes_array(std::make_unique<uint64_t[]>(hash_num))
  {
  }

  /**
   * Copy all state, including a deep copy of the hash slots. The copy
   * refers to the same underlying sequence characters as the source.
   */
  AAHash(const AAHash& aahash)
    : seq(aahash.seq)
    , seq_len(aahash.seq_len)
    , hash_num(aahash.hash_num)
    , k(aahash.k)
    , level(aahash.level)
    , pos(aahash.pos)
    , initialized(aahash.initialized)
    , hashes_array(std::make_unique<uint64_t[]>(aahash.hash_num))
  {
    std::memcpy(hashes_array.get(),
                aahash.hashes_array.get(),
                hash_num * sizeof(uint64_t));
  }

  AAHash(AAHash&&) = default;

  // Defined out of line. NOTE(review): presumably advances to the next
  // hashable position and refreshes the values exposed by hashes();
  // the meaning of the bool result should be confirmed there.
  bool roll();

  /** Pointer to the `get_hash_num()` hash slots owned by this object. */
  const uint64_t* hashes() const { return hashes_array.get(); }
  /** Current value of the position counter. */
  size_t get_pos() const { return pos; }
  /** Number of hash slots, as passed to the constructor. */
  unsigned get_hash_num() const { return hash_num; }
  /** The `k` passed to the constructor. */
  unsigned get_k() const { return k; }
  /** First hash slot; requires the object to hold at least one slot. */
  uint64_t get_forward_hash() const { return hashes_array[0]; }
  /** The `level` passed to the constructor. */
  unsigned get_level() const { return level; }
  /** Borrowed pointer to the start of the sequence. */
  const char* get_seq() const { return seq; }
};
118
120{
121private:
122 AAHash aahash;
123 const unsigned hash_num_per_seed;
124 std::unique_ptr<uint64_t[]> hashes_array;
125 std::vector<SpacedSeed> seeds;
126 bool verify_seed();
128 void init();
129
130public:
142 SeedAAHash(const char* seq,
143 const std::vector<SpacedSeed>& seeds,
144 unsigned hash_num_per_seed,
145 unsigned k,
146 size_t pos = 0)
147 : aahash(seq, 1, k, 1, pos)
148 , hash_num_per_seed(hash_num_per_seed)
149 , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
150 , seeds(seeds)
151 {
152 init();
153 }
154 SeedAAHash(const std::string& seq,
155 const std::vector<SpacedSeed>& seeds,
156 unsigned hash_num_per_seed,
157 unsigned k,
158 size_t pos = 0)
159 : aahash(seq, 1, k, 1, pos)
160 , hash_num_per_seed(hash_num_per_seed)
161 , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
162 , seeds(seeds)
163 {
164 init();
165 }
166 SeedAAHash(const char* seq,
167 const std::vector<std::string>& seeds,
168 unsigned hash_num_per_seed,
169 unsigned k,
170 size_t pos = 0)
171 : aahash(seq, 1, k, 1, pos)
172 , hash_num_per_seed(hash_num_per_seed)
173 , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
174 , seeds(aa_parse_seeds(seeds))
175 {
176 init();
177 }
178 SeedAAHash(const std::string& seq,
179 const std::vector<std::string>& seeds,
180 unsigned hash_num_per_seed,
181 unsigned k,
182 size_t pos = 0)
183 : aahash(seq, 1, k, 1, pos)
184 , hash_num_per_seed(hash_num_per_seed)
185 , hashes_array(new uint64_t[hash_num_per_seed * seeds.size()])
186 , seeds(aa_parse_seeds(seeds))
187 {
188 init();
189 }
190
191 SeedAAHash(const SeedAAHash& seed_aahash)
192 : aahash(seed_aahash.aahash)
193 , hash_num_per_seed(seed_aahash.hash_num_per_seed)
194 , hashes_array(new uint64_t[hash_num_per_seed * seed_aahash.seeds.size()])
195 , seeds(seed_aahash.seeds)
196 {
197 std::memcpy(hashes_array.get(),
198 seed_aahash.hashes_array.get(),
199 hash_num_per_seed * seeds.size() * sizeof(uint64_t));
200 }
201 SeedAAHash(SeedAAHash&&) = default;
202
216 bool roll();
217
218 const uint64_t* hashes() const { return hashes_array.get(); }
219
220 size_t get_pos() const { return aahash.get_pos(); }
221 unsigned get_hash_num() const { return aahash.get_hash_num(); }
222 unsigned get_hash_num_per_seed() const { return hash_num_per_seed; }
223 unsigned get_k() const { return aahash.get_k(); }
224};
225
226} // namespace btllib
227
228#endif
Definition aahash.hpp:35
AAHash(std::string_view seq, uint8_t hash_num, uint16_t k, unsigned level, size_t pos=0)
Definition aahash.hpp:63
Definition aahash.hpp:120
SeedAAHash(const char *seq, const std::vector< SpacedSeed > &seeds, unsigned hash_num_per_seed, unsigned k, size_t pos=0)
Definition aahash.hpp:142
Definition aahash.hpp:12