1 #ifndef BTLLIB_COUNTING_BLOOM_FILTER_HPP
2 #define BTLLIB_COUNTING_BLOOM_FILTER_HPP
4 #include "btllib/bloom_filter.hpp"
5 #include "btllib/counting_bloom_filter.hpp"
6 #include "btllib/nthash.hpp"
7 #include "btllib/status.hpp"
27 static const char*
const COUNTING_BLOOM_FILTER_SIGNATURE =
28 "[BTLCountingBloomFilter_v5]";
30 static const char*
const KMER_COUNTING_BLOOM_FILTER_SIGNATURE =
31 "[BTLKmerCountingBloomFilter_v5]";
58 std::string hash_fn =
"");
80 void insert(
const uint64_t* hashes, T n = 1);
88 void insert(
const std::vector<uint64_t>& hashes, T n = 1)
99 void remove(
const uint64_t* hashes);
106 void remove(
const std::vector<uint64_t>& hashes) {
remove(hashes.data()); }
114 void clear(
const uint64_t* hashes);
121 void clear(
const std::vector<uint64_t>& hashes) {
clear(hashes.data()); }
131 T
contains(
const uint64_t* hashes)
const;
140 T
contains(
const std::vector<uint64_t>& hashes)
const
267 double get_fpr(T threshold = 1)
const;
276 void save(
const std::string& path);
285 return btllib::BloomFilter::check_file_signature(
286 path, COUNTING_BLOOM_FILTER_SIGNATURE);
292 void set(
const uint64_t* hashes, T min_val, T new_val);
297 size_t array_size = 0;
298 unsigned hash_num = 0;
300 std::unique_ptr<std::atomic<T>[]> array;
309 class KmerCountingBloomFilter
344 void insert(
const char* seq,
size_t seq_len);
351 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
360 void insert(
const uint64_t* hashes, T n = 1)
362 counting_bloom_filter.insert(hashes, n);
371 void insert(
const std::vector<uint64_t>& hashes, T n = 1)
373 counting_bloom_filter.insert(hashes, n);
382 void remove(
const char* seq,
size_t seq_len);
389 void remove(
const std::string& seq) {
remove(seq.c_str(), seq.size()); }
397 void remove(
const uint64_t* hashes) { counting_bloom_filter.remove(hashes); }
404 void remove(
const std::vector<uint64_t>& hashes)
406 counting_bloom_filter.remove(hashes);
415 void clear(
const char* seq,
size_t seq_len);
422 void clear(
const std::string& seq) {
clear(seq.c_str(), seq.size()); }
430 void clear(
const uint64_t* hashes) { counting_bloom_filter.clear(hashes); }
437 void clear(
const std::vector<uint64_t>& hashes)
439 counting_bloom_filter.clear(hashes);
450 uint64_t
contains(
const char* seq,
size_t seq_len)
const;
461 return contains(seq.c_str(), seq.size());
474 return counting_bloom_filter.contains(hashes);
484 T
contains(
const std::vector<uint64_t>& hashes)
const
486 return counting_bloom_filter.contains(hashes);
522 return counting_bloom_filter.contains_insert(hashes, n);
535 return counting_bloom_filter.contains_insert(hashes, n);
572 return counting_bloom_filter.insert_contains(hashes, n);
585 return counting_bloom_filter.insert_contains(hashes, n);
627 return counting_bloom_filter.insert_thresh_contains(hashes, threshold);
644 return counting_bloom_filter.insert_thresh_contains(hashes, threshold);
686 return counting_bloom_filter.contains_insert_thresh(hashes, threshold);
701 return counting_bloom_filter.contains_insert_thresh(hashes, threshold);
705 size_t get_bytes()
const {
return counting_bloom_filter.get_bytes(); }
709 return counting_bloom_filter.get_pop_cnt(threshold);
714 return counting_bloom_filter.get_occupancy(threshold);
717 unsigned get_hash_num()
const {
return counting_bloom_filter.get_hash_num(); }
726 return counting_bloom_filter.get_fpr(threshold);
729 unsigned get_k()
const {
return k; }
733 return counting_bloom_filter.get_hash_fn();
738 return counting_bloom_filter;
746 void save(
const std::string& path);
756 return btllib::BloomFilter::check_file_signature(
757 path, KMER_COUNTING_BLOOM_FILTER_SIGNATURE);
769 #include "counting_bloom_filter-inl.hpp"
double get_occupancy(T threshold=1) const
Definition: counting_bloom_filter.hpp:712
double get_fpr(T threshold=1) const
Definition: counting_bloom_filter-inl.hpp:191
T contains_insert(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter-inl.hpp:119
static bool is_bloom_file(const std::string &path)
Definition: counting_bloom_filter.hpp:283
double get_fpr(T threshold=1) const
Definition: counting_bloom_filter.hpp:724
void save(const std::string &path)
Definition: counting_bloom_filter-inl.hpp:232
T contains_insert_thresh(const uint64_t *hashes, T threshold)
Definition: counting_bloom_filter-inl.hpp:155
void insert(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter-inl.hpp:82
size_t get_bytes() const
Definition: counting_bloom_filter.hpp:705
T insert_thresh_contains(const uint64_t *hashes, T threshold)
Definition: counting_bloom_filter-inl.hpp:142
T contains_insert(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:533
void clear(const uint64_t *hashes)
Definition: counting_bloom_filter-inl.hpp:97
Definition: counting_bloom_filter.hpp:42
void clear(const char *seq, size_t seq_len)
Definition: counting_bloom_filter-inl.hpp:289
void insert(const std::string &seq)
Definition: counting_bloom_filter.hpp:351
const std::string & get_hash_fn() const
Definition: counting_bloom_filter.hpp:731
T insert_thresh_contains(const std::vector< uint64_t > &hashes, const T threshold)
Definition: counting_bloom_filter.hpp:641
T contains(const std::vector< uint64_t > &hashes) const
Definition: counting_bloom_filter.hpp:140
T contains_insert_thresh(const char *seq, size_t seq_len, T threshold)
Definition: counting_bloom_filter-inl.hpp:350
uint64_t get_pop_cnt(T threshold=1) const
Definition: counting_bloom_filter.hpp:707
uint64_t contains(const char *seq, size_t seq_len) const
Definition: counting_bloom_filter-inl.hpp:299
T insert_contains(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter.hpp:570
CountingBloomFilter()
Definition: counting_bloom_filter.hpp:47
T contains_insert_thresh(const std::string &seq, const T threshold)
Definition: counting_bloom_filter.hpp:668
T insert_contains(const std::string &seq)
Definition: counting_bloom_filter.hpp:555
T contains_insert_thresh(const std::vector< uint64_t > &hashes, const T threshold)
Definition: counting_bloom_filter.hpp:698
T contains(const std::vector< uint64_t > &hashes) const
Definition: counting_bloom_filter.hpp:484
void clear(const uint64_t *hashes)
Definition: counting_bloom_filter.hpp:430
T insert_thresh_contains(const uint64_t *hashes, const T threshold)
Definition: counting_bloom_filter.hpp:625
T contains(const uint64_t *hashes) const
Definition: counting_bloom_filter-inl.hpp:105
unsigned get_hash_num() const
Definition: counting_bloom_filter.hpp:260
void insert(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter.hpp:360
void clear(const std::vector< uint64_t > &hashes)
Definition: counting_bloom_filter.hpp:121
void clear(const std::string &seq)
Definition: counting_bloom_filter.hpp:422
KmerCountingBloomFilter()
Definition: counting_bloom_filter.hpp:314
T insert_contains(const char *seq, size_t seq_len)
Definition: counting_bloom_filter-inl.hpp:323
Definition: counting_bloom_filter.hpp:34
T contains_insert_thresh(const uint64_t *hashes, const T threshold)
Definition: counting_bloom_filter.hpp:684
const std::string & get_hash_fn() const
Definition: counting_bloom_filter.hpp:269
CountingBloomFilter< T > & get_counting_bloom_filter()
Definition: counting_bloom_filter.hpp:736
uint64_t get_pop_cnt(T threshold=1) const
Definition: counting_bloom_filter-inl.hpp:167
T insert_contains(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:583
T contains_insert(const char *seq, size_t seq_len)
Definition: counting_bloom_filter-inl.hpp:311
double get_occupancy(T threshold=1) const
Definition: counting_bloom_filter-inl.hpp:184
uint64_t contains(const std::string &seq) const
Definition: counting_bloom_filter.hpp:459
T contains_insert(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter.hpp:520
T contains_insert(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:164
T insert_thresh_contains(const std::string &seq, const T threshold)
Definition: counting_bloom_filter.hpp:609
T insert_thresh_contains(const char *seq, size_t seq_len, T threshold)
Definition: counting_bloom_filter-inl.hpp:335
T insert_contains(const uint64_t *hashes, T n=1)
Definition: counting_bloom_filter-inl.hpp:130
T contains_insert_thresh(const std::vector< uint64_t > &hashes, const T threshold)
Definition: counting_bloom_filter.hpp:246
void insert(const char *seq, size_t seq_len)
Definition: counting_bloom_filter-inl.hpp:269
void insert(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:88
unsigned get_k() const
Definition: counting_bloom_filter.hpp:729
T insert_contains(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:189
void clear(const std::vector< uint64_t > &hashes)
Definition: counting_bloom_filter.hpp:437
unsigned get_hash_num() const
Definition: counting_bloom_filter.hpp:717
size_t get_bytes() const
Definition: counting_bloom_filter.hpp:253
static bool is_bloom_file(const std::string &path)
Definition: counting_bloom_filter.hpp:754
T insert_thresh_contains(const std::vector< uint64_t > &hashes, const T threshold)
Definition: counting_bloom_filter.hpp:218
void insert(const std::vector< uint64_t > &hashes, T n=1)
Definition: counting_bloom_filter.hpp:371
T contains(const uint64_t *hashes) const
Definition: counting_bloom_filter.hpp:472
T contains_insert(const std::string &seq)
Definition: counting_bloom_filter.hpp:506
void save(const std::string &path)
Definition: counting_bloom_filter-inl.hpp:388