1#ifndef BTLLIB_COUNTING_BLOOM_FILTER_HPP
2#define BTLLIB_COUNTING_BLOOM_FILTER_HPP
4#include "btllib/bloom_filter.hpp"
5#include "btllib/counting_bloom_filter.hpp"
6#include "btllib/nthash.hpp"
7#include "btllib/status.hpp"
// Signature string identifying the counting Bloom filter file format. It is
// the value handed to btllib::BloomFilter::check_file_signature() further
// down in this header.
27static const char*
const COUNTING_BLOOM_FILTER_SIGNATURE =
28 "[BTLCountingBloomFilter_v5]";
// Signature string identifying the k-mer counting Bloom filter file format.
// It is the value handed to btllib::BloomFilter::check_file_signature() by the
// k-mer variant further down in this header.
30static const char*
const KMER_COUNTING_BLOOM_FILTER_SIGNATURE =
31 "[BTLKmerCountingBloomFilter_v5]";
34class KmerCountingBloomFilter;
58 std::string hash_fn =
"");
/**
 * Insert an element described by its precomputed hash values.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param hashes Pointer to the element's hash values.
 * NOTE(review): presumably must reference hash_num values — confirm.
 * @param n Value supplied with the insertion; defaults to 1.
 * NOTE(review): presumably the amount added to each counter — confirm.
 */
80 void insert(
const uint64_t* hashes, T n = 1);
88 void insert(
const std::vector<uint64_t>& hashes, T n = 1)
/**
 * Remove an element described by its precomputed hash values.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param hashes Pointer to the element's hash values.
 * NOTE(review): presumably decrements the matching counters — confirm
 * against the out-of-line definition.
 */
99 void remove(
const uint64_t* hashes);
106 void remove(
const std::vector<uint64_t>& hashes) {
remove(hashes.data()); }
/**
 * Clear the entries associated with an element's precomputed hash values.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param hashes Pointer to the element's hash values.
 * NOTE(review): presumably resets the matching counters to zero — confirm
 * against the out-of-line definition.
 */
114 void clear(
const uint64_t* hashes);
121 void clear(
const std::vector<uint64_t>& hashes) {
clear(hashes.data()); }
/**
 * Query the filter for an element described by its precomputed hash values.
 * Const member; declaration only, with the definition living in
 * counting_bloom_filter-inl.hpp.
 *
 * @param hashes Pointer to the element's hash values.
 * @return A value of the counter type T.
 * NOTE(review): presumably the element's count estimate — confirm.
 */
131 T
contains(
const uint64_t* hashes)
const;
140 T
contains(
const std::vector<uint64_t>& hashes)
const
/**
 * Report a false positive rate figure for the filter as a double.
 * Const member; declaration only, with the definition living in
 * counting_bloom_filter-inl.hpp.
 *
 * @param threshold Counter threshold used by the calculation; defaults to 1.
 * NOTE(review): exact threshold semantics are not visible here — confirm.
 */
267 double get_fpr(T threshold = 1)
const;
/**
 * Save the filter to a file.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param path Destination file path.
 */
276 void save(
const std::string& path);
285 return btllib::BloomFilter::check_file_signature(
286 path, COUNTING_BLOOM_FILTER_SIGNATURE);
/**
 * Internal helper taking an element's hash values plus two counter values.
 * Declaration only; the definition is not shown in this header.
 *
 * @param hashes Pointer to the element's hash values.
 * @param min_val NOTE(review): presumably the counter value that selects
 * which entries are updated — confirm against the definition.
 * @param new_val NOTE(review): presumably the value written to the selected
 * entries — confirm against the definition.
 */
292 void set(
const uint64_t* hashes, T min_val, T new_val);
// Defaults to 0. NOTE(review): presumably the number of elements held by
// `array` — confirm.
297 size_t array_size = 0;
// Defaults to 0. NOTE(review): presumably how many hash values each element
// supplies — confirm.
298 unsigned hash_num = 0;
// Owning pointer to an array of std::atomic<T> elements.
300 std::unique_ptr<std::atomic<T>[]> array;
/**
 * Insert a sequence given as a character buffer and length.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param seq Pointer to the sequence characters.
 * @param seq_len Length of the sequence.
 * NOTE(review): presumably every k-mer of the sequence is inserted — confirm.
 */
344 void insert(
const char* seq,
size_t seq_len);
351 void insert(
const std::string& seq) {
insert(seq.c_str(), seq.size()); }
360 void insert(
const uint64_t* hashes, T n = 1)
362 counting_bloom_filter.insert(hashes, n);
371 void insert(
const std::vector<uint64_t>& hashes, T n = 1)
373 counting_bloom_filter.insert(hashes, n);
/**
 * Remove a sequence given as a character buffer and length.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param seq Pointer to the sequence characters.
 * @param seq_len Length of the sequence.
 * NOTE(review): presumably applies to every k-mer of the sequence — confirm.
 */
382 void remove(
const char* seq,
size_t seq_len);
389 void remove(
const std::string& seq) {
remove(seq.c_str(), seq.size()); }
397 void remove(
const uint64_t* hashes) { counting_bloom_filter.remove(hashes); }
404 void remove(
const std::vector<uint64_t>& hashes)
406 counting_bloom_filter.remove(hashes);
/**
 * Clear a sequence given as a character buffer and length.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param seq Pointer to the sequence characters.
 * @param seq_len Length of the sequence.
 * NOTE(review): presumably applies to every k-mer of the sequence — confirm.
 */
415 void clear(
const char* seq,
size_t seq_len);
422 void clear(
const std::string& seq) {
clear(seq.c_str(), seq.size()); }
430 void clear(
const uint64_t* hashes) { counting_bloom_filter.clear(hashes); }
437 void clear(
const std::vector<uint64_t>& hashes)
439 counting_bloom_filter.clear(hashes);
/**
 * Query the filter for a sequence given as a character buffer and length.
 * Const member; declaration only, with the definition living in
 * counting_bloom_filter-inl.hpp.
 *
 * @param seq Pointer to the sequence characters.
 * @param seq_len Length of the sequence.
 * @return A uint64_t result.
 * NOTE(review): the return type is uint64_t rather than T, presumably an
 * aggregate over the sequence's k-mers — confirm against the definition.
 */
450 uint64_t
contains(
const char* seq,
size_t seq_len)
const;
461 return contains(seq.c_str(), seq.size());
474 return counting_bloom_filter.contains(hashes);
484 T
contains(
const std::vector<uint64_t>& hashes)
const
486 return counting_bloom_filter.contains(hashes);
522 return counting_bloom_filter.contains_insert(hashes, n);
535 return counting_bloom_filter.contains_insert(hashes, n);
572 return counting_bloom_filter.insert_contains(hashes, n);
585 return counting_bloom_filter.insert_contains(hashes, n);
627 return counting_bloom_filter.insert_thresh_contains(hashes, threshold);
644 return counting_bloom_filter.insert_thresh_contains(hashes, threshold);
686 return counting_bloom_filter.contains_insert_thresh(hashes, threshold);
701 return counting_bloom_filter.contains_insert_thresh(hashes, threshold);
705 size_t get_bytes()
const {
return counting_bloom_filter.get_bytes(); }
709 return counting_bloom_filter.get_pop_cnt(threshold);
714 return counting_bloom_filter.get_occupancy(threshold);
717 unsigned get_hash_num()
const {
return counting_bloom_filter.get_hash_num(); }
726 return counting_bloom_filter.get_fpr(threshold);
/**
 * Accessor for the member `k`.
 * NOTE(review): presumably the k-mer size this filter was built with —
 * confirm against the member's declaration, which is not visible here.
 *
 * @return The current value of `k`.
 */
729 unsigned get_k()
const {
return k; }
733 return counting_bloom_filter.get_hash_fn();
738 return counting_bloom_filter;
/**
 * Save the k-mer counting filter to a file.
 * Declaration only; the definition lives in counting_bloom_filter-inl.hpp.
 *
 * @param path Destination file path.
 */
746 void save(
const std::string& path);
756 return btllib::BloomFilter::check_file_signature(
757 path, KMER_COUNTING_BLOOM_FILTER_SIGNATURE);
769#include "counting_bloom_filter-inl.hpp"
Definition counting_bloom_filter.hpp:43
T insert_thresh_contains(const std::vector< uint64_t > &hashes, const T threshold)
Definition counting_bloom_filter.hpp:218
void remove(const uint64_t *hashes)
Definition counting_bloom_filter-inl.hpp:89
double get_occupancy(T threshold=1) const
Definition counting_bloom_filter-inl.hpp:184
void insert(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter-inl.hpp:82
const std::string & get_hash_fn() const
Definition counting_bloom_filter.hpp:269
static bool is_bloom_file(const std::string &path)
Definition counting_bloom_filter.hpp:283
double get_fpr(T threshold=1) const
Definition counting_bloom_filter-inl.hpp:191
uint64_t get_pop_cnt(T threshold=1) const
Definition counting_bloom_filter-inl.hpp:167
void save(const std::string &path)
Definition counting_bloom_filter-inl.hpp:232
T contains_insert_thresh(const uint64_t *hashes, T threshold)
Definition counting_bloom_filter-inl.hpp:155
T contains(const std::vector< uint64_t > &hashes) const
Definition counting_bloom_filter.hpp:140
void remove(const std::vector< uint64_t > &hashes)
Definition counting_bloom_filter.hpp:106
size_t get_bytes() const
Definition counting_bloom_filter.hpp:253
T contains_insert(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:164
unsigned get_hash_num() const
Definition counting_bloom_filter.hpp:260
void insert(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:88
T contains(const uint64_t *hashes) const
Definition counting_bloom_filter-inl.hpp:105
CountingBloomFilter()
Definition counting_bloom_filter.hpp:47
void clear(const std::vector< uint64_t > &hashes)
Definition counting_bloom_filter.hpp:121
void clear(const uint64_t *hashes)
Definition counting_bloom_filter-inl.hpp:97
T insert_contains(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:189
T insert_contains(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter-inl.hpp:130
T contains_insert(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter-inl.hpp:119
T contains_insert_thresh(const std::vector< uint64_t > &hashes, const T threshold)
Definition counting_bloom_filter.hpp:246
T insert_thresh_contains(const uint64_t *hashes, T threshold)
Definition counting_bloom_filter-inl.hpp:142
Definition counting_bloom_filter.hpp:310
uint64_t contains(const char *seq, size_t seq_len) const
Definition counting_bloom_filter-inl.hpp:299
void clear(const uint64_t *hashes)
Definition counting_bloom_filter.hpp:430
void clear(const std::string &seq)
Definition counting_bloom_filter.hpp:422
T insert_thresh_contains(const std::vector< uint64_t > &hashes, const T threshold)
Definition counting_bloom_filter.hpp:641
void clear(const char *seq, size_t seq_len)
Definition counting_bloom_filter-inl.hpp:289
void save(const std::string &path)
Definition counting_bloom_filter-inl.hpp:388
void insert(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:371
CountingBloomFilter< T > & get_counting_bloom_filter()
Definition counting_bloom_filter.hpp:736
T insert_contains(const char *seq, size_t seq_len)
Definition counting_bloom_filter-inl.hpp:323
T insert_thresh_contains(const std::string &seq, const T threshold)
Definition counting_bloom_filter.hpp:609
T contains_insert_thresh(const uint64_t *hashes, const T threshold)
Definition counting_bloom_filter.hpp:684
T insert_thresh_contains(const char *seq, size_t seq_len, T threshold)
Definition counting_bloom_filter-inl.hpp:335
T contains(const uint64_t *hashes) const
Definition counting_bloom_filter.hpp:472
void remove(const char *seq, size_t seq_len)
Definition counting_bloom_filter-inl.hpp:279
T contains_insert(const std::string &seq)
Definition counting_bloom_filter.hpp:506
size_t get_bytes() const
Definition counting_bloom_filter.hpp:705
T contains_insert_thresh(const char *seq, size_t seq_len, T threshold)
Definition counting_bloom_filter-inl.hpp:350
T insert_thresh_contains(const uint64_t *hashes, const T threshold)
Definition counting_bloom_filter.hpp:625
T contains_insert_thresh(const std::vector< uint64_t > &hashes, const T threshold)
Definition counting_bloom_filter.hpp:698
T insert_contains(const std::string &seq)
Definition counting_bloom_filter.hpp:555
void remove(const std::vector< uint64_t > &hashes)
Definition counting_bloom_filter.hpp:404
const std::string & get_hash_fn() const
Definition counting_bloom_filter.hpp:731
T insert_contains(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:583
void insert(const char *seq, size_t seq_len)
Definition counting_bloom_filter-inl.hpp:269
double get_occupancy(T threshold=1) const
Definition counting_bloom_filter.hpp:712
T contains_insert(const std::vector< uint64_t > &hashes, T n=1)
Definition counting_bloom_filter.hpp:533
T contains_insert(const char *seq, size_t seq_len)
Definition counting_bloom_filter-inl.hpp:311
unsigned get_hash_num() const
Definition counting_bloom_filter.hpp:717
uint64_t get_pop_cnt(T threshold=1) const
Definition counting_bloom_filter.hpp:707
void insert(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter.hpp:360
double get_fpr(T threshold=1) const
Definition counting_bloom_filter.hpp:724
void remove(const std::string &seq)
Definition counting_bloom_filter.hpp:389
unsigned get_k() const
Definition counting_bloom_filter.hpp:729
void insert(const std::string &seq)
Definition counting_bloom_filter.hpp:351
T insert_contains(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter.hpp:570
uint64_t contains(const std::string &seq) const
Definition counting_bloom_filter.hpp:459
void remove(const uint64_t *hashes)
Definition counting_bloom_filter.hpp:397
void clear(const std::vector< uint64_t > &hashes)
Definition counting_bloom_filter.hpp:437
T contains_insert_thresh(const std::string &seq, const T threshold)
Definition counting_bloom_filter.hpp:668
static bool is_bloom_file(const std::string &path)
Definition counting_bloom_filter.hpp:754
KmerCountingBloomFilter()
Definition counting_bloom_filter.hpp:314
T contains_insert(const uint64_t *hashes, T n=1)
Definition counting_bloom_filter.hpp:520
T contains(const std::vector< uint64_t > &hashes) const
Definition counting_bloom_filter.hpp:484