btllib
Loading...
Searching...
No Matches
btllib::KmerCountingBloomFilter< T > Class Template Reference

#include <counting_bloom_filter.hpp>

Public Member Functions

 KmerCountingBloomFilter ()
 
 KmerCountingBloomFilter (size_t bytes, unsigned hash_num, unsigned k)
 
 KmerCountingBloomFilter (const std::string &path)
 
 KmerCountingBloomFilter (const KmerCountingBloomFilter &)=delete
 
 KmerCountingBloomFilter (KmerCountingBloomFilter &&)=delete
 
KmerCountingBloomFilter & operator= (const KmerCountingBloomFilter &)=delete
 
KmerCountingBloomFilter & operator= (KmerCountingBloomFilter &&)=delete
 
void insert (const char *seq, size_t seq_len)
 
void insert (const std::string &seq)
 
void insert (const uint64_t *hashes, T n=1)
 
void insert (const std::vector< uint64_t > &hashes, T n=1)
 
void remove (const char *seq, size_t seq_len)
 
void remove (const std::string &seq)
 
void remove (const uint64_t *hashes)
 
void remove (const std::vector< uint64_t > &hashes)
 
void clear (const char *seq, size_t seq_len)
 
void clear (const std::string &seq)
 
void clear (const uint64_t *hashes)
 
void clear (const std::vector< uint64_t > &hashes)
 
uint64_t contains (const char *seq, size_t seq_len) const
 
uint64_t contains (const std::string &seq) const
 
T contains (const uint64_t *hashes) const
 
T contains (const std::vector< uint64_t > &hashes) const
 
T contains_insert (const char *seq, size_t seq_len)
 
T contains_insert (const std::string &seq)
 
T contains_insert (const uint64_t *hashes, T n=1)
 
T contains_insert (const std::vector< uint64_t > &hashes, T n=1)
 
T insert_contains (const char *seq, size_t seq_len)
 
T insert_contains (const std::string &seq)
 
T insert_contains (const uint64_t *hashes, T n=1)
 
T insert_contains (const std::vector< uint64_t > &hashes, T n=1)
 
T insert_thresh_contains (const char *seq, size_t seq_len, T threshold)
 
T insert_thresh_contains (const std::string &seq, const T threshold)
 
T insert_thresh_contains (const uint64_t *hashes, const T threshold)
 
T insert_thresh_contains (const std::vector< uint64_t > &hashes, const T threshold)
 
T contains_insert_thresh (const char *seq, size_t seq_len, T threshold)
 
T contains_insert_thresh (const std::string &seq, const T threshold)
 
T contains_insert_thresh (const uint64_t *hashes, const T threshold)
 
T contains_insert_thresh (const std::vector< uint64_t > &hashes, const T threshold)
 
size_t get_bytes () const
 
uint64_t get_pop_cnt (T threshold=1) const
 
double get_occupancy (T threshold=1) const
 
unsigned get_hash_num () const
 
double get_fpr (T threshold=1) const
 
unsigned get_k () const
 
const std::string & get_hash_fn () const
 
CountingBloomFilter< T > & get_counting_bloom_filter ()
 
void save (const std::string &path)
 

Static Public Member Functions

static bool is_bloom_file (const std::string &path)
 

Detailed Description

template<typename T>
class btllib::KmerCountingBloomFilter< T >

Counting Bloom filter data structure that stores k-mers. Provides KmerCountingBloomFilter8, KmerCountingBloomFilter16, and KmerCountingBloomFilter32 classes with 8-, 16-, and 32-bit counters, respectively.

Constructor & Destructor Documentation

◆ KmerCountingBloomFilter() [1/3]

template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( )
inline

Construct a dummy k-mer Counting Bloom filter (e.g. for use as a default argument).

◆ KmerCountingBloomFilter() [2/3]

template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( size_t bytes,
unsigned hash_num,
unsigned k )
inline

Construct an empty k-mer Counting Bloom filter of given size.

Parameters
bytes: Filter size in bytes.
hash_num: Number of hash values per element.
k: K-mer size.

◆ KmerCountingBloomFilter() [3/3]

template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( const std::string & path)
inline explicit

Load a k-mer Counting Bloom filter from a file.

Parameters
path: Filepath to load from.

Member Function Documentation

◆ clear() [1/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const char * seq,
size_t seq_len )
inline

Set the counts of a sequence's k-mers to zero in the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.

◆ clear() [2/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const std::string & seq)
inline

Set the counts of a sequence's k-mers to zero in the filter.

Parameters
seq: Sequence to k-merize.

◆ clear() [3/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const std::vector< uint64_t > & hashes)
inline

Set the count of a k-mer to zero in the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.

◆ clear() [4/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const uint64_t * hashes)
inline

Set the count of a k-mer to zero in the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.

◆ contains() [1/4]

template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::contains ( const char * seq,
size_t seq_len ) const
inline

Query the counts of k-mers of a sequence.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of seq's k-mers found in the filter.

◆ contains() [2/4]

template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::contains ( const std::string & seq) const
inline

Query the counts of k-mers of a sequence.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of seq's k-mers found in the filter.

◆ contains() [3/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains ( const std::vector< uint64_t > & hashes) const
inline

Get a k-mer's count.

Parameters
hashes: Integer vector of the k-mer's hash values.
Returns
The count of the queried k-mer.

◆ contains() [4/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains ( const uint64_t * hashes) const
inline

Get a k-mer's count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
Returns
The count of the queried k-mer.

◆ contains_insert() [1/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const char * seq,
size_t seq_len )
inline

Get the counts of a sequence's k-mers and then increment the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of the queried k-mers before insertion.

◆ contains_insert() [2/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const std::string & seq)
inline

Get the counts of a sequence's k-mers and then increment the counts.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of the queried k-mers before insertion.

◆ contains_insert() [3/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const std::vector< uint64_t > & hashes,
T n = 1 )
inline

Get the count of a k-mer and then increment the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.
Returns
The count of the queried k-mer before insertion.

◆ contains_insert() [4/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const uint64_t * hashes,
T n = 1 )
inline

Get the count of a k-mer and then increment the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.
Returns
The count of the queried k-mer before insertion.

◆ contains_insert_thresh() [1/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const char * seq,
size_t seq_len,
T threshold )
inline

Get the counts of a sequence's k-mers and then increment the counts if they are not above the threshold.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers before insertion.

◆ contains_insert_thresh() [2/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const std::string & seq,
const T threshold )
inline

Get the counts of a sequence's k-mers and then increment the counts if they are not above the threshold.

Parameters
seq: Sequence to k-merize.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers before insertion.

◆ contains_insert_thresh() [3/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const std::vector< uint64_t > & hashes,
const T threshold )
inline

Get the count of a k-mer and then increment the count if it's not above the threshold.

Parameters
hashes: Integer vector of the k-mer's hash values.
threshold: The threshold.
Returns
The count of the queried k-mer before insertion.

◆ contains_insert_thresh() [4/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const uint64_t * hashes,
const T threshold )
inline

Get the count of a k-mer and then increment the count if it's not above the threshold.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
threshold: The threshold.
Returns
The count of the queried k-mer before insertion.

◆ get_bytes()

template<typename T >
size_t btllib::KmerCountingBloomFilter< T >::get_bytes ( ) const
inline

Get filter size in bytes.

◆ get_counting_bloom_filter()

template<typename T >
CountingBloomFilter< T > & btllib::KmerCountingBloomFilter< T >::get_counting_bloom_filter ( )
inline

Get a reference to the underlying vanilla Counting Bloom filter.

◆ get_fpr()

template<typename T >
double btllib::KmerCountingBloomFilter< T >::get_fpr ( T threshold = 1) const
inline

Get the query false positive rate for elements with count >= threshold.

Parameters
threshold: The threshold.
Returns
The false positive rate.

◆ get_hash_fn()

template<typename T >
const std::string & btllib::KmerCountingBloomFilter< T >::get_hash_fn ( ) const
inline

Get the name of the hash function used.

◆ get_hash_num()

template<typename T >
unsigned btllib::KmerCountingBloomFilter< T >::get_hash_num ( ) const
inline

Get the number of hash values per element.

◆ get_k()

template<typename T >
unsigned btllib::KmerCountingBloomFilter< T >::get_k ( ) const
inline

Get the k-mer size used.

◆ get_occupancy()

template<typename T >
double btllib::KmerCountingBloomFilter< T >::get_occupancy ( T threshold = 1) const
inline

Get the fraction of the filter occupied by counters with a value >= threshold (i.e. non-zero counters with the default threshold of 1).

◆ get_pop_cnt()

template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::get_pop_cnt ( T threshold = 1) const
inline

Get population count, i.e. the number of counters in the filter with a value >= threshold (non-zero counters with the default threshold of 1).

◆ insert() [1/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const char * seq,
size_t seq_len )
inline

Insert a sequence's k-mers into the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.

◆ insert() [2/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const std::string & seq)
inline

Insert a sequence's k-mers into the filter.

Parameters
seq: Sequence to k-merize.

◆ insert() [3/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const std::vector< uint64_t > & hashes,
T n = 1 )
inline

Insert a k-mer into the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.

◆ insert() [4/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const uint64_t * hashes,
T n = 1 )
inline

Insert a k-mer into the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.

◆ insert_contains() [1/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const char * seq,
size_t seq_len )
inline

Increment the counts of a sequence's k-mers and then return the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of the queried k-mers after insertion.

◆ insert_contains() [2/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const std::string & seq)
inline

Increment the counts of a sequence's k-mers and then return the counts.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of the queried k-mers after insertion.

◆ insert_contains() [3/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const std::vector< uint64_t > & hashes,
T n = 1 )
inline

Increment a k-mer's count and then return the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.
Returns
The count of the queried k-mer after insertion.

◆ insert_contains() [4/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const uint64_t * hashes,
T n = 1 )
inline

Increment a k-mer's count and then return the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.
Returns
The count of the queried k-mer after insertion.

◆ insert_thresh_contains() [1/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const char * seq,
size_t seq_len,
T threshold )
inline

Increment the counts of a sequence's k-mers if they are not above the threshold and then return the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers after insertion.

◆ insert_thresh_contains() [2/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const std::string & seq,
const T threshold )
inline

Increment the counts of a sequence's k-mers if they are not above the threshold and then return the counts.

Parameters
seq: Sequence to k-merize.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers after insertion.

◆ insert_thresh_contains() [3/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const std::vector< uint64_t > & hashes,
const T threshold )
inline

Increment a k-mer's count if it's not above the threshold and then return the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
threshold: The threshold.
Returns
The count of the queried k-mer after insertion.

◆ insert_thresh_contains() [4/4]

template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const uint64_t * hashes,
const T threshold )
inline

Increment a k-mer's count if it's not above the threshold and then return the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
threshold: The threshold.
Returns
The count of the queried k-mer after insertion.

◆ is_bloom_file()

template<typename T >
static bool btllib::KmerCountingBloomFilter< T >::is_bloom_file ( const std::string & path)
inline static

Check whether the file at the given path is a saved k-mer counting Bloom filter.

Parameters
path: Filepath to check.

◆ remove() [1/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const char * seq,
size_t seq_len )
inline

Decrement the counts of a sequence's k-mers in the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.

◆ remove() [2/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const std::string & seq)
inline

Decrement the counts of a sequence's k-mers in the filter.

Parameters
seq: Sequence to k-merize.

◆ remove() [3/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const std::vector< uint64_t > & hashes)
inline

Decrement the count of a k-mer in the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.

◆ remove() [4/4]

template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const uint64_t * hashes)
inline

Decrement the count of a k-mer in the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.

◆ save()

template<typename T >
void btllib::KmerCountingBloomFilter< T >::save ( const std::string & path)
inline

Save the Bloom filter to a file that can be loaded in the future.

Parameters
path: Filepath to store filter at.

The documentation for this class was generated from the following files: