btllib
 All Classes Namespaces Functions Variables
Public Member Functions | Static Public Member Functions | List of all members
btllib::KmerCountingBloomFilter< T > Class Template Reference

#include <counting_bloom_filter.hpp>

Public Member Functions

 KmerCountingBloomFilter ()
 
 KmerCountingBloomFilter (size_t bytes, unsigned hash_num, unsigned k)
 
 KmerCountingBloomFilter (const std::string &path)
 
 KmerCountingBloomFilter (const KmerCountingBloomFilter &)=delete
 
 KmerCountingBloomFilter (KmerCountingBloomFilter &&)=delete
 
KmerCountingBloomFilter & operator= (const KmerCountingBloomFilter &)=delete
 
KmerCountingBloomFilter & operator= (KmerCountingBloomFilter &&)=delete
 
void insert (const char *seq, size_t seq_len)
 
void insert (const std::string &seq)
 
void insert (const uint64_t *hashes, T n=1)
 
void insert (const std::vector< uint64_t > &hashes, T n=1)
 
void remove (const char *seq, size_t seq_len)
 
void remove (const std::string &seq)
 
void remove (const uint64_t *hashes)
 
void remove (const std::vector< uint64_t > &hashes)
 
void clear (const char *seq, size_t seq_len)
 
void clear (const std::string &seq)
 
void clear (const uint64_t *hashes)
 
void clear (const std::vector< uint64_t > &hashes)
 
uint64_t contains (const char *seq, size_t seq_len) const
 
uint64_t contains (const std::string &seq) const
 
T contains (const uint64_t *hashes) const
 
T contains (const std::vector< uint64_t > &hashes) const
 
T contains_insert (const char *seq, size_t seq_len)
 
T contains_insert (const std::string &seq)
 
T contains_insert (const uint64_t *hashes, T n=1)
 
T contains_insert (const std::vector< uint64_t > &hashes, T n=1)
 
T insert_contains (const char *seq, size_t seq_len)
 
T insert_contains (const std::string &seq)
 
T insert_contains (const uint64_t *hashes, T n=1)
 
T insert_contains (const std::vector< uint64_t > &hashes, T n=1)
 
T insert_thresh_contains (const char *seq, size_t seq_len, T threshold)
 
T insert_thresh_contains (const std::string &seq, const T threshold)
 
T insert_thresh_contains (const uint64_t *hashes, const T threshold)
 
T insert_thresh_contains (const std::vector< uint64_t > &hashes, const T threshold)
 
T contains_insert_thresh (const char *seq, size_t seq_len, T threshold)
 
T contains_insert_thresh (const std::string &seq, const T threshold)
 
T contains_insert_thresh (const uint64_t *hashes, const T threshold)
 
T contains_insert_thresh (const std::vector< uint64_t > &hashes, const T threshold)
 
size_t get_bytes () const
 
uint64_t get_pop_cnt (T threshold=1) const
 
double get_occupancy (T threshold=1) const
 
unsigned get_hash_num () const
 
double get_fpr (T threshold=1) const
 
unsigned get_k () const
 
const std::string & get_hash_fn () const
 
CountingBloomFilter< T > & get_counting_bloom_filter ()
 
void save (const std::string &path)
 

Static Public Member Functions

static bool is_bloom_file (const std::string &path)
 

Detailed Description

template<typename T>
class btllib::KmerCountingBloomFilter< T >

Counting Bloom filter data structure that stores k-mers. Provides KmerCountingBloomFilter8, KmerCountingBloomFilter16, and KmerCountingBloomFilter32 classes with corresponding bit-size counters.

Constructor & Destructor Documentation

template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( )
inline

Construct a dummy k-mer Counting Bloom filter (e.g. for use as a default argument).

template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( size_t  bytes,
unsigned  hash_num,
unsigned  k 
)
inline

Construct an empty k-mer Counting Bloom filter of given size.

Parameters
bytes: Filter size in bytes.
hash_num: Number of hash values per element.
k: K-mer size.
template<typename T >
btllib::KmerCountingBloomFilter< T >::KmerCountingBloomFilter ( const std::string &  path)
inline explicit

Load a k-mer Counting Bloom filter from a file.

Parameters
path: Filepath to load from.

Member Function Documentation

template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const char *  seq,
size_t  seq_len 
)
inline

Set the counts of a sequence's k-mers to zero in the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const std::string &  seq)
inline

Set the counts of a sequence's k-mers to zero in the filter.

Parameters
seq: Sequence to k-merize.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const uint64_t *  hashes)
inline

Set the count of a k-mer to zero in the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::clear ( const std::vector< uint64_t > &  hashes)
inline

Set the count of a k-mer to zero in the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.
template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::contains ( const char *  seq,
size_t  seq_len 
) const
inline

Query the counts of k-mers of a sequence.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of seq's k-mers found in the filter.
template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::contains ( const std::string &  seq) const
inline

Query the counts of k-mers of a sequence.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of seq's k-mers found in the filter.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains ( const uint64_t *  hashes) const
inline

Get a k-mer's count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
Returns
The count of the queried k-mer.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains ( const std::vector< uint64_t > &  hashes) const
inline

Get a k-mer's count.

Parameters
hashes: Integer vector of the k-mer's hash values.
Returns
The count of the queried k-mer.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const char *  seq,
size_t  seq_len 
)
inline

Get the counts of a sequence's k-mers and then increment the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of the queried k-mers before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const std::string &  seq)
inline

Get the counts of a sequence's k-mers and then increment the counts.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of the queried k-mers before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const uint64_t *  hashes,
T  n = 1 
)
inline

Get the count of a k-mer and then increment the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.
Returns
The count of the queried k-mer before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert ( const std::vector< uint64_t > &  hashes,
T  n = 1 
)
inline

Get the count of a k-mer and then increment the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.
Returns
The count of the queried k-mer before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const char *  seq,
size_t  seq_len,
T  threshold 
)
inline

Get the counts of a sequence's k-mers and then increment the counts if they are not above the threshold.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const std::string &  seq,
const T  threshold 
)
inline

Get the counts of a sequence's k-mers and then increment the counts if they are not above the threshold.

Parameters
seq: Sequence to k-merize.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const uint64_t *  hashes,
const T  threshold 
)
inline

Get the count of a k-mer and then increment the count if it's not above the threshold.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
threshold: The threshold.
Returns
The count of the queried k-mer before insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::contains_insert_thresh ( const std::vector< uint64_t > &  hashes,
const T  threshold 
)
inline

Get the count of a k-mer and then increment the count if it's not above the threshold.

Parameters
hashes: Integer vector of the k-mer's hash values.
threshold: The threshold.
Returns
The count of the queried k-mer before insertion.
template<typename T >
size_t btllib::KmerCountingBloomFilter< T >::get_bytes ( ) const
inline

Get filter size in bytes.

template<typename T >
CountingBloomFilter<T>& btllib::KmerCountingBloomFilter< T >::get_counting_bloom_filter ( )
inline

Get a reference to the underlying vanilla Counting Bloom filter.

template<typename T >
double btllib::KmerCountingBloomFilter< T >::get_fpr ( T  threshold = 1) const
inline

Get the query false positive rate for elements with count >= threshold.

Parameters
threshold: The threshold.
Returns
The false positive rate.
template<typename T >
const std::string& btllib::KmerCountingBloomFilter< T >::get_hash_fn ( ) const
inline

Get the name of the hash function used.

template<typename T >
unsigned btllib::KmerCountingBloomFilter< T >::get_hash_num ( ) const
inline

Get the number of hash values per element.

template<typename T >
unsigned btllib::KmerCountingBloomFilter< T >::get_k ( ) const
inline

Get the k-mer size used.

template<typename T >
double btllib::KmerCountingBloomFilter< T >::get_occupancy ( T  threshold = 1) const
inline

Get the fraction of the filter occupied by counters >= threshold (with the default threshold of 1, counters >0).

template<typename T >
uint64_t btllib::KmerCountingBloomFilter< T >::get_pop_cnt ( T  threshold = 1) const
inline

Get population count, i.e. the number of counters >= threshold in the filter (with the default threshold of 1, counters >0).

template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const char *  seq,
size_t  seq_len 
)
inline

Insert a sequence's k-mers into the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const std::string &  seq)
inline

Insert a sequence's k-mers into the filter.

Parameters
seq: Sequence to k-merize.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const uint64_t *  hashes,
T  n = 1 
)
inline

Insert a k-mer into the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::insert ( const std::vector< uint64_t > &  hashes,
T  n = 1 
)
inline

Insert a k-mer into the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const char *  seq,
size_t  seq_len 
)
inline

Increment the counts of a sequence's k-mers and then return the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
Returns
The sum of counts of the queried k-mers after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const std::string &  seq)
inline

Increment the counts of a sequence's k-mers and then return the counts.

Parameters
seq: Sequence to k-merize.
Returns
The sum of counts of the queried k-mers after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const uint64_t *  hashes,
T  n = 1 
)
inline

Increment a k-mer's count and then return the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
n: Increment value.
Returns
The count of the queried k-mer after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_contains ( const std::vector< uint64_t > &  hashes,
T  n = 1 
)
inline

Increment a k-mer's count and then return the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
n: Increment value.
Returns
The count of the queried k-mer after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const char *  seq,
size_t  seq_len,
T  threshold 
)
inline

Increment the counts of a sequence's k-mers if they are not above the threshold and then return the counts.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const std::string &  seq,
const T  threshold 
)
inline

Increment the counts of a sequence's k-mers if they are not above the threshold and then return the counts.

Parameters
seq: Sequence to k-merize.
threshold: The threshold.
Returns
The sum of counts of the queried k-mers after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const uint64_t *  hashes,
const T  threshold 
)
inline

Increment a k-mer's count if it's not above the threshold and then return the count.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
threshold: The threshold.
Returns
The count of the queried k-mer after insertion.
template<typename T >
T btllib::KmerCountingBloomFilter< T >::insert_thresh_contains ( const std::vector< uint64_t > &  hashes,
const T  threshold 
)
inline

Increment a k-mer's count if it's not above the threshold and then return the count.

Parameters
hashes: Integer vector of the k-mer's hash values.
threshold: The threshold.
Returns
The count of the queried k-mer after insertion.
template<typename T >
static bool btllib::KmerCountingBloomFilter< T >::is_bloom_file ( const std::string &  path)
inline static

Check whether the file at the given path is a saved k-mer counting Bloom filter.

Parameters
path: Filepath to check.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const char *  seq,
size_t  seq_len 
)
inline

Decrease the counts of a sequence's k-mers in the filter.

Parameters
seq: Sequence to k-merize.
seq_len: Length of seq.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const std::string &  seq)
inline

Decrease the counts of a sequence's k-mers in the filter.

Parameters
seq: Sequence to k-merize.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const uint64_t *  hashes)
inline

Decrease the count of a k-mer in the filter.

Parameters
hashes: Integer array of the k-mer's hash values. Array size should equal the hash_num argument used when the Bloom filter was constructed.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::remove ( const std::vector< uint64_t > &  hashes)
inline

Decrease the count of a k-mer in the filter.

Parameters
hashes: Integer vector of the k-mer's hash values.
template<typename T >
void btllib::KmerCountingBloomFilter< T >::save ( const std::string &  path)
inline

Save the Bloom filter to a file that can be loaded in the future.

Parameters
path: Filepath to store filter at.

The documentation for this class was generated from the following files: