#include <bloom_filter.hpp>
Bloom filter data structure that stores spaced seed k-mers.
◆ SeedBloomFilter() [1/3]
btllib::SeedBloomFilter::SeedBloomFilter |
( |
| ) |
|
|
inline |
Construct a dummy Seed Bloom filter (e.g. as a default argument).
◆ SeedBloomFilter() [2/3]
btllib::SeedBloomFilter::SeedBloomFilter |
( |
size_t | bytes, |
|
|
unsigned | k, |
|
|
const std::vector< std::string > & | seeds, |
|
|
unsigned | hash_num_per_seed ) |
Construct an empty Seed Bloom filter of given size.
- Parameters
-
bytes | Filter size in bytes. |
k | K-mer size. |
seeds | A vector of spaced seeds in string format. 0s indicate ignored and 1s indicate relevant bases. |
hash_num_per_seed | Number of hash values per seed. |
◆ SeedBloomFilter() [3/3]
btllib::SeedBloomFilter::SeedBloomFilter |
( |
const std::string & | path | ) |
|
|
explicit |
Load a Seed Bloom filter from a file.
- Parameters
-
path | Filepath to load from. |
◆ contains() [1/4]
std::vector< std::vector< unsigned > > btllib::SeedBloomFilter::contains |
( |
const char * | seq, |
|
|
size_t | seq_len ) const |
Query the presence of spaced seed k-mers of a sequence.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
- Returns
- A vector indicating which seeds had a hit for every k-mer. The indices of the outer vector are indices of seq k-mers. The indices of the inner vector are indices of spaced seeds hit for that k-mer.
◆ contains() [2/4]
std::vector< std::vector< unsigned > > btllib::SeedBloomFilter::contains |
( |
const std::string & | seq | ) |
const |
|
inline |
Query the presence of spaced seed k-mers of a sequence.
- Parameters
-
seq | Sequence to k-merize. |
- Returns
- A vector indicating which seeds had a hit for every k-mer. The indices of the outer vector are indices of seq k-mers. The indices of the inner vector are indices of spaced seeds hit for that k-mer.
◆ contains() [3/4]
bool btllib::SeedBloomFilter::contains |
( |
const std::vector< uint64_t > & | hashes | ) |
const |
|
inline |
Check for the presence of an element's hash values. A single spaced seed is an element here.
- Parameters
-
hashes | Integer vector of hash values. |
◆ contains() [4/4]
bool btllib::SeedBloomFilter::contains |
( |
const uint64_t * | hashes | ) |
const |
|
inline |
Check for the presence of an element's hash values. A single spaced seed is an element here.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
◆ contains_insert() [1/4]
std::vector< std::vector< unsigned > > btllib::SeedBloomFilter::contains_insert |
( |
const char * | seq, |
|
|
size_t | seq_len ) |
Query the presence of spaced seed k-mers of a sequence and insert if missing.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
- Returns
- A vector indicating which seeds had a hit for every k-mer before insertion. The indices of the outer vector are indices of seq k-mers. The indices of the inner vector are indices of spaced seeds hit for that k-mer.
◆ contains_insert() [2/4]
std::vector< std::vector< unsigned > > btllib::SeedBloomFilter::contains_insert |
( |
const std::string & | seq | ) |
|
|
inline |
Query the presence of spaced seed k-mers of a sequence and insert if missing.
- Parameters
-
seq | Sequence to k-merize. |
- Returns
- A vector indicating which seeds had a hit for every k-mer before insertion. The indices of the outer vector are indices of seq k-mers. The indices of the inner vector are indices of spaced seeds hit for that k-mer.
◆ contains_insert() [3/4]
bool btllib::SeedBloomFilter::contains_insert |
( |
const std::vector< uint64_t > & | hashes | ) |
|
|
inline |
Check for the presence of an element's hash values and insert if missing. A single spaced seed is an element here.
- Parameters
-
hashes | Integer vector of hash values. |
- Returns
- True if present before insertion, false otherwise.
◆ contains_insert() [4/4]
bool btllib::SeedBloomFilter::contains_insert |
( |
const uint64_t * | hashes | ) |
|
|
inline |
Check for the presence of an element's hash values and insert if missing. A single spaced seed is an element here.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
- Returns
- True if present before insertion, false otherwise.
◆ get_bytes()
size_t btllib::SeedBloomFilter::get_bytes |
( |
| ) |
const |
|
inline |
Get filter size in bytes.
◆ get_fpr()
double btllib::SeedBloomFilter::get_fpr |
( |
| ) |
const |
Get the false positive rate, i.e. the probability that at least one seed falsely reports a hit for a given k-mer.
◆ get_hash_fn()
const std::string & btllib::SeedBloomFilter::get_hash_fn |
( |
| ) |
const |
|
inline |
Get the name of the hash function used.
◆ get_hash_num()
unsigned btllib::SeedBloomFilter::get_hash_num |
( |
| ) |
const |
|
inline |
Get the number of hash values per element, i.e. seed.
◆ get_hash_num_per_seed()
unsigned btllib::SeedBloomFilter::get_hash_num_per_seed |
( |
| ) |
const |
|
inline |
Get the number of hash values per element, i.e. seed.
◆ get_k()
unsigned btllib::SeedBloomFilter::get_k |
( |
| ) |
const |
|
inline |
Get the k-mer size.
◆ get_kmer_bloom_filter()
Get a reference to the underlying Kmer Bloom filter.
◆ get_occupancy()
double btllib::SeedBloomFilter::get_occupancy |
( |
| ) |
const |
|
inline |
Get the fraction of the filter occupied by 1 bits.
◆ get_parsed_seeds()
const std::vector< btllib::hashing_internals::SpacedSeed > & btllib::SeedBloomFilter::get_parsed_seeds |
( |
| ) |
const |
|
inline |
Get the seeds used in parsed format. Parsed format is a vector of indices of 0s in the seed.
◆ get_pop_cnt()
uint64_t btllib::SeedBloomFilter::get_pop_cnt |
( |
| ) |
const |
|
inline |
Get population count, i.e. the number of 1 bits in the filter.
◆ get_seeds()
const std::vector< std::string > & btllib::SeedBloomFilter::get_seeds |
( |
| ) |
const |
|
inline |
Get the seeds used in string format.
◆ get_total_hash_num()
unsigned btllib::SeedBloomFilter::get_total_hash_num |
( |
| ) |
const |
|
inline |
Get the number of hash values per k-mer, i.e. total number of hash values for all seeds.
◆ insert() [1/4]
void btllib::SeedBloomFilter::insert |
( |
const char * | seq, |
|
|
size_t | seq_len ) |
Insert a sequence's spaced seed k-mers into the filter.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
◆ insert() [2/4]
void btllib::SeedBloomFilter::insert |
( |
const std::string & | seq | ) |
|
|
inline |
Insert a sequence's spaced seed k-mers into the filter.
- Parameters
-
seq | Sequence to k-merize. |
◆ insert() [3/4]
void btllib::SeedBloomFilter::insert |
( |
const std::vector< uint64_t > & | hashes | ) |
|
|
inline |
Insert an element's hash values.
- Parameters
-
hashes | Integer vector of hash values. |
◆ insert() [4/4]
void btllib::SeedBloomFilter::insert |
( |
const uint64_t * | hashes | ) |
|
|
inline |
Insert an element's hash values.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
◆ is_bloom_file()
static bool btllib::SeedBloomFilter::is_bloom_file |
( |
const std::string & | path | ) |
|
|
inlinestatic |
Check whether the file at the given path is a saved Seed Bloom filter.
- Parameters
-
path | Filepath to check. |
◆ save()
void btllib::SeedBloomFilter::save |
( |
const std::string & | path | ) |
|
Save the Bloom filter to a file that can be loaded in the future.
- Parameters
-
path | Filepath to store filter at. |
The documentation for this class was generated from the following file: