#include <bloom_filter.hpp>
Bloom filter data structure that stores spaced seed k-mers.
btllib::SeedBloomFilter::SeedBloomFilter |
( |
| ) |
|
|
inline |
Construct a dummy Seed Bloom filter (e.g. as a default argument).
btllib::SeedBloomFilter::SeedBloomFilter |
( |
size_t |
bytes, |
|
|
unsigned |
k, |
|
|
const std::vector< std::string > & |
seeds, |
|
|
unsigned |
hash_num_per_seed |
|
) |
| |
Construct an empty Seed Bloom filter of given size.
- Parameters
-
bytes | Filter size in bytes. |
k | K-mer size. |
seeds | A vector of spaced seeds in string format. 0s indicate ignored and 1s indicate relevant bases. |
hash_num_per_seed | Number of hash values per seed. |
btllib::SeedBloomFilter::SeedBloomFilter |
( |
const std::string & |
path | ) |
|
|
explicit |
Load a Seed Bloom filter from a file.
- Parameters
-
path | Filepath to load from. |
std::vector<std::vector<unsigned> > btllib::SeedBloomFilter::contains |
( |
const char * |
seq, |
|
|
size_t |
seq_len |
|
) |
| const |
Query the presence of spaced seed k-mers of a sequence.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
- Returns
- A vector indicating which seeds had a hit for every k-mer. The indices of the outer vector are indices of seq k-mers. The indices of inner vector are indices of spaced seeds hit for that k-mer.
std::vector<std::vector<unsigned> > btllib::SeedBloomFilter::contains |
( |
const std::string & |
seq | ) |
const |
|
inline |
Query the presence of spaced seed k-mers of a sequence.
- Parameters
-
seq | Sequence to k-merize. |
- Returns
- A vector indicating which seeds had a hit for every k-mer. The indices of the outer vector are indices of seq k-mers. The indices of inner vector are indices of spaced seeds hit for that k-mer.
bool btllib::SeedBloomFilter::contains |
( |
const uint64_t * |
hashes | ) |
const |
|
inline |
Check for the presence of an element's hash values. A single spaced seed is an element here.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
bool btllib::SeedBloomFilter::contains |
( |
const std::vector< uint64_t > & |
hashes | ) |
const |
|
inline |
Check for the presence of an element's hash values. A single spaced seed is an element here.
- Parameters
-
hashes | Integer vector of hash values. |
std::vector<std::vector<unsigned> > btllib::SeedBloomFilter::contains_insert |
( |
const char * |
seq, |
|
|
size_t |
seq_len |
|
) |
| |
Query the presence of spaced seed k-mers of a sequence and insert if missing.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
- Returns
- A vector indicating which seeds had a hit for every k-mer before insertion. The indices of the outer vector are indices of seq k-mers. The indices of inner vector are indices of spaced seeds hit for that k-mer.
std::vector<std::vector<unsigned> > btllib::SeedBloomFilter::contains_insert |
( |
const std::string & |
seq | ) |
|
|
inline |
Query the presence of spaced seed k-mers of a sequence and insert if missing.
- Parameters
-
seq | Sequence to k-merize. |
- Returns
- A vector indicating which seeds had a hit for every k-mer before insertion. The indices of the outer vector are indices of seq k-mers. The indices of inner vector are indices of spaced seeds hit for that k-mer.
bool btllib::SeedBloomFilter::contains_insert |
( |
const uint64_t * |
hashes | ) |
|
|
inline |
Check for the presence of an element's hash values and insert if missing. A single spaced seed is an element here.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
- Returns
- True if present before insertion, false otherwise.
bool btllib::SeedBloomFilter::contains_insert |
( |
const std::vector< uint64_t > & |
hashes | ) |
|
|
inline |
Check for the presence of an element's hash values and insert if missing. A single spaced seed is an element here.
- Parameters
-
hashes | Integer vector of hash values. |
- Returns
- True if present before insertion, false otherwise.
size_t btllib::SeedBloomFilter::get_bytes |
( |
| ) |
const |
|
inline |
Get filter size in bytes.
double btllib::SeedBloomFilter::get_fpr |
( |
| ) |
const |
Get the false positive rate, i.e. the probability that at least one seed falsely reports a hit for a k-mer.
const std::string& btllib::SeedBloomFilter::get_hash_fn |
( |
| ) |
const |
|
inline |
Get the name of the hash function used.
unsigned btllib::SeedBloomFilter::get_hash_num |
( |
| ) |
const |
|
inline |
Get the number of hash values per element, i.e. seed.
unsigned btllib::SeedBloomFilter::get_hash_num_per_seed |
( |
| ) |
const |
|
inline |
Get the number of hash values per element, i.e. seed.
unsigned btllib::SeedBloomFilter::get_k |
( |
| ) |
const |
|
inline |
Get the k-mer size used.
double btllib::SeedBloomFilter::get_occupancy |
( |
| ) |
const |
|
inline |
Get the fraction of the filter occupied by 1 bits.
const std::vector<btllib::hashing_internals::SpacedSeed>& btllib::SeedBloomFilter::get_parsed_seeds |
( |
| ) |
const |
|
inline |
Get the seeds used in parsed format. Parsed format is a vector of indices of 0s in the seed.
uint64_t btllib::SeedBloomFilter::get_pop_cnt |
( |
| ) |
const |
|
inline |
Get population count, i.e. the number of 1 bits in the filter.
const std::vector<std::string>& btllib::SeedBloomFilter::get_seeds |
( |
| ) |
const |
|
inline |
Get the seeds used in string format.
unsigned btllib::SeedBloomFilter::get_total_hash_num |
( |
| ) |
const |
|
inline |
Get the number of hash values per k-mer, i.e. total number of hash values for all seeds.
void btllib::SeedBloomFilter::insert |
( |
const char * |
seq, |
|
|
size_t |
seq_len |
|
) |
| |
Insert a sequence's spaced seed k-mers into the filter.
- Parameters
-
seq | Sequence to k-merize. |
seq_len | Length of seq. |
void btllib::SeedBloomFilter::insert |
( |
const std::string & |
seq | ) |
|
|
inline |
Insert a sequence's spaced seed k-mers into the filter.
- Parameters
-
seq | Sequence to k-merize. |
void btllib::SeedBloomFilter::insert |
( |
const uint64_t * |
hashes | ) |
|
|
inline |
Insert an element's hash values.
- Parameters
-
hashes | Integer array of hash values. Array size should equal the hash_num_per_seed argument used when the Bloom filter was constructed. |
void btllib::SeedBloomFilter::insert |
( |
const std::vector< uint64_t > & |
hashes | ) |
|
|
inline |
Insert an element's hash values.
- Parameters
-
hashes | Integer vector of hash values. |
static bool btllib::SeedBloomFilter::is_bloom_file |
( |
const std::string & |
path | ) |
|
|
inline static |
Check whether the file at the given path is a saved Seed Bloom filter.
- Parameters
-
path | Filepath to check. |
void btllib::SeedBloomFilter::save |
( |
const std::string & |
path | ) |
|
Save the Bloom filter to a file that can be loaded in the future.
- Parameters
-
path | Filepath to store filter at. |
The documentation for this class was generated from the following file: