Skip to content

Commit 417b1fa

Browse files
committed
Add FNV1a hash function
1 parent d5e7618 commit 417b1fa

File tree

3 files changed

+172
-2
lines changed

3 files changed

+172
-2
lines changed

include/oxli/hashtable.hh

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,12 +540,124 @@ public:
540540
}
541541
};
542542

543+
544+
class FNVKmerHashIterator : public KmerHashIterator
545+
{
546+
const char * _seq;
547+
const char _ksize;
548+
unsigned int index;
549+
unsigned int length;
550+
bool _initialized;
551+
public:
552+
FNVKmerHashIterator(const char * seq, unsigned char k) :
553+
_seq(seq), _ksize(k), index(0), _initialized(false)
554+
{
555+
length = strlen(_seq);
556+
};
557+
558+
HashIntoType first()
559+
{
560+
_initialized = true;
561+
return next();
562+
}
563+
564+
HashIntoType next()
565+
{
566+
if (!_initialized) {
567+
_initialized = true;
568+
}
569+
570+
if (done()) {
571+
throw oxli_exception("past end of iterator");
572+
}
573+
574+
std::string kmer;
575+
kmer.assign(_seq + index, _ksize);
576+
index += 1;
577+
return _hash_fnv(kmer, _ksize);
578+
}
579+
580+
bool done() const
581+
{
582+
return (index + _ksize > length);
583+
}
584+
585+
unsigned int get_start_pos() const
586+
{
587+
if (!_initialized) {
588+
return 0;
589+
}
590+
return index - 1;
591+
}
592+
unsigned int get_end_pos() const
593+
{
594+
if (!_initialized) {
595+
return _ksize;
596+
}
597+
return index + _ksize - 1;
598+
}
599+
};
600+
601+
602+
class FNVHashtable : public oxli::Hashtable
603+
{
604+
public:
605+
explicit FNVHashtable(WordLength ksize, Storage * s)
606+
: Hashtable(ksize, s) { };
607+
608+
inline
609+
virtual
610+
HashIntoType
611+
hash_dna(const char * kmer) const
612+
{
613+
if (!(strlen(kmer) >= _ksize)) {
614+
throw oxli_value_exception("Supplied kmer string doesn't match the underlying k-size.");
615+
}
616+
return _hash_fnv(kmer, _ksize);
617+
}
618+
619+
inline virtual HashIntoType
620+
hash_dna_top_strand(const char * kmer) const
621+
{
622+
throw oxli_value_exception("not implemented");
623+
}
624+
625+
inline virtual HashIntoType
626+
hash_dna_bottom_strand(const char * kmer) const
627+
{
628+
throw oxli_value_exception("not implemented");
629+
}
630+
631+
inline virtual std::string
632+
unhash_dna(HashIntoType hashval) const
633+
{
634+
throw oxli_value_exception("not implemented");
635+
}
636+
637+
virtual KmerHashIteratorPtr new_kmer_iterator(const char * sp) const
638+
{
639+
KmerHashIterator * ki = new FNVKmerHashIterator(sp, _ksize);
640+
return unique_ptr<KmerHashIterator>(ki);
641+
}
642+
643+
virtual void save(std::string filename)
644+
{
645+
store->save(filename, _ksize);
646+
}
647+
virtual void load(std::string filename)
648+
{
649+
store->load(filename, _ksize);
650+
_init_bitstuff();
651+
}
652+
};
653+
654+
543655
// Hashtable-derived class with ByteStorage.
// This hunk swaps the base class: the "-" lines derive from MurmurHashtable,
// the "+" lines derive from FNVHashtable. The constructor forwards `ksize`
// and a newly allocated ByteStorage built from `sizes` to that base.
544-
class Counttable : public oxli::MurmurHashtable
656+
class Counttable : public oxli::FNVHashtable
545657
{
546658
public:
547659
explicit Counttable(WordLength ksize, std::vector<uint64_t> sizes)
548-
: MurmurHashtable(ksize, new ByteStorage(sizes)) { } ;
660+
: FNVHashtable(ksize, new ByteStorage(sizes)) { } ;
549661
};
550662

551663
// Hashtable-derived class with NibbleStorage.

include/oxli/kmer_hash.hh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,14 @@ HashIntoType _hash_murmur(const std::string& kmer, const WordLength k,
115115
HashIntoType _hash_murmur_forward(const std::string& kmer,
116116
const WordLength k);
117117

118+
119+
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k);
120+
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
121+
HashIntoType& h, HashIntoType& r);
122+
HashIntoType _hash_fnv_forward(const std::string& kmer,
123+
const WordLength k);
124+
125+
118126
// Function to support k-mer banding.
119127
std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
120128
unsigned int band);

src/oxli/kmer_hash.cc

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,56 @@ HashIntoType _hash_murmur_forward(const std::string& kmer, const WordLength k)
214214
return h;
215215
}
216216

217+
218+
/**
 * 64-bit FNV-1a over the first `len` bytes of `data`.
 *
 * For each byte: XOR it into the running hash, then multiply by the FNV
 * prime (arithmetic wraps modulo 2^64).
 *
 * @param data bytes to hash; at least `len` bytes must be readable.
 * @param len  number of bytes to consume.
 * @return the hash; equals the offset basis when `len` is 0.
 */
inline const uint64_t hash_64_fnv1a(const char* data, const uint64_t len) {
    uint64_t hash = 0xcbf29ce484222325ULL;   // FNV-1a 64-bit offset basis
    const uint64_t prime = 0x100000001b3ULL; // FNV 64-bit prime

    // The index has the same unsigned 64-bit type as `len`, so the comparison
    // is not signed/unsigned and cannot overflow for large lengths.
    for (uint64_t i = 0; i < len; ++i) {
        // Go through uint8_t so bytes >= 0x80 are not sign-extended.
        const uint8_t value = static_cast<uint8_t>(data[i]);
        hash ^= value;
        hash *= prime;
    }

    return hash;
}
231+
232+
// Convenience overload: returns the combined hash of `kmer` and discards the
// per-strand values that the four-argument overload writes back.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k)
{
    HashIntoType forward_hash = 0;
    HashIntoType revcomp_hash = 0;
    const HashIntoType combined =
        oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);
    return combined;
}
239+
240+
// Hashes a k-mer together with its reverse complement.
//
// On return `h` holds the FNV-1a hash of `kmer` and `r` the FNV-1a hash of
// its reverse complement. The return value is `h ^ r`, except when the k-mer
// equals its own reverse complement, in which case `h` is returned.
//
// Precondition: kmer.length() == k.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
                       HashIntoType& h, HashIntoType& r)
{
    // Checked before anything reads k bytes out of `kmer`; the hashing and
    // the reverse complement below both assume the string is exactly k long.
    assert(kmer.length() == k);

    h = hash_64_fnv1a(kmer.c_str(), k);

    std::string rev = oxli::_revcomp(kmer);
    if (rev == kmer) {
        // self complement kmer, can't use bitwise XOR (h ^ h would be 0)
        r = h;
        return h;
    }
    r = hash_64_fnv1a(rev.c_str(), k);

    return h ^ r;
}
255+
256+
// Returns only the forward-strand hash of `kmer`: runs the four-argument
// _hash_fnv and keeps the value it writes to its `h` output, ignoring both
// the reverse-complement hash and the combined return value.
HashIntoType _hash_fnv_forward(const std::string& kmer, const WordLength k)
{
    HashIntoType forward_hash = 0;
    HashIntoType revcomp_hash = 0;
    oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);
    return forward_hash;
}
265+
266+
217267
std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
218268
unsigned int band)
219269
{

0 commit comments

Comments
 (0)