@@ -45,7 +45,7 @@ cdef CpHashtable * hashtable_arg_shim(object table,
4545 SmallCounttable, QFCounttable)):
4646 cdef CPyHashtable_Object* cpyhashtable
4747 cdef CpHashtable * hashtable
48-
48+
4949 if isinstance (table, allowed):
5050 if isinstance (table, CYTHON_TABLES):
5151 hashtable = (< Hashtable> table).c_table.get()
@@ -226,7 +226,7 @@ cdef class Hashtable:
226226 total_reads,
227227 n_consumed)
228228 return total_reads, n_consumed
229-
229+
230230 def consume_seqfile_banding (self , file_name , num_bands , band ):
231231 """ Count all k-mers from file_name."""
232232 cdef unsigned long long n_consumed = 0
@@ -270,7 +270,7 @@ cdef class Hashtable:
270270 """ Calculate the k-mer abundance distribution over reads."""
271271 cdef CpHashtable * cptracking = hashtable_arg_shim(tracking,
272272 allowed = (PyNodegraph, Nodetable))
273-
273+
274274 cdef CPyReadParser_Object* parser
275275 parser = < CPyReadParser_Object* > read_parser
276276 cdef uint64_t * x = deref(self .c_table).abundance_distribution[CpFastxReader](
@@ -311,14 +311,36 @@ cdef class Hashtable:
311311
312312
313313cdef class QFCounttable(Hashtable):
314- def __cinit__ (self , int k , int starting_size ):
315- # starting size has to be a power of two
316- power_of_two = ((starting_size & (starting_size - 1 ) == 0 ) and
317- (starting_size != 0 ))
314+ """ Count k-mers using a counting quotient filter.
315+
316+ The counting quotient filter (CQF) is an extension of the quotient filter
317+ that supports counting in addition to simple membership testing. A CQF has
318+ better cache locality than (Small)Counttable, which improves
319+ performance.
320+
321+ Each new k-mer uses one slot, and the number of slots used per k-mer
322+ increases the more often the same k-mer is entered into the CQF. As a result,
323+ the CQF can become "full" and will then stop accepting calls to `add` and `count`.
324+
325+ Parameters
326+ ----------
327+ k : integer
328+ k-mer size
329+
330+ size : integer
331+ Set the number of slots used by the counting quotient filter. This
332+ determines the amount of memory used and how many k-mers can be entered
333+ into the data structure. Each slot uses roughly 1.3 bytes.
334+ """
335+ def __cinit__ (self , int k , uint64_t size ):
336+ # size has to be a non-zero power of two; only its base-2 exponent, int(log(size, 2)), is passed to CpQFCounttable below
337+ power_of_two = ((size & (size - 1 ) == 0 ) and
338+ (size != 0 ))
318339 if not power_of_two:
319- raise ValueError (" starting_size has to be a power of two." )
340+ raise ValueError (" size has to be a power of two, not"
341+ " {}." .format(size))
320342 if type (self ) is QFCounttable:
321- self .c_table.reset(< CpHashtable* > new CpQFCounttable(k, int (log(starting_size , 2 ))))
343+ self .c_table.reset(< CpHashtable* > new CpQFCounttable(k, int (log(size , 2 ))))
322344
323345 @classmethod
324346 def load (cls , file_name ):
@@ -346,5 +368,3 @@ cdef class Nodetable(Hashtable):
346368 if type (self ) is Nodetable:
347369 primes = get_n_primes_near_x(n_tables, starting_size)
348370 self .c_table.reset(< CpHashtable* > new CpNodetable(k, primes))
349-
350-
0 commit comments