-
Notifications
You must be signed in to change notification settings - Fork 0
/
cuckoo.pyx
191 lines (143 loc) · 5.5 KB
/
cuckoo.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#
# cuckoo.pyx
#
"""Cuckoo Hash library
This module provides a simple interface to the Cuckoo Hash,
an efficient sparse array implementation.
Cuckoo Hashing was proposed by Pagh and Rodler (2001). The idea is to
build a dictionary data structure with two hash tables and two different
hash functions.
SEE ALSO
Rasmus Pagh and Flemming Friche Rodler. 2001. Cuckoo Hashing, Proceedings
of ESA 2001, Lecture Notes in Computer Science, vol. 2161.
Chuleerat Jaruskulchai and Canasai Kruengkrai. 2002. Building Inverted
Files Through Efficient Dynamic Hashing. Proceedings of the Fifth National
Computer Science and Engineering Conference (NCSEC-2002).
L Devroye, P Morin. 2003. Cuckoo hashing: Further analysis. Information
Processing Letters.
This Python interface is to CKHash 0.4.2:
* Copyright (C) 2005-2008 Thai Computational Linguistics Laboratory (TCL),
* National Institute of Information and Communications Technology (NICT)
* Written by Canasai Kruengkrai <[email protected]>
"""
__author__ = 'Jose Nazario <[email protected]>'
__copyright__ = 'Copyright (c) 2008 Jose Nazario'
__license__ = 'GPL2'
__url__ = 'http://code.google.com/p/pycuckoohash/'
__version__ = '1.0-0.4.2'
cdef extern from "../cuckoo_hash/cuckoo_hash.h":
pass
cdef extern from "Python.h":
void Py_INCREF(object o)
void Py_DECREF(object o)
cdef extern from *:
cdef struct CKHash_Cell_t:
unsigned char *key
int value
cdef struct CKHash_Table_t:
unsigned int size
unsigned int table_size
int shift
unsigned int min_size
unsigned int mean_size
unsigned int max_chain
CKHash_Cell_t *T1
CKHash_Cell_t *T2
int function_size
int *a1
int *a2
ctypedef int (*applyfunc)(char *key, int value, void *arg)
CKHash_Table_t *ckh_construct_table(int min_size)
int ckh_lookup(CKHash_Table_t *D, char *key)
int ckh_insert (CKHash_Table_t *D, char *key, int value)
int ckh_delete(CKHash_Table_t *D, char *key)
int ckh_get(CKHash_Table_t *D, char *key, int *ret_value)
void cuckoo_apply(CKHash_Table_t *D, applyfunc cb, void *arg)
int cuckoo_table_size(CKHash_Table_t *D)
CKHash_Table_t *ckh_destruct_table(CKHash_Table_t *D)
cdef int _cuckoo_get_keys(char *key, int val, void *arg):
lst = <object>arg
lst.append(key)
cdef int _cuckoo_get_values(char *key, int val, void *arg):
lst = <object>arg
lst.append(<object>val)
cdef int _cuckoo_get_items(char *key, int val, void *arg):
lst = <object>arg
lst.append((key, <object>val))
cdef class cuckoohash:
"""cuckoohash() -> new empty Cuckoo Hash table
cuckoohash(size=n) -> Create a new Cuckoo Hash table with
initial size n.
Note: the table will automatically grow as needed.
cuckoohash(seq=seq) -> Create a new Cuckoo Hash table as if via:
c = cuckoohash()
for k, v in seq: c[k] = v
"""
cdef CKHash_Table_t *table
def __init__(self, size=1024, seq=[]):
self.table = ckh_construct_table(size)
if len(seq) > 0:
for k, v in seq: self.insert(k, v)
def __del__(self):
ckh_destruct_table(self.table)
def __len__(self):
return cuckoo_table_size(self.table)
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, value):
self.insert(key, value)
def __delitem__(self, key):
self.delete(key)
def delete(self, char *key):
"""delete(key)
delete the specified key"""
cdef int ret
ckh_get(self.table, key, &ret)
Py_DECREF(<int><void *>ret)
ckh_delete(self.table, key)
def lookup(self, char *key):
"""lookup(key)
test to see if the key is present in the hash table"""
return ckh_lookup(self.table, key)
def has_key(self, char *key):
"""has_key(key)
a synonym for lookup(key)"""
return self.lookup(key)
def insert(self, char *key, object val):
"""insert(key, val)
insert the key:value pair into the hash table.
key - a string
val - any Python object"""
Py_INCREF(val)
ckh_insert(self.table, key, <int><void *>val)
def get(self, char *key):
"""get(key)
retriee the value for the specified key"""
cdef int ret
ckh_get(self.table, key, &ret)
return <object>ret
cdef object __cuckoo_gather(self,
int gatherfunc(char *key, int val, void *arg)):
l = []
cuckoo_apply(self.table, gatherfunc, <void *>l)
return l
def keys(self):
"""keys() - the list of keys for the hash table"""
return self.__cuckoo_gather(_cuckoo_get_keys)
def values(self):
"""values() - the list of values held in the hash table"""
return self.__cuckoo_gather(_cuckoo_get_values)
def items(self):
"""C.items() -> list of (key, value) pairs in C"""
return self.__cuckoo_gather(_cuckoo_get_items)
def __iter__(self):
return iter(self.keys())
def iteritems(self):
"""C.iteritems() -> iterate over the (key, value) items in C"""
return iter(self.items())
def iterkeys(self):
"""C.iterkeys() -> iterate over the keys in C"""
return iter(self)
def itervalues(self):
"""C.itervalues() -> iterate over the value in C"""
return iter(self.values())