00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _PkgDb_hash_h
00023 #define _PkgDb_hash_h
00024
00025 #include <list>
00026 #include <vector>
00027 #include <string>
00028 #include <assert.h>
00029
00030 const size_t primes[] = { 31, 101, 503, 1009, 2003, 5003 };
00031
00032 inline size_t hashfun( unsigned p ) { return size_t(p); };
00033 size_t hashfun( const std::string& s );
00034 size_t hashfun( std::string& s );
00035 size_t hashfun( const char * p );
00036
00037 template<class Key>
00038 struct basicHashElt {
00039 basicHashElt *next;
00040 Key key;
00041 basicHashElt(const Key& k) : next(0), key(k) {};
00042 };
00043
00044 template<class Key, class list_elem>
00045 class basic_hash {
00046 public:
00047 typedef size_t size_type;
00048 typedef list_elem list_type;
00049 typedef std::vector<list_elem*> vector_type;
00050 typedef size_type (*hashfun_t)( const Key& );
00051
00052 template<class le>
00053 class hash_iterator {
00054 friend class basic_hash<Key, list_elem>;
00055 typedef std::forward_iterator_tag iterator_category;
00056 private:
00057 le *current;
00058 size_type adr;
00059 const vector_type *vec;
00060
00061 public:
00062 hash_iterator() : vec(0) {}
00063 hash_iterator( le *c, size_type a, const vector_type *v )
00064 : current(c), adr(a), vec(v) {}
00065
00066 hash_iterator( const hash_iterator<list_elem>& i2 ) {
00067 const hash_iterator& i3 = (const hash_iterator&)i2;
00068 current = i3.current;
00069 adr = i3.adr;
00070 vec = i3.vec;
00071 }
00072
00073
00074
00075 operator const void* () const { return vec; }
00076 bool operator! () const { return vec == 0; }
00077
00078 le& operator* () const {
00079 assert(vec);
00080 return *current;
00081 }
00082 le* operator-> () const {
00083 assert(vec);
00084 return &*current;
00085 }
00086
00087 hash_iterator& operator++ () {
00088 current = (le *)current->next;
00089
00090
00091 if (!current) {
00092 while (++adr < vec->size())
00093 if ((current = (*vec)[adr]))
00094 break;
00095 if (adr == vec->size())
00096
00097 vec = 0;
00098 }
00099 return *this;
00100 }
00101 hash_iterator operator++ (int) {
00102 iterator temp = *this;
00103 operator++();
00104 return temp;
00105 }
00106
00107 bool operator== ( const hash_iterator& x ) const {
00108 return (!vec && !x.vec) ||
00109 (vec && x.vec && current == x.current);
00110 }
00111
00112 bool operator!= ( const hash_iterator& x ) const {
00113 return !operator==(x);
00114 }
00115 };
00116
00117 typedef hash_iterator<list_elem> iterator;
00118 typedef hash_iterator<const list_elem> const_iterator;
00119 friend class iterator;
00120 friend class const_iterator;
00121
00122 protected:
00123 vector_type v;
00124 size_type vsize;
00125 size_type n_elements;
00126 size_type n_buckets;
00127 hashfun_t hf;
00128 private:
00129
00130
00131 void construct( const basic_hash& S ) {
00132 hf = S.hf;
00133 vsize = S.vsize;
00134 v = vector_type( vsize, 0 );
00135 n_elements = 0;
00136 n_buckets = 0;
00137 for( iterator t = S.begin(); t != S.end(); ++t )
00138 insert( t );
00139 }
00140
00141 void resize( size_type new_size ) {
00142 basic_hash newhash(new_size, hf);
00143 for( iterator t = begin(); t != end(); ++t )
00144 newhash.insert( t );
00145 swap( newhash );
00146 }
00147
00148 size_type next_size() {
00149 unsigned i;
00150 for( i = 0; i < sizeof(primes)/sizeof(*primes)-1; ++i ) {
00151 if (primes[i] > vsize)
00152 break;
00153 }
00154 return primes[i];
00155 }
00156
00157 bool resize_if_needed() {
00158
00159 if (5*n_elements >= 9*n_buckets) {
00160 resize( next_size() );
00161 return true;
00162 }
00163 return false;
00164 }
00165
00166 public:
00167 basic_hash( size_type size = 31, hashfun_t f = hashfun ) :
00168 v(size, 0), vsize(size), n_elements(0), n_buckets(0), hf(f) {}
00169 basic_hash( const basic_hash& S ) { construct(S); }
00170 ~basic_hash() { clear(); }
00171 basic_hash& operator= ( const basic_hash& S ) {
00172 if (this != &S) {
00173 clear();
00174 construct(S);
00175 }
00176 return *this;
00177 }
00178
00179 iterator begin() {
00180 size_type adr = 0;
00181 while( adr < vsize ) {
00182 if (!v[adr])
00183 ++adr;
00184 else
00185 return iterator(v[adr], adr, &v);
00186 }
00187 return iterator();
00188 }
00189 iterator end() { return iterator(); }
00190 const_iterator const_begin() const {
00191 size_type adr = 0;
00192 while( adr < vsize ) {
00193 if (!v[adr])
00194 ++adr;
00195 else
00196 return const_iterator(v[adr], adr, &v);
00197 }
00198 return const_iterator();
00199 }
00200 const_iterator begin() const { return const_begin(); }
00201 const_iterator const_end() const { return const_iterator(); }
00202 const_iterator end() const { return const_end(); }
00203
00204
00205 void clear() {
00206 for( size_type i = 0; i < vsize; i++ ) {
00207 if (list_elem *p = v[i]) {
00208 list_elem *pnext;
00209 while( p ) {
00210 pnext = (list_elem *)p->next;
00211 delete p;
00212 p = pnext;
00213 }
00214 v[i] = 0;
00215 }
00216 }
00217 n_elements = 0;
00218 n_buckets = 0;
00219 }
00220
00221 list_elem *find( const Key& k, list_elem ***where = 0 ) {
00222 size_type adr = hf(k) % vsize;
00223 list_elem **p = &v[adr];
00224 if (!*p) {
00225 if (where) {
00226 *where = p;
00227 ++n_buckets;
00228 }
00229 return 0;
00230 }
00231 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00232 if ((*p)->key == k)
00233 return *p;
00234 }
00235 if (where) *where = p;
00236 return 0;
00237 }
00238
00239 const list_elem *find( const Key& k ) const {
00240 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k));
00241 }
00242
00243 bool exists( const Key& k ) const {
00244 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k) != 0);
00245 };
00246
00247
00248 list_elem* insert( const Key& k ) {
00249 list_elem **p;
00250 list_elem *q;
00251 if ((q = find( k, &p )))
00252 return q;
00253 else {
00254 *p = new list_elem(k);
00255 ++n_elements;
00256 return *p;
00257 }
00258 }
00259 list_elem* insert( const list_elem *e ) { return insert( e->k ); }
00260 list_elem* insert( const iterator& i ) { return insert( i->k ); }
00261
00262 bool erase( const Key& k ) {
00263 size_type adr = hf(k) % vsize;
00264 list_elem **p = &v[adr];
00265 if (!*p)
00266 return false;
00267 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00268 if ((*p)->key == k) {
00269 list_elem *q = *p;
00270 *p = (list_elem *)q->next;
00271 delete q;
00272 --n_elements;
00273 if (!*p) --n_buckets;
00274 return true;
00275 }
00276 }
00277 return false;
00278 }
00279 bool erase( const iterator& i ) { return erase( i->k ); }
00280
00281 size_type size() const { return n_elements; }
00282 size_type max_size() const { return vsize; }
00283 bool empty() const { return n_elements == 0; }
00284 void swap( basic_hash& s ) {
00285 v.swap(s.v);
00286 std::swap(n_elements, s.n_elements);
00287 std::swap(n_buckets, s.n_buckets);
00288 std::swap(hf, s.hf);
00289 std::swap(vsize, s.vsize);
00290 }
00291
00292 void statistics();
00293 };
00294
00295
00296 template<class Key>
00297 class noval_hash : public basic_hash<Key,basicHashElt<Key> > {};
00298
00299 template<class Key, class T>
00300 struct HashElt : public basicHashElt<Key> {
00301 T value;
00302 HashElt(const Key& k, const T& v) : basicHashElt<Key>(k), value(v) {};
00303 };
00304
00305 template<class Key, class T>
00306 class hash : public basic_hash<Key,HashElt<Key,T> > {
00307 typedef basicHashElt<Key> list_elem;
00308 typedef HashElt<Key,T> list_type;
00309
00310
00311 void construct( const hash& S ) {
00312 hf = S.hf;
00313 vsize = S.vsize;
00314 v = vector_type( vsize, 0 );
00315 n_elements = 0;
00316 n_buckets = 0;
00317 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = S.begin(); t != S.end(); ++t )
00318 insert( t );
00319 }
00320
00321 void resize( typename basic_hash<Key,HashElt<Key,T> >::size_type new_size ) {
00322 hash newhash(new_size, hf);
00323 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = begin(); t != end(); ++t )
00324 newhash.insert( t );
00325 swap( newhash );
00326 }
00327
00328 public:
00329 hash( typename basic_hash<Key,HashElt<Key,T> >::size_type size = 31, typename basic_hash<Key,HashElt<Key,T> >::hashfun_t f = hashfun ) :
00330 basic_hash<Key,HashElt<Key,T> >( size, hashfun ) {};
00331 hash( const hash& S ) { construct(S); }
00332 ~hash() { clear(); }
00333 hash& operator= ( const hash& S ) {
00334 if (this != &S) {
00335 clear();
00336 construct(S);
00337 }
00338 return *this;
00339 }
00340
00341 T& operator[] ( const Key& k ) {
00342 return insert( k, T() )->value;
00343 }
00344
00345 T operator[] ( const Key& k ) const {
00346 const list_type *p = find(k);
00347 return p ? p->value : T();
00348 }
00349
00350 list_type* insert( const Key& k, const T& v ) {
00351 list_type *q;
00352 list_type **p;
00353 if ((q = find( k, &p )))
00354 return q;
00355 else {
00356 *p = new list_type(k,v);
00357 ++n_elements;
00358 return *p;
00359 }
00360 }
00361 list_type* insert( const list_type *e ) {
00362 return insert( e->key, e->value ); }
00363 list_type* insert( const typename hash<Key,T>::iterator& i ) {
00364 return insert( i->key, i->value ); }
00365 list_type* insert( const typename hash<Key,T>::const_iterator& i ) {
00366 return insert( i->key, i->value ); }
00367 };
00368
00369 template <class Key, class list_elem>
00370 void basic_hash<Key, list_elem>::statistics() {
00371 cout << n_elements << " elements in " << n_buckets << "/" << vsize
00372 << " buckets\n";
00373 unsigned free = 0, alloced = 0, maxlen = 0, sumlen = 0;
00374 size_type maxbuck;
00375
00376 for( size_type adr = 0; adr < vsize; ++adr ) {
00377 if (!v[adr])
00378 ++free;
00379 else {
00380 ++alloced;
00381 size_type len = 0;
00382 for( list_elem *p = (list_elem*)v[adr]; p; p = (list_elem*)p->next )
00383 ++len;
00384 if (len > maxlen) {
00385 maxlen = len;
00386 maxbuck = adr;
00387 }
00388 sumlen += len;
00389 }
00390 }
00391 cout << free << " buckets free, " << alloced << " used\n";
00392 cout << "avg. list len " << (float)sumlen/alloced <<
00393 " max len " << maxlen << endl;
00394
00395 cout << "Estimated avg. list len " << (float)n_elements/n_buckets << endl;
00396 }
00397
00398 #endif
00399
00400
00401
00402