Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members

hash.h

Go to the documentation of this file.
00001 /*---------------------------------------------------------------------\
00002 |                                                                      |
00003 |                      __   __    ____ _____ ____                      |
00004 |                      \ \ / /_ _/ ___|_   _|___ \                     |
00005 |                       \ V / _` \___ \ | |   __) |                    |
00006 |                        | | (_| |___) || |  / __/                     |
00007 |                        |_|\__,_|____/ |_| |_____|                    |
00008 |                                                                      |
00009 |                               core system                            |
00010 |                                                     (C) 2002 SuSE AG |
00011 \----------------------------------------------------------------------/
00012 
00013    File:       hash.h
00014    Purpose:    declare hash functions, see PHI docu
00015    Author:     Roman Hodek <roman.hodek@informatik.uni-erlangen.de>
00016    Maintainer: Ludwig Nussel <lnussel@suse.de>
00017 
00018    this file is taken from PHI
00019 
00020 /-*/
00021 
00022 #ifndef _PkgDb_hash_h                                        /* -*- C++ -*- */
00023 #define _PkgDb_hash_h
00024 
00025 #include <list>
00026 #include <vector>
00027 #include <string>
00028 #include <assert.h>
00029 
00030 const size_t primes[] = { 31, 101, 503, 1009, 2003, 5003 };
00031 
00032 inline size_t hashfun( unsigned p ) { return size_t(p); };
00033 size_t hashfun( const std::string& s );
00034 size_t hashfun( std::string& s );
00035 size_t hashfun( const char * p );
00036 
00037 template<class Key>
00038 struct basicHashElt {
00039         basicHashElt *next;
00040         Key key;
00041         basicHashElt(const Key& k) : next(0), key(k) {};
00042 };
00043 
00044 template<class Key, class list_elem>
00045 class basic_hash {
00046 public:
00047         typedef size_t size_type;
00048         typedef list_elem list_type;
00049         typedef std::vector<list_elem*> vector_type;
00050         typedef size_type (*hashfun_t)( const Key& );
00051 
00052         template<class le>
00053         class hash_iterator {
00054                 friend class basic_hash<Key, list_elem>;
00055                 typedef std::forward_iterator_tag iterator_category;
00056           private:
00057                 le *current;
00058                 size_type adr;
00059                 const vector_type *vec;
00060 
00061           public:
00062                 hash_iterator() : vec(0) {}
00063                 hash_iterator( le *c, size_type a, const vector_type *v )
00064                         : current(c), adr(a), vec(v) {}
00065                 // make it possible to convert an iterator to a const_iterator
00066                 hash_iterator( const hash_iterator<list_elem>& i2 ) {
00067                         const hash_iterator& i3 = (const hash_iterator&)i2;
00068                         current = i3.current;
00069                         adr = i3.adr;
00070                         vec = i3.vec;
00071                 }
00072 
00073                 // The following operators allow to test a hash in a condition if the
00074                 // iterator is defined
00075                 operator const void* () const { return vec; }
00076                 bool operator! () const { return vec == 0; }
00077 
00078                 le& operator* () const {
00079                         assert(vec);
00080                         return *current;
00081                 }
00082                 le* operator-> () const {
00083                         assert(vec);
00084                         return &*current;
00085                 }
00086 
00087            hash_iterator& operator++ () {
00088                    current = (le *)current->next;
00089                    // If -- after stepping forth in the list -- we've reached the end,
00090                    // we have to go to the next bucket.
00091                    if (!current) {
00092                            while (++adr < vec->size())
00093                                    if ((current = (*vec)[adr]))
00094                                            break;
00095                            if (adr == vec->size())
00096                                    // end of vector reached
00097                                    vec = 0;
00098                    }
00099                    return *this;
00100            }
00101            hash_iterator operator++ (int) {
00102                    iterator temp = *this;
00103                    operator++();
00104                    return temp;
00105            }
00106 
00107            bool operator== ( const hash_iterator& x ) const {
00108                    return (!vec && !x.vec) ||
00109                                   (vec && x.vec && current == x.current);
00110            }
00111 
00112            bool operator!= ( const hash_iterator& x ) const {
00113                   return !operator==(x);
00114            }
00115         }; // iterator
00116 
00117         typedef hash_iterator<list_elem> iterator;
00118         typedef hash_iterator<const list_elem> const_iterator;
00119         friend class iterator;
00120         friend class const_iterator;
00121 
00122 protected:
00123         vector_type v;
00124         size_type vsize;      // # of buckets (vector size)
00125         size_type n_elements; // # of stored elements
00126         size_type n_buckets;  // # of used buckets
00127         hashfun_t hf;
00128 private:
00129 
00130         // helper called from copy constructor and operator=
00131         void construct( const basic_hash& S ) {
00132                 hf = S.hf;
00133                 vsize = S.vsize;
00134                 v = vector_type( vsize, 0 );
00135                 n_elements = 0;
00136                 n_buckets = 0;
00137                 for( iterator t = S.begin(); t != S.end(); ++t )
00138                         insert( t );
00139         }
00140 
00141         void resize( size_type new_size ) {
00142                 basic_hash newhash(new_size, hf);
00143                 for( iterator t = begin(); t != end(); ++t )
00144                         newhash.insert( t );
00145                 swap( newhash );
00146         }
00147 
00148         size_type next_size() {
00149                 unsigned i;
00150                 for( i = 0; i < sizeof(primes)/sizeof(*primes)-1; ++i ) {
00151                         if (primes[i] > vsize)
00152                                 break;
00153                 }
00154                 return primes[i];
00155         }
00156 
00157         bool resize_if_needed() {
00158                 // resize hash if average list length exceeds 1.8 = 9/5
00159                 if (5*n_elements >= 9*n_buckets) {
00160                         resize( next_size() );
00161                         return true;
00162                 }
00163                 return false;
00164         }
00165 
00166   public:
00167         basic_hash( size_type size = 31, hashfun_t f = hashfun ) :
00168                 v(size, 0), vsize(size), n_elements(0), n_buckets(0), hf(f) {}
00169         basic_hash( const basic_hash& S ) { construct(S); }
00170         ~basic_hash() { clear(); }
00171         basic_hash& operator= ( const basic_hash& S ) {
00172                 if (this != &S) {
00173                         clear();
00174                         construct(S);
00175                 }
00176                 return *this;
00177         }
00178 
00179         iterator begin() {
00180                 size_type adr = 0;
00181                 while( adr < vsize ) {
00182                         if (!v[adr])
00183                                 ++adr;
00184                         else
00185                                 return iterator(v[adr], adr, &v);
00186                 }
00187                 return iterator();
00188         }
00189         iterator end() { return iterator(); }
00190         const_iterator const_begin() const {
00191                 size_type adr = 0;
00192                 while( adr < vsize ) {
00193                         if (!v[adr])
00194                                 ++adr;
00195                         else
00196                                 return const_iterator(v[adr], adr, &v);
00197                 }
00198                 return const_iterator();
00199         }
00200         const_iterator begin() const { return const_begin(); }
00201         const_iterator const_end() const { return const_iterator(); }
00202         const_iterator end() const { return const_end(); }
00203 
00204         // destruct all contents
00205         void clear() {
00206                 for( size_type i = 0; i < vsize; i++ ) {
00207                         if (list_elem *p = v[i]) {
00208                                 list_elem *pnext;
00209                                 while( p ) {
00210                                         pnext = (list_elem *)p->next;
00211                                         delete p;
00212                                         p = pnext;
00213                                 }
00214                                 v[i] = 0;
00215                         }
00216                 }
00217                 n_elements = 0;
00218                 n_buckets = 0;
00219         }
00220 
00221         list_elem *find( const Key& k, list_elem ***where = 0 ) {
00222                 size_type adr = hf(k) % vsize;
00223                 list_elem **p = &v[adr];
00224                 if (!*p) {
00225                         if (where) {
00226                                 *where = p;
00227                                 ++n_buckets;
00228                         }
00229                         return 0;
00230                 }
00231                 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00232                         if ((*p)->key == k)
00233                                 return *p;
00234                 }
00235                 if (where) *where = p;
00236                 return 0;
00237         }
00238 
00239         const list_elem *find( const Key& k ) const {
00240                 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k));
00241         }
00242 
00243         bool exists( const Key& k ) const {
00244                 return (const_cast<basic_hash<Key,list_elem>*>(this)->find(k) != 0);
00245         };
00246 
00247         // insert methods for compatibility with STL
00248         list_elem* insert( const Key& k ) {
00249                 list_elem **p;
00250                 list_elem *q;
00251                 if ((q = find( k, &p )))
00252                         return q;
00253                 else {
00254                         *p = new list_elem(k);
00255                         ++n_elements;
00256                         return *p;
00257                 }
00258         }
00259         list_elem* insert( const list_elem *e ) { return insert( e->k ); }
00260         list_elem* insert( const iterator& i )  { return insert( i->k ); }
00261 
00262         bool erase( const Key& k ) {
00263                 size_type adr = hf(k) % vsize;
00264                 list_elem **p = &v[adr];
00265                 if (!*p)
00266                         return false;
00267                 for( ; *p; p = (list_elem **)&((*p)->next) ) {
00268                         if ((*p)->key == k) {
00269                                 list_elem *q = *p;
00270                                 *p = (list_elem *)q->next;
00271                                 delete q;
00272                                 --n_elements;
00273                                 if (!*p) --n_buckets;
00274                                 return true;
00275                         }
00276                 }
00277                 return false;
00278         }
00279         bool erase( const iterator& i ) { return erase( i->k ); }
00280 
00281         size_type size()         const { return n_elements; }
00282         size_type max_size() const { return vsize; }
00283         bool empty()             const { return n_elements == 0; }
00284         void swap( basic_hash& s ) {
00285            v.swap(s.v);
00286            std::swap(n_elements, s.n_elements);
00287            std::swap(n_buckets, s.n_buckets);
00288            std::swap(hf, s.hf);
00289            std::swap(vsize, s.vsize);
00290         }
00291 
00292         void statistics();
00293 };
00294 
00295 
00296 template<class Key>
00297 class noval_hash : public basic_hash<Key,basicHashElt<Key> > {};
00298 
00299 template<class Key, class T>
00300 struct HashElt : public basicHashElt<Key> {
00301         T value;
00302         HashElt(const Key& k, const T& v) : basicHashElt<Key>(k), value(v) {};
00303 };
00304 
00305 template<class Key, class T>
00306 class hash : public basic_hash<Key,HashElt<Key,T> > {
00307         typedef basicHashElt<Key> list_elem;
00308         typedef HashElt<Key,T> list_type;
00309 
00310         // helper called from copy constructor and operator=
00311         void construct( const hash& S ) {
00312                 hf = S.hf;
00313                 vsize = S.vsize;
00314                 v = vector_type( vsize, 0 );
00315                 n_elements = 0;
00316                 n_buckets = 0;
00317                 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = S.begin(); t != S.end(); ++t )
00318                         insert( t );
00319         }
00320 
00321         void resize( typename basic_hash<Key,HashElt<Key,T> >::size_type new_size ) {
00322                 hash newhash(new_size, hf);
00323                 for( typename basic_hash<Key,HashElt<Key,T> >::const_iterator t = begin(); t != end(); ++t )
00324                         newhash.insert( t );
00325                 swap( newhash );
00326         }
00327 
00328 public:
00329         hash( typename basic_hash<Key,HashElt<Key,T> >::size_type size = 31, typename basic_hash<Key,HashElt<Key,T> >::hashfun_t f = hashfun ) :
00330                 basic_hash<Key,HashElt<Key,T> >( size, hashfun ) {};
00331         hash( const hash& S ) { construct(S); }
00332         ~hash() { clear(); }
00333         hash& operator= ( const hash& S ) {
00334                 if (this != &S) {
00335                         clear();
00336                         construct(S);
00337                 }
00338                 return *this;
00339         }
00340 
00341         T& operator[] ( const Key& k ) {
00342                 return insert( k, T() )->value;
00343         }
00344 
00345         T operator[] ( const Key& k ) const {
00346                 const list_type *p = find(k);
00347                 return p ? p->value : T();
00348         }
00349 
00350         list_type* insert( const Key& k, const T& v ) {
00351                 list_type *q;
00352                 list_type **p;
00353                 if ((q = find( k, &p )))
00354                         return q;
00355                 else {
00356                         *p = new list_type(k,v);
00357                         ++n_elements;
00358                         return *p;
00359                 }
00360         }
00361         list_type* insert( const list_type *e ) {
00362                 return insert( e->key, e->value ); }
00363         list_type* insert( const typename hash<Key,T>::iterator& i ) {
00364                 return insert( i->key, i->value ); }
00365         list_type* insert( const typename hash<Key,T>::const_iterator& i ) {
00366                 return insert( i->key, i->value ); }
00367 };
00368 
00369 template <class Key, class list_elem>
00370 void basic_hash<Key, list_elem>::statistics() {
00371         cout << n_elements << " elements in " << n_buckets << "/" << vsize
00372                  << " buckets\n";
00373         unsigned free = 0, alloced = 0, maxlen = 0, sumlen = 0;
00374         size_type maxbuck;
00375 
00376         for( size_type adr = 0; adr < vsize; ++adr ) {
00377                 if (!v[adr])
00378                         ++free;
00379                 else {
00380                         ++alloced;
00381                         size_type len = 0;
00382                         for( list_elem *p = (list_elem*)v[adr]; p; p = (list_elem*)p->next )
00383                                 ++len;
00384                         if (len > maxlen) {
00385                                 maxlen = len;
00386                                 maxbuck = adr;
00387                         }
00388                         sumlen += len;
00389                 }
00390         }
00391         cout << free << " buckets free, " << alloced << " used\n";
00392         cout << "avg. list len " << (float)sumlen/alloced <<
00393                 " max len " << maxlen << endl;
00394 
00395         cout << "Estimated avg. list len " << (float)n_elements/n_buckets << endl;
00396 }
00397 
00398 #endif  /* _PkgDb_hash_h */
00399 
00400 // Local Variables:
00401 // tab-width: 4
00402 // End:

Generated on Thu Feb 23 23:56:10 2006 for liby2util by doxygen 1.3.6