libalpm
Arch Linux Package Manager Library
pkghash.c
Go to the documentation of this file.
00001 /*
00002  *  pkghash.c
00003  *
00004  *  Copyright (c) 2011 Pacman Development Team <pacman-dev@archlinux.org>
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; either version 2 of the License, or
00009  *  (at your option) any later version.
00010  *
00011  *  This program is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  *  GNU General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU General Public License
00017  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
00018  */
00019 
00020 #include <errno.h>
00021 
00022 #include "pkghash.h"
00023 #include "util.h"
00024 
00025 /* List of primes for possible sizes of hash tables.
00026  *
00027  * The maximum table size is the last prime under 1,000,000.  That is
00028  * more than an order of magnitude greater than the number of packages
00029  * in any Linux distribution, and well under UINT_MAX.
00030  */
00031 static const unsigned int prime_list[] =
00032 {
00033     11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
00034     53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
00035     109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
00036     199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
00037     383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
00038     761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
00039     1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
00040     2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
00041     5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
00042     9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
00043     16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
00044     28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
00045     49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
00046     85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
00047     147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
00048     256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
00049     444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
00050     771049u, 834181u, 902483u, 976369u
00051 };
00052 
00053 /* How far forward do we look when linear probing for a spot? */
00054 static const unsigned int stride = 1;
00055 /* What is the maximum load percentage of our hash table? */
00056 static const double max_hash_load = 0.68;
00057 /* Initial load percentage given a certain size */
00058 static const double initial_hash_load = 0.58;
00059 
00060 /* Allocate a hash table with space for at least "size" elements */
00061 alpm_pkghash_t *_alpm_pkghash_create(unsigned int size)
00062 {
00063     alpm_pkghash_t *hash = NULL;
00064     unsigned int i, loopsize;
00065 
00066     CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL);
00067     size = size / initial_hash_load + 1;
00068 
00069     loopsize = sizeof(prime_list) / sizeof(*prime_list);
00070     for(i = 0; i < loopsize; i++) {
00071         if(prime_list[i] > size) {
00072             hash->buckets = prime_list[i];
00073             hash->limit = hash->buckets * max_hash_load;
00074             break;
00075         }
00076     }
00077 
00078     if(hash->buckets < size) {
00079         errno = ERANGE;
00080         free(hash);
00081         return NULL;
00082     }
00083 
00084     CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \
00085                 free(hash); return NULL);
00086 
00087     return hash;
00088 }
00089 
00090 static unsigned int get_hash_position(unsigned long name_hash,
00091         alpm_pkghash_t *hash)
00092 {
00093     unsigned int position;
00094 
00095     position = name_hash % hash->buckets;
00096 
00097     /* collision resolution using open addressing with linear probing */
00098     while(hash->hash_table[position] != NULL) {
00099         position += stride;
00100         while(position >= hash->buckets) {
00101             position -= hash->buckets;
00102         }
00103     }
00104 
00105     return position;
00106 }
00107 
00108 /* Expand the hash table size to the next increment and rebin the entries */
00109 static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash)
00110 {
00111     alpm_pkghash_t *newhash;
00112     unsigned int newsize, i;
00113 
00114     /* Hash tables will need resized in two cases:
00115      *  - adding packages to the local database
00116      *  - poor estimation of the number of packages in sync database
00117      *
00118      * For small hash tables sizes (<500) the increase in size is by a
00119      * minimum of a factor of 2 for optimal rehash efficiency.  For
00120      * larger database sizes, this increase is reduced to avoid excess
00121      * memory allocation as both scenarios requiring a rehash should not
00122      * require a table size increase that large. */
00123     if(oldhash->buckets < 500) {
00124         newsize = oldhash->buckets * 2;
00125     } else if(oldhash->buckets < 2000) {
00126         newsize = oldhash->buckets * 3 / 2;
00127     } else if(oldhash->buckets < 5000) {
00128         newsize = oldhash->buckets * 4 / 3;
00129     } else {
00130         newsize = oldhash->buckets + 1;
00131     }
00132 
00133     newhash = _alpm_pkghash_create(newsize);
00134     if(newhash == NULL) {
00135         /* creation of newhash failed, stick with old one... */
00136         return oldhash;
00137     }
00138 
00139     newhash->list = oldhash->list;
00140     oldhash->list = NULL;
00141 
00142     for(i = 0; i < oldhash->buckets; i++) {
00143         if(oldhash->hash_table[i] != NULL) {
00144             alpm_pkg_t *package = oldhash->hash_table[i]->data;
00145             unsigned int position = get_hash_position(package->name_hash, newhash);
00146 
00147             newhash->hash_table[position] = oldhash->hash_table[i];
00148             oldhash->hash_table[i] = NULL;
00149         }
00150     }
00151 
00152     newhash->entries = oldhash->entries;
00153 
00154     _alpm_pkghash_free(oldhash);
00155 
00156     return newhash;
00157 }
00158 
00159 static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
00160         int sorted)
00161 {
00162     alpm_list_t *ptr;
00163     unsigned int position;
00164 
00165     if(pkg == NULL || hash == NULL) {
00166         return hash;
00167     }
00168 
00169     if(hash->entries >= hash->limit) {
00170         hash = rehash(hash);
00171     }
00172 
00173     position = get_hash_position(pkg->name_hash, hash);
00174 
00175     ptr = malloc(sizeof(alpm_list_t));
00176     if(ptr == NULL) {
00177         return hash;
00178     }
00179 
00180     ptr->data = pkg;
00181     ptr->prev = ptr;
00182     ptr->next = NULL;
00183 
00184     hash->hash_table[position] = ptr;
00185     if(!sorted) {
00186         hash->list = alpm_list_join(hash->list, ptr);
00187     } else {
00188         hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
00189     }
00190 
00191     hash->entries += 1;
00192     return hash;
00193 }
00194 
00195 alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t *hash, alpm_pkg_t *pkg)
00196 {
00197     return pkghash_add_pkg(hash, pkg, 0);
00198 }
00199 
00200 alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t *hash, alpm_pkg_t *pkg)
00201 {
00202     return pkghash_add_pkg(hash, pkg, 1);
00203 }
00204 
00205 static unsigned int move_one_entry(alpm_pkghash_t *hash,
00206         unsigned int start, unsigned int end)
00207 {
00208     /* Iterate backwards from 'end' to 'start', seeing if any of the items
00209      * would hash to 'start'. If we find one, we move it there and break.  If
00210      * we get all the way back to position and find none that hash to it, we
00211      * also end iteration. Iterating backwards helps prevent needless shuffles;
00212      * we will never need to move more than one item per function call.  The
00213      * return value is our current iteration location; if this is equal to
00214      * 'start' we can stop this madness. */
00215     while(end != start) {
00216         alpm_list_t *i = hash->hash_table[end];
00217         alpm_pkg_t *info = i->data;
00218         unsigned int new_position = get_hash_position(info->name_hash, hash);
00219 
00220         if(new_position == start) {
00221             hash->hash_table[start] = i;
00222             hash->hash_table[end] = NULL;
00223             break;
00224         }
00225 
00226         /* the odd math ensures we are always positive, e.g.
00227          * e.g. (0 - 1) % 47      == -1
00228          * e.g. (47 + 0 - 1) % 47 == 46 */
00229         end = (hash->buckets + end - stride) % hash->buckets;
00230     }
00231     return end;
00232 }
00233 
00234 /**
00235  * @brief Remove a package from a pkghash.
00236  *
00237  * @param hash     the hash to remove the package from
00238  * @param pkg      the package we are removing
00239  * @param data     output parameter containing the removed item
00240  *
00241  * @return the resultant hash
00242  */
00243 alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
00244         alpm_pkg_t **data)
00245 {
00246     alpm_list_t *i;
00247     unsigned int position;
00248 
00249     if(data) {
00250         *data = NULL;
00251     }
00252 
00253     if(pkg == NULL || hash == NULL) {
00254         return hash;
00255     }
00256 
00257     position = pkg->name_hash % hash->buckets;
00258     while((i = hash->hash_table[position]) != NULL) {
00259         alpm_pkg_t *info = i->data;
00260 
00261         if(info->name_hash == pkg->name_hash &&
00262                     strcmp(info->name, pkg->name) == 0) {
00263             unsigned int stop, prev;
00264 
00265             /* remove from list and hash */
00266             hash->list = alpm_list_remove_item(hash->list, i);
00267             if(data) {
00268                 *data = info;
00269             }
00270             hash->hash_table[position] = NULL;
00271             free(i);
00272             hash->entries -= 1;
00273 
00274             /* Potentially move entries following removed entry to keep open
00275              * addressing collision resolution working. We start by finding the
00276              * next null bucket to know how far we have to look. */
00277             stop = position + stride;
00278             while(stop >= hash->buckets) {
00279                 stop -= hash->buckets;
00280             }
00281             while(hash->hash_table[stop] != NULL && stop != position) {
00282                 stop += stride;
00283                 while(stop >= hash->buckets) {
00284                     stop -= hash->buckets;
00285                 }
00286             }
00287             stop = (hash->buckets + stop - stride) % hash->buckets;
00288 
00289             /* We now search backwards from stop to position. If we find an
00290              * item that now hashes to position, we will move it, and then try
00291              * to plug the new hole we just opened up, until we finally don't
00292              * move anything. */
00293             while((prev = move_one_entry(hash, position, stop)) != position) {
00294                 position = prev;
00295             }
00296 
00297             return hash;
00298         }
00299 
00300         position += stride;
00301         while(position >= hash->buckets) {
00302             position -= hash->buckets;
00303         }
00304     }
00305 
00306     return hash;
00307 }
00308 
00309 void _alpm_pkghash_free(alpm_pkghash_t *hash)
00310 {
00311     if(hash != NULL) {
00312         unsigned int i;
00313         for(i = 0; i < hash->buckets; i++) {
00314             free(hash->hash_table[i]);
00315         }
00316         free(hash->hash_table);
00317     }
00318     free(hash);
00319 }
00320 
00321 alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name)
00322 {
00323     alpm_list_t *lp;
00324     unsigned long name_hash;
00325     unsigned int position;
00326 
00327     if(name == NULL || hash == NULL) {
00328         return NULL;
00329     }
00330 
00331     name_hash = _alpm_hash_sdbm(name);
00332 
00333     position = name_hash % hash->buckets;
00334 
00335     while((lp = hash->hash_table[position]) != NULL) {
00336         alpm_pkg_t *info = lp->data;
00337 
00338         if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
00339             return info;
00340         }
00341 
00342         position += stride;
00343         while(position >= hash->buckets) {
00344             position -= hash->buckets;
00345         }
00346     }
00347 
00348     return NULL;
00349 }
00350 
00351 /* vim: set ts=2 sw=2 noet: */