libalpm
Arch Linux Package Manager Library
|
00001 /* 00002 * pkghash.c 00003 * 00004 * Copyright (c) 2011 Pacman Development Team <pacman-dev@archlinux.org> 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00018 */ 00019 00020 #include <errno.h> 00021 00022 #include "pkghash.h" 00023 #include "util.h" 00024 00025 /* List of primes for possible sizes of hash tables. 00026 * 00027 * The maximum table size is the last prime under 1,000,000. That is 00028 * more than an order of magnitude greater than the number of packages 00029 * in any Linux distribution, and well under UINT_MAX. 00030 */ 00031 static const unsigned int prime_list[] = 00032 { 00033 11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u, 00034 53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u, 00035 109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u, 00036 199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u, 00037 383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u, 00038 761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u, 00039 1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u, 00040 2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u, 00041 5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u, 00042 9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u, 00043 16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u, 00044 28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u, 00045 49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u, 00046 85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u, 00047 147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u, 00048 256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u, 00049 444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u, 00050 771049u, 834181u, 902483u, 976369u 00051 }; 00052 00053 /* How far forward do we look when linear probing for a spot? */ 00054 static const unsigned int stride = 1; 00055 /* What is the maximum load percentage of our hash table? */ 00056 static const double max_hash_load = 0.68; 00057 /* Initial load percentage given a certain size */ 00058 static const double initial_hash_load = 0.58; 00059 00060 /* Allocate a hash table with space for at least "size" elements */ 00061 alpm_pkghash_t *_alpm_pkghash_create(unsigned int size) 00062 { 00063 alpm_pkghash_t *hash = NULL; 00064 unsigned int i, loopsize; 00065 00066 CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL); 00067 size = size / initial_hash_load + 1; 00068 00069 loopsize = sizeof(prime_list) / sizeof(*prime_list); 00070 for(i = 0; i < loopsize; i++) { 00071 if(prime_list[i] > size) { 00072 hash->buckets = prime_list[i]; 00073 hash->limit = hash->buckets * max_hash_load; 00074 break; 00075 } 00076 } 00077 00078 if(hash->buckets < size) { 00079 errno = ERANGE; 00080 free(hash); 00081 return NULL; 00082 } 00083 00084 CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \ 00085 free(hash); return NULL); 00086 00087 return hash; 00088 } 00089 00090 static unsigned int get_hash_position(unsigned long name_hash, 00091 alpm_pkghash_t *hash) 00092 { 00093 unsigned int position; 00094 00095 position = name_hash % hash->buckets; 00096 00097 /* collision resolution using open addressing with linear probing */ 00098 while(hash->hash_table[position] != NULL) { 00099 position += stride; 00100 while(position >= hash->buckets) { 00101 position -= hash->buckets; 00102 } 00103 } 00104 00105 return position; 00106 } 00107 00108 /* Expand the hash table size to the next increment and rebin the entries */ 00109 static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash) 00110 { 00111 alpm_pkghash_t *newhash; 00112 unsigned int newsize, i; 00113 00114 /* Hash tables will need resized in two cases: 00115 * - adding packages to the local database 00116 * - poor estimation of the number of packages in sync database 00117 * 00118 * For small hash tables sizes (<500) the increase in size is by a 00119 * minimum of a factor of 2 for optimal rehash efficiency. For 00120 * larger database sizes, this increase is reduced to avoid excess 00121 * memory allocation as both scenarios requiring a rehash should not 00122 * require a table size increase that large. */ 00123 if(oldhash->buckets < 500) { 00124 newsize = oldhash->buckets * 2; 00125 } else if(oldhash->buckets < 2000) { 00126 newsize = oldhash->buckets * 3 / 2; 00127 } else if(oldhash->buckets < 5000) { 00128 newsize = oldhash->buckets * 4 / 3; 00129 } else { 00130 newsize = oldhash->buckets + 1; 00131 } 00132 00133 newhash = _alpm_pkghash_create(newsize); 00134 if(newhash == NULL) { 00135 /* creation of newhash failed, stick with old one... */ 00136 return oldhash; 00137 } 00138 00139 newhash->list = oldhash->list; 00140 oldhash->list = NULL; 00141 00142 for(i = 0; i < oldhash->buckets; i++) { 00143 if(oldhash->hash_table[i] != NULL) { 00144 alpm_pkg_t *package = oldhash->hash_table[i]->data; 00145 unsigned int position = get_hash_position(package->name_hash, newhash); 00146 00147 newhash->hash_table[position] = oldhash->hash_table[i]; 00148 oldhash->hash_table[i] = NULL; 00149 } 00150 } 00151 00152 newhash->entries = oldhash->entries; 00153 00154 _alpm_pkghash_free(oldhash); 00155 00156 return newhash; 00157 } 00158 00159 static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t *hash, alpm_pkg_t *pkg, 00160 int sorted) 00161 { 00162 alpm_list_t *ptr; 00163 unsigned int position; 00164 00165 if(pkg == NULL || hash == NULL) { 00166 return hash; 00167 } 00168 00169 if(hash->entries >= hash->limit) { 00170 hash = rehash(hash); 00171 } 00172 00173 position = get_hash_position(pkg->name_hash, hash); 00174 00175 ptr = malloc(sizeof(alpm_list_t)); 00176 if(ptr == NULL) { 00177 return hash; 00178 } 00179 00180 ptr->data = pkg; 00181 ptr->prev = ptr; 00182 ptr->next = NULL; 00183 00184 hash->hash_table[position] = ptr; 00185 if(!sorted) { 00186 hash->list = alpm_list_join(hash->list, ptr); 00187 } else { 00188 hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp); 00189 } 00190 00191 hash->entries += 1; 00192 return hash; 00193 } 00194 00195 alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t *hash, alpm_pkg_t *pkg) 00196 { 00197 return pkghash_add_pkg(hash, pkg, 0); 00198 } 00199 00200 alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t *hash, alpm_pkg_t *pkg) 00201 { 00202 return pkghash_add_pkg(hash, pkg, 1); 00203 } 00204 00205 static unsigned int move_one_entry(alpm_pkghash_t *hash, 00206 unsigned int start, unsigned int end) 00207 { 00208 /* Iterate backwards from 'end' to 'start', seeing if any of the items 00209 * would hash to 'start'. If we find one, we move it there and break. If 00210 * we get all the way back to position and find none that hash to it, we 00211 * also end iteration. Iterating backwards helps prevent needless shuffles; 00212 * we will never need to move more than one item per function call. The 00213 * return value is our current iteration location; if this is equal to 00214 * 'start' we can stop this madness. */ 00215 while(end != start) { 00216 alpm_list_t *i = hash->hash_table[end]; 00217 alpm_pkg_t *info = i->data; 00218 unsigned int new_position = get_hash_position(info->name_hash, hash); 00219 00220 if(new_position == start) { 00221 hash->hash_table[start] = i; 00222 hash->hash_table[end] = NULL; 00223 break; 00224 } 00225 00226 /* the odd math ensures we are always positive, e.g. 00227 * e.g. (0 - 1) % 47 == -1 00228 * e.g. (47 + 0 - 1) % 47 == 46 */ 00229 end = (hash->buckets + end - stride) % hash->buckets; 00230 } 00231 return end; 00232 } 00233 00234 /** 00235 * @brief Remove a package from a pkghash. 00236 * 00237 * @param hash the hash to remove the package from 00238 * @param pkg the package we are removing 00239 * @param data output parameter containing the removed item 00240 * 00241 * @return the resultant hash 00242 */ 00243 alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg, 00244 alpm_pkg_t **data) 00245 { 00246 alpm_list_t *i; 00247 unsigned int position; 00248 00249 if(data) { 00250 *data = NULL; 00251 } 00252 00253 if(pkg == NULL || hash == NULL) { 00254 return hash; 00255 } 00256 00257 position = pkg->name_hash % hash->buckets; 00258 while((i = hash->hash_table[position]) != NULL) { 00259 alpm_pkg_t *info = i->data; 00260 00261 if(info->name_hash == pkg->name_hash && 00262 strcmp(info->name, pkg->name) == 0) { 00263 unsigned int stop, prev; 00264 00265 /* remove from list and hash */ 00266 hash->list = alpm_list_remove_item(hash->list, i); 00267 if(data) { 00268 *data = info; 00269 } 00270 hash->hash_table[position] = NULL; 00271 free(i); 00272 hash->entries -= 1; 00273 00274 /* Potentially move entries following removed entry to keep open 00275 * addressing collision resolution working. We start by finding the 00276 * next null bucket to know how far we have to look. */ 00277 stop = position + stride; 00278 while(stop >= hash->buckets) { 00279 stop -= hash->buckets; 00280 } 00281 while(hash->hash_table[stop] != NULL && stop != position) { 00282 stop += stride; 00283 while(stop >= hash->buckets) { 00284 stop -= hash->buckets; 00285 } 00286 } 00287 stop = (hash->buckets + stop - stride) % hash->buckets; 00288 00289 /* We now search backwards from stop to position. If we find an 00290 * item that now hashes to position, we will move it, and then try 00291 * to plug the new hole we just opened up, until we finally don't 00292 * move anything. */ 00293 while((prev = move_one_entry(hash, position, stop)) != position) { 00294 position = prev; 00295 } 00296 00297 return hash; 00298 } 00299 00300 position += stride; 00301 while(position >= hash->buckets) { 00302 position -= hash->buckets; 00303 } 00304 } 00305 00306 return hash; 00307 } 00308 00309 void _alpm_pkghash_free(alpm_pkghash_t *hash) 00310 { 00311 if(hash != NULL) { 00312 unsigned int i; 00313 for(i = 0; i < hash->buckets; i++) { 00314 free(hash->hash_table[i]); 00315 } 00316 free(hash->hash_table); 00317 } 00318 free(hash); 00319 } 00320 00321 alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name) 00322 { 00323 alpm_list_t *lp; 00324 unsigned long name_hash; 00325 unsigned int position; 00326 00327 if(name == NULL || hash == NULL) { 00328 return NULL; 00329 } 00330 00331 name_hash = _alpm_hash_sdbm(name); 00332 00333 position = name_hash % hash->buckets; 00334 00335 while((lp = hash->hash_table[position]) != NULL) { 00336 alpm_pkg_t *info = lp->data; 00337 00338 if(info->name_hash == name_hash && strcmp(info->name, name) == 0) { 00339 return info; 00340 } 00341 00342 position += stride; 00343 while(position >= hash->buckets) { 00344 position -= hash->buckets; 00345 } 00346 } 00347 00348 return NULL; 00349 } 00350 00351 /* vim: set ts=2 sw=2 noet: */