1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the class library */ 4 /* SoPlex --- the Sequential object-oriented simPlex. */ 5 /* */ 6 /* Copyright (c) 1996-2023 Zuse Institute Berlin (ZIB) */ 7 /* */ 8 /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 /* you may not use this file except in compliance with the License. */ 10 /* You may obtain a copy of the License at */ 11 /* */ 12 /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 /* */ 14 /* Unless required by applicable law or agreed to in writing, software */ 15 /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 /* See the License for the specific language governing permissions and */ 18 /* limitations under the License. */ 19 /* */ 20 /* You should have received a copy of the Apache-2.0 license */ 21 /* along with SoPlex; see the file LICENSE. If not email to soplex@zib.de. */ 22 /* */ 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 24 25 /**@file datahashtable.h 26 * @brief Generic hash table for data objects. 27 */ 28 #ifndef _DATAHASHTABLE_H_ 29 #define _DATAHASHTABLE_H_ 30 31 #include <iostream> 32 #include <assert.h> 33 #include <limits.h> 34 35 #include "soplex/spxdefines.h" 36 #include "soplex/array.h" 37 38 #define SOPLEX_HASHTABLE_FILLFACTOR 0.7 39 40 namespace soplex 41 { 42 /**@brief Generic hash table for data objects. 43 @ingroup Elementary 44 45 Class DataHashTable provides a generic hash table for 46 \ref DataObjects "Data Objects", 47 i.e., a map that maps arguments called \a HashItems to values called \a Infos. 48 HashItem and Info types are passed as template arguments. HashItems 49 must provide a comparison operator==(). Furthermore, both the HashItem and 50 Info must be data objects in the sense that the assignment operator is 51 equivalent to a <tt>memcpy()</tt> of the structure and no destructor is 52 required. 53 54 The construction of a DataHashTable requires a \em hash \em function that 55 assigns an integer value to every HashItem. Provided this, pairs of a 56 HashItem and a Info can be added to the DataHashTable. No more 57 than one Info can be assigned to the same HashItem at a time. The Info 58 to a HashItem can be accessed through the subscript operator[]() with 59 the Info object as a subscript. 60 61 The maximum number of elemens a DataHashTable can hold can be 62 specified upon construction and may be reset with reMax() later on. 63 Further, a value hash size value is required. This value must be less then 64 the maximum number of elements and must not have a common dominator with 65 the maximum number of elements. If not specified explicitely, it 66 is set automatically to a reasonable value. 67 68 The implementation relies on an array of DataHashTable::Element%s, from 69 now on referred to as elements. Upon construction, all elements are 70 marked as \c FREE. When an entry is added 71 to the DataHashTable, the hash value is computed by calling #m_hashfun 72 for its HashItem. If this array element is unused, it is 73 taken right away. Otherwise, the array index is incremented by 74 the hash size (modulo the element array size()) until an unused element 75 is found. 76 77 Removing elements is simply done by marking it as \c RELEASED. Hence, 78 when searching for an element, the search loop may not stop, when a 79 \c RELEASED element is encountered. However, such an element may be 80 reused when adding a new element to the DataHashTable. 81 82 Further, memory management with resizing of the element array is 83 straight forward. 84 */ 85 template < class HashItem, class Info > 86 class DataHashTable 87 { 88 private: 89 90 //----------------------------------- 91 /**@name Types */ 92 ///@{ 93 /// template class for elements stored in the hash table 94 template < class ElemHashItem, class ElemInfo > 95 class Element 96 { 97 public: 98 /// 99 ElemHashItem item; 100 /// 101 ElemInfo info; 102 /// States of an element 103 enum states 104 { 105 FREE, ///< element has never been used 106 RELEASED, ///< element had been used, but released 107 USED ///< element is in use 108 } stat; 109 }; 110 typedef Element< HashItem, Info > Elem; 111 ///@} 112 113 //----------------------------------- 114 /**@name Data */ 115 ///@{ 116 /// stores all elements of the hash table 117 Array < Elem > m_elem; 118 /// increment added to hash index, if allready used 119 int m_hashsize; 120 /// current number of entries in the hash table 121 int m_used; 122 /// pointer to hash function (mapping: \a HashItem -> int) 123 int (*m_hashfun)(const HashItem*); 124 /// memory is \ref soplex::DataHashTable::reMax() "reMax()"ed by this factor if a new element does't fit 125 Real m_memfactor; 126 /// some prime numbers for fast access 127 int primes[50]; 128 /// number of stored prime numbers 129 int nprimes; 130 131 ///@} 132 133 public: 134 135 //----------------------------------- 136 /**@name Access / modification */ 137 ///@{ 138 /// Is item \p h present in DataHashTable? 139 bool has(const HashItem& h) const 140 { 141 return index(h) >= 0; 142 } 143 144 /// returns const pointer to \a Info of \a HashItem \p h or 0, 145 /// if item is not found. 146 /** Returns a pointer to \a Info component of hash element \p h or a zero 147 * pointer if element \p h is not in the table. 148 */ 149 const Info* get(const HashItem& h) const 150 { 151 int i = index(h); 152 153 return (i >= 0) ? &m_elem[i].info : nullptr; 154 } 155 /// references \a Info of \a HashItem \p h. 156 /** Index operator for accessing the \a Info associated to 157 * \a HashItem \p h. It is required that \p h belongs to the 158 * DataHashTable, otherwise it core dumps. Methods has() or 159 * get() can be used for inquiring wheater \p h belongs to the 160 * DataHashTable or not. 161 */ 162 const Info& operator[](const HashItem& h) const 163 { 164 assert(has(h)); 165 166 return m_elem[index(h)].info; 167 } 168 /// adds a new entry to the hash table. 169 /** Adds a new entry consisting of \a HashItem \p h and \a Info \p info to the 170 * DataHashTable. No entry with HashItem \p h must be in the 171 * DataHashTable yet. After completion, \p info may be accessed via get() or 172 * operator[]() with \p h as parameter. The DataHashTable is #reMax()%ed 173 * if it becomes neccessary. 174 */ 175 void add(const HashItem& h, const Info& info) 176 { 177 assert(!has(h)); 178 179 if(m_used >= m_elem.size() * SOPLEX_HASHTABLE_FILLFACTOR) 180 reMax(int(m_memfactor * m_used) + 1); 181 182 assert(m_used < m_elem.size()); 183 184 decltype(m_elem.size()) i; 185 186 for(i = (*m_hashfun)(&h) % m_elem.size(); 187 m_elem[i].stat == Elem::USED; 188 i = (i + m_hashsize) % m_elem.size()) 189 ; 190 191 assert(m_elem[i].stat != Elem::USED); 192 193 m_elem[i].stat = Elem::USED; 194 m_elem[i].info = info; 195 m_elem[i].item = h; 196 197 m_used++; 198 199 assert(has(h)); 200 } 201 202 /// remove \a HashItem \p h from the DataHashTable. 203 void remove(const HashItem& h) 204 { 205 int i = index(h); 206 207 if(i < 0) 208 return; 209 210 m_elem[i].stat = Elem::RELEASED; 211 m_used--; 212 assert(!has(h)); 213 } 214 215 /// remove all entries from DataHashTable. 216 void clear() 217 { 218 for(int i = 0; i < m_elem.size(); i++) 219 m_elem[i].stat = Elem::FREE; 220 221 m_used = 0; 222 } 223 /// reset size of the DataHashTable. 224 /** Reset the maximum number of elements of a DataHashTable to \p newSize. 225 * However, if \p newSize < #m_used, it is resized to #m_used only. 226 * If \p newHashSize < 1, a new hash size is computed automatically. 227 * Otherwise, the specified value will be taken. 228 */ 229 void reMax(int newSize = -1, int newHashSize = 0) 230 { 231 Array< Elem > save(m_elem); 232 233 m_elem.reSize(newSize < m_used ? m_used : newSize); 234 235 clear(); 236 237 m_hashsize = (newHashSize < 1) ? autoHashSize() : newHashSize; 238 239 for(int i = 0; i < save.size(); i++) 240 if(save[i].stat == Elem::USED) 241 add(save[i].item, save[i].info); 242 } 243 ///@} 244 245 //----------------------------------- 246 /**@name Debugging */ 247 ///@{ 248 /// checks whether DataHashTable is consistent 249 bool isConsistent() const 250 { 251 #ifdef ENABLE_CONSISTENCY_CHECKS 252 int total = 0; 253 254 for(int i = 0; i < m_elem.size(); i++) 255 { 256 if(m_elem[i].stat == Elem::USED) 257 { 258 total++; 259 260 if(!has(m_elem[i].item)) 261 return SPX_MSG_INCONSISTENT("DataHashTable"); 262 } 263 } 264 265 if(total != m_used) 266 return SPX_MSG_INCONSISTENT("DataHashTable"); 267 268 return m_elem.isConsistent(); 269 #else 270 return true; 271 #endif 272 } 273 ///@} 274 275 //----------------------------------- 276 /**@name Construction / destruction */ 277 ///@{ 278 /// default constructor. 279 /** Allocates a DataHashTable for \p maxsize entries using \p hashfun 280 * as hash function. If \p hashsize > 0, #m_hashsize is set to the 281 * specified value, otherwise a suitable hash size is computed 282 * automatically. Parameter \p factor is used for memory management: 283 * If more than \p maxsize entries are added to the DataHashTable, it 284 * will automatically be #reMax()%ed by a factor of \p factor. 285 * 286 * @param hashfun pointer to hash function. 287 * @param maxsize maximum number of hash elements. 288 * @param hashsize hash size. 289 * @param factor factor for increasing data block. 290 */ 291 explicit DataHashTable( 292 int (*hashfun)(const HashItem*), 293 int maxsize = 265, 294 int hashsize = 0, 295 Real factor = 2.0) 296 : m_elem(maxsize) 297 , m_hashfun(hashfun) 298 , m_memfactor(factor) 299 { 300 clear(); 301 302 primes[0] = 1523; 303 primes[1] = 3547; 304 primes[2] = 8011; 305 primes[3] = 17707; 306 primes[4] = 38723; 307 primes[5] = 83833; 308 primes[6] = 180317; 309 primes[7] = 385897; 310 primes[8] = 821411; 311 primes[9] = 1742369; 312 primes[10] = 3680893; 313 primes[11] = 5693959; 314 primes[12] = 7753849; 315 primes[13] = 9849703; 316 primes[14] = 11973277; 317 primes[15] = 14121853; 318 primes[16] = 17643961; 319 primes[17] = 24273817; 320 primes[18] = 32452843; 321 primes[19] = 49979687; 322 primes[20] = 67867967; 323 primes[21] = 86028121; 324 primes[22] = 104395301; 325 primes[23] = 122949823; 326 primes[24] = 141650939; 327 primes[25] = 160481183; 328 primes[26] = 179424673; 329 primes[27] = 198491317; 330 primes[28] = 217645177; 331 primes[29] = 256203161; 332 primes[30] = 314606869; 333 primes[31] = 373587883; 334 primes[32] = 433024223; 335 primes[33] = 492876847; 336 primes[34] = 553105243; 337 primes[35] = 613651349; 338 primes[36] = 694847533; 339 primes[37] = 756065159; 340 primes[38] = 817504243; 341 primes[39] = 879190747; 342 primes[40] = 941083981; 343 primes[41] = 982451653; 344 primes[42] = INT_MAX; 345 nprimes = 43; 346 347 m_hashsize = (hashsize < 1) ? autoHashSize() : hashsize; 348 349 assert(m_memfactor > 1.0); 350 assert(isConsistent()); 351 } 352 353 /// assignment operator. 354 DataHashTable& operator=(const DataHashTable& base) 355 { 356 m_elem = base.m_elem; 357 m_hashfun = base.m_hashfun; 358 m_memfactor = base.m_memfactor; 359 m_used = base.m_used; 360 m_hashsize = base.m_hashsize; 361 primes = base.primes; 362 nprimes = base.nprimes; 363 364 assert(m_memfactor > 1.0); 365 assert(isConsistent()); 366 return *this; 367 } 368 369 /// copy constructor. 370 DataHashTable(const DataHashTable& base) 371 : m_elem(base.m_elem) 372 , m_hashfun(base.m_hashfun) 373 , m_memfactor(base.m_memfactor) 374 , m_used(base.m_used) 375 , m_hashsize(base.m_hashsize) 376 , primes(base.primes) 377 , nprimes(base.nprimes) 378 { 379 assert(m_memfactor > 1.0); 380 assert(isConsistent()); 381 } 382 ///@} 383 384 private: 385 386 //----------------------------------- 387 /**@name Helpers */ 388 ///@{ 389 /// determine a good \ref soplex::DataHashTable::m_hashsize "m_hashsize". 390 /** Determine next larger prime number for new #m_hashsize 391 * @return good value for #m_hashsize 392 */ 393 int autoHashSize() const 394 { 395 auto oldsize = m_elem.size(); 396 397 int left = 0; 398 int right = nprimes - 1; 399 int middle; 400 401 while(left <= right) 402 { 403 middle = (left + right) / 2; 404 405 if(oldsize < primes[middle]) 406 { 407 right = middle - 1; 408 } 409 else if(oldsize > primes[middle]) 410 { 411 left = middle + 1; 412 } 413 else 414 { 415 assert(oldsize == primes[middle]); 416 return primes[middle + 1]; 417 } 418 } 419 420 assert(left == right + 1); 421 return primes[left]; 422 } 423 424 /// automatically computes a good \ref soplex::DataHashTable::m_hashsize "m_hashsize". 425 /** Computes a good #m_hashsize as the product of all prime numbers 426 * not divisors of the number of elements that are <= 427 * the maximum divisor of the number of elemens. 428 * @return good value for #m_hashsize 429 */ 430 int autoHashSizeold() const 431 { 432 DataArray< bool > prime(m_elem.size()); 433 int hashsize = 1; 434 int maxsize = m_elem.size(); 435 int i; 436 437 for(i = 2; i < maxsize; i++) 438 prime[i] = true; 439 440 for(i = 2; i < maxsize; ++i) 441 { 442 if(prime[i]) 443 { 444 for(int j = i; j < maxsize; j += i) 445 prime[j] = false; 446 447 if(m_elem.size() % i != 0) 448 { 449 hashsize *= i; 450 451 if(hashsize > maxsize) 452 { 453 hashsize /= i; 454 break; 455 } 456 } 457 } 458 } 459 460 return hashsize; 461 } 462 463 /// returns hash index of \a HashItem \p h or -1, if \p h is not present. 464 /** Using the hash function #m_hashfun, the hash value of \p h 465 * is calculated. 466 * Starting with this hash index, every #m_hashsize%-th element is 467 * compared with \p h until \p h is found or all elements have been checked. 468 * 469 * @param h \a HashItem, for which the hash index should be calculated 470 * @return hash index of \p h or -1, 471 * if \p h is not a member of the hash table 472 */ 473 int index(const HashItem& h) const 474 { 475 if(m_used == 0) 476 return -1; 477 478 assert(m_elem.size() > 0); 479 480 auto i = (*m_hashfun)(&h) % m_elem.size(); 481 auto j = i; 482 483 while(m_elem[i].stat != Elem::FREE) 484 { 485 if((m_elem[i].stat == Elem::USED) 486 && (m_elem[i].item == h)) 487 return i; 488 489 i = (i + m_hashsize) % m_elem.size(); 490 491 if(i == j) 492 break; 493 } 494 495 return -1; 496 } 497 ///@} 498 499 }; 500 501 } // namespace soplex 502 #endif // _DATAHASHTABLE_H_ 503