VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 1943

Last change on this file since 1943 was 1943, checked in by bird, 17 years ago

strcache2.c: fixed the length check.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.3 KB
Line 
1/* $Id: strcache2.c 1943 2008-10-26 02:19:06Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42typedef signed char int8_t;
43typedef signed short int16_t;
44typedef signed int int32_t;
45#else
46# include <stdint.h>
47#endif
48
49#ifdef WINDOWS32
50# include <io.h>
51# include <process.h>
52# include <Windows.h>
53# define PARSE_IN_WORKER
54#endif
55
56#ifdef __OS2__
57# include <sys/fmutex.h>
58#endif
59
60#ifdef HAVE_PTHREAD
61# include <pthread.h>
62#endif
63
64
65/*******************************************************************************
66* Defined Constants And Macros *
67*******************************************************************************/
68/* The default size of a memory segment (1MB). */
69#define STRCACHE2_SEG_SIZE (1024U*1024U)
70/* The default hash table shift (hash size give as a power of two). */
71#define STRCACHE2_HASH_SHIFT 16
72/** Does the modding / masking of a hash number into an index. */
73#ifdef STRCACHE2_USE_MASK
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
75#else
76# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
77#endif
78
79# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
80 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
81# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
82# else
83 /* (endian doesn't matter) */
84# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
85 | (((const uint8_t *)(ptr))[1]) )
86# endif
87
88
89/*******************************************************************************
90* Global Variables *
91*******************************************************************************/
92/* List of initialized string caches. */
93static struct strcache2 *strcache_head;
94
95
96/** Finds the closest primary number for power of two value (or something else
97 * useful if not support). */
98MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
99{
100 switch (shift)
101 {
102 case 5: return 31;
103 case 6: return 61;
104 case 7: return 127;
105 case 8: return 251;
106 case 9: return 509;
107 case 10: return 1021;
108 case 11: return 2039;
109 case 12: return 4093;
110 case 13: return 8191;
111 case 14: return 16381;
112 case 15: return 32749;
113 case 16: return 65521;
114 case 17: return 131063;
115 case 18: return 262139;
116 case 19: return 524269;
117 case 20: return 1048573;
118 case 21: return 2097143;
119 case 22: return 4194301;
120 case 23: return 8388593;
121
122 default:
123 assert (0);
124 return (1 << shift) - 1;
125 }
126}
127
128/* The following is a bit experiment. It produces longer chains, i.e. worse
129 distribution of the strings in the table, however the actual make
130 performances is better (<time). The explanation is probably that the
131 collisions only really increase for entries that aren't looked up that
132 much and that it actually improoves the situation for those that is. Or
133 that we spend so much less time hashing that it makes up (and more) for
134 the pentalty we suffer from the longer chains and worse distribution.
135
136 XXX: Check how this works out with different path lengths. I suspect it
137 might depend on the length of PATH_ROOT and the depth of the files
138 in the project as well. If it does, this might make matters worse
139 for some and better for others which isn't very cool... */
140
141#if 0
142# define BIG_HASH_SIZE 32 /* kinda fast */
143# define BIG_HASH_HEAD 16
144# define BIG_HASH_TAIL 12
145#elif 0
146# define BIG_HASH_SIZE 68 /* kinda safe */
147# define BIG_HASH_HEAD 24
148# define BIG_HASH_TAIL 24
149#elif 0
150# define BIG_HASH_SIZE 128 /* safe */
151# define BIG_HASH_HEAD 32
152# define BIG_HASH_TAIL 32
153#endif
154
155#ifdef BIG_HASH_SIZE
156/* long string: hash head and tail, drop the middle. */
157MY_INLINE unsigned int
158strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
159{
160 uint32_t hash = len;
161 uint32_t tmp;
162 unsigned int head;
163
164 /* head BIG_HASH_HEAD bytes */
165 head = (BIG_HASH_HEAD >> 2);
166 while (head-- > 0)
167 {
168 hash += strcache2_get_unaligned_16bits (str);
169 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
170 hash = (hash << 16) ^ tmp;
171 str += 2 * sizeof (uint16_t);
172 hash += hash >> 11;
173 }
174
175 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
176 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
177 head = (BIG_HASH_TAIL >> 2);
178 while (head-- > 0)
179 {
180 hash += strcache2_get_unaligned_16bits (str);
181 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
182 hash = (hash << 16) ^ tmp;
183 str += 2 * sizeof (uint16_t);
184 hash += hash >> 11;
185 }
186
187 /* force "avalanching" of final 127 bits. */
188 hash ^= hash << 3;
189 hash += hash >> 5;
190 hash ^= hash << 4;
191 hash += hash >> 17;
192 hash ^= hash << 25;
193 hash += hash >> 6;
194
195 return hash;
196}
197#endif /* BIG_HASH_SIZE */
198
199MY_INLINE unsigned int
200strcache2_case_sensitive_hash (const char *str, unsigned int len)
201{
202#if 1
203 /* Paul Hsieh hash SuperFast function:
204 http://www.azillionmonkeys.com/qed/hash.html
205
206 This performs very good and as a sligtly better distribution than
207 STRING_N_HASH_1 on a typical kBuild run.
208
209 It is also 37% faster than return_STRING_N_HASH_1 when running the
210 two 100 times over typical kBuild strings that end up here (did a
211 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
212 with -O2.
213
214 FIXME: A path for well aligned data should be added to speed up
215 execution on alignment sensitive systems. */
216 unsigned int rem;
217 uint32_t hash;
218 uint32_t tmp;
219
220 assert (sizeof (uint8_t) == sizeof (char));
221
222# ifdef BIG_HASH_SIZE
223 /* long string? */
224# if 0 /*BIG_HASH_SIZE > 128*/
225 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
226# else
227 if (len >= BIG_HASH_SIZE)
228# endif
229 return strcache2_case_sensitive_hash_big (str, len);
230# endif
231
232 /* short string: main loop, walking on 2 x uint16_t */
233 hash = len;
234 rem = len & 3;
235 len >>= 2;
236 while (len > 0)
237 {
238 hash += strcache2_get_unaligned_16bits (str);
239 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
240 hash = (hash << 16) ^ tmp;
241 str += 2 * sizeof (uint16_t);
242 hash += hash >> 11;
243 len--;
244 }
245
246 /* the remainder */
247 switch (rem)
248 {
249 case 3:
250 hash += strcache2_get_unaligned_16bits (str);
251 hash ^= hash << 16;
252 hash ^= str[sizeof (uint16_t)] << 18;
253 hash += hash >> 11;
254 break;
255 case 2:
256 hash += strcache2_get_unaligned_16bits (str);
257 hash ^= hash << 11;
258 hash += hash >> 17;
259 break;
260 case 1:
261 hash += *str;
262 hash ^= hash << 10;
263 hash += hash >> 1;
264 break;
265 }
266
267 /* force "avalanching" of final 127 bits. */
268 hash ^= hash << 3;
269 hash += hash >> 5;
270 hash ^= hash << 4;
271 hash += hash >> 17;
272 hash ^= hash << 25;
273 hash += hash >> 6;
274
275 return hash;
276
277#elif 1
278 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
279 when running the two 100 times over typical kBuild strings that
280 end up here (did a fprintf here and built kBuild).
281 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
282
283 unsigned int hash = 0;
284 if (MY_PREDICT_TRUE(len >= 2))
285 {
286 unsigned int ch0 = *str++;
287 hash = 0;
288 len--;
289 while (len >= 2)
290 {
291 unsigned int ch1 = *str++;
292 hash += ch0 << (ch1 & 0xf);
293
294 ch0 = *str++;
295 hash += ch1 << (ch0 & 0xf);
296
297 len -= 2;
298 }
299 if (len == 1)
300 {
301 unsigned ch1 = *str;
302 hash += ch0 << (ch1 & 0xf);
303
304 hash += ch1;
305 }
306 else
307 hash += ch0;
308 }
309 else if (len)
310 {
311 hash = *str;
312 hash += hash << (hash & 0xf);
313 }
314 else
315 hash = 0;
316 return hash;
317
318#elif 1
319# if 0
320 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
321 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
322 first in the assignment made it noticably slower.)
323
324 However, it is noticably slower in practice, most likely because of more
325 collisions. Hrmpf. */
326
327# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
328 unsigned int hash = 0;
329
330# else
331 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
332 fast than return_STRING_N_HASH_1.
333
334 Ditto. */
335
336# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
337 unsigned int hash = 5381;
338# endif
339
340
341 while (len >= 4)
342 {
343 UPDATE_HASH (str[0]);
344 UPDATE_HASH (str[1]);
345 UPDATE_HASH (str[2]);
346 UPDATE_HASH (str[3]);
347 str += 4;
348 len -= 4;
349 }
350 switch (len)
351 {
352 default:
353 case 0:
354 return hash;
355 case 1:
356 UPDATE_HASH (str[0]);
357 return hash;
358 case 2:
359 UPDATE_HASH (str[0]);
360 UPDATE_HASH (str[1]);
361 return hash;
362 case 3:
363 UPDATE_HASH (str[0]);
364 UPDATE_HASH (str[1]);
365 UPDATE_HASH (str[2]);
366 return hash;
367 }
368#endif
369}
370
371MY_INLINE unsigned int
372strcache2_case_insensitive_hash (const char *str, unsigned int len)
373{
374 unsigned int hash = 0;
375 if (MY_PREDICT_TRUE(len >= 2))
376 {
377 unsigned int ch0 = *str++;
378 ch0 = tolower (ch0);
379 hash = 0;
380 len--;
381 while (len >= 2)
382 {
383 unsigned int ch1 = *str++;
384 ch1 = tolower (ch1);
385 hash += ch0 << (ch1 & 0xf);
386
387 ch0 = *str++;
388 ch0 = tolower (ch0);
389 hash += ch1 << (ch0 & 0xf);
390
391 len -= 2;
392 }
393 if (len == 1)
394 {
395 unsigned ch1 = *str;
396 ch1 = tolower (ch1);
397 hash += ch0 << (ch1 & 0xf);
398
399 hash += ch1;
400 }
401 else
402 hash += ch0;
403 }
404 else if (len)
405 {
406 hash = *str;
407 hash += hash << (hash & 0xf);
408 }
409 else
410 hash = 0;
411 return hash;
412}
413
414MY_INLINE int
415strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
416{
417 if (length <= 8)
418 {
419 /* short string compare - ~50% of the kBuild calls. */
420 assert ( !((size_t)ys & 3) );
421 if (!((size_t)xs & 3))
422 {
423 /* aligned */
424 int result;
425 switch (length)
426 {
427 default: /* memcmp for longer strings */
428 return memcmp (xs, ys, length);
429 case 8:
430 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
431 result |= *(int32_t*)xs - *(int32_t*)ys;
432 return result;
433 case 7:
434 result = xs[6] - ys[6];
435 result |= xs[5] - ys[5];
436 result |= xs[4] - ys[4];
437 result |= *(int32_t*)xs - *(int32_t*)ys;
438 return result;
439 case 6:
440 result = xs[5] - ys[5];
441 result |= xs[4] - ys[4];
442 result |= *(int32_t*)xs - *(int32_t*)ys;
443 return result;
444 case 5:
445 result = xs[4] - ys[4];
446 result |= *(int32_t*)xs - *(int32_t*)ys;
447 return result;
448 case 4:
449 return *(int32_t*)xs - *(int32_t*)ys;
450 case 3:
451 result = xs[2] - ys[2];
452 result |= xs[1] - ys[1];
453 result |= xs[0] - ys[0];
454 return result;
455 case 2:
456 result = xs[1] - ys[1];
457 result |= xs[0] - ys[0];
458 return result;
459 case 1:
460 return *xs - *ys;
461 case 0:
462 return 0;
463 }
464 }
465 else
466 {
467 /* unaligned */
468 int result = 0;
469 switch (length)
470 {
471 case 8: result |= xs[7] - ys[7];
472 case 7: result |= xs[6] - ys[6];
473 case 6: result |= xs[5] - ys[5];
474 case 5: result |= xs[4] - ys[4];
475 case 4: result |= xs[3] - ys[3];
476 case 3: result |= xs[2] - ys[2];
477 case 2: result |= xs[1] - ys[1];
478 case 1: result |= xs[0] - ys[0];
479 case 0:
480 return result;
481 }
482 }
483 }
484
485 /* memcmp for longer strings */
486 return memcmp (xs, ys, length);
487}
488
489MY_INLINE int
490strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
491{
492#ifndef ELECTRIC_HEAP
493 assert ( !((size_t)ys & 3) );
494#endif
495 if (!((size_t)xs & 3))
496 {
497 int result;
498 /* aligned */
499 while (length >= 8)
500 {
501 result = *(int32_t*)xs - *(int32_t*)ys;
502 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
503 if (MY_PREDICT_FALSE(result))
504 return result;
505 xs += 8;
506 ys += 8;
507 length -= 8;
508 }
509 switch (length)
510 {
511 case 7:
512 result = *(int32_t*)xs - *(int32_t*)ys;
513 result |= xs[6] - ys[6];
514 result |= xs[5] - ys[5];
515 result |= xs[4] - ys[4];
516 return result;
517 case 6:
518 result = *(int32_t*)xs - *(int32_t*)ys;
519 result |= xs[5] - ys[5];
520 result |= xs[4] - ys[4];
521 return result;
522 case 5:
523 result = *(int32_t*)xs - *(int32_t*)ys;
524 result |= xs[4] - ys[4];
525 return result;
526 case 4:
527 return *(int32_t*)xs - *(int32_t*)ys;
528 case 3:
529 result = xs[2] - ys[2];
530 result |= xs[1] - ys[1];
531 result |= xs[0] - ys[0];
532 return result;
533 case 2:
534 result = xs[1] - ys[1];
535 result |= xs[0] - ys[0];
536 return result;
537 case 1:
538 return *xs - *ys;
539 default:
540 case 0:
541 return 0;
542 }
543 }
544 else
545 {
546 /* unaligned */
547 int result;
548 while (length >= 8)
549 {
550#if defined(__i386__) || defined(__x86_64__)
551 result = ( ((int32_t)xs[3] << 24)
552 | ((int32_t)xs[2] << 16)
553 | ((int32_t)xs[1] << 8)
554 | xs[0] )
555 - *(int32_t*)ys;
556 result |= ( ((int32_t)xs[7] << 24)
557 | ((int32_t)xs[6] << 16)
558 | ((int32_t)xs[5] << 8)
559 | xs[4] )
560 - *(int32_t*)(ys + 4);
561#else
562 result = xs[3] - ys[3];
563 result |= xs[2] - ys[2];
564 result |= xs[1] - ys[1];
565 result |= xs[0] - ys[0];
566 result |= xs[7] - ys[7];
567 result |= xs[6] - ys[6];
568 result |= xs[5] - ys[5];
569 result |= xs[4] - ys[4];
570#endif
571 if (MY_PREDICT_FALSE(result))
572 return result;
573 xs += 8;
574 ys += 8;
575 length -= 8;
576 }
577 result = 0;
578 switch (length)
579 {
580 case 7: result |= xs[6] - ys[6];
581 case 6: result |= xs[5] - ys[5];
582 case 5: result |= xs[4] - ys[4];
583 case 4: result |= xs[3] - ys[3];
584 case 3: result |= xs[2] - ys[2];
585 case 2: result |= xs[1] - ys[1];
586 case 1: result |= xs[0] - ys[0];
587 return result;
588 default:
589 case 0:
590 return 0;
591 }
592 }
593}
594
595MY_INLINE int
596strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
597 const char *str, unsigned int length, unsigned int hash)
598{
599 assert (!cache->case_insensitive);
600
601 /* the simple stuff first. */
602 if ( entry->hash != hash
603 || entry->length != length)
604 return 0;
605
606#if 0
607 return memcmp (str, entry + 1, length) == 0;
608#elif 1
609 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
610#else
611 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
612#endif
613}
614
615MY_INLINE int
616strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
617 const char *str, unsigned int length, unsigned int hash)
618{
619 assert (cache->case_insensitive);
620
621 /* the simple stuff first. */
622 if ( entry->hash != hash
623 || entry->length != length)
624 return 0;
625
626#if defined(_MSC_VER) || defined(__OS2__)
627 return _memicmp (entry + 1, str, length) == 0;
628#else
629 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
630#endif
631}
632
633static void
634strcache2_rehash (struct strcache2 *cache)
635{
636 unsigned int src = cache->hash_size;
637 struct strcache2_entry **src_tab = cache->hash_tab;
638 struct strcache2_entry **dst_tab;
639#ifndef STRCACHE2_USE_MASK
640 unsigned int hash_shift;
641#endif
642
643 /* Allocate a new hash table twice the size of the current. */
644 cache->hash_size <<= 1;
645#ifdef STRCACHE2_USE_MASK
646 cache->hash_mask <<= 1;
647 cache->hash_mask |= 1;
648#else
649 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
650 /* nothing */;
651 cache->hash_div = strcache2_find_prime (hash_shift);
652#endif
653 cache->rehash_count <<= 1;
654 cache->hash_tab = dst_tab = (struct strcache2_entry **)
655 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
656 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
657
658 /* Copy the entries from the old to the new hash table. */
659 cache->collision_count = 0;
660 while (src-- > 0)
661 {
662 struct strcache2_entry *entry = src_tab[src];
663 while (entry)
664 {
665 struct strcache2_entry *next = entry->next;
666 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
667 if ((entry->next = dst_tab[dst]) != 0)
668 cache->collision_count++;
669 dst_tab[dst] = entry;
670
671 entry = next;
672 }
673 }
674
675 /* That's it, just free the old table and we're done. */
676 free (src_tab);
677}
678
679static struct strcache2_seg *
680strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
681{
682 struct strcache2_seg *seg;
683 size_t size;
684 size_t off;
685
686 size = cache->def_seg_size;
687 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
688 {
689 size = (size_t)minlen * 2;
690 size = (size + 0xfff) & ~(size_t)0xfff;
691 }
692
693 seg = xmalloc (size);
694 seg->start = (char *)(seg + 1);
695 seg->size = size - sizeof (struct strcache2_seg);
696 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
697 if (off)
698 {
699 off = STRCACHE2_ENTRY_ALIGNMENT - off;
700 seg->start += off;
701 seg->size -= off;
702 }
703 assert (seg->size > minlen);
704 seg->cursor = seg->start;
705 seg->avail = seg->size;
706
707 seg->next = cache->seg_head;
708 cache->seg_head = seg;
709
710 return seg;
711}
712
713/* Internal worker that enters a new string into the cache. */
714static const char *
715strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
716 const char *str, unsigned int length,
717 unsigned int hash)
718{
719 struct strcache2_entry *entry;
720 struct strcache2_seg *seg;
721 unsigned int size;
722 char *str_copy;
723
724 /* Allocate space for the string. */
725
726 size = length + 1 + sizeof (struct strcache2_entry);
727 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
728
729 seg = cache->seg_head;
730 if (MY_PREDICT_FALSE(seg->avail < size))
731 {
732 do
733 seg = seg->next;
734 while (seg && seg->avail < size);
735 if (!seg)
736 seg = strcache2_new_seg (cache, size);
737 }
738
739 entry = (struct strcache2_entry *) seg->cursor;
740 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
741 seg->cursor += size;
742 seg->avail -= size;
743
744 /* Setup the entry, copy the string and insert it into the hash table. */
745
746 entry->user = NULL;
747 entry->length = length;
748 entry->hash = hash;
749 str_copy = (char *) memcpy (entry + 1, str, length);
750 str_copy[length] = '\0';
751
752 if ((entry->next = cache->hash_tab[idx]) != 0)
753 cache->collision_count++;
754 cache->hash_tab[idx] = entry;
755 cache->count++;
756 if (cache->count >= cache->rehash_count)
757 strcache2_rehash (cache);
758
759 return str_copy;
760}
761
762/* The public add string interface. */
763const char *
764strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
765{
766 struct strcache2_entry const *entry;
767 unsigned int hash = strcache2_case_sensitive_hash (str, length);
768 unsigned int idx;
769
770 assert (!cache->case_insensitive);
771
772 MAKE_STATS (cache->lookup_count++);
773
774 /* Lookup the entry in the hash table, hoping for an
775 early match. If not found, enter the string at IDX. */
776 idx = STRCACHE2_MOD_IT (cache, hash);
777 entry = cache->hash_tab[idx];
778 if (!entry)
779 return strcache2_enter_string (cache, idx, str, length, hash);
780 if (strcache2_is_equal (cache, entry, str, length, hash))
781 return (const char *)(entry + 1);
782 MAKE_STATS (cache->collision_1st_count++);
783
784 entry = entry->next;
785 if (!entry)
786 return strcache2_enter_string (cache, idx, str, length, hash);
787 if (strcache2_is_equal (cache, entry, str, length, hash))
788 return (const char *)(entry + 1);
789 MAKE_STATS (cache->collision_2nd_count++);
790
791 /* Loop the rest. */
792 for (;;)
793 {
794 entry = entry->next;
795 if (!entry)
796 return strcache2_enter_string (cache, idx, str, length, hash);
797 if (strcache2_is_equal (cache, entry, str, length, hash))
798 return (const char *)(entry + 1);
799 MAKE_STATS (cache->collision_3rd_count++);
800 }
801 /* not reached */
802}
803
804/* The public add string interface for prehashed strings.
805 Use strcache2_hash_str to calculate the hash of a string. */
806const char *
807strcache2_add_hashed (struct strcache2 *cache, const char *str,
808 unsigned int length, unsigned int hash)
809{
810 struct strcache2_entry const *entry;
811 unsigned int idx;
812#ifndef NDEBUG
813 unsigned correct_hash;
814
815 assert (!cache->case_insensitive);
816 correct_hash = strcache2_case_sensitive_hash (str, length);
817 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
818#endif /* NDEBUG */
819
820 MAKE_STATS (cache->lookup_count++);
821
822 /* Lookup the entry in the hash table, hoping for an
823 early match. If not found, enter the string at IDX. */
824 idx = STRCACHE2_MOD_IT (cache, hash);
825 entry = cache->hash_tab[idx];
826 if (!entry)
827 return strcache2_enter_string (cache, idx, str, length, hash);
828 if (strcache2_is_equal (cache, entry, str, length, hash))
829 return (const char *)(entry + 1);
830 MAKE_STATS (cache->collision_1st_count++);
831
832 entry = entry->next;
833 if (!entry)
834 return strcache2_enter_string (cache, idx, str, length, hash);
835 if (strcache2_is_equal (cache, entry, str, length, hash))
836 return (const char *)(entry + 1);
837 MAKE_STATS (cache->collision_2nd_count++);
838
839 /* Loop the rest. */
840 for (;;)
841 {
842 entry = entry->next;
843 if (!entry)
844 return strcache2_enter_string (cache, idx, str, length, hash);
845 if (strcache2_is_equal (cache, entry, str, length, hash))
846 return (const char *)(entry + 1);
847 MAKE_STATS (cache->collision_3rd_count++);
848 }
849 /* not reached */
850}
851
852/* The public lookup (case sensitive) string interface. */
853const char *
854strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
855{
856 struct strcache2_entry const *entry;
857 unsigned int hash = strcache2_case_sensitive_hash (str, length);
858 unsigned int idx;
859
860 assert (!cache->case_insensitive);
861
862 MAKE_STATS (cache->lookup_count++);
863
864 /* Lookup the entry in the hash table, hoping for an
865 early match. */
866 idx = STRCACHE2_MOD_IT (cache, hash);
867 entry = cache->hash_tab[idx];
868 if (!entry)
869 return NULL;
870 if (strcache2_is_equal (cache, entry, str, length, hash))
871 return (const char *)(entry + 1);
872 MAKE_STATS (cache->collision_1st_count++);
873
874 entry = entry->next;
875 if (!entry)
876 return NULL;
877 if (strcache2_is_equal (cache, entry, str, length, hash))
878 return (const char *)(entry + 1);
879 MAKE_STATS (cache->collision_2nd_count++);
880
881 /* Loop the rest. */
882 for (;;)
883 {
884 entry = entry->next;
885 if (!entry)
886 return NULL;
887 if (strcache2_is_equal (cache, entry, str, length, hash))
888 return (const char *)(entry + 1);
889 MAKE_STATS (cache->collision_3rd_count++);
890 }
891 /* not reached */
892}
893
894#if defined(HAVE_CASE_INSENSITIVE_FS)
895
896/* The public add string interface for case insensitive strings. */
897const char *
898strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
899{
900 struct strcache2_entry const *entry;
901 unsigned int hash = strcache2_case_insensitive_hash (str, length);
902 unsigned int idx;
903
904 assert (!cache->case_insensitive);
905
906 MAKE_STATS (cache->lookup_count++);
907
908 /* Lookup the entry in the hash table, hoping for an
909 early match. If not found, enter the string at IDX. */
910 idx = STRCACHE2_MOD_IT (cache, hash);
911 entry = cache->hash_tab[idx];
912 if (!entry)
913 return strcache2_enter_string (cache, idx, str, length, hash);
914 if (strcache2_is_equal (cache, entry, str, length, hash))
915 return (const char *)(entry + 1);
916 MAKE_STATS (cache->collision_1st_count++);
917
918 entry = entry->next;
919 if (!entry)
920 return strcache2_enter_string (cache, idx, str, length, hash);
921 if (strcache2_is_equal (cache, entry, str, length, hash))
922 return (const char *)(entry + 1);
923 MAKE_STATS (cache->collision_2nd_count++);
924
925 /* Loop the rest. */
926 for (;;)
927 {
928 entry = entry->next;
929 if (!entry)
930 return strcache2_enter_string (cache, idx, str, length, hash);
931 if (strcache2_is_equal (cache, entry, str, length, hash))
932 return (const char *)(entry + 1);
933 MAKE_STATS (cache->collision_3rd_count++);
934 }
935 /* not reached */
936}
937
938/* The public add string interface for prehashed case insensitive strings.
939 Use strcache2_hash_istr to calculate the hash of a string. */
940const char *
941strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
942 unsigned int length, unsigned int hash)
943{
944 struct strcache2_entry const *entry;
945 unsigned int idx;
946#ifndef NDEBUG
947 unsigned correct_hash;
948
949 assert (!cache->case_insensitive);
950 correct_hash = strcache2_case_insensitive_hash (str, length);
951 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
952#endif /* NDEBUG */
953
954 MAKE_STATS (cache->lookup_count++);
955
956 /* Lookup the entry in the hash table, hoping for an
957 early match. If not found, enter the string at IDX. */
958 idx = STRCACHE2_MOD_IT (cache, hash);
959 entry = cache->hash_tab[idx];
960 if (!entry)
961 return strcache2_enter_string (cache, idx, str, length, hash);
962 if (strcache2_is_equal (cache, entry, str, length, hash))
963 return (const char *)(entry + 1);
964 MAKE_STATS (cache->collision_1st_count++);
965
966 entry = entry->next;
967 if (!entry)
968 return strcache2_enter_string (cache, idx, str, length, hash);
969 if (strcache2_is_equal (cache, entry, str, length, hash))
970 return (const char *)(entry + 1);
971 MAKE_STATS (cache->collision_2nd_count++);
972
973 /* Loop the rest. */
974 for (;;)
975 {
976 entry = entry->next;
977 if (!entry)
978 return strcache2_enter_string (cache, idx, str, length, hash);
979 if (strcache2_is_equal (cache, entry, str, length, hash))
980 return (const char *)(entry + 1);
981 MAKE_STATS (cache->collision_3rd_count++);
982 }
983 /* not reached */
984}
985
986/* The public lookup (case insensitive) string interface. */
987const char *
988strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
989{
990 struct strcache2_entry const *entry;
991 unsigned int hash = strcache2_case_insensitive_hash (str, length);
992 unsigned int idx;
993
994 assert (!cache->case_insensitive);
995
996 MAKE_STATS (cache->lookup_count++);
997
998 /* Lookup the entry in the hash table, hoping for an
999 early match. */
1000 idx = STRCACHE2_MOD_IT (cache, hash);
1001 entry = cache->hash_tab[idx];
1002 if (!entry)
1003 return NULL;
1004 if (strcache2_is_equal (cache, entry, str, length, hash))
1005 return (const char *)(entry + 1);
1006 MAKE_STATS (cache->collision_1st_count++);
1007
1008 entry = entry->next;
1009 if (!entry)
1010 return NULL;
1011 if (strcache2_is_equal (cache, entry, str, length, hash))
1012 return (const char *)(entry + 1);
1013 MAKE_STATS (cache->collision_2nd_count++);
1014
1015 /* Loop the rest. */
1016 for (;;)
1017 {
1018 entry = entry->next;
1019 if (!entry)
1020 return NULL;
1021 if (strcache2_is_equal (cache, entry, str, length, hash))
1022 return (const char *)(entry + 1);
1023 MAKE_STATS (cache->collision_3rd_count++);
1024 }
1025 /* not reached */
1026}
1027
1028#endif /* HAVE_CASE_INSENSITIVE_FS */
1029
1030/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1031int
1032strcache2_is_cached (struct strcache2 *cache, const char *str)
1033{
1034 /* Check mandatory alignment first. */
1035 if (!((size_t)str & (sizeof (void *) - 1)))
1036 {
1037 /* Check the segment list and consider the question answered if the
1038 string is within one of them. (Could check it more thoroughly...) */
1039 struct strcache2_seg const *seg;
1040 for (seg = cache->seg_head; seg; seg = seg->next)
1041 if ((size_t)(str - seg->start) < seg->size)
1042 return 1;
1043 }
1044
1045 return 0;
1046}
1047
1048
1049/* Verify the integrity of the specified string, returning 0 if OK. */
1050int
1051strcache2_verify_entry (struct strcache2 *cache, const char *str)
1052{
1053 struct strcache2_entry const *entry;
1054 unsigned int hash;
1055 unsigned int length;
1056 const char *end;
1057
1058 entry = (struct strcache2_entry const *)str - 1;
1059 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1060 {
1061 fprintf (stderr,
1062 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1063 cache->name, (void *)entry, (void *)str, str);
1064 return -1;
1065 }
1066
1067 end = memchr (str, '\0', entry->length + 1);
1068 length = end - str;
1069 if (length != entry->length)
1070 {
1071 fprintf (stderr,
1072 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1073 cache->name, (void *)entry, length, entry->length, str);
1074 return -1;
1075 }
1076
1077 hash = cache->case_insensitive
1078 ? strcache2_case_insensitive_hash (str, entry->length)
1079 : strcache2_case_sensitive_hash (str, entry->length);
1080 if (hash != entry->hash)
1081 {
1082 fprintf (stderr,
1083 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1084 cache->name, (void *)entry, hash, entry->hash, str);
1085 return -1;
1086 }
1087
1088 return 0;
1089}
1090
1091
1092/* Calculates the case sensitive hash values of the string.
1093 The first hash is returned, the other is put at HASH2P. */
1094unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1095{
1096 *hash2p = 1;
1097 return strcache2_case_sensitive_hash (str, length);
1098}
1099
1100/* Calculates the case insensitive hash values of the string.
1101 The first hash is returned, the other is put at HASH2P. */
1102unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1103{
1104 *hash2p = 1;
1105 return strcache2_case_insensitive_hash (str, length);
1106}
1107
1108
1109
1110/* Initalizes a new cache. */
1111void
1112strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1113 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1114{
1115 unsigned hash_shift;
1116 assert (!thread_safe);
1117
1118 /* calc the size as a power of two */
1119 if (!size)
1120 hash_shift = STRCACHE2_HASH_SHIFT;
1121 else
1122 {
1123 assert (size <= (~0U / 2 + 1));
1124 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1125 /* nothing */;
1126 }
1127
1128 /* adjust the default segment size */
1129 if (!def_seg_size)
1130 def_seg_size = STRCACHE2_SEG_SIZE;
1131 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1132 def_seg_size = sizeof (struct strcache2_seg) * 10;
1133 else if ((def_seg_size & 0xfff) < 0xf00)
1134 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1135
1136
1137 /* init the structure. */
1138 cache->case_insensitive = case_insensitive;
1139#ifdef STRCACHE2_USE_MASK
1140 cache->hash_mask = (1U << hash_shift) - 1U;
1141#else
1142 cache->hash_div = strcache2_find_prime(hash_shift);
1143#endif
1144 cache->count = 0;
1145 cache->collision_count = 0;
1146 cache->lookup_count = 0;
1147 cache->collision_1st_count = 0;
1148 cache->collision_2nd_count = 0;
1149 cache->collision_3rd_count = 0;
1150 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1151 cache->init_size = 1U << hash_shift;
1152 cache->hash_size = 1U << hash_shift;
1153 cache->def_seg_size = def_seg_size;
1154 cache->lock = NULL;
1155 cache->name = name;
1156
1157 /* allocate the hash table and first segment. */
1158 cache->hash_tab = (struct strcache2_entry **)
1159 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1160 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1161 strcache2_new_seg (cache, 0);
1162
1163 /* link it */
1164 cache->next = strcache_head;
1165 strcache_head = cache;
1166}
1167
1168
1169/* Terminates a string cache, freeing all memory and other
1170 associated resources. */
1171void
1172strcache2_term (struct strcache2 *cache)
1173{
1174 /* unlink it */
1175 if (strcache_head == cache)
1176 strcache_head = cache->next;
1177 else
1178 {
1179 struct strcache2 *prev = strcache_head;
1180 while (prev->next != cache)
1181 prev = prev->next;
1182 assert (prev);
1183 prev->next = cache->next;
1184 }
1185
1186 /* free the memory segments */
1187 do
1188 {
1189 void *free_it = cache->seg_head;
1190 cache->seg_head = cache->seg_head->next;
1191 free (free_it);
1192 }
1193 while (cache->seg_head);
1194
1195 /* free the hash and clear the structure. */
1196 free (cache->hash_tab);
1197 memset (cache, '\0', sizeof (struct strcache2));
1198}
1199
1200/* Print statistics a string cache. */
1201void
1202strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1203{
1204 unsigned int seg_count = 0;
1205 unsigned long seg_total_bytes = 0;
1206 unsigned long seg_avg_bytes;
1207 unsigned long seg_avail_bytes = 0;
1208 unsigned long seg_max_bytes = 0;
1209 struct strcache2_seg *seg;
1210 unsigned int str_count = 0;
1211 unsigned long str_total_len = 0;
1212 unsigned int str_avg_len;
1213 unsigned int str_min_len = ~0U;
1214 unsigned int str_max_len = 0;
1215 unsigned int idx;
1216 unsigned int rehashes;
1217 unsigned int chain_depths[32];
1218
1219 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1220
1221 /* Segment statistics. */
1222 for (seg = cache->seg_head; seg; seg = seg->next)
1223 {
1224 seg_count++;
1225 seg_total_bytes += seg->size;
1226 seg_avail_bytes += seg->avail;
1227 if (seg->size > seg_max_bytes)
1228 seg_max_bytes = seg->size;
1229 }
1230 seg_avg_bytes = seg_total_bytes / seg_count;
1231 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1232 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1233 cache->def_seg_size, seg_avail_bytes);
1234
1235 /* String statistics. */
1236 memset (chain_depths, '\0', sizeof (chain_depths));
1237 idx = cache->hash_size;
1238 while (idx-- > 0)
1239 {
1240 struct strcache2_entry const *entry = cache->hash_tab[idx];
1241 unsigned int depth = 0;
1242 for (; entry != 0; entry = entry->next, depth++)
1243 {
1244 unsigned int length = entry->length;
1245 str_total_len += length;
1246 if (length > str_max_len)
1247 str_max_len = length;
1248 if (length < str_min_len)
1249 str_min_len = length;
1250 str_count++;
1251 }
1252 chain_depths[depth >= 32 ? 31 : depth]++;
1253 }
1254 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1255 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1256 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1257 if (str_count != cache->count)
1258 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1259 cache->count, str_count);
1260
1261 /* Hash statistics. */
1262 idx = cache->init_size;
1263 rehashes = 0;
1264 while (idx < cache->hash_size)
1265 {
1266 idx *= 2;
1267 rehashes++;
1268 }
1269
1270#ifdef STRCACHE2_USE_MASK
1271 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1272 prefix, cache->hash_size, cache->hash_mask, rehashes);
1273#else
1274 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1275 prefix, cache->hash_size, cache->hash_div, rehashes);
1276#endif
1277 if (cache->lookup_count)
1278 printf (_("%s lookups = %lu\n"
1279 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1280 prefix, cache->lookup_count,
1281 prefix,
1282 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1283 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1284 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1285 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1286 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1287 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1288 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1289 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1290 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1291 for (idx = 2; idx < 32; idx++)
1292 {
1293 unsigned strs_at_this_depth = chain_depths[idx];
1294 unsigned i;
1295 for (i = idx + 1; i < 32; i++)
1296 strs_at_this_depth += chain_depths[i];
1297 if (strs_at_this_depth)
1298 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1299 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1300 idx - 1, idx == 2 ? " " : "s",
1301 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1302 }
1303}
1304
1305/* Print statistics for all string caches. */
1306void
1307strcache2_print_stats_all (const char *prefix)
1308{
1309 struct strcache2 *cur;
1310 for (cur = strcache_head; cur; cur = cur->next)
1311 strcache2_print_stats (cur, prefix);
1312}
1313
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette