VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 3084

Last change on this file since 3084 was 3084, checked in by bird, 8 years ago

strcache2.c: fixed HAVE_CASE_INSENSITIVE_FS case (unused)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.7 KB
Line 
1/* $Id: strcache2.c 3084 2017-10-02 22:24:58Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008-2010 knut st. osmundsen <bird-kBuild-spamx@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild. If not, see <http://www.gnu.org/licenses/>
23 *
24 */
25
26/*******************************************************************************
27* Header Files *
28*******************************************************************************/
29#include "make.h"
30#include "strcache2.h"
31
32#include <assert.h>
33
34#include "debug.h"
35
36#ifdef _MSC_VER
37typedef unsigned char uint8_t;
38typedef unsigned short uint16_t;
39typedef unsigned int uint32_t;
40typedef signed char int8_t;
41typedef signed short int16_t;
42typedef signed int int32_t;
43#else
44# include <stdint.h>
45#endif
46
47#ifdef WINDOWS32
48# include <io.h>
49# include <process.h>
50# include <Windows.h>
51# define PARSE_IN_WORKER
52#endif
53
54#ifdef __OS2__
55# include <sys/fmutex.h>
56#endif
57
58#ifdef HAVE_PTHREAD
59# include <pthread.h>
60#endif
61
62
63/*******************************************************************************
64* Defined Constants And Macros *
65*******************************************************************************/
66/* The default size of a memory segment (1MB). */
67#define STRCACHE2_SEG_SIZE (1024U*1024U)
68/* The default hash table shift (hash size give as a power of two). */
69#define STRCACHE2_HASH_SHIFT 16
70/** Does the modding / masking of a hash number into an index. */
71#ifdef STRCACHE2_USE_MASK
72# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73#else
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75#endif
76
77# if ( defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
78 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)) \
79 && !defined(GCC_ADDRESS_SANITIZER)
80# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
81# else
82 /* (endian doesn't matter) */
83# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
84 | (((const uint8_t *)(ptr))[1]) )
85# endif
86
87
88/*******************************************************************************
89* Global Variables *
90*******************************************************************************/
91/* List of initialized string caches. */
92static struct strcache2 *strcache_head;
93
94
95#ifndef STRCACHE2_USE_MASK
96/** Finds the closest primary number for power of two value (or something else
97 * useful if not support). */
98MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
99{
100 switch (shift)
101 {
102 case 5: return 31;
103 case 6: return 61;
104 case 7: return 127;
105 case 8: return 251;
106 case 9: return 509;
107 case 10: return 1021;
108 case 11: return 2039;
109 case 12: return 4093;
110 case 13: return 8191;
111 case 14: return 16381;
112 case 15: return 32749;
113 case 16: return 65521;
114 case 17: return 131063;
115 case 18: return 262139;
116 case 19: return 524269;
117 case 20: return 1048573;
118 case 21: return 2097143;
119 case 22: return 4194301;
120 case 23: return 8388593;
121
122 default:
123 assert (0);
124 return (1 << shift) - 1;
125 }
126}
127#endif
128
129/* The following is a bit experiment. It produces longer chains, i.e. worse
130 distribution of the strings in the table, however the actual make
131 performances is better (<time). The explanation is probably that the
132 collisions only really increase for entries that aren't looked up that
133 much and that it actually improoves the situation for those that is. Or
134 that we spend so much less time hashing that it makes up (and more) for
135 the pentalty we suffer from the longer chains and worse distribution.
136
137 XXX: Check how this works out with different path lengths. I suspect it
138 might depend on the length of PATH_ROOT and the depth of the files
139 in the project as well. If it does, this might make matters worse
140 for some and better for others which isn't very cool... */
141
142#if 0
143# define BIG_HASH_SIZE 32 /* kinda fast */
144# define BIG_HASH_HEAD 16
145# define BIG_HASH_TAIL 12
146#elif 0
147# define BIG_HASH_SIZE 68 /* kinda safe */
148# define BIG_HASH_HEAD 24
149# define BIG_HASH_TAIL 24
150#elif 0
151# define BIG_HASH_SIZE 128 /* safe */
152# define BIG_HASH_HEAD 32
153# define BIG_HASH_TAIL 32
154#endif
155
156#ifdef BIG_HASH_SIZE
157/* long string: hash head and tail, drop the middle. */
158MY_INLINE unsigned int
159strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
160{
161 uint32_t hash = len;
162 uint32_t tmp;
163 unsigned int head;
164
165 /* head BIG_HASH_HEAD bytes */
166 head = (BIG_HASH_HEAD >> 2);
167 while (head-- > 0)
168 {
169 hash += strcache2_get_unaligned_16bits (str);
170 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
171 hash = (hash << 16) ^ tmp;
172 str += 2 * sizeof (uint16_t);
173 hash += hash >> 11;
174 }
175
176 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
177 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
178 head = (BIG_HASH_TAIL >> 2);
179 while (head-- > 0)
180 {
181 hash += strcache2_get_unaligned_16bits (str);
182 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
183 hash = (hash << 16) ^ tmp;
184 str += 2 * sizeof (uint16_t);
185 hash += hash >> 11;
186 }
187
188 /* force "avalanching" of final 127 bits. */
189 hash ^= hash << 3;
190 hash += hash >> 5;
191 hash ^= hash << 4;
192 hash += hash >> 17;
193 hash ^= hash << 25;
194 hash += hash >> 6;
195
196 return hash;
197}
198#endif /* BIG_HASH_SIZE */
199
200MY_INLINE unsigned int
201strcache2_case_sensitive_hash (const char *str, unsigned int len)
202{
203#if 1
204 /* Paul Hsieh hash SuperFast function:
205 http://www.azillionmonkeys.com/qed/hash.html
206
207 This performs very good and as a sligtly better distribution than
208 STRING_N_HASH_1 on a typical kBuild run.
209
210 It is also 37% faster than return_STRING_N_HASH_1 when running the
211 two 100 times over typical kBuild strings that end up here (did a
212 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
213 with -O2.
214
215 FIXME: A path for well aligned data should be added to speed up
216 execution on alignment sensitive systems. */
217 unsigned int rem;
218 uint32_t hash;
219 uint32_t tmp;
220
221 assert (sizeof (uint8_t) == sizeof (char));
222
223# ifdef BIG_HASH_SIZE
224 /* long string? */
225# if 0 /*BIG_HASH_SIZE > 128*/
226 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
227# else
228 if (len >= BIG_HASH_SIZE)
229# endif
230 return strcache2_case_sensitive_hash_big (str, len);
231# endif
232
233 /* short string: main loop, walking on 2 x uint16_t */
234 hash = len;
235 rem = len & 3;
236 len >>= 2;
237 while (len > 0)
238 {
239 hash += strcache2_get_unaligned_16bits (str);
240 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
241 hash = (hash << 16) ^ tmp;
242 str += 2 * sizeof (uint16_t);
243 hash += hash >> 11;
244 len--;
245 }
246
247 /* the remainder */
248 switch (rem)
249 {
250 case 3:
251 hash += strcache2_get_unaligned_16bits (str);
252 hash ^= hash << 16;
253 hash ^= str[sizeof (uint16_t)] << 18;
254 hash += hash >> 11;
255 break;
256 case 2:
257 hash += strcache2_get_unaligned_16bits (str);
258 hash ^= hash << 11;
259 hash += hash >> 17;
260 break;
261 case 1:
262 hash += *str;
263 hash ^= hash << 10;
264 hash += hash >> 1;
265 break;
266 }
267
268 /* force "avalanching" of final 127 bits. */
269 hash ^= hash << 3;
270 hash += hash >> 5;
271 hash ^= hash << 4;
272 hash += hash >> 17;
273 hash ^= hash << 25;
274 hash += hash >> 6;
275
276 return hash;
277
278#elif 1
279 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
280 when running the two 100 times over typical kBuild strings that
281 end up here (did a fprintf here and built kBuild).
282 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
283
284 unsigned int hash = 0;
285 if (MY_PREDICT_TRUE(len >= 2))
286 {
287 unsigned int ch0 = *str++;
288 hash = 0;
289 len--;
290 while (len >= 2)
291 {
292 unsigned int ch1 = *str++;
293 hash += ch0 << (ch1 & 0xf);
294
295 ch0 = *str++;
296 hash += ch1 << (ch0 & 0xf);
297
298 len -= 2;
299 }
300 if (len == 1)
301 {
302 unsigned ch1 = *str;
303 hash += ch0 << (ch1 & 0xf);
304
305 hash += ch1;
306 }
307 else
308 hash += ch0;
309 }
310 else if (len)
311 {
312 hash = *str;
313 hash += hash << (hash & 0xf);
314 }
315 else
316 hash = 0;
317 return hash;
318
319#elif 1
320# if 0
321 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
322 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
323 first in the assignment made it noticably slower.)
324
325 However, it is noticably slower in practice, most likely because of more
326 collisions. Hrmpf. */
327
328# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
329 unsigned int hash = 0;
330
331# else
332 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
333 fast than return_STRING_N_HASH_1.
334
335 Ditto. */
336
337# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
338 unsigned int hash = 5381;
339# endif
340
341
342 while (len >= 4)
343 {
344 UPDATE_HASH (str[0]);
345 UPDATE_HASH (str[1]);
346 UPDATE_HASH (str[2]);
347 UPDATE_HASH (str[3]);
348 str += 4;
349 len -= 4;
350 }
351 switch (len)
352 {
353 default:
354 case 0:
355 return hash;
356 case 1:
357 UPDATE_HASH (str[0]);
358 return hash;
359 case 2:
360 UPDATE_HASH (str[0]);
361 UPDATE_HASH (str[1]);
362 return hash;
363 case 3:
364 UPDATE_HASH (str[0]);
365 UPDATE_HASH (str[1]);
366 UPDATE_HASH (str[2]);
367 return hash;
368 }
369#endif
370}
371
372MY_INLINE unsigned int
373strcache2_case_insensitive_hash (const char *str, unsigned int len)
374{
375 unsigned int hash = 0;
376 if (MY_PREDICT_TRUE(len >= 2))
377 {
378 unsigned int ch0 = *str++;
379 ch0 = tolower (ch0);
380 hash = 0;
381 len--;
382 while (len >= 2)
383 {
384 unsigned int ch1 = *str++;
385 ch1 = tolower (ch1);
386 hash += ch0 << (ch1 & 0xf);
387
388 ch0 = *str++;
389 ch0 = tolower (ch0);
390 hash += ch1 << (ch0 & 0xf);
391
392 len -= 2;
393 }
394 if (len == 1)
395 {
396 unsigned ch1 = *str;
397 ch1 = tolower (ch1);
398 hash += ch0 << (ch1 & 0xf);
399
400 hash += ch1;
401 }
402 else
403 hash += ch0;
404 }
405 else if (len)
406 {
407 hash = *str;
408 hash += hash << (hash & 0xf);
409 }
410 else
411 hash = 0;
412 return hash;
413}
414
415#if 0
416MY_INLINE int
417strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
418{
419 if (length <= 8)
420 {
421 /* short string compare - ~50% of the kBuild calls. */
422 assert ( !((size_t)ys & 3) );
423 if (!((size_t)xs & 3))
424 {
425 /* aligned */
426 int result;
427 switch (length)
428 {
429 default: /* memcmp for longer strings */
430 return memcmp (xs, ys, length);
431 case 8:
432 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
433 result |= *(int32_t*)xs - *(int32_t*)ys;
434 return result;
435 case 7:
436 result = xs[6] - ys[6];
437 result |= xs[5] - ys[5];
438 result |= xs[4] - ys[4];
439 result |= *(int32_t*)xs - *(int32_t*)ys;
440 return result;
441 case 6:
442 result = xs[5] - ys[5];
443 result |= xs[4] - ys[4];
444 result |= *(int32_t*)xs - *(int32_t*)ys;
445 return result;
446 case 5:
447 result = xs[4] - ys[4];
448 result |= *(int32_t*)xs - *(int32_t*)ys;
449 return result;
450 case 4:
451 return *(int32_t*)xs - *(int32_t*)ys;
452 case 3:
453 result = xs[2] - ys[2];
454 result |= xs[1] - ys[1];
455 result |= xs[0] - ys[0];
456 return result;
457 case 2:
458 result = xs[1] - ys[1];
459 result |= xs[0] - ys[0];
460 return result;
461 case 1:
462 return *xs - *ys;
463 case 0:
464 return 0;
465 }
466 }
467 else
468 {
469 /* unaligned */
470 int result = 0;
471 switch (length)
472 {
473 case 8: result |= xs[7] - ys[7];
474 case 7: result |= xs[6] - ys[6];
475 case 6: result |= xs[5] - ys[5];
476 case 5: result |= xs[4] - ys[4];
477 case 4: result |= xs[3] - ys[3];
478 case 3: result |= xs[2] - ys[2];
479 case 2: result |= xs[1] - ys[1];
480 case 1: result |= xs[0] - ys[0];
481 case 0:
482 return result;
483 }
484 }
485 }
486
487 /* memcmp for longer strings */
488 return memcmp (xs, ys, length);
489}
490#endif
491
492MY_INLINE int
493strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
494{
495#ifndef ELECTRIC_HEAP
496 assert ( !((size_t)ys & 3) );
497#endif
498 if (!((size_t)xs & 3))
499 {
500 int result;
501 /* aligned */
502 while (length >= 8)
503 {
504 result = *(int32_t*)xs - *(int32_t*)ys;
505 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
506 if (MY_PREDICT_FALSE(result))
507 return result;
508 xs += 8;
509 ys += 8;
510 length -= 8;
511 }
512 switch (length)
513 {
514 case 7:
515 result = *(int32_t*)xs - *(int32_t*)ys;
516 result |= xs[6] - ys[6];
517 result |= xs[5] - ys[5];
518 result |= xs[4] - ys[4];
519 return result;
520 case 6:
521 result = *(int32_t*)xs - *(int32_t*)ys;
522 result |= xs[5] - ys[5];
523 result |= xs[4] - ys[4];
524 return result;
525 case 5:
526 result = *(int32_t*)xs - *(int32_t*)ys;
527 result |= xs[4] - ys[4];
528 return result;
529 case 4:
530 return *(int32_t*)xs - *(int32_t*)ys;
531 case 3:
532 result = xs[2] - ys[2];
533 result |= xs[1] - ys[1];
534 result |= xs[0] - ys[0];
535 return result;
536 case 2:
537 result = xs[1] - ys[1];
538 result |= xs[0] - ys[0];
539 return result;
540 case 1:
541 return *xs - *ys;
542 default:
543 case 0:
544 return 0;
545 }
546 }
547 else
548 {
549 /* unaligned */
550 int result;
551 while (length >= 8)
552 {
553#if defined(__i386__) || defined(__x86_64__)
554 result = ( ((int32_t)xs[3] << 24)
555 | ((int32_t)xs[2] << 16)
556 | ((int32_t)xs[1] << 8)
557 | xs[0] )
558 - *(int32_t*)ys;
559 result |= ( ((int32_t)xs[7] << 24)
560 | ((int32_t)xs[6] << 16)
561 | ((int32_t)xs[5] << 8)
562 | xs[4] )
563 - *(int32_t*)(ys + 4);
564#else
565 result = xs[3] - ys[3];
566 result |= xs[2] - ys[2];
567 result |= xs[1] - ys[1];
568 result |= xs[0] - ys[0];
569 result |= xs[7] - ys[7];
570 result |= xs[6] - ys[6];
571 result |= xs[5] - ys[5];
572 result |= xs[4] - ys[4];
573#endif
574 if (MY_PREDICT_FALSE(result))
575 return result;
576 xs += 8;
577 ys += 8;
578 length -= 8;
579 }
580 result = 0;
581 switch (length)
582 {
583 case 7: result |= xs[6] - ys[6]; /* fall thru */
584 case 6: result |= xs[5] - ys[5]; /* fall thru */
585 case 5: result |= xs[4] - ys[4]; /* fall thru */
586 case 4: result |= xs[3] - ys[3]; /* fall thru */
587 case 3: result |= xs[2] - ys[2]; /* fall thru */
588 case 2: result |= xs[1] - ys[1]; /* fall thru */
589 case 1: result |= xs[0] - ys[0]; /* fall thru */
590 return result;
591 default:
592 case 0:
593 return 0;
594 }
595 }
596}
597
598MY_INLINE int
599strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
600 const char *str, unsigned int length, unsigned int hash)
601{
602 assert (!cache->case_insensitive);
603
604 /* the simple stuff first. */
605 if ( entry->hash != hash
606 || entry->length != length)
607 return 0;
608
609#if 0
610 return memcmp (str, entry + 1, length) == 0;
611#elif 1
612 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
613#else
614 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
615#endif
616}
617
618#if defined(HAVE_CASE_INSENSITIVE_FS)
619MY_INLINE int
620strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
621 const char *str, unsigned int length, unsigned int hash)
622{
623 assert (cache->case_insensitive);
624
625 /* the simple stuff first. */
626 if ( entry->hash != hash
627 || entry->length != length)
628 return 0;
629
630# if defined(_MSC_VER) || defined(__OS2__)
631 return _memicmp (entry + 1, str, length) == 0;
632# else
633 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
634# endif
635}
636#endif /* HAVE_CASE_INSENSITIVE_FS */
637
638static void
639strcache2_rehash (struct strcache2 *cache)
640{
641 unsigned int src = cache->hash_size;
642 struct strcache2_entry **src_tab = cache->hash_tab;
643 struct strcache2_entry **dst_tab;
644#ifndef STRCACHE2_USE_MASK
645 unsigned int hash_shift;
646#endif
647
648 /* Allocate a new hash table twice the size of the current. */
649 cache->hash_size <<= 1;
650#ifdef STRCACHE2_USE_MASK
651 cache->hash_mask <<= 1;
652 cache->hash_mask |= 1;
653#else
654 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
655 /* nothing */;
656 cache->hash_div = strcache2_find_prime (hash_shift);
657#endif
658 cache->rehash_count <<= 1;
659 cache->hash_tab = dst_tab = (struct strcache2_entry **)
660 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
661 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
662
663 /* Copy the entries from the old to the new hash table. */
664 cache->collision_count = 0;
665 while (src-- > 0)
666 {
667 struct strcache2_entry *entry = src_tab[src];
668 while (entry)
669 {
670 struct strcache2_entry *next = entry->next;
671 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
672 if ((entry->next = dst_tab[dst]) != 0)
673 cache->collision_count++;
674 dst_tab[dst] = entry;
675
676 entry = next;
677 }
678 }
679
680 /* That's it, just free the old table and we're done. */
681 free (src_tab);
682}
683
684static struct strcache2_seg *
685strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
686{
687 struct strcache2_seg *seg;
688 size_t size;
689 size_t off;
690
691 size = cache->def_seg_size;
692 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
693 {
694 size = (size_t)minlen * 2;
695 size = (size + 0xfff) & ~(size_t)0xfff;
696 }
697
698 seg = xmalloc (size);
699 seg->start = (char *)(seg + 1);
700 seg->size = size - sizeof (struct strcache2_seg);
701 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
702 if (off)
703 {
704 off = STRCACHE2_ENTRY_ALIGNMENT - off;
705 seg->start += off;
706 seg->size -= off;
707 }
708 assert (seg->size > minlen);
709 seg->cursor = seg->start;
710 seg->avail = seg->size;
711
712 seg->next = cache->seg_head;
713 cache->seg_head = seg;
714
715 return seg;
716}
717
718/* Internal worker that enters a new string into the cache. */
719static const char *
720strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
721 const char *str, unsigned int length,
722 unsigned int hash)
723{
724 struct strcache2_entry *entry;
725 struct strcache2_seg *seg;
726 unsigned int size;
727 char *str_copy;
728
729 /* Allocate space for the string. */
730
731 size = length + 1 + sizeof (struct strcache2_entry);
732 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
733
734 seg = cache->seg_head;
735 if (MY_PREDICT_FALSE(seg->avail < size))
736 {
737 do
738 seg = seg->next;
739 while (seg && seg->avail < size);
740 if (!seg)
741 seg = strcache2_new_seg (cache, size);
742 }
743
744 entry = (struct strcache2_entry *) seg->cursor;
745 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
746 seg->cursor += size;
747 seg->avail -= size;
748
749 /* Setup the entry, copy the string and insert it into the hash table. */
750
751 entry->user = NULL;
752 entry->length = length;
753 entry->hash = hash;
754 str_copy = (char *) memcpy (entry + 1, str, length);
755 str_copy[length] = '\0';
756
757 if ((entry->next = cache->hash_tab[idx]) != 0)
758 cache->collision_count++;
759 cache->hash_tab[idx] = entry;
760 cache->count++;
761 if (cache->count >= cache->rehash_count)
762 strcache2_rehash (cache);
763
764 return str_copy;
765}
766
767/* The public add string interface. */
768const char *
769strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
770{
771 struct strcache2_entry const *entry;
772 unsigned int hash = strcache2_case_sensitive_hash (str, length);
773 unsigned int idx;
774
775 assert (!cache->case_insensitive);
776 assert (!memchr (str, '\0', length));
777
778 MAKE_STATS (cache->lookup_count++);
779
780 /* Lookup the entry in the hash table, hoping for an
781 early match. If not found, enter the string at IDX. */
782 idx = STRCACHE2_MOD_IT (cache, hash);
783 entry = cache->hash_tab[idx];
784 if (!entry)
785 return strcache2_enter_string (cache, idx, str, length, hash);
786 if (strcache2_is_equal (cache, entry, str, length, hash))
787 return (const char *)(entry + 1);
788 MAKE_STATS (cache->collision_1st_count++);
789
790 entry = entry->next;
791 if (!entry)
792 return strcache2_enter_string (cache, idx, str, length, hash);
793 if (strcache2_is_equal (cache, entry, str, length, hash))
794 return (const char *)(entry + 1);
795 MAKE_STATS (cache->collision_2nd_count++);
796
797 /* Loop the rest. */
798 for (;;)
799 {
800 entry = entry->next;
801 if (!entry)
802 return strcache2_enter_string (cache, idx, str, length, hash);
803 if (strcache2_is_equal (cache, entry, str, length, hash))
804 return (const char *)(entry + 1);
805 MAKE_STATS (cache->collision_3rd_count++);
806 }
807 /* not reached */
808}
809
810/* The public add string interface for prehashed strings.
811 Use strcache2_hash_str to calculate the hash of a string. */
812const char *
813strcache2_add_hashed (struct strcache2 *cache, const char *str,
814 unsigned int length, unsigned int hash)
815{
816 struct strcache2_entry const *entry;
817 unsigned int idx;
818#ifndef NDEBUG
819 unsigned correct_hash;
820
821 assert (!cache->case_insensitive);
822 assert (!memchr (str, '\0', length));
823 correct_hash = strcache2_case_sensitive_hash (str, length);
824 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
825#endif /* NDEBUG */
826
827 MAKE_STATS (cache->lookup_count++);
828
829 /* Lookup the entry in the hash table, hoping for an
830 early match. If not found, enter the string at IDX. */
831 idx = STRCACHE2_MOD_IT (cache, hash);
832 entry = cache->hash_tab[idx];
833 if (!entry)
834 return strcache2_enter_string (cache, idx, str, length, hash);
835 if (strcache2_is_equal (cache, entry, str, length, hash))
836 return (const char *)(entry + 1);
837 MAKE_STATS (cache->collision_1st_count++);
838
839 entry = entry->next;
840 if (!entry)
841 return strcache2_enter_string (cache, idx, str, length, hash);
842 if (strcache2_is_equal (cache, entry, str, length, hash))
843 return (const char *)(entry + 1);
844 MAKE_STATS (cache->collision_2nd_count++);
845
846 /* Loop the rest. */
847 for (;;)
848 {
849 entry = entry->next;
850 if (!entry)
851 return strcache2_enter_string (cache, idx, str, length, hash);
852 if (strcache2_is_equal (cache, entry, str, length, hash))
853 return (const char *)(entry + 1);
854 MAKE_STATS (cache->collision_3rd_count++);
855 }
856 /* not reached */
857}
858
859/* The public lookup (case sensitive) string interface. */
860const char *
861strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
862{
863 struct strcache2_entry const *entry;
864 unsigned int hash = strcache2_case_sensitive_hash (str, length);
865 unsigned int idx;
866
867 assert (!cache->case_insensitive);
868 assert (!memchr (str, '\0', length));
869
870 MAKE_STATS (cache->lookup_count++);
871
872 /* Lookup the entry in the hash table, hoping for an
873 early match. */
874 idx = STRCACHE2_MOD_IT (cache, hash);
875 entry = cache->hash_tab[idx];
876 if (!entry)
877 return NULL;
878 if (strcache2_is_equal (cache, entry, str, length, hash))
879 return (const char *)(entry + 1);
880 MAKE_STATS (cache->collision_1st_count++);
881
882 entry = entry->next;
883 if (!entry)
884 return NULL;
885 if (strcache2_is_equal (cache, entry, str, length, hash))
886 return (const char *)(entry + 1);
887 MAKE_STATS (cache->collision_2nd_count++);
888
889 /* Loop the rest. */
890 for (;;)
891 {
892 entry = entry->next;
893 if (!entry)
894 return NULL;
895 if (strcache2_is_equal (cache, entry, str, length, hash))
896 return (const char *)(entry + 1);
897 MAKE_STATS (cache->collision_3rd_count++);
898 }
899 /* not reached */
900}
901
902#if defined(HAVE_CASE_INSENSITIVE_FS)
903
904/* The public add string interface for case insensitive strings. */
905const char *
906strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
907{
908 struct strcache2_entry const *entry;
909 unsigned int hash = strcache2_case_insensitive_hash (str, length);
910 unsigned int idx;
911
912 assert (cache->case_insensitive);
913 assert (!memchr (str, '\0', length));
914
915 MAKE_STATS (cache->lookup_count++);
916
917 /* Lookup the entry in the hash table, hoping for an
918 early match. If not found, enter the string at IDX. */
919 idx = STRCACHE2_MOD_IT (cache, hash);
920 entry = cache->hash_tab[idx];
921 if (!entry)
922 return strcache2_enter_string (cache, idx, str, length, hash);
923 if (strcache2_is_iequal (cache, entry, str, length, hash))
924 return (const char *)(entry + 1);
925 MAKE_STATS (cache->collision_1st_count++);
926
927 entry = entry->next;
928 if (!entry)
929 return strcache2_enter_string (cache, idx, str, length, hash);
930 if (strcache2_is_iequal (cache, entry, str, length, hash))
931 return (const char *)(entry + 1);
932 MAKE_STATS (cache->collision_2nd_count++);
933
934 /* Loop the rest. */
935 for (;;)
936 {
937 entry = entry->next;
938 if (!entry)
939 return strcache2_enter_string (cache, idx, str, length, hash);
940 if (strcache2_is_iequal (cache, entry, str, length, hash))
941 return (const char *)(entry + 1);
942 MAKE_STATS (cache->collision_3rd_count++);
943 }
944 /* not reached */
945}
946
947/* The public add string interface for prehashed case insensitive strings.
948 Use strcache2_hash_istr to calculate the hash of a string. */
949const char *
950strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
951 unsigned int length, unsigned int hash)
952{
953 struct strcache2_entry const *entry;
954 unsigned int idx;
955#ifndef NDEBUG
956 unsigned correct_hash;
957
958 assert (cache->case_insensitive);
959 assert (!memchr (str, '\0', length));
960 correct_hash = strcache2_case_insensitive_hash (str, length);
961 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
962#endif /* NDEBUG */
963
964 MAKE_STATS (cache->lookup_count++);
965
966 /* Lookup the entry in the hash table, hoping for an
967 early match. If not found, enter the string at IDX. */
968 idx = STRCACHE2_MOD_IT (cache, hash);
969 entry = cache->hash_tab[idx];
970 if (!entry)
971 return strcache2_enter_string (cache, idx, str, length, hash);
972 if (strcache2_is_iequal (cache, entry, str, length, hash))
973 return (const char *)(entry + 1);
974 MAKE_STATS (cache->collision_1st_count++);
975
976 entry = entry->next;
977 if (!entry)
978 return strcache2_enter_string (cache, idx, str, length, hash);
979 if (strcache2_is_iequal (cache, entry, str, length, hash))
980 return (const char *)(entry + 1);
981 MAKE_STATS (cache->collision_2nd_count++);
982
983 /* Loop the rest. */
984 for (;;)
985 {
986 entry = entry->next;
987 if (!entry)
988 return strcache2_enter_string (cache, idx, str, length, hash);
989 if (strcache2_is_iequal (cache, entry, str, length, hash))
990 return (const char *)(entry + 1);
991 MAKE_STATS (cache->collision_3rd_count++);
992 }
993 /* not reached */
994}
995
996/* The public lookup (case insensitive) string interface. */
997const char *
998strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
999{
1000 struct strcache2_entry const *entry;
1001 unsigned int hash = strcache2_case_insensitive_hash (str, length);
1002 unsigned int idx;
1003
1004 assert (cache->case_insensitive);
1005 assert (!memchr (str, '\0', length));
1006
1007 MAKE_STATS (cache->lookup_count++);
1008
1009 /* Lookup the entry in the hash table, hoping for an
1010 early match. */
1011 idx = STRCACHE2_MOD_IT (cache, hash);
1012 entry = cache->hash_tab[idx];
1013 if (!entry)
1014 return NULL;
1015 if (strcache2_is_iequal (cache, entry, str, length, hash))
1016 return (const char *)(entry + 1);
1017 MAKE_STATS (cache->collision_1st_count++);
1018
1019 entry = entry->next;
1020 if (!entry)
1021 return NULL;
1022 if (strcache2_is_iequal (cache, entry, str, length, hash))
1023 return (const char *)(entry + 1);
1024 MAKE_STATS (cache->collision_2nd_count++);
1025
1026 /* Loop the rest. */
1027 for (;;)
1028 {
1029 entry = entry->next;
1030 if (!entry)
1031 return NULL;
1032 if (strcache2_is_iequal (cache, entry, str, length, hash))
1033 return (const char *)(entry + 1);
1034 MAKE_STATS (cache->collision_3rd_count++);
1035 }
1036 /* not reached */
1037}
1038
1039#endif /* HAVE_CASE_INSENSITIVE_FS */
1040
1041/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1042int
1043strcache2_is_cached (struct strcache2 *cache, const char *str)
1044{
1045 /* Check mandatory alignment first. */
1046 if (!((size_t)str & (sizeof (void *) - 1)))
1047 {
1048 /* Check the segment list and consider the question answered if the
1049 string is within one of them. (Could check it more thoroughly...) */
1050 struct strcache2_seg const *seg;
1051 for (seg = cache->seg_head; seg; seg = seg->next)
1052 if ((size_t)(str - seg->start) < seg->size)
1053 return 1;
1054 }
1055
1056 return 0;
1057}
1058
1059
1060/* Verify the integrity of the specified string, returning 0 if OK. */
1061int
1062strcache2_verify_entry (struct strcache2 *cache, const char *str)
1063{
1064 struct strcache2_entry const *entry;
1065 unsigned int hash;
1066 unsigned int length;
1067 const char *end;
1068
1069 entry = (struct strcache2_entry const *)str - 1;
1070 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1071 {
1072 fprintf (stderr,
1073 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1074 cache->name, (void *)entry, (void *)str, str);
1075 return -1;
1076 }
1077
1078 end = memchr (str, '\0', entry->length + 1);
1079 length = end - str;
1080 if (length != entry->length)
1081 {
1082 fprintf (stderr,
1083 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1084 cache->name, (void *)entry, length, entry->length, str);
1085 return -1;
1086 }
1087
1088 hash = cache->case_insensitive
1089 ? strcache2_case_insensitive_hash (str, entry->length)
1090 : strcache2_case_sensitive_hash (str, entry->length);
1091 if (hash != entry->hash)
1092 {
1093 fprintf (stderr,
1094 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1095 cache->name, (void *)entry, hash, entry->hash, str);
1096 return -1;
1097 }
1098
1099 return 0;
1100}
1101
1102
1103/* Calculates the case sensitive hash values of the string.
1104 The first hash is returned, the other is put at HASH2P. */
1105unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1106{
1107 *hash2p = 1;
1108 return strcache2_case_sensitive_hash (str, length);
1109}
1110
1111/* Calculates the case insensitive hash values of the string.
1112 The first hash is returned, the other is put at HASH2P. */
1113unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1114{
1115 *hash2p = 1;
1116 return strcache2_case_insensitive_hash (str, length);
1117}
1118
1119
1120
1121/* Initalizes a new cache. */
1122void
1123strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1124 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1125{
1126 unsigned hash_shift;
1127 assert (!thread_safe);
1128
1129 /* calc the size as a power of two */
1130 if (!size)
1131 hash_shift = STRCACHE2_HASH_SHIFT;
1132 else
1133 {
1134 assert (size <= (~0U / 2 + 1));
1135 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1136 /* nothing */;
1137 }
1138
1139 /* adjust the default segment size */
1140 if (!def_seg_size)
1141 def_seg_size = STRCACHE2_SEG_SIZE;
1142 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1143 def_seg_size = sizeof (struct strcache2_seg) * 10;
1144 else if ((def_seg_size & 0xfff) < 0xf00)
1145 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1146
1147
1148 /* init the structure. */
1149 cache->case_insensitive = case_insensitive;
1150#ifdef STRCACHE2_USE_MASK
1151 cache->hash_mask = (1U << hash_shift) - 1U;
1152#else
1153 cache->hash_div = strcache2_find_prime(hash_shift);
1154#endif
1155 cache->count = 0;
1156 cache->collision_count = 0;
1157 cache->lookup_count = 0;
1158 cache->collision_1st_count = 0;
1159 cache->collision_2nd_count = 0;
1160 cache->collision_3rd_count = 0;
1161 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1162 cache->init_size = 1U << hash_shift;
1163 cache->hash_size = 1U << hash_shift;
1164 cache->def_seg_size = def_seg_size;
1165 cache->lock = NULL;
1166 cache->name = name;
1167
1168 /* allocate the hash table and first segment. */
1169 cache->hash_tab = (struct strcache2_entry **)
1170 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1171 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1172 strcache2_new_seg (cache, 0);
1173
1174 /* link it */
1175 cache->next = strcache_head;
1176 strcache_head = cache;
1177}
1178
1179
1180/* Terminates a string cache, freeing all memory and other
1181 associated resources. */
1182void
1183strcache2_term (struct strcache2 *cache)
1184{
1185 /* unlink it */
1186 if (strcache_head == cache)
1187 strcache_head = cache->next;
1188 else
1189 {
1190 struct strcache2 *prev = strcache_head;
1191 while (prev->next != cache)
1192 prev = prev->next;
1193 assert (prev);
1194 prev->next = cache->next;
1195 }
1196
1197 /* free the memory segments */
1198 do
1199 {
1200 void *free_it = cache->seg_head;
1201 cache->seg_head = cache->seg_head->next;
1202 free (free_it);
1203 }
1204 while (cache->seg_head);
1205
1206 /* free the hash and clear the structure. */
1207 free (cache->hash_tab);
1208 memset (cache, '\0', sizeof (struct strcache2));
1209}
1210
1211/* Print statistics a string cache. */
1212void
1213strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1214{
1215 unsigned int seg_count = 0;
1216 unsigned long seg_total_bytes = 0;
1217 unsigned long seg_avg_bytes;
1218 unsigned long seg_avail_bytes = 0;
1219 unsigned long seg_max_bytes = 0;
1220 struct strcache2_seg *seg;
1221 unsigned int str_count = 0;
1222 unsigned long str_total_len = 0;
1223 unsigned int str_avg_len;
1224 unsigned int str_min_len = ~0U;
1225 unsigned int str_max_len = 0;
1226 unsigned int idx;
1227 unsigned int rehashes;
1228 unsigned int chain_depths[32];
1229
1230 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1231
1232 /* Segment statistics. */
1233 for (seg = cache->seg_head; seg; seg = seg->next)
1234 {
1235 seg_count++;
1236 seg_total_bytes += seg->size;
1237 seg_avail_bytes += seg->avail;
1238 if (seg->size > seg_max_bytes)
1239 seg_max_bytes = seg->size;
1240 }
1241 seg_avg_bytes = seg_total_bytes / seg_count;
1242 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1243 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1244 cache->def_seg_size, seg_avail_bytes);
1245
1246 /* String statistics. */
1247 memset (chain_depths, '\0', sizeof (chain_depths));
1248 idx = cache->hash_size;
1249 while (idx-- > 0)
1250 {
1251 struct strcache2_entry const *entry = cache->hash_tab[idx];
1252 unsigned int depth = 0;
1253 for (; entry != 0; entry = entry->next, depth++)
1254 {
1255 unsigned int length = entry->length;
1256 str_total_len += length;
1257 if (length > str_max_len)
1258 str_max_len = length;
1259 if (length < str_min_len)
1260 str_min_len = length;
1261 str_count++;
1262 }
1263 chain_depths[depth >= 32 ? 31 : depth]++;
1264 }
1265 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1266 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1267 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1268 if (str_count != cache->count)
1269 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1270 cache->count, str_count);
1271
1272 /* Hash statistics. */
1273 idx = cache->init_size;
1274 rehashes = 0;
1275 while (idx < cache->hash_size)
1276 {
1277 idx *= 2;
1278 rehashes++;
1279 }
1280
1281#ifdef STRCACHE2_USE_MASK
1282 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1283 prefix, cache->hash_size, cache->hash_mask, rehashes);
1284#else
1285 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1286 prefix, cache->hash_size, cache->hash_div, rehashes);
1287#endif
1288 if (cache->lookup_count)
1289 printf (_("%s lookups = %lu\n"
1290 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1291 prefix, cache->lookup_count,
1292 prefix,
1293 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1294 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1295 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1296 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1297 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1298 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1299 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1300 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1301 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1302 for (idx = 2; idx < 32; idx++)
1303 {
1304 unsigned strs_at_this_depth = chain_depths[idx];
1305 unsigned i;
1306 for (i = idx + 1; i < 32; i++)
1307 strs_at_this_depth += chain_depths[i];
1308 if (strs_at_this_depth)
1309 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1310 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1311 idx - 1, idx == 2 ? " " : "s",
1312 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1313 }
1314}
1315
1316/* Print statistics for all string caches. */
1317void
1318strcache2_print_stats_all (const char *prefix)
1319{
1320 struct strcache2 *cur;
1321 for (cur = strcache_head; cur; cur = cur->next)
1322 strcache2_print_stats (cur, prefix);
1323}
1324
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette