source: rtems/cpukit/libfs/src/rfs/rtems-rfs-dir-hash.c @ 66b8047

4.115
Last change on this file since 66b8047 was 66b8047, checked in by Ralf Corsepius <ralf.corsepius@…>, on 11/06/11 at 12:44:24

Remove stray whitespaces.

  • Property mode set to 100644
File size: 11.5 KB
Line 
1/*
2 *  $Id$
3 */
4/**
5 * @file
6 *
7 * @ingroup rtems-rfs
8 *
9 * RTEMS File Systems Directory Hash function.
10 */
11
12#if HAVE_CONFIG_H
13#include "config.h"
14#endif
15
16#include <rtems/rfs/rtems-rfs-dir-hash.h>
17
18#ifdef __rtems__
19# include <machine/endian.h>    /* attempt to define endianness */
20#endif
21#ifdef linux
22# include <endian.h>    /* attempt to define endianness */
23#endif
24
25/*
26 * My best guess at if you are big-endian or little-endian.  This may
27 * need adjustment.
28 */
29#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
30     __BYTE_ORDER == __LITTLE_ENDIAN) || \
31  (defined(i386) || defined(__i386__) || defined(__i486__) || \
32   defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
33# define HASH_LITTLE_ENDIAN 1
34# define HASH_BIG_ENDIAN 0
35#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
36       __BYTE_ORDER == __BIG_ENDIAN) || \
37  (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
38# define HASH_LITTLE_ENDIAN 0
39# define HASH_BIG_ENDIAN 1
40#else
41# define HASH_LITTLE_ENDIAN 0
42# define HASH_BIG_ENDIAN 0
43#endif
44
45#define hashsize(n) ((uint32_t)1<<(n))
46#define hashmask(n) (hashsize(n)-1)
47#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
48
49/*
50  -------------------------------------------------------------------------------
51  mix -- mix 3 32-bit values reversibly.
52
53  This is reversible, so any information in (a,b,c) before mix() is still in
54  (a,b,c) after mix().
55
56  If four pairs of (a,b,c) inputs are run through mix(), or through mix() in
57  reverse, there are at least 32 bits of the output that are sometimes the same
58  for one pair and different for another pair.  This was tested for:
59
60  * pairs that differed by one bit, by two bits, in any combination of top bits
61    of (a,b,c), or in any combination of bottom bits of (a,b,c).
62
63  * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
64    output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
65    produced by subtraction) look like a single 1-bit difference.
66
67  * the base values were pseudorandom, all zero but one bit set, or all zero
68    plus a counter that starts at zero.
69
70  Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this
71  are:
72
73     4  6  8 16 19  4
74     9 15  3 18 27 15
75    14  9  3  7 17  3
76
77  Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ" defined
78  as + with a one-bit base and a two-bit delta.  I used
79  http://burtleburtle.net/bob/hash/avalanche.html to choose the operations,
80  constants, and arrangements of the variables.
81
82  This does not achieve avalanche.  There are input bits of (a,b,c) that fail
83  to affect some output bits of (a,b,c), especially of a.  The most thoroughly
84  mixed value is c, but it doesn't really even achieve avalanche in c.
85
86  This allows some parallelism.  Read-after-writes are good at doubling the
87  number of bits affected, so the goal of mixing pulls in the opposite
88  direction as the goal of parallelism.  I did what I could.  Rotates seem to
89  cost as much as shifts on every machine I could lay my hands on, and rotates
90  are much kinder to the top and bottom bits, so I used rotates.
91  -------------------------------------------------------------------------------
92*/
93#define mix(a,b,c) \
94  { \
95  a -= c;  a ^= rot(c, 4);  c += b; \
96  b -= a;  b ^= rot(a, 6);  a += c; \
97  c -= b;  c ^= rot(b, 8);  b += a; \
98  a -= c;  a ^= rot(c,16);  c += b; \
99  b -= a;  b ^= rot(a,19);  a += c; \
100  c -= b;  c ^= rot(b, 4);  b += a; \
101  }
102
103/*
104  -------------------------------------------------------------------------------
105  final -- final mixing of 3 32-bit values (a,b,c) into c
106
107  Pairs of (a,b,c) values differing in only a few bits will usually produce
108  values of c that look totally different.  This was tested for
109
110  * pairs that differed by one bit, by two bits, in any combination of top bits
111    of (a,b,c), or in any combination of bottom bits of (a,b,c).
112
113  * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed the
114    output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
115    produced by subtraction) look like a single 1-bit difference.  * the base
116    values were pseudorandom, all zero but one bit set, or all zero plus a
117    counter that starts at zero.
118
119  These constants passed:
120    14 11 25 16 4 14 24
121    12 14 25 16 4 14 24
122  and these came close:
123     4  8 15 26 3 22 24
124    10  8 15 26 3 22 24
125    11  8 15 26 3 22 24
126  -------------------------------------------------------------------------------
127*/
128#define final(a,b,c)        \
129  {                         \
130    c ^= b; c -= rot(b,14); \
131    a ^= c; a -= rot(c,11); \
132    b ^= a; b -= rot(a,25); \
133    c ^= b; c -= rot(b,16); \
134    a ^= c; a -= rot(c,4);  \
135    b ^= a; b -= rot(a,14); \
136    c ^= b; c -= rot(b,24); \
137  }
138
139/**
140 * The follow is the documentation from Bob Jenkin's hash function:
141 *
142 *  http://burtleburtle.net/bob/hash/doobs.html
143 *
144 * The function hashlittle() has been renamed.
145 *
146 *  hashlittle() -- hash a variable-length key into a 32-bit value
147 *
148 *   k       : the key (the unaligned variable-length array of bytes)
149 *   length  : the length of the key, counting by bytes
150 *   initval : can be any 4-byte value
151 *
152 *  Returns a 32-bit value.  Every bit of the key affects every bit of the
153 *  return value.  Two keys differing by one or two bits will have totally
154 *  different hash values.
155 *
156 *  The best hash table sizes are powers of 2.  There is no need to do mod a
157 *  prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
158 *  For example, if you need only 10 bits, do h = (h & hashmask(10)); In which
159 *  case, the hash table should have hashsize(10) elements.
160 *
161 *  If you are hashing n strings (uint8_t **)k, do it like this: for (i=0, h=0;
162 *  i<n; ++i) h = hashlittle( k[i], len[i], h);
163 *
164 *  By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
165 *  code any way you wish, private, educational, or commercial.  It's free.
166 *
167 *  Use for hash table lookup, or anything where one collision in 2^^32 is
168 *  acceptable.  Do NOT use for cryptographic purposes.
169*/
170
171#define initval (20010928)
172uint32_t
173rtems_rfs_dir_hash (const void *key, size_t length)
174{
175  uint32_t a,b,c;                             /* internal state */
176  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
177
178  /* Set up the internal state */
179  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
180
181  u.ptr = key;
182  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
183    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
184    /*const uint8_t  *k8;*/
185
186    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
187    while (length > 12)
188    {
189      a += k[0];
190      b += k[1];
191      c += k[2];
192      mix(a,b,c);
193      length -= 12;
194      k += 3;
195    }
196
197    /*----------------------------- handle the last (probably partial) block */
198    /*
199     * "k[2]&0xffffff" actually reads beyond the end of the string, but
200     * then masks off the part it's not allowed to read.  Because the
201     * string is aligned, the masked-off tail is in the same word as the
202     * rest of the string.  Every machine with memory protection I've seen
203     * does it on word boundaries, so is OK with this.  But VALGRIND will
204     * still catch it and complain.  The masking trick does make the hash
205     * noticably faster for short strings (like English words).
206     */
207#ifndef VALGRIND
208
209    switch(length)
210    {
211      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
212      case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
213      case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
214      case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
215      case 8 : b+=k[1]; a+=k[0]; break;
216      case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
217      case 6 : b+=k[1]&0xffff; a+=k[0]; break;
218      case 5 : b+=k[1]&0xff; a+=k[0]; break;
219      case 4 : a+=k[0]; break;
220      case 3 : a+=k[0]&0xffffff; break;
221      case 2 : a+=k[0]&0xffff; break;
222      case 1 : a+=k[0]&0xff; break;
223      case 0 : return c;              /* zero length strings require no mixing */
224    }
225
226#else /* make valgrind happy */
227
228    k8 = (const uint8_t *)k;
229    switch(length)
230    {
231      case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
232      case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
233      case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
234      case 9 : c+=k8[8];                   /* fall through */
235      case 8 : b+=k[1]; a+=k[0]; break;
236      case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
237      case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
238      case 5 : b+=k8[4];                   /* fall through */
239      case 4 : a+=k[0]; break;
240      case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
241      case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
242      case 1 : a+=k8[0]; break;
243      case 0 : return c;
244    }
245
246#endif /* !valgrind */
247
248  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
249    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
250    const uint8_t  *k8;
251
252    /*--------------- all but last block: aligned reads and different mixing */
253    while (length > 12)
254    {
255      a += k[0] + (((uint32_t)k[1])<<16);
256      b += k[2] + (((uint32_t)k[3])<<16);
257      c += k[4] + (((uint32_t)k[5])<<16);
258      mix(a,b,c);
259      length -= 12;
260      k += 6;
261    }
262
263    /*----------------------------- handle the last (probably partial) block */
264    k8 = (const uint8_t *)k;
265    switch(length)
266    {
267      case 12: c+=k[4]+(((uint32_t)k[5])<<16);
268        b+=k[2]+(((uint32_t)k[3])<<16);
269        a+=k[0]+(((uint32_t)k[1])<<16);
270        break;
271      case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
272      case 10: c+=k[4];
273        b+=k[2]+(((uint32_t)k[3])<<16);
274        a+=k[0]+(((uint32_t)k[1])<<16);
275        break;
276      case 9 : c+=k8[8];                      /* fall through */
277      case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
278        a+=k[0]+(((uint32_t)k[1])<<16);
279        break;
280      case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
281      case 6 : b+=k[2];
282        a+=k[0]+(((uint32_t)k[1])<<16);
283        break;
284      case 5 : b+=k8[4];                      /* fall through */
285      case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
286        break;
287      case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
288      case 2 : a+=k[0];
289        break;
290      case 1 : a+=k8[0];
291        break;
292      case 0 : return c;                     /* zero length requires no mixing */
293    }
294
295  } else {                        /* need to read the key one byte at a time */
296    const uint8_t *k = (const uint8_t *)key;
297
298    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
299    while (length > 12)
300    {
301      a += k[0];
302      a += ((uint32_t)k[1])<<8;
303      a += ((uint32_t)k[2])<<16;
304      a += ((uint32_t)k[3])<<24;
305      b += k[4];
306      b += ((uint32_t)k[5])<<8;
307      b += ((uint32_t)k[6])<<16;
308      b += ((uint32_t)k[7])<<24;
309      c += k[8];
310      c += ((uint32_t)k[9])<<8;
311      c += ((uint32_t)k[10])<<16;
312      c += ((uint32_t)k[11])<<24;
313      mix(a,b,c);
314      length -= 12;
315      k += 12;
316    }
317
318    /*-------------------------------- last block: affect all 32 bits of (c) */
319    switch(length)                   /* all the case statements fall through */
320    {
321      case 12: c+=((uint32_t)k[11])<<24;
322      case 11: c+=((uint32_t)k[10])<<16;
323      case 10: c+=((uint32_t)k[9])<<8;
324      case 9 : c+=k[8];
325      case 8 : b+=((uint32_t)k[7])<<24;
326      case 7 : b+=((uint32_t)k[6])<<16;
327      case 6 : b+=((uint32_t)k[5])<<8;
328      case 5 : b+=k[4];
329      case 4 : a+=((uint32_t)k[3])<<24;
330      case 3 : a+=((uint32_t)k[2])<<16;
331      case 2 : a+=((uint32_t)k[1])<<8;
332      case 1 : a+=k[0];
333        break;
334      case 0 : return c;
335    }
336  }
337
338  final(a,b,c);
339  return c;
340}
341
Note: See TracBrowser for help on using the repository browser.