Context Navigation

source: rtems/cpukit/libfs/src/rfs/rtems-rfs-dir-hash.c @ 66b8047

4.115

Last change on this file since 66b8047 was 66b8047, checked in by Ralf Corsepius <ralf.corsepius@…>, on 11/06/11 at 12:44:24
Remove stray whitespaces.
Property mode set to `100644`
File size: 11.5 KB

Line
1	/*
2	* $Id$
3	*/
4	/**
5	* @file
6	*
7	* @ingroup rtems-rfs
8	*
9	* RTEMS File Systems Directory Hash function.
10	*/
11
12	#if HAVE_CONFIG_H
13	#include "config.h"
14	#endif
15
16	#include <rtems/rfs/rtems-rfs-dir-hash.h>
17
18	#ifdef __rtems__
19	# include <machine/endian.h> /* attempt to define endianness */
20	#endif
21	#ifdef linux
22	# include <endian.h> /* attempt to define endianness */
23	#endif
24
25	/*
26	* My best guess at if you are big-endian or little-endian. This may
27	* need adjustment.
28	*/
29	#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
30	__BYTE_ORDER == __LITTLE_ENDIAN) \|\| \
31	(defined(i386) \|\| defined(__i386__) \|\| defined(__i486__) \|\| \
32	defined(__i586__) \|\| defined(__i686__) \|\| defined(vax) \|\| defined(MIPSEL))
33	# define HASH_LITTLE_ENDIAN 1
34	# define HASH_BIG_ENDIAN 0
35	#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
36	__BYTE_ORDER == __BIG_ENDIAN) \|\| \
37	(defined(sparc) \|\| defined(POWERPC) \|\| defined(mc68000) \|\| defined(sel))
38	# define HASH_LITTLE_ENDIAN 0
39	# define HASH_BIG_ENDIAN 1
40	#else
41	# define HASH_LITTLE_ENDIAN 0
42	# define HASH_BIG_ENDIAN 0
43	#endif
44
45	#define hashsize(n) ((uint32_t)1<<(n))
46	#define hashmask(n) (hashsize(n)-1)
47	#define rot(x,k) (((x)<<(k)) \| ((x)>>(32-(k))))
48
49	/*
50	-------------------------------------------------------------------------------
51	mix -- mix 3 32-bit values reversibly.
52
53	This is reversible, so any information in (a,b,c) before mix() is still in
54	(a,b,c) after mix().
55
56	If four pairs of (a,b,c) inputs are run through mix(), or through mix() in
57	reverse, there are at least 32 bits of the output that are sometimes the same
58	for one pair and different for another pair. This was tested for:
59
60	* pairs that differed by one bit, by two bits, in any combination of top bits
61	of (a,b,c), or in any combination of bottom bits of (a,b,c).
62
63	* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the
64	output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
65	produced by subtraction) look like a single 1-bit difference.
66
67	* the base values were pseudorandom, all zero but one bit set, or all zero
68	plus a counter that starts at zero.
69
70	Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that satisfy this
71	are:
72
73	4 6 8 16 19 4
74	9 15 3 18 27 15
75	14 9 3 7 17 3
76
77	Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for "differ" defined
78	as + with a one-bit base and a two-bit delta. I used
79	http://burtleburtle.net/bob/hash/avalanche.html to choose the operations,
80	constants, and arrangements of the variables.
81
82	This does not achieve avalanche. There are input bits of (a,b,c) that fail
83	to affect some output bits of (a,b,c), especially of a. The most thoroughly
84	mixed value is c, but it doesn't really even achieve avalanche in c.
85
86	This allows some parallelism. Read-after-writes are good at doubling the
87	number of bits affected, so the goal of mixing pulls in the opposite
88	direction as the goal of parallelism. I did what I could. Rotates seem to
89	cost as much as shifts on every machine I could lay my hands on, and rotates
90	are much kinder to the top and bottom bits, so I used rotates.
91	-------------------------------------------------------------------------------
92	*/
93	#define mix(a,b,c) \
94	{ \
95	a -= c; a ^= rot(c, 4); c += b; \
96	b -= a; b ^= rot(a, 6); a += c; \
97	c -= b; c ^= rot(b, 8); b += a; \
98	a -= c; a ^= rot(c,16); c += b; \
99	b -= a; b ^= rot(a,19); a += c; \
100	c -= b; c ^= rot(b, 4); b += a; \
101	}
102
103	/*
104	-------------------------------------------------------------------------------
105	final -- final mixing of 3 32-bit values (a,b,c) into c
106
107	Pairs of (a,b,c) values differing in only a few bits will usually produce
108	values of c that look totally different. This was tested for
109
110	* pairs that differed by one bit, by two bits, in any combination of top bits
111	of (a,b,c), or in any combination of bottom bits of (a,b,c).
112
113	* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed the
114	output delta to a Gray code (a^(a>>1)) so a string of 1's (as is commonly
115	produced by subtraction) look like a single 1-bit difference. * the base
116	values were pseudorandom, all zero but one bit set, or all zero plus a
117	counter that starts at zero.
118
119	These constants passed:
120	14 11 25 16 4 14 24
121	12 14 25 16 4 14 24
122	and these came close:
123	4 8 15 26 3 22 24
124	10 8 15 26 3 22 24
125	11 8 15 26 3 22 24
126	-------------------------------------------------------------------------------
127	*/
128	#define final(a,b,c) \
129	{ \
130	c ^= b; c -= rot(b,14); \
131	a ^= c; a -= rot(c,11); \
132	b ^= a; b -= rot(a,25); \
133	c ^= b; c -= rot(b,16); \
134	a ^= c; a -= rot(c,4); \
135	b ^= a; b -= rot(a,14); \
136	c ^= b; c -= rot(b,24); \
137	}
138
139	/**
140	* The follow is the documentation from Bob Jenkin's hash function:
141	*
142	* http://burtleburtle.net/bob/hash/doobs.html
143	*
144	* The function hashlittle() has been renamed.
145	*
146	* hashlittle() -- hash a variable-length key into a 32-bit value
147	*
148	* k : the key (the unaligned variable-length array of bytes)
149	* length : the length of the key, counting by bytes
150	* initval : can be any 4-byte value
151	*
152	* Returns a 32-bit value. Every bit of the key affects every bit of the
153	* return value. Two keys differing by one or two bits will have totally
154	* different hash values.
155	*
156	* The best hash table sizes are powers of 2. There is no need to do mod a
157	* prime (mod is sooo slow!). If you need less than 32 bits, use a bitmask.
158	* For example, if you need only 10 bits, do h = (h & hashmask(10)); In which
159	* case, the hash table should have hashsize(10) elements.
160	*
161	* If you are hashing n strings (uint8_t **)k, do it like this: for (i=0, h=0;
162	* i<n; ++i) h = hashlittle( k[i], len[i], h);
163	*
164	* By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
165	* code any way you wish, private, educational, or commercial. It's free.
166	*
167	* Use for hash table lookup, or anything where one collision in 2^^32 is
168	* acceptable. Do NOT use for cryptographic purposes.
169	*/
170
171	#define initval (20010928)
172	uint32_t
173	rtems_rfs_dir_hash (const void *key, size_t length)
174	{
175	uint32_t a,b,c; /* internal state */
176	union { const void ptr; size_t i; } u; / needed for Mac Powerbook G4 */
177
178	/* Set up the internal state */
179	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
180
181	u.ptr = key;
182	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
183	const uint32_t k = (const uint32_t )key; /* read 32-bit chunks */
184	/const uint8_t k8;*/
185
186	/------ all but last block: aligned reads and affect 32 bits of (a,b,c) /
187	while (length > 12)
188	{
189	a += k[0];
190	b += k[1];
191	c += k[2];
192	mix(a,b,c);
193	length -= 12;
194	k += 3;
195	}
196
197	/----------------------------- handle the last (probably partial) block /
198	/*
199	* "k[2]&0xffffff" actually reads beyond the end of the string, but
200	* then masks off the part it's not allowed to read. Because the
201	* string is aligned, the masked-off tail is in the same word as the
202	* rest of the string. Every machine with memory protection I've seen
203	* does it on word boundaries, so is OK with this. But VALGRIND will
204	* still catch it and complain. The masking trick does make the hash
205	* noticably faster for short strings (like English words).
206	*/
207	#ifndef VALGRIND
208
209	switch(length)
210	{
211	case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
212	case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
213	case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
214	case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
215	case 8 : b+=k[1]; a+=k[0]; break;
216	case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
217	case 6 : b+=k[1]&0xffff; a+=k[0]; break;
218	case 5 : b+=k[1]&0xff; a+=k[0]; break;
219	case 4 : a+=k[0]; break;
220	case 3 : a+=k[0]&0xffffff; break;
221	case 2 : a+=k[0]&0xffff; break;
222	case 1 : a+=k[0]&0xff; break;
223	case 0 : return c; /* zero length strings require no mixing */
224	}
225
226	#else /* make valgrind happy */
227
228	k8 = (const uint8_t *)k;
229	switch(length)
230	{
231	case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
232	case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
233	case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
234	case 9 : c+=k8[8]; /* fall through */
235	case 8 : b+=k[1]; a+=k[0]; break;
236	case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
237	case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
238	case 5 : b+=k8[4]; /* fall through */
239	case 4 : a+=k[0]; break;
240	case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
241	case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
242	case 1 : a+=k8[0]; break;
243	case 0 : return c;
244	}
245
246	#endif /* !valgrind */
247
248	} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
249	const uint16_t k = (const uint16_t )key; /* read 16-bit chunks */
250	const uint8_t *k8;
251
252	/--------------- all but last block: aligned reads and different mixing /
253	while (length > 12)
254	{
255	a += k[0] + (((uint32_t)k[1])<<16);
256	b += k[2] + (((uint32_t)k[3])<<16);
257	c += k[4] + (((uint32_t)k[5])<<16);
258	mix(a,b,c);
259	length -= 12;
260	k += 6;
261	}
262
263	/----------------------------- handle the last (probably partial) block /
264	k8 = (const uint8_t *)k;
265	switch(length)
266	{
267	case 12: c+=k[4]+(((uint32_t)k[5])<<16);
268	b+=k[2]+(((uint32_t)k[3])<<16);
269	a+=k[0]+(((uint32_t)k[1])<<16);
270	break;
271	case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
272	case 10: c+=k[4];
273	b+=k[2]+(((uint32_t)k[3])<<16);
274	a+=k[0]+(((uint32_t)k[1])<<16);
275	break;
276	case 9 : c+=k8[8]; /* fall through */
277	case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
278	a+=k[0]+(((uint32_t)k[1])<<16);
279	break;
280	case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
281	case 6 : b+=k[2];
282	a+=k[0]+(((uint32_t)k[1])<<16);
283	break;
284	case 5 : b+=k8[4]; /* fall through */
285	case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
286	break;
287	case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
288	case 2 : a+=k[0];
289	break;
290	case 1 : a+=k8[0];
291	break;
292	case 0 : return c; /* zero length requires no mixing */
293	}
294
295	} else { /* need to read the key one byte at a time */
296	const uint8_t k = (const uint8_t )key;
297
298	/--------------- all but the last block: affect some 32 bits of (a,b,c) /
299	while (length > 12)
300	{
301	a += k[0];
302	a += ((uint32_t)k[1])<<8;
303	a += ((uint32_t)k[2])<<16;
304	a += ((uint32_t)k[3])<<24;
305	b += k[4];
306	b += ((uint32_t)k[5])<<8;
307	b += ((uint32_t)k[6])<<16;
308	b += ((uint32_t)k[7])<<24;
309	c += k[8];
310	c += ((uint32_t)k[9])<<8;
311	c += ((uint32_t)k[10])<<16;
312	c += ((uint32_t)k[11])<<24;
313	mix(a,b,c);
314	length -= 12;
315	k += 12;
316	}
317
318	/-------------------------------- last block: affect all 32 bits of (c) /
319	switch(length) /* all the case statements fall through */
320	{
321	case 12: c+=((uint32_t)k[11])<<24;
322	case 11: c+=((uint32_t)k[10])<<16;
323	case 10: c+=((uint32_t)k[9])<<8;
324	case 9 : c+=k[8];
325	case 8 : b+=((uint32_t)k[7])<<24;
326	case 7 : b+=((uint32_t)k[6])<<16;
327	case 6 : b+=((uint32_t)k[5])<<8;
328	case 5 : b+=k[4];
329	case 4 : a+=((uint32_t)k[3])<<24;
330	case 3 : a+=((uint32_t)k[2])<<16;
331	case 2 : a+=((uint32_t)k[1])<<8;
332	case 1 : a+=k[0];
333	break;
334	case 0 : return c;
335	}
336	}
337
338	final(a,b,c);
339	return c;
340	}
341

Note: See TracBrowser for help on using the repository browser.

Download in other formats: