From b647ce0239f6cf85623652e8c2497154ae6b3f44 Mon Sep 17 00:00:00 2001
From: Antonio Huete Jimenez <tuxillo@quantumachine.net>
Date: Wed, 16 May 2012 12:53:09 +0200
Subject: [PATCH] libkern - Bring Bob Jenkins hash algorithm.

- Taken from FreeBSD
- Also included a little testing program that gives approximate numbers.

Generating random buffer ...
CRC32 100000 LOOPS dataset 1024 bytes ... 4 collisions. Elapsed 515 msec
Jenkins 100000 LOOPS dataset 1024 bytes ... 2 collisions. Elapsed 296 msec
FNV 32 100000 LOOPS dataset 1024 bytes ... 2 collisions. Elapsed 164 msec
---
 sys/libkern/jenkins.h      |  185 ++++++++++++++++++++++++++++++++++++++++++++
 tools/test/hashes/Makefile |    9 ++
 tools/test/hashes/main.c   |  159 +++++++++++++++++++++++++++++++++++++
 3 files changed, 353 insertions(+), 0 deletions(-)
 create mode 100644 sys/libkern/jenkins.h
 create mode 100644 tools/test/hashes/Makefile
 create mode 100644 tools/test/hashes/main.c

diff --git a/sys/libkern/jenkins.h b/sys/libkern/jenkins.h
new file mode 100644
index 0000000..0846ae8
--- /dev/null
+++ b/sys/libkern/jenkins.h
@@ -0,0 +1,185 @@
+#ifndef __LIBKERN_JENKINS_H__
+#define __LIBKERN_JENKINS_H__
+/*
+ * Taken from http://burtleburtle.net/bob/c/lookup3.c (hashword() only, renamed).
+ * $FreeBSD$
+ */
+
+/*
+-------------------------------------------------------------------------------
+  lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+
+  These are functions for producing 32-bit hashes for hash table lookup.
+  hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+  are externally useful functions.  Routines to test the hash are included
+  if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+  the public domain.  It has no warranty.
+
+  You probably want to use hashlittle().  hashlittle() and hashbig()
+  hash byte arrays.  hashlittle() is faster than hashbig() on
+  little-endian machines.  Intel and AMD are little-endian machines.
+  On second thought, you probably want hashlittle2(), which is identical to
+  hashlittle() except it returns two 32-bit hashes for the price of one.
+  You could implement hashbig2() if you wanted but I haven't bothered here.
+
+  If you want to find a hash of, say, exactly 7 integers, do
+    a = i1;  b = i2;  c = i3;
+    mix(a,b,c);
+    a += i4; b += i5; c += i6;
+    mix(a,b,c);
+    a += i7;
+    final(a,b,c);
+  then use c as the hash value.  If you have a variable length array of
+  4-byte integers to hash, use hashword().  If you have a byte array (like
+  a character string), use hashlittle().  If you have several byte arrays, or
+  a mix of things, see the comments above hashlittle().
+  
+  Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
+  then mix those integers.  This is fast (you can do a lot more thorough
+  mixing with 12*3 instructions on 3 integers than you can with 3 instructions
+  on 1 byte), but shoehorning those bytes into integers efficiently is messy.
+-------------------------------------------------------------------------------
+*/
+
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) /* rotate x left by k bits; assumes a 32-bit unsigned x and 0 < k < 32 */
+
+/*
+-------------------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+
+This is reversible, so any information in (a,b,c) before mix() is
+still in (a,b,c) after mix().
+
+If four pairs of (a,b,c) inputs are run through mix(), or through
+mix() in reverse, there are at least 32 bits of the output that
+are sometimes the same for one pair and different for another pair.
+This was tested for:
+* pairs that differed by one bit, by two bits, in any combination
+  of top bits of (a,b,c), or in any combination of bottom bits of
+  (a,b,c).
+* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  is commonly produced by subtraction) look like a single 1-bit
+  difference.
+* the base values were pseudorandom, all zero but one bit set, or 
+  all zero plus a counter that starts at zero.
+
+Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
+satisfy this are
+    4  6  8 16 19  4
+    9 15  3 18 27 15
+   14  9  3  7 17  3
+Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
+for "differ" defined as + with a one-bit base and a two-bit delta.  I
+used http://burtleburtle.net/bob/hash/avalanche.html to choose 
+the operations, constants, and arrangements of the variables.
+
+This does not achieve avalanche.  There are input bits of (a,b,c)
+that fail to affect some output bits of (a,b,c), especially of a.  The
+most thoroughly mixed value is c, but it doesn't really even achieve
+avalanche in c.
+
+This allows some parallelism.  Read-after-writes are good at doubling
+the number of bits affected, so the goal of mixing pulls in the opposite
+direction as the goal of parallelism.  I did what I could.  Rotates
+seem to cost as much as shifts on every machine I could lay my hands
+on, and rotates are much kinder to the top and bottom bits, so I used
+rotates.
+-------------------------------------------------------------------------------
+*/
+#define mix(a,b,c) \
+{ \
+  a -= c;  a ^= rot(c, 4);  c += b; \
+  b -= a;  b ^= rot(a, 6);  a += c; \
+  c -= b;  c ^= rot(b, 8);  b += a; \
+  a -= c;  a ^= rot(c,16);  c += b; \
+  b -= a;  b ^= rot(a,19);  a += c; \
+  c -= b;  c ^= rot(b, 4);  b += a; \
+}
+
+/*
+-------------------------------------------------------------------------------
+final -- final mixing of 3 32-bit values (a,b,c) into c
+
+Pairs of (a,b,c) values differing in only a few bits will usually
+produce values of c that look totally different.  This was tested for
+* pairs that differed by one bit, by two bits, in any combination
+  of top bits of (a,b,c), or in any combination of bottom bits of
+  (a,b,c).
+* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
+  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+  is commonly produced by subtraction) look like a single 1-bit
+  difference.
+* the base values were pseudorandom, all zero but one bit set, or 
+  all zero plus a counter that starts at zero.
+
+These constants passed:
+ 14 11 25 16 4 14 24
+ 12 14 25 16 4 14 24
+and these came close:
+  4  8 15 26 3 22 24
+ 10  8 15 26 3 22 24
+ 11  8 15 26 3 22 24
+-------------------------------------------------------------------------------
+*/
+#define final(a,b,c) \
+{ \
+  c ^= b; c -= rot(b,14); \
+  a ^= c; a -= rot(c,11); \
+  b ^= a; b -= rot(a,25); \
+  c ^= b; c -= rot(b,16); \
+  a ^= c; a -= rot(c,4);  \
+  b ^= a; b -= rot(a,14); \
+  c ^= b; c -= rot(b,24); \
+}
+
+/*
+--------------------------------------------------------------------
+ This works on all machines.  To be useful, it requires
+ -- that the key be an array of uint32_t's, and
+ -- that the length be the number of uint32_t's in the key
+
+ jenkins_hashword() is lookup3.c's hashword().  It is identical to
+ hashlittle() on little-endian machines, and identical to hashbig() on
+ big-endian machines, except that the length has to be measured in
+ uint32_ts rather than in bytes.  hashlittle() is more complicated only
+ because it has to dance around fitting the key bytes into registers.
+--------------------------------------------------------------------
+*/
+static __inline uint32_t   /* __inline: defined in a header, not every includer calls it */
+jenkins_hashword(
+                const uint32_t *k,  /* the key, an array of uint32_t values */
+                size_t length,      /* the length of the key, in uint32_ts */
+                uint32_t initval    /* the previous hash, or an arbitrary value */
+)
+{
+  uint32_t a,b,c;
+
+  /* Set up the internal state; length<<2 folds in the key size in bytes */
+  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
+
+  /*------------------------------------------------- handle most of the key */
+  while (length > 3)
+  {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 3;
+    k += 3;
+  }
+
+  /*------------------------------------------- handle the last 3 uint32_t's */
+  switch(length)                     /* all the case statements fall through */
+  {
+  case 3 : c+=k[2];                  /* FALLTHROUGH */
+  case 2 : b+=k[1];                  /* FALLTHROUGH */
+  case 1 : a+=k[0];
+    final(a,b,c);                    /* FALLTHROUGH */
+  case 0:     /* empty key: c is returned without any final mixing */
+    break;
+  }
+  /*------------------------------------------------------ report the result */
+  return c;
+}
+#endif 
diff --git a/tools/test/hashes/Makefile b/tools/test/hashes/Makefile
new file mode 100644
index 0000000..fcafe5d
--- /dev/null
+++ b/tools/test/hashes/Makefile
@@ -0,0 +1,9 @@
+PROG=	hashes
+SRCS=	main.c crc32.c
+CFLAGS = -O0 -pg
+# NOTE(review): '=' replaces any inherited CFLAGS; -O0 -pg times unoptimized, profiled code.
+.PATH:	${.CURDIR}/../../../sys/libkern
+# Extra source search path; presumably where crc32.c lives -- TODO confirm.
+NOMAN=	sorry
+
+.include <bsd.prog.mk>
diff --git a/tools/test/hashes/main.c b/tools/test/hashes/main.c
new file mode 100644
index 0000000..4df22a6
--- /dev/null
+++ b/tools/test/hashes/main.c
@@ -0,0 +1,159 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/fnv_hash.h>
+#include <sys/jenkins.h>
+#include <sys/param.h>
+
+
+#define SIZE 	MAXPATHLEN	/* bytes per input row */
+#ifndef LOOPS
+#define LOOPS	100000		/* number of rows hashed; may be overridden with -DLOOPS=n */
+#endif
+uint8_t buf[LOOPS][SIZE];	/* input rows, filled with random bytes by fillcrap() */
+/* Declared by hand here; crc32.c is listed in SRCS of the Makefile added by this patch. */
+extern uint32_t crc32(const void *buf, size_t size);
+/* Per-algorithm result arrays of LOOPS hashes, each allocated by its test_*() function. */
+uint32_t *crclst;
+uint32_t *jenlst;
+uint32_t *fnvlst;
+
+
+/* Announce the step, then fill each row of buf with SIZE random bytes. */
+void
+fillcrap(void)
+{
+	int row;
+	printf("Generating random buffer ...\n");
+	for (row = 0; row != LOOPS; ++row)
+		arc4random_buf(buf[row], SIZE);
+}
+
+/*
+ * qsort(3) comparator ordering two uint32_t hash values ascending.
+ * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
+ */
+int cmphash(const void *a, const void *b)
+{
+	/* Keep the const qualifier: the comparator only reads its operands. */
+	const uint32_t lhs = *(const uint32_t *)a;
+	const uint32_t rhs = *(const uint32_t *)b;
+
+	/* (x > y) - (x < y) yields the sign without a wrapping subtraction. */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * Count adjacent equal entries in lst, which holds LOOPS hash values.
+ * Callers sort lst first, so every duplicate hash forms such a pair.
+ */
+int
+getcollisions(uint32_t *lst)
+{
+	int idx, dups = 0;
+
+	for (idx = 1; idx < LOOPS; idx++)
+		dups += (lst[idx] == lst[idx - 1]);
+
+	return dups;
+}
+
+/*
+ * Hash every row of buf with crc32(), timing only the hashing loop, then
+ * sort the results and print the collision count and elapsed CPU time.
+ */
+void
+test_crc32(void)
+{
+	int i, col, ms;
+	clock_t s, f;
+
+	crclst = malloc(LOOPS * sizeof(*crclst));
+	if (crclst == NULL) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("CRC32 %d LOOPS dataset %d bytes ... ", LOOPS, SIZE);
+	s = clock();
+	for (i = 0; i < LOOPS; i++)
+		crclst[i] = crc32(buf[i], SIZE);
+	f = clock();
+
+	qsort(crclst, LOOPS, sizeof(*crclst), cmphash);
+	col = getcollisions(crclst);
+	ms = ((double)(f - s))/CLOCKS_PER_SEC * 1000;
+	printf("%d collisions. Elapsed %d msec\n", col, ms);
+}
+
+/*
+ * Hash every whole 32-bit word of each row of buf with jenkins_hashword()
+ * (seed 13), timing only the hashing loop, then print collisions and time.
+ */
+void
+test_jenkins(void)
+{
+	const size_t nwords = sizeof(buf[0]) / sizeof(uint32_t);
+	int i, col, ms;
+	clock_t s, f;
+
+	jenlst = malloc(LOOPS * sizeof(*jenlst));
+	if (jenlst == NULL) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("Jenkins %d LOOPS dataset %d bytes ... ", LOOPS, SIZE);
+	s = clock();
+	for (i = 0; i < LOOPS; i++)
+		jenlst[i] = jenkins_hashword((const uint32_t *)buf[i], nwords, 13);
+	f = clock();
+
+	qsort(jenlst, LOOPS, sizeof(*jenlst), cmphash);
+	col = getcollisions(jenlst);
+	ms = ((double)(f - s))/CLOCKS_PER_SEC * 1000;
+	printf("%d collisions. Elapsed %d msec\n", col, ms);
+}
+
+/*
+ * Hash all SIZE bytes of every row of buf with fnv_32_buf(), timing only
+ * the hashing loop, then sort the results and print collisions and CPU
+ * time.  Each row is hashed independently, starting from FNV1_32_INIT.
+ */
+void
+test_fnv(void)
+{
+	int i, col, ms;
+	clock_t s, f;
+
+	fnvlst = malloc(LOOPS * sizeof(*fnvlst));
+	if (fnvlst == NULL) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("FNV 32 %d LOOPS dataset %d bytes ... ", LOOPS, SIZE);
+	s = clock();
+	/* fnv_32_buf() takes its length in bytes, so pass the full row size. */
+	for (i = 0; i < LOOPS; i++)
+		fnvlst[i] = fnv_32_buf(buf[i], sizeof(buf[i]), FNV1_32_INIT);
+	f = clock();
+
+	qsort(fnvlst, LOOPS, sizeof(*fnvlst), cmphash);
+	col = getcollisions(fnvlst);
+	ms = ((double)(f - s))/CLOCKS_PER_SEC * 1000;
+	printf("%d collisions. Elapsed %d msec\n", col, ms);
+}
+
+/* Fill the shared buffer once, then run each hash test over the same data. */
+int
+main(int argc, char *argv[])
+{
+	(void)argc;		/* the benchmark takes no arguments */
+	(void)argv;
+	fillcrap();
+	test_crc32();
+	test_jenkins();
+	test_fnv();
+	return 0;
+}
-- 
1.7.7.2