From 216c7f72cf6c7fa27578fc1104cbe8021f8a892e Mon Sep 17 00:00:00 2001
From: Olly Betts
Date: Tue, 12 Jul 2022 10:51:32 +1200
Subject: [PATCH] Revert "Adjust the DOH string hash function"

Changing the hash function breaks some testcases.

It seems there's a latent bug here (I suspect something somewhere in
SWIG depends on the hash iteration order), but I didn't see where and
we can't really have CI continuing to fail.

See #2303.

This reverts commit 5a96a39aa48fe35fa0bd0b92f5fb5615447ea6e8.
---
 Source/DOH/string.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/Source/DOH/string.c b/Source/DOH/string.c
index 8f321508e..543c3e3f8 100644
--- a/Source/DOH/string.c
+++ b/Source/DOH/string.c
@@ -180,27 +180,19 @@ static int String_hash(DOH *so) {
   if (s->hashkey >= 0) {
     return s->hashkey;
   } else {
-    /* We use the djb2 hash function: https://theartincode.stanis.me/008-djb2/
-     *
-     * One difference is we use initial seed 0.  It seems the usual seed value
-     * is intended to help spread out hash values, which is beneficial if
-     * linear probing is used but DOH Hash uses a chain of buckets instead, and
-     * grouped hash values are probably more cache friendly.  In tests using
-     * 0 seems slightly faster anyway.
-     */
-    const char *c = s->str;
+    char *c = s->str;
     unsigned int len = s->len > 50 ? 50 : s->len;
     unsigned int h = 0;
     unsigned int mlen = len >> 2;
     unsigned int i = mlen;
     for (; i; --i) {
-      h = h + (h << 5) + *(c++);
-      h = h + (h << 5) + *(c++);
-      h = h + (h << 5) + *(c++);
-      h = h + (h << 5) + *(c++);
+      h = (h << 5) + *(c++);
+      h = (h << 5) + *(c++);
+      h = (h << 5) + *(c++);
+      h = (h << 5) + *(c++);
     }
     for (i = len - (mlen << 2); i; --i) {
-      h = h + (h << 5) + *(c++);
+      h = (h << 5) + *(c++);
     }
     h &= 0x7fffffff;
     s->hashkey = (int)h;