diff options
author | 2003-07-31 20:00:03 +0000 | |
---|---|---|
committer | 2003-07-31 20:00:03 +0000 | |
commit | a8013e937a548b723f6160bd52ac8d6bc6691146 (patch) | |
tree | 024570ca353c664c1db86eaa55124e1a1933b175 /usr.bin/diff/diffreg.c | |
parent | knf (diff) | |
download | wireguard-openbsd-a8013e937a548b723f6160bd52ac8d6bc6691146.tar.xz wireguard-openbsd-a8013e937a548b723f6160bd52ac8d6bc6691146.zip |
- Change the hash function to a simple multiplicative one. The old
hash function was apparently optimized for 16 bit processors and
generates quite some collisions.
- Fix another case of excessive reallocing.
ok millert@
Diffstat (limited to 'usr.bin/diff/diffreg.c')
-rw-r--r-- | usr.bin/diff/diffreg.c | 54 |
1 files changed, 29 insertions, 25 deletions
diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c index 9855ff61d95..629351d9ffe 100644 --- a/usr.bin/diff/diffreg.c +++ b/usr.bin/diff/diffreg.c @@ -1,4 +1,4 @@ -/* $OpenBSD: diffreg.c,v 1.46 2003/07/31 02:53:57 millert Exp $ */ +/* $OpenBSD: diffreg.c,v 1.47 2003/07/31 20:00:03 otto Exp $ */ /* * Copyright (C) Caldera International Inc. 2001-2002. @@ -65,7 +65,7 @@ */ #ifndef lint -static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.46 2003/07/31 02:53:57 millert Exp $"; +static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.47 2003/07/31 20:00:03 otto Exp $"; #endif /* not lint */ #include <sys/param.h> @@ -541,12 +541,17 @@ prepare(int i, FILE *fd) { struct line *p; int j, h; + int sz; rewind(fd); - p = emalloc(3 * sizeof(struct line)); + sz = 100; + p = emalloc((sz + 3) * sizeof(struct line)); for (j = 0; (h = readhash(fd));) { - p = erealloc(p, (++j + 3) * sizeof(struct line)); - p[j].value = h; + if (j == sz) { + sz = sz * 3 / 2; + p = erealloc(p, (sz + 3) * sizeof(struct line)); + } + p[++j].value = h; } len[i] = j; file[i] = p; @@ -1138,43 +1143,38 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) return (0); } -#define HASHMASK (16 - 1) /* for masking out 16 bytes */ - /* - * hashing has the effect of - * arranging line in 7-bit bytes and then - * summing 1-s complement in 16-bit hunks + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. */ static int readhash(FILE *f) { - unsigned int shift; - int t, space; - long sum; + int i, t, space; + int sum; sum = 1; space = 0; if (!bflag && !wflag) { if (iflag) - for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { + for (i = 0; (t = getc(f)) != '\n'; i++) { if (t == EOF) { - if (shift == 0) + if (i == 0) return (0); break; } - sum += (long)chrtran[t] << (shift &= HASHMASK); + sum = sum * 127 + chrtran[t]; } else - for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { + for (i = 0; (t = getc(f)) != '\n'; i++) { if (t == EOF) { - if (shift == 0) + if (i == 0) return (0); break; } - sum += (long)t << (shift &= HASHMASK); + sum = sum * 127 + t; } } else { - for (shift = 0;;) { + for (i = 0;;) { switch (t = getc(f)) { case '\t': case ' ': @@ -1182,14 +1182,14 @@ readhash(FILE *f) continue; default: if (space && !wflag) { - shift += 7; + i++; space = 0; } - sum += (long)chrtran[t] << (shift &= HASHMASK); - shift += 7; + sum = sum * 127 + chrtran[t]; + i++; continue; case EOF: - if (shift == 0) + if (i == 0) return (0); /* FALLTHROUGH */ case '\n': @@ -1198,7 +1198,11 @@ readhash(FILE *f) break; } } - return (sum); + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); } int |