String : Bloom Filter
A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positive matches are possible, but false negatives are not.
|
|
class BloomFilter
{
uint[] bits = null;
int p = 0;
int k = 0;
Func<string, uint> hash1 = null;
Func<string, uint> hash2 = null;
public BloomFilter(int bitsSize, int p, int k, Func<string, uint> hash1,
Func<string, uint> hash2)
{
bits = new uint[bitsSize];
this.p = p;
this.k = k;
this.hash1 = hash1;
this.hash2 = hash2;
}
public bool Add(string s)
{
bool added = false;
long h1 = hash1(s);
long h2 = hash2(s);
for (int i = 0; i < k; i++)
{
int bit = (int)((h1 + h2 * i) % p + p * i);
if (!added && (bits[bit / 32] & (1 << (bit % 32))) == 0)
added = true;
bits[bit / 32] |= (uint)(1 << (bit % 32));
}
return added;
}
public bool Contains(string s)
{
long h1 = hash1(s);
long h2 = hash2(s);
for (int i = 0; i < k; i++)
{
int bit = (int)((h1 + h2 * i) % p + p * i);
if ((bits[bit / 32] & (1 << (bit % 32))) == 0)
return false;
}
return true;
}
}
class BloomFilterFactory
{
public enum BFType
{
bft_200MB_k70, // z = 200MB, m = 1600000000, k = 70
bft_200MB_k35, // z = 200MB, m = 1600000000, k = 35
bft_20MB_k70, // z = 20MB, m = 160000000, k = 70
bft_20MB_k35, // z = 20MB, m = 160000000, k = 35
bft_2MB_k70, // z = 2MB, m = 16000000, k = 70
bft_2MB_k35 // z = 2MB, m = 16000000, k = 35
}
static int[] xZ = { 50000000, 50000000, 5000000, 5000000, 500000, 500000 };
static int[] xP = { 22857119, 45714283, 2285711, 4571423, 228559, 457139 };
static int[] xK = { 70, 35, 70, 35, 70, 35 };
static uint jenkins(string s)
{
uint hash = 0;
int i = 0;
while (i != s.Length)
{
hash += s[i++];
hash += hash << 10;
hash ^= hash >> 6;
}
hash += hash << 3;
hash ^= hash >> 11;
hash += hash << 15;
return hash;
}
static uint minstd(string s)
{
const int mul = 48271;
const int mod = 2147483647;
long hash = 0;
foreach (char c in s)
hash = ((hash * mul) + c) % mod;
return (uint)hash;
}
static uint djb2(string s)
{
uint hash = 5381;
foreach (char c in s)
hash = ((hash << 5) + hash) + c;
return hash;
}
static uint sdbm(string s)
{
uint hash = 0;
foreach (char c in s)
hash = (hash << 6) + (hash << 16) - hash + c;
return hash;
}
public static BloomFilter Create(BFType bftype)
{
return new BloomFilter(
xZ[(int)bftype], xP[(int)bftype], xK[(int)bftype], djb2, sdbm
);
}
}