Algo Algo   C++   C#   Demo   JS   Py   SQL   Stat   TA

String : Bloom Filter

A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positive matches are possible, but false negatives are not.

class BloomFilter
{
    uint[] bits = null;
    int p = 0;
    int k = 0;
    Func<string, uint> hash1 = null;
    Func<string, uint> hash2 = null;

    public BloomFilter(int bitsSize, int p, int k, Func<string, uint> hash1,
                                                   Func<string, uint> hash2)
    {
        bits = new uint[bitsSize];
        this.p = p;
        this.k = k;
        this.hash1 = hash1;
        this.hash2 = hash2;
    }

    public bool Add(string s)
    {
        bool added = false;
        long h1 = hash1(s);
        long h2 = hash2(s);
        for (int i = 0; i < k; i++)
        {
            int bit = (int)((h1 + h2 * i) % p + p * i);
            if (!added && (bits[bit / 32] & (1 << (bit % 32))) == 0)
                added = true;
            bits[bit / 32] |= (uint)(1 << (bit % 32));
        }
        return added;
    }

    public bool Contains(string s)
    {
        long h1 = hash1(s);
        long h2 = hash2(s);
        for (int i = 0; i < k; i++)
        {
            int bit = (int)((h1 + h2 * i) % p + p * i);
            if ((bits[bit / 32] & (1 << (bit % 32))) == 0)
                return false;
        }
        return true;
    }
}

class BloomFilterFactory
{
    public enum BFType
    {
        bft_200MB_k70,  // z = 200MB, m = 1600000000, k = 70
        bft_200MB_k35,  // z = 200MB, m = 1600000000, k = 35
        bft_20MB_k70,   // z = 20MB,  m = 160000000,  k = 70
        bft_20MB_k35,   // z = 20MB,  m = 160000000,  k = 35
        bft_2MB_k70,    // z = 2MB,   m = 16000000,   k = 70
        bft_2MB_k35     // z = 2MB,   m = 16000000,   k = 35
    }

    static int[] xZ = { 50000000, 50000000, 5000000, 5000000, 500000, 500000 };
    static int[] xP = { 22857119, 45714283, 2285711, 4571423, 228559, 457139 };
    static int[] xK = { 70, 35, 70, 35, 70, 35 };

    static uint jenkins(string s)
    {
        uint hash = 0;
        int i = 0;
        while (i != s.Length)
        {
            hash += s[i++];
            hash += hash << 10;
            hash ^= hash >> 6;
        }
        hash += hash << 3;
        hash ^= hash >> 11;
        hash += hash << 15;
        return hash;
    }

    static uint minstd(string s)
    {
        const int mul = 48271;
        const int mod = 2147483647;
        long hash = 0;
        foreach (char c in s)
            hash = ((hash * mul) + c) % mod;
        return (uint)hash;
    }

    static uint djb2(string s)
    {
        uint hash = 5381;
        foreach (char c in s)
            hash = ((hash << 5) + hash) + c;
        return hash;
    }

    static uint sdbm(string s)
    {
        uint hash = 0;
        foreach (char c in s)
            hash = (hash << 6) + (hash << 16) - hash + c;
        return hash;
    }

    public static BloomFilter Create(BFType bftype)
    {
        return new BloomFilter(
            xZ[(int)bftype], xP[(int)bftype], xK[(int)bftype], djb2, sdbm
        );
    }
}
Algo Algo   C++   C#   Demo   JS   Py   SQL   Stat   TA