Linxii's Blog
算法记录:字符串Blur image

1.字符串哈希#

1.1总体思考#

1.1.1 双哈希#

  双哈希的思想就是使用两组不同的哈希参数(基数和模数)来计算字符串的哈希值。降低哈希冲突的概率,同时使用随机base与随机mod防止被hack。

  hh数组用于存储前缀哈希值,pp数组用于存储基数的幂。其中hash函数是 h[i]=a1basei1+a2basei2+...+aibase0h[i] = a_{1}·base^{i-1} + a_{2}·base^{i-2} + ... + a_i·base^0

  例子的直观理解:

arr = [3, 1, 4, 1, 5],base=10

h[1] = 3
h[2] = 3*10 + 1 = 31
h[3] = 31*10 + 4 = 314
h[4] = 314*10 + 1 = 3141
h[5] = 3141*10 + 5 = 31415

//求子串hash
hash(l, r) = h[r+1] - h[l] × base^(r-l+1)
h[4] = 3141
h[1] = 3
len = 3 (r-l+1 = 3-1+1 = 3)
base^3 = 1000

hash = 3141 - 3×1000 = 141
cpp
随机双hash模板
template <typename T>
struct VectorHash
{
private:
    inline static int base1, base2, mod1, mod2;

    inline static mt19937_64 rng = mt19937_64(
        chrono::steady_clock::now().time_since_epoch().count());

    vector<i64> h1, h2, p1, p2;

    i64 elem_code(const T &x) const
    {
        if constexpr (is_integral_v<T>)
        {
            return x;
        }
        else
        {
            return hash<T>{}(x);
        }
    }

public:
    VectorHash(const vector<T> &v)
    {
        static once_flag flag;
        call_once(flag, []()
                  {
            uniform_int_distribution<int> dist_base(100000, 1000000000);
            uniform_int_distribution<int> dist_mod(100000000, 2000000000);

            base1 = dist_base(rng);
            base2 = dist_base(rng);
            

            mod1 = dist_mod(rng) | 1;
            mod2 = dist_mod(rng) | 1;

            while (mod2 == mod1)
            {
                mod2 = dist_mod(rng) | 1;
            } });

        int n = v.size();
        h1.resize(n + 1);
        h2.resize(n + 1);
        p1.resize(n + 1);
        p2.resize(n + 1);
        p1[0] = p2[0] = 1;

        for (int i = 0; i < n; ++i)
        {
            i64 c = elem_code(v[i]);

            i64 c1 = (c % mod1 + mod1) % mod1;
            h1[i + 1] = (h1[i] * base1 + c1) % mod1;
            p1[i + 1] = (p1[i] * base1) % mod1;

            i64 c2 = (c % mod2 + mod2) % mod2;
            h2[i + 1] = (h2[i] * base2 + c2) % mod2;
            p2[i + 1] = (p2[i] * base2) % mod2;
        }
    }

    pair<i64, i64> get_hash(int l, int r) const
    {
        if (l < 0 || r >= (int)h1.size() - 1 || l > r){
            return {0, 0};
        }

        int len = r - l + 1;

        i64 hash1 = (h1[r + 1] - h1[l] * p1[len] % mod1 + mod1) % mod1;
        i64 hash2 = (h2[r + 1] - h2[l] * p2[len] % mod2 + mod2) % mod2;

        return {hash1, hash2};
    }

    pair<i64, i64> full_hash() const
    {
        return get_hash(0, (int)h1.size() - 2);
    }

    bool equal(const VectorHash<T> &other, int l1, int r1, int l2, int r2) const
    {
        if (r1 - l1 != r2 - l2){
            return false;
        }
            
        return get_hash(l1, r1) == other.get_hash(l2, r2);
    }
};
cpp
算法记录:字符串
https://linxii-blog.tyuou2.workers.dev/blog/algorithm-4-string
Author 林夕夕
Published at May 17, 2026
Comment seems to stuck. Try to refresh?✨