Merkle 树（Merkle Tree）

## 一、为什么需要 Merkle 树

假设你管理着一个包含 100 万条记录的数据库，如何确认其中某一条记录没有被篡改？

最直觉的做法是：把所有数据重新哈希一遍，跟已知的摘要做比对。但这样做代价太高——验证一条记录需要读取并哈希 100 万条数据。

Merkle 树解决了这个问题。它把数据组织成一棵哈希二叉树，每个内部节点存储子节点哈希的哈希。要验证单个叶子，只需要从叶子到根路径上的 O(log n) 个兄弟哈希——就像检查一排封条中哪一层被破坏了一样。

这个数据结构是 Git、ZFS、Bitcoin、IPFS 等系统的基石。

## 二、现实类比

想象一个防篡改的运输封条系统。每个箱子贴上独一无二的蜡封，箱子装入板条箱后，板条箱的封印由所有箱子的蜡封推导而来。板条箱再装入集装箱，集装箱有自己的总封印。如果其中一件物品被掉包，它上方的每一层封印都会破损——你只需检查 log(n) 个封印就能定位被篡改的箱子，而无需打开所有箱子。

## 三、核心思想

Merkle 树是一棵哈希二叉树。每个叶子节点保存对应数据块的哈希值；每个内部节点保存的是把左右两个子节点的哈希值拼接后再做一次哈希得到的结果。根哈希因此成为整个数据集的指纹：任何一个数据块发生变化，根哈希都会随之改变。

```mermaid
graph TB
    R["Root Hash H(H12 + H34)"]
    R --> H12["H12 H(H1+H2)"]
    R --> H34["H34 H(H3+H4)"]
    H12 --> H1["H1 hash(Data A)"]
    H12 --> H2["H2 hash(Data B)"]
    H34 --> H3["H3 hash(Data C)"]
    H34 --> H4["H4 hash(Data D)"]
    H1 --- A["Data A"]
    H2 --- B["Data B"]
    H3 --- C["Data C"]
    H4 --- D["Data D"]
    style R fill:#f97316,stroke:#9a3412,color:#fff
    style H12 fill:#fbbf24,stroke:#92400e
    style H34 fill:#fbbf24,stroke:#92400e
```

验证 Data C 的过程：

1. 计算 H3 = hash(Data C)
2. 用兄弟节点 H4 计算 H34 = hash(H3 + H4)
3. 用叔伯节点 H12 计算根 = hash(H12 + H34)
4. 比较计算出的根哈希是否与已知的（可信的）根哈希一致

只需要 H4（兄弟节点）和 H12（叔伯节点）这两个哈希，就能完成验证——证明的长度等于树的高度，这就是 O(log n) 的魅力。

| 属性 | 值 |
| --- | --- |
| 验证代价 | 每个叶子 O(log n) 次哈希 |
| 树构建 | O(n) 次哈希 |
| 证明空间 | O(log n) 个兄弟哈希 |
| 篡改检测 | 任何修改都会改变根哈希 |

## 四、变体与对比

| 变体 | 特点 | 适用场景 |
| --- | --- | --- |
| 二叉 Merkle 树 | 每个节点两个子节点，最常见 | Bitcoin、以太坊 |
| Merkle Patricia Tree | 结合 Patricia 树和 Merkle 树，支持键值查找 | 以太坊状态树 |
| Merkle DAG | 有向无环图而非树，允许多父节点 | IPFS、Git |
| Merkle Mountain Range | 多棵树的链式排列，支持追加 | Tombchain、OpenTimestamps |

与其他完整性验证方案对比：

| 方案 | 验证代价 | 证明大小 | 动态更新 |
| --- | --- | --- | --- |
| 全量哈希 | O(n) | O(1) | 重新哈希全部 |
| Merkle 树 | O(log n) | O(log n) | O(log n) |
| 布隆过滤器 | O(k) | O(m) | 不支持删除 |

## 五、多语言实现

### Go 实现

1
package merkle
2

3
import (
4
    "crypto/sha256"
5
    "encoding/hex"
6
)
7

8
// MerkleTree is a binary hash tree built over a list of data blocks.
type MerkleTree struct {
	// Root is the top node of the tree. It is nil when the tree was built
	// from an empty input.
	Root *Node
	// Leaves holds the leaf nodes in input order. When the input has an odd
	// number of blocks, the last leaf is appended a second time so that the
	// leaf level always has an even number of entries.
	Leaves []*Node
}

// Node is a single node of the tree.
type Node struct {
	// Hash is the hex-encoded SHA-256 digest stored at this node.
	Hash string
	// Left and Right are the children; both are nil for leaf nodes.
	Left  *Node
	Right *Node
}

// NewMerkleTree builds a Merkle tree from a list of data blocks.
//
// Each leaf stores hex(SHA-256(block)). Each internal node stores
// hex(SHA-256(left.Hash + right.Hash)), where "+" is concatenation of the
// hex strings. Whenever a level (the leaf level or any intermediate level)
// has an odd number of nodes, its last node is paired with itself.
//
// An empty input yields a tree with a nil Root and no leaves.
func NewMerkleTree(data [][]byte) *MerkleTree {
	if len(data) == 0 {
		// Nothing to hash; avoid indexing into an empty level below.
		return &MerkleTree{}
	}

	// Build the leaf level.
	leaves := make([]*Node, 0, len(data)+1)
	for _, d := range data {
		sum := sha256.Sum256(d)
		leaves = append(leaves, &Node{Hash: hex.EncodeToString(sum[:])})
	}

	// Duplicate the last leaf when the count is odd (this includes the
	// single-block case, whose root is therefore H(leaf + leaf)).
	if len(leaves)%2 != 0 {
		leaves = append(leaves, leaves[len(leaves)-1])
	}

	// Build the tree bottom-up, one level at a time.
	currentLevel := leaves
	for len(currentLevel) > 1 {
		// An intermediate level can be odd even when the leaf level is even
		// (e.g. 6 leaves -> 3 parents); pad it so every node has a partner.
		if len(currentLevel)%2 != 0 {
			currentLevel = append(currentLevel, currentLevel[len(currentLevel)-1])
		}

		nextLevel := make([]*Node, 0, len(currentLevel)/2)
		for i := 0; i < len(currentLevel); i += 2 {
			left, right := currentLevel[i], currentLevel[i+1]
			sum := sha256.Sum256([]byte(left.Hash + right.Hash))
			nextLevel = append(nextLevel, &Node{
				Hash:  hex.EncodeToString(sum[:]),
				Left:  left,
				Right: right,
			})
		}
		currentLevel = nextLevel
	}

	return &MerkleTree{Root: currentLevel[0], Leaves: leaves}
}
56

57
// VerifyProof reports whether leafHash belongs to the tree whose root hash
// is rootHash.
//
// proof lists the sibling hashes from the leaf level upward. indices[i]
// tells on which side the running hash sits at step i: true means the
// running hash is the left operand (current + sibling), false means it is
// the right operand (sibling + current). All hashes are hex strings and are
// combined as hex(SHA-256(left + right)).
//
// A proof whose indices slice does not have the same length as proof is
// malformed and is rejected (returns false) instead of panicking.
func VerifyProof(leafHash string, proof []string, indices []bool, rootHash string) bool {
	if len(proof) != len(indices) {
		return false
	}

	current := leafHash
	for i, sibling := range proof {
		combined := sibling + current
		if indices[i] {
			combined = current + sibling
		}
		sum := sha256.Sum256([]byte(combined))
		current = hex.EncodeToString(sum[:])
	}
	return current == rootHash
}

### TypeScript 实现

1
/**
 * Demo hash function: 32-bit FNV-1a over the UTF-16 code units of `data`,
 * returned as 8 lowercase hex digits.
 *
 * This keeps the example dependency-free. It is NOT a cryptographic hash;
 * real systems should use one (for example SHA-256).
 */
function hash(data: string): string {
  let h = 0x811c9dc5; // FNV offset basis
  for (let i = 0; i < data.length; i++) {
    h ^= data.charCodeAt(i);
    h = Math.imul(h, 0x01000193); // FNV prime, 32-bit wrapping multiply
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (h >>> 0).toString(16).padStart(8, "0");
}

/**
 * Binary Merkle tree over a list of string data blocks.
 *
 * `layers[0]` is the leaf level and the last layer holds the single root
 * hash. Whenever a level has an odd number of nodes, its last node is
 * duplicated so that every node has a sibling.
 */
class MerkleTree {
  private readonly leaves: string[];
  private readonly layers: string[][];

  /**
   * @param data Data blocks in leaf order. The array is not modified.
   * @throws Error if `data` is empty (an empty tree has no root hash).
   */
  constructor(data: string[]) {
    if (data.length === 0) {
      throw new Error("MerkleTree requires at least one data block");
    }
    // Hash into a fresh array so the caller's input is never mutated.
    const leaves = data.map((d) => hash(d));
    // Duplicate the last leaf when the count is odd (this includes the
    // single-block case, whose root is therefore hash(leaf + leaf)).
    if (leaves.length % 2 !== 0) leaves.push(leaves[leaves.length - 1]);
    this.leaves = leaves;
    this.layers = [leaves];
    this.build();
  }

  /** Builds every layer above the leaves, bottom-up. */
  private build(): void {
    let current = this.leaves;
    while (current.length > 1) {
      // An intermediate level can be odd even when the leaf level is even
      // (e.g. 6 leaves -> 3 parents). Pad the stored layer itself so that
      // getProof can always find a sibling.
      if (current.length % 2 !== 0) current.push(current[current.length - 1]);
      const next: string[] = [];
      for (let i = 0; i < current.length; i += 2) {
        next.push(hash(current[i] + current[i + 1]));
      }
      this.layers.push(next);
      current = next;
    }
  }

  /** Returns the root hash of the tree. */
  getRoot(): string {
    const top = this.layers[this.layers.length - 1];
    return top[0];
  }

  /**
   * Returns the sibling hashes needed to verify the leaf at `index`,
   * ordered from the leaf level upward.
   *
   * `isRight` is true when the sibling is the right-hand operand, i.e. the
   * node being verified sits on the left.
   *
   * @throws RangeError if `index` is not an integer in `[0, leafCount)`,
   *   where `leafCount` includes the padding leaf added for odd input.
   */
  getProof(index: number): { hash: string; isRight: boolean }[] {
    if (!Number.isInteger(index) || index < 0 || index >= this.leaves.length) {
      throw new RangeError(`leaf index out of range: ${index}`);
    }
    const proof: { hash: string; isRight: boolean }[] = [];
    let idx = index;
    for (let layer = 0; layer < this.layers.length - 1; layer++) {
      const siblingIdx = idx % 2 === 0 ? idx + 1 : idx - 1;
      proof.push({
        hash: this.layers[layer][siblingIdx],
        isRight: siblingIdx > idx,
      });
      idx = Math.floor(idx / 2);
    }
    return proof;
  }

  /**
   * Recomputes the root from `leafHash` and `proof` and compares it with
   * `rootHash`.
   *
   * @returns true when the recomputed root equals `rootHash`.
   */
  static verify(
    leafHash: string,
    proof: { hash: string; isRight: boolean }[],
    rootHash: string
  ): boolean {
    let current = leafHash;
    for (const step of proof) {
      current = step.isRight
        ? hash(current + step.hash)
        : hash(step.hash + current);
    }
    return current === rootHash;
  }
}

## 六、生产验证

| 项目 | 源码 | 用途 |
| --- | --- | --- |
| Git | tree.c#L136-L171 | 每个 commit、tree、blob 都通过 SHA-1 内容寻址，构成 Merkle DAG。修改任何文件中的一个字节都会改变到根 commit 的所有哈希，使 `git fsck` 能高效验证仓库完整性 |
| ZFS | blkptr.c | 每个块在父块指针中存储校验和，从数据块到 uberblock 形成 Merkle 树。`zpool scrub` 遍历此树检测静默数据损坏（bit rot） |
| Bitcoin | merkle.cpp | 区块头只存储 Merkle 根哈希，SPV 轻节点通过 Merkle 证明验证交易存在性，无需下载完整区块 |

## 七、小结

何时使用：

- 需要高效验证大数据集中单条记录的完整性
- 参与方之间互不信任，需要密码学证明
- 网络带宽有限，只传输必要的证明路径
- 数据需要追加写入并持续验证

何时不用：

- 数据量很小（直接全量哈希更简单）
- 所有参与方完全可信（不需要验证机制）
- 频繁的随机插入和删除（原地修改单个叶子只需重算 O(log n) 个哈希，但插入、删除会改变叶子的位置，往往需要重建大片子树）
- 不需要完整性验证的场景

## 八、参考资料

Merkle Tree - Wikipedia - Merkle 树概念与历史
Battle-Tested Patterns: Merkle Tree - 交互式可视化与多语言实现
Git Internals - Git Objects - Git 对象模型与 Merkle DAG
Bitcoin Developer Guide - Merkle Trees - Bitcoin 中 Merkle 树的作用
IPFS Merkle DAG - IPFS 中 Merkle DAG 的设计与实现