{
    "slug": "merkle_tree",
    "term": "Merkle Tree",
    "category": "data_structures",
    "difficulty": "advanced",
    "short": "A tree, usually binary, in which each leaf holds the hash of a data block and each non-leaf node holds the hash of its children's hashes, enabling efficient verification that data is intact and untampered.",
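    "long": "A Merkle tree hashes each data block into a leaf, then repeatedly hashes pairs of child hashes into parent nodes until a single root hash remains. Because the root commits to every block, changing any block changes the root (assuming a collision-resistant hash), and one block can be verified against a trusted root using only the O(log n) sibling hashes along its path. Trees with an odd number of leaves need a defined rule for the unpaired node - duplicating the last hash is common but can introduce a real vulnerability if done carelessly, because different leaf lists can then produce the same root, as happened in early Bitcoin (CVE-2012-2459).",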
    "aliases": [
        "hash tree",
        "merkle hash tree",
        "merkle root"
    ],
    "tags": [
        "data-structures",
        "cryptography",
        "integrity-verification",
        "blockchain",
        "hashing"
    ],
    "misconception": "Many think a Merkle tree encrypts or hides the data. It does neither - it only fingerprints data for tamper detection and efficient comparison; the blocks themselves remain plain and must be stored separately.",
    "why_it_matters": "Merkle trees let systems verify that one block out of millions is authentic by checking O(log n) hashes instead of the entire dataset, which is what makes Git diffs, blockchain light clients, and replica reconciliation feasible at scale.",
    "common_mistakes": [
        "Using a broken hash function like MD5 or SHA-1, allowing collisions that forge a matching root.",
        "Not domain-separating leaf and internal node hashing, which lets an attacker pass off an internal node's concatenated child hashes as leaf data (a second-preimage attack).",
        "Mishandling odd numbers of leaves by blindly duplicating the last hash without a fixed, audited rule.",
        "Treating the Merkle root as confidential when it only protects integrity, not secrecy.",
        "Rebuilding the whole tree on every update instead of recomputing only the affected O(log n) path."
    ],
    "when_to_use": [
        "Verifying membership or integrity of one block in a large dataset with an O(log n) proof.",
        "Reconciling differences between two replicas by comparing roots then descending into mismatched subtrees.",
        "Building tamper-evident logs, content-addressed storage, or blockchain transaction roots."
    ],
    "avoid_when": [
        "Data is tiny or rarely compared, where a single full-dataset hash is simpler and sufficient.",
        "You need to hide data contents - Merkle trees provide integrity, not confidentiality.",
        "The dataset changes across most blocks on every update, since the O(log n) incremental-update advantage disappears and each update approaches a full rebuild."
    ],
    "related": [
        "binary_tree",
        "hash_table",
        "bloom_filter",
        "b_tree_structure"
    ],
    "prerequisites": [
        "binary_tree",
        "hash_table",
        "big_o_notation"
    ],
    "refs": [
        "https://en.wikipedia.org/wiki/Merkle_tree",
        "https://datatracker.ietf.org/doc/html/rfc6962",
        "https://en.wikipedia.org/wiki/Hash_tree_(persistent_data_structure)"
    ],
    "bad_code": "<?php\n// Insecure Merkle root: no domain separation, weak hash,\n// odd-leaf duplication bug.\nfunction merkleRoot(array $blocks): string {\n    $level = array_map(fn($b) => md5($b), $blocks); // weak hash\n    while (count($level) > 1) {\n        $next = [];\n        for ($i = 0; $i < count($level); $i += 2) {\n            $left = $level[$i];\n            // Unpaired node silently reused as-is - tamper risk\n            $right = $level[$i + 1] ?? $left;\n            $next[] = md5($left . $right); // leaves and nodes hashed alike\n        }\n        $level = $next;\n    }\n    return $level[0] ?? '';\n}",
    "good_code": "<?php\n// Domain-separated leaf/internal hashing, strong hash,\n// explicit odd-leaf handling.\nfunction hashLeaf(string $data): string {\n    return hash('sha256', \"\\x00\" . $data, true);\n}\nfunction hashNode(string $l, string $r): string {\n    return hash('sha256', \"\\x01\" . $l . $r, true);\n}\nfunction merkleRoot(array $blocks): string {\n    if ($blocks === []) {\n        return hash('sha256', '', true); // empty-tree convention\n    }\n    $level = array_map('hashLeaf', $blocks);\n    while (count($level) > 1) {\n        if (count($level) % 2 === 1) {\n            $level[] = end($level); // documented duplication rule\n        }\n        $next = [];\n        for ($i = 0; $i < count($level); $i += 2) {\n            $next[] = hashNode($level[$i], $level[$i + 1]);\n        }\n        $level = $next;\n    }\n    return $level[0];\n}",
    "quick_fix": "Use SHA-256 with distinct prefixes for leaf and internal nodes, and define an explicit rule for odd leaf counts.",
    "severity": "high",
    "effort": "high",
    "created": "2026-06-10",
    "updated": "2026-06-10",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/merkle_tree",
        "html_url": "https://codeclaritylab.com/glossary/merkle_tree",
        "json_url": "https://codeclaritylab.com/glossary/merkle_tree.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Merkle Tree](https://codeclaritylab.com/glossary/merkle_tree) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/merkle_tree"
            }
        }
    }
}