{
    "slug": "bloom_filter",
    "term": "Bloom Filter",
    "category": "data_structures",
    "difficulty": "advanced",
    "short": "A probabilistic data structure that tests set membership in O(k) time (k = number of hash functions) using a compact fixed-size bit array, with a tunable false-positive rate and zero false negatives.",
    "long": "A Bloom filter uses multiple hash functions to set bits in a bit array. To test membership, all hash positions are checked — if any is 0, the element is definitely not in the set. If all are 1, the element is probably in the set (possible false positive). Bloom filters never produce false negatives. They are ideal for cache miss pre-screening, username availability checks, and preventing unnecessary database lookups.",
    "aliases": [
        "probabilistic set",
        "space-efficient set"
    ],
    "tags": [
        "data-structures",
        "probabilistic",
        "caching",
        "performance"
    ],
    "misconception": "Bloom filters can replace a database set — they cannot; false positives require a fallback check, and elements cannot be removed from a standard Bloom filter.",
    "why_it_matters": "A Bloom filter sized for a 1% false-positive rate can eliminate about 99% of unnecessary database lookups for non-existent keys, turning cache penetration and cold-miss floods into a solved problem.",
    "common_mistakes": [
        "Not accounting for false positive rate when sizing the bit array — too small a filter has high false positive rates.",
        "Trying to delete elements from a standard Bloom filter — use a Counting Bloom Filter for deletion support.",
        "Using a Bloom filter without a fallback — false positives require a definitive check against the source.",
        "Not understanding that Bloom filters support only insertion and membership tests — you cannot enumerate or retrieve the stored members."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "hash_table",
        "caching",
        "cache_stampede",
        "redis_patterns"
    ],
    "prerequisites": [
        "hash_table",
        "hash_functions_deep",
        "big_o_notation"
    ],
    "refs": [
        "https://en.wikipedia.org/wiki/Bloom_filter"
    ],
    "bad_code": "// Without bloom filter — every cache miss hits the DB:\nfunction getUser(int $id): ?User {\n    $cached = $cache->get(\"user:$id\");\n    if (!$cached) {\n        return $db->find($id); // DB hit even for non-existent IDs\n    }\n    return $cached;\n}",
    "good_code": "// Bloom filter pre-screens — DB only queried if probably exists:\nfunction getUser(int $id): ?User {\n    if (!$bloomFilter->mightContain(\"user:$id\")) return null; // Definite miss\n    $cached = $cache->get(\"user:$id\");\n    if (!$cached) {\n        $user = $db->find($id); // Only if bloom says 'probably exists'\n        if ($user) $cache->set(\"user:$id\", $user);\n        return $user;\n    }\n    return $cached;\n}",
    "quick_fix": "Use a Bloom filter when you need to answer 'is this item definitely NOT in the set?' cheaply — it has no false negatives but accepts a tunable false positive rate.",
    "severity": "low",
    "effort": "high",
    "created": "2026-03-15",
    "updated": "2026-03-22",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/bloom_filter",
        "html_url": "https://codeclaritylab.com/glossary/bloom_filter",
        "json_url": "https://codeclaritylab.com/glossary/bloom_filter.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Bloom Filter](https://codeclaritylab.com/glossary/bloom_filter) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/bloom_filter"
            }
        }
    }
}