{
    "slug": "fuzzy_search",
    "term": "Fuzzy Search",
    "category": "search",
    "difficulty": "intermediate",
    "short": "Matching strings that are similar but not identical — tolerating typos, transpositions, and misspellings using edit distance algorithms.",
    "long": "Fuzzy search uses edit distance (Levenshtein distance): the minimum number of single-character edits to transform one string to another. Distance 1 matches one typo; distance 2 matches two. Elasticsearch's fuzzy query and Meilisearch/Typesense's built-in typo tolerance handle this automatically. For PHP, similar_text() and levenshtein() compute distances. Trigram indexes (PostgreSQL pg_trgm) enable fuzzy matching with database indexes.",
    "aliases": [
        "typo tolerance",
        "Levenshtein distance",
        "approximate matching",
        "edit distance"
    ],
    "tags": [
        "search",
        "algorithms",
        "ux"
    ],
    "misconception": "Fuzzy search matches everything loosely — good fuzzy search is calibrated to distance 1-2, which matches real typos without matching semantically unrelated words.",
    "why_it_matters": "Users typo queries — 'seach' for 'search', 'recieve' for 'receive' — without fuzzy matching, they see zero results for a query you can serve; fuzzy matching converts failed searches to successful ones.",
    "common_mistakes": [
        "Fuzzy distance too high — distance 3+ matches too many unrelated terms, reducing relevance.",
        "Fuzzy matching on every field — apply fuzzy only to text fields, not IDs or structured data.",
        "Not using AUTO fuzziness — Elasticsearch's AUTO:3,6 applies no fuzziness for short terms, distance 1 for 3-5 chars, distance 2 for 6+ chars.",
        "Levenshtein in PHP application code on every row — O(n) for n documents; use indexed fuzzy search."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "elasticsearch_basics",
        "full_text_search",
        "autocomplete_design"
    ],
    "prerequisites": [
        "full_text_search",
        "meilisearch_typesense",
        "string_algorithms"
    ],
    "refs": [
        "https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html"
    ],
    "bad_code": "// PHP Levenshtein on all rows — O(n), unusable at scale:\n$query = 'seach';\n$results = $db->query('SELECT * FROM products')->fetchAll();\n$fuzzyResults = array_filter($results, function($product) use ($query) {\n    return levenshtein($query, strtolower($product['name'])) <= 2;\n});\n// Scans all products in PHP — not viable for large datasets",
    "good_code": "// Elasticsearch fuzzy query — indexed, fast:\n$query = [\n    'query' => [\n        'match' => [\n            'name' => [\n                'query' => $searchTerm,\n                'fuzziness' => 'AUTO',     // AUTO:3,6 — sensible defaults\n                'prefix_length' => 2,       // First 2 chars must match exactly\n            ]\n        ]\n    ]\n];\n\n// PostgreSQL pg_trgm for simpler setups:\n// CREATE INDEX idx_products_name_trgm ON products USING gin(name gin_trgm_ops);\n// SELECT * FROM products WHERE name % 'seach' ORDER BY name <-> 'seach' LIMIT 10;",
    "quick_fix": "Enable fuzzy search in Meilisearch (it's on by default) or use Levenshtein distance for simple PHP implementations — never use LIKE '%term%' which can't do fuzzy matching",
    "severity": "medium",
    "effort": "medium",
    "created": "2026-03-15",
    "updated": "2026-03-22",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/fuzzy_search",
        "html_url": "https://codeclaritylab.com/glossary/fuzzy_search",
        "json_url": "https://codeclaritylab.com/glossary/fuzzy_search.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Fuzzy Search](https://codeclaritylab.com/glossary/fuzzy_search) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/fuzzy_search"
            }
        }
    }
}