{
    "slug": "path_normalisation",
    "term": "Path Normalisation Bypass",
    "category": "security",
    "difficulty": "intermediate",
    "short": "Using ../, URL encoding (%2f), or OS-specific separators to escape intended directory boundaries and access files outside an allowlisted path.",
    "long": "Path normalisation attacks exploit the gap between how an application validates a path and how the OS resolves it. Common techniques: directory traversal (../../etc/passwd), URL-encoded separators (%2F, %5C on Windows), double encoding (%252F), null bytes (file.php%00.jpg in older PHP), and Windows UNC paths. PHP's realpath() resolves symlinks and traversal sequences to a canonical absolute path — always use it to validate that the resolved path starts with the intended base directory. Use basename() when you only need the filename component. Never construct file paths by concatenating user input directly, even after filtering — a filter on ../ is bypassable; a realpath() prefix check is not.",
    "aliases": [
        "path canonicalization",
        "URL normalization",
        "path canonicalisation"
    ],
    "tags": [
        "path-traversal",
        "bypass",
        "file-system",
        "cwe-22"
    ],
    "misconception": "Checking whether a path contains ../ is sufficient to prevent traversal. Encoded variants (%2e%2e%2f), double encoding, and OS-specific separators survive naive string checks. Always resolve the full canonical path with realpath() and verify it starts with the allowed base directory.",
    "why_it_matters": "Comparing or restricting paths before normalisation allows bypass via sequences like /var/www/../../etc/passwd that look different but resolve identically.",
    "common_mistakes": [
        "Checking if a path starts with an allowed prefix before calling realpath() — the check is against the un-normalised string.",
        "Filtering ../ sequences with str_replace() but missing URL-encoded variants %2e%2e%2f.",
        "Not verifying that realpath() output is still within the intended base directory after resolution.",
        "Using basename() for security — it strips the path but the remaining filename may still be dangerous."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "path_traversal",
        "lfi",
        "file_extension_bypass",
        "input_validation"
    ],
    "prerequisites": [
        "path_traversal",
        "realpath",
        "input_validation"
    ],
    "refs": [
        "https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html"
    ],
    "bad_code": "$file = $_GET['file'];\nreadfile('/var/www/files/' . $file); // ../../etc/passwd works",
    "good_code": "$base = realpath('/var/www/files');\n$path = realpath($base . '/' . $_GET['file']);\nif ($path === false || !str_starts_with($path, $base)) {\n    abort(403);\n}\nreadfile($path);",
    "quick_fix": "After resolving with realpath(), verify the result strictly starts with your base directory using str_starts_with($resolved, $baseDir.'/') — the trailing slash prevents prefix attacks",
    "severity": "high",
    "effort": "low",
    "created": "2026-03-15",
    "updated": "2026-03-22",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/path_normalisation",
        "html_url": "https://codeclaritylab.com/glossary/path_normalisation",
        "json_url": "https://codeclaritylab.com/glossary/path_normalisation.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Path Normalisation Bypass](https://codeclaritylab.com/glossary/path_normalisation) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/path_normalisation"
            }
        }
    }
}