{
    "slug": "regex_escape_sequences",
    "term": "Regex Escape Sequences",
    "category": "regex",
    "difficulty": "intermediate",
    "short": "Backslash sequences in regex that either match special characters literally or represent character classes, anchors, and control characters.",
    "long": "Escape sequences in regular expressions serve two distinct purposes. First, they neutralize metacharacters so they match literally: \\. matches a period, \\* matches an asterisk, \\\\ matches a backslash, \\( matches a parenthesis. Second, the backslash introduces shorthand classes and assertions: \\d (digit), \\w (word character), \\s (whitespace) and their negations \\D, \\W, \\S; \\b (word boundary), \\B (non-boundary), \\A (start of subject), \\z and \\Z (end of subject). It also encodes non-printing characters: \\n (newline), \\t (tab), \\r (carriage return), \\xHH (hex byte), \\x{HHHH} (Unicode code point with the /u flag), and \\0 (null). In PCRE under PHP you must also account for two layers of escaping: the regex parser AND the PHP string parser. In a double-quoted PHP string, \"\\\\d\" is needed to pass \\d to PCRE, while single-quoted '\\d' passes \\d directly because single quotes do not interpret \\d. Inside character classes ([...]) the rules change: most metacharacters lose their special meaning, so [.] matches a literal dot without an escape, but you still escape ], \\, ^ (when leading), and - (when between characters). For dynamic patterns built from user or runtime data, never hand-escape; call preg_quote($input, '/') which escapes every PCRE metacharacter plus your chosen delimiter. Misusing escapes leads to patterns that silently match the wrong thing - an unescaped dot matches any character, a missing backslash before a delimiter ends the pattern early, and a forgotten /u flag makes \\x{...} invalid.",
    "aliases": [
        "regex escaping",
        "backslash sequences",
        "escaped metacharacters",
        "preg_quote escaping"
    ],
    "tags": [
        "regex",
        "pcre",
        "escaping",
        "character-classes",
        "php",
        "input-validation"
    ],
    "misconception": "A backslash before any character always makes it literal. In fact a backslash before a letter often creates a special token (\\d, \\b, \\w) rather than escaping it, and \\q or \\e may be an error or a control character, not a literal q or e.",
    "why_it_matters": "An unescaped metacharacter turns a precise pattern into a permissive one - an unescaped dot in a validation regex accepts characters you meant to reject, and missing escapes around delimiters cause runtime preg errors or silent match failures.",
    "common_mistakes": [
        "Using an unescaped . expecting a literal period - it matches any character instead, weakening validation.",
        "Hand-escaping dynamic input instead of calling preg_quote(), missing the delimiter or a metacharacter.",
        "Forgetting that double-quoted PHP strings consume one backslash layer before PCRE sees the pattern - use single quotes for regex literals.",
        "Escaping characters inside [...] that do not need it, or failing to escape ] and - where they do.",
        "Using \\x{1F600} without the /u flag, producing an invalid pattern error rather than matching the code point."
    ],
    "when_to_use": [
        "Use a backslash before any literal metacharacter (. * + ? ( ) [ ] { } ^ $ | \\) you intend to match exactly.",
        "Use preg_quote() whenever a pattern incorporates user-supplied or runtime data.",
        "Use \\x{...} with the /u flag to match specific Unicode code points by value.",
        "Use shorthand classes (\\d, \\w, \\s) for concise, readable character matching."
    ],
    "avoid_when": [
        "Avoid hand-escaping when building patterns from variables - preg_quote is safer and complete.",
        "Do not over-escape inside character classes where most metacharacters are already literal, since it harms readability.",
        "Avoid double-quoted PHP strings for regex literals when the pattern contains backslash sequences."
    ],
    "related": [
        "regex_syntax",
        "regex_pcre_php",
        "regex_anchors",
        "preg_match"
    ],
    "prerequisites": [
        "regex_syntax",
        "regex_pcre_php"
    ],
    "refs": [
        "https://www.php.net/manual/en/regexp.reference.escape.php",
        "https://www.php.net/manual/en/function.preg-quote.php",
        "https://www.pcre.org/current/doc/html/pcre2pattern.html"
    ],
    "bad_code": "// Double-quoted string: \\d is not a recognized PHP escape, so it\n// survives as backslash-d here - but other sequences (\\n, \\t, \\0)\n// would be consumed, making double quotes fragile for regex\n$digits = \"/\\d+/\"; // works by luck; use single quotes instead",
    "good_code": "// Escape the dot to match a literal period\n$pattern = '/^file\\.txt$/';\npreg_match($pattern, 'fileXtxt'); // 0 - correctly rejected\n\n// Always escape runtime input with preg_quote\n$search = $_GET['q'];\n$escaped = preg_quote($search, '/');\npreg_match('/' . $escaped . '/', $subject); // safe literal match\n\n// Use single quotes so the backslash reaches PCRE intact\n$digits = '/\\d+/';\npreg_match($digits, 'abc123', $m); // $m[0] === '123'\n\n// Unicode code point needs the /u flag\npreg_match('/\\x{1F600}/u', $emoji);",
    "quick_fix": "Use single-quoted regex literals, escape metacharacters with a backslash, and call preg_quote($input, '/') for any runtime-built pattern.",
    "severity": "medium",
    "effort": "low",
    "created": "2026-06-05",
    "updated": "2026-06-05",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/regex_escape_sequences",
        "html_url": "https://codeclaritylab.com/glossary/regex_escape_sequences",
        "json_url": "https://codeclaritylab.com/glossary/regex_escape_sequences.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Regex Escape Sequences](https://codeclaritylab.com/glossary/regex_escape_sequences) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/regex_escape_sequences"
            }
        }
    }
}