{
    "slug": "data_normalisation",
    "term": "Database Normalisation",
    "category": "general",
    "difficulty": "intermediate",
    "short": "Organising relational database tables to reduce redundancy and improve integrity — from 1NF through 3NF (and beyond) as design guidelines.",
    "long": "Normalisation progressively eliminates redundancy: 1NF (atomic column values — no repeating groups or arrays in cells), 2NF (eliminate partial dependencies — every non-key column depends on the whole primary key, not just part of it), 3NF (eliminate transitive dependencies — non-key columns depend only on the primary key, not on other non-key columns). BCNF and 4NF handle edge cases. Over-normalisation leads to excessive JOINs degrading read performance — denormalise deliberately when queries justify it, documented as an explicit trade-off. For PHP applications using ORMs, normalisation affects relationship mapping (hasMany, belongsToMany) and eager-loading strategies.",
    "aliases": [
        "data normalisation general",
        "data consistency",
        "canonical data"
    ],
    "tags": [
        "general",
        "database",
        "data-quality"
    ],
    "misconception": "Data normalisation is always about database normal forms. In a broader context, normalisation means establishing a single canonical representation — normalising phone numbers to E.164 format, emails to lowercase, or dates to UTC before storage prevents comparison and deduplication failures.",
    "why_it_matters": "Database normalisation eliminates redundant data — duplicated values cause update anomalies where changing one instance misses others, leading to inconsistent data across the system.",
    "common_mistakes": [
        "Storing calculated values that can be derived from existing columns — they go out of sync.",
        "Repeating customer name and address in every order row — a customer name change requires updating thousands of rows.",
        "Over-normalising to the point where simple queries require many JOINs — pragmatic denormalisation is sometimes correct.",
        "Not understanding 1NF, 2NF, 3NF — normalising to 3NF solves most practical anomalies."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "database_indexing",
        "query_optimisation",
        "materialized_views",
        "eager_loading"
    ],
    "prerequisites": [
        "database_indexing",
        "db_schema_design",
        "acid_properties"
    ],
    "refs": [
        "https://en.wikipedia.org/wiki/Database_normalization"
    ],
    "bad_code": "-- 1NF violation — repeating groups in one column\nCREATE TABLE orders (\n    id INT,\n    tags VARCHAR(255)  -- 'urgent,fragile,priority' -- comma-list is not atomic\n);",
    "good_code": "-- 3NF design: separate tables, no transitive dependencies\nCREATE TABLE orders (id INT PRIMARY KEY, customer_id INT, total DECIMAL);\nCREATE TABLE customers (id INT PRIMARY KEY, name VARCHAR(100), city_id INT);\nCREATE TABLE cities (id INT PRIMARY KEY, name VARCHAR(100), country_id INT);\n\n-- Tags as many-to-many (junction table)\nCREATE TABLE tags (id INT PRIMARY KEY, name VARCHAR(50));\nCREATE TABLE order_tags (order_id INT, tag_id INT, PRIMARY KEY (order_id, tag_id));\n\n-- Denormalise only with evidence: materialised views or computed columns\n-- for read-heavy aggregates, not as the default",
    "quick_fix": "Normalise to 3NF for transactional data — each non-key column depends only on the whole primary key; denormalise intentionally for read-heavy query performance with documented trade-offs",
    "severity": "medium",
    "effort": "high",
    "created": "2026-03-15",
    "updated": "2026-03-22",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/data_normalisation",
        "html_url": "https://codeclaritylab.com/glossary/data_normalisation",
        "json_url": "https://codeclaritylab.com/glossary/data_normalisation.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Database Normalisation](https://codeclaritylab.com/glossary/data_normalisation) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/data_normalisation"
            }
        }
    }
}