{
    "slug": "message_serialisation",
    "term": "Message Serialisation",
    "category": "messaging",
    "difficulty": "intermediate",
    "short": "JSON (readable, flexible), Avro (schema-enforced, compact, Kafka standard), Protobuf (typed, 5-10x smaller) — matching format to volume and schema requirements.",
    "long": "Message serialisation formats: JSON — human-readable, schema-free, widely supported, but verbose (35 bytes for a simple event). MessagePack — binary JSON, 2-3x smaller. Avro — schema stored in a schema registry; consumer must have schema; excellent for Kafka; schema evolution rules prevent breaking consumers. Protocol Buffers (Protobuf) — strongly typed .proto schema, 5-10x smaller and faster than JSON, excellent cross-language support. Never use PHP serialize() for messages — PHP-only format and a deserialization vulnerability source.",
    "aliases": [
        "Avro",
        "Protobuf",
        "Protocol Buffers",
        "MessagePack",
        "message format",
        "serialisation format"
    ],
    "tags": [
        "messaging",
        "performance",
        "architecture"
    ],
    "misconception": "JSON is always sufficient for message serialisation — at high message volume (millions/day), Protobuf or Avro significantly reduce storage and network costs; at 10M messages/day, Protobuf vs JSON is a 10x storage and bandwidth difference.",
    "why_it_matters": "A Kafka topic receiving 10M messages per day as JSON uses 10x more storage and bandwidth than Protobuf — serialisation format is a significant operational cost driver at scale.",
    "common_mistakes": [
        "PHP serialize() for messages — PHP-only, insecure deserialization vulnerability",
        "No schema validation for JSON messages — malformed messages corrupt consumer state silently",
        "Avro without a schema registry — consumers need schemas; registry provides versioned schema access",
        "Changing Protobuf field numbers — field numbers are permanent; changing breaks all existing consumers"
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "kafka_basics",
        "message_ordering_guarantees",
        "api_contract_testing"
    ],
    "prerequisites": [
        "message_queue_patterns",
        "kafka_basics",
        "api_design"
    ],
    "refs": [
        "https://developers.google.com/protocol-buffers"
    ],
    "bad_code": "// PHP serialize — PHP-only, deserialization risk:\n$message = serialize(['order_id' => 42, 'amount' => 99.99]);\n$queue->publish($message);\n// Cannot consume from Node.js, Python, Go services\n// Consumer must unserialize() — deserialization vulnerability",
    "good_code": "// JSON — simple, cross-language:\n$message = json_encode(['order_id' => 42, 'amount' => 99.99]);\n\n// Protobuf — typed, compact, cross-language:\n// payment.proto: message PaymentEvent { int64 order_id = 1; double amount = 2; }\n$event = new PaymentEvent();\n$event->setOrderId(42);\n$event->setAmount(99.99);\n$binary = $event->serializeToString(); // ~10 bytes vs JSON ~35 bytes",
    "quick_fix": "Use JSON for simple queue messages and Protocol Buffers or Avro (with Schema Registry) for high-throughput Kafka — always include a schema version in your message envelope so consumers can evolve independently",
    "severity": "medium",
    "effort": "medium",
    "created": "2026-03-16",
    "updated": "2026-03-22",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/message_serialisation",
        "html_url": "https://codeclaritylab.com/glossary/message_serialisation",
        "json_url": "https://codeclaritylab.com/glossary/message_serialisation.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Message Serialisation](https://codeclaritylab.com/glossary/message_serialisation) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/message_serialisation"
            }
        }
    }
}