{
    "slug": "message_serialization",
    "term": "Message Serialization (Avro/Protobuf)",
    "category": "messaging",
    "difficulty": "intermediate",
    "short": "Binary serialization formats (Avro, Protobuf, MessagePack) are faster and smaller than JSON for high-throughput messaging — with schema evolution support for Avro.",
    "long": "JSON: human-readable, no schema, large. Avro: binary, schema required, schema registry for evolution. Protobuf: binary, .proto schema, excellent language support, smaller than Avro. MessagePack: binary JSON (no schema). Schema registry (Confluent): stores Avro/Protobuf schemas, enforces compatibility (backward/forward/full). Schema evolution: Avro backward compatible — add optional fields with defaults. Protobuf: add fields with new numbers, never reuse numbers. For PHP: use avro-php, google/protobuf PHP library. JSON is fine for low-volume; use binary for high-throughput Kafka pipelines.",
    "aliases": [],
    "tags": [
        "messaging",
        "serialization",
        "protobuf",
        "avro",
        "schema"
    ],
    "misconception": "JSON is sufficient for all messaging — for high-throughput Kafka pipelines (millions/sec), JSON parsing CPU and size become significant. Protobuf is 3-10x smaller and 5-10x faster to parse.",
    "why_it_matters": "At high throughput, serialization format determines CPU cost and network bandwidth — binary formats can cut infrastructure costs significantly.",
    "common_mistakes": [
        "No schema validation on JSON messages — producer sends wrong structure, consumer crashes.",
        "Not using a schema registry — schema changes break consumers silently.",
        "Reusing Protobuf field numbers — causes decoding errors in old consumers."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "kafka_concepts",
        "message_broker",
        "at_least_once_delivery"
    ],
    "prerequisites": [
        "kafka_concepts",
        "message_broker"
    ],
    "refs": [
        "https://protobuf.dev/",
        "https://avro.apache.org/"
    ],
    "bad_code": "// JSON — verbose, no schema enforcement:\n$producer->send(json_encode(['userId' => 1, 'amount' => '10.00'])); // Is amount int or string?",
    "good_code": "// Protobuf — typed, compact:\n$msg = new OrderCreated();\n$msg->setUserId(1);\n$msg->setAmountCents(1000); // Explicit type\n$producer->send($msg->serializeToString());\n\n// Consumer:\n$event = new OrderCreated();\n$event->mergeFromString($rawMessage);",
    "quick_fix": "Use Protobuf for new high-throughput pipelines. Add schema registry if using Avro. Never change Protobuf field numbers. Always validate message schema on consumer.",
    "severity": "info",
    "effort": "medium",
    "created": "2026-03-23",
    "updated": "2026-03-23",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/message_serialization",
        "html_url": "https://codeclaritylab.com/glossary/message_serialization",
        "json_url": "https://codeclaritylab.com/glossary/message_serialization.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[Message Serialization (Avro/Protobuf)](https://codeclaritylab.com/glossary/message_serialization) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/message_serialization"
            }
        }
    }
}