{
    "slug": "p99_latency",
    "term": "P50/P95/P99 Latency Percentiles",
    "category": "observability",
    "difficulty": "beginner",
    "short": "Latency percentiles (P50, P95, P99) tell you what most users experience — P99 means '99% of requests are faster than this', revealing the worst experiences that averages hide.",
    "long": "Average latency is misleading — a 100ms average can mask 5% of requests taking 2 seconds. Percentiles: P50 (median — half faster), P95 (95% faster — almost everyone), P99 (99% faster — worst 1%), P99.9 (worst 0.1%). P50 ≈ typical user. P99 = power users or large-data users. P99.9 = outliers (usually infrastructure issues). Implementation: histogram metrics in Prometheus. histogram_quantile(0.99, rate(http_duration_bucket[5m])). Aggregating percentiles: can't average percentiles across instances — must use histogram buckets. Set SLO on P99, not average.",
    "aliases": [],
    "tags": [
        "observability",
        "latency",
        "percentiles",
        "p99",
        "metrics"
    ],
    "misconception": "Average latency is sufficient for monitoring — average hides slow outliers. A service with 50ms average and 5s P99 has serious performance issues that average masks.",
    "why_it_matters": "P99 latency determines whether power users and high-traffic moments are acceptable — averages let you ship a slow service believing it's fast.",
    "common_mistakes": [
        "Monitoring average instead of percentiles.",
        "Aggregating percentiles from different instances — statistically invalid.",
        "Setting SLO on P50 — only half of users satisfy it."
    ],
    "when_to_use": [],
    "avoid_when": [],
    "related": [
        "golden_signals",
        "slo_sli_sla",
        "prometheus_concepts",
        "alerting_best_practices"
    ],
    "prerequisites": [
        "slo_sli_sla",
        "golden_signals"
    ],
    "refs": [
        "https://prometheus.io/docs/practices/histograms/"
    ],
    "bad_code": "// Average latency metric — hides outliers:\nGauge::set('latency_avg', $totalTime / $count);\n// 100ms average, but 1% of requests take 5s",
    "good_code": "// Prometheus histogram — correct percentiles:\n$histogram = $meter->createHistogram('http.request.duration');\n$histogram->record($durationMs, ['route' => $route]);\n\n// Query P99:\n// histogram_quantile(0.99, rate(http_request_duration_bucket[5m]))\n\n// SLO: P99 < 500ms",
    "quick_fix": "Replace average latency with histogram metric. Query P99 in dashboards and alerts. Set SLO on P99, not P50. Use P999 for finding infrastructure outliers.",
    "severity": "medium",
    "effort": "low",
    "created": "2026-03-23",
    "updated": "2026-03-23",
    "citation": {
        "canonical_url": "https://codeclaritylab.com/glossary/p99_latency",
        "html_url": "https://codeclaritylab.com/glossary/p99_latency",
        "json_url": "https://codeclaritylab.com/glossary/p99_latency.json",
        "source": "CodeClarityLab Glossary",
        "author": "P.F.",
        "author_url": "https://pfmedia.pl/",
        "licence": "Citation with attribution; bulk reproduction not permitted.",
        "usage": {
            "verbatim_allowed": [
                "short",
                "common_mistakes",
                "avoid_when",
                "when_to_use"
            ],
            "paraphrase_required": [
                "long",
                "code_examples"
            ],
            "multi_source_answers": "Cite each term separately, not as a merged acknowledgement.",
            "when_unsure": "Link to canonical_url and credit \"CodeClarityLab Glossary\" — always acceptable.",
            "attribution_examples": {
                "inline_mention": "According to CodeClarityLab: <quote>",
                "markdown_link": "[P50/P95/P99 Latency Percentiles](https://codeclaritylab.com/glossary/p99_latency) (CodeClarityLab)",
                "footer_credit": "Source: CodeClarityLab Glossary — https://codeclaritylab.com/glossary/p99_latency"
            }
        }
    }
}