Tail sampling
Overview
With a tail sampling strategy, you make the decision to sample a trace after considering all or most of its spans. For example, tail sampling is a good option if you only want to keep traces that contain errors or traces with a long request duration.
Tail sampling is more complex to configure, implement, and maintain, but it is the recommended sampling strategy for large systems with a high telemetry volume.
Keep the following key points in mind when choosing tail sampling:
- As the system grows and changes over time, the tail sampling strategy should adapt to keep the volume of sampled data balanced.
- The components that implement the tail sampling strategy must be stateful: they hold trace data in memory for some time and use it to make a sampling decision, as shown in the sketch after this list.
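For example, the OpenTelemetry Collector tail_sampling processor buffers spans in memory for a decision window before it evaluates its policies. The following is a minimal sketch with illustrative values, not a recommendation; decision_wait and num_traces control how long, and for how many traces, the processor keeps data in memory before making a decision:

processors:
  tail_sampling:
    # How long to wait for late spans before evaluating the policies for a trace
    decision_wait: 30s
    # Upper bound on the number of traces kept in memory while waiting for a decision
    num_traces: 50000
    policies:
      [
        {
          name: all_traces_above_5000ms,
          type: latency,
          latency: { threshold_ms: 5000 },
        },
      ]

Grafana Alloy's otelcol.processor.tail_sampling component exposes an equivalent decision_wait argument.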
For Application Observability, you should sample your data at the collector after metrics generation so that all traces are available to generate accurate metrics. If you generate metrics from sampled traces, the sampling affects their values.
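In Collector terms, this means splitting traces into two pipelines: one that feeds the span metrics and service graph connectors before any sampling, and one that applies tail sampling on the way to Grafana Cloud Tempo. The following is a simplified sketch of the pipeline split used in the full configuration on this page, with the other processors omitted for brevity:

service:
  pipelines:
    traces:
      # Unsampled traces feed metrics generation
      receivers: [otlp]
      exporters: [servicegraph, spanmetrics, grafanacloud]
    traces/grafana_cloud_traces:
      # Tail sampling affects only the traces exported to Tempo
      receivers: [otlp]
      processors: [tail_sampling, batch]
      exporters: [otlphttp/grafana_cloud]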
Before you begin
- You should have no sampling strategy at the application level.
- Use Grafana Alloy or OpenTelemetry Collector to collect traces from the application, generate metrics from traces, and apply sampling.
- Send all traces to the data collector to generate accurate metrics.
In Application Observability on Grafana Cloud:
- Disable metrics generation in the configuration.
- Choose the OTEL Collector >= 0.94, Grafana Alloy >= 1.0, Grafana Agent >= 0.40 span metrics source in the configuration.
Configuration
The collector receives all traces, generates metrics, and sends metrics to Grafana Cloud Prometheus. In parallel, the collector applies a tail sampling strategy to the traces and sends sampled data to Grafana Cloud Tempo.
To view the Grafana Alloy configuration for tail sampling, select the river tab below. To view the OpenTelemetry Collector configuration for tail sampling, select the yaml tab below.
otelcol.receiver.otlp "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.receiver.otlp/
// configures the default grpc endpoint "0.0.0.0:4317"
grpc { }
// configures the default http/protobuf endpoint "0.0.0.0:4318"
http { }
output {
metrics = [otelcol.processor.resourcedetection.default.input]
logs = [otelcol.processor.resourcedetection.default.input]
traces = [otelcol.processor.resourcedetection.default.input]
}
}
otelcol.processor.transform "add_resource_attributes_as_metric_attributes" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/
error_mode = "ignore"
metric_statements {
context = "datapoint"
statements = [
"set(attributes[\"deployment.environment\"], resource.attributes[\"deployment.environment\"])",
"set(attributes[\"service.version\"], resource.attributes[\"service.version\"])",
]
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.spanmetrics "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.spanmetrics/
dimension {
name = "service.namespace"
}
dimension {
name = "service.version"
}
dimension {
name = "deployment.environment"
}
dimension {
name = "k8s.cluster.name"
}
dimension {
name = "k8s.namespace.name"
}
dimension {
name = "cloud.region"
}
dimension {
name = "cloud.availability_zone"
}
histogram {
explicit {
buckets = ["0s", "0.005s", "0.01s", "0.025s", "0.05s", "0.075s", "0.1s", "0.25s", "0.5s", "0.75s", "1s", "2.5s", "5s", "7.5s", "10s"]
}
unit = "s"
}
output {
metrics = [otelcol.processor.filter.drop_unneeded_span_metrics.input]
}
}
otelcol.processor.filter "drop_unneeded_span_metrics" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.filter/
error_mode = "ignore"
metrics {
datapoint = [
"IsMatch(metric.name, \"calls|duration\") and IsMatch(attributes[\"span.kind\"], \"SPAN_KIND_INTERNAL\")",
]
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.transform "drop_unneeded_resource_attributes" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/
error_mode = "ignore"
trace_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
metric_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
log_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
output {
traces = [
otelcol.connector.servicegraph.default.input,
otelcol.connector.spanmetrics.default.input,
otelcol.processor.tail_sampling.default.input,
otelcol.connector.host_info.default.input,
]
metrics = [otelcol.processor.transform.add_resource_attributes_as_metric_attributes.input]
logs = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.servicegraph "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.servicegraph/
dimensions = [
"service.namespace",
"service.version",
"deployment.environment",
"k8s.cluster.name",
"k8s.namespace.name",
"cloud.region",
"cloud.availability_zone",
]
latency_histogram_buckets = ["0s", "0.005s", "0.01s", "0.025s", "0.05s", "0.075s", "0.1s", "0.25s", "0.5s", "0.75s", "1s", "2.5s", "5s", "7.5s", "10s"]
store {
ttl = "2s"
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.batch "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.batch/
output {
metrics = [otelcol.exporter.otlphttp.grafana_cloud.input]
logs = [otelcol.exporter.otlphttp.grafana_cloud.input]
traces = [otelcol.exporter.otlphttp.grafana_cloud.input]
}
}
otelcol.processor.tail_sampling "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.tail_sampling/
// Examples: keep all traces that take more than 5000 ms
policy {
name = "all_traces_above_5000ms"
type = "latency"
latency = {
threshold_ms = 5000,
}
}
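  // Illustration only, not part of the tested configuration: a policy that
  // also keeps traces containing errors could look like the following,
  // using the same style as the latency policy above.
  // policy {
  //   name = "all_traces_with_errors"
  //   type = "status_code"
  //   status_code = {
  //     status_codes = ["ERROR"],
  //   }
  // }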
output {
traces = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.host_info "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.host_info/
host_identifiers = ["host.name"]
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.resourcedetection "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.resourcedetection/
detectors = ["env", "system"]
system {
hostname_sources = ["os"]
}
output {
metrics = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
logs = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
traces = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
}
}
otelcol.exporter.otlphttp "grafana_cloud" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.exporter.otlphttp/
client {
endpoint = env("GRAFANA_CLOUD_OTLP_ENDPOINT")
auth = otelcol.auth.basic.grafana_cloud.handler
}
}
otelcol.auth.basic "grafana_cloud" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.auth.basic/
username = env("GRAFANA_CLOUD_INSTANCE_ID")
password = env("GRAFANA_CLOUD_API_KEY")
}
# Tested with OpenTelemetry Collector Contrib v0.94.0
receivers:
otlp:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver
protocols:
grpc:
http:
hostmetrics:
# Optional. Host Metrics Receiver added as an example of Infra Monitoring capabilities of the OpenTelemetry Collector
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver
scrapers:
load:
memory:
processors:
batch:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor
resourcedetection:
# Enriches telemetry data with resource information from the host
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
detectors: ["env", "system"]
override: false
transform/add_resource_attributes_as_metric_attributes:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
error_mode: ignore
metric_statements:
- context: datapoint
statements:
- set(attributes["deployment.environment"], resource.attributes["deployment.environment"])
- set(attributes["service.version"], resource.attributes["service.version"])
filter/drop_unneeded_span_metrics:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/filterprocessor
error_mode: ignore
metrics:
datapoint:
- 'IsMatch(metric.name, "calls|duration") and IsMatch(attributes["span.kind"], "SPAN_KIND_INTERNAL")'
transform/drop_unneeded_resource_attributes:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
error_mode: ignore
trace_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
metric_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
log_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
tail_sampling:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor
policies:
# Examples: keep all traces that take more than 5000 ms
[
{
name: all_traces_above_5000ms,
type: latency,
latency: { threshold_ms: 5000 },
},
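        # Illustration only, not part of the tested configuration: a policy
        # that also keeps traces containing errors could look like this:
        # {
        #   name: all_traces_with_errors,
        #   type: status_code,
        #   status_code: { status_codes: [ERROR] },
        # },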
]
connectors:
servicegraph:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/servicegraphconnector
dimensions:
- service.namespace
- service.version
- deployment.environment
- k8s.cluster.name
- k8s.namespace.name
- cloud.region
- cloud.availability_zone
spanmetrics:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector
histogram:
unit: s
dimensions:
- name: service.namespace
- name: service.version
- name: deployment.environment
- name: k8s.cluster.name
- name: k8s.namespace.name
- name: cloud.region
- name: cloud.availability_zone
grafanacloud:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/grafanacloudconnector
host_identifiers: ["host.name"]
exporters:
otlphttp/grafana_cloud:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter
endpoint: "${env:GRAFANA_CLOUD_OTLP_ENDPOINT}"
auth:
authenticator: basicauth/grafana_cloud
extensions:
basicauth/grafana_cloud:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/basicauthextension
client_auth:
username: "${env:GRAFANA_CLOUD_INSTANCE_ID}"
password: "${env:GRAFANA_CLOUD_API_KEY}"
service:
extensions:
[
      basicauth/grafana_cloud,
]
pipelines:
traces:
receivers: [otlp]
processors:
[resourcedetection, transform/drop_unneeded_resource_attributes]
exporters: [servicegraph, spanmetrics, grafanacloud]
traces/grafana_cloud_traces:
receivers: [otlp]
processors:
[
resourcedetection,
transform/drop_unneeded_resource_attributes,
tail_sampling,
batch
]
exporters: [otlphttp/grafana_cloud]
metrics:
receivers: [otlp, hostmetrics]
processors:
[
resourcedetection,
transform/drop_unneeded_resource_attributes,
transform/add_resource_attributes_as_metric_attributes,
batch,
]
exporters: [otlphttp/grafana_cloud]
metrics/grafanacloud:
receivers: [grafanacloud]
processors: [batch]
exporters: [otlphttp/grafana_cloud]
metrics/spanmetrics:
receivers: [spanmetrics]
processors:
[
filter/drop_unneeded_span_metrics,
batch,
]
exporters: [otlphttp/grafana_cloud]
metrics/servicegraph:
receivers: [servicegraph]
processors: [batch]
exporters: [otlphttp/grafana_cloud]
logs:
receivers: [otlp]
processors: [resourcedetection, transform/drop_unneeded_resource_attributes, batch]
exporters: [otlphttp/grafana_cloud]
The Legacy option for the span metrics source in the configuration is for customers who use Grafana Alloy or the OpenTelemetry Collector with metric names that match those used by the Tempo metrics generator.
If you chose the Legacy option for the span metrics source, use the legacy configuration below.
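For reference, the legacy configuration renames the default span metrics produced by the spanmetrics connector to the names used by the Tempo metrics generator, implemented by the transform/use_grafana_metric_names processor in the configuration below:

traces.spanmetrics.duration -> traces.spanmetrics.latency
traces.spanmetrics.calls -> traces.spanmetrics.calls.total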
To view the Grafana Alloy legacy configuration for tail sampling, select the river tab below. To view the OpenTelemetry Collector legacy configuration for tail sampling, select the yaml tab below.
otelcol.receiver.otlp "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.receiver.otlp/
// configures the default grpc endpoint "0.0.0.0:4317"
grpc { }
// configures the default http/protobuf endpoint "0.0.0.0:4318"
http { }
output {
metrics = [otelcol.processor.resourcedetection.default.input]
logs = [otelcol.processor.resourcedetection.default.input]
traces = [otelcol.processor.resourcedetection.default.input]
}
}
otelcol.processor.transform "add_resource_attributes_as_metric_attributes" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/
error_mode = "ignore"
metric_statements {
context = "datapoint"
statements = [
"set(attributes[\"deployment.environment\"], resource.attributes[\"deployment.environment\"])",
"set(attributes[\"service.version\"], resource.attributes[\"service.version\"])",
]
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.spanmetrics "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.spanmetrics/
dimension {
name = "service.namespace"
}
dimension {
name = "service.version"
}
dimension {
name = "deployment.environment"
}
dimension {
name = "k8s.cluster.name"
}
dimension {
name = "k8s.namespace.name"
}
dimension {
name = "cloud.region"
}
dimension {
name = "cloud.availability_zone"
}
histogram {
explicit {
buckets = ["0s", "0.005s", "0.01s", "0.025s", "0.05s", "0.075s", "0.1s", "0.25s", "0.5s", "0.75s", "1s", "2.5s", "5s", "7.5s", "10s"]
}
unit = "s"
}
namespace = "traces.spanmetrics"
output {
metrics = [otelcol.processor.filter.drop_unneeded_span_metrics.input]
}
}
otelcol.processor.transform "use_grafana_metric_names" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/
error_mode = "ignore"
metric_statements {
context = "metric"
statements = [
"set(name, \"traces.spanmetrics.latency\") where name == \"traces.spanmetrics.duration\"",
"set(name, \"traces.spanmetrics.calls.total\") where name == \"traces.spanmetrics.calls\"",
]
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.filter "drop_unneeded_span_metrics" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.filter/
error_mode = "ignore"
metrics {
datapoint = [
"IsMatch(metric.name, \"traces.spanmetrics.calls|traces.spanmetrics.duration\") and IsMatch(attributes[\"span.kind\"], \"SPAN_KIND_INTERNAL\")",
]
}
output {
metrics = [otelcol.processor.transform.use_grafana_metric_names.input]
}
}
otelcol.processor.transform "drop_unneeded_resource_attributes" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/
error_mode = "ignore"
trace_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
metric_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
log_statements {
context = "resource"
statements = [
"delete_key(attributes, \"k8s.pod.start_time\")",
"delete_key(attributes, \"os.description\")",
"delete_key(attributes, \"os.type\")",
"delete_key(attributes, \"process.command_args\")",
"delete_key(attributes, \"process.executable.path\")",
"delete_key(attributes, \"process.pid\")",
"delete_key(attributes, \"process.runtime.description\")",
"delete_key(attributes, \"process.runtime.name\")",
"delete_key(attributes, \"process.runtime.version\")",
]
}
output {
traces = [
otelcol.connector.servicegraph.default.input,
otelcol.connector.spanmetrics.default.input,
otelcol.processor.tail_sampling.default.input,
otelcol.connector.host_info.default.input,
]
metrics = [otelcol.processor.transform.add_resource_attributes_as_metric_attributes.input]
logs = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.servicegraph "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.servicegraph/
dimensions = [
"service.namespace",
"service.version",
"deployment.environment",
"k8s.cluster.name",
"k8s.namespace.name",
"cloud.region",
"cloud.availability_zone",
]
latency_histogram_buckets = ["0s", "0.005s", "0.01s", "0.025s", "0.05s", "0.075s", "0.1s", "0.25s", "0.5s", "0.75s", "1s", "2.5s", "5s", "7.5s", "10s"]
store {
ttl = "2s"
}
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.batch "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.batch/
output {
metrics = [otelcol.exporter.otlphttp.grafana_cloud.input]
logs = [otelcol.exporter.otlphttp.grafana_cloud.input]
traces = [otelcol.exporter.otlphttp.grafana_cloud.input]
}
}
otelcol.processor.tail_sampling "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.tail_sampling/
// Examples: keep all traces that take more than 5000 ms
policy {
name = "all_traces_above_5000ms"
type = "latency"
latency = {
threshold_ms = 5000,
}
}
output {
traces = [otelcol.processor.batch.default.input]
}
}
otelcol.connector.host_info "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.connector.host_info/
host_identifiers = ["host.name"]
output {
metrics = [otelcol.processor.batch.default.input]
}
}
otelcol.processor.resourcedetection "default" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.resourcedetection/
detectors = ["env", "system"]
system {
hostname_sources = ["os"]
}
output {
metrics = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
logs = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
traces = [otelcol.processor.transform.drop_unneeded_resource_attributes.input]
}
}
otelcol.exporter.otlphttp "grafana_cloud" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.exporter.otlphttp/
client {
endpoint = env("GRAFANA_CLOUD_OTLP_ENDPOINT")
auth = otelcol.auth.basic.grafana_cloud.handler
}
}
otelcol.auth.basic "grafana_cloud" {
// https://grafana.com/docs/alloy/latest/reference/components/otelcol.auth.basic/
username = env("GRAFANA_CLOUD_INSTANCE_ID")
password = env("GRAFANA_CLOUD_API_KEY")
}
# Tested with OpenTelemetry Collector Contrib v0.94.0
receivers:
otlp:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver
protocols:
grpc:
http:
hostmetrics:
# Optional. Host Metrics Receiver added as an example of Infra Monitoring capabilities of the OpenTelemetry Collector
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/hostmetricsreceiver
scrapers:
load:
memory:
processors:
batch:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor
resourcedetection:
# Enriches telemetry data with resource information from the host
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
detectors: ["env", "system"]
override: false
transform/add_resource_attributes_as_metric_attributes:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
error_mode: ignore
metric_statements:
- context: datapoint
statements:
- set(attributes["deployment.environment"], resource.attributes["deployment.environment"])
- set(attributes["service.version"], resource.attributes["service.version"])
filter/drop_unneeded_span_metrics:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/filterprocessor
error_mode: ignore
metrics:
datapoint:
- 'IsMatch(metric.name, "traces.spanmetrics.calls|traces.spanmetrics.duration") and IsMatch(attributes["span.kind"], "SPAN_KIND_INTERNAL")'
transform/drop_unneeded_resource_attributes:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
error_mode: ignore
trace_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
metric_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
log_statements:
- context: resource
statements:
- delete_key(attributes, "k8s.pod.start_time")
- delete_key(attributes, "os.description")
- delete_key(attributes, "os.type")
- delete_key(attributes, "process.command_args")
- delete_key(attributes, "process.executable.path")
- delete_key(attributes, "process.pid")
- delete_key(attributes, "process.runtime.description")
- delete_key(attributes, "process.runtime.name")
- delete_key(attributes, "process.runtime.version")
transform/use_grafana_metric_names:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
error_mode: ignore
metric_statements:
- context: metric
statements:
- set(name, "traces.spanmetrics.latency") where name == "traces.spanmetrics.duration"
- set(name, "traces.spanmetrics.calls.total") where name == "traces.spanmetrics.calls"
tail_sampling:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor
policies:
# Examples: keep all traces that take more than 5000 ms
[
{
name: all_traces_above_5000ms,
type: latency,
latency: { threshold_ms: 5000 },
},
]
connectors:
servicegraph:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/servicegraphconnector
dimensions:
- service.namespace
- service.version
- deployment.environment
- k8s.cluster.name
- k8s.namespace.name
- cloud.region
- cloud.availability_zone
spanmetrics:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/spanmetricsconnector
namespace: traces.spanmetrics
histogram:
unit: s
dimensions:
- name: service.namespace
- name: service.version
- name: deployment.environment
- name: k8s.cluster.name
- name: k8s.namespace.name
- name: cloud.region
- name: cloud.availability_zone
grafanacloud:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/connector/grafanacloudconnector
host_identifiers: ["host.name"]
exporters:
otlphttp/grafana_cloud:
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter
endpoint: "${env:GRAFANA_CLOUD_OTLP_ENDPOINT}"
auth:
authenticator: basicauth/grafana_cloud
extensions:
basicauth/grafana_cloud:
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/basicauthextension
client_auth:
username: "${env:GRAFANA_CLOUD_INSTANCE_ID}"
password: "${env:GRAFANA_CLOUD_API_KEY}"
service:
extensions:
[
basicauth/grafana_cloud,
]
pipelines:
traces:
receivers: [otlp]
processors:
[resourcedetection, transform/drop_unneeded_resource_attributes]
exporters: [servicegraph, spanmetrics, grafanacloud]
traces/grafana_cloud_traces:
receivers: [otlp]
processors:
[
resourcedetection,
transform/drop_unneeded_resource_attributes,
tail_sampling,
batch
]
exporters: [otlphttp/grafana_cloud]
metrics:
receivers: [otlp, hostmetrics]
processors:
[
resourcedetection,
transform/drop_unneeded_resource_attributes,
transform/add_resource_attributes_as_metric_attributes,
batch,
]
exporters: [otlphttp/grafana_cloud]
metrics/grafanacloud:
receivers: [grafanacloud]
processors: [batch]
exporters: [otlphttp/grafana_cloud]
metrics/spanmetrics:
receivers: [spanmetrics]
processors:
[
filter/drop_unneeded_span_metrics,
transform/use_grafana_metric_names,
batch,
]
exporters: [otlphttp/grafana_cloud]
metrics/servicegraph:
receivers: [servicegraph]
processors: [batch]
exporters: [otlphttp/grafana_cloud]
logs:
receivers: [otlp]
processors: [resourcedetection, transform/drop_unneeded_resource_attributes, batch]
exporters: [otlphttp/grafana_cloud]