diff --git a/apisix/cli/config.lua b/apisix/cli/config.lua index 25e6783c23f8..13359fa814cd 100644 --- a/apisix/cli/config.lua +++ b/apisix/cli/config.lua @@ -84,7 +84,8 @@ local _M = { neg_ttl = 60, neg_count = 512 } - } + }, + tracing = false }, nginx_config = { error_log = "logs/error.log", diff --git a/apisix/core/response.lua b/apisix/core/response.lua index baee97749598..ffc692eb869c 100644 --- a/apisix/core/response.lua +++ b/apisix/core/response.lua @@ -19,6 +19,7 @@ -- -- @module core.response +local tracer = require("apisix.tracer") local encode_json = require("cjson.safe").encode local ngx = ngx local arg = ngx.arg @@ -86,6 +87,9 @@ function resp_exit(code, ...) end if code then + if code >= 400 then + tracer.finish_all(ngx.ctx, tracer.status.ERROR, "response code " .. code) + end return ngx_exit(code) end end diff --git a/apisix/init.lua b/apisix/init.lua index 1fb0900254a0..e657623aa776 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -47,6 +47,8 @@ local debug = require("apisix.debug") local pubsub_kafka = require("apisix.pubsub.kafka") local resource = require("apisix.resource") local trusted_addresses_util = require("apisix.utils.trusted-addresses") +local tracer = require("apisix.tracer") + local discovery = require("apisix.discovery.init").discovery local ngx = ngx local get_method = ngx.req.get_method @@ -202,6 +204,9 @@ function _M.ssl_client_hello_phase() local ngx_ctx = ngx.ctx local api_ctx = core.tablepool.fetch("api_ctx", 0, 32) ngx_ctx.api_ctx = api_ctx + api_ctx.ngx_ctx = ngx_ctx + + local span = tracer.start(ngx_ctx, "ssl_client_hello_phase", tracer.kind.server) local ok, err = router.router_ssl.match_and_set(api_ctx, true, sni) @@ -215,18 +220,21 @@ function _M.ssl_client_hello_phase() core.log.error("failed to fetch ssl config: ", err) end core.log.error("failed to match any SSL certificate by SNI: ", sni) + tracer.finish(ngx_ctx, span, tracer.status.ERROR, "failed match SNI") ngx_exit(-1) end ok, err = 
apisix_ssl.set_protocols_by_clienthello(ngx_ctx.matched_ssl.value.ssl_protocols) if not ok then core.log.error("failed to set ssl protocols: ", err) + tracer.finish(ngx_ctx, span, tracer.status.ERROR, "failed set protocols") ngx_exit(-1) end -- in stream subsystem, ngx.ssl.server_name() return hostname of ssl session in preread phase, -- so that we can't get real SNI without recording it in ngx.ctx during client_hello phase ngx.ctx.client_hello_sni = sni + tracer.finish(ngx_ctx, span) end @@ -480,7 +488,6 @@ local function common_phase(phase_name) end - function _M.handle_upstream(api_ctx, route, enable_websocket) -- some plugins(ai-proxy...) request upstream by http client directly if api_ctx.bypass_nginx_upstream then @@ -677,9 +684,12 @@ function _M.http_access_phase() -- always fetch table from the table pool, we don't need a reused api_ctx local api_ctx = core.tablepool.fetch("api_ctx", 0, 32) ngx_ctx.api_ctx = api_ctx + api_ctx.ngx_ctx = ngx_ctx core.ctx.set_vars_meta(api_ctx) + local span = tracer.start(ngx_ctx, "apisix.phase.access", tracer.kind.server) + if not verify_https_client(api_ctx) then return core.response.exit(400) end @@ -717,10 +727,12 @@ function _M.http_access_phase() handle_x_forwarded_headers(api_ctx) + local match_span = tracer.start(ngx_ctx, "http_router_match", tracer.kind.internal) router.router_http.match(api_ctx) local route = api_ctx.matched_route if not route then + tracer.finish(ngx_ctx, match_span, tracer.status.ERROR, "no matched route") -- run global rule when there is no matching route local global_rules, conf_version = apisix_global_rules.global_rules() plugin.run_global_rules(api_ctx, global_rules, conf_version, nil) @@ -729,6 +741,7 @@ function _M.http_access_phase() return core.response.exit(404, {error_msg = "404 Route Not Found"}) end + tracer.finish(ngx_ctx, match_span) core.log.info("matched route: ", core.json.delay_encode(api_ctx.matched_route, true)) @@ -785,7 +798,6 @@ function _M.http_access_phase() else local 
plugins = plugin.filter(api_ctx, route) api_ctx.plugins = plugins - plugin.run_plugin("rewrite", plugins, api_ctx) if api_ctx.consumer then local changed @@ -821,6 +833,7 @@ function _M.http_access_phase() end plugin.run_plugin("access", plugins, api_ctx) end + tracer.finish(ngx_ctx, span) _M.handle_upstream(api_ctx, route, enable_websocket) @@ -879,6 +892,8 @@ end function _M.http_header_filter_phase() + local ngx_ctx = ngx.ctx + local span = tracer.start(ngx_ctx, "apisix.phase.header_filter", tracer.kind.server) core.response.set_header("Server", ver_header) local up_status = get_var("upstream_status") @@ -901,6 +916,9 @@ function _M.http_header_filter_phase() end core.response.set_header("Apisix-Plugins", core.table.concat(deduplicate, ", ")) end + tracer.finish(ngx_ctx, span) + + tracer.start(ngx_ctx, "apisix.phase.body_filter", tracer.kind.server) end @@ -1056,6 +1074,7 @@ function _M.http_log_phase() if not api_ctx then return end + tracer.finish_all(api_ctx.ngx_ctx) if not api_ctx.var.apisix_upstream_response_time or api_ctx.var.apisix_upstream_response_time == "" then @@ -1081,6 +1100,9 @@ function _M.http_log_phase() core.tablepool.release("matched_route_record", api_ctx.curr_req_matched) end + tracer.release(api_ctx.ngx_ctx) + api_ctx.ngx_ctx = nil + core.tablepool.release("api_ctx", api_ctx) end diff --git a/apisix/plugin.lua b/apisix/plugin.lua index 20a08aa5fff2..9e640aa0d750 100644 --- a/apisix/plugin.lua +++ b/apisix/plugin.lua @@ -38,6 +38,7 @@ local tostring = tostring local error = error local getmetatable = getmetatable local setmetatable = setmetatable +local tracer = require("apisix.tracer") -- make linter happy to avoid error: getting the Lua global "load" -- luacheck: globals load, ignore lua_load local lua_load = load @@ -1228,7 +1229,10 @@ function _M.run_plugin(phase, plugins, api_ctx) plugin_run = true run_meta_pre_function(conf, api_ctx, plugins[i]["name"]) api_ctx._plugin_name = plugins[i]["name"] + local span = 
tracer.start(api_ctx.ngx_ctx, "apisix.phase." .. phase + .. ".plugins." .. api_ctx._plugin_name) phase_func(conf, api_ctx) + tracer.finish(api_ctx.ngx_ctx, span) api_ctx._plugin_name = nil end end @@ -1301,6 +1305,7 @@ end function _M.run_global_rules(api_ctx, global_rules, conf_version, phase_name) if global_rules and #global_rules > 0 then + local span = tracer.start(api_ctx.ngx_ctx, "run_global_rules", tracer.kind.internal) local orig_conf_type = api_ctx.conf_type local orig_conf_version = api_ctx.conf_version local orig_conf_id = api_ctx.conf_id @@ -1335,6 +1340,7 @@ function _M.run_global_rules(api_ctx, global_rules, conf_version, phase_name) api_ctx.conf_type = orig_conf_type api_ctx.conf_version = orig_conf_version api_ctx.conf_id = orig_conf_id + tracer.finish(api_ctx.ngx_ctx, span) end end diff --git a/apisix/plugins/opentelemetry.lua b/apisix/plugins/opentelemetry.lua index d98ac44ae69d..487c143819ce 100644 --- a/apisix/plugins/opentelemetry.lua +++ b/apisix/plugins/opentelemetry.lua @@ -48,6 +48,7 @@ local pairs = pairs local ipairs = ipairs local unpack = unpack local string_format = string.format +local update_time = ngx.update_time local lrucache = core.lrucache.new({ type = 'plugin', count = 128, ttl = 24 * 60 * 60, @@ -327,10 +328,17 @@ function _M.rewrite(conf, api_ctx) local attributes = { attr.string("net.host.name", vars.host), + -- deprecated attributes attr.string("http.method", vars.method), attr.string("http.scheme", vars.scheme), attr.string("http.target", vars.request_uri), attr.string("http.user_agent", vars.http_user_agent), + + -- new attributes + attr.string("http.request.method", vars.method), + attr.string("url.scheme", vars.scheme), + attr.string("url.path", vars.uri), + attr.string("user_agent.original", vars.http_user_agent), } if api_ctx.curr_req_matched then @@ -376,6 +384,10 @@ function _M.rewrite(conf, api_ctx) ngx_var.opentelemetry_span_id = span_context.span_id end + if not ctx:span():is_recording() and ngx.ctx.tracing then 
+ ngx.ctx.tracing.skip = true + end + api_ctx.otel_context_token = ctx:attach() -- inject trace context into the headers of upstream HTTP request @@ -383,41 +395,85 @@ function _M.rewrite(conf, api_ctx) end -function _M.delayed_body_filter(conf, api_ctx) - if api_ctx.otel_context_token and ngx.arg[2] then - local ctx = context:current() - ctx:detach(api_ctx.otel_context_token) - api_ctx.otel_context_token = nil +local function create_child_span(tracer, parent_span_ctx, spans, span) + if not span or span.finished then + return + end + span.finished = true + local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name, + { + kind = span.kind, + attributes = span.attributes, + }) + new_span.start_time = span.start_time + + for _, idx in ipairs(span.child_ids or {}) do + create_child_span(tracer, new_span_ctx, spans, spans[idx]) + end + if span.status then + new_span:set_status(span.status.code, span.status.message) + end + new_span:finish(span.end_time) +end - -- get span from current context - local span = ctx:span() - local upstream_status = core.response.get_upstream_status(api_ctx) - if upstream_status and upstream_status >= 500 then - span:set_status(span_status.ERROR, - "upstream response status: " .. 
upstream_status) - end - span:set_attributes(attr.int("http.status_code", upstream_status)) +local function inject_core_spans(root_span_ctx, api_ctx, conf) + local tracing = api_ctx.ngx_ctx.tracing + if not tracing then + return + end - span:finish() + local span = root_span_ctx:span() + + local metadata = plugin.plugin_metadata(plugin_name) + local plugin_info = metadata.value + if span and not span:is_recording() then + return + end + local inject_conf = { + sampler = { + name = "always_on", + options = conf.sampler.options + }, + additional_attributes = conf.additional_attributes, + additional_header_prefix_attributes = conf.additional_header_prefix_attributes + } + local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil, + create_tracer_obj, inject_conf, plugin_info) + if not tracer then + core.log.error("failed to fetch tracer object: ", err) + return + end + + if #tracing.spans == 0 then + return + end + span.start_time = tracing.spans[1].start_time + local root_span = tracing.root_span + local spans = tracing.spans + for _, idx in ipairs(root_span.child_ids or {}) do + create_child_span(tracer, root_span_ctx, spans, spans[idx]) end end --- body_filter maybe not called because of empty http body response --- so we need to check if the span has finished in log phase function _M.log(conf, api_ctx) if api_ctx.otel_context_token then -- ctx:detach() is not necessary, because of ctx is stored in ngx.ctx local upstream_status = core.response.get_upstream_status(api_ctx) -- get span from current context - local span = context:current():span() + local ctx = context:current() + local span = ctx:span() if upstream_status and upstream_status >= 500 then span:set_status(span_status.ERROR, "upstream response status: " .. 
upstream_status) end + inject_core_spans(ctx, api_ctx, conf) + span:set_attributes(attr.int("http.status_code", upstream_status), + attr.int("http.response.status_code", upstream_status)) + update_time() span:finish() end end diff --git a/apisix/secret.lua b/apisix/secret.lua index 8ad1be260012..7ec526411675 100644 --- a/apisix/secret.lua +++ b/apisix/secret.lua @@ -18,6 +18,7 @@ local require = require local core = require("apisix.core") local string = require("apisix.core.string") +local tracer = require("apisix.tracer") local local_conf = require("apisix.core.config_local").local_conf() @@ -28,6 +29,7 @@ local byte = string.byte local type = type local pcall = pcall local pairs = pairs +local ngx = ngx local _M = {} @@ -148,6 +150,8 @@ local function fetch_by_uri_secret(secret_uri) return nil, "no secret conf, secret_uri: " .. secret_uri end + local span = tracer.start(ngx.ctx, "fetch_secret", tracer.kind.client) local ok, sm = pcall(require, "apisix.secret." .. opts.manager) if not ok then + tracer.finish(ngx.ctx, span, tracer.status.ERROR, "no secret manager: " .. opts.manager) return nil, "no secret manager: " .. 
opts.manager @@ -155,9 +159,11 @@ local function fetch_by_uri_secret(secret_uri) local value, err = sm.get(conf, opts.key) if err then + tracer.finish(ngx.ctx, span, tracer.status.ERROR, err) return nil, err end + tracer.finish(ngx.ctx, span) return value end diff --git a/apisix/ssl/router/radixtree_sni.lua b/apisix/ssl/router/radixtree_sni.lua index 6104dcb10bc9..8736b608ecbe 100644 --- a/apisix/ssl/router/radixtree_sni.lua +++ b/apisix/ssl/router/radixtree_sni.lua @@ -21,6 +21,7 @@ local apisix_ssl = require("apisix.ssl") local secret = require("apisix.secret") local ngx_ssl = require("ngx.ssl") local config_util = require("apisix.core.config_util") +local tracer = require("apisix.tracer") local ngx = ngx local ipairs = ipairs local type = type @@ -169,6 +170,7 @@ function _M.match_and_set(api_ctx, match_only, alt_sni) core.log.debug("sni: ", sni) + local span = tracer.start(api_ctx.ngx_ctx, "sni_radixtree_match", tracer.kind.internal) local sni_rev = sni:reverse() local ok = radixtree_router:dispatch(sni_rev, nil, api_ctx) if not ok then @@ -177,9 +179,10 @@ function _M.match_and_set(api_ctx, match_only, alt_sni) -- with it sometimes core.log.error("failed to find any SSL certificate by SNI: ", sni) end + tracer.finish(api_ctx.ngx_ctx, span, tracer.status.ERROR, "failed match SNI") return false end - + tracer.finish(api_ctx.ngx_ctx, span) if api_ctx.matched_sni == "*" then -- wildcard matches everything, no need for further validation diff --git a/apisix/tracer.lua b/apisix/tracer.lua new file mode 100644 index 000000000000..efa8aa5eadd5 --- /dev/null +++ b/apisix/tracer.lua @@ -0,0 +1,134 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. 
+-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +local table = require("apisix.core.table") +local tablepool = require("tablepool") +local span = require("apisix.utils.span") +local span_kind = require("opentelemetry.trace.span_kind") +local span_status = require("opentelemetry.trace.span_status") +local local_conf = require("apisix.core.config_local").local_conf() +local ipairs = ipairs +local ngx = ngx + +local enable_tracing = false +if ngx.config.subsystem == "http" and type(local_conf.apisix.tracing) == "boolean" then + enable_tracing = local_conf.apisix.tracing +end + +local _M = { + kind = span_kind, + status = span_status, + span_state = {}, +} + +function _M.start(ctx, name, kind) + if not enable_tracing or not ctx then + return + end + + local tracing = ctx and ctx.tracing + if not tracing then + local root_span = span.new() + tracing = tablepool.fetch("tracing", 0, 8) + tracing.spans = tablepool.fetch("tracing_spans", 20, 0) + tracing.root_span = root_span + tracing.current_span = root_span + table.insert(tracing.spans, root_span) + root_span.id = 1 + ctx.tracing = tracing + end + if tracing.skip then + return + end + + local spans = tracing.spans + local sp = span.new(name, kind) + + table.insert(spans, sp) + local id = #spans + sp.id = id + local parent = tracing.current_span + if parent then + sp:set_parent(parent.id) + parent:append_child(id) + end + tracing.current_span = sp + return sp +end + + +local function 
finish_span(spans, sp, code, message) + if not sp or sp.end_time then + return + end + for _, id in ipairs(sp.child_ids or {}) do + finish_span(spans, spans[id]) + end + if code then + sp:set_status(code, message) + end + sp:finish() +end + + +function _M.finish(ctx, sp, code, message) + local tracing = ctx and ctx.tracing + if not tracing then + return + end + + sp = sp or tracing.current_span + if not sp then + return + end + + finish_span(tracing.spans, sp, code, message) + if sp == tracing.root_span then + return + end + tracing.current_span = tracing.spans[sp.parent_id] +end + + +function _M.release(ctx) + local tracing = ctx and ctx.tracing + if not tracing then + return + end + + for _, sp in ipairs(tracing.spans) do + sp:release() + end + tablepool.release("tracing_spans", tracing.spans) + tablepool.release("tracing", tracing) +end + + +function _M.finish_all(ctx, code, message) + local tracing = ctx and ctx.tracing + if not tracing then + return + end + + local spans = tracing.spans + tracing.current_span = tracing.root_span + for _, id in ipairs(tracing.root_span.child_ids or {}) do + finish_span(spans, spans[id], code, message) + end +end + + +return _M diff --git a/apisix/utils/span.lua b/apisix/utils/span.lua new file mode 100644 index 000000000000..d84fb347e08c --- /dev/null +++ b/apisix/utils/span.lua @@ -0,0 +1,102 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +local tablepool = require("tablepool") +local util = require("opentelemetry.util") +local span_status = require("opentelemetry.trace.span_status") +local setmetatable = setmetatable +local table = table +local select = select +local pool_name = "opentelemetry_span" +local update_time = ngx.update_time + +local _M = {} + + +local mt = { + __index = _M +} + +local function get_time() + update_time() + return util.time_nano() +end + + +function _M.new(name, kind) + local self = tablepool.fetch(pool_name, 0, 16) + self.start_time = get_time() + self.name = name + self.kind = kind + return setmetatable(self, mt) +end + + +function _M.append_child(self, child_id) + if not self.child_ids then + self.child_ids = table.new(10, 0) + end + table.insert(self.child_ids, child_id) +end + + +function _M.set_parent(self, parent_id) + self.parent_id = parent_id +end + + +function _M.release(self) + tablepool.release(pool_name, self) +end + + +function _M.set_status(self, code, message) + code = span_status.validate(code) + local status = self.status + if not status then + status = { + code = code, + message = "" + } + self.status = status + else + status.code = code + end + + if code == span_status.ERROR then + status.message = message + end +end + + +function _M.set_attributes(self, ...) + if not self.attributes then + self.attributes = table.new(10, 0) + end + local count = select('#', ...) + for i = 1, count do + local attr = select(i, ...) 
+ table.insert(self.attributes, attr) + end +end + + +function _M.finish(self) + self.end_time = get_time() +end + + +return _M diff --git a/apisix/utils/upstream.lua b/apisix/utils/upstream.lua index 19bdd1a57b71..f94fdb531d59 100644 --- a/apisix/utils/upstream.lua +++ b/apisix/utils/upstream.lua @@ -20,8 +20,8 @@ local ipairs = ipairs local type = type local tostring = tostring local resource = require("apisix.resource") - - +local tracer = require("apisix.tracer") +local ngx = ngx local _M = {} @@ -81,6 +81,7 @@ _M.compare_upstream_node = compare_upstream_node local function parse_domain_for_nodes(nodes) + local span = tracer.start(ngx.ctx, "resolve_dns", tracer.kind.internal) local new_nodes = core.table.new(#nodes, 0) for _, node in ipairs(nodes) do local host = node.host @@ -101,6 +102,7 @@ local function parse_domain_for_nodes(nodes) core.table.insert(new_nodes, node) end end + tracer.finish(ngx.ctx, span) return new_nodes end _M.parse_domain_for_nodes = parse_domain_for_nodes diff --git a/conf/config.yaml.example b/conf/config.yaml.example index 67fd190d872f..4d78ae5af330 100644 --- a/conf/config.yaml.example +++ b/conf/config.yaml.example @@ -149,6 +149,10 @@ apisix: count: 512 # Cache size neg_ttl: 60 # Negative cache TTL neg_count: 512 # Negative cache size + + tracing: false # Enable comprehensive request lifecycle tracing (SSL/SNI, rewrite, access, header_filter, body_filter, and log). + # When disabled, OpenTelemetry collects only a single span per request. + nginx_config: # Config for render the template to generate nginx.conf # user: root # Set the execution user of the worker process. This is only # effective if the master process runs with super-user privileges. 
diff --git a/docs/en/latest/plugins/opentelemetry.md b/docs/en/latest/plugins/opentelemetry.md index 061c26212dd5..e0ab3eb71b0e 100644 --- a/docs/en/latest/plugins/opentelemetry.md +++ b/docs/en/latest/plugins/opentelemetry.md @@ -95,6 +95,21 @@ curl http://127.0.0.1:9180/apisix/admin/plugin_metadata/opentelemetry -H "X-API- The examples below demonstrate how you can work with the `opentelemetry` Plugin for different scenarios. +### Enable Comprehensive Request Lifecycle Tracing + +:::note + +Enabling comprehensive tracing adds span creation and export overhead across the request lifecycle, which may impact throughput and latency. + +::: + +To enable comprehensive tracing across the request lifecycle (SSL/SNI, rewrite/access, header_filter/body_filter, and log), set the `tracing` field to `true` in the configuration file: + +```yaml title="config.yaml" +apisix: + tracing: true +``` + ### Enable `opentelemetry` Plugin By default, the `opentelemetry` Plugin is disabled in APISIX. To enable, add the Plugin to your configuration file as such: @@ -152,38 +167,124 @@ You should receive an `HTTP/1.1 200 OK` response. 
In OpenTelemetry collector's log, you should see information similar to the following: ```text -2024-02-18T17:14:03.825Z info ResourceSpans #0 +info ResourceSpans #0 Resource SchemaURL: Resource attributes: -> telemetry.sdk.language: Str(lua) -> telemetry.sdk.name: Str(opentelemetry-lua) -> telemetry.sdk.version: Str(0.1.1) - -> hostname: Str(e34673e24631) + -> hostname: Str(RC) -> service.name: Str(APISIX) ScopeSpans #0 ScopeSpans SchemaURL: InstrumentationScope opentelemetry-lua Span #0 - Trace ID : fbd0a38d4ea4a128ff1a688197bc58b0 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0adf392b5c84111 + ID : d9816bbaef5ee63d + Name : http_router_match + Kind : Internal + Start time : 2026-02-04 05:57:04.846881024 +0000 UTC + End time : 2026-02-04 05:57:04.846951936 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #1 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : d0adf392b5c84111 + Name : apisix.phase.access + Kind : Server + Start time : 2026-02-04 05:57:04.846562048 +0000 UTC + End time : 2026-02-04 05:57:04.84724608 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #2 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : 4eb72d55359331fa + Name : resolve_dns + Kind : Internal + Start time : 2026-02-04 05:57:04.847251968 +0000 UTC + End time : 2026-02-04 05:57:04.84726912 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #3 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : de572aad9bad3b47 + Name : apisix.phase.header_filter + Kind : Server + Start time : 2026-02-04 05:57:04.84793088 +0000 UTC + End time : 2026-02-04 05:57:04.848005888 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + 
DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #4 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : 0baddeee6e5d500d + Name : apisix.phase.body_filter + Kind : Server + Start time : 2026-02-04 05:57:04.848007936 +0000 UTC + End time : 2026-02-04 05:57:04.848103936 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #5 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : d57d53882c40612a + Name : apisix.phase.log.plugins.opentelemetry + Kind : Internal + Start time : 2026-02-04 05:57:04.84823296 +0000 UTC + End time : 2026-02-04 05:57:04.848385024 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #6 + Trace ID : a5499493b517a3333578c2ac4fad3f4d Parent ID : - ID : af3dc7642104748a + ID : d0c33adf97b099f3 Name : GET /anything Kind : Server - Start time : 2024-02-18 17:14:03.763244032 +0000 UTC - End time : 2024-02-18 17:14:03.920229888 +0000 UTC + Start time : 2026-02-04 05:57:04.84655488 +0000 UTC + End time : 2026-02-04 05:57:04.84839296 +0000 UTC Status code : Unset Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 Attributes: - -> net.host.name: Str(127.0.0.1) + -> net.host.name: Str(localhost) -> http.method: Str(GET) -> http.scheme: Str(http) -> http.target: Str(/anything) - -> http.user_agent: Str(curl/7.64.1) + -> http.user_agent: Str(curl/7.81.0) + -> http.request.method: Str(GET) + -> url.scheme: Str(http) + -> uri.path: Str(/anything) + -> user_agent.original: Str(curl/7.81.0) -> apisix.route_id: Str(otel-tracing-route) -> apisix.route_name: Empty() -> http.route: Str(/anything) -> http.status_code: Int(200) -{"kind": "exporter", "data_type": "traces", "name": "debug"} + -> http.response.status_code: Int(200) +{"resource": {"service.instance.id": "ed436c1a-6ee7-46b0-ad58-527d0aaf4ade", 
"service.name": "otelcol-contrib", "service.version": "0.144.0"}, "otelcol.component.id": "debug", "otelcol.component.kind": "exporter", "otelcol.signal": "traces"} ``` To visualize these traces, you can export your telemetry to backend Services, such as Zipkin and Prometheus. See [exporters](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter) for more details. diff --git a/docs/zh/latest/plugins/opentelemetry.md b/docs/zh/latest/plugins/opentelemetry.md index f22d90c932b3..ec57697049c4 100644 --- a/docs/zh/latest/plugins/opentelemetry.md +++ b/docs/zh/latest/plugins/opentelemetry.md @@ -94,6 +94,21 @@ curl http://127.0.0.1:9180/apisix/admin/plugin_metadata/opentelemetry -H "X-API- 以下示例展示了如何在不同场景下使用 `opentelemetry` 插件。 +### 启用全面的请求生命周期追踪 + +:::note + +开启全面追踪会在请求生命周期的各个阶段引入 span 的创建与上报开销,会对 APISIX 吞吐量和延迟产生影响。 + +::: + +要在请求生命周期的各个阶段(包括 SSL/SNI、rewrite、access、header_filter、body_filter、log)启用全面追踪,请在配置文件中将 `tracing` 字段设置为 `true`: + +```yaml title="config.yaml" +apisix: + tracing: true +``` + ### 启用 opentelemetry 插件 默认情况下,APISIX 中的 `opentelemetry` 插件是禁用的。要启用它,请将插件添加到配置文件中,如下所示: @@ -151,38 +166,124 @@ curl "http://127.0.0.1:9080/anything" 在 OpenTelemetry collector 的日志中,你应该看到类似以下的信息: ```text -2024-02-18T17:14:03.825Z info ResourceSpans #0 +info ResourceSpans #0 Resource SchemaURL: Resource attributes: - -> telemetry.sdk.language: Str(lua) - -> telemetry.sdk.name: Str(opentelemetry-lua) - -> telemetry.sdk.version: Str(0.1.1) - -> hostname: Str(e34673e24631) - -> service.name: Str(APISIX) + -> telemetry.sdk.language: Str(lua) + -> telemetry.sdk.name: Str(opentelemetry-lua) + -> telemetry.sdk.version: Str(0.1.1) + -> hostname: Str(RC) + -> service.name: Str(APISIX) ScopeSpans #0 ScopeSpans SchemaURL: InstrumentationScope opentelemetry-lua Span #0 - Trace ID : fbd0a38d4ea4a128ff1a688197bc58b0 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0adf392b5c84111 + ID : d9816bbaef5ee63d + Name : http_router_match + Kind : Internal + 
Start time : 2026-02-04 05:57:04.846881024 +0000 UTC + End time : 2026-02-04 05:57:04.846951936 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #1 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : d0adf392b5c84111 + Name : apisix.phase.access + Kind : Server + Start time : 2026-02-04 05:57:04.846562048 +0000 UTC + End time : 2026-02-04 05:57:04.84724608 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #2 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : 4eb72d55359331fa + Name : resolve_dns + Kind : Internal + Start time : 2026-02-04 05:57:04.847251968 +0000 UTC + End time : 2026-02-04 05:57:04.84726912 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #3 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : de572aad9bad3b47 + Name : apisix.phase.header_filter + Kind : Server + Start time : 2026-02-04 05:57:04.84793088 +0000 UTC + End time : 2026-02-04 05:57:04.848005888 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #4 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : 0baddeee6e5d500d + Name : apisix.phase.body_filter + Kind : Server + Start time : 2026-02-04 05:57:04.848007936 +0000 UTC + End time : 2026-02-04 05:57:04.848103936 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #5 + Trace ID : a5499493b517a3333578c2ac4fad3f4d + Parent ID : d0c33adf97b099f3 + ID : d57d53882c40612a + Name : apisix.phase.log.plugins.opentelemetry + Kind : Internal + Start time : 2026-02-04 05:57:04.84823296 +0000 UTC + End time : 2026-02-04 
05:57:04.848385024 +0000 UTC + Status code : Unset + Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 +Span #6 + Trace ID : a5499493b517a3333578c2ac4fad3f4d Parent ID : - ID : af3dc7642104748a + ID : d0c33adf97b099f3 Name : GET /anything Kind : Server - Start time : 2024-02-18 17:14:03.763244032 +0000 UTC - End time : 2024-02-18 17:14:03.920229888 +0000 UTC + Start time : 2026-02-04 05:57:04.84655488 +0000 UTC + End time : 2026-02-04 05:57:04.84839296 +0000 UTC Status code : Unset Status message : + DroppedAttributesCount: 0 + DroppedEventsCount: 0 + DroppedLinksCount: 0 Attributes: - -> net.host.name: Str(127.0.0.1) - -> http.method: Str(GET) - -> http.scheme: Str(http) - -> http.target: Str(/anything) - -> http.user_agent: Str(curl/7.64.1) - -> apisix.route_id: Str(otel-tracing-route) - -> apisix.route_name: Empty() - -> http.route: Str(/anything) - -> http.status_code: Int(200) -{"kind": "exporter", "data_type": "traces", "name": "debug"} + -> net.host.name: Str(localhost) + -> http.method: Str(GET) + -> http.scheme: Str(http) + -> http.target: Str(/anything) + -> http.user_agent: Str(curl/7.81.0) + -> http.request.method: Str(GET) + -> url.scheme: Str(http) + -> uri.path: Str(/anything) + -> user_agent.original: Str(curl/7.81.0) + -> apisix.route_id: Str(otel-tracing-route) + -> apisix.route_name: Empty() + -> http.route: Str(/anything) + -> http.status_code: Int(200) + -> http.response.status_code: Int(200) +{"resource": {"service.instance.id": "ed436c1a-6ee7-46b0-ad58-527d0aaf4ade", "service.name": "otelcol-contrib", "service.version": "0.144.0"}, "otelcol.component.id": "debug", "otelcol.component.kind": "exporter", "otelcol.signal": "traces"} ``` 要可视化这些追踪,你可以将 traces 导出到后端服务,例如 Zipkin 和 Prometheus。有关更多详细信息,请参阅[exporters](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter)。 diff --git a/t/plugin/opentelemetry6.t b/t/plugin/opentelemetry6.t new file mode 100644 index 
000000000000..003e191f5268 --- /dev/null +++ b/t/plugin/opentelemetry6.t @@ -0,0 +1,236 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +BEGIN { + sub set_env_from_file { + my ($env_name, $file_path) = @_; + + open my $fh, '<', $file_path or die $!; + my $content = do { local $/; <$fh> }; + close $fh; + + $ENV{$env_name} = $content; + } + # set env + set_env_from_file('TEST_CERT', 't/certs/apisix.crt'); + set_env_from_file('TEST_KEY', 't/certs/apisix.key'); + set_env_from_file('TEST2_CERT', 't/certs/test2.crt'); + set_env_from_file('TEST2_KEY', 't/certs/test2.key'); +} +use t::APISIX 'no_plan'; +add_block_preprocessor(sub { + my ($block) = @_; + + if (!$block->extra_yaml_config) { + my $extra_yaml_config = <<_EOC_; +apisix: + tracing: true +plugins: + - opentelemetry +_EOC_ + $block->set_value("extra_yaml_config", $extra_yaml_config); + } + + if (!$block->request) { + $block->set_value("request", "GET /t"); + } + + if (!defined $block->response_body) { + $block->set_value("response_body", "passed\n"); + } + $block; +}); +repeat_each(1); +no_long_string(); +no_root_location(); +log_level("debug"); + +run_tests; + +__DATA__ + +=== TEST 1: empty file +--- exec +echo '' > ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval 
+qr// + + + +=== TEST 2: add plugin metadata +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/plugin_metadata/opentelemetry', + ngx.HTTP_PUT, + [[{ + "batch_span_processor": { + "max_export_batch_size": 1, + "inactive_timeout": 0.5 + }, + "collector": { + "address": "127.0.0.1:4318", + "request_timeout": 3, + "request_headers": { + "foo": "bar" + } + }, + "trace_id_source": "x-request-id" + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } + + + +=== TEST 3: set route +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "opentelemetry": { + "sampler": { + "name": "always_on" + } + } + }, + "upstream": { + "nodes": { + "test1.com:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t + + + +=== TEST 4: set ssl with two certs and keys in env +--- config + location /t { + content_by_lua_block { + local core = require("apisix.core") + local t = require("lib.test_admin") + + local data = { + snis = {"test.com"}, + key = "$env://TEST_KEY", + cert = "$env://TEST_CERT", + keys = {"$env://TEST2_KEY"}, + certs = {"$env://TEST2_CERT"} + } + + local code, body = t.test('/apisix/admin/ssls/1', + ngx.HTTP_PUT, + core.json.encode(data), + [[{ + "value": { + "snis": ["test.com"], + "key": "$env://TEST_KEY", + "cert": "$env://TEST_CERT", + "keys": ["$env://TEST2_KEY"], + "certs": ["$env://TEST2_CERT"] + }, + "key": "/apisix/ssls/1" + }]] + ) + + ngx.status = code + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed + + + +=== TEST 5: trigger SSL match with SNI +--- init_by_lua_block + require "resty.core" + apisix = require("apisix") + core = require("apisix.core") + apisix.http_init() + + local utils = 
require("apisix.core.utils") + utils.dns_parse = function (domain) -- mock: DNS parser + if domain == "test1.com" then + return {address = "127.0.0.2"} + end + + error("unknown domain: " .. domain) + end +--- exec +curl -k --resolve "test.com:1994:127.0.0.1" https://test.com:1994/opentracing +--- wait: 5 +--- response_body +opentracing + + + +=== TEST 6: check sni_radixtree_match span +--- max_size: 1048576 +--- exec +tail -n 18 ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval +qr/.*sni_radixtree_match.*/ + + + +=== TEST 7: check resolve_dns span +--- max_size: 1048576 +--- exec +tail -n 18 ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval +qr/.*resolve_dns.*/ + + + +=== TEST 8: check apisix.phase.access span +--- max_size: 1048576 +--- exec +tail -n 18 ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval +qr/.*apisix.phase.access.*/ + + + +=== TEST 9: check apisix.phase.header_filter span +--- max_size: 1048576 +--- exec +tail -n 18 ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval +qr/.*apisix.phase.header_filter.*/