|
| 1 | +/* |
| 2 | + This file is part of libhttpserver |
| 3 | + Copyright (C) 2011-2026 Sebastiano Merlino |
| 4 | +
|
| 5 | + This library is free software; you can redistribute it and/or |
| 6 | + modify it under the terms of the GNU Lesser General Public |
| 7 | + License as published by the Free Software Foundation; either |
| 8 | + version 2.1 of the License, or (at your option) any later version. |
| 9 | +
|
| 10 | + This library is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | + Lesser General Public License for more details. |
| 14 | +
|
| 15 | + You should have received a copy of the GNU Lesser General Public |
| 16 | + License along with this library; if not, write to the Free Software |
| 17 | + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 |
| 18 | + USA |
| 19 | +*/ |
| 20 | + |
| 21 | +// TASK-027: bespoke segment-trie storage for the parameterized + prefix |
| 22 | +// route tier. Each node represents one URL path segment; children are |
| 23 | +// keyed by the literal segment string, with a single optional wildcard |
| 24 | +// child carrying the parameter name (e.g. "{id}" -> wildcard_name_ = "id"). |
| 25 | +// |
| 26 | +// The architecture spec (§4.7) commits only to the OUTER shape (three-tier |
| 27 | +// + cache); the radix-tree implementation choice is intentionally left |
| 28 | +// open. A segment trie is sufficient for the 9-method / N-segment |
| 29 | +// registration shape libhttpserver supports and avoids dragging in a |
| 30 | +// vendored library (which would conflict with the project's tightly |
| 31 | +// curated source tree and LGPL-2.1 distribution). |
| 32 | +// |
| 33 | +// Internal header — only reachable when compiling libhttpserver. |
| 34 | +#if !defined(HTTPSERVER_COMPILATION) |
| 35 | +#error "radix_tree.hpp is internal; only reachable when compiling libhttpserver." |
| 36 | +#endif |
| 37 | + |
| 38 | +#ifndef SRC_HTTPSERVER_DETAIL_RADIX_TREE_HPP_ |
| 39 | +#define SRC_HTTPSERVER_DETAIL_RADIX_TREE_HPP_ |
| 40 | + |
| 41 | +#include <cstddef> |
| 42 | +#include <memory> |
| 43 | +#include <optional> |
| 44 | +#include <string> |
| 45 | +#include <string_view> |
| 46 | +#include <unordered_map> |
| 47 | +#include <utility> |
| 48 | +#include <vector> |
| 49 | + |
| 50 | +#include "httpserver/http_utils.hpp" |
| 51 | + |
| 52 | +namespace httpserver { |
| 53 | +namespace detail { |
| 54 | + |
// radix_match<T>: the output record filled in by radix_tree<T>::find.
//
//   entry            Non-owning pointer to the matched value stored inside
//                    the tree. It stays valid only until the tree is next
//                    mutated.
//   captures         (parameter-name, captured-value) pairs, listed in the
//                    order the wildcard segments occur along the matched
//                    path.
//   is_prefix_match  True when `entry` was taken from an is_prefix=true
//                    registration. That includes a request that ends
//                    exactly on the prefix node; it is false only when an
//                    exact registration matched.
template <typename T>
struct radix_match {
    const T* entry{nullptr};
    std::vector<std::pair<std::string, std::string>> captures;
    bool is_prefix_match{false};
};
| 67 | + |
// radix_node<T>: one trie node, standing for a single URL path segment.
//
// Outgoing edges come in two flavours:
//   - children_        literal edges, looked up by the exact segment text;
//   - wildcard_child_  at most one edge that accepts any single segment.
//
// wildcard_name_ is stored on the wildcard child itself (not on its
// parent) and holds the parameter name without braces: "{id}" -> "id".
//
// A node can terminate two kinds of registration independently:
// exact_terminus_ (is_prefix=false) and prefix_terminus_ (is_prefix=true).
// They occupy separate slots so both may be present on the same node.
// Registering the same path both ways (e.g. /static as prefix and as
// exact) is described as a user error to be rejected at registration
// time; nothing in this storage layer prevents it.
template <typename T>
struct radix_node {
    // Literal children, keyed by segment text.
    std::unordered_map<std::string, std::unique_ptr<radix_node<T>>> children_;
    // Optional child matching any one segment; null when absent.
    std::unique_ptr<radix_node<T>> wildcard_child_;
    // Parameter name of this node when it is a wildcard child, else empty.
    std::string wildcard_name_;
    // Value of an exact (is_prefix=false) registration ending here.
    std::optional<T> exact_terminus_;
    // Value of a prefix (is_prefix=true) registration ending here.
    std::optional<T> prefix_terminus_;
};
| 85 | + |
| 86 | +// radix_tree<T>: segment-trie. Inserts route paths split on '/', supports |
| 87 | +// `{name}` wildcard segments, and carries a `is_prefix` flag per insertion |
| 88 | +// so the same tree backs both parameterized exact and prefix registrations. |
| 89 | +// |
| 90 | +// Concurrency: this type is NOT internally synchronized. The owning |
| 91 | +// webserver_impl protects all three tier structures (exact_routes_, |
| 92 | +// param_and_prefix_routes_, regex_routes_) with a single std::shared_mutex. |
| 93 | +template <typename T> |
| 94 | +class radix_tree { |
| 95 | + public: |
| 96 | + radix_tree() : root_(std::make_unique<radix_node<T>>()) {} |
| 97 | + |
| 98 | + // Insert `path` with the given entry. is_prefix selects whether the |
| 99 | + // entry terminates in `prefix_terminus_` (and matches any deeper |
| 100 | + // request path) or `exact_terminus_` (and matches only this path). |
| 101 | + // The radix_tree itself does not look inside `entry` — the caller |
| 102 | + // (webserver_impl) is responsible for keeping the is_prefix argument |
| 103 | + // consistent with route_entry::is_prefix, which is the §4.7 source |
| 104 | + // of truth. Replaces an existing terminus of the same kind. |
| 105 | + void insert(std::string_view path, T entry, bool is_prefix = false) { |
| 106 | + radix_node<T>* node = root_.get(); |
| 107 | + const auto segments = tokenize(path); |
| 108 | + for (const std::string& seg : segments) { |
| 109 | + node = descend_or_create(node, seg); |
| 110 | + } |
| 111 | + if (is_prefix) { |
| 112 | + node->prefix_terminus_ = std::move(entry); |
| 113 | + } else { |
| 114 | + node->exact_terminus_ = std::move(entry); |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + // Find the most specific match for `path`. Returns true on hit and |
| 119 | + // populates `out`. Lookup preference (most specific first): |
| 120 | + // 1. exact_terminus_ on the matched node, if every request segment |
| 121 | + // consumed by exact-or-wildcard descent. |
| 122 | + // 2. prefix_terminus_ on the deepest ancestor that has one. |
| 123 | + bool find(std::string_view path, radix_match<T>& out) const { |
| 124 | + out = {}; |
| 125 | + const auto segments = tokenize(path); |
| 126 | + const radix_node<T>* node = root_.get(); |
| 127 | + |
| 128 | + // Root path "/" has no segments. Match the root exact terminus |
| 129 | + // first (most specific), falling back to the root prefix terminus. |
| 130 | + if (segments.empty()) { |
| 131 | + if (node->exact_terminus_.has_value()) { |
| 132 | + out.entry = &node->exact_terminus_.value(); |
| 133 | + out.is_prefix_match = false; |
| 134 | + return true; |
| 135 | + } |
| 136 | + if (node->prefix_terminus_.has_value()) { |
| 137 | + out.entry = &node->prefix_terminus_.value(); |
| 138 | + out.is_prefix_match = true; |
| 139 | + return true; |
| 140 | + } |
| 141 | + return false; |
| 142 | + } |
| 143 | + |
| 144 | + // Track best prefix candidate seen during descent (deepest wins). |
| 145 | + const T* best_prefix = nullptr; |
| 146 | + std::vector<std::pair<std::string, std::string>> best_prefix_caps; |
| 147 | + |
| 148 | + // Root prefix terminus: a `register_prefix("/")` matches every |
| 149 | + // request, so seed best_prefix with it before walking deeper. |
| 150 | + if (node->prefix_terminus_.has_value()) { |
| 151 | + best_prefix = &node->prefix_terminus_.value(); |
| 152 | + best_prefix_caps.clear(); |
| 153 | + } |
| 154 | + std::vector<std::pair<std::string, std::string>> caps; |
| 155 | + |
| 156 | + for (std::size_t i = 0; i < segments.size(); ++i) { |
| 157 | + const std::string& seg = segments[i]; |
| 158 | + // Prefer exact child over wildcard. |
| 159 | + auto it = node->children_.find(seg); |
| 160 | + if (it != node->children_.end()) { |
| 161 | + node = it->second.get(); |
| 162 | + } else if (node->wildcard_child_) { |
| 163 | + node = node->wildcard_child_.get(); |
| 164 | + caps.emplace_back(node->wildcard_name_, seg); |
| 165 | + } else { |
| 166 | + // No way forward: best we can do is the deepest prefix |
| 167 | + // candidate seen (or nothing). |
| 168 | + break; |
| 169 | + } |
| 170 | + if (node->prefix_terminus_.has_value()) { |
| 171 | + best_prefix = &node->prefix_terminus_.value(); |
| 172 | + best_prefix_caps = caps; |
| 173 | + } |
| 174 | + // If we just consumed the last request segment AND this node |
| 175 | + // carries an exact terminus, that beats any prefix candidate. |
| 176 | + if (i + 1 == segments.size() |
| 177 | + && node->exact_terminus_.has_value()) { |
| 178 | + out.entry = &node->exact_terminus_.value(); |
| 179 | + out.captures = std::move(caps); |
| 180 | + out.is_prefix_match = false; |
| 181 | + return true; |
| 182 | + } |
| 183 | + } |
| 184 | + |
| 185 | + if (best_prefix != nullptr) { |
| 186 | + out.entry = best_prefix; |
| 187 | + out.captures = std::move(best_prefix_caps); |
| 188 | + out.is_prefix_match = true; |
| 189 | + return true; |
| 190 | + } |
| 191 | + return false; |
| 192 | + } |
| 193 | + |
| 194 | + // Remove the entry at `path`. is_prefix selects which terminus to |
| 195 | + // clear. Returns true iff a terminus was actually cleared. |
| 196 | + bool remove(std::string_view path, bool is_prefix) { |
| 197 | + radix_node<T>* node = root_.get(); |
| 198 | + const auto segments = tokenize(path); |
| 199 | + for (const std::string& seg : segments) { |
| 200 | + auto it = node->children_.find(seg); |
| 201 | + if (it != node->children_.end()) { |
| 202 | + node = it->second.get(); |
| 203 | + continue; |
| 204 | + } |
| 205 | + // Walk wildcard child if seg matches the {name} placeholder |
| 206 | + // shape. We compare the exact registered key, so removal of |
| 207 | + // /users/{id} requires the same {id} string. |
| 208 | + if (node->wildcard_child_ && is_wildcard_segment(seg)) { |
| 209 | + node = node->wildcard_child_.get(); |
| 210 | + continue; |
| 211 | + } |
| 212 | + return false; |
| 213 | + } |
| 214 | + if (is_prefix) { |
| 215 | + if (!node->prefix_terminus_.has_value()) return false; |
| 216 | + node->prefix_terminus_.reset(); |
| 217 | + } else { |
| 218 | + if (!node->exact_terminus_.has_value()) return false; |
| 219 | + node->exact_terminus_.reset(); |
| 220 | + } |
| 221 | + return true; |
| 222 | + // Note: we do not collapse empty branches. This is intentional — |
| 223 | + // dead nodes are cheap (a few pointers) and avoiding rebalancing |
| 224 | + // keeps the data structure trivially safe under the writer lock. |
| 225 | + } |
| 226 | + |
| 227 | + bool empty() const noexcept { |
| 228 | + return is_node_empty(root_.get()); |
| 229 | + } |
| 230 | + |
| 231 | + private: |
| 232 | + static std::vector<std::string> tokenize(std::string_view path) { |
| 233 | + // tokenize_url takes a std::string by value via string_split; copy |
| 234 | + // the view's contents to call it. |
| 235 | + return ::httpserver::http::http_utils::tokenize_url(std::string{path}); |
| 236 | + } |
| 237 | + |
| 238 | + static bool is_wildcard_segment(const std::string& seg) noexcept { |
| 239 | + return seg.size() >= 2 && seg.front() == '{' && seg.back() == '}'; |
| 240 | + } |
| 241 | + |
| 242 | + static radix_node<T>* descend_or_create(radix_node<T>* node, |
| 243 | + const std::string& seg) { |
| 244 | + if (is_wildcard_segment(seg)) { |
| 245 | + // Strip the braces: "{id}" -> "id". |
| 246 | + std::string name = seg.substr(1, seg.size() - 2); |
| 247 | + if (!node->wildcard_child_) { |
| 248 | + node->wildcard_child_ = std::make_unique<radix_node<T>>(); |
| 249 | + node->wildcard_child_->wildcard_name_ = std::move(name); |
| 250 | + } |
| 251 | + // If a wildcard child already exists with a different name, |
| 252 | + // we keep the first registered name. Re-registering with a |
| 253 | + // different name on the same path is a user error and would |
| 254 | + // be caught by the upstream conflict check before insert. |
| 255 | + return node->wildcard_child_.get(); |
| 256 | + } |
| 257 | + auto it = node->children_.find(seg); |
| 258 | + if (it == node->children_.end()) { |
| 259 | + it = node->children_.emplace(seg, |
| 260 | + std::make_unique<radix_node<T>>()).first; |
| 261 | + } |
| 262 | + return it->second.get(); |
| 263 | + } |
| 264 | + |
| 265 | + static bool is_node_empty(const radix_node<T>* n) noexcept { |
| 266 | + if (n == nullptr) return true; |
| 267 | + if (n->exact_terminus_.has_value() |
| 268 | + || n->prefix_terminus_.has_value()) return false; |
| 269 | + for (const auto& kv : n->children_) { |
| 270 | + if (!is_node_empty(kv.second.get())) return false; |
| 271 | + } |
| 272 | + if (n->wildcard_child_ |
| 273 | + && !is_node_empty(n->wildcard_child_.get())) return false; |
| 274 | + return true; |
| 275 | + } |
| 276 | + |
| 277 | + std::unique_ptr<radix_node<T>> root_; |
| 278 | +}; |
| 279 | + |
| 280 | +} // namespace detail |
| 281 | +} // namespace httpserver |
| 282 | + |
| 283 | +#endif // SRC_HTTPSERVER_DETAIL_RADIX_TREE_HPP_ |
0 commit comments