From a0e6cf1420d6c61d64fd0f87005fa9973be15db2 Mon Sep 17 00:00:00 2001
From: Remi Dettai <remi.dettai@sekoia.io>
Date: Mon, 22 Jun 2026 17:48:25 +0200
Subject: [PATCH 1/2] Add test showing problem

---
 .../scenarii/es_compatibility/0031-regex.yaml | 449 ++++++++++++++----
 1 file changed, 351 insertions(+), 98 deletions(-)

diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
index 3a1d0e565e8..e194e973cd8 100644
--- a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
+++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
@@ -1,107 +1,360 @@
-# Basic regex match
-params:
-  size: 0
-json:
-  track_total_hits: true
-  query:
-    regexp:
-      type:
-        value: ".*event"
-expected:
-  hits:
-    total:
-      value: 100
-      relation: "eq"
----
-# Regex always match from start to end (`(re)` equivalent to `^(re)$`)
-params:
-  size: 3
-json:
-  track_total_hits: true
-  query:
-    regexp:
-      type:
-        value: "event"
-expected:
-  hits:
-    total:
-      value: 0
-      relation: "eq"
+# # Basic regex match
+# params:
+#   size: 0
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       type:
+#         value: ".*event"
+# expected:
+#   hits:
+#     total:
+#       value: 100
+#       relation: "eq"
+# ---
+# # Regex always match from start to end (`(re)` equivalent to `^(re)$`)
+# params:
+#   size: 3
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       type:
+#         value: "event"
+# expected:
+#   hits:
+#     total:
+#       value: 0
+#       relation: "eq"
+# ---
+# # Regex with case_insensitive flag
+# params:
+#   size: 3
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       repo.name:
+#         # lowercased by the tokenizer
+#         value: "RUST.*"
+#         case_insensitive: true
+# expected:
+#   hits:
+#     total:
+#       value: 1
+#       relation: "eq"
+# ---
+# params:
+#   size: 3
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       type:
+#         # lowercased by the tokenizer
+#         value: "RUST.*"
+#         case_insensitive: false
+# expected:
+#   hits:
+#     total:
+#       value: 0
+#       relation: "eq"
+# ---
+# # ^ and $ are escaped when they are used as anchors, so
+# # ^pushevent$ only matches if the original term is "^pushevent$".
+# endpoint: "simple_es_compat/_search"
+# params:
+#   size: 3
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       keyword_text:
+#         value: "red$"
+# expected:
+#   hits:
+#     total:
+#       value: 0
+#       relation: "eq"
+# ---
+# endpoint: "simple_es_compat/_search"
+# params:
+#   size: 3
+# json:
+#   track_total_hits: true
+#   query:
+#     regexp:
+#       keyword_text:
+#         value: "gold$"
+# expected:
+#   hits:
+#     total:
+#       value: 1
+#       relation: "eq"
+# ---
+# # regex in query_string
+# params:
+#   size: 10
+# json:
+#   query:
+#     query_string:
+#       query: "type:/pushevent/"
+# expected:
+#   hits:
+#     total:
+#       value: 60
+#       relation: "eq"
 ---
-# Regex with case_insensitive flag
+# Many wildcards across a few fields (6 distinct fields, well within the 20-field limit).
+# All targeted fields are absent from the index so 0 hits are expected,
+# but the query must be accepted without error.
 params:
-  size: 3
-json:
-  track_total_hits: true
-  query:
-    regexp:
-      repo.name:
-        # lowercased by the tokenizer
-        value: "RUST.*"
-        case_insensitive: true
-expected:
-  hits:
-    total:
-      value: 1
-      relation: "eq"
----
-params:
-  size: 3
-json:
-  track_total_hits: true
-  query:
-    regexp:
-      type:
-        # lowercased by the tokenizer
-        value: "RUST.*"
-        case_insensitive: false
-expected:
-  hits:
-    total:
-      value: 0
-      relation: "eq"
----
-# ^ and $ are escaped when they are used as anchors, so
-# ^pushevent$ only matches if the original term is "^pushevent$".
-endpoint: "simple_es_compat/_search"
-params:
-  size: 3
+  size: 0
 json:
-  track_total_hits: true
   query:
-    regexp:
-      keyword_text:
-        value: "red$"
+    bool:
+      must:
+        - bool:
+            must:
+              - bool:
+                  must_not:
+                    - bool:
+                        should:
+                          - wildcard:
+                              registry.value:
+                                value: "*atlassian*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Agent*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*BlueJeans*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Cisco*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Discord*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Dropbox*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Docker*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*EIConnectorProxy*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*GoToMeeting*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Lifesize*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Mattermost*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Microsoft Edge Update*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*OneDrive*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*PTOneClick*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*slack*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Teams*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*TEHTRIS*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*VMware*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*W3DClient*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*Microsoft.lists*"
+                                case_insensitive: true
+                          - wildcard:
+                              registry.value:
+                                value: "*MicrosoftSaRA*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*atlassian*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*blujeanslauncher*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*dropbox*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*msiexec.exe*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*onedrivesetup.exe*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.executable:
+                                value: "*sara.exe*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*Letsignit*"
+                                case_insensitive: true
+                        minimum_should_match: 1
+              - bool:
+                  should:
+                    - bool:
+                        must:
+                          - bool:
+                              should:
+                                - wildcard:
+                                    registry.key:
+                                      value: "*Microsoft\\Windows\\CurrentVersion\\Run"
+                                      case_insensitive: true
+                                - wildcard:
+                                    registry.key:
+                                      value: "*Microsoft\\Windows\\CurrentVersion\\Policies\\Explorer\\Run"
+                                      case_insensitive: true
+                                - wildcard:
+                                    registry.key:
+                                      value: "*Microsoft\\Windows\\CurrentVersion\\RunOnce"
+                                      case_insensitive: true
+                              minimum_should_match: 1
+                          - term:
+                              action.properties.MessEventType:
+                                value: "SetValue"
+                          - bool:
+                              should:
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*C:\\Windows\\Temp\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*C:\\ProgramData\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*Recycle.bin\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*AppData*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*C:\\Temp\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*C:\\Users\\Public\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*C:\\Users\\Default\\*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*wscript*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*script*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*.exe"
+                                      case_insensitive: true
+                                - wildcard:
+                                    action.properties.Details:
+                                      value: "*.exe\""
+                                      case_insensitive: true
+                              minimum_should_match: 1
+                    - bool:
+                        must:
+                          - wildcard:
+                              process.command_line:
+                                value: "*reg*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*add*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*microsoft\\windows\\currentversion*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*/f*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*/v*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*reg_sz*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*c:\\users\\*"
+                                case_insensitive: true
+                          - wildcard:
+                              process.command_line:
+                                value: "*.exe*"
+                                case_insensitive: true
+                          - bool:
+                              should:
+                                - wildcard:
+                                    process.command_line:
+                                      value: "*\\run*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    process.command_line:
+                                      value: "*\\policies\\explorer\\run*"
+                                      case_insensitive: true
+                                - wildcard:
+                                    process.command_line:
+                                      value: "*\\runonce*"
+                                      case_insensitive: true
+                              minimum_should_match: 1
+                  minimum_should_match: 1
 expected:
   hits:
     total:
       value: 0
       relation: "eq"
----
-endpoint: "simple_es_compat/_search"
-params:
-  size: 3
-json:
-  track_total_hits: true
-  query:
-    regexp:
-      keyword_text:
-        value: "gold$"
-expected:
-  hits:
-    total:
-      value: 1
-      relation: "eq"
----
-# regex in query_string
-params:
-  size: 10
-json:
-  query:
-    query_string:
-      query: "type:/pushevent/"
-expected:
-  hits:
-    total:
-      value: 60
-      relation: "eq"

From ae0935e97af2b1c791d56564a496f4faa2cca97b Mon Sep 17 00:00:00 2001
From: Remi Dettai <remi.dettai@sekoia.io>
Date: Tue, 23 Jun 2026 15:23:57 +0200
Subject: [PATCH 2/2] Use new fst version that falls back to regex list

---
 quickwit/Cargo.lock                           |   2 +-
 quickwit/Cargo.toml                           |   2 +-
 .../quickwit-doc-mapper/src/doc_mapper/mod.rs |   3 +-
 .../quickwit-doc-mapper/src/query_builder.rs  |   4 +-
 quickwit/quickwit-query/src/lib.rs            |   1 +
 quickwit/quickwit-query/src/query_ast/mod.rs  |  89 ++++++-
 .../src/query_ast/regex_query.rs              |  32 +--
 .../src/query_ast/wildcard_query.rs           |  22 +-
 quickwit/quickwit-search/src/leaf.rs          | 112 ++++++---
 .../scenarii/es_compatibility/0031-regex.yaml | 222 +++++++++---------
 10 files changed, 303 insertions(+), 186 deletions(-)

diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock
index 8ad8eac98dc..5c699c1f7b4 100644
--- a/quickwit/Cargo.lock
+++ b/quickwit/Cargo.lock
@@ -9241,7 +9241,7 @@ dependencies = [
 [[package]]
 name = "tantivy-fst"
 version = "0.5.0"
-source = "git+https://github.com/SekoiaLab/fst/?rev=c37128c307b0ba5d7c0040352f0d2606d6383b68#c37128c307b0ba5d7c0040352f0d2606d6383b68"
+source = "git+https://github.com/SekoiaLab/fst/?rev=39ccb1c9283034815e22eb663d673a9e37023e7c#39ccb1c9283034815e22eb663d673a9e37023e7c"
 dependencies = [
  "byteorder",
  "regex-syntax",
diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml
index 334e54470ca..3abd2ce86a9 100644
--- a/quickwit/Cargo.toml
+++ b/quickwit/Cargo.toml
@@ -365,7 +365,7 @@ encoding_rs = "=0.8.35"
 
 [patch.crates-io]
 sasl2-sys = { git = "https://github.com/quickwit-oss/rust-sasl/", rev = "085a4c7" }
-tantivy-fst = { git = "https://github.com/SekoiaLab/fst/", rev = "c37128c307b0ba5d7c0040352f0d2606d6383b68" }
+tantivy-fst = { git = "https://github.com/SekoiaLab/fst/", rev = "39ccb1c9283034815e22eb663d673a9e37023e7c" }
 
 ## this patched version of tracing helps better understand what happens inside futures (when are
 ## they polled, how long does poll take...)
diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
index 39dd6da124d..9a7d63474bf 100644
--- a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
+++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
@@ -39,6 +39,7 @@ pub(crate) use field_mapping_entry::{
 #[cfg(test)]
 pub(crate) use field_mapping_entry::{QuickwitNumericOptions, QuickwitTextOptions};
 pub use field_mapping_type::FieldMappingType;
+use quickwit_query::JsonPath;
 use serde_json::Value as JsonValue;
 use tantivy::Term;
 use tantivy::schema::{Field, FieldType};
@@ -82,7 +83,7 @@ pub enum Automaton {
     /// targeting the same field and json path. They are warmed up as a single combined automaton
     /// matching the union of the patterns. The optional path is the json path prefix when the
     /// field is a json field.
-    Regex(Option<Vec<u8>>, Vec<String>),
+    Regex(Option<JsonPath>, Vec<String>),
     /// An exact-match automaton for a TermSet query.
     TermSet(ExactSetAutomaton),
 }
diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs
index cfe27d841ea..2c434aa6108 100644
--- a/quickwit/quickwit-doc-mapper/src/query_builder.rs
+++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs
@@ -23,7 +23,7 @@ use quickwit_query::query_ast::{
     QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery,
 };
 use quickwit_query::tokenizers::TokenizerManager;
-use quickwit_query::{InvalidQuery, find_field_or_hit_dynamic};
+use quickwit_query::{InvalidQuery, JsonPath, find_field_or_hit_dynamic};
 use tantivy::Term;
 use tantivy::query::Query;
 use tantivy::schema::{Field, Schema};
@@ -323,7 +323,7 @@ fn coalesce_multi_term_fields_into_automatons(
 /// and cannot be shared). `Automaton::TermSet` entries are left untouched.
 fn coalesce_regexes_by_field(automatons_grouped_by_field: &mut HashMap<Field, HashSet<Automaton>>) {
     for automatons in automatons_grouped_by_field.values_mut() {
-        let mut regexes_by_path: HashMap<Option<Vec<u8>>, Vec<String>> = HashMap::new();
+        let mut regexes_by_path: HashMap<Option<JsonPath>, Vec<String>> = HashMap::new();
         let mut others: Vec<Automaton> = Vec::new();
         for automaton in automatons.drain() {
             match automaton {
diff --git a/quickwit/quickwit-query/src/lib.rs b/quickwit/quickwit-query/src/lib.rs
index 8f70e155933..c8d390deac2 100644
--- a/quickwit/quickwit-query/src/lib.rs
+++ b/quickwit/quickwit-query/src/lib.rs
@@ -35,6 +35,7 @@ pub use elastic_query_dsl::{ElasticQueryDsl, OneFieldMap};
 pub use error::InvalidQuery;
 pub use json_literal::{InterpretUserInput, JsonLiteral};
 pub(crate) use not_nan_f32::NotNaNf32;
+pub use query_ast::JsonPath;
 pub use query_ast::utils::find_field_or_hit_dynamic;
 use serde::{Deserialize, Serialize};
 pub use tantivy::query::Query as TantivyQuery;
diff --git a/quickwit/quickwit-query/src/query_ast/mod.rs b/quickwit/quickwit-query/src/query_ast/mod.rs
index d51e36eb93a..d2c66da3e6a 100644
--- a/quickwit/quickwit-query/src/query_ast/mod.rs
+++ b/quickwit/quickwit-query/src/query_ast/mod.rs
@@ -13,8 +13,9 @@
 // limitations under the License.
 
 use serde::{Deserialize, Serialize};
+use tantivy::Term;
 use tantivy::query::BoostQuery as TantivyBoostQuery;
-use tantivy::schema::Schema as TantivySchema;
+use tantivy::schema::{Field, Schema as TantivySchema};
 
 use crate::tokenizers::TokenizerManager;
 
@@ -325,8 +326,46 @@ pub fn query_ast_from_user_text(user_text: &str, default_fields: Option<Vec<Stri
     .into()
 }
 
+/// Serialized binary JSON-path prefix used to scope automaton queries (regex,
+/// wildcard) to a particular sub-field of a JSON field. The bytes encode the
+/// tantivy path with `\x01` segment separators and a trailing `\x00<type>`
+/// marker.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
+pub struct JsonPath(Vec<u8>);
+
+impl std::ops::Deref for JsonPath {
+    type Target = [u8];
+    fn deref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl JsonPath {
+    /// Builds the serialized JSON-path prefix using the Tantivy encoding.
+    pub fn from_json_path(json_path: &str, expand_dots_enabled: bool) -> Self {
+        let mut term =
+            Term::from_field_json_path(Field::from_field_id(0), json_path, expand_dots_enabled);
+        term.append_type_and_str("");
+        // Skip the first byte: it is a JSON-field marker that is not stored in the dictionary.
+        JsonPath(term.value().as_serialized()[1..].to_owned())
+    }
+}
+
+impl std::fmt::Display for JsonPath {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use std::fmt::Write;
+        let path = &self.0;
+        let path_bytes = &path[..path.iter().position(|&b| b == 0).unwrap_or(path.len())];
+        for ch in String::from_utf8_lossy(path_bytes).chars() {
+            f.write_char(if ch == '\x01' { '.' } else { ch })?;
+        }
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use super::JsonPath;
     use crate::query_ast::tantivy_query_ast::TantivyQueryAst;
     use crate::query_ast::{
         BoolQuery, BuildTantivyAst, BuildTantivyAstContext, QueryAst, UserInputQuery,
@@ -440,4 +479,52 @@ mod tests {
         };
         assert_eq!(input_query.default_operator, BooleanOperand::And);
     }
+
+    #[test]
+    fn test_json_path_display() {
+        #[rustfmt::skip]
+        let cases: &[(&str, bool, &str)] = &[
+            // (json_path_input, expand_dots_enabled, expected_display)
+
+            // empty path (field with no sub-key)
+            ("",                   false, ""),
+            ("",                   true,  ""),
+
+            // single-segment paths
+            ("foo",                false, "foo"),
+            ("foo",                true,  "foo"),
+            ("_my_field",          false, "_my_field"),
+            ("field123",           false, "field123"),
+
+            // two-level paths
+            ("foo.bar",            false, "foo.bar"),
+            ("foo.bar",            true,  "foo.bar"),
+            ("process.executable", false, "process.executable"),
+
+            // three-level and deeper
+            ("a.b.c",              false, "a.b.c"),
+            ("a.b.c.d.e",          false, "a.b.c.d.e"),
+
+            // escaped dot: `split_json_path` treats `\.` as a literal dot in
+            // the segment name, so it becomes a single segment "k8s.node".
+            // With expand_dots=false the writer stores it as literal bytes "k8s.node";
+            // with expand_dots=true the writer re-expands the dot into \x01.
+            // In both cases Display renders it as "k8s.node".
+            ("k8s\\.node",         false, "k8s.node"),
+            ("k8s\\.node",         true,  "k8s.node"),
+
+            // escaped dot in the middle of a longer path
+            ("ns.k8s\\.node.id",   false, "ns.k8s.node.id"),
+            ("ns.k8s\\.node.id",   true,  "ns.k8s.node.id"),
+        ];
+
+        for &(input, expand_dots, expected) in cases {
+            let path = JsonPath::from_json_path(input, expand_dots);
+            assert_eq!(
+                path.to_string(),
+                expected,
+                "from_json_path({input:?}, expand_dots={expand_dots})"
+            );
+        }
+    }
 }
diff --git a/quickwit/quickwit-query/src/query_ast/regex_query.rs b/quickwit/quickwit-query/src/query_ast/regex_query.rs
index 851151e839a..e459b48ad57 100644
--- a/quickwit/quickwit-query/src/query_ast/regex_query.rs
+++ b/quickwit/quickwit-query/src/query_ast/regex_query.rs
@@ -17,12 +17,11 @@ use std::sync::Arc;
 use anyhow::Context;
 pub use prefix::{AutomatonQuery, JsonPathPrefix};
 use serde::{Deserialize, Serialize};
-use tantivy::Term;
 use tantivy::schema::{Field, FieldType, Schema as TantivySchema};
 
 use super::{BuildTantivyAst, BuildTantivyAstContext, QueryAst};
 use crate::query_ast::TantivyQueryAst;
-use crate::{InvalidQuery, find_field_or_hit_dynamic};
+use crate::{InvalidQuery, JsonPath, find_field_or_hit_dynamic};
 
 /// A Regex query
 #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
@@ -51,7 +50,7 @@ impl RegexQuery {
     pub fn to_field_and_regex(
         &self,
         schema: &TantivySchema,
-    ) -> Result<(Field, Option<Vec<u8>>, String), InvalidQuery> {
+    ) -> Result<(Field, Option<JsonPath>, String), InvalidQuery> {
         let Some((field, field_entry, json_path)) = find_field_or_hit_dynamic(&self.field, schema)
         else {
             return Err(InvalidQuery::FieldDoesNotExist {
@@ -79,17 +78,8 @@ impl RegexQuery {
                     ))
                 })?;
 
-                let mut term_for_path = Term::from_field_json_path(
-                    field,
-                    json_path,
-                    json_options.is_expand_dots_enabled(),
-                );
-                term_for_path.append_type_and_str("");
-
-                let value = term_for_path.value();
-                // We skip the 1st byte which is a marker to tell this is json. This isn't present
-                // in the dictionary
-                let byte_path_prefix = value.as_serialized()[1..].to_owned();
+                let byte_path_prefix =
+                    JsonPath::from_json_path(json_path, json_options.is_expand_dots_enabled());
                 Ok((field, Some(byte_path_prefix), self.regex.clone()))
             }
             _ => Err(InvalidQuery::SchemaError(
@@ -125,8 +115,10 @@ mod prefix {
     use tantivy::schema::Field;
     use tantivy_fst::Automaton;
 
+    use crate::JsonPath;
+
     pub struct JsonPathPrefix<A> {
-        pub prefix: Vec<u8>,
+        pub prefix: JsonPath,
         pub automaton: Arc<A>,
     }
 
@@ -246,6 +238,7 @@ mod prefix {
 
 #[cfg(test)]
 mod tests {
+    use std::ops::Deref;
     use std::sync::Arc;
 
     use tantivy::schema::{Schema as TantivySchema, TEXT};
@@ -253,6 +246,7 @@ mod tests {
 
     use super::prefix::JsonPathPrefixState;
     use super::{JsonPathPrefix, RegexQuery};
+    use crate::JsonPath;
 
     #[test]
     fn test_regex_query_text_field() {
@@ -282,7 +276,7 @@ mod tests {
         };
         let (field, path, regex) = query.to_field_and_regex(&schema).unwrap();
         assert_eq!(field, schema.get_field("field").unwrap());
-        assert_eq!(path.unwrap(), b"sub\x01field\0s");
+        assert_eq!(path.unwrap().deref(), b"sub\x01field\0s");
         assert_eq!(regex, query.regex);
 
         // i believe this is how concatenated field behave
@@ -292,7 +286,7 @@ mod tests {
         };
         let (field, path, regex) = query_empty_path.to_field_and_regex(&schema).unwrap();
         assert_eq!(field, schema.get_field("field").unwrap());
-        assert_eq!(path.unwrap(), b"\0s");
+        assert_eq!(path.unwrap().deref(), b"\0s");
         assert_eq!(regex, query_empty_path.regex);
     }
 
@@ -300,7 +294,7 @@ mod tests {
     fn test_json_prefix_automaton_empty_path() {
         let regex = Arc::new(Regex::new("e(f|g.*)").unwrap());
         let empty_path_automaton = JsonPathPrefix {
-            prefix: Vec::new(),
+            prefix: JsonPath::default(),
             automaton: regex.clone(),
         };
 
@@ -312,7 +306,7 @@ mod tests {
     fn test_json_prefix_automaton() {
         let regex = Arc::new(Regex::new("e(f|g.*)").unwrap());
         let automaton = JsonPathPrefix {
-            prefix: b"ab".to_vec(),
+            prefix: JsonPath::from_json_path("ab", false),
             automaton: regex.clone(),
         };
 
diff --git a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs
index 8fc9c7eaf2f..2b9e2ce8b22 100644
--- a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs
+++ b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs
@@ -17,13 +17,12 @@ use std::sync::Arc;
 
 use anyhow::{Context, bail};
 use serde::{Deserialize, Serialize};
-use tantivy::Term;
 use tantivy::schema::{Field, FieldType, Schema as TantivySchema};
 
 use super::{BuildTantivyAst, QueryAst};
 use crate::query_ast::{AutomatonQuery, BuildTantivyAstContext, JsonPathPrefix, TantivyQueryAst};
 use crate::tokenizers::TokenizerManager;
-use crate::{InvalidQuery, find_field_or_hit_dynamic};
+use crate::{InvalidQuery, JsonPath, find_field_or_hit_dynamic};
 
 /// A Wildcard query allows to match 'bond' with a query like 'b*d'.
 #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
@@ -112,7 +111,7 @@ impl WildcardQuery {
         &self,
         schema: &TantivySchema,
         tokenizer_manager: &TokenizerManager,
-    ) -> Result<(Field, Option<Vec<u8>>, String), InvalidQuery> {
+    ) -> Result<(Field, Option<JsonPath>, String), InvalidQuery> {
         let Some((field, field_entry, json_path)) = find_field_or_hit_dynamic(&self.field, schema)
         else {
             return Err(InvalidQuery::FieldDoesNotExist {
@@ -161,17 +160,8 @@ impl WildcardQuery {
                         regex
                     };
 
-                let mut term_for_path = Term::from_field_json_path(
-                    field,
-                    json_path,
-                    json_options.is_expand_dots_enabled(),
-                );
-                term_for_path.append_type_and_str("");
-
-                let value = term_for_path.value();
-                // We skip the 1st byte which is a marker to tell this is json. This isn't present
-                // in the dictionary
-                let byte_path_prefix = value.as_serialized()[1..].to_owned();
+                let byte_path_prefix =
+                    JsonPath::from_json_path(json_path, json_options.is_expand_dots_enabled());
 
                 Ok((field, Some(byte_path_prefix), regex))
             }
@@ -328,7 +318,7 @@ mod tests {
 
             let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap();
             assert_eq!(regex, "MyString Wh1ch.a\\.nOrMal Tokenizer would.*cut");
-            assert_eq!(path.unwrap(), "Inner\u{1}Fie*ld\0s".as_bytes());
+            assert_eq!(path.unwrap().0, "Inner\u{1}Fie*ld\0s".as_bytes());
         }
 
         for tokenizer in [
@@ -347,7 +337,7 @@ mod tests {
 
             let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap();
             assert_eq!(regex, "mystring wh1ch.a\\.normal tokenizer would.*cut");
-            assert_eq!(path.unwrap(), "Inner\u{1}Fie*ld\0s".as_bytes());
+            assert_eq!(path.unwrap().0, "Inner\u{1}Fie*ld\0s".as_bytes());
         }
     }
 
diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs
index 8f007a604fe..f090a58159a 100644
--- a/quickwit/quickwit-search/src/leaf.rs
+++ b/quickwit/quickwit-search/src/leaf.rs
@@ -29,6 +29,7 @@ use quickwit_proto::search::{
     CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest,
     SortOrder, SortValue, SplitIdAndFooterOffsets, SplitSearchError,
 };
+use quickwit_query::JsonPath;
 use quickwit_query::query_ast::{
     BoolQuery, CacheNode, QueryAst, QueryAstTransformer, RangeQuery, TermQuery,
 };
@@ -42,8 +43,9 @@ use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard};
 use tantivy::collector::Collector;
 use tantivy::directory::FileSlice;
 use tantivy::fastfield::FastFieldReaders;
-use tantivy::schema::Field;
+use tantivy::schema::{self, Field};
 use tantivy::{DateTime, Index, ReloadPolicy, Searcher, TantivyError, Term};
+use tantivy_fst::DisjunctionRegex;
 use tokio::task::{JoinError, JoinSet};
 use tracing::*;
 
@@ -332,46 +334,55 @@ async fn warm_up_automatons(
             .map_err(|_| std::io::Error::other("task panicked"))?
     };
     for (field, automatons) in terms_grouped_by_field {
-        let field_name = searcher.schema().get_field_name(*field).to_string();
         for segment_reader in searcher.segment_readers() {
             let inv_idx = segment_reader.inverted_index(*field)?;
             for automaton in automatons {
                 let inv_idx_clone = inv_idx.clone();
-                let field_name = field_name.clone();
                 warm_up_futures.push(async move {
                     match automaton {
                         Automaton::Regex(path, patterns) => {
-                            let path_str = path
-                                .as_deref()
-                                .map(|path| String::from_utf8_lossy(path).into_owned())
-                                .unwrap_or_default();
-                            // Combine all patterns so the term dictionary is
-                            // traversed once instead of once per regex.
                             let regex =
                                 tantivy_fst::Regex::from_patterns(patterns).with_context(|| {
                                     format!(
-                                        "failed to build combined regex automaton during warmup \
-                                         for field `{field_name}` (path: `{path_str}`, {} \
-                                         patterns)",
-                                        patterns.len(),
+                                        "failed to build regex during warmup for field `{}`",
+                                        full_path(*field, path, searcher.schema()),
                                     )
                                 })?;
-                            inv_idx_clone
-                                .warm_postings_automaton(
-                                    quickwit_query::query_ast::JsonPathPrefix {
-                                        automaton: regex.into(),
-                                        prefix: path.clone().unwrap_or_default(),
-                                    },
-                                    cpu_intensive_executor,
-                                )
-                                .await
-                                .with_context(|| {
-                                    format!(
-                                        "failed to load automaton for field `{field_name}` (path: \
-                                         `{path_str}`, {} patterns)",
-                                        patterns.len(),
+
+                            match regex {
+                                DisjunctionRegex::Single(regex) => inv_idx_clone
+                                    .warm_postings_automaton(
+                                        quickwit_query::query_ast::JsonPathPrefix {
+                                            automaton: Arc::new(regex),
+                                            prefix: path.clone().unwrap_or_default(),
+                                        },
+                                        cpu_intensive_executor,
+                                    )
+                                    .await
+                                    .with_context(|| {
+                                        format!(
+                                            "failed to warm postings from automaton for field \
+                                             `{}` (type=single)",
+                                            full_path(*field, path, searcher.schema()),
+                                        )
+                                    }),
+                                DisjunctionRegex::Multi(regexes) => inv_idx_clone
+                                    .warm_postings_automaton(
+                                        quickwit_query::query_ast::JsonPathPrefix {
+                                            automaton: Arc::new(regexes),
+                                            prefix: path.clone().unwrap_or_default(),
+                                        },
+                                        cpu_intensive_executor,
                                     )
-                                })
+                                    .await
+                                    .with_context(|| {
+                                        format!(
+                                            "failed to warm postings from automaton for field \
+                                             `{}` (type=multi)",
+                                            full_path(*field, path, searcher.schema()),
+                                        )
+                                    }),
+                            }
                         }
                         Automaton::TermSet(automaton) => inv_idx_clone
                             .warm_postings_automaton(automaton.clone(), cpu_intensive_executor)
@@ -1616,6 +1627,15 @@ async fn leaf_search_single_split_wrapper(
     }
 }
 
+/// Small helper to display field path in errors
+fn full_path(field: Field, path_opt: &Option<JsonPath>, schema: &schema::Schema) -> String {
+    let field_name = schema.get_field_name(field);
+    let Some(path) = path_opt else {
+        return field_name.to_string();
+    };
+    format!("{}.{}", field_name, path)
+}
+
 #[cfg(test)]
 mod tests {
     use std::ops::Bound;
@@ -2186,7 +2206,8 @@ mod tests {
         let indexing_options =
             TextOptions::default().set_indexing_options(TextFieldIndexing::default());
         let mut schema_builder = Schema::builder();
-        let text_field = schema_builder.add_text_field("text", indexing_options);
+        let text_field = schema_builder.add_text_field("text", indexing_options.clone());
+        let json_field = schema_builder.add_json_field("body", indexing_options);
         let schema = schema_builder.build();
 
         let ram_directory = RamDirectory::create();
@@ -2195,6 +2216,7 @@ mod tests {
         let mut doc = TantivyDocument::default();
         doc.add_field_value(text_field, "hello");
         index_writer.add_document(doc).unwrap();
+        // the json value is not set
         index_writer.commit().unwrap();
         let searcher = index.reader().unwrap().searcher();
 
@@ -2210,6 +2232,19 @@ mod tests {
         .collect();
         assert!(warm_up_automatons(&searcher, &valid).await.is_ok());
 
+        // Valid regexes on a JSON sub-field also warm up successfully.
+        let json_path =
+            quickwit_query::query_ast::JsonPath::from_json_path("process.executable", false);
+        let valid_json: HashMap<Field, HashSet<Automaton>> = std::iter::once((
+            json_field,
+            HashSet::from([Automaton::Regex(
+                Some(json_path.clone()),
+                vec!["s.*".to_string(), "b.*".to_string()],
+            )]),
+        ))
+        .collect();
+        assert!(warm_up_automatons(&searcher, &valid_json).await.is_ok());
+
         // An unbuildable regex (here, invalid syntax) must cause warmup to fail
         // rather than fall back to warming the patterns individually.
         let invalid: HashMap<Field, HashSet<Automaton>> = std::iter::once((
@@ -2222,7 +2257,24 @@ mod tests {
             .unwrap_err()
             .to_string();
         assert!(
-            error.contains("failed to build combined regex automaton during warmup"),
+            error.contains("failed to build regex during warmup for field `text`"),
+            "unexpected error: {error}"
+        );
+
+        // An unbuildable regex on a JSON sub-field also produces a useful error message.
+        let invalid_json: HashMap<Field, HashSet<Automaton>> = std::iter::once((
+            json_field,
+            HashSet::from([Automaton::Regex(Some(json_path), vec!["(".to_string()])]),
+        ))
+        .collect();
+        let error = warm_up_automatons(&searcher, &invalid_json)
+            .await
+            .unwrap_err()
+            .to_string();
+        assert!(
+            error.contains(
+                "failed to build regex during warmup for field `body.process.executable`"
+            ),
             "unexpected error: {error}"
         );
     }
diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
index e194e973cd8..7f18e841155 100644
--- a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
+++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml
@@ -1,110 +1,110 @@
-# # Basic regex match
-# params:
-#   size: 0
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       type:
-#         value: ".*event"
-# expected:
-#   hits:
-#     total:
-#       value: 100
-#       relation: "eq"
-# ---
-# # Regex always match from start to end (`(re)` equivalent to `^(re)$`)
-# params:
-#   size: 3
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       type:
-#         value: "event"
-# expected:
-#   hits:
-#     total:
-#       value: 0
-#       relation: "eq"
-# ---
-# # Regex with case_insensitive flag
-# params:
-#   size: 3
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       repo.name:
-#         # lowercased by the tokenizer
-#         value: "RUST.*"
-#         case_insensitive: true
-# expected:
-#   hits:
-#     total:
-#       value: 1
-#       relation: "eq"
-# ---
-# params:
-#   size: 3
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       type:
-#         # lowercased by the tokenizer
-#         value: "RUST.*"
-#         case_insensitive: false
-# expected:
-#   hits:
-#     total:
-#       value: 0
-#       relation: "eq"
-# ---
-# # ^ and $ are escaped when they are used as anchors, so
-# # ^pushevent$ only matches if the original term is "^pushevent$".
-# endpoint: "simple_es_compat/_search"
-# params:
-#   size: 3
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       keyword_text:
-#         value: "red$"
-# expected:
-#   hits:
-#     total:
-#       value: 0
-#       relation: "eq"
-# ---
-# endpoint: "simple_es_compat/_search"
-# params:
-#   size: 3
-# json:
-#   track_total_hits: true
-#   query:
-#     regexp:
-#       keyword_text:
-#         value: "gold$"
-# expected:
-#   hits:
-#     total:
-#       value: 1
-#       relation: "eq"
-# ---
-# # regex in query_string
-# params:
-#   size: 10
-# json:
-#   query:
-#     query_string:
-#       query: "type:/pushevent/"
-# expected:
-#   hits:
-#     total:
-#       value: 60
-#       relation: "eq"
+# Basic regex match
+params:
+  size: 0
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      type:
+        value: ".*event"
+expected:
+  hits:
+    total:
+      value: 100
+      relation: "eq"
+---
+# Regex always match from start to end (`(re)` equivalent to `^(re)$`)
+params:
+  size: 3
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      type:
+        value: "event"
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"
+---
+# Regex with case_insensitive flag
+params:
+  size: 3
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      repo.name:
+        # lowercased by the tokenizer
+        value: "RUST.*"
+        case_insensitive: true
+expected:
+  hits:
+    total:
+      value: 1
+      relation: "eq"
+---
+params:
+  size: 3
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      type:
+        # lowercased by the tokenizer
+        value: "RUST.*"
+        case_insensitive: false
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"
+---
+# ^ and $ are escaped when they are used as anchors, so
+# ^pushevent$ only matches if the original term is "^pushevent$".
+endpoint: "simple_es_compat/_search"
+params:
+  size: 3
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      keyword_text:
+        value: "red$"
+expected:
+  hits:
+    total:
+      value: 0
+      relation: "eq"
+---
+endpoint: "simple_es_compat/_search"
+params:
+  size: 3
+json:
+  track_total_hits: true
+  query:
+    regexp:
+      keyword_text:
+        value: "gold$"
+expected:
+  hits:
+    total:
+      value: 1
+      relation: "eq"
+---
+# regex in query_string
+params:
+  size: 10
+json:
+  query:
+    query_string:
+      query: "type:/pushevent/"
+expected:
+  hits:
+    total:
+      value: 60
+      relation: "eq"
 ---
 # Many wildcards across a few fields (6 distinct fields, well within the 20-field limit).
 # All targeted fields are absent from the index so 0 hits are expected,
@@ -185,10 +185,6 @@ json:
                               registry.value:
                                 value: "*Teams*"
                                 case_insensitive: true
-                          - wildcard:
-                              registry.value:
-                                value: "*TEHTRIS*"
-                                case_insensitive: true
                           - wildcard:
                               registry.value:
                                 value: "*VMware*"
@@ -229,10 +225,6 @@ json:
                               process.executable:
                                 value: "*sara.exe*"
                                 case_insensitive: true
-                          - wildcard:
-                              process.command_line:
-                                value: "*Letsignit*"
-                                case_insensitive: true
                         minimum_should_match: 1
               - bool:
                   should: