diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 8ad8eac98dc..5c699c1f7b4 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -9241,7 +9241,7 @@ dependencies = [ [[package]] name = "tantivy-fst" version = "0.5.0" -source = "git+https://github.com/SekoiaLab/fst/?rev=c37128c307b0ba5d7c0040352f0d2606d6383b68#c37128c307b0ba5d7c0040352f0d2606d6383b68" +source = "git+https://github.com/SekoiaLab/fst/?rev=39ccb1c9283034815e22eb663d673a9e37023e7c#39ccb1c9283034815e22eb663d673a9e37023e7c" dependencies = [ "byteorder", "regex-syntax", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 334e54470ca..3abd2ce86a9 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -365,7 +365,7 @@ encoding_rs = "=0.8.35" [patch.crates-io] sasl2-sys = { git = "https://github.com/quickwit-oss/rust-sasl/", rev = "085a4c7" } -tantivy-fst = { git = "https://github.com/SekoiaLab/fst/", rev = "c37128c307b0ba5d7c0040352f0d2606d6383b68" } +tantivy-fst = { git = "https://github.com/SekoiaLab/fst/", rev = "39ccb1c9283034815e22eb663d673a9e37023e7c" } ## this patched version of tracing helps better understand what happens inside futures (when are ## they polled, how long does poll take...) diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs index 39dd6da124d..9a7d63474bf 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs @@ -39,6 +39,7 @@ pub(crate) use field_mapping_entry::{ #[cfg(test)] pub(crate) use field_mapping_entry::{QuickwitNumericOptions, QuickwitTextOptions}; pub use field_mapping_type::FieldMappingType; +use quickwit_query::JsonPath; use serde_json::Value as JsonValue; use tantivy::Term; use tantivy::schema::{Field, FieldType}; @@ -82,7 +83,7 @@ pub enum Automaton { /// targeting the same field and json path. They are warmed up as a single combined automaton /// matching the union of the patterns. The optional path is the json path prefix when the /// field is a json field. - Regex(Option>, Vec), + Regex(Option, Vec), /// An exact-match automaton for a TermSet query. TermSet(ExactSetAutomaton), } diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index cfe27d841ea..2c434aa6108 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -23,7 +23,7 @@ use quickwit_query::query_ast::{ QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery, }; use quickwit_query::tokenizers::TokenizerManager; -use quickwit_query::{InvalidQuery, find_field_or_hit_dynamic}; +use quickwit_query::{InvalidQuery, JsonPath, find_field_or_hit_dynamic}; use tantivy::Term; use tantivy::query::Query; use tantivy::schema::{Field, Schema}; @@ -323,7 +323,7 @@ fn coalesce_multi_term_fields_into_automatons( /// and cannot be shared). `Automaton::TermSet` entries are left untouched. fn coalesce_regexes_by_field(automatons_grouped_by_field: &mut HashMap>) { for automatons in automatons_grouped_by_field.values_mut() { - let mut regexes_by_path: HashMap>, Vec> = HashMap::new(); + let mut regexes_by_path: HashMap, Vec> = HashMap::new(); let mut others: Vec = Vec::new(); for automaton in automatons.drain() { match automaton { diff --git a/quickwit/quickwit-query/src/lib.rs b/quickwit/quickwit-query/src/lib.rs index 8f70e155933..c8d390deac2 100644 --- a/quickwit/quickwit-query/src/lib.rs +++ b/quickwit/quickwit-query/src/lib.rs @@ -35,6 +35,7 @@ pub use elastic_query_dsl::{ElasticQueryDsl, OneFieldMap}; pub use error::InvalidQuery; pub use json_literal::{InterpretUserInput, JsonLiteral}; pub(crate) use not_nan_f32::NotNaNf32; +pub use query_ast::JsonPath; pub use query_ast::utils::find_field_or_hit_dynamic; use serde::{Deserialize, Serialize}; pub use tantivy::query::Query as TantivyQuery; diff --git a/quickwit/quickwit-query/src/query_ast/mod.rs b/quickwit/quickwit-query/src/query_ast/mod.rs index d51e36eb93a..d2c66da3e6a 100644 --- a/quickwit/quickwit-query/src/query_ast/mod.rs +++ b/quickwit/quickwit-query/src/query_ast/mod.rs @@ -13,8 +13,9 @@ // limitations under the License. use serde::{Deserialize, Serialize}; +use tantivy::Term; use tantivy::query::BoostQuery as TantivyBoostQuery; -use tantivy::schema::Schema as TantivySchema; +use tantivy::schema::{Field, Schema as TantivySchema}; use crate::tokenizers::TokenizerManager; @@ -325,8 +326,46 @@ pub fn query_ast_from_user_text(user_text: &str, default_fields: Option` +/// marker. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] +pub struct JsonPath(Vec); + +impl std::ops::Deref for JsonPath { + type Target = [u8]; + fn deref(&self) -> &[u8] { + &self.0 + } +} + +impl JsonPath { + /// Builds the serialized JSON-path prefix using the Tantivy encoding. + pub fn from_json_path(json_path: &str, expand_dots_enabled: bool) -> Self { + let mut term = + Term::from_field_json_path(Field::from_field_id(0), json_path, expand_dots_enabled); + term.append_type_and_str(""); + // Skip the first byte: it is a JSON-field marker that is not stored in the dictionary. + JsonPath(term.value().as_serialized()[1..].to_owned()) + } +} + +impl std::fmt::Display for JsonPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use std::fmt::Write; + let path = &self.0; + let path_bytes = &path[..path.iter().position(|&b| b == 0).unwrap_or(path.len())]; + for ch in String::from_utf8_lossy(path_bytes).chars() { + f.write_char(if ch == '\x01' { '.' } else { ch })?; + } + Ok(()) + } +} + #[cfg(test)] mod tests { + use super::JsonPath; use crate::query_ast::tantivy_query_ast::TantivyQueryAst; use crate::query_ast::{ BoolQuery, BuildTantivyAst, BuildTantivyAstContext, QueryAst, UserInputQuery, @@ -440,4 +479,52 @@ mod tests { }; assert_eq!(input_query.default_operator, BooleanOperand::And); } + + #[test] + fn test_json_path_display() { + #[rustfmt::skip] + let cases: &[(&str, bool, &str)] = &[ + // (json_path_input, expand_dots_enabled, expected_display) + + // empty path (field with no sub-key) + ("", false, ""), + ("", true, ""), + + // single-segment paths + ("foo", false, "foo"), + ("foo", true, "foo"), + ("_my_field", false, "_my_field"), + ("field123", false, "field123"), + + // two-level paths + ("foo.bar", false, "foo.bar"), + ("foo.bar", true, "foo.bar"), + ("process.executable", false, "process.executable"), + + // three-level and deeper + ("a.b.c", false, "a.b.c"), + ("a.b.c.d.e", false, "a.b.c.d.e"), + + // escaped dot: `split_json_path` treats `\.` as a literal dot in + // the segment name, so it becomes a single segment "k8s.node". + // With expand_dots=false the writer stores it as literal bytes "k8s.node"; + // with expand_dots=true the writer re-expands the dot into \x01. + // In both cases Display renders it as "k8s.node". + ("k8s\\.node", false, "k8s.node"), + ("k8s\\.node", true, "k8s.node"), + + // escaped dot in the middle of a longer path + ("ns.k8s\\.node.id", false, "ns.k8s.node.id"), + ("ns.k8s\\.node.id", true, "ns.k8s.node.id"), + ]; + + for &(input, expand_dots, expected) in cases { + let path = JsonPath::from_json_path(input, expand_dots); + assert_eq!( + path.to_string(), + expected, + "from_json_path({input:?}, expand_dots={expand_dots})" + ); + } + } } diff --git a/quickwit/quickwit-query/src/query_ast/regex_query.rs b/quickwit/quickwit-query/src/query_ast/regex_query.rs index 851151e839a..e459b48ad57 100644 --- a/quickwit/quickwit-query/src/query_ast/regex_query.rs +++ b/quickwit/quickwit-query/src/query_ast/regex_query.rs @@ -17,12 +17,11 @@ use std::sync::Arc; use anyhow::Context; pub use prefix::{AutomatonQuery, JsonPathPrefix}; use serde::{Deserialize, Serialize}; -use tantivy::Term; use tantivy::schema::{Field, FieldType, Schema as TantivySchema}; use super::{BuildTantivyAst, BuildTantivyAstContext, QueryAst}; use crate::query_ast::TantivyQueryAst; -use crate::{InvalidQuery, find_field_or_hit_dynamic}; +use crate::{InvalidQuery, JsonPath, find_field_or_hit_dynamic}; /// A Regex query #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] @@ -51,7 +50,7 @@ impl RegexQuery { pub fn to_field_and_regex( &self, schema: &TantivySchema, - ) -> Result<(Field, Option>, String), InvalidQuery> { + ) -> Result<(Field, Option, String), InvalidQuery> { let Some((field, field_entry, json_path)) = find_field_or_hit_dynamic(&self.field, schema) else { return Err(InvalidQuery::FieldDoesNotExist { @@ -79,17 +78,8 @@ impl RegexQuery { )) })?; - let mut term_for_path = Term::from_field_json_path( - field, - json_path, - json_options.is_expand_dots_enabled(), - ); - term_for_path.append_type_and_str(""); - - let value = term_for_path.value(); - // We skip the 1st byte which is a marker to tell this is json. This isn't present - // in the dictionary - let byte_path_prefix = value.as_serialized()[1..].to_owned(); + let byte_path_prefix = + JsonPath::from_json_path(json_path, json_options.is_expand_dots_enabled()); Ok((field, Some(byte_path_prefix), self.regex.clone())) } _ => Err(InvalidQuery::SchemaError( @@ -125,8 +115,10 @@ mod prefix { use tantivy::schema::Field; use tantivy_fst::Automaton; + use crate::JsonPath; + pub struct JsonPathPrefix { - pub prefix: Vec, + pub prefix: JsonPath, pub automaton: Arc, } @@ -246,6 +238,7 @@ mod prefix { #[cfg(test)] mod tests { + use std::ops::Deref; use std::sync::Arc; use tantivy::schema::{Schema as TantivySchema, TEXT}; @@ -253,6 +246,7 @@ mod tests { use super::prefix::JsonPathPrefixState; use super::{JsonPathPrefix, RegexQuery}; + use crate::JsonPath; #[test] fn test_regex_query_text_field() { @@ -282,7 +276,7 @@ mod tests { }; let (field, path, regex) = query.to_field_and_regex(&schema).unwrap(); assert_eq!(field, schema.get_field("field").unwrap()); - assert_eq!(path.unwrap(), b"sub\x01field\0s"); + assert_eq!(path.unwrap().deref(), b"sub\x01field\0s"); assert_eq!(regex, query.regex); // i believe this is how concatenated field behave @@ -292,7 +286,7 @@ mod tests { }; let (field, path, regex) = query_empty_path.to_field_and_regex(&schema).unwrap(); assert_eq!(field, schema.get_field("field").unwrap()); - assert_eq!(path.unwrap(), b"\0s"); + assert_eq!(path.unwrap().deref(), b"\0s"); assert_eq!(regex, query_empty_path.regex); } @@ -300,7 +294,7 @@ mod tests { fn test_json_prefix_automaton_empty_path() { let regex = Arc::new(Regex::new("e(f|g.*)").unwrap()); let empty_path_automaton = JsonPathPrefix { - prefix: Vec::new(), + prefix: JsonPath::default(), automaton: regex.clone(), }; @@ -312,7 +306,7 @@ mod tests { fn test_json_prefix_automaton() { let regex = Arc::new(Regex::new("e(f|g.*)").unwrap()); let automaton = JsonPathPrefix { - prefix: b"ab".to_vec(), + prefix: JsonPath::from_json_path("ab", false), automaton: regex.clone(), }; diff --git a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs index 8fc9c7eaf2f..2b9e2ce8b22 100644 --- a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs +++ b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs @@ -17,13 +17,12 @@ use std::sync::Arc; use anyhow::{Context, bail}; use serde::{Deserialize, Serialize}; -use tantivy::Term; use tantivy::schema::{Field, FieldType, Schema as TantivySchema}; use super::{BuildTantivyAst, QueryAst}; use crate::query_ast::{AutomatonQuery, BuildTantivyAstContext, JsonPathPrefix, TantivyQueryAst}; use crate::tokenizers::TokenizerManager; -use crate::{InvalidQuery, find_field_or_hit_dynamic}; +use crate::{InvalidQuery, JsonPath, find_field_or_hit_dynamic}; /// A Wildcard query allows to match 'bond' with a query like 'b*d'. #[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] @@ -112,7 +111,7 @@ impl WildcardQuery { &self, schema: &TantivySchema, tokenizer_manager: &TokenizerManager, - ) -> Result<(Field, Option>, String), InvalidQuery> { + ) -> Result<(Field, Option, String), InvalidQuery> { let Some((field, field_entry, json_path)) = find_field_or_hit_dynamic(&self.field, schema) else { return Err(InvalidQuery::FieldDoesNotExist { @@ -161,17 +160,8 @@ impl WildcardQuery { regex }; - let mut term_for_path = Term::from_field_json_path( - field, - json_path, - json_options.is_expand_dots_enabled(), - ); - term_for_path.append_type_and_str(""); - - let value = term_for_path.value(); - // We skip the 1st byte which is a marker to tell this is json. This isn't present - // in the dictionary - let byte_path_prefix = value.as_serialized()[1..].to_owned(); + let byte_path_prefix = + JsonPath::from_json_path(json_path, json_options.is_expand_dots_enabled()); Ok((field, Some(byte_path_prefix), regex)) } @@ -328,7 +318,7 @@ mod tests { let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap(); assert_eq!(regex, "MyString Wh1ch.a\\.nOrMal Tokenizer would.*cut"); - assert_eq!(path.unwrap(), "Inner\u{1}Fie*ld\0s".as_bytes()); + assert_eq!(path.unwrap().0, "Inner\u{1}Fie*ld\0s".as_bytes()); } for tokenizer in [ @@ -347,7 +337,7 @@ mod tests { let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap(); assert_eq!(regex, "mystring wh1ch.a\\.normal tokenizer would.*cut"); - assert_eq!(path.unwrap(), "Inner\u{1}Fie*ld\0s".as_bytes()); + assert_eq!(path.unwrap().0, "Inner\u{1}Fie*ld\0s".as_bytes()); } } diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 8f007a604fe..f090a58159a 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -29,6 +29,7 @@ use quickwit_proto::search::{ CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortOrder, SortValue, SplitIdAndFooterOffsets, SplitSearchError, }; +use quickwit_query::JsonPath; use quickwit_query::query_ast::{ BoolQuery, CacheNode, QueryAst, QueryAstTransformer, RangeQuery, TermQuery, }; @@ -42,8 +43,9 @@ use tantivy::aggregation::{AggContextParams, AggregationLimitsGuard}; use tantivy::collector::Collector; use tantivy::directory::FileSlice; use tantivy::fastfield::FastFieldReaders; -use tantivy::schema::Field; +use tantivy::schema::{self, Field}; use tantivy::{DateTime, Index, ReloadPolicy, Searcher, TantivyError, Term}; +use tantivy_fst::DisjunctionRegex; use tokio::task::{JoinError, JoinSet}; use tracing::*; @@ -332,46 +334,55 @@ async fn warm_up_automatons( .map_err(|_| std::io::Error::other("task panicked"))? }; for (field, automatons) in terms_grouped_by_field { - let field_name = searcher.schema().get_field_name(*field).to_string(); for segment_reader in searcher.segment_readers() { let inv_idx = segment_reader.inverted_index(*field)?; for automaton in automatons { let inv_idx_clone = inv_idx.clone(); - let field_name = field_name.clone(); warm_up_futures.push(async move { match automaton { Automaton::Regex(path, patterns) => { - let path_str = path - .as_deref() - .map(|path| String::from_utf8_lossy(path).into_owned()) - .unwrap_or_default(); - // Combine all patterns so the term dictionary is - // traversed once instead of once per regex. let regex = tantivy_fst::Regex::from_patterns(patterns).with_context(|| { format!( - "failed to build combined regex automaton during warmup \ - for field `{field_name}` (path: `{path_str}`, {} \ - patterns)", - patterns.len(), + "failed to build regex during warmup for field `{}`", + full_path(*field, path, searcher.schema()), ) })?; - inv_idx_clone - .warm_postings_automaton( - quickwit_query::query_ast::JsonPathPrefix { - automaton: regex.into(), - prefix: path.clone().unwrap_or_default(), - }, - cpu_intensive_executor, - ) - .await - .with_context(|| { - format!( - "failed to load automaton for field `{field_name}` (path: \ - `{path_str}`, {} patterns)", - patterns.len(), + + match regex { + DisjunctionRegex::Single(regex) => inv_idx_clone + .warm_postings_automaton( + quickwit_query::query_ast::JsonPathPrefix { + automaton: Arc::new(regex), + prefix: path.clone().unwrap_or_default(), + }, + cpu_intensive_executor, + ) + .await + .with_context(|| { + format!( + "failed to warm postings from automaton for field \ + `{}` (type=single)", + full_path(*field, path, searcher.schema()), + ) + }), + DisjunctionRegex::Multi(regexes) => inv_idx_clone + .warm_postings_automaton( + quickwit_query::query_ast::JsonPathPrefix { + automaton: Arc::new(regexes), + prefix: path.clone().unwrap_or_default(), + }, + cpu_intensive_executor, ) - }) + .await + .with_context(|| { + format!( + "failed to warm postings from automaton for field \ + `{}` (type=multi)", + full_path(*field, path, searcher.schema()), + ) + }), + } } Automaton::TermSet(automaton) => inv_idx_clone .warm_postings_automaton(automaton.clone(), cpu_intensive_executor) @@ -1616,6 +1627,15 @@ async fn leaf_search_single_split_wrapper( } } +/// Small helper to display field path in errors +fn full_path(field: Field, path_opt: &Option, schema: &schema::Schema) -> String { + let field_name = schema.get_field_name(field); + let Some(path) = path_opt else { + return field_name.to_string(); + }; + format!("{}.{}", field_name, path) +} + #[cfg(test)] mod tests { use std::ops::Bound; @@ -2186,7 +2206,8 @@ mod tests { let indexing_options = TextOptions::default().set_indexing_options(TextFieldIndexing::default()); let mut schema_builder = Schema::builder(); - let text_field = schema_builder.add_text_field("text", indexing_options); + let text_field = schema_builder.add_text_field("text", indexing_options.clone()); + let json_field = schema_builder.add_json_field("body", indexing_options); let schema = schema_builder.build(); let ram_directory = RamDirectory::create(); @@ -2195,6 +2216,7 @@ mod tests { let mut doc = TantivyDocument::default(); doc.add_field_value(text_field, "hello"); index_writer.add_document(doc).unwrap(); + // the json value is not set index_writer.commit().unwrap(); let searcher = index.reader().unwrap().searcher(); @@ -2210,6 +2232,19 @@ mod tests { .collect(); assert!(warm_up_automatons(&searcher, &valid).await.is_ok()); + // Valid regexes on a JSON sub-field also warm up successfully. + let json_path = + quickwit_query::query_ast::JsonPath::from_json_path("process.executable", false); + let valid_json: HashMap> = std::iter::once(( + json_field, + HashSet::from([Automaton::Regex( + Some(json_path.clone()), + vec!["s.*".to_string(), "b.*".to_string()], + )]), + )) + .collect(); + assert!(warm_up_automatons(&searcher, &valid_json).await.is_ok()); + // An unbuildable regex (here, invalid syntax) must cause warmup to fail // rather than fall back to warming the patterns individually. let invalid: HashMap> = std::iter::once(( @@ -2222,7 +2257,24 @@ mod tests { .unwrap_err() .to_string(); assert!( - error.contains("failed to build combined regex automaton during warmup"), + error.contains("failed to build regex during warmup for field `text`"), + "unexpected error: {error}" + ); + + // An unbuildable regex on a JSON sub-field also produces a useful error message. + let invalid_json: HashMap> = std::iter::once(( + json_field, + HashSet::from([Automaton::Regex(Some(json_path), vec!["(".to_string()])]), + )) + .collect(); + let error = warm_up_automatons(&searcher, &invalid_json) + .await + .unwrap_err() + .to_string(); + assert!( + error.contains( + "failed to build regex during warmup for field `body.process.executable`" + ), "unexpected error: {error}" ); } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml index 3a1d0e565e8..7f18e841155 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0031-regex.yaml @@ -105,3 +105,248 @@ expected: total: value: 60 relation: "eq" +--- +# Many wildcards across a few fields (6 distinct fields, well within the 20-field limit). +# All targeted fields are absent from the index so 0 hits are expected, +# but the query must be accepted without error. +params: + size: 0 +json: + query: + bool: + must: + - bool: + must: + - bool: + must_not: + - bool: + should: + - wildcard: + registry.value: + value: "*atlassian*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Agent*" + case_insensitive: true + - wildcard: + registry.value: + value: "*BlueJeans*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Cisco*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Discord*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Dropbox*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Docker*" + case_insensitive: true + - wildcard: + registry.value: + value: "*EIConnectorProxy*" + case_insensitive: true + - wildcard: + registry.value: + value: "*GoToMeeting*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Lifesize*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Mattermost*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Microsoft Edge Update*" + case_insensitive: true + - wildcard: + registry.value: + value: "*OneDrive*" + case_insensitive: true + - wildcard: + registry.value: + value: "*PTOneClick*" + case_insensitive: true + - wildcard: + registry.value: + value: "*slack*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Teams*" + case_insensitive: true + - wildcard: + registry.value: + value: "*VMware*" + case_insensitive: true + - wildcard: + registry.value: + value: "*W3DClient*" + case_insensitive: true + - wildcard: + registry.value: + value: "*Microsoft.lists*" + case_insensitive: true + - wildcard: + registry.value: + value: "*MicrosoftSaRA*" + case_insensitive: true + - wildcard: + process.executable: + value: "*atlassian*" + case_insensitive: true + - wildcard: + process.executable: + value: "*blujeanslauncher*" + case_insensitive: true + - wildcard: + process.executable: + value: "*dropbox*" + case_insensitive: true + - wildcard: + process.executable: + value: "*msiexec.exe*" + case_insensitive: true + - wildcard: + process.executable: + value: "*onedrivesetup.exe*" + case_insensitive: true + - wildcard: + process.executable: + value: "*sara.exe*" + case_insensitive: true + minimum_should_match: 1 + - bool: + should: + - bool: + must: + - bool: + should: + - wildcard: + registry.key: + value: "*Microsoft\\Windows\\CurrentVersion\\Run" + case_insensitive: true + - wildcard: + registry.key: + value: "*Microsoft\\Windows\\CurrentVersion\\Policies\\Explorer\\Run" + case_insensitive: true + - wildcard: + registry.key: + value: "*Microsoft\\Windows\\CurrentVersion\\RunOnce" + case_insensitive: true + minimum_should_match: 1 + - term: + action.properties.MessEventType: + value: "SetValue" + - bool: + should: + - wildcard: + action.properties.Details: + value: "*C:\\Windows\\Temp\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*C:\\ProgramData\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*Recycle.bin\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*AppData*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*C:\\Temp\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*C:\\Users\\Public\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*C:\\Users\\Default\\*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*wscript*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*script*" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*.exe" + case_insensitive: true + - wildcard: + action.properties.Details: + value: "*.exe\"" + case_insensitive: true + minimum_should_match: 1 + - bool: + must: + - wildcard: + process.command_line: + value: "*reg*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*add*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*microsoft\\windows\\currentversion*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*/f*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*/v*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*reg_sz*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*c:\\users\\*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*.exe*" + case_insensitive: true + - bool: + should: + - wildcard: + process.command_line: + value: "*\\run*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*\\policies\\explorer\\run*" + case_insensitive: true + - wildcard: + process.command_line: + value: "*\\runonce*" + case_insensitive: true + minimum_should_match: 1 + minimum_should_match: 1 +expected: + hits: + total: + value: 0 + relation: "eq"