diff --git a/Cargo.lock b/Cargo.lock index 961f2dd..669d6f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,13 +4,28 @@ version = 4 [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.21" @@ -67,6 +82,18 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -75,9 +102,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "block-buffer" @@ 
-88,17 +115,102 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + [[package]] name = "bytes" version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.2.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link", +] + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "colorchoice" @@ -106,6 +218,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -115,11 +233,51 @@ dependencies = [ "libc", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "deranged" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", "serde_core", @@ -142,7 +300,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -200,11 +358,18 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" name = "esi" version = "0.6.2" dependencies = [ + "base64", + "bytes", + "chrono", + "criterion", "env_logger", "fastly", "html-escape", "log", - "quick-xml", + "md5", + "nom", + "percent-encoding", + "rand", "regex", "thiserror 2.0.17", ] @@ -261,9 +426,9 @@ dependencies = [ [[package]] name = "fastly" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4843a1889ae95d46272904988743ba15dabff3596ffd2eb1aac129785d69f022" +checksum = "ac590af69cdea42ebbbaa566d0e603c6c0d7d6f53a507fe82cea65260419ab88" dependencies = [ "anyhow", "bytes", @@ -273,7 +438,7 @@ dependencies = [ "fastly-shared", "fastly-sys", "http", - "itertools", + "itertools 0.13.0", "lazy_static", "mime", "serde", @@ -289,9 +454,9 @@ dependencies = [ [[package]] name = "fastly-macros" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b646115f6f078dd945a0c7e8234fbef4940bc5c57cee13c95d780fd4b7136f" +checksum = 
"b012bd5c924ede9a1363ad29a232c4e95c9eb520a124979ad06043a6e44025dc" dependencies = [ "proc-macro2", "quote", @@ -300,9 +465,9 @@ dependencies = [ [[package]] name = "fastly-shared" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a39bd74fe73d177e7a6190a72f7f8570248d0d7b17c42124aca212e8ad2bcc50" +checksum = "fe8aaf17b8c0b689ce8370052e129c7722f3bd9c5ca27790db7624cf64b8c9b1" dependencies = [ "bitflags 1.3.2", "http", @@ -310,16 +475,22 @@ dependencies = [ [[package]] name = "fastly-sys" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d855e5c064ef17fe3a68602891515a0406797dd94aee258c9ebc87c334cfd76" +checksum = "a784af8ed4e5f3d32aac54f687b6a2dd844af304390d3bc70d50cbe6a772c1a7" dependencies = [ "bitflags 1.3.2", "fastly-shared", - "wasi", - "wit-bindgen-rt", + "wasip2", + "wit-bindgen", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "fnv" version = "1.0.7" @@ -345,12 +516,40 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "html-escape" version = "0.2.13" @@ -371,11 +570,35 @@ dependencies = [ "itoa", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -386,9 +609,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -399,11 +622,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ 
-414,42 +636,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -480,19 +698,39 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -530,7 +768,17 @@ checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", +] + +[[package]] +name = "js-sys" +version = "0.3.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +dependencies = [ + "once_cell", + "wasm-bindgen", ] [[package]] @@ -547,9 +795,9 @@ checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "log" @@ -557,6 +805,12 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + 
[[package]] name = "memchr" version = "2.7.6" @@ -569,17 +823,54 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "opaque-debug" @@ -610,9 +901,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -624,21 +915,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] -name = "proc-macro2" -version = "1.0.101" +name = "ppv-lite86" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "unicode-ident", + "zerocopy", ] [[package]] -name = "quick-xml" -version = "0.38.3" +name = "proc-macro2" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ - "memchr", + "unicode-ident", ] [[package]] @@ -650,6 +941,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "regex" version = 
"1.12.2" @@ -679,12 +1000,27 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.228" @@ -712,7 +1048,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -736,7 +1072,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -764,6 +1100,12 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.15.1" @@ -789,9 +1131,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", @@ -806,7 +1148,7 @@ checksum = 
"728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -835,7 +1177,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -846,7 +1188,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -881,14 +1223,24 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "typenum" version = "1.19.0" @@ -897,9 +1249,9 @@ checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "url" @@ -938,14 +1290,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" +name = "walkdir" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ - "wasip2", + "same-file", + "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -955,12 +1314,119 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.108", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = 
"0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.60.2" @@ -1041,31 +1507,21 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" dependencies = [ - "bitflags 2.9.4", -] - -[[package]] -name = "wit-bindgen-rt" -version = "0.42.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "051105bab12bc78e161f8dfb3596e772dd6a01ebf9c4840988e00347e744966a" 
-dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -1073,16 +1529,36 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea879c944afe8a2b25fef16bb4ba234f47c694565e97383b36f3a878219065c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf955aa904d6040f70dc8e9384444cb1030aed272ba3cb09bbc4ab9e7c1f34f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "zerofrom" version = "0.1.6" @@ -1100,15 +1576,15 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -1117,9 +1593,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -1128,11 +1604,11 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] diff --git a/README.md b/README.md index 1a6249b..9d1553a 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,118 @@ This crate provides a streaming Edge Side Includes parser and executor designed for Fastly Compute. -The implementation is a subset of the [ESI Language Specification 1.0](https://www.w3.org/TR/esi-lang/) supporting the following tags: +The implementation is a subset of Akamai ESI 5.0 supporting the following tags: - `` (+ `alt`, `onerror="continue"`) - `` | `` | `` -- `` | `` +- `` | `` (with subscript support for dict/list assignment) - `` | `` | `` +- `` | `` (loop over lists and dicts) - `` - `` +**Note:** The following tags support nested ESI tags: ``, ``, ``, ``, ``, ``, ``, and `` (long form only). + Other tags will be ignored and served to the client as-is. -This implementation also includes an expression interpreter and library of functions that can be used. 
Current functions include: +### Expression Features + +- **Integer literals**: `42`, `-10`, `0` +- **String literals**: `'single quoted'`, `"double quoted"`, `'''triple quoted'''` +- **Dict literals**: `{'key1': 'value1', 'key2': 'value2'}` +- **List literals**: `['item1', 'item2', 'item3']` +- **Nested structures**: Lists can be nested: `['one', ['a', 'b', 'c'], 'three']` +- **Subscript assignment**: `` or `` +- **Subscript access**: `$(dict{'key'})` or `$(list{0})` +- **Foreach loops**: Iterate over lists or dicts with `<esi:foreach>` and use `<esi:break>` to exit early +- **Comparison operators**: `==`, `!=`, `<`, `>`, `<=`, `>=`, `has`, `has_i`, `matches`, `matches_i` + - `has` - Case-sensitive substring containment: `$(str) has 'substring'` + - `has_i` - Case-insensitive substring containment: `$(str) has_i 'substring'` + - `matches` - Case-sensitive regex matching: `$(str) matches 'pattern'` + - `matches_i` - Case-insensitive regex matching: `$(str) matches_i 'pattern'` +- **Logical operators**: `&&` (and), `||` (or), `!` (not) + +### Function Library + +This implementation includes a comprehensive library of ESI functions: + +**String Manipulation:** + +- `$lower(string)` - Convert to lowercase +- `$upper(string)` - Convert to uppercase +- `$lstrip(string)`, `$rstrip(string)`, `$strip(string)` - Remove whitespace +- `$substr(string, start [, length])` - Extract substring +- `$replace(haystack, needle, replacement [, count])` - Replace occurrences +- `$str(value)` - Convert to string +- `$join(list, separator)` - Join list elements +- `$string_split(string, delimiter [, maxsplit])` - Split string into list + +**Encoding/Decoding:** + +- `$html_encode(string)`, `$html_decode(string)` - HTML entity encoding +- `$url_encode(string)`, `$url_decode(string)` - URL encoding +- `$base64_encode(string)` - Base64 encoding +- `$convert_to_unicode(string)`, `$convert_from_unicode(string)` - Unicode conversion + +**Quote Helpers:** + +- `$dollar()` - Returns `$` +- `$dquote()` - Returns `"` +- 
`$squote()` - Returns `'` + +**Type Conversion & Checks:** + +- `$int(value)` - Convert to integer +- `$exists(value)` - Check if value exists +- `$is_empty(value)` - Check if value is empty +- `$len(value)` - Get length of string or list + +**List Operations:** + +- `$list_delitem(list, index)` - Remove item from list +- `$index(string, substring)`, `$rindex(string, substring)` - Find substring position + +**Cryptographic:** + +- `$md5_digest(string)` - Generate MD5 hash + +**Time/Date:** -- `$lower(string)` -- `$html_encode(string)` -- `$replace(haystack, needle, replacement [, count])` +- `$time()` - Current Unix timestamp +- `$http_time(timestamp)` - Format timestamp as HTTP date +- `$strftime(format, timestamp)` - Format timestamp with custom format +- `$bin_int(binary_string)` - Convert binary string to integer + +**Random & Response:** + +- `$rand()` - Generate random number +- `$last_rand()` - Get last generated random number + +**Response Manipulation:** + +These functions modify the HTTP response sent to the client: + +- `$add_header(name, value)` - Add a custom response header + ```html + $add_header('X-Custom-Header', 'my-value') + ``` +- `$set_response_code(code [, body])` - Set HTTP status code and optionally override response body + ```html + $set_response_code(404, 'Page not found') + ``` +- `$set_redirect(url [, code])` - Set HTTP redirect (default 302) + ```html + $set_redirect('https://example.com/new-location') $set_redirect('https://example.com/moved', 301) + ``` + +**Note:** Response manipulation functions are buffered during ESI processing and applied when `process_response()` sends the final response to the client. 
## Example Usage +### Streaming Processing (Recommended) + +The recommended approach uses streaming to process the document as it arrives, minimizing memory usage and latency: + ```rust,no_run use fastly::{http::StatusCode, mime, Error, Request, Response}; @@ -51,8 +144,9 @@ fn handle_request(req: Request) -> Result<(), Error> { esi::Configuration::default() ); + // Stream the ESI response directly to the client processor.process_response( - // The ESI source document. Note that the body will be consumed. + // The ESI source document. Body will be consumed and streamed. &mut beresp, // Optionally provide a template for the client response. Some(Response::from_status(StatusCode::OK).with_content_type(mime::TEXT_HTML)), @@ -82,6 +176,37 @@ fn handle_request(req: Request) -> Result<(), Error> { } ``` +### Custom Stream Processing + +For advanced use cases, you can process any `BufRead` source and write to any `Write` destination: + +```rust,no_run +use std::io::{BufReader, Write}; +use esi::{Processor, Configuration}; + +fn process_custom_stream( + input: impl std::io::Read, + output: &mut impl Write, +) -> Result<(), esi::ExecutionError> { + let mut processor = Processor::new(None, Configuration::default()); + + // Process from any readable source + let reader = BufReader::new(input); + + processor.process_stream( + reader, + output, + Some(&|req| { + // Custom fragment dispatcher + Ok(req.send_async("backend")?.into()) + }), + None, + )?; + + Ok(()) +} +``` + See example applications in the [`examples`](./examples) subdirectory or read the hosted documentation at [docs.rs/esi](https://docs.rs/esi). Due to the fact that this processor streams fragments to the client as soon as they are available, it is not possible to return a relevant status code for later errors once we have started streaming the response to the client. 
For this reason, it is recommended that you refer to the [`esi_example_advanced_error_handling`](./examples/esi_example_advanced_error_handling) application, which allows you to handle errors gracefully by maintaining ownership of the output stream. ## Testing diff --git a/esi/Cargo.toml b/esi/Cargo.toml index dba3cde..28881c7 100644 --- a/esi/Cargo.toml +++ b/esi/Cargo.toml @@ -9,12 +9,26 @@ repository = "https://github.com/fastly/esi" readme = "./README.md" [dependencies] -quick-xml = "0.38.0" thiserror = "2.0.6" fastly = "^0.11" log = "^0.4" regex = "1.11.1" html-escape = "0.2.13" +nom = "7.1.3" +bytes = "1.5" +base64 = "0.22" +percent-encoding = "2.3" +md5 = "0.7" +chrono = { version = "0.4", default-features = false, features = [ + "clock", + "std", +] } +rand = "0.8" [dev-dependencies] env_logger = "^0.11" +criterion = { version = "0.5", default-features = false } + +[[bench]] +name = "parser_benchmarks" +harness = false diff --git a/esi/benches/README.md b/esi/benches/README.md new file mode 100644 index 0000000..e036060 --- /dev/null +++ b/esi/benches/README.md @@ -0,0 +1,148 @@ +# ESI Parser Benchmarks + +This directory contains benchmarks for the nom-based ESI parser. + +## Running Benchmarks + +To run all benchmarks: + +```bash +cargo bench --bench parser_benchmarks +``` + +To run a specific benchmark group: + +```bash +cargo bench --bench parser_benchmarks -- esi_documents +cargo bench --bench parser_benchmarks -- parser_scaling +cargo bench --bench parser_benchmarks -- expression_parsing +cargo bench --bench parser_benchmarks -- interpolated_strings +``` + +To run a specific benchmark: + +```bash +cargo bench --bench parser_benchmarks -- "simple_include" +``` + +## Benchmark Groups + +### 1. `esi_documents` ⚖️ + +**Direct comparison with bench branch (old XML parser)** + +This group uses the exact same test cases as the `bench` branch to enable +apples-to-apples performance comparison between the old XML parser and the new nom parser. 
+ +Test cases: + +- simple_include +- try_block +- try_block_with_content +- nested_try +- vars +- choose +- complex_document + +### 2. `nom_parser_features` + +Tests nom parser-specific features and improvements: + +- HTML comments parsing +- Script tag handling +- Variable assignments (assign) +- Advanced expressions (comparison operators, logical operators) +- Mixed content with multiple ESI directives + +### 3. `parser_scaling` + +Tests how the parser scales with document size: + +- 100, 500, 1000, 5000, and 10000 element documents +- Measures parsing performance as document complexity grows + +### 4. `expression_parsing` + +Tests ESI expression parsing performance: + +- Simple variables +- Variables with keys (e.g., `$(HTTP_COOKIE{name})`) +- Variables with defaults +- Comparison operators (==, !=, >, <, >=, <=) +- Logical operators (&&, ||) +- Negation (!) +- Grouped expressions with parentheses +- Complex nested expressions + +### 5. `interpolated_strings` + +Tests parsing of strings with interpolated variables: + +- Plain text (no interpolation) +- Single variable +- Multiple variables +- Mixed content with HTML + +## Interpreting Results + +Criterion will output: + +- **Time per iteration**: How long each benchmark takes to run +- **Throughput**: How many operations per second (where applicable) +- **Change detection**: Comparison with previous runs to detect regressions + +Results are saved in `target/criterion/` and include HTML reports. + +## Viewing Reports + +After running benchmarks, open the HTML reports: + +```bash +open target/criterion/report/index.html +``` + +## Comparing with the Old XML Parser (bench branch) + +To compare the nom parser performance with the old XML parser: + +1. Run benchmarks on the bench branch (old XML parser): + + ```bash + git checkout bench + cargo bench --bench esi_processing + ``` + +2. 
Switch to nom-parser-integration and run the comparison benchmark: + ```bash + git checkout nom-parser-integration + cargo bench --bench parser_benchmarks -- esi_documents + ``` + +The `esi_documents` benchmark group uses the **exact same test cases** as the bench branch, +ensuring a fair apples-to-apples comparison between the two parsers. + +## Comparing Between Branches + +To compare performance between any two branches: + +1. Run benchmarks on the baseline branch: + + ```bash + git checkout main + cargo bench --bench parser_benchmarks + ``` + +2. Switch to your branch and run again: + ```bash + git checkout your-branch + cargo bench --bench parser_benchmarks + ``` + +Criterion will automatically show the performance difference. + +## Notes + +- Benchmarks run with optimizations enabled (`--release`) +- Each benchmark is run multiple times to get accurate measurements +- Warm-up iterations are performed before measurement +- Results may vary based on system load and hardware diff --git a/esi/benches/parser_benchmarks.rs b/esi/benches/parser_benchmarks.rs new file mode 100644 index 0000000..ad5fe3a --- /dev/null +++ b/esi/benches/parser_benchmarks.rs @@ -0,0 +1,304 @@ +use bytes::Bytes; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +use esi::parse; + +// Benchmark group that matches the test cases from the bench branch (old XML parser) +// This allows direct comparison between the old parser and nom parser +fn benchmark_various_esi_documents(c: &mut Criterion) { + let mut group = c.benchmark_group("esi_documents"); + + // These test cases match exactly what's in the bench branch for fair comparison + let documents = vec![ + ( + "simple_include", + r#""#, + ), + ( + "try_block", + r#" + + +

Fallback

+
+ "#, + ), + ( + "try_block_with_content", + r#" + + + +

Some content

+
+ +

Fallback content

+
+
+ "#, + ), + ( + "nested_try", + r#" + + + + +

Inner fallback

+
+
+

Outer fallback

+
+ "#, + ), + ("vars", r#"$(HTTP_HOST)"#), + ( + "choose", + r#" + + +

Premium content

+
+ +

Regular content

+
+
+ "#, + ), + ( + "complex_document", + r#" + + + + + + +

Default header

+
+
+ $(HTTP_HOST) + + +

Premium content

+
+ +

Regular content

+
+
+ + "#, + ), + ]; + + for (name, xml) in documents { + group.bench_with_input(BenchmarkId::from_parameter(name), &xml, |b, xml| { + b.iter(|| { + let bytes = Bytes::from(*xml); + let result = parse(black_box(&bytes)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +// Additional benchmark group for nom parser-specific features +// These test new capabilities not present in the old XML parser +fn benchmark_nom_parser_features(c: &mut Criterion) { + let mut group = c.benchmark_group("nom_parser_features"); + + let documents = vec![ + ( + "simple_text", + r#"

Simple text content

"#, + ), + ( + "html_comment", + r#"

Content

"#, + ), + ( + "vars_long", + r#"User agent: $(HTTP_USER_AGENT), Host: $(HTTP_HOST)"#, + ), + ("assign_short", r#""#), + ( + "assign_long", + r#"Some value with $(VAR)"#, + ), + ( + "choose_multiple_when", + r#" + + +

Premium content

+
+ +

Basic content

+
+ +

Regular content

+
+
+ "#, + ), + ( + "expression_comparison", + r#" + High + Medium + Low + "#, + ), + ( + "expression_logical", + r#" + Access granted + Access denied + "#, + ), + ( + "script_tag", + r#"Content"#, + ), + ( + "mixed_content", + r#" +
+ Text before + + Text after + $(VAR) + More text + + + Final text +
+ "#, + ), + ]; + + for (name, xml) in documents { + group.bench_with_input(BenchmarkId::from_parameter(name), &xml, |b, xml| { + b.iter(|| { + let bytes = Bytes::from(*xml); + let result = parse(black_box(&bytes)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +fn benchmark_parser_scaling(c: &mut Criterion) { + let mut group = c.benchmark_group("parser_scaling"); + + // Test how parser scales with document size + let sizes = vec![100, 500, 1000, 5000, 10000]; + + for size in sizes { + let mut doc = String::new(); + doc.push_str(""); + + for i in 0..size { + doc.push_str(&format!( + r#"
Item {}
$(VAR_{})"#, + i, i + )); + } + + doc.push_str(""); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("elements_{}", size * 2)), + &doc, + |b, doc| { + b.iter(|| { + let bytes = Bytes::copy_from_slice(doc.as_bytes()); + let result = parse(black_box(&bytes)).unwrap(); + black_box(result); + }); + }, + ); + } + + group.finish(); +} + +fn benchmark_expression_parsing(c: &mut Criterion) { + let mut group = c.benchmark_group("expression_parsing"); + + let expressions = vec![ + ("simple_var", "$(VAR)"), + ("var_with_key", "$(HTTP_COOKIE{name})"), + ("var_with_default", "$(VAR|'default')"), + ("integer", "42"), + ("string", "'hello world'"), + ("comparison_eq", "$(count) == 10"), + ("comparison_ne", "$(status) != 'error'"), + ("comparison_gt", "$(value) > 100"), + ("comparison_lte", "$(score) <= 50"), + ("logical_and", "$(a) == 1 && $(b) == 2"), + ("logical_or", "$(x) == 'yes' || $(y) == 'no'"), + ("negation", "!($(flag))"), + ("grouped", "($(a) == 1) && ($(b) == 2)"), + ( + "complex", + "(($(role) == 'admin') || ($(role) == 'mod')) && $(active) != false", + ), + ("function_call", "$url_encode($(path))"), + ("nested_function", "$base64_encode($url_encode($(text)))"), + ]; + + for (name, expr) in expressions { + group.bench_with_input(BenchmarkId::from_parameter(name), &expr, |b, expr| { + b.iter(|| { + let result = esi::parse_expression(black_box(expr)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +fn benchmark_interpolated_strings(c: &mut Criterion) { + let mut group = c.benchmark_group("interpolated_strings"); + + let strings = vec![ + ("no_interpolation", "Just plain text"), + ("single_var", "Hello $(name)"), + ("multiple_vars", "$(first) $(middle) $(last)"), + ( + "mixed_content", + "User: $(user), Email: $(email), Role: $(role)", + ), + ( + "with_html", + "
Welcome $(user)!

Your score: $(score)

", + ), + ]; + + for (name, string) in strings { + group.bench_with_input(BenchmarkId::from_parameter(name), &string, |b, string| { + b.iter(|| { + let bytes = Bytes::from(*string); + let result = esi::interpolated_content(black_box(&bytes)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + benchmark_various_esi_documents, + benchmark_nom_parser_features, + benchmark_parser_scaling, + benchmark_expression_parsing, + benchmark_interpolated_strings +); +criterion_main!(benches); diff --git a/esi/src/document.rs b/esi/src/document.rs deleted file mode 100644 index a791377..0000000 --- a/esi/src/document.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::collections::VecDeque; - -use crate::{PendingFragmentContent, Result}; -use fastly::Request; -use quick_xml::Writer; - -/// Represents a fragment of a document that can be fetched and processed. -/// -/// A `Fragment` contains the necessary information to make a request for a part of a document, -/// handle potential errors, and retrieve the content asynchronously. -/// -/// # Fields -/// -/// * `request` - Metadata of the request. -/// * `alt` - An optional alternate request to send if the original request fails. -/// * `continue_on_error` - Whether to continue processing on error. -/// * `pending_content` - The pending fragment response, which can be polled to retrieve the content. -pub struct Fragment { - // Metadata of the request - pub(crate) request: Request, - // An optional alternate request to send if the original request fails - pub(crate) alt: Option>, - // Whether to continue on error - pub(crate) continue_on_error: bool, - // The pending fragment response, which can be polled to retrieve the content - pub(crate) pending_content: PendingFragmentContent, -} - -/// `Task` is combining raw data and an include fragment for both `attempt` and `except` arms -/// the result is written to `output`. 
-/// -/// # Fields: -/// -/// * `queue` - A queue of elements to process. -/// * `output` - The writer to write the processed data to. -/// * `status` - The status of the fetch operation. -pub struct Task { - pub queue: VecDeque, - pub output: Writer>, - pub status: FetchState, -} - -impl Default for Task { - fn default() -> Self { - Self { - queue: VecDeque::new(), - output: Writer::new(Vec::new()), - status: FetchState::default(), - } - } -} - -impl Task { - pub fn new() -> Self { - Self::default() - } -} - -/// A section of the pending response, either raw XML data or a pending fragment request. -/// * `Raw` - Raw XML data. -/// * `Include` - A pending fragment request. -/// * `Try` - A try block with an attempt and except task. -/// -pub enum Element { - Raw(Vec), - Include(Box), - Try { - except_task: Box, - attempt_task: Box, - }, -} - -/// The state of a fetch operation. -/// * `Failed` - The request failed with the given status code. -/// * `Pending` - The request is still pending. -/// * `Succeeded` - The request succeeded. 
-/// -pub enum FetchState { - Failed(Request, u16), - Pending, - Succeeded, -} -impl Clone for FetchState { - fn clone(&self) -> Self { - match self { - Self::Failed(req, res) => Self::Failed(req.clone_without_body(), *res), - Self::Pending => Self::Pending, - Self::Succeeded => Self::Succeeded, - } - } -} -impl Default for FetchState { - fn default() -> Self { - Self::Pending - } -} - -impl std::fmt::Debug for Element { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Raw(_) => write!(f, "Raw"), - Self::Include(fragment) if fragment.alt.is_some() => { - write!(f, "Include Fragment(with alt)") - } - Self::Include(_) => write!(f, "Include Fragment"), - Self::Try { - attempt_task, - except_task, - } => write!( - f, - "Try - Attempt: {:?}, Except: {:?}", - attempt_task.queue, except_task.queue - ), - } - } -} diff --git a/esi/src/error.rs b/esi/src/error.rs index e4e1f14..bbda6e6 100644 --- a/esi/src/error.rs +++ b/esi/src/error.rs @@ -7,8 +7,9 @@ use fastly::http::request::SendError; #[allow(clippy::large_enum_variant)] pub enum ExecutionError { /// Invalid XML was encountered during parsing. - #[error("xml parsing error: {0}")] - XMLError(#[from] quick_xml::Error), + /// (Legacy - not used by nom parser) + // #[error("xml parsing error: {0}")] + // XMLError(#[from] quick_xml::Error), /// The ESI document contains a tag with a missing parameter. 
#[error("tag `{0}` is missing required parameter `{1}`")] @@ -53,6 +54,10 @@ pub enum ExecutionError { /// An error occurred while executing a function in an eval context #[error("failed to execute a function: `{0}`")] FunctionError(String), + + /// An error occurred during variable assignment (e.g., out of bounds, type mismatch) + #[error("variable assignment error: `{0}`")] + VariableError(String), } pub type Result = std::result::Result; diff --git a/esi/src/expression.rs b/esi/src/expression.rs index a6ffe0a..86f85b0 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -1,71 +1,219 @@ +use bytes::Bytes; use fastly::http::Method; use fastly::Request; -use log::debug; use regex::RegexBuilder; -use std::borrow::Cow; -use std::fmt::Write; -use std::iter::Peekable; -use std::slice::Iter; -use std::str::Chars; use std::{collections::HashMap, fmt::Display}; -use crate::{functions, ExecutionError, Result}; -/// Attempts to evaluate an interpolated expression, returning None on failure +use crate::{functions, parser_types, ExecutionError, Result}; + +/// Evaluates a nom-parsed expression directly without re-lexing/parsing /// -/// This function evaluates expressions like `$(HTTP_HOST)` in ESI markup, gracefully -/// handling failures by returning None instead of propagating errors. This ensures -/// that a failed expression evaluation does not halt overall document processing. +/// This function takes an expression that was already parsed by the nom parser +/// and evaluates it using the full expression evaluator, supporting all operators, +/// comparisons, and functions. 
/// /// # Arguments -/// * `cur` - Peekable character iterator containing the expression to evaluate +/// * `expr` - The parsed expression from nom parser /// * `ctx` - Evaluation context containing variables and state /// /// # Returns -/// * `Option` - The evaluated expression value if successful, None if evaluation fails -/// ``` -pub fn try_evaluate_interpolated( - cur: &mut Peekable, - ctx: &mut EvalContext, -) -> Option { - evaluate_interpolated(cur, ctx) - .map_err(|e| { - // We eat the error here because a failed expression should result in an empty result - // and not prevent the rest of the file from processing. - debug!("Error while evaluating interpolated expression: {e}"); - }) - .ok() -} +/// * `Result` - The evaluated expression result or an error +pub fn eval_expr(expr: &parser_types::Expr, ctx: &mut EvalContext) -> Result { + match expr { + parser_types::Expr::Integer(i) => Ok(Value::Integer(*i)), + parser_types::Expr::String(Some(s)) => Ok(Value::Text(Bytes::from(s.clone()))), + parser_types::Expr::String(None) => Ok(Value::Text(Bytes::new())), + parser_types::Expr::Variable(name, key, default) => { + // Evaluate the key expression if present + let evaluated_key = if let Some(key_expr) = key { + let key_result = eval_expr(key_expr, ctx)?; + Some(key_result.to_string()) + } else { + None + }; + + let value = ctx.get_variable(name, evaluated_key.as_deref()); + + // If value is Null and we have a default, evaluate and use the default + if matches!(value, Value::Null) { + if let Some(default_expr) = default { + return eval_expr(default_expr, ctx); + } + } -fn evaluate_interpolated(cur: &mut Peekable, ctx: &mut EvalContext) -> Result { - lex_interpolated_expr(cur) - .and_then(|tokens| parse(&tokens)) - .and_then(|expr| eval_expr(expr, ctx)) + Ok(value) + } + parser_types::Expr::Comparison { + left, + operator, + right, + } => { + let left_val = eval_expr(left, ctx)?; + let right_val = eval_expr(right, ctx)?; + + match operator { + 
parser_types::Operator::Matches | parser_types::Operator::MatchesInsensitive => { + let test = left_val.to_string(); + let pattern = right_val.to_string(); + + let re = if *operator == parser_types::Operator::Matches { + RegexBuilder::new(&pattern).build()? + } else { + RegexBuilder::new(&pattern).case_insensitive(true).build()? + }; + + if let Some(captures) = re.captures(&test) { + for (i, cap) in captures.iter().enumerate() { + let capval = cap.map_or(Value::Null, |s| { + Value::Text(Bytes::from(s.as_str().to_string())) + }); + ctx.set_variable( + &ctx.match_name.clone(), + Some(&i.to_string()), + capval, + )?; + } + Ok(Value::Boolean(true)) + } else { + Ok(Value::Boolean(false)) + } + } + parser_types::Operator::Has => { + let haystack = left_val.to_string(); + let needle = right_val.to_string(); + Ok(Value::Boolean(haystack.contains(&needle))) + } + parser_types::Operator::HasInsensitive => { + let haystack = left_val.to_string().to_lowercase(); + let needle = right_val.to_string().to_lowercase(); + Ok(Value::Boolean(haystack.contains(&needle))) + } + parser_types::Operator::Equals => { + // Try numeric comparison first, then string comparison + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l == r)) + } else { + Ok(Value::Boolean( + left_val.to_string() == right_val.to_string(), + )) + } + } + parser_types::Operator::NotEquals => { + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l != r)) + } else { + Ok(Value::Boolean( + left_val.to_string() != right_val.to_string(), + )) + } + } + parser_types::Operator::LessThan => { + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l < r)) + } else { + Ok(Value::Boolean(left_val.to_string() < right_val.to_string())) + } + } + parser_types::Operator::LessThanOrEqual => { + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l <= r)) + } else { 
+ Ok(Value::Boolean( + left_val.to_string() <= right_val.to_string(), + )) + } + } + parser_types::Operator::GreaterThan => { + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l > r)) + } else { + Ok(Value::Boolean(left_val.to_string() > right_val.to_string())) + } + } + parser_types::Operator::GreaterThanOrEqual => { + if let (Value::Integer(l), Value::Integer(r)) = (&left_val, &right_val) { + Ok(Value::Boolean(l >= r)) + } else { + Ok(Value::Boolean( + left_val.to_string() >= right_val.to_string(), + )) + } + } + parser_types::Operator::And => { + Ok(Value::Boolean(left_val.to_bool() && right_val.to_bool())) + } + parser_types::Operator::Or => { + Ok(Value::Boolean(left_val.to_bool() || right_val.to_bool())) + } + } + } + parser_types::Expr::Call(func_name, args) => { + let mut values = Vec::new(); + for arg in args { + values.push(eval_expr(arg, ctx)?); + } + call_dispatch(func_name, &values, ctx) + } + parser_types::Expr::Not(expr) => { + let inner_value = eval_expr(expr, ctx)?; + Ok(Value::Boolean(!inner_value.to_bool())) + } + parser_types::Expr::DictLiteral(pairs) => { + let mut map = HashMap::new(); + for (key_expr, val_expr) in pairs { + let key = eval_expr(key_expr, ctx)?; + let val = eval_expr(val_expr, ctx)?; + map.insert(key.to_string(), val); + } + Ok(Value::Dict(map)) + } + parser_types::Expr::ListLiteral(items) => { + let mut values = Vec::new(); + for item_expr in items { + values.push(eval_expr(item_expr, ctx)?); + } + Ok(Value::List(values)) + } + parser_types::Expr::Interpolated(elements) => { + // Evaluate each element and concatenate the results + // This handles compound expressions like: prefix$(VAR)suffix + let mut result = String::new(); + for element in elements { + match element { + parser_types::Element::Text(text) => { + result.push_str(&String::from_utf8_lossy(text.as_ref())); + } + parser_types::Element::Html(html) => { + result.push_str(&String::from_utf8_lossy(html.as_ref())); + } + 
parser_types::Element::Expr(expr) => { + let value = eval_expr(expr, ctx)?; + result.push_str(&value.to_string()); + } + parser_types::Element::Esi(_) => { + // ESI tags in interpolated expressions should not happen + // but if they do, ignore them + } + } + } + Ok(Value::Text(Bytes::from(result))) + } + } } /// Evaluates an ESI expression string in the given context /// /// # Arguments /// * `raw_expr` - The raw expression string to evaluate -/// * `ctx` - Evaluation context containing variables and state -/// -/// # Returns -/// * `Result` - The evaluated expression result or an error -/// -pub fn evaluate_expression(raw_expr: &str, ctx: &mut EvalContext) -> Result { - lex_expr(raw_expr) - .and_then(|tokens| parse(&tokens)) - .and_then(|expr: Expr| eval_expr(expr, ctx)) - .map_err(|e| { - ExecutionError::ExpressionError(format!( - "Error occurred during expression evaluation: {e}" - )) - }) -} - pub struct EvalContext { vars: HashMap, match_name: String, request: Request, + response_headers: Vec<(String, String)>, + last_rand: Option, + response_status: Option, + response_body_override: Option, + query_params_cache: std::cell::RefCell>>>, } impl Default for EvalContext { fn default() -> Self { @@ -73,6 +221,11 @@ impl Default for EvalContext { vars: HashMap::new(), match_name: "MATCHES".to_string(), request: Request::new(Method::GET, "http://localhost"), + response_headers: Vec::new(), + last_rand: None, + response_status: None, + response_body_override: None, + query_params_cache: std::cell::RefCell::new(None), } } } @@ -85,8 +238,76 @@ impl EvalContext { vars, match_name: "MATCHES".to_string(), request: Request::new(Method::GET, "http://localhost"), + response_headers: Vec::new(), + last_rand: None, + response_status: None, + response_body_override: None, + query_params_cache: std::cell::RefCell::new(None), + } + } + + pub fn add_response_header(&mut self, name: String, value: String) { + self.response_headers.push((name, value)); + } + + pub fn 
set_last_rand(&mut self, v: i32) { + self.last_rand = Some(v); + } + + pub fn last_rand(&self) -> Option { + self.last_rand + } + + pub fn response_headers(&self) -> &[(String, String)] { + &self.response_headers + } + + pub fn set_response_status(&mut self, status: i32) { + self.response_status = Some(status); + } + + pub fn response_status(&self) -> Option { + self.response_status + } + + pub fn set_response_body_override(&mut self, body: Option) { + self.response_body_override = body; + } + + pub fn response_body_override(&self) -> Option<&Bytes> { + self.response_body_override.as_ref() + } + + fn parse_query_params(&self) -> HashMap> { + let mut params: HashMap> = HashMap::new(); + + if let Some(query) = self.request.get_query_str() { + for pair in query.split('&') { + if let Some((key, value)) = pair.split_once('=') { + params + .entry(key.to_string()) + .or_default() + .push(Bytes::from(value.to_string())); + } else if !pair.is_empty() { + // Handle keys without values (e.g., ?flag) + params + .entry(pair.to_string()) + .or_default() + .push(Bytes::new()); + } + } } + + params } + + fn get_query_params(&self) -> std::cell::Ref<'_, Option>>> { + if self.query_params_cache.borrow().is_none() { + *self.query_params_cache.borrow_mut() = Some(self.parse_query_params()); + } + self.query_params_cache.borrow() + } + pub fn get_variable(&self, key: &str, subkey: Option<&str>) -> Value { match key { "REQUEST_METHOD" => Value::Text(self.request.get_method_str().to_string().into()), @@ -97,17 +318,43 @@ impl EvalContext { .map_or_else(String::new, |ip| ip.to_string()) .into(), ), - "QUERY_STRING" => self.request.get_query_str().map_or(Value::Null, |query| { - debug!("Query string: {query}"); - subkey.map_or_else( - || Value::Text(Cow::Owned(query.to_string())), - |field| { - self.request - .get_query_parameter(field) - .map_or(Value::Null, |v| Value::Text(Cow::Owned(v.to_string()))) - }, - ) - }), + "QUERY_STRING" => { + let params_ref = self.get_query_params(); + let 
params = params_ref.as_ref().unwrap(); + + match subkey { + None => { + // Return Dict of all query params when no subkey specified + if params.is_empty() { + Value::Null + } else { + let mut dict = HashMap::new(); + for (key, values) in params.iter() { + let value = match values.len() { + 0 => Value::Null, + 1 => Value::Text(values[0].clone()), + _ => Value::List( + values.iter().map(|v| Value::Text(v.clone())).collect(), + ), + }; + dict.insert(key.clone(), value); + } + Value::Dict(dict) + } + } + Some(field) => { + // Look up the field in parsed params + match params.get(field) { + None => Value::Null, + Some(values) if values.is_empty() => Value::Null, + Some(values) if values.len() == 1 => Value::Text(values[0].clone()), + Some(values) => { + Value::List(values.iter().map(|v| Value::Text(v.clone())).collect()) + } + } + } + } + } _ if key.starts_with("HTTP_") => { let header = key.strip_prefix("HTTP_").unwrap_or_default(); self.request.get_header(header).map_or(Value::Null, |h| { @@ -128,21 +375,34 @@ impl EvalContext { ) }) } - - _ => self - .vars - .get(&format_key(key, subkey)) - .unwrap_or(&Value::Null) - .to_owned(), + _ => { + let stored = self.vars.get(key).cloned().unwrap_or(Value::Null); + match subkey { + None => stored, + Some(sub) => get_subvalue(&stored, sub), + } + } } } - pub fn set_variable(&mut self, key: &str, subkey: Option<&str>, value: Value) { - let key = format_key(key, subkey); - match value { - Value::Null => {} - _ => { - self.vars.insert(key, value); + pub fn set_variable(&mut self, key: &str, subkey: Option<&str>, value: Value) -> Result<()> { + if matches!(value, Value::Null) { + return Ok(()); + } + + match subkey { + None => { + self.vars.insert(key.to_string(), value); + Ok(()) + } + Some(sub) => { + // If variable exists and is a list with numeric subscript, handle list assignment + // Otherwise create/use dict (dicts can have numeric string keys) + let entry = self + .vars + .entry(key.to_string()) + .or_insert_with(|| 
Value::Dict(HashMap::new())); + set_subvalue(entry, sub, value) } } } @@ -153,6 +413,12 @@ impl EvalContext { pub fn set_request(&mut self, request: Request) { self.request = request; + // Clear cached query params when request changes + *self.query_params_cache.borrow_mut() = None; + } + + pub const fn get_request(&self) -> &Request { + &self.request } } @@ -162,997 +428,242 @@ impl From<[(String, Value); N]> for EvalContext { } } -fn format_key(key: &str, subkey: Option<&str>) -> String { - subkey.map_or_else(|| key.to_string(), |subkey| format!("{key}[{subkey}]")) -} -/// Represents a value in an ESI expression. -/// -/// Values can be of different types: -/// - `Integer`: A 32-bit signed integer -/// - `String`: A UTF-8 string -/// - `Boolean`: A boolean value (true/false) -/// - `Null`: Represents an absence of value -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Value { - Integer(i32), - Text(Cow<'static, str>), - Boolean(bool), - Null, -} - -impl Value { - pub(crate) fn to_bool(&self) -> bool { - match self { - &Self::Integer(n) => !matches!(n, 0), - Self::Text(s) => !matches!(s, s if s == &String::new()), - Self::Boolean(b) => *b, - &Self::Null => false, +fn get_subvalue(parent: &Value, subkey: &str) -> Value { + if let Ok(idx) = subkey.parse::() { + if let Value::List(items) = parent { + return items.get(idx).cloned().unwrap_or(Value::Null); } } -} - -impl From for Value { - fn from(s: String) -> Self { - Self::Text(Cow::Owned(s)) // Convert `String` to `Cow::Owned` - } -} - -impl From<&str> for Value { - fn from(s: &str) -> Self { - Self::Text(Cow::Owned(s.to_owned())) // Convert `&str` to owned String - } -} -impl AsRef for Value { - fn as_ref(&self) -> &str { - match *self { - Self::Text(ref text) => text.as_ref(), - _ => panic!("Value is not a Text variant"), - } + if let Value::Dict(map) = parent { + return map.get(subkey).cloned().unwrap_or(Value::Null); } -} -impl Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { - match self { - Self::Integer(i) => write!(f, "{i}"), - Self::Text(s) => write!(f, "{s}"), - Self::Boolean(b) => write!( - f, - "{}", - match b { - true => "true", - false => "false", - } - ), - Self::Null => write!(f, "null"), - } - } + Value::Null } -fn eval_expr(expr: Expr, ctx: &mut EvalContext) -> Result { - let result = match expr { - Expr::Integer(i) => Value::Integer(i), - Expr::String(s) => Value::Text(s.into()), - Expr::Variable(key, None) => ctx.get_variable(&key, None), - Expr::Variable(key, Some(subkey_expr)) => { - let subkey = eval_expr(*subkey_expr, ctx)?.to_string(); - ctx.get_variable(&key, Some(&subkey)) - } - Expr::Comparison(c) => { - let left = eval_expr(c.left, ctx)?; - let right = eval_expr(c.right, ctx)?; - match c.operator { - Operator::Matches | Operator::MatchesInsensitive => { - let test = left.to_string(); - let pattern = right.to_string(); - - let re = if c.operator == Operator::Matches { - RegexBuilder::new(&pattern).build()? - } else { - RegexBuilder::new(&pattern).case_insensitive(true).build()? 
- }; - - if let Some(captures) = re.captures(&test) { - for (i, cap) in captures.iter().enumerate() { - let capval = cap.map_or(Value::Null, |s| { - Value::Text(Cow::Owned(s.as_str().into())) - }); - { - ctx.set_variable( - &ctx.match_name.clone(), - Some(&i.to_string()), - capval, - ); - } - } - Value::Boolean(true) - } else { - Value::Boolean(false) - } - } - Operator::Equals => { - // Try numeric comparison first, then string comparison - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l == r) - } else { - Value::Boolean(left.to_string() == right.to_string()) - } - } - Operator::NotEquals => { - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l != r) - } else { - Value::Boolean(left.to_string() != right.to_string()) - } - } - Operator::LessThan => { - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l < r) - } else { - Value::Boolean(left.to_string() < right.to_string()) - } - } - Operator::LessThanOrEqual => { - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l <= r) - } else { - Value::Boolean(left.to_string() <= right.to_string()) - } +fn set_subvalue(parent: &mut Value, subkey: &str, value: Value) -> Result<()> { + // Check if subscript is a numeric index + if let Ok(idx) = subkey.parse::() { + match parent { + Value::List(items) => { + // For existing lists, index must exist - no auto-expansion + if idx >= items.len() { + return Err(ExecutionError::VariableError(format!( + "list index {} out of range (list has {} elements)", + idx, + items.len() + ))); } - Operator::GreaterThan => { - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l > r) - } else { - Value::Boolean(left.to_string() > right.to_string()) - } - } - Operator::GreaterThanOrEqual => { - if let (Value::Integer(l), Value::Integer(r)) = (&left, &right) { - Value::Boolean(l >= r) - } else { - Value::Boolean(left.to_string() >= 
right.to_string()) - } - } - Operator::And => Value::Boolean(left.to_bool() && right.to_bool()), - Operator::Or => Value::Boolean(left.to_bool() || right.to_bool()), + items[idx] = value; + return Ok(()); } - } - Expr::Call(identifier, args) => { - let mut values = Vec::new(); - for arg in args { - values.push(eval_expr(arg, ctx)?); - } - call_dispatch(&identifier, &values)? - } - Expr::Not(expr) => { - // Evaluate the inner expression and negate its boolean value - let inner_value = eval_expr(*expr, ctx)?; - Value::Boolean(!inner_value.to_bool()) - } - }; - debug!("Expression result: {result:?}"); - Ok(result) -} - -fn call_dispatch(identifier: &str, args: &[Value]) -> Result { - match identifier { - "ping" => Ok(Value::Text("pong".into())), - "lower" => functions::lower(args), - "html_encode" => functions::html_encode(args), - "replace" => functions::replace(args), - _ => Err(ExecutionError::FunctionError(format!( - "unknown function: {identifier}" - ))), - } -} - -#[derive(Debug, Clone, PartialEq)] -enum Expr { - Integer(i32), - String(String), - Variable(String, Option>), - Comparison(Box), - Call(String, Vec), - Not(Box), // Unary negation -} - -#[derive(Debug, Clone, PartialEq)] -enum Operator { - Matches, - MatchesInsensitive, - Equals, - NotEquals, - LessThan, - LessThanOrEqual, - GreaterThan, - GreaterThanOrEqual, - And, - Or, -} - -#[derive(Debug, Clone, PartialEq)] -struct Comparison { - left: Expr, - operator: Operator, - right: Expr, -} -// The parser attempts to implement this BNF: -// -// Expr <- integer | string | Variable | Call | BinaryOp -// Variable <- '$' '(' bareword ['{' Expr '}'] ')' -// Call <- '$' bareword '(' Expr? 
[',' Expr] ')' -// BinaryOp <- Expr Operator Expr -// -fn parse(tokens: &[Token]) -> Result { - let mut cur = tokens.iter().peekable(); - - let expr = parse_expr(&mut cur) - .map_err(|e| ExecutionError::ExpressionError(format!("parse error: {e}")))?; - - // Check if we've reached the end of the tokens - if cur.peek().is_some() { - let cur_left = cur.fold(String::new(), |mut acc, t| { - write!(&mut acc, "{t:?}").unwrap(); - acc - }); - return Err(ExecutionError::ExpressionError(format!( - "expected eof. tokens left: {cur_left}" - ))); - } - - Ok(expr) -} - -fn parse_expr(cur: &mut Peekable>) -> Result { - println!("Parsing expression, current token: {cur:?}"); - let node = if let Some(token) = cur.next() { - match token { - Token::Integer(i) => Expr::Integer(*i), - Token::String(s) => Expr::String(s.clone()), - Token::Dollar => parse_dollar(cur)?, - Token::Negation => { - // Handle unary negation by parsing the expression that follows - // and wrapping it in a Not expression - let expr = parse_expr(cur)?; - Expr::Not(Box::new(expr)) + Value::Dict(map) => { + // For dicts, numeric indices are just string keys - allow creation + map.insert(subkey.to_string(), value); + return Ok(()); } - Token::OpenParen => { - // Handle parenthesized expressions - let inner_expr = parse_expr(cur)?; - - // Expect a closing parenthesis - if matches!(cur.next(), Some(Token::CloseParen)) { - inner_expr - } else { - return Err(ExecutionError::ExpressionError( - "missing closing parenthesis".to_string(), - )); - } - } - unexpected => { - return Err(ExecutionError::ExpressionError(format!( - "unexpected token starting expression: {unexpected:?}", - ))); + _ => { + // Per ESI spec: cannot create list on the fly + return Err(ExecutionError::VariableError( + "cannot create list on the fly - list must already exist".to_string(), + )); } } - } else { - return Err(ExecutionError::ExpressionError( - "unexpected end of tokens".to_string(), - )); - }; - - // Check if there's a binary operation, or 
if we've reached the end of the expression - match cur.peek() { - Some(Token::Operation(op)) => { - let operator = op.clone(); - cur.next(); // consume the operator token - let left = node; - let right = parse_expr(cur)?; - let expr = Expr::Comparison(Box::new(Comparison { - left, - operator, - right, - })); - Ok(expr) - } - _ => Ok(node), } -} -fn parse_dollar(cur: &mut Peekable>) -> Result { - match cur.next() { - Some(Token::OpenParen) => parse_variable(cur), - Some(Token::Bareword(s)) => parse_call(s, cur), - unexpected => Err(ExecutionError::ExpressionError(format!( - "unexpected token: {unexpected:?}", - ))), - } -} - -fn parse_variable(cur: &mut Peekable>) -> Result { - let Some(Token::Bareword(basename)) = cur.next() else { - return Err(ExecutionError::ExpressionError(format!( - "unexpected token: {:?}", - cur.next() - ))); - }; - - match cur.next() { - Some(Token::OpenBracket) => { - // Allow bareword as string in subfield position - let subfield = if let Some(Token::Bareword(s)) = cur.peek() { - debug!("Parsing bareword subfield: {s}"); - cur.next(); - Expr::String(s.clone()) - } else { - debug!("Parsing non-bareword subfield, {:?}", cur.peek()); - // Parse the subfield expression - parse_expr(cur)? 
- }; - - let Some(Token::CloseBracket) = cur.next() else { - return Err(ExecutionError::ExpressionError(format!( - "unexpected token: {:?}", - cur.next() - ))); - }; - - let Some(Token::CloseParen) = cur.next() else { - return Err(ExecutionError::ExpressionError(format!( - "unexpected token: {:?}", - cur.next() - ))); - }; - - Ok(Expr::Variable( - basename.to_string(), - Some(Box::new(subfield)), + // Non-numeric subscript - dictionary key + match parent { + Value::Dict(map) => { + map.insert(subkey.to_string(), value); + Ok(()) + } + Value::List(_) => { + // Per ESI spec: cannot assign string key to a list + Err(ExecutionError::VariableError( + "cannot assign string key to a list".to_string(), )) } - Some(Token::CloseParen) => Ok(Expr::Variable(basename.to_string(), None)), - unexpected => Err(ExecutionError::ExpressionError(format!( - "unexpected token: {unexpected:?}", - ))), - } -} - -fn parse_call(identifier: &str, cur: &mut Peekable>) -> Result { - match cur.next() { - Some(Token::OpenParen) => { - let mut args = Vec::new(); - loop { - if Some(&&Token::CloseParen) == cur.peek() { - cur.next(); - break; - } - args.push(parse_expr(cur)?); - match cur.peek() { - Some(&&Token::CloseParen) => { - cur.next(); - break; - } - Some(&&Token::Comma) => { - cur.next(); - continue; - } - _ => { - return Err(ExecutionError::ExpressionError( - "unexpected token in arg list".to_string(), - )); - } - } - } - Ok(Expr::Call(identifier.to_string(), args)) + _ => { + // Create new dict for non-numeric keys (per ESI spec, dicts can be created on the fly) + let mut map = HashMap::new(); + map.insert(subkey.to_string(), value); + *parent = Value::Dict(map); + Ok(()) } - _ => Err(ExecutionError::ExpressionError( - "unexpected token following identifier".to_string(), - )), } } -#[derive(Debug, Clone, PartialEq)] -enum Token { +/// Represents a value in an ESI expression. 
+/// +/// Values can be of different types: +/// - `Integer`: A 32-bit signed integer +/// - `String`: A UTF-8 string +/// - `Boolean`: A boolean value (true/false) +/// - `List`: A list of values (also used for dict iteration as 2-element lists) +/// - `Dict`: A dictionary/map of string keys to values +/// - `Null`: Represents an absence of value +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Value { Integer(i32), - String(String), - OpenParen, - CloseParen, - OpenBracket, - CloseBracket, - Comma, - Dollar, - Operation(Operator), - Negation, - Bareword(String), -} - -fn lex_expr(expr: &str) -> Result> { - let mut cur = expr.chars().peekable(); - // Lex the expression, but don't stop at the first closing paren - let single = false; - lex_tokens(&mut cur, single) -} - -fn lex_interpolated_expr(cur: &mut Peekable) -> Result> { - if cur.peek() != Some(&'$') { - return Err(ExecutionError::ExpressionError("no expression".to_string())); - } - // Lex the expression, but stop at the first closing paren - let single = true; - lex_tokens(cur, single) + Text(Bytes), + Boolean(bool), + List(Vec), + Dict(HashMap), + Null, } -// Lexes an expression, stopping at the first closing paren if `single` is true -fn lex_tokens(cur: &mut Peekable, single: bool) -> Result> { - let mut result = Vec::new(); - let mut paren_depth = 0; - - while let Some(&c) = cur.peek() { - match c { - '\'' => { - cur.next(); - result.push(get_string(cur)?); - } - '$' => { - cur.next(); - result.push(Token::Dollar); - } - '0'..='9' | '-' => { - result.push(get_integer(cur)?); - } - 'a'..='z' | 'A'..='Z' => { - let bareword = get_bareword(cur); - - // Check if it's an operator - if let Token::Bareword(ref word) = bareword { - match word.as_str() { - "matches" => result.push(Token::Operation(Operator::Matches)), - "matches_i" => result.push(Token::Operation(Operator::MatchesInsensitive)), - _ => result.push(bareword), - } - } else { - result.push(get_bareword(cur)); - } - } - '(' | ')' | '{' | '}' | ',' => 
{ - cur.next(); - match c { - '(' => { - result.push(Token::OpenParen); - paren_depth += 1; - } - ')' => { - result.push(Token::CloseParen); - paren_depth -= 1; - if single && paren_depth <= 0 { - break; - } - } - '{' => result.push(Token::OpenBracket), - '}' => result.push(Token::CloseBracket), - ',' => result.push(Token::Comma), - _ => unreachable!(), - } - } - '=' => { - cur.next(); // consume the first '=' - if cur.peek() == Some(&'=') { - cur.next(); // consume the second '=' - result.push(Token::Operation(Operator::Equals)); - } else { - return Err(ExecutionError::ExpressionError( - "single '=' not supported, use '==' for equality".to_string(), - )); - } - } - '!' => { - cur.next(); // consume first '!' - if cur.peek() == Some(&'=') { - cur.next(); // consume the '=' - result.push(Token::Operation(Operator::NotEquals)); - } else { - result.push(Token::Negation); - } - } - '&' => { - cur.next(); // consume first '&' - if cur.peek() == Some(&'&') { - cur.next(); // consume the second '&' - result.push(Token::Operation(Operator::And)); - } else { - return Err(ExecutionError::ExpressionError( - "single '&' not supported, use '&&' for logical AND".to_string(), - )); - } - } - '|' => { - cur.next(); // consume first '|' - if cur.peek() == Some(&'|') { - cur.next(); // consume the second '|' - result.push(Token::Operation(Operator::Or)); - } else { - return Err(ExecutionError::ExpressionError( - "single '|' not supported, use '||' for logical OR".to_string(), - )); - } - } - '<' => { - cur.next(); - if cur.peek() == Some(&'=') { - cur.next(); - result.push(Token::Operation(Operator::LessThanOrEqual)); - } else { - result.push(Token::Operation(Operator::LessThan)); - } - } - '>' => { - cur.next(); - if cur.peek() == Some(&'=') { - cur.next(); - result.push(Token::Operation(Operator::GreaterThanOrEqual)); +impl Value { + pub(crate) fn to_bool(&self) -> bool { + match self { + &Self::Integer(n) => !matches!(n, 0), + Self::Text(s) => !s.is_empty(), + Self::Boolean(b) => 
*b, + Self::List(items) => !items.is_empty(), + Self::Dict(map) => !map.is_empty(), + &Self::Null => false, + } + } + + /// Convert Value to Bytes - zero-copy for Text variant + pub(crate) fn to_bytes(&self) -> Bytes { + match self { + Self::Integer(i) => Bytes::from(i.to_string()), + Self::Text(b) => b.clone(), // Cheap refcount increment + Self::Boolean(b) => { + if *b { + Bytes::from_static(b"true") } else { - result.push(Token::Operation(Operator::GreaterThan)); + Bytes::from_static(b"false") } } - ' ' => { - cur.next(); // Ignore spaces - } - _ => { - return Err(ExecutionError::ExpressionError( - // "error in lexing interpolated".to_string(), - format!("error in lexing interpolated `{c}`"), - )); - } + Self::List(items) => Bytes::from(items_to_string(items)), + Self::Dict(map) => Bytes::from(dict_to_string(map)), + Self::Null => Bytes::new(), } } - // We should have hit the end of the expression - if paren_depth != 0 { - return Err(ExecutionError::ExpressionError( - "missing closing parenthesis".to_string(), - )); - } - - Ok(result) } -fn get_integer(cur: &mut Peekable) -> Result { - let mut buf = Vec::new(); - let c = cur.next().unwrap(); - buf.push(c); - - if c == '0' { - // Zero is a special case, as the only number that can start with a zero. - let Some(c) = cur.peek() else { - cur.next(); - // EOF after a zero. That's a valid number. - return Ok(Token::Integer(0)); - }; - // Make sure the zero isn't followed by another digit. 
- if let '0'..='9' = *c { - return Err(ExecutionError::ExpressionError( - "invalid number".to_string(), - )); - } +impl From for Value { + fn from(s: String) -> Self { + Self::Text(Bytes::from(s)) } +} - if c == '-' { - let Some(c) = cur.next() else { - return Err(ExecutionError::ExpressionError( - "invalid number".to_string(), - )); - }; - match c { - '1'..='9' => buf.push(c), - _ => { - return Err(ExecutionError::ExpressionError( - "invalid number".to_string(), - )) - } - } +impl From<&str> for Value { + fn from(s: &str) -> Self { + // Copy the string data into a Bytes buffer + // This is necessary because we can't guarantee the lifetime of &str + Self::Text(Bytes::copy_from_slice(s.as_bytes())) } +} - while let Some(c) = cur.peek() { - match c { - '0'..='9' => buf.push(cur.next().unwrap()), - _ => break, - } +impl From for Value { + fn from(b: Bytes) -> Self { + Self::Text(b) } - let Ok(num) = buf.into_iter().collect::().parse() else { - return Err(ExecutionError::ExpressionError( - "invalid number".to_string(), - )); - }; - Ok(Token::Integer(num)) } -fn get_bareword(cur: &mut Peekable) -> Token { - let mut buf = Vec::new(); - buf.push(cur.next().unwrap()); - - while let Some(c) = cur.peek() { - match c { - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => buf.push(cur.next().unwrap()), - _ => break, +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Integer(i) => write!(f, "{}", i), + Self::Text(b) => write!(f, "{}", String::from_utf8_lossy(b.as_ref())), + Self::Boolean(b) => write!(f, "{}", if *b { "true" } else { "false" }), + Self::List(items) => write!(f, "{}", items_to_string(items)), + Self::Dict(map) => write!(f, "{}", dict_to_string(map)), + Self::Null => Ok(()), // Empty string for Null } } - Token::Bareword(buf.into_iter().collect()) } -fn get_string(cur: &mut Peekable) -> Result { - let mut buf = Vec::new(); - let mut triple_tick = false; - - if cur.peek() == Some(&'\'') { - // This is 
either an empty string, or the start of a triple tick string - cur.next(); - if cur.peek() == Some(&'\'') { - // It's a triple tick string - triple_tick = true; - cur.next(); - } else { - // It's an empty string, let's just return it - return Ok(Token::String(String::new())); +fn items_to_string(items: &[Value]) -> String { + let mut out = String::new(); + for (i, v) in items.iter().enumerate() { + if i > 0 { + out.push(','); } + out.push_str(&v.to_string()); } + out +} - while let Some(c) = cur.next() { - match c { - '\'' => { - if !triple_tick { - break; - } - if let Some(c2) = cur.next() { - if c2 == '\'' && cur.peek() == Some(&'\'') { - // End of a triple tick string - cur.next(); - break; - } - // Just two ticks - buf.push(c); - buf.push(c2); - } else { - // error - return Err(ExecutionError::ExpressionError( - "unexpected eof while parsing string".to_string(), - )); - } - } - '\\' => { - if triple_tick { - // no escaping inside a triple tick string - buf.push(c); - } else { - // in a normal string, we'll ignore this and buffer the - // next char - if let Some(escaped_c) = cur.next() { - buf.push(escaped_c); - } else { - // error - return Err(ExecutionError::ExpressionError( - "unexpected eof while parsing string".to_string(), - )); - } - } - } - _ => buf.push(c), - } +fn dict_to_string(map: &HashMap) -> String { + let mut parts: Vec<_> = map.iter().map(|(k, v)| format!("{k}={}", v)).collect(); + parts.sort(); + parts.join("&") +} + +fn call_dispatch(identifier: &str, args: &[Value], ctx: &mut EvalContext) -> Result { + match identifier { + "ping" => Ok(Value::Text("pong".into())), + "lower" => functions::lower(args), + "upper" => functions::upper(args), + "html_encode" => functions::html_encode(args), + "html_decode" => functions::html_decode(args), + "convert_to_unicode" => functions::convert_to_unicode(args), + "convert_from_unicode" => functions::convert_from_unicode(args), + "replace" => functions::replace(args), + "str" => functions::to_str(args), + 
"lstrip" => functions::lstrip(args), + "rstrip" => functions::rstrip(args), + "strip" => functions::strip(args), + "substr" => functions::substr(args), + "dollar" => functions::dollar(args), + "dquote" => functions::dquote(args), + "squote" => functions::squote(args), + "base64_encode" => functions::base64_encode(args), + "url_encode" => functions::url_encode(args), + "url_decode" => functions::url_decode(args), + "exists" => functions::exists(args), + "is_empty" => functions::is_empty(args), + "string_split" => functions::string_split(args), + "join" => functions::join(args), + "list_delitem" => functions::list_delitem(args), + "int" => functions::int(args), + "len" => functions::len(args), + "index" => functions::index(args), + "rindex" => functions::rindex(args), + "md5_digest" => functions::md5_digest(args), + "bin_int" => functions::bin_int(args), + "time" => functions::time(args), + "http_time" => functions::http_time(args), + "strftime" => functions::strftime(args), + "rand" => functions::rand(args, ctx), + "last_rand" => functions::last_rand(args, ctx), + "add_header" => functions::add_header(args, ctx), + "set_response_code" => functions::set_response_code(args, ctx), + "set_redirect" => functions::set_redirect(args, ctx), + _ => Err(ExecutionError::FunctionError(format!( + "unknown function: {identifier}" + ))), } - Ok(Token::String(buf.into_iter().collect())) } #[cfg(test)] mod tests { use super::*; - use regex::Regex; - - #[test] - fn test_lex_integer() -> Result<()> { - let tokens = lex_expr("1 23 456789 0 -987654 -32 -1 0")?; - assert_eq!( - tokens, - vec![ - Token::Integer(1), - Token::Integer(23), - Token::Integer(456789), - Token::Integer(0), - Token::Integer(-987654), - Token::Integer(-32), - Token::Integer(-1), - Token::Integer(0) - ] - ); - Ok(()) - } - #[test] - fn test_lex_empty_string() -> Result<()> { - let tokens = lex_expr("''")?; - assert_eq!(tokens, vec![Token::String("".to_string())]); - Ok(()) - } - #[test] - fn 
test_lex_simple_string() -> Result<()> { - let tokens = lex_expr("'hello'")?; - assert_eq!(tokens, vec![Token::String("hello".to_string())]); - Ok(()) - } - #[test] - fn test_lex_escaped_string() -> Result<()> { - let tokens = lex_expr(r#"'hel\'lo'"#)?; - assert_eq!(tokens, vec![Token::String("hel\'lo".to_string())]); - Ok(()) - } - #[test] - fn test_lex_triple_tick_string() -> Result<()> { - let tokens = lex_expr(r#"'''h'el''l\'o\'''"#)?; - assert_eq!(tokens, vec![Token::String(r#"h'el''l\'o\"#.to_string())]); - Ok(()) - } - #[test] - fn test_lex_triple_tick_and_escaping_torture() -> Result<()> { - let tokens = lex_expr(r#"'\\\'triple\'/' matches '''\'triple'/'''"#)?; - assert_eq!(tokens[0], tokens[2]); - let Token::String(ref test) = tokens[0] else { - panic!() - }; - let Token::String(ref pattern) = tokens[2] else { - panic!() - }; - let re = Regex::new(pattern)?; - assert!(re.is_match(test)); - Ok(()) - } - - #[test] - fn test_lex_variable() -> Result<()> { - let tokens = lex_expr("$(hello)")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("hello".to_string()), - Token::CloseParen - ] - ); - Ok(()) - } - #[test] - fn test_lex_variable_with_subscript() -> Result<()> { - let tokens = lex_expr("$(hello{'goodbye'})")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("hello".to_string()), - Token::OpenBracket, - Token::String("goodbye".to_string()), - Token::CloseBracket, - Token::CloseParen, - ] - ); - Ok(()) - } - #[test] - fn test_lex_variable_with_integer_subscript() -> Result<()> { - let tokens = lex_expr("$(hello{6})")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("hello".to_string()), - Token::OpenBracket, - Token::Integer(6), - Token::CloseBracket, - Token::CloseParen, - ] - ); - Ok(()) - } - #[test] - fn test_lex_matches_operator() -> Result<()> { - let tokens = lex_expr("matches")?; - assert_eq!(tokens, 
vec![Token::Operation(Operator::Matches)]); - Ok(()) - } - #[test] - fn test_lex_matches_i_operator() -> Result<()> { - let tokens = lex_expr("matches_i")?; - assert_eq!(tokens, vec![Token::Operation(Operator::MatchesInsensitive)]); - Ok(()) - } - #[test] - fn test_lex_identifier() -> Result<()> { - let tokens = lex_expr("$foo2BAZ")?; - assert_eq!( - tokens, - vec![Token::Dollar, Token::Bareword("foo2BAZ".to_string())] - ); - Ok(()) - } - #[test] - fn test_lex_simple_call() -> Result<()> { - let tokens = lex_expr("$fn()")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::Bareword("fn".to_string()), - Token::OpenParen, - Token::CloseParen - ] - ); - Ok(()) - } - #[test] - fn test_lex_call_with_arg() -> Result<()> { - let tokens = lex_expr("$fn('hello')")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::Bareword("fn".to_string()), - Token::OpenParen, - Token::String("hello".to_string()), - Token::CloseParen - ] - ); - Ok(()) - } - #[test] - fn test_lex_call_with_empty_string_arg() -> Result<()> { - let tokens = lex_expr("$fn('')")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::Bareword("fn".to_string()), - Token::OpenParen, - Token::String("".to_string()), - Token::CloseParen - ] - ); - Ok(()) - } - #[test] - fn test_lex_call_with_two_args() -> Result<()> { - let tokens = lex_expr("$fn($(hello), 'hello')")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::Bareword("fn".to_string()), - Token::OpenParen, - Token::Dollar, - Token::OpenParen, - Token::Bareword("hello".to_string()), - Token::CloseParen, - Token::Comma, - Token::String("hello".to_string()), - Token::CloseParen - ] - ); - Ok(()) - } - #[test] - fn test_lex_comparison() -> Result<()> { - let tokens = lex_expr("$(foo) matches 'bar'")?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("foo".to_string()), - Token::CloseParen, - Token::Operation(Operator::Matches), - Token::String("bar".to_string()) - ] - ); - Ok(()) - } - - 
#[test] - fn test_parse_integer() -> Result<()> { - let tokens = lex_expr("1")?; - let expr = parse(&tokens)?; - assert_eq!(expr, Expr::Integer(1)); - Ok(()) - } - #[test] - fn test_parse_simple_string() -> Result<()> { - let tokens = lex_expr("'hello'")?; - let expr = parse(&tokens)?; - assert_eq!(expr, Expr::String("hello".to_string())); - Ok(()) - } - #[test] - fn test_parse_variable() -> Result<()> { - let tokens = lex_expr("$(hello)")?; - let expr = parse(&tokens)?; - assert_eq!(expr, Expr::Variable("hello".to_string(), None)); - Ok(()) - } - - #[test] - fn test_parse_comparison() -> Result<()> { - let tokens = lex_expr("$(foo) matches 'bar'")?; - let expr = parse(&tokens)?; - assert_eq!( - expr, - Expr::Comparison(Box::new(Comparison { - left: Expr::Variable("foo".to_string(), None), - operator: Operator::Matches, - right: Expr::String("bar".to_string()), - })) - ); - Ok(()) - } - #[test] - fn test_parse_call() -> Result<()> { - let tokens = lex_expr("$hello()")?; - let expr = parse(&tokens)?; - assert_eq!(expr, Expr::Call("hello".to_string(), Vec::new())); - Ok(()) - } - #[test] - fn test_parse_call_with_arg() -> Result<()> { - let tokens = lex_expr("$fn('hello')")?; - let expr = parse(&tokens)?; - assert_eq!( - expr, - Expr::Call("fn".to_string(), vec![Expr::String("hello".to_string())]) - ); - Ok(()) - } - #[test] - fn test_parse_call_with_two_args() -> Result<()> { - let tokens = lex_expr("$fn($(hello), 'hello')")?; - let expr = parse(&tokens)?; - assert_eq!( - expr, - Expr::Call( - "fn".to_string(), - vec![ - Expr::Variable("hello".to_string(), None), - Expr::String("hello".to_string()) - ] - ) - ); - Ok(()) - } - #[test] - fn test_eval_string() -> Result<()> { - let expr = Expr::String("hello".to_string()); - let result = eval_expr(expr, &mut EvalContext::new())?; - assert_eq!(result, Value::Text("hello".into())); - Ok(()) + // Helper function for testing expression evaluation + // Parses and evaluates a raw expression string + // + // # Arguments + // 
* `raw_expr` - Raw expression string to evaluate + // * `ctx` - Evaluation context containing variables and state + // + // # Returns + // * `Result` - The evaluated expression result or an error + fn evaluate_expression(raw_expr: &str, ctx: &mut EvalContext) -> Result { + let (_, expr) = crate::parser::parse_expression(raw_expr).map_err(|e| { + ExecutionError::ExpressionError(format!("Failed to parse expression: {e}")) + })?; + eval_expr(&expr, ctx).map_err(|e| { + ExecutionError::ExpressionError(format!( + "Error occurred during expression evaluation: {e}" + )) + }) } - #[test] - fn test_eval_variable() -> Result<()> { - let expr = Expr::Variable("hello".to_string(), None); - let result = eval_expr( - expr, - &mut EvalContext::from([("hello".to_string(), Value::Text("goodbye".into()))]), - )?; - assert_eq!(result, Value::Text("goodbye".into())); - Ok(()) - } - #[test] - fn test_eval_subscripted_variable() -> Result<()> { - let expr = Expr::Variable( - "hello".to_string(), - Some(Box::new(Expr::String("abc".to_string()))), - ); - let result = eval_expr( - expr, - &mut EvalContext::from([("hello[abc]".to_string(), Value::Text("goodbye".into()))]), - )?; - assert_eq!(result, Value::Text("goodbye".into())); - Ok(()) - } #[test] fn test_eval_matches_comparison() -> Result<()> { let result = evaluate_expression( @@ -1248,6 +759,110 @@ mod tests { Ok(()) } + #[test] + fn test_context_nested_vars() { + let mut ctx = EvalContext::new(); + ctx.set_variable("foo", Some("bar"), Value::Text("baz".into())) + .unwrap(); + assert_eq!( + ctx.get_variable("foo", Some("bar")), + Value::Text("baz".into()) + ); + + // Per ESI spec: must create list first, then assign to indices + ctx.set_variable( + "arr", + None, + Value::List(vec![Value::Null, Value::Null, Value::Null]), + ) + .unwrap(); + ctx.set_variable("arr", Some("0"), Value::Integer(1)) + .unwrap(); + ctx.set_variable("arr", Some("2"), Value::Integer(3)) + .unwrap(); + + match ctx.get_variable("arr", None) { + 
Value::List(items) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Integer(1)); + assert_eq!(items[1], Value::Null); + assert_eq!(items[2], Value::Integer(3)); + } + other => panic!("Unexpected value: {:?}", other), + } + + assert_eq!(ctx.get_variable("arr", Some("1")), Value::Null); + assert_eq!(ctx.get_variable("arr", Some("2")), Value::Integer(3)); + } + + #[test] + fn test_list_index_out_of_bounds() { + let mut ctx = EvalContext::new(); + // Create a list with 3 elements + ctx.set_variable( + "colors", + None, + Value::List(vec![ + Value::Text("red".into()), + Value::Text("blue".into()), + Value::Text("green".into()), + ]), + ) + .unwrap(); + + // Trying to assign to index 3 should fail (only indices 0, 1, 2 exist) + let result = ctx.set_variable("colors", Some("3"), Value::Text("yellow".into())); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("out of range")); + } + + #[test] + fn test_cannot_assign_string_key_to_list() { + let mut ctx = EvalContext::new(); + // Create a list + ctx.set_variable( + "mylist", + None, + Value::List(vec![Value::Integer(1), Value::Integer(2)]), + ) + .unwrap(); + + // Trying to assign a string key to a list should fail + let result = ctx.set_variable("mylist", Some("foo"), Value::Text("bar".into())); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("cannot assign string key to a list")); + } + + #[test] + fn test_dict_created_on_fly() { + let mut ctx = EvalContext::new(); + // Assign to non-existent variable with string key - should create dict + ctx.set_variable("ages", Some("bob"), Value::Integer(34)) + .unwrap(); + ctx.set_variable("ages", Some("joan"), Value::Integer(28)) + .unwrap(); + + // Verify retrieval + let bob_age = ctx.get_variable("ages", Some("bob")); + assert_eq!(bob_age, Value::Integer(34), "Should retrieve bob's age"); + + let joan_age = ctx.get_variable("ages", Some("joan")); + assert_eq!(joan_age, Value::Integer(28), "Should 
retrieve joan's age"); + + // Verify the dict itself + let ages_dict = ctx.get_variable("ages", None); + if let Value::Dict(map) = ages_dict { + assert_eq!(map.len(), 2, "Dict should have 2 keys"); + assert_eq!(map.get("bob"), Some(&Value::Integer(34))); + assert_eq!(map.get("joan"), Some(&Value::Integer(28))); + } else { + panic!("ages should be a Dict, got {:?}", ages_dict); + } + } + #[test] fn test_eval_get_request_method() -> Result<()> { let mut ctx = EvalContext::new(); @@ -1255,6 +870,35 @@ mod tests { assert_eq!(result, Value::Text("GET".into())); Ok(()) } + + #[test] + fn test_nested_lists() -> Result<()> { + let mut ctx = EvalContext::new(); + // Test nested list literal: [ 'one', [ 'a', 'x', 'c' ], 'three' ] + let result = evaluate_expression("[ 'one', [ 'a', 'x', 'c' ], 'three' ]", &mut ctx)?; + + match result { + Value::List(items) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("one".into())); + assert_eq!(items[2], Value::Text("three".into())); + + // Check nested list + match &items[1] { + Value::List(nested) => { + assert_eq!(nested.len(), 3); + assert_eq!(nested[0], Value::Text("a".into())); + assert_eq!(nested[1], Value::Text("x".into())); + assert_eq!(nested[2], Value::Text("c".into())); + } + other => panic!("Expected nested list, got {:?}", other), + } + } + other => panic!("Expected list, got {:?}", other), + } + Ok(()) + } + #[test] fn test_eval_get_request_path() -> Result<()> { let mut ctx = EvalContext::new(); @@ -1270,7 +914,14 @@ mod tests { ctx.set_request(Request::new(Method::GET, "http://localhost?hello")); let result = evaluate_expression("$(QUERY_STRING)", &mut ctx)?; - assert_eq!(result, Value::Text("hello".into())); + // Should return Dict with one entry: "hello" -> empty Text + match result { + Value::Dict(map) => { + assert_eq!(map.len(), 1); + assert_eq!(map.get("hello"), Some(&Value::Text(Bytes::new()))); + } + other => panic!("Expected Dict, got {:?}", other), + } Ok(()) } #[test] @@ -1296,6 +947,60 @@ 
mod tests { Ok(()) } #[test] + fn test_eval_get_request_query_duplicate_params() -> Result<()> { + let mut ctx = EvalContext::new(); + ctx.set_request(Request::new( + Method::GET, + "http://localhost?x=1&x=2&x=3&y=single", + )); + + // Multiple values for 'x' should return a List + let result = evaluate_expression("$(QUERY_STRING{x})", &mut ctx)?; + match result { + Value::List(items) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("1".into())); + assert_eq!(items[1], Value::Text("2".into())); + assert_eq!(items[2], Value::Text("3".into())); + } + other => panic!("Expected List, got {:?}", other), + } + + // Single value for 'y' should return Text + let result = evaluate_expression("$(QUERY_STRING{y})", &mut ctx)?; + assert_eq!(result, Value::Text("single".into())); + + // No subkey should return Dict with all params + let result = evaluate_expression("$(QUERY_STRING)", &mut ctx)?; + + // Verify stringification uses & separator (clone before match to avoid borrow issues) + let stringified = result.to_string(); + assert!(stringified.contains("&")); + // The list [1,2,3] stringifies as "1,2,3", so we get "x=1,2,3&y=single" (or reversed due to HashMap) + assert!(stringified == "x=1,2,3&y=single" || stringified == "y=single&x=1,2,3"); + + match result { + Value::Dict(map) => { + assert_eq!(map.len(), 2); + // 'x' should be a list + match map.get("x") { + Some(Value::List(items)) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("1".into())); + assert_eq!(items[1], Value::Text("2".into())); + assert_eq!(items[2], Value::Text("3".into())); + } + other => panic!("Expected List for 'x', got {:?}", other), + } + // 'y' should be text + assert_eq!(map.get("y"), Some(&Value::Text("single".into()))); + } + other => panic!("Expected Dict, got {:?}", other), + } + + Ok(()) + } + #[test] fn test_eval_get_remote_addr() -> Result<()> { // This is kind of a useless test as this will always return an empty string. 
let mut ctx = EvalContext::new(); @@ -1404,116 +1109,25 @@ mod tests { assert_eq!(Value::Integer(0).to_string(), "0"); assert_eq!(Value::Text("".into()).to_string(), ""); assert_eq!(Value::Text("hello".into()).to_string(), "hello"); - assert_eq!(Value::Null.to_string(), "null"); - - Ok(()) - } - #[test] - fn test_lex_interpolated_basic() -> Result<()> { - let mut chars = "$(foo)bar".chars().peekable(); - let tokens = lex_interpolated_expr(&mut chars)?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("foo".to_string()), - Token::CloseParen - ] - ); - // Verify remaining chars are untouched - assert_eq!(chars.collect::(), "bar"); - Ok(()) - } + assert_eq!(Value::Null.to_string(), ""); // Null converts to empty string - #[test] - fn test_lex_interpolated_nested() -> Result<()> { - let mut chars = "$(foo{$(bar)})rest".chars().peekable(); - let tokens = lex_interpolated_expr(&mut chars)?; - assert_eq!( - tokens, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("foo".to_string()), - Token::OpenBracket, - Token::Dollar, - Token::OpenParen, - Token::Bareword("bar".to_string()), - Token::CloseParen, - Token::CloseBracket, - Token::CloseParen - ] - ); - assert_eq!(chars.collect::(), "rest"); Ok(()) } - - #[test] - fn test_lex_interpolated_no_dollar() { - let mut chars = "foo".chars().peekable(); - assert!(lex_interpolated_expr(&mut chars).is_err()); - } - - #[test] - fn test_lex_interpolated_incomplete() { - let mut chars = "$(foo".chars().peekable(); - assert!(lex_interpolated_expr(&mut chars).is_err()); - } - - #[test] - fn test_var_subfield_missing_closing_bracket() { - let input = r#" - - $(QUERY_STRING{param) - - "#; - let mut chars = input.chars().peekable(); - assert!(lex_interpolated_expr(&mut chars).is_err()); - } - - #[test] - fn test_invalid_standalone_bareword() { - let input = r#" - - bareword - - "#; - let mut chars = input.chars().peekable(); - assert!(lex_interpolated_expr(&mut chars).is_err()); - } - - 
#[test] - fn test_mixed_subfield_types() { - let input = r#"$(QUERY_STRING{param})"#; - let mut chars = input.chars().peekable(); - // let result = - // evaluate_interpolated(&mut chars, &mut ctx).expect("Processing should succeed"); - let result = lex_interpolated_expr(&mut chars).expect("Processing should succeed"); - println!("Tokens: {result:?}"); - assert_eq!( - result, - vec![ - Token::Dollar, - Token::OpenParen, - Token::Bareword("QUERY_STRING".into()), - Token::OpenBracket, - Token::Bareword("param".into()), - Token::CloseBracket, - Token::CloseParen - ] - ); - } - #[test] fn test_get_variable_query_string() { let mut ctx = EvalContext::new(); let req = Request::new(Method::GET, "http://localhost?param=value"); ctx.set_request(req); - // Test without subkey + // Test without subkey - should return Dict let result = ctx.get_variable("QUERY_STRING", None); - assert_eq!(result, Value::Text("param=value".into())); + match result { + Value::Dict(map) => { + assert_eq!(map.len(), 1); + assert_eq!(map.get("param"), Some(&Value::Text("value".into()))); + } + other => panic!("Expected Dict, got {:?}", other), + } // Test with subkey let result = ctx.get_variable("QUERY_STRING", Some("param")); diff --git a/esi/src/functions.rs b/esi/src/functions.rs index 04be031..7ba16c0 100644 --- a/esi/src/functions.rs +++ b/esi/src/functions.rs @@ -1,118 +1,1641 @@ -use crate::{expression::Value, ExecutionError, Result}; -use std::convert::TryFrom; +use crate::{expression::EvalContext, expression::Value, ExecutionError, Result}; +use base64::{engine::general_purpose::STANDARD, Engine as _}; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use percent_encoding::{percent_decode_str, utf8_percent_encode, NON_ALPHANUMERIC}; +use rand::Rng; +use std::time::{SystemTime, UNIX_EPOCH}; pub fn lower(args: &[Value]) -> Result { if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "lower: expected 1 argument, got {}", + args.len() + ))); + } + + // If the argument is 
Null, return Null (don't convert to "null" string) + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + // Fast path: mutate a copy of the bytes in-place for ASCII lowering to avoid String allocs + if let Value::Text(bytes) = &args[0] { + let mut buf = bytes.to_vec(); + for b in &mut buf { + *b = b.to_ascii_lowercase(); + } + return Ok(Value::Text(buf.into())); + } + + Ok(Value::Text(args[0].to_string().to_lowercase().into())) +} + +pub fn html_encode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "html_encode: expected 1 argument, got {}", + args.len() + ))); + } + + let encoded = + html_escape::encode_double_quoted_attribute(args[0].to_string().as_str()).to_string(); + Ok(Value::Text(encoded.into())) +} + +pub fn html_decode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "html_decode: expected 1 argument, got {}", + args.len() + ))); + } + + let decoded = html_escape::decode_html_entities(args[0].to_string().as_str()).to_string(); + Ok(Value::Text(decoded.into())) +} + +pub fn convert_to_unicode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "convert_to_unicode: expected 1 argument, got {}", + args.len() + ))); + } + + if let Value::Text(b) = &args[0] { + return Ok(Value::Text(b.clone())); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + Ok(Value::Text(args[0].to_string().into())) +} + +pub fn convert_from_unicode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "convert_from_unicode: expected 1 argument, got {}", + args.len() + ))); + } + + if let Value::Text(b) = &args[0] { + return Ok(Value::Text(b.clone())); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + Ok(Value::Text(args[0].to_string().into())) +} + +pub fn set_response_code(args: &[Value], ctx: &mut 
EvalContext) -> Result { + if args.is_empty() || args.len() > 2 { + return Err(ExecutionError::FunctionError(format!( + "set_response_code: expected 1-2 arguments, got {}", + args.len() + ))); + } + + let status = parse_i64("set_response_code", &args[0])?; + if !(100..=599).contains(&status) { + return Err(ExecutionError::FunctionError( + "set_response_code: invalid status code".to_string(), + )); + } + + ctx.set_response_status(status as i32); + + if let Some(body_val) = args.get(1) { + if matches!(body_val, Value::Null) { + ctx.set_response_body_override(None); + } else { + ctx.set_response_body_override(Some(Bytes::from(body_val.to_string()))); + } + } + + Ok(Value::Null) +} + +pub fn set_redirect(args: &[Value], ctx: &mut EvalContext) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "set_redirect: expected 1 argument, got {}", + args.len() + ))); + } + + let location = args[0].to_string(); + ctx.set_response_status(302); + ctx.add_response_header("Location".to_string(), location); + ctx.set_response_body_override(None); + + Ok(Value::Null) +} + +pub fn upper(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "upper: expected 1 argument, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + // Fast path: mutate a copy of the bytes in-place for ASCII upper to avoid String allocs + if let Value::Text(bytes) = &args[0] { + let mut buf = bytes.to_vec(); + for b in &mut buf { + *b = b.to_ascii_uppercase(); + } + return Ok(Value::Text(buf.into())); + } + + Ok(Value::Text(args[0].to_string().to_uppercase().into())) +} + +pub fn to_str(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "str: expected 1 argument, got {}", + args.len() + ))); + } + + Ok(Value::Text(args[0].to_string().into())) +} + +pub fn lstrip(args: &[Value]) -> Result { + if args.len() != 1 { + return 
Err(ExecutionError::FunctionError(format!( + "lstrip: expected 1 argument, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + // Zero-copy trim on valid UTF-8 text by slicing the original Bytes + if let Value::Text(bytes) = &args[0] { + if let Ok(s) = std::str::from_utf8(bytes.as_ref()) { + let trimmed = s.trim_start(); + let start = s.len() - trimmed.len(); + return Ok(Value::Text(bytes.slice(start..bytes.len()))); + } + } + + let s = args[0].to_string(); + Ok(Value::Text(s.trim_start().to_string().into())) +} + +pub fn rstrip(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "rstrip: expected 1 argument, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + // Zero-copy trim on valid UTF-8 text by slicing the original Bytes + if let Value::Text(bytes) = &args[0] { + if let Ok(s) = std::str::from_utf8(bytes.as_ref()) { + let trimmed = s.trim_end(); + let end = trimmed.len(); + return Ok(Value::Text(bytes.slice(0..end))); + } + } + + let s = args[0].to_string(); + Ok(Value::Text(s.trim_end().to_string().into())) +} + +pub fn strip(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "strip: expected 1 argument, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + // Zero-copy trim on valid UTF-8 text by slicing the original Bytes + if let Value::Text(bytes) = &args[0] { + if let Ok(s) = std::str::from_utf8(bytes.as_ref()) { + let trimmed_start = s.trim_start(); + let start = s.len() - trimmed_start.len(); + let trimmed = trimmed_start.trim_end(); + let end = start + trimmed.len(); + return Ok(Value::Text(bytes.slice(start..end))); + } + } + + let s = args[0].to_string(); + Ok(Value::Text(s.trim().to_string().into())) +} + +pub fn dollar(args: &[Value]) -> Result { + if !args.is_empty() { + return 
Err(ExecutionError::FunctionError(format!( + "dollar: expected 0 arguments, got {}", + args.len() + ))); + } + + Ok(Value::Text(Bytes::from("$"))) +} + +pub fn dquote(args: &[Value]) -> Result { + if !args.is_empty() { + return Err(ExecutionError::FunctionError(format!( + "dquote: expected 0 arguments, got {}", + args.len() + ))); + } + + Ok(Value::Text(Bytes::from("\""))) +} + +pub fn squote(args: &[Value]) -> Result { + if !args.is_empty() { + return Err(ExecutionError::FunctionError(format!( + "squote: expected 0 arguments, got {}", + args.len() + ))); + } + + Ok(Value::Text(Bytes::from("'"))) +} + +pub fn base64_encode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "base64_encode: expected 1 argument, got {}", + args.len() + ))); + } + + let encoded = STANDARD.encode(args[0].to_string().as_bytes()); + Ok(Value::Text(encoded.into())) +} + +pub fn url_encode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "url_encode: expected 1 argument, got {}", + args.len() + ))); + } + + let encoded = utf8_percent_encode(&args[0].to_string(), NON_ALPHANUMERIC).to_string(); + Ok(Value::Text(encoded.into())) +} + +pub fn url_decode(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "url_decode: expected 1 argument, got {}", + args.len() + ))); + } + + let input = args[0].to_string(); + let decoded = percent_decode_str(&input) + .decode_utf8() + .map_err(|_| ExecutionError::FunctionError("invalid UTF-8 in 'url_decode'".to_string()))?; + + Ok(Value::Text(Bytes::from(decoded.to_string()))) +} + +fn parse_i64(name: &str, v: &Value) -> Result { + match v { + Value::Integer(i) => Ok(*i as i64), + Value::Text(b) => std::str::from_utf8(b) + .ok() + .and_then(|s| s.trim().parse::().ok()) + .ok_or_else(|| ExecutionError::FunctionError(format!("{name}: invalid integer"))), + Value::Null => Ok(0), + _ => 
Err(ExecutionError::FunctionError(format!( + "{name}: invalid integer" + ))), + } +} + +fn parse_str<'a>(name: &str, v: &'a Value) -> Result<&'a str> { + if let Value::Text(b) = v { + std::str::from_utf8(b) + .map_err(|_| ExecutionError::FunctionError(format!("{name}: invalid string"))) + } else { + Err(ExecutionError::FunctionError(format!( + "{name}: invalid string" + ))) + } +} + +pub fn len(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "len: expected 1 argument, got {}", + args.len() + ))); + } + + let count = match &args[0] { + Value::Null => 0, + Value::Text(b) => String::from_utf8_lossy(b).chars().count() as i32, + Value::List(items) => items.len() as i32, + Value::Dict(map) => map.len() as i32, + other => other.to_string().chars().count() as i32, + }; + + Ok(Value::Integer(count)) +} + +fn parse_positive_bound(name: &str, v: &Value) -> Result { + let n = parse_i64(name, v)?; + if n <= 0 || n > i32::MAX as i64 { + return Err(ExecutionError::FunctionError(format!( + "{name}: invalid bound" + ))); + } + Ok(n as i32) +} + +pub fn int(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "int: expected 1 argument, got {}", + args.len() + ))); + } + + if let Value::Integer(i) = args[0] { + return Ok(Value::Integer(i)); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Integer(0)); + } + + let parsed = args[0].to_string().trim().parse::().unwrap_or(0); + Ok(Value::Integer(parsed)) +} + +pub fn exists(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "exists: expected 1 argument, got {}", + args.len() + ))); + } + + let exists = match &args[0] { + Value::Null => false, + Value::Text(b) => !b.is_empty(), + Value::List(items) => !items.is_empty(), + Value::Dict(map) => !map.is_empty(), + _ => true, + }; + + Ok(Value::Boolean(exists)) +} + +pub fn is_empty(args: &[Value]) -> Result { + if 
args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "is_empty: expected 1 argument, got {}", + args.len() + ))); + } + + match &args[0] { + Value::Null => Ok(Value::Boolean(false)), + Value::Text(b) => Ok(Value::Boolean(b.is_empty())), + Value::List(items) => Ok(Value::Boolean(items.is_empty())), + Value::Dict(map) => Ok(Value::Boolean(map.is_empty())), + _ => Ok(Value::Boolean(false)), + } +} + +pub fn index(args: &[Value]) -> Result { + if args.len() != 2 { + return Err(ExecutionError::FunctionError(format!( + "index: expected 2 arguments, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) { + return Ok(Value::Integer(-1)); + } + + let hay = args[0].to_string(); + let needle = args[1].to_string(); + + if needle.is_empty() { + return Ok(Value::Integer(0)); + } + + hay.find(&needle).map_or_else( + || Ok(Value::Integer(-1)), + |byte_idx| { + let pos = hay[..byte_idx].chars().count() as i32; + Ok(Value::Integer(pos)) + }, + ) +} + +pub fn rindex(args: &[Value]) -> Result { + if args.len() != 2 { + return Err(ExecutionError::FunctionError(format!( + "rindex: expected 2 arguments, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) || matches!(args[1], Value::Null) { + return Ok(Value::Integer(-1)); + } + + let hay = args[0].to_string(); + let needle = args[1].to_string(); + + if needle.is_empty() { + return Ok(Value::Integer(hay.chars().count() as i32)); + } + + hay.rfind(&needle).map_or_else( + || Ok(Value::Integer(-1)), + |byte_idx| { + let pos = hay[..byte_idx].chars().count() as i32; + Ok(Value::Integer(pos)) + }, + ) +} + +pub fn md5_digest(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "md5_digest: expected 1 argument, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + let input = args[0].to_string(); + let digest = md5::compute(input.as_bytes()); + let hex = 
format!("{:x}", digest); + Ok(Value::Text(hex.into())) +} + +pub fn time(args: &[Value]) -> Result { + if !args.is_empty() { + return Err(ExecutionError::FunctionError(format!( + "time: expected 0 arguments, got {}", + args.len() + ))); + } + + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|_| ExecutionError::FunctionError("system time before UNIX_EPOCH".to_string()))? + .as_secs(); + + let clamped = secs.min(i32::MAX as u64) as i32; + Ok(Value::Integer(clamped)) +} + +pub fn http_time(args: &[Value]) -> Result { + if args.len() > 1 { + return Err(ExecutionError::FunctionError(format!( + "http_time: expected 0-1 arguments, got {}", + args.len() + ))); + } + + let secs = if args.is_empty() || matches!(args[0], Value::Null) { + Utc::now().timestamp() + } else { + parse_i64("http_time", &args[0])? + }; + + let dt = DateTime::::from_timestamp(secs, 0) + .ok_or_else(|| ExecutionError::FunctionError("http_time: invalid timestamp".to_string()))?; + + let formatted = dt.format("%a, %d %b %Y %H:%M:%S GMT").to_string(); + Ok(Value::Text(Bytes::from(formatted))) +} + +pub fn strftime(args: &[Value]) -> Result { + if args.is_empty() || args.len() > 2 { + return Err(ExecutionError::FunctionError(format!( + "strftime: expected 1-2 arguments, got {}", + args.len() + ))); + } + + let fmt = parse_str("strftime", &args[0])?; + + let secs = match args.get(1) { + None => Utc::now().timestamp(), + Some(Value::Null) => Utc::now().timestamp(), + Some(v) => parse_i64("strftime", v)?, + }; + + let dt = DateTime::::from_timestamp(secs, 0) + .ok_or_else(|| ExecutionError::FunctionError("strftime: invalid timestamp".to_string()))?; + + Ok(Value::Text(Bytes::from(dt.format(fmt).to_string()))) +} + +pub fn rand(args: &[Value], ctx: &mut EvalContext) -> Result { + let bound = match args.len() { + 0 => 100_000_000i32, + 1 => parse_positive_bound("rand", &args[0])?, + _ => { + return Err(ExecutionError::FunctionError( + "rand expects 0 or 1 argument".to_string(), + )) + } + 
}; + + let mut rng = rand::thread_rng(); + let v: i32 = rng.gen_range(0..bound); + ctx.set_last_rand(v); + Ok(Value::Integer(v)) +} + +pub fn last_rand(args: &[Value], ctx: &EvalContext) -> Result { + if !args.is_empty() { return Err(ExecutionError::FunctionError( - "wrong number of arguments to 'lower'".to_string(), + "last_rand expects no arguments".to_string(), )); } - Ok(Value::Text(args[0].to_string().to_lowercase().into())) -} + Ok(ctx.last_rand().map_or_else(|| Value::Null, Value::Integer)) +} + +pub fn bin_int(args: &[Value]) -> Result { + if args.len() != 1 { + return Err(ExecutionError::FunctionError(format!( + "bin_int: expected 1 argument, got {}", + args.len() + ))); + } + + let value = match args[0] { + Value::Integer(i) => i, + _ => { + return Err(ExecutionError::FunctionError( + "incorrect type passed to 'bin_int'".to_string(), + )) + } + }; + + let bytes = value.to_le_bytes(); + Ok(Value::Text(Bytes::copy_from_slice(&bytes))) +} + +pub fn substr(args: &[Value]) -> Result { + if args.len() < 2 || args.len() > 3 { + return Err(ExecutionError::FunctionError(format!( + "substr: expected 2-3 arguments, got {}", + args.len() + ))); + } + + if matches!(args[0], Value::Null) { + return Ok(Value::Null); + } + + let s = args[0].to_string(); + let start_i = match args[1] { + Value::Integer(i) => i, + _ => { + return Err(ExecutionError::FunctionError( + "incorrect type for 'substr' start".to_string(), + )) + } + }; + + let end_i: Option = match args.get(2) { + None => None, + Some(Value::Integer(j)) => Some(*j), + Some(_) => { + return Err(ExecutionError::FunctionError( + "incorrect type for 'substr' end".to_string(), + )) + } + }; + + // Work in chars to respect the spec's character indexing semantics + let chars: Vec = s.chars().collect(); + let len = chars.len() as i32; + + let start = if start_i < 0 { + (len + start_i).max(0) + } else { + start_i.min(len) + } as usize; + + let end = match end_i { + None => len, + Some(j) if j < 0 => (len + j).max(0), + 
Some(j) => j.min(len), + } as usize; + + if end < start { + return Ok(Value::Text(Bytes::new())); + } + + let slice: String = chars[start..end].iter().collect(); + Ok(Value::Text(slice.into())) +} + +pub fn add_header(args: &[Value], ctx: &mut EvalContext) -> Result { + if args.len() != 2 { + return Err(ExecutionError::FunctionError(format!( + "add_header: expected 2 arguments, got {}", + args.len() + ))); + } + + let name = args[0].to_string(); + let value = args[1].to_string(); + ctx.add_response_header(name, value); + + Ok(Value::Null) +} + +pub fn string_split(args: &[Value]) -> Result { + if args.is_empty() || args.len() > 3 { + return Err(ExecutionError::FunctionError( + "wrong number of arguments to 'string_split'".to_string(), + )); + } + + let source = args[0].to_string(); + let sep = match args.get(1) { + None | Some(Value::Null) => " ".to_string(), + Some(v) => v.to_string(), + }; + + let max_splits = match args.get(2) { + None | Some(Value::Null) => None, + Some(Value::Integer(n)) => Some(*n), + Some(_) => { + return Err(ExecutionError::FunctionError( + "string_split: invalid max_sep".to_string(), + )) + } + }; + + // If max_splits is provided and non-positive, do not split + if let Some(n) = max_splits { + if n <= 0 { + return Ok(Value::List(vec![Value::Text(source.into())])); + } + } + + let parts: Vec = if sep.is_empty() { + let mut out = Vec::new(); + let mut splits_done = 0usize; + let limit = max_splits.map(|n| n as usize); + + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if let Some(limit) = limit { + if splits_done >= limit { + let mut rest = String::new(); + rest.push(ch); + for c in chars.by_ref() { + rest.push(c); + } + out.push(rest); + return Ok(Value::List( + out.into_iter().map(|s| Value::Text(s.into())).collect(), + )); + } + } + + out.push(ch.to_string()); + splits_done += 1; + } + + out + } else { + let iter = max_splits.map_or_else( + || source.split(&sep).map(|s| s.to_string()).collect(), + |n| { 
+ source + .splitn(n as usize + 1, &sep) + .map(|s| s.to_string()) + .collect() + }, + ); + iter + }; + + let values = parts.into_iter().map(|s| Value::Text(s.into())).collect(); + Ok(Value::List(values)) +} + +pub fn join(args: &[Value]) -> Result { + if args.is_empty() || args.len() > 2 { + return Err(ExecutionError::FunctionError(format!( + "join: expected 1-2 arguments, got {}", + args.len() + ))); + } + + let sep = match args.get(1) { + None | Some(Value::Null) => " ".to_string(), + Some(v) => v.to_string(), + }; + + let list = match &args[0] { + Value::List(items) => items, + _ => { + return Err(ExecutionError::FunctionError( + "join expects a list as first argument".to_string(), + )) + } + }; + + let mut out = String::new(); + for (i, v) in list.iter().enumerate() { + if i > 0 { + out.push_str(&sep); + } + out.push_str(&v.to_string()); + } + + Ok(Value::Text(out.into())) +} + +pub fn list_delitem(args: &[Value]) -> Result { + if args.len() != 2 { + return Err(ExecutionError::FunctionError(format!( + "list_delitem: expected 2 arguments, got {}", + args.len() + ))); + } + + let list = match &args[0] { + Value::List(items) => items.clone(), + Value::Null => Vec::new(), + _ => { + return Err(ExecutionError::FunctionError( + "list_delitem expects a list as first argument".to_string(), + )) + } + }; + + let idx = parse_i64("list_delitem", &args[1])?; + if idx < 0 { + return Ok(Value::List(list)); + } + + let mut items = list; + if (idx as usize) < items.len() { + items.remove(idx as usize); + } + + Ok(Value::List(items)) +} + +pub fn replace(args: &[Value]) -> Result { + if args.len() < 3 || args.len() > 4 { + return Err(ExecutionError::FunctionError(format!( + "replace: expected 3-4 arguments, got {}", + args.len() + ))); + } + let Value::Text(haystack) = &args[0] else { + return Err(ExecutionError::FunctionError( + "incorrect haystack passed to 'replace'".to_string(), + )); + }; + let Value::Text(needle) = &args[1] else { + return 
Err(ExecutionError::FunctionError( + "incorrect needle passed to 'replace'".to_string(), + )); + }; + let Value::Text(replacement) = &args[2] else { + return Err(ExecutionError::FunctionError( + "incorrect replacement passed to 'replace'".to_string(), + )); + }; + + let hay = haystack.as_ref(); + let needle = needle.as_ref(); + let replacement = replacement.as_ref(); + + // count is optional, default to usize::MAX; non-positive counts mean "no replacements" + let count = match args.get(3) { + Some(Value::Integer(n)) => { + if *n <= 0 { + 0 + } else { + *n as usize + } + } + Some(_) => { + return Err(ExecutionError::FunctionError( + "incorrect type passed to 'replace'".to_string(), + )); + } + None => usize::MAX, + }; + + if needle.is_empty() { + return Ok(Value::Text(Bytes::copy_from_slice(hay))); + } + + let mut out = Vec::with_capacity(hay.len()); + let mut i = 0usize; + let mut replaced = 0usize; + while i + needle.len() <= hay.len() { + if replaced < count && hay[i..i + needle.len()] == *needle { + out.extend_from_slice(replacement); + i += needle.len(); + replaced += 1; + } else { + out.push(hay[i]); + i += 1; + } + } + + out.extend_from_slice(&hay[i..]); + Ok(Value::Text(out.into())) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn test_lower() { + match lower(&[Value::Text("HELLO".into())]) { + Ok(value) => assert_eq!(value, Value::Text("hello".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + match lower(&[Value::Text("Rust".into())]) { + Ok(value) => assert_eq!(value, Value::Text("rust".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + match lower(&[Value::Text("".into())]) { + Ok(value) => assert_eq!(value, Value::Text("".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + match lower(&[Value::Integer(123), Value::Integer(456)]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + 
ExecutionError::FunctionError("lower: expected 1 argument, got 2".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_html_encode() { + match html_encode(&[Value::Text("
".into())]) { + Ok(value) => assert_eq!(value, Value::Text("<div>".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + match html_encode(&[Value::Text("&".into())]) { + Ok(value) => assert_eq!(value, Value::Text("&".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + match html_encode(&[Value::Text(r#""quoted""#.into())]) { + Ok(value) => assert_eq!(value, Value::Text(""quoted"".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + }; + match html_encode(&[Value::Integer(123), Value::Integer(456)]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "html_encode: expected 1 argument, got 2".to_string() + ) + .to_string() + ), + } + } + + #[test] + fn test_html_decode() { + match html_decode(&[Value::Text("<div>".into())]) { + Ok(value) => assert_eq!(value, Value::Text("
".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match html_decode(&[Value::Text("foo & bar".into())]) { + Ok(value) => assert_eq!(value, Value::Text("foo & bar".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match html_decode(&[Value::Text("x".into()), Value::Text("extra".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "html_decode: expected 1 argument, got 2".to_string() + ) + .to_string() + ), + } + } + + #[test] + fn test_convert_unicode_passthrough() { + match convert_to_unicode(&[Value::Text("héllo".into())]) { + Ok(value) => assert_eq!(value, Value::Text("héllo".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match convert_from_unicode(&[Value::Text("héllo".into())]) { + Ok(value) => assert_eq!(value, Value::Text("héllo".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match convert_to_unicode(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Null), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match convert_from_unicode(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Null), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match convert_to_unicode(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "convert_to_unicode: expected 1 argument, got 0".to_string() + ) + .to_string() + ), + } + + match convert_from_unicode(&[Value::Integer(1), Value::Integer(2)]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "convert_from_unicode: expected 1 argument, got 2".to_string() + ) + .to_string() + ), + } + } + + #[test] + fn test_upper() { + match upper(&[Value::Text("hello".into())]) { + Ok(value) => assert_eq!(value, Value::Text("HELLO".into())), + Err(err) => 
panic!("Unexpected error: {:?}", err), + } + + match upper(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Null), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match upper(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("upper: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_to_str() { + match to_str(&[Value::Integer(42)]) { + Ok(value) => assert_eq!(value, Value::Text("42".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match to_str(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("str: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_literal_helpers() { + match dollar(&[]) { + Ok(value) => assert_eq!(value, Value::Text("$".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match dollar(&[Value::Text("x".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("dollar: expected 0 arguments, got 1".to_string()) + .to_string() + ), + } + + match dquote(&[]) { + Ok(value) => assert_eq!(value, Value::Text("\"".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match squote(&[]) { + Ok(value) => assert_eq!(value, Value::Text("'".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + } + + #[test] + fn test_strip_variants() { + match lstrip(&[Value::Text(" hello ".into())]) { + Ok(value) => assert_eq!(value, Value::Text("hello ".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match rstrip(&[Value::Text(" hello ".into())]) { + Ok(value) => assert_eq!(value, Value::Text(" hello".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match strip(&[Value::Text(" hello ".into())]) { + Ok(value) => 
assert_eq!(value, Value::Text("hello".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match strip(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Null), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match strip(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("strip: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_base64_encode() { + match base64_encode(&[Value::Text("hi".into())]) { + Ok(value) => assert_eq!(value, Value::Text("aGk=".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match base64_encode(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "base64_encode: expected 1 argument, got 0".to_string() + ) + .to_string() + ), + } + } + + #[test] + fn test_url_encode_decode() { + match url_encode(&[Value::Text("a b".into())]) { + Ok(value) => assert_eq!(value, Value::Text("a%20b".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match url_decode(&[Value::Text("a%20b".into())]) { + Ok(value) => assert_eq!(value, Value::Text("a b".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + } + + #[test] + fn test_exists_is_empty() { + match exists(&[Value::Text("".into())]) { + Ok(value) => assert_eq!(value, Value::Boolean(false)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match is_empty(&[Value::Text("".into())]) { + Ok(value) => assert_eq!(value, Value::Boolean(true)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match exists(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Boolean(false)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match is_empty(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Boolean(false)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match 
is_empty(&[Value::Text("data".into())]) { + Ok(value) => assert_eq!(value, Value::Boolean(false)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match exists(&[Value::List(vec![Value::Integer(1)])]) { + Ok(value) => assert_eq!(value, Value::Boolean(true)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match is_empty(&[Value::List(Vec::new())]) { + Ok(value) => assert_eq!(value, Value::Boolean(true)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match exists(&[Value::Dict(Default::default())]) { + Ok(value) => assert_eq!(value, Value::Boolean(false)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match exists(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("exists: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + + match is_empty(&[Value::Text("x".into()), Value::Text("y".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("is_empty: expected 1 argument, got 2".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_int() { + match int(&[Value::Text("7".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(7)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match int(&[Value::Text(" 9 ".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(9)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match int(&[Value::Text("abc".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(0)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match int(&[Value::Integer(5)]) { + Ok(value) => assert_eq!(value, Value::Integer(5)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match int(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Integer(0)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match int(&[Value::Text("1".into()), 
Value::Text("extra".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("int: expected 1 argument, got 2".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_len() { + match len(&[Value::Text("hello".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(5)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match len(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Integer(0)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match len(&[Value::List(vec![Value::Integer(1), Value::Integer(2)])]) { + Ok(value) => assert_eq!(value, Value::Integer(2)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match len(&[Value::Dict(HashMap::from([ + ("a".to_string(), Value::Integer(1)), + ("b".to_string(), Value::Integer(2)), + ]))]) { + Ok(value) => assert_eq!(value, Value::Integer(2)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match len(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("len: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_split_join_list_delitem() { + match string_split(&[Value::Text("a,b,c".into()), Value::Text(",".into())]) { + Ok(Value::List(items)) => assert_eq!(items.len(), 3), + other => panic!("Unexpected result: {:?}", other), + } -pub fn html_encode(args: &[Value]) -> Result { - if args.len() != 1 { - return Err(ExecutionError::FunctionError( - "wrong number of arguments to 'html_encode'".to_string(), - )); + // default separator (space) and max_splits + match string_split(&[Value::Text("a b c".into())]) { + Ok(Value::List(items)) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("a".into())); + assert_eq!(items[1], Value::Text("b".into())); + assert_eq!(items[2], Value::Text("c".into())); + } + other => panic!("Unexpected result: {:?}", 
other), + } + + match string_split(&[ + Value::Text("a,b,c,d".into()), + Value::Text(",".into()), + Value::Integer(2), + ]) { + Ok(Value::List(items)) => { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("a".into())); + assert_eq!(items[1], Value::Text("b".into())); + assert_eq!(items[2], Value::Text("c,d".into())); + } + other => panic!("Unexpected result: {:?}", other), + } + + // empty separator splits to chars unless max_splits == 0 + match string_split(&[Value::Text("abc".into()), Value::Text("".into())]) { + Ok(Value::List(items)) => { + let joined: String = items.iter().map(|v| v.to_string()).collect(); + assert_eq!(joined, "abc"); + } + other => panic!("Unexpected result: {:?}", other), + } + + match string_split(&[ + Value::Text("abc".into()), + Value::Text("".into()), + Value::Integer(0), + ]) { + Ok(Value::List(items)) => { + assert_eq!(items.len(), 1); + assert_eq!(items[0], Value::Text("abc".into())); + } + other => panic!("Unexpected result: {:?}", other), + } + + let list_value = Value::List(vec![Value::Text("x".into()), Value::Text("y".into())]); + match join(&[list_value.clone(), Value::Text("-".into())]) { + Ok(Value::Text(out)) => assert_eq!(String::from_utf8_lossy(&out), "x-y"), + other => panic!("Unexpected result: {:?}", other), + } + + // default separator is space + match join(&[list_value.clone()]) { + Ok(Value::Text(out)) => assert_eq!(String::from_utf8_lossy(&out), "x y"), + other => panic!("Unexpected result: {:?}", other), + } + + match list_delitem(&[list_value, Value::Integer(0)]) { + Ok(Value::List(items)) => { + assert_eq!(items.len(), 1); + assert_eq!(items[0], Value::Text("y".into())); + } + other => panic!("Unexpected result: {:?}", other), + } } - let encoded = html_escape::encode_double_quoted_attribute(&args[0]).to_string(); - Ok(Value::Text(encoded.into())) -} + #[test] + fn test_index_rindex() { + match index(&[ + Value::Text("hello world".into()), + Value::Text("world".into()), + ]) { + Ok(value) => 
assert_eq!(value, Value::Integer(6)), + Err(err) => panic!("Unexpected error: {:?}", err), + } -pub fn replace(args: &[Value]) -> Result { - if args.len() < 3 || args.len() > 4 { - return Err(ExecutionError::FunctionError( - "wrong number of arguments to 'replace'".to_string(), - )); + match rindex(&[Value::Text("ababa".into()), Value::Text("ba".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(3)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match index(&[Value::Text("abc".into()), Value::Text("z".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(-1)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match rindex(&[Value::Text("abc".into()), Value::Text("".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(3)), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + match index(&[Value::Null, Value::Text("x".into())]) { + Ok(value) => assert_eq!(value, Value::Integer(-1)), + Err(err) => panic!("Unexpected error: {:?}", err), + } } - let Value::Text(haystack) = &args[0] else { - return Err(ExecutionError::FunctionError( - "incorrect haystack passed to 'replace'".to_string(), - )); - }; - let Value::Text(needle) = &args[1] else { - return Err(ExecutionError::FunctionError( - "incorrect needle passed to 'replace'".to_string(), - )); - }; - let Value::Text(replacement) = &args[2] else { - return Err(ExecutionError::FunctionError( - "incorrect replacement passed to 'replace'".to_string(), - )); - }; - // count is optional, default to usize::MAX - let count = match args.get(3) { - Some(Value::Integer(count)) => { - // cap count to usize::MAX - let count: usize = usize::try_from(*count).unwrap_or(usize::MAX); - count + #[test] + fn test_bin_int() { + match bin_int(&[Value::Integer(0x12345678)]) { + Ok(Value::Text(bytes)) => assert_eq!(bytes.as_ref(), &[0x78, 0x56, 0x34, 0x12]), + other => panic!("Unexpected result: {:?}", other), } - Some(_) => { - return Err(ExecutionError::FunctionError( - "incorrect type 
passed to 'replace'".to_string(), - )); + + match bin_int(&[Value::Integer(-1)]) { + Ok(Value::Text(bytes)) => assert_eq!(bytes.as_ref(), &[0xff, 0xff, 0xff, 0xff]), + other => panic!("Unexpected result: {:?}", other), } - None => usize::MAX, - }; - Ok(Value::Text( - haystack - .replacen(needle.as_ref(), replacement, count) - .into(), - )) -} -#[cfg(test)] -mod tests { - use super::*; + // Example from spec: X$bin_int(127)X -> 58 7F 00 00 00 58 + let mut rendered = Vec::new(); + rendered.push(b'X'); + match bin_int(&[Value::Integer(127)]) { + Ok(Value::Text(bytes)) => rendered.extend_from_slice(bytes.as_ref()), + other => panic!("Unexpected result: {:?}", other), + } + rendered.push(b'X'); + assert_eq!(rendered, b"X\x7f\x00\x00\x00X"); + + match bin_int(&[Value::Text("not-int".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("incorrect type passed to 'bin_int'".to_string()) + .to_string() + ), + } + + match bin_int(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("bin_int: expected 1 argument, got 0".to_string()) + .to_string() + ), + } + } #[test] - fn test_lower() { - match lower(&[Value::Text("HELLO".into())]) { - Ok(value) => assert_eq!(value, Value::Text("hello".into())), + fn test_md5_digest() { + match md5_digest(&[Value::Text("hello".into())]) { + Ok(value) => assert_eq!( + value, + Value::Text("5d41402abc4b2a76b9719d911017c592".into()) + ), Err(err) => panic!("Unexpected error: {:?}", err), } - match lower(&[Value::Text("Rust".into())]) { - Ok(value) => assert_eq!(value, Value::Text("rust".into())), + + match md5_digest(&[Value::Null]) { + Ok(value) => assert_eq!(value, Value::Null), Err(err) => panic!("Unexpected error: {:?}", err), } - match lower(&[Value::Text("".into())]) { - Ok(value) => assert_eq!(value, Value::Text("".into())), - Err(err) => panic!("Unexpected error: {:?}", err), 
+ + match md5_digest(&[]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("md5_digest: expected 1 argument, got 0".to_string()) + .to_string() + ), } - match lower(&[Value::Integer(123), Value::Integer(456)]) { + } + + #[test] + fn test_time() { + match time(&[]) { + Ok(Value::Integer(n)) => assert!(n > 0), + other => panic!("Unexpected result: {:?}", other), + } + + match time(&[Value::Integer(1)]) { Ok(_) => panic!("Expected error, but got Ok"), Err(err) => assert_eq!( err.to_string(), - ExecutionError::FunctionError("wrong number of arguments to 'lower'".to_string()) + ExecutionError::FunctionError("time: expected 0 arguments, got 1".to_string()) .to_string() ), } } #[test] - fn test_html_encode() { - match html_encode(&[Value::Text("
".into())]) { - Ok(value) => assert_eq!(value, Value::Text("<div>".into())), + fn test_http_time() { + match http_time(&[]) { + Ok(Value::Text(s)) => { + let trimmed = String::from_utf8_lossy(&s).trim().to_string(); + assert!(trimmed.ends_with("GMT")); + chrono::DateTime::parse_from_rfc2822(&trimmed).unwrap(); + } + other => panic!("Unexpected result: {:?}", other), + } + + match http_time(&[Value::Integer(0)]) { + Ok(Value::Text(s)) => { + assert_eq!(String::from_utf8_lossy(&s), "Thu, 01 Jan 1970 00:00:00 GMT"); + } + other => panic!("Unexpected result: {:?}", other), + } + + match http_time(&[Value::Text("x".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("http_time: invalid integer".to_string()).to_string() + ), + } + } + + #[test] + fn test_strftime() { + match strftime(&[Value::Text("%Y-%m-%d".into()), Value::Integer(0)]) { + Ok(Value::Text(s)) => assert_eq!(String::from_utf8_lossy(&s), "1970-01-01"), + other => panic!("Unexpected result: {:?}", other), + } + + match strftime(&[Value::Text("%Y".into())]) { + Ok(Value::Text(s)) => { + let year = String::from_utf8_lossy(&s).trim().to_string(); + assert_eq!(year.len(), 4); + year.parse::().unwrap(); + } + other => panic!("Unexpected result: {:?}", other), + } + + match strftime(&[Value::Text("%Y".into()), Value::Text("abc".into())]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("strftime: invalid integer".to_string()).to_string() + ), + } + + match strftime(&[Value::Integer(1)]) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("strftime: invalid string".to_string()).to_string() + ), + } + } + + #[test] + fn test_rand_last_rand() { + let mut ctx = EvalContext::new(); + + match last_rand(&[], &ctx) { + Ok(Value::Null) => {} + other => panic!("Unexpected result: {:?}", 
other), + } + + let first = match rand(&[], &mut ctx) { + Ok(Value::Integer(v)) => v, + other => panic!("Unexpected result: {:?}", other), + }; + assert!(first >= 0 && first < 100_000_000); + + match last_rand(&[], &ctx) { + Ok(Value::Integer(v)) => assert_eq!(v, first), + other => panic!("Unexpected result: {:?}", other), + } + + let second = match rand(&[Value::Integer(10)], &mut ctx) { + Ok(Value::Integer(v)) => v, + other => panic!("Unexpected result: {:?}", other), + }; + assert!(second >= 0 && second < 10); + + match last_rand(&[], &ctx) { + Ok(Value::Integer(v)) => assert_eq!(v, second), + other => panic!("Unexpected result: {:?}", other), + } + + match rand(&[Value::Integer(0)], &mut ctx) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("rand: invalid bound".to_string()).to_string() + ), + } + + match last_rand(&[Value::Integer(1)], &ctx) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("last_rand expects no arguments".to_string()) + .to_string() + ), + } + } + + #[test] + fn test_substr() { + let s = Value::Text("whether tis nobler in the mind".into()); + + // start/end indices (end exclusive) + match substr(&[s.clone(), Value::Integer(0), Value::Integer(7)]) { + Ok(value) => assert_eq!(value, Value::Text("whether".into())), Err(err) => panic!("Unexpected error: {:?}", err), } - match html_encode(&[Value::Text("&".into())]) { - Ok(value) => assert_eq!(value, Value::Text("&".into())), + + // example: pick range that yields "nobler" + match substr(&[s.clone(), Value::Integer(12), Value::Integer(18)]) { + Ok(value) => assert_eq!(value, Value::Text("nobler".into())), Err(err) => panic!("Unexpected error: {:?}", err), } - match html_encode(&[Value::Text(r#""quoted""#.into())]) { - Ok(value) => assert_eq!(value, Value::Text(""quoted"".into())), + + // omit end -> to end + match substr(&[s.clone(), 
Value::Integer(22)]) { + Ok(value) => assert_eq!(value, Value::Text("the mind".into())), Err(err) => panic!("Unexpected error: {:?}", err), - }; - match html_encode(&[Value::Integer(123), Value::Integer(456)]) { + } + + // negative end: drop last 5 chars + match substr(&[s.clone(), Value::Integer(0), Value::Integer(-5)]) { + Ok(value) => assert_eq!(value, Value::Text("whether tis nobler in the".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + // negative start, length to end + match substr(&[s.clone(), Value::Integer(-8)]) { + Ok(value) => assert_eq!(value, Value::Text("the mind".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + // negative start and negative end window + match substr(&[s, Value::Integer(-8), Value::Integer(-4)]) { + Ok(value) => assert_eq!(value, Value::Text("the ".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + } + } + + #[test] + fn test_add_header_stub() { + let mut ctx = EvalContext::new(); + match add_header( + &[Value::Text("Name".into()), Value::Text("Value".into())], + &mut ctx, + ) { + Ok(value) => assert_eq!(value, Value::Null), + Err(err) => panic!("Unexpected error: {:?}", err), + } + + assert_eq!( + ctx.response_headers(), + [("Name".to_string(), "Value".to_string())] + ); + + match add_header(&[Value::Text("OnlyOneArg".into())], &mut ctx) { Ok(_) => panic!("Expected error, but got Ok"), Err(err) => assert_eq!( err.to_string(), ExecutionError::FunctionError( - "wrong number of arguments to 'html_encode'".to_string() + "add_header: expected 2 arguments, got 1".to_string() ) .to_string() ), @@ -130,6 +1653,37 @@ mod tests { Err(err) => panic!("Unexpected error: {:?}", err), }; + // match spec example: first occurrence only + match replace(&[ + Value::Text("abcdefabcde".into()), + Value::Text("abc".into()), + Value::Text("xyz".into()), + Value::Integer(1), + ]) { + Ok(value) => assert_eq!(value, Value::Text("xyzdefabcde".into())), + Err(err) => panic!("Unexpected error: {:?}", 
err), + }; + + // zero or negative maxsplit -> no replacements + match replace(&[ + Value::Text("abc".into()), + Value::Text("a".into()), + Value::Text("z".into()), + Value::Integer(0), + ]) { + Ok(value) => assert_eq!(value, Value::Text("abc".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + }; + match replace(&[ + Value::Text("abc".into()), + Value::Text("a".into()), + Value::Text("z".into()), + Value::Integer(-3), + ]) { + Ok(value) => assert_eq!(value, Value::Text("abc".into())), + Err(err) => panic!("Unexpected error: {:?}", err), + }; + match replace(&[ Value::Text("hello world world".into()), Value::Text("world".into()), @@ -154,7 +1708,7 @@ mod tests { Value::Text("hello world".into()), Value::Text("world".into()), Value::Text("Rust".into()), - Value::Integer(usize::MAX as i32), + Value::Integer(i32::MAX), ]) { Ok(value) => assert_eq!(value, Value::Text("hello Rust".into())), Err(err) => panic!("Unexpected error: {:?}", err), @@ -181,9 +1735,80 @@ mod tests { Ok(_) => panic!("Expected error, but got Ok"), Err(err) => assert_eq!( err.to_string(), - ExecutionError::FunctionError("wrong number of arguments to 'replace'".to_string()) + ExecutionError::FunctionError("replace: expected 3-4 arguments, got 2".to_string()) .to_string() ), }; } + + #[test] + fn test_set_response_code_and_redirect() { + let mut ctx = EvalContext::new(); + + match set_response_code(&[Value::Integer(404)], &mut ctx) { + Ok(Value::Null) => {} + other => panic!("Unexpected result: {:?}", other), + } + assert_eq!(ctx.response_status(), Some(404)); + assert!(ctx.response_body_override().is_none()); + + match set_response_code( + &[Value::Integer(500), Value::Text("error body".into())], + &mut ctx, + ) { + Ok(Value::Null) => {} + other => panic!("Unexpected result: {:?}", other), + } + assert_eq!(ctx.response_status(), Some(500)); + assert_eq!( + ctx.response_body_override() + .map(|b| String::from_utf8_lossy(b.as_ref()).to_string()), + Some("error body".to_string()) + ); + + 
match set_response_code(&[Value::Integer(99)], &mut ctx) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError("set_response_code: invalid status code".to_string()) + .to_string() + ), + } + + match set_response_code(&[], &mut ctx) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "set_response_code: expected 1-2 arguments, got 0".to_string() + ) + .to_string() + ), + } + + match set_redirect(&[Value::Text("http://example.com".into())], &mut ctx) { + Ok(Value::Null) => {} + other => panic!("Unexpected result: {:?}", other), + } + assert_eq!(ctx.response_status(), Some(302)); + assert_eq!( + ctx.response_headers().last(), + Some(&("Location".to_string(), "http://example.com".to_string())) + ); + assert!(ctx.response_body_override().is_none()); + + match set_redirect( + &[Value::Text("a".into()), Value::Text("b".into())], + &mut ctx, + ) { + Ok(_) => panic!("Expected error, but got Ok"), + Err(err) => assert_eq!( + err.to_string(), + ExecutionError::FunctionError( + "set_redirect: expected 1 argument, got 2".to_string() + ) + .to_string() + ), + } + } } diff --git a/esi/src/lib.rs b/esi/src/lib.rs index e2dd277..fd745ba 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -1,61 +1,112 @@ #![doc = include_str!("../README.md")] mod config; -mod document; mod error; mod expression; mod functions; -mod parse; +mod parser; +pub mod parser_types; -use crate::document::{FetchState, Task}; -use crate::expression::{evaluate_expression, try_evaluate_interpolated, EvalContext}; -use fastly::http::request::PendingRequest; +use crate::expression::EvalContext; +use crate::parser_types::Expr; +use bytes::{Buf, Bytes, BytesMut}; +use fastly::http::request::{PendingRequest, PollResult}; use fastly::http::{header, Method, StatusCode, Url}; -use fastly::{mime, Body, Request, Response}; -use log::{debug, error, trace}; +use 
fastly::{mime, Request, Response}; +use log::debug; use std::collections::VecDeque; use std::io::{BufRead, Write}; -pub use crate::document::{Element, Fragment}; -pub use crate::error::Result; -pub use crate::parse::{parse_tags, Event, Include, Tag, Tag::Try}; +pub use crate::error::{ExecutionError as ESIError, Result}; +pub use crate::parser::{interpolated_content, parse, parse_complete, parse_expression}; pub use crate::config::Configuration; pub use crate::error::ExecutionError; -// re-export quick_xml Reader and Writer -pub use quick_xml::{Reader, Writer}; - type FragmentRequestDispatcher = dyn Fn(Request) -> Result; type FragmentResponseProcessor = dyn Fn(&mut Request, Response) -> Result; /// Representation of a fragment that is either being fetched, has already been fetched (or generated synthetically), or skipped. pub enum PendingFragmentContent { - PendingRequest(PendingRequest), - CompletedRequest(Response), + PendingRequest(Box), + CompletedRequest(Box), NoContent, } impl From for PendingFragmentContent { fn from(value: PendingRequest) -> Self { - Self::PendingRequest(value) + Self::PendingRequest(Box::new(value)) } } impl From for PendingFragmentContent { fn from(value: Response) -> Self { - Self::CompletedRequest(value) + Self::CompletedRequest(Box::new(value)) } } +/// Representation of an ESI fragment request with its metadata and pending response +pub struct Fragment { + /// Metadata of the request + pub(crate) request: Request, + /// An optional alternate request to send if the original request fails + pub(crate) alt: Option, + /// Whether to continue on error + pub(crate) continue_on_error: bool, + /// The pending fragment response, which can be polled to retrieve the content + pub(crate) pending_content: PendingFragmentContent, +} + +/// Queue element for streaming processing +/// Elements that need to be executed in order +enum QueuedElement { + /// Raw content ready to write (text/html/evaluated expressions) + Content(Bytes), + /// A dispatched 
include waiting to be executed + Include(Box), + /// A try block with attempts and except clause + /// All includes from all attempts have been dispatched in parallel + Try { + attempt_elements: Vec>, + except_elements: Vec, + }, +} + impl PendingFragmentContent { - fn wait_for_content(self) -> Result { - Ok(match self { - Self::PendingRequest(pending_request) => pending_request.wait()?, - Self::CompletedRequest(response) => response, - Self::NoContent => Response::from_status(StatusCode::NO_CONTENT), - }) + /// Poll to check if the request is ready without blocking + /// Returns the updated PendingFragmentContent (either still Pending or now Completed/NoContent) + pub fn poll(self) -> Self { + match self { + Self::PendingRequest(pending_request) => match pending_request.poll() { + PollResult::Done(result) => result.map_or_else( + |_| Self::NoContent, + |resp| Self::CompletedRequest(Box::new(resp)), + ), + PollResult::Pending(pending_request) => { + // Still pending - put it back + Self::PendingRequest(Box::new(pending_request)) + } + }, + // Already completed - return as-is + other => other, + } + } + + /// Check if the content is ready (completed or no content) + pub const fn is_ready(&self) -> bool { + !matches!(self, Self::PendingRequest(_)) + } + + /// Wait for and retrieve the response from a pending fragment request + pub fn wait(self) -> Result { + match self { + Self::PendingRequest(pending_request) => pending_request.wait().map_err(|e| { + ESIError::ExpressionError(format!("Fragment request wait failed: {}", e)) + }), + Self::CompletedRequest(response) => Ok(*response), + Self::NoContent => Ok(Response::from_status(StatusCode::NO_CONTENT)), + } } } @@ -66,7 +117,7 @@ impl PendingFragmentContent { /// and conditional processing according to the ESI specification. 
/// /// # Fields -/// * `original_request_metadata` - Optional original client request data used for fragment requests +/// * `ctx` - Evaluation context containing variables and request metadata /// * `configuration` - Configuration settings controlling ESI processing behavior /// /// # Example @@ -84,31 +135,72 @@ impl PendingFragmentContent { /// let processor = Processor::new(Some(request), config); /// ``` pub struct Processor { - // The original client request metadata, if any. - original_request_metadata: Option, + // The evaluation context containing variables and request metadata + ctx: EvalContext, // The configuration for the processor. configuration: Configuration, + // Queue for pending fragments and blocked content + queue: VecDeque, } impl Processor { - pub const fn new( - original_request_metadata: Option, - configuration: Configuration, - ) -> Self { + pub fn new(original_request_metadata: Option, configuration: Configuration) -> Self { + let mut ctx = EvalContext::new(); + if let Some(req) = original_request_metadata { + ctx.set_request(req); + } else { + ctx.set_request(Request::new(Method::GET, "http://localhost")); + } Self { - original_request_metadata, + ctx, configuration, + queue: VecDeque::new(), } } + /// Get the evaluation context (for testing) + /// + /// Provides access to the processor's internal state including variables, + /// response headers, status code, and body overrides set by ESI functions. + pub fn context(&self) -> &EvalContext { + &self.ctx + } + /// Process a response body as an ESI document. Consumes the response body. /// /// This method processes ESI directives in the response body while streaming the output to the client, /// minimizing memory usage for large responses. It handles ESI includes, conditionals, and variable /// substitution according to the ESI specification. 
/// + /// ## Response Manipulation Functions + /// + /// ESI functions can modify the response that gets sent to the client: + /// + /// ### `$add_header(name, value)` + /// Adds a custom header to the response: + /// ```text + /// $add_header('X-Custom-Header', 'my-value') + /// ``` + /// + /// ### `$set_response_code(code [, body])` + /// Sets the HTTP status code and optionally replaces the response body: + /// ```text + /// $set_response_code(404, 'Page not found') + /// ``` + /// + /// ### `$set_redirect(url [, code])` + /// Sets up an HTTP redirect (default 302): + /// ```text + /// $set_redirect('https://example.com/new-page') + /// $set_redirect('https://example.com/moved', 301) + /// ``` + /// + /// **Note:** These functions modify the response metadata that `process_response` will use + /// when sending the response to the client. The headers, status code, and body override are + /// buffered during processing and applied when the response is sent. + /// /// # Arguments - /// * `src_document` - Source HTTP response containing ESI markup to process + /// * `src_stream` - Source HTTP response containing ESI markup to process /// * `client_response_metadata` - Optional response metadata (headers, status) to send to client /// * `dispatch_fragment_request` - Optional callback for customizing fragment request handling /// * `process_fragment_response` - Optional callback for processing fragment responses @@ -131,7 +223,7 @@ impl Processor { /// // Define a simple fragment dispatcher /// fn default_fragment_dispatcher(req: fastly::Request) -> esi::Result { /// Ok(esi::PendingFragmentContent::CompletedRequest( - /// fastly::Response::from_body("Fragment content") + /// Box::new(fastly::Response::from_body("Fragment content")) /// )) /// } /// // Process the response, streaming the resulting document directly to the client @@ -150,654 +242,1056 @@ impl Processor { /// * Stream writing fails /// * Fragment requests fail pub fn process_response( - self, - 
src_document: &mut Response, + mut self, + src_stream: &mut Response, client_response_metadata: Option, dispatch_fragment_request: Option<&FragmentRequestDispatcher>, process_fragment_response: Option<&FragmentResponseProcessor>, ) -> Result<()> { - // Create a response to send the headers to the client - let resp = client_response_metadata.unwrap_or_else(|| { + let mut output = Vec::new(); + + self.process_stream( + src_stream.take_body(), + &mut output, + dispatch_fragment_request, + process_fragment_response, + )?; + + let mut resp = client_response_metadata.unwrap_or_else(|| { Response::from_status(StatusCode::OK).with_content_type(mime::TEXT_HTML) }); - // Send the response headers to the client and open an output stream - let output_writer = resp.stream_to_client(); - - // Set up an XML writer to write directly to the client output stream. - let mut xml_writer = Writer::new(output_writer); + for (name, value) in self.ctx.response_headers() { + resp.set_header(name, value); + } - match self.process_document( - reader_from_body(src_document.take_body()), - &mut xml_writer, - dispatch_fragment_request, - process_fragment_response, - ) { - Ok(()) => { - xml_writer.into_inner().finish()?; - Ok(()) - } - Err(err) => { - error!("error processing ESI document: {err}"); - Err(err) - } + if let Some(status) = self.ctx.response_status() { + let status_code = StatusCode::from_u16(status as u16).map_err(|_| { + ExecutionError::FunctionError("set_response_code: invalid status code".to_string()) + })?; + resp.set_status(status_code); } + + let body_bytes = self + .ctx + .response_body_override() + .cloned() + .unwrap_or_else(|| Bytes::from(output)); + + resp.set_body(body_bytes.to_vec()); + resp.send_to_client(); + Ok(()) } - /// Process an ESI document that has already been parsed into a queue of events. - /// - /// Takes a queue of already parsed ESI events and processes them, writing the output - /// to the provided writer. 
This method is used internally after parsing but can also - /// be called directly if you have pre-parsed events. - /// - /// # Arguments - /// * `src_events` - Queue of parsed ESI events to process - /// * `output_writer` - Writer to stream processed output to - /// * `dispatch_fragment_request` - Optional handler for fragment requests - /// * `process_fragment_response` - Optional processor for fragment responses - /// - /// # Returns - /// * `Result<()>` - Ok if processing completed successfully - /// - /// # Example - /// ``` - /// use std::io::Cursor; - /// use std::collections::VecDeque; - /// use esi::{Event, Reader, Writer, Processor, Configuration}; - /// use quick_xml::events::Event as XmlEvent; + /// Process an ESI stream with industry-grade streaming architecture /// - /// let events = VecDeque::from([Event::Content(XmlEvent::Empty( - /// quick_xml::events::BytesStart::new("div") - /// ))]); + /// This is the low-level streaming API that processes ESI markup from any + /// `BufRead` source to any `Write` destination. For processing Fastly responses, + /// use [`process_response`](Self::process_response) instead. /// - /// let mut writer = Writer::new(Cursor::new(Vec::new())); + /// This method implements **three levels of streaming** for optimal performance: /// - /// let processor = Processor::new(None, esi::Configuration::default()); + /// ## 1. Chunked Input Reading (Memory Efficient) + /// - Reads source stream in 8KB chunks from BufRead + /// - Accumulates chunks until parser can make progress + /// - Prevents loading entire document into memory at once + /// - Bounded memory growth with incremental processing /// - /// processor.process_parsed_document( - /// events, - /// &mut writer, - /// None, - /// None - /// )?; - /// # Ok::<(), esi::ExecutionError>(()) - /// ``` + /// ## 2. 
Streaming Output (Low Latency) + /// - Writes processed content immediately as elements are parsed + /// - Non-blocking poll checks for completed fragments + /// - Output reaches destination with minimal delay + /// - No buffering of final output /// - /// # Errors - /// Returns error if: - /// * Event processing fails - /// * Writing to output fails - /// * Fragment request/response processing fails + /// ## 3. Streaming Fragments (Maximum Parallelism) + /// - Dispatches all includes immediately (non-blocking) + /// - Uses select() to process whichever fragment completes first + /// - All fragments fetch in parallel, no wasted waiting + /// - Try blocks dispatch all attempts' includes upfront /// - pub fn process_parsed_document( - self, - src_events: VecDeque, - output_writer: &mut Writer, - dispatch_fragment_request: Option<&FragmentRequestDispatcher>, - process_fragment_response: Option<&FragmentResponseProcessor>, - ) -> Result<()> { - // Set up fragment request dispatcher. Use what's provided or use a default - let dispatch_fragment_request = - dispatch_fragment_request.unwrap_or(&default_fragment_dispatcher); - - // If there is a source request to mimic, copy its metadata, otherwise use a default request. 
- let original_request_metadata = self.original_request_metadata.as_ref().map_or_else( - || Request::new(Method::GET, "http://localhost"), - Request::clone_without_body, - ); - - // `root_task` is the root task that will be used to fetch tags in recursive manner - let root_task = &mut Task::new(); - - // context for the interpreter - let mut ctx = EvalContext::new(); - ctx.set_request(original_request_metadata.clone_without_body()); - - for event in src_events { - event_receiver( - event, - &mut root_task.queue, - self.configuration.is_escaped_content, - &original_request_metadata, - dispatch_fragment_request, - &mut ctx, - )?; - } - - Self::process_root_task( - root_task, - output_writer, - dispatch_fragment_request, - process_fragment_response, - ) - } - - /// Process an ESI document from a [`Reader`], handling includes and directives + /// ## Key Features: + /// - Only fetches fragments that are actually needed (not those in unexecuted branches) + /// - Fully recursive nested try/except blocks + /// - Proper alt fallback and continue_on_error handling + /// - Full ESI specification compliance /// - /// Processes ESI directives while streaming content to the output writer. Handles: - /// - ESI includes with fragment fetching - /// - Variable substitution - /// - Conditional processing - /// - Try/except blocks + /// ## Note on Parsing: + /// The parser (nom-based) requires complete input for each parse operation. + /// We handle this by buffering input chunks until a successful parse, + /// then processing parsed elements immediately while retaining unparsed remainder. 
/// /// # Arguments - /// * `src_document` - Reader containing source XML/HTML with ESI markup - /// * `output_writer` - Writer to stream processed output to + /// * `src_stream` - BufRead source containing ESI markup (streams in chunks) + /// * `output_writer` - Writer to stream processed output to (writes immediately) /// * `dispatch_fragment_request` - Optional handler for fragment requests /// * `process_fragment_response` - Optional processor for fragment responses /// /// # Returns /// * `Result<()>` - Ok if processing completed successfully /// - /// # Example - /// ``` - /// use esi::{Reader, Writer, Processor, Configuration}; - /// use std::io::Cursor; - /// - /// let xml = r#""#; - /// let reader = Reader::from_str(xml); - /// let mut writer = Writer::new(Cursor::new(Vec::new())); - /// - /// let processor = Processor::new(None, Configuration::default()); - /// - /// // Define a simple fragment dispatcher - /// fn default_fragment_dispatcher(req: fastly::Request) -> esi::Result { - /// Ok(esi::PendingFragmentContent::CompletedRequest( - /// fastly::Response::from_body("Fragment content") - /// )) - /// } - /// processor.process_document( - /// reader, - /// &mut writer, - /// Some(&default_fragment_dispatcher), - /// None - /// )?; - /// # Ok::<(), esi::ExecutionError>(()) - /// ``` - /// /// # Errors /// Returns error if: - /// * ESI markup parsing fails - /// * Fragment requests fail - /// * Output writing fails - pub fn process_document( - self, - mut src_document: Reader, - output_writer: &mut Writer, + /// * ESI markup parsing fails or document is malformed + /// * Fragment requests fail (unless `continue_on_error` is set) + /// * Input reading or output writing fails + /// * Invalid UTF-8 encoding encountered + pub fn process_stream( + &mut self, + mut src_stream: impl BufRead, + output_writer: &mut impl Write, dispatch_fragment_request: Option<&FragmentRequestDispatcher>, process_fragment_response: Option<&FragmentResponseProcessor>, ) -> 
Result<()> { - // Set up fragment request dispatcher. Use what's provided or use a default - let dispatch_fragment_request = - dispatch_fragment_request.unwrap_or(&default_fragment_dispatcher); - - // If there is a source request to mimic, copy its metadata, otherwise use a default request. - let original_request_metadata = self.original_request_metadata.as_ref().map_or_else( - || Request::new(Method::GET, "http://localhost"), - Request::clone_without_body, - ); + // Set up fragment request dispatcher + let dispatcher = dispatch_fragment_request.unwrap_or(&default_fragment_dispatcher); + + // STREAMING INPUT PARSING: + // Read chunks, parse incrementally, process elements as we parse them + const CHUNK_SIZE: usize = 8192; // 8KB chunks + // Using BytesMut for zero-copy parsing + let mut buffer = BytesMut::with_capacity(CHUNK_SIZE); + let mut read_buf = vec![0u8; CHUNK_SIZE]; + let mut eof = false; + let mut iterations = 0; + const MAX_ITERATIONS: usize = 10000; + + loop { + iterations += 1; + if iterations > MAX_ITERATIONS { + return Err(ESIError::ExpressionError(format!( + "Infinite loop detected after {} iterations, buffer len: {}, eof: {}", + iterations, + buffer.len(), + eof + ))); + } + // Read more data if we haven't hit EOF yet + if !eof { + match src_stream.read(&mut read_buf) { + Ok(0) => { + // EOF reached - parser can now make final decisions + eof = true; + } + Ok(n) => { + // Append new data to buffer (zero-copy extend) + buffer.extend_from_slice(&read_buf[..n]); + } + Err(e) => { + return Err(ESIError::WriterError(e)); + } + } + } - // `root_task` is the root task that will be used to fetch tags in recursive manner - let root_task = &mut Task::new(); + // Freeze a view of the buffer for zero-copy parsing + // We clone here because freeze() consumes, but Bytes cloning is cheap (ref count) + let frozen = buffer.clone().freeze(); + + // Try to parse what we have in the buffer + // Use streaming parser unless we're at EOF, then use complete parser + let 
parse_result = if eof { + // At EOF - use complete parser which handles Incomplete by treating remainder as text + parser::parse_complete(&frozen) + } else { + // Still streaming - use streaming parser + parser::parse(&frozen) + }; + + match parse_result { + Ok((remaining, elements)) => { + // Successfully parsed some elements + for element in elements { + let _ = + self.process_element_streaming(element, output_writer, dispatcher)?; + // Note: breaks at top-level are ignored + // After each element, check if any queued includes are ready (non-blocking poll) + self.process_ready_queue_items( + output_writer, + dispatcher, + process_fragment_response, + )?; + } - // context for the interpreter - let mut ctx = EvalContext::new(); - ctx.set_request(original_request_metadata.clone_without_body()); - - // Call the library to parse fn `parse_tags` which will call the callback function - // on each tag / event it finds in the document. - // The callback function `handle_events` will handle the event. 
- parse_tags( - &self.configuration.namespace, - &mut src_document, - &mut |event| { - event_receiver( - event, - &mut root_task.queue, - self.configuration.is_escaped_content, - &original_request_metadata, - dispatch_fragment_request, - &mut ctx, - ) - }, - )?; + // Calculate how many bytes were consumed + let consumed = frozen.len() - remaining.len(); - Self::process_root_task( - root_task, - output_writer, - dispatch_fragment_request, - process_fragment_response, - ) - } + // Keep the unparsed remainder for next iteration + if remaining.is_empty() { + if eof { + // All done - parsed everything and reached EOF + break; + } + // Parsed everything in buffer, clear it and continue reading + buffer.clear(); + } else { + // Have unparsed remainder + if eof { + // At EOF with unparsed data - already handled by parse_complete_bytes + // which treats remainder as Text elements + break; + } + // Keep remainder for next chunk - advance past consumed bytes + buffer.advance(consumed); + } + } + Err(nom::Err::Incomplete(_)) => { + // Streaming parser needs more data + if eof { + // At EOF but parser wants more data - this shouldn't happen + // with parse_complete_bytes, but handle it just in case + if !buffer.is_empty() { + output_writer.write_all(&buffer)?; + } + break; + } + // Not at EOF - loop will read more data + } + Err(nom::Err::Error(e) | nom::Err::Failure(e)) => { + // Parse error + if eof { + // At EOF with parse error - this is a real error + return Err(ESIError::ExpressionError(format!("Parser error: {:?}", e))); + } + // Not at EOF - maybe more data will help, output what we have and continue + output_writer.write_all(&buffer)?; + buffer.clear(); + } + } + } - fn process_root_task( - root_task: &mut Task, - output_writer: &mut Writer, - dispatch_fragment_request: &FragmentRequestDispatcher, - process_fragment_response: Option<&FragmentResponseProcessor>, - ) -> Result<()> { - // set the root depth to 0 - let mut depth = 0; - - debug!("Elements to fetch: {:?}", 
root_task.queue); - - // Elements dependent on backend requests are queued up. - // The responses will need to be fetched and processed. - // Go over the list for any pending responses and write them to the client output stream. - fetch_elements( - &mut depth, - root_task, - output_writer, - dispatch_fragment_request, - process_fragment_response, - )?; + // DRAIN QUEUE: Wait for all remaining pending fragments (blocking waits) + self.drain_queue(output_writer, dispatcher, process_fragment_response)?; Ok(()) } -} -fn default_fragment_dispatcher(req: Request) -> Result { - debug!("no dispatch method configured, defaulting to hostname"); - let backend = req - .get_url() - .host() - .unwrap_or_else(|| panic!("no host in request: {}", req.get_url())) - .to_string(); - let pending_req = req.send_async(backend)?; - Ok(PendingFragmentContent::PendingRequest(pending_req)) -} + /// Process a single element in streaming mode + fn process_element_streaming( + &mut self, + element: parser_types::Element, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + ) -> Result { + use parser_types::{Element, Tag}; -// This function is responsible for fetching pending requests and writing their -// responses to the client output stream. It also handles any queued source -// content that needs to be written to the client output stream. 
-fn fetch_elements( - depth: &mut usize, - task: &mut Task, - output_writer: &mut Writer, - dispatch_fragment_request: &FragmentRequestDispatcher, - process_fragment_response: Option<&FragmentResponseProcessor>, -) -> Result { - while let Some(element) = task.queue.pop_front() { match element { - Element::Raw(raw) => { - process_raw(task, output_writer, &raw, *depth)?; + Element::Text(text) | Element::Html(text) => { + // Non-blocking content + if self.queue.is_empty() { + // Not blocked - write immediately + output_writer.write_all(&text)?; + } else { + // Blocked - queue it + self.queue.push_back(QueuedElement::Content(text)); + } + Ok(false) } - Element::Include(fragment) => { - let result = process_include( - task, - *fragment, - output_writer, - *depth, - dispatch_fragment_request, - process_fragment_response, - )?; - if let FetchState::Failed(_, _) = result { - return Ok(result); + + Element::Expr(expr) => { + // Evaluate and treat as non-blocking content + match expression::eval_expr(&expr, &mut self.ctx) { + Ok(val) if !matches!(val, expression::Value::Null) => { + let bytes = val.to_bytes(); + if !bytes.is_empty() { + if self.queue.is_empty() { + output_writer.write_all(&bytes)?; + } else { + self.queue.push_back(QueuedElement::Content(bytes)); + } + } + } + _ => {} // Skip null or error } + Ok(false) } - Element::Try { - mut attempt_task, - mut except_task, - } => { - *depth += 1; - process_try( - task, - output_writer, - &mut attempt_task, - &mut except_task, - depth, - dispatch_fragment_request, - process_fragment_response, - )?; - *depth -= 1; - if *depth == 0 { - debug!( - "Writing try result: {:?}", - String::from_utf8(task.output.get_mut().as_slice().to_vec()) - ); - output_handler(output_writer, task.output.get_mut().as_ref())?; - task.output.get_mut().clear(); + + Element::Esi(Tag::Assign { + name, + subscript, + value, + }) => { + // Non-blocking - just update context + let val = expression::eval_expr(&value, &mut self.ctx) + 
.unwrap_or(expression::Value::Text("".into())); + + if let Some(subscript_expr) = subscript { + // Subscript assignment: modify existing collection + // Evaluate the subscript expression to get the key/index + if let Ok(subscript_val) = expression::eval_expr(&subscript_expr, &mut self.ctx) + { + let key_str = subscript_val.to_string(); + self.ctx.set_variable(&name, Some(&key_str), val)?; + } + } else { + // Regular assignment without subscript + self.ctx.set_variable(&name, None, val)?; } + Ok(false) } - } - } - Ok(FetchState::Succeeded) -} -fn process_include( - task: &mut Task, - fragment: Fragment, - output_writer: &mut Writer, - depth: usize, - dispatch_fragment_request: &FragmentRequestDispatcher, - process_fragment_response: Option<&FragmentResponseProcessor>, -) -> Result { - // take the fragment and deconstruct it - let Fragment { - mut request, - alt, - continue_on_error, - pending_content, - } = fragment; - - // wait for `` request to complete - let resp = pending_content.wait_for_content()?; - - let processed_resp = if let Some(process_response) = process_fragment_response { - process_response(&mut request, resp)? - } else { - resp - }; + Element::Esi(Tag::Vars { name: Some(n) }) => { + // Non-blocking - just update context + self.ctx.set_match_name(&n); + Ok(false) + } - // Request has completed, check the status code. - if processed_resp.get_status().is_success() { - if depth == 0 && task.output.get_mut().is_empty() { - debug!("Include is not nested, writing content to the output stream"); - output_handler(output_writer, &processed_resp.into_body_bytes())?; - } else { - debug!("Include is nested, writing content to a buffer"); - task.output - .get_mut() - .extend_from_slice(&processed_resp.into_body_bytes()); - } + Element::Esi(Tag::Vars { name: None }) => { + // No-op when name is None + Ok(false) + } - Ok(FetchState::Succeeded) - } else { - // Response status is NOT success, either continue, fallback to an alt, or fail. 
- if let Some(request) = alt { - debug!("request poll DONE ERROR, trying alt"); - if let Some(fragment) = - send_fragment_request(request?, None, continue_on_error, dispatch_fragment_request)? - { - task.queue.push_front(Element::Include(Box::new(fragment))); - return Ok(FetchState::Pending); + Element::Esi(Tag::Include { + src, + alt, + continue_on_error, + params, + }) => { + // BLOCKING - dispatch and queue + let queued_element = + self.process_include_tag(src, alt, continue_on_error, params, dispatcher)?; + self.queue.push_back(queued_element); + Ok(false) } - debug!("guest returned None, continuing"); - return Ok(FetchState::Succeeded); - } else if continue_on_error { - debug!("request poll DONE ERROR, NO ALT, continuing"); - return Ok(FetchState::Succeeded); + + Element::Esi(Tag::Choose { + when_branches, + otherwise_events, + }) => { + // Evaluate condition and recursively process chosen branch + let mut chose_branch = false; + + for when_branch in when_branches { + if let Some(ref match_name) = when_branch.match_name { + self.ctx.set_match_name(match_name); + } + + match expression::eval_expr(&when_branch.test, &mut self.ctx) { + Ok(test_result) if test_result.to_bool() => { + // This branch matches - recursively process it + for elem in when_branch.content { + let break_encountered = self.process_element_streaming( + elem, + output_writer, + dispatcher, + )?; + if break_encountered { + return Ok(true); // Propagate break signal + } + } + chose_branch = true; + break; + } + _ => continue, + } + } + + // No when matched - process otherwise + if !chose_branch { + for elem in otherwise_events { + let break_encountered = + self.process_element_streaming(elem, output_writer, dispatcher)?; + if break_encountered { + return Ok(true); // Propagate break signal + } + } + } + Ok(false) + } + + Element::Esi(Tag::Try { + attempt_events, + except_events, + }) => { + // Process try/except with parallel dispatch: + // Dispatch all includes from all attempts, then add try 
block to queue + let mut attempt_queues = Vec::new(); + + for attempt in attempt_events { + let mut attempt_queue = Vec::new(); + + for elem in attempt { + // Process each element in the attempt, collecting queued items + match elem { + Element::Text(text) => { + attempt_queue.push(QueuedElement::Content(text)); + } + Element::Html(html) => { + attempt_queue.push(QueuedElement::Content(html)); + } + Element::Expr(expr) => { + match expression::eval_expr(&expr, &mut self.ctx) { + Ok(value) => { + if !matches!(value, expression::Value::Null) { + let bytes = value.to_bytes(); + if !bytes.is_empty() { + attempt_queue.push(QueuedElement::Content(bytes)); + } + } + } + Err(e) => { + debug!("Expression evaluation failed: {:?}", e); + } + } + } + Element::Esi(Tag::Include { + src, + alt, + continue_on_error, + params, + }) => { + // Dispatch the include and add to attempt queue + let queued_element = self.process_include_tag( + src, + alt, + continue_on_error, + params, + dispatcher, + )?; + attempt_queue.push(queued_element); + } + Element::Esi(Tag::Choose { + when_branches, + otherwise_events, + }) => { + // Evaluate and process chosen branch inline + let mut chose_branch = false; + for when_branch in when_branches { + if let Some(match_name) = &when_branch.match_name { + self.ctx.set_match_name(match_name); + } + let test_result = + expression::eval_expr(&when_branch.test, &mut self.ctx)?; + if test_result.to_bool() { + chose_branch = true; + for elem in when_branch.content { + self.process_element_streaming( + elem, + output_writer, + dispatcher, + )?; + // Note: breaks within try blocks don't propagate out + } + break; + } + } + if !chose_branch { + for elem in otherwise_events { + self.process_element_streaming( + elem, + output_writer, + dispatcher, + )?; + // Note: breaks within try blocks don't propagate out + } + } + } + Element::Esi(Tag::Try { .. 
}) => { + // Nested try blocks - process recursively + self.process_element_streaming( + elem.clone(), + output_writer, + dispatcher, + )?; + // Note: breaks within try blocks don't propagate out + } + _ => {} + } + } + + attempt_queues.push(attempt_queue); + } + + // Process except clause elements + let mut except_queue = Vec::new(); + for elem in except_events { + match elem { + Element::Text(text) => { + except_queue.push(QueuedElement::Content(text)); + } + Element::Html(html) => { + except_queue.push(QueuedElement::Content(html)); + } + Element::Expr(expr) => match expression::eval_expr(&expr, &mut self.ctx) { + Ok(value) => { + if !matches!(value, expression::Value::Null) { + let bytes = value.to_bytes(); + if !bytes.is_empty() { + except_queue.push(QueuedElement::Content(bytes)); + } + } + } + Err(e) => { + debug!("Expression evaluation failed: {:?}", e); + } + }, + Element::Esi(Tag::Include { + src, + alt, + continue_on_error, + params, + }) => { + // Dispatch the include and add to except queue + let queued_element = self.process_include_tag( + src, + alt, + continue_on_error, + params, + dispatcher, + )?; + except_queue.push(queued_element); + } + _ => {} + } + } + + // Add the try block to the queue with all attempts and except dispatched + self.queue.push_back(QueuedElement::Try { + attempt_elements: attempt_queues, + except_elements: except_queue, + }); + Ok(false) + } + + Element::Esi(Tag::Foreach { + collection, + item, + content, + }) => { + // Evaluate the collection expression + let collection_value = expression::eval_expr(&collection, &mut self.ctx) + .unwrap_or(expression::Value::Null); + + // Convert to a list if needed + let items = match &collection_value { + expression::Value::List(items) => items.clone(), + expression::Value::Dict(map) => { + // Convert dict entries to a list of 2-element lists [key, value] + map.iter() + .map(|(k, v)| { + expression::Value::List(vec![ + expression::Value::Text(k.clone().into()), + v.clone(), + ]) + }) + 
.collect() + } + expression::Value::Null => Vec::new(), + other => vec![other.clone()], // Treat single values as a list of one + }; + + // Default item variable name if not specified + let item_var = item.unwrap_or_else(|| "item".to_string()); + + // Iterate through items + 'foreach_loop: for item_value in items { + // Set the item variable + self.ctx.set_variable(&item_var, None, item_value)?; + + // Process content for this iteration + for elem in content.iter() { + let break_encountered = self.process_element_streaming( + elem.clone(), + output_writer, + dispatcher, + )?; + if break_encountered { + break 'foreach_loop; + } + } + } + Ok(false) + } + + Element::Esi(Tag::Break) => { + // Signal break to enclosing foreach + Ok(true) + } + + _ => Ok(false), // Other standalone tags shouldn't appear } + } + + /// Evaluate an Expr to a Bytes value for use in includes + /// Handles variable resolution, function calls, and string interpolation + fn evaluate_expr_to_bytes(&mut self, expr: &Expr) -> Result { + use crate::expression::eval_expr; + + // Evaluate the expression to get a Value + let result = eval_expr(expr, &mut self.ctx)?; - debug!("request poll DONE ERROR, NO ALT, failing"); - Ok(FetchState::Failed( - request, - processed_resp.get_status().into(), - )) + // Convert the Value to Bytes using the built-in to_bytes method + Ok(result.to_bytes()) } -} -// Helper function to write raw content to the client output stream. -// If the depth is 0 and no queue, the content is written directly to the client output stream. -// Otherwise, the content is written to the task's output buffer. 
-fn process_raw( - task: &mut Task, - output_writer: &mut Writer, - raw: &[u8], - depth: usize, -) -> Result<()> { - if depth == 0 && task.output.get_mut().is_empty() { - debug!("writing previously queued content"); - output_writer - .get_mut() - .write_all(raw) - .map_err(ExecutionError::WriterError)?; - output_writer.get_mut().flush()?; - } else { - trace!("-- Depth: {depth}"); - debug!( - "writing blocked content to a queue {:?} ", - String::from_utf8(raw.to_owned()) - ); - task.output.get_mut().extend_from_slice(raw); + /// Helper to evaluate Include expressions and dispatch the request + /// Returns a QueuedElement ready to be added to any queue (main/attempt/except) + fn process_include_tag( + &mut self, + src: Expr, + alt: Option, + continue_on_error: bool, + params: Vec<(String, Expr)>, + dispatcher: &FragmentRequestDispatcher, + ) -> Result { + // Evaluate src and alt expressions to get actual URLs + let src_bytes = self.evaluate_expr_to_bytes(&src)?; + let alt_bytes = alt + .as_ref() + .map(|e| self.evaluate_expr_to_bytes(e)) + .transpose()?; + + self.dispatch_include_to_element( + &src_bytes, + alt_bytes.as_ref(), + continue_on_error, + ¶ms, + dispatcher, + ) } - Ok(()) -} -// Helper function to handle the end of a tag -fn process_try( - task: &mut Task, - output_writer: &mut Writer, - attempt_task: &mut Task, - except_task: &mut Task, - depth: &mut usize, - dispatch_fragment_request: &FragmentRequestDispatcher, - process_fragment_response: Option<&FragmentResponseProcessor>, -) -> Result<()> { - let attempt_state = fetch_elements( - depth, - attempt_task, - output_writer, - dispatch_fragment_request, - process_fragment_response, - )?; - - let except_state = fetch_elements( - depth, - except_task, - output_writer, - dispatch_fragment_request, - process_fragment_response, - )?; - - trace!("*** Depth: {depth}"); - - match (attempt_state, except_state) { - (FetchState::Succeeded, _) => { - task.output - .get_mut() - 
.extend_from_slice(&std::mem::take(attempt_task).output.into_inner()); - } - (FetchState::Failed(_, _), FetchState::Succeeded) => { - task.output - .get_mut() - .extend_from_slice(&std::mem::take(except_task).output.into_inner()); - } - (FetchState::Failed(req, res), FetchState::Failed(_req, _res)) => { - // both tasks failed - return Err(ExecutionError::UnexpectedStatus( - req.get_url_str().to_string(), - res, - )); + /// Dispatch an include and return a QueuedElement (for flexible queue insertion) + /// This is the single source of truth for include dispatching logic + fn dispatch_include_to_element( + &mut self, + src: &Bytes, + alt: Option<&Bytes>, + continue_on_error: bool, + params: &[(String, Expr)], + dispatcher: &FragmentRequestDispatcher, + ) -> Result { + // Evaluate params and append to URL + let mut url = String::from_utf8_lossy(src).into_owned(); + if !params.is_empty() { + // Pre-allocate capacity: estimate ~20 chars per param (name + value + separators) + url.reserve(params.len() * 20); + let mut separator = if url.contains('?') { '&' } else { '?' }; + for (name, value_expr) in params { + let value = self.evaluate_expr_to_bytes(value_expr)?; + let value_str = String::from_utf8_lossy(&value); + // Direct string building is more efficient than format! + url.push(separator); + url.push_str(name); + url.push('='); + url.push_str(&value_str); + separator = '&'; + } } - (FetchState::Pending, _) | (FetchState::Failed(_, _), FetchState::Pending) => { - // Request are still pending, re-add it to the front of the queue and wait for the next poll. 
- task.queue.push_front(Element::Try { - attempt_task: Box::new(std::mem::take(attempt_task)), - except_task: Box::new(std::mem::take(except_task)), - }); + let final_src = Bytes::from(url); + + let req = build_fragment_request( + self.ctx.get_request().clone_without_body(), + &final_src, + self.configuration.is_escaped_content, + )?; + + match dispatcher(req.clone_without_body()) { + Ok(pending) => { + let fragment = Fragment { + request: req, + alt: alt.cloned(), + continue_on_error, + pending_content: pending, + }; + Ok(QueuedElement::Include(Box::new(fragment))) + } + Err(_) if continue_on_error => { + // Try alt or add error placeholder + if let Some(alt_src) = alt { + let alt_req = build_fragment_request( + self.ctx.get_request().clone_without_body(), + alt_src, + self.configuration.is_escaped_content, + )?; + + dispatcher(alt_req.clone_without_body()).map_or_else( + |_| { + Ok(QueuedElement::Content(Bytes::from_static( + b"", + ))) + }, + |alt_pending| { + let alt_fragment = Fragment { + request: alt_req, + alt: None, + continue_on_error, + pending_content: alt_pending, + }; + Ok(QueuedElement::Include(Box::new(alt_fragment))) + }, + ) + } else { + Ok(QueuedElement::Content(Bytes::from_static( + b"", + ))) + } + } + Err(e) => Err(ESIError::ExpressionError(format!( + "Fragment dispatch failed: {}", + e + ))), } } - Ok(()) -} -// Receives `Event` from the parser and process it. -// The result is pushed to a queue of elements or written to the output stream. 
-fn event_receiver( - event: Event, - queue: &mut VecDeque, - is_escaped: bool, - original_request_metadata: &Request, - dispatch_fragment_request: &FragmentRequestDispatcher, - ctx: &mut EvalContext, -) -> Result<()> { - match event { - Event::ESI(Tag::Include { - src, - alt, - continue_on_error, - }) => { - debug!("Handling tag with src: {src}"); - // Always interpolate src - let interpolated_src = try_evaluate_interpolated_string(&src, ctx)?; - - // Always interpolate alt if present - let interpolated_alt = alt - .map(|a| try_evaluate_interpolated_string(&a, ctx)) - .transpose()?; - let req = build_fragment_request( - original_request_metadata.clone_without_body(), - &interpolated_src, - is_escaped, - ); - let alt_req = interpolated_alt.map(|alt| { - build_fragment_request( - original_request_metadata.clone_without_body(), - &alt, - is_escaped, - ) - }); - if let Some(fragment) = - send_fragment_request(req?, alt_req, continue_on_error, dispatch_fragment_request)? - { - // add the pending request to the queue - queue.push_back(Element::Include(Box::new(fragment))); + /// Check ready queue items - non-blocking poll + /// Process any fragments that are already completed without blocking + fn process_ready_queue_items( + &mut self, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + ) -> Result<()> { + // Process ready items from the front of the queue without blocking + loop { + // Check what's at the front + let should_try = match self.queue.front() { + Some(QueuedElement::Content(_)) => true, + Some(QueuedElement::Include(_)) => true, + Some(QueuedElement::Try { .. 
}) => false, // Skip try blocks + None => false, + }; + + if !should_try { + break; } - } - Event::ESI(Tag::Try { - attempt_events, - except_events, - }) => { - let attempt_task = task_handler( - attempt_events, - is_escaped, - original_request_metadata, - dispatch_fragment_request, - ctx, - )?; - let except_task = task_handler( - except_events, - is_escaped, - original_request_metadata, - dispatch_fragment_request, - ctx, - )?; - trace!( - "*** pushing try content to queue: Attempt - {:?}, Except - {:?}", - attempt_task.queue, - except_task.queue - ); - // push the elements - queue.push_back(Element::Try { - attempt_task: Box::new(attempt_task), - except_task: Box::new(except_task), - }); - } - Event::ESI(Tag::Assign { name, value }) => { - // TODO: the 'name' here might have a subfield, we need to parse it - let result = evaluate_expression(&value, ctx)?; - ctx.set_variable(&name, None, result); - } - Event::ESI(Tag::Vars { name }) => { - debug!("Handling tag with name: {name:?}"); - if let Some(name) = name { - let result = evaluate_expression(&name, ctx)?; - debug!("Evaluated result: {result:?}"); - queue.push_back(Element::Raw(result.to_string().into_bytes())); + // Pop and process the front element + let elem = self.queue.pop_front().unwrap(); + match elem { + QueuedElement::Content(content) => { + // Content is always ready + output_writer.write_all(&content)?; + } + QueuedElement::Include(mut fragment) => { + // Poll the fragment (non-blocking check) + let pending_content = std::mem::replace( + &mut fragment.pending_content, + PendingFragmentContent::NoContent, + ); + fragment.pending_content = pending_content.poll(); + + // Check if it's ready now + if fragment.pending_content.is_ready() { + // Process it! 
+ self.process_include_from_queue( + *fragment, + output_writer, + dispatcher, + processor, + )?; + } else { + // Still pending - put it back at the front and stop + self.queue.push_front(QueuedElement::Include(fragment)); + break; + } + } + QueuedElement::Try { .. } => { + unreachable!("Try blocks should be skipped in ready check"); + } } } - Event::ESI(Tag::When { .. }) => unreachable!(), - Event::ESI(Tag::Choose { - when_branches, - otherwise_events, - }) => { - let mut chose_branch = false; - for (when, events) in when_branches { - if let Tag::When { test, match_name } = when { - if let Some(match_name) = match_name { - ctx.set_match_name(&match_name); + Ok(()) + } + + /// Drain queue with efficient waiting using `select()` + /// Uses `select()` to process whichever pending request completes first + fn drain_queue( + &mut self, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + ) -> Result<()> { + while !self.queue.is_empty() { + // First, write out any content that's at the front + while let Some(QueuedElement::Content(_)) = self.queue.front() { + if let Some(QueuedElement::Content(bytes)) = self.queue.pop_front() { + output_writer.write_all(&bytes)?; + } + } + + if self.queue.is_empty() { + break; + } + + // Collect all pending includes from the queue + let mut pending_fragments: Vec<(usize, Box)> = Vec::new(); + let mut temp_queue: VecDeque = VecDeque::new(); + + for (idx, elem) in self.queue.drain(..).enumerate() { + match elem { + QueuedElement::Include(fragment) => { + if matches!( + fragment.pending_content, + PendingFragmentContent::PendingRequest(_) + ) { + pending_fragments.push((idx, fragment)); + } else { + // Already ready - process immediately + temp_queue.push_back(QueuedElement::Include(fragment)); + } } - let result = evaluate_expression(&test, ctx)?; - if result.to_bool() { - chose_branch = true; - for event in events { - event_receiver( - event, - queue, - is_escaped, 
- original_request_metadata, - dispatch_fragment_request, - ctx, + other => temp_queue.push_back(other), + } + } + + // Restore the queue with non-pending items + self.queue = temp_queue; + + if pending_fragments.is_empty() { + // Process remaining non-pending items + if let Some(elem) = self.queue.pop_front() { + match elem { + QueuedElement::Include(fragment) => { + self.process_include_from_queue( + *fragment, + output_writer, + dispatcher, + processor, )?; } - break; + QueuedElement::Try { + attempt_elements, + except_elements, + } => { + // Process try block: try each attempt, use except if all fail + self.process_try_block( + attempt_elements, + except_elements, + output_writer, + dispatcher, + processor, + )?; + } + QueuedElement::Content(_) => { + unreachable!("Content should have been processed above"); + } } - } else { - unreachable!() } + continue; } - if !chose_branch { - for event in otherwise_events { - event_receiver( - event, - queue, - is_escaped, - original_request_metadata, - dispatch_fragment_request, - ctx, - )?; + // Extract PendingRequests for select() + let mut pending_reqs: Vec = Vec::new(); + let mut fragments_by_request: Vec<(usize, Box)> = Vec::new(); + + for (idx, mut fragment) in pending_fragments { + if let PendingFragmentContent::PendingRequest(pending_req) = std::mem::replace( + &mut fragment.pending_content, + PendingFragmentContent::NoContent, + ) { + pending_reqs.push(*pending_req); + fragments_by_request.push((idx, fragment)); } } + + if pending_reqs.is_empty() { + continue; + } + + // Wait for any one to complete using select + let (result, remaining) = fastly::http::request::select(pending_reqs); + + // The completed request is the one that's NOT in remaining + let completed_idx = fragments_by_request.len() - remaining.len() - 1; + let (_original_idx, mut completed_fragment) = + fragments_by_request.remove(completed_idx); + + // Update the completed fragment with the result + completed_fragment.pending_content = 
result.map_or_else( + |_| PendingFragmentContent::NoContent, + |resp| PendingFragmentContent::CompletedRequest(Box::new(resp)), + ); + + // Put remaining fragments back in queue (with their pending requests restored) + for (pending_req, (_idx, mut fragment)) in + remaining.into_iter().zip(fragments_by_request) + { + fragment.pending_content = + PendingFragmentContent::PendingRequest(Box::new(pending_req)); + self.queue.push_back(QueuedElement::Include(fragment)); + } + + // Process the completed fragment + self.process_include_from_queue( + *completed_fragment, + output_writer, + dispatcher, + processor, + )?; } - Event::InterpolatedContent(event) => { - debug!("Handling interpolated content: {event:?}"); - let event_str = String::from_utf8(event.iter().copied().collect()).unwrap_or_default(); + Ok(()) + } - process_interpolated_chars(&event_str, ctx, |segment| { - queue.push_back(Element::Raw(segment.into_bytes())); - Ok(()) - })?; + /// Process a try block recursively, handling nested try blocks naturally + fn process_try_block( + &mut self, + attempt_elements: Vec>, + except_elements: Vec, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + ) -> Result<()> { + let mut succeeded = false; + + // Try each attempt in order + for attempt in attempt_elements { + match self.process_queued_elements(attempt, dispatcher, processor) { + Ok(buffer) => { + // This attempt succeeded - write it out + output_writer.write_all(&buffer)?; + succeeded = true; + break; + } + Err(_) => { + // This attempt failed - try the next one + continue; + } + } } - Event::Content(event) => { - debug!("pushing content to buffer, len: {}", queue.len()); - let mut buf = vec![]; - let mut writer = Writer::new(&mut buf); - writer.write_event(event)?; - queue.push_back(Element::Raw(buf)); + + // If all attempts failed, process except clause + if !succeeded { + let except_buffer = + self.process_queued_elements(except_elements, 
dispatcher, processor)?; + output_writer.write_all(&except_buffer)?; } + + Ok(()) } - Ok(()) -} -// Helper function to process a list of events and return a task. -// It's called from `event_receiver` and calls `event_receiver` to process each event in recursion. -fn task_handler( - events: Vec, - is_escaped: bool, - original_request_metadata: &Request, - dispatch_fragment_request: &FragmentRequestDispatcher, - ctx: &mut EvalContext, -) -> Result { - let mut task = Task::new(); - for event in events { - event_receiver( - event, - &mut task.queue, - is_escaped, - original_request_metadata, - dispatch_fragment_request, - ctx, - )?; + /// Process a list of queued elements recursively, returning the output buffer + /// This naturally handles nested try blocks through recursion + fn process_queued_elements( + &mut self, + elements: Vec, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + ) -> Result> { + let mut buffer = Vec::new(); + + for elem in elements { + match elem { + QueuedElement::Content(bytes) => { + buffer.write_all(&bytes)?; + } + QueuedElement::Include(fragment) => { + self.process_include_from_queue(*fragment, &mut buffer, dispatcher, processor)?; + } + QueuedElement::Try { + attempt_elements, + except_elements, + } => { + // Recursively process nested try block + self.process_try_block( + attempt_elements, + except_elements, + &mut buffer, + dispatcher, + processor, + )?; + } + } + } + + Ok(buffer) + } + + /// Process an include from the queue (wait and write, handle alt) + fn process_include_from_queue( + &self, + fragment: Fragment, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + ) -> Result<()> { + let continue_on_error = fragment.continue_on_error; + + // Wait for response + let response = fragment.pending_content.wait()?; + + // Apply processor if provided + let mut req_for_processor = fragment.request.clone_without_body(); + let 
final_response = if let Some(proc) = processor { + proc(&mut req_for_processor, response)? + } else { + response + }; + + // Check if successful + if final_response.get_status().is_success() { + let body_bytes = final_response.into_body_bytes(); + // Write Bytes directly - no UTF-8 conversion needed! + output_writer.write_all(&body_bytes)?; + Ok(()) + } else if let Some(alt_src) = fragment.alt { + // Try alt + debug!("Main request failed, trying alt"); + let alt_req = build_fragment_request( + self.ctx.get_request().clone_without_body(), + &alt_src, + self.configuration.is_escaped_content, + )?; + + match dispatcher(alt_req.clone_without_body()) { + Ok(alt_pending) => { + let alt_response = alt_pending.wait()?; + let mut alt_req_for_proc = alt_req.clone_without_body(); + let final_alt = if let Some(proc) = processor { + proc(&mut alt_req_for_proc, alt_response)? + } else { + alt_response + }; + + let body_bytes = final_alt.into_body_bytes(); + // Write Bytes directly - no UTF-8 conversion needed! 
+ output_writer.write_all(&body_bytes)?; + Ok(()) + } + Err(_) if continue_on_error => { + output_writer.write_all(b"")?; + Ok(()) + } + Err(_) => Err(ESIError::ExpressionError( + "Both main and alt failed".to_string(), + )), + } + } else if continue_on_error { + output_writer.write_all(b"")?; + Ok(()) + } else { + Err(ESIError::ExpressionError(format!( + "Fragment request failed with status: {}", + final_response.get_status() + ))) + } } - Ok(task) +} + +// Default fragment request dispatcher that uses the request's hostname as backend +fn default_fragment_dispatcher(req: Request) -> Result { + debug!("no dispatch method configured, defaulting to hostname"); + let backend = req + .get_url() + .host() + .unwrap_or_else(|| panic!("no host in request: {}", req.get_url())) + .to_string(); + let pending_req = req.send_async(backend)?; + Ok(PendingFragmentContent::PendingRequest(Box::new( + pending_req, + ))) } // Helper function to build a fragment request from a URL // For HTML content the URL is unescaped if it's escaped (default). // It can be disabled in the processor configuration for a non-HTML content. 
-fn build_fragment_request(mut request: Request, url: &str, is_escaped: bool) -> Result { +fn build_fragment_request(mut request: Request, url: &Bytes, is_escaped: bool) -> Result { + // Convert Bytes to str for URL parsing + let url_str = std::str::from_utf8(url) + .map_err(|_| ExecutionError::ExpressionError("Invalid UTF-8 in URL".to_string()))?; + let escaped_url = if is_escaped { - match quick_xml::escape::unescape(url) { - Ok(url) => url.to_string(), - Err(err) => { - return Err(ExecutionError::InvalidRequestUrl(err.to_string())); - } - } + html_escape::decode_html_entities(url_str).into_owned() } else { - url.to_string() + url_str.to_string() }; if escaped_url.starts_with('/') { @@ -828,119 +1322,4 @@ fn build_fragment_request(mut request: Request, url: &str, is_escaped: bool) -> Ok(request) } -fn send_fragment_request( - req: Request, - alt: Option>, - continue_on_error: bool, - dispatch_request: &FragmentRequestDispatcher, -) -> Result> { - debug!("Requesting ESI fragment: {}", req.get_url()); - - let request = req.clone_without_body(); - - let pending_content: PendingFragmentContent = dispatch_request(req)?; - - Ok(Some(Fragment { - request, - alt, - continue_on_error, - pending_content, - })) -} - -// Helper function to create an XML reader from a body. -fn reader_from_body(body: Body) -> Reader { - let mut reader = Reader::from_reader(body); - - // TODO: make this configurable - let config = reader.config_mut(); - config.check_end_names = false; - - reader -} - -// helper function to drive output to a response stream -fn output_handler(output_writer: &mut Writer, buffer: &[u8]) -> Result<()> { - output_writer.get_mut().write_all(buffer)?; - output_writer.get_mut().flush()?; - Ok(()) -} - -/// Processes a string containing interpolated expressions using a character-based approach -/// -/// This function evaluates expressions like $(`HTTP_HOST``) in text content and -/// provides the processed segments to the caller through a callback function. 
-/// -/// # Arguments -/// * `input` - The input string containing potential interpolated expressions -/// * `ctx` - Evaluation context containing variables and state -/// * `segment_handler` - A function that handles each segment (raw text or evaluated expression) -/// -/// # Returns -/// * `Result<()>` - Success or error during processing -/// -pub fn process_interpolated_chars( - input: &str, - ctx: &mut EvalContext, - mut segment_handler: F, -) -> Result<()> -where - F: FnMut(String) -> Result<()>, -{ - let mut buf = vec![]; - let mut cur = input.chars().peekable(); - - while let Some(c) = cur.peek() { - if *c == '$' { - let mut new_cur = cur.clone(); - - if let Some(value) = try_evaluate_interpolated(&mut new_cur, ctx) { - // If we have accumulated text, output it first - if !buf.is_empty() { - segment_handler(buf.into_iter().collect())?; - buf = vec![]; - } - - // Output the evaluated expression result - segment_handler(value.to_string())?; - } - // Update our position - cur = new_cur; - } else { - buf.push(cur.next().unwrap()); - } - } - - // Output any remaining text - if !buf.is_empty() { - segment_handler(buf.into_iter().collect())?; - } - - Ok(()) -} - -/// Evaluates all interpolated expressions in a string and returns the complete result -/// -/// This is a convenience wrapper around `process_interpolated_chars` that collects -/// all output into a single string. 
-/// -/// # Arguments -/// * `input` - The input string containing potential interpolated expressions -/// * `ctx` - Evaluation context containing variables and state -/// -/// # Returns -/// * `Result` - The fully processed string with all expressions evaluated -/// -/// # Errors -/// Returns error if expression evaluation fails -/// -pub fn try_evaluate_interpolated_string(input: &str, ctx: &mut EvalContext) -> Result { - let mut result = String::new(); - - process_interpolated_chars(input, ctx, |segment| { - result.push_str(&segment); - Ok(()) - })?; - - Ok(result) -} +// Helper Functions diff --git a/esi/src/parse.rs b/esi/src/parse.rs deleted file mode 100644 index 03dc5bf..0000000 --- a/esi/src/parse.rs +++ /dev/null @@ -1,648 +0,0 @@ -use crate::{ExecutionError, Result}; -use log::debug; -use quick_xml::events::{BytesStart, Event as XmlEvent}; -use quick_xml::name::QName; -use quick_xml::Reader; -use std::io::BufRead; -use std::ops::Deref; - -// State carrier of Try branch -#[derive(Debug, PartialEq)] -enum TryTagArms { - Try, - Attempt, - Except, -} - -/// Representation of an ESI tag from a source response. -#[derive(Debug)] -pub struct Include { - pub src: String, - pub alt: Option, - pub continue_on_error: bool, -} - -/// Represents a tag in the ESI parsing process. -#[derive(Debug)] -pub enum Tag<'a> { - Include { - src: String, - alt: Option, - continue_on_error: bool, - }, - Try { - attempt_events: Vec>, - except_events: Vec>, - }, - Assign { - name: String, - value: String, - }, - Vars { - name: Option, - }, - When { - test: String, - match_name: Option, - }, - Choose { - when_branches: Vec<(Tag<'a>, Vec>)>, - otherwise_events: Vec>, - }, -} - -/// Representation of either XML data or a parsed ESI tag. 
-#[derive(Debug)] -#[allow(clippy::upper_case_acronyms)] -pub enum Event<'e> { - Content(XmlEvent<'e>), - InterpolatedContent(XmlEvent<'e>), - ESI(Tag<'e>), -} - -// #[derive(Debug)] -struct TagNames { - include: Vec, - comment: Vec, - remove: Vec, - r#try: Vec, - attempt: Vec, - except: Vec, - assign: Vec, - vars: Vec, - choose: Vec, - when: Vec, - otherwise: Vec, -} -impl TagNames { - fn init(namespace: &str) -> Self { - Self { - include: format!("{namespace}:include",).into_bytes(), - comment: format!("{namespace}:comment",).into_bytes(), - remove: format!("{namespace}:remove",).into_bytes(), - r#try: format!("{namespace}:try",).into_bytes(), - attempt: format!("{namespace}:attempt",).into_bytes(), - except: format!("{namespace}:except",).into_bytes(), - assign: format!("{namespace}:assign",).into_bytes(), - vars: format!("{namespace}:vars",).into_bytes(), - choose: format!("{namespace}:choose",).into_bytes(), - when: format!("{namespace}:when",).into_bytes(), - otherwise: format!("{namespace}:otherwise",).into_bytes(), - } - } -} - -#[derive(Debug, PartialEq)] -enum ContentType { - Normal, - Interpolated, -} - -fn do_parse<'a, R>( - reader: &mut Reader, - callback: &mut dyn FnMut(Event<'a>) -> Result<()>, - task: &mut Vec>, - use_queue: bool, - try_depth: &mut usize, - choose_depth: &mut usize, - current_arm: &mut Option, - tag: &TagNames, - content_type: &ContentType, -) -> Result<()> -where - R: BufRead, -{ - let mut is_remove_tag = false; - let mut open_include = false; - let mut open_assign = false; - let mut open_vars = false; - - let attempt_events = &mut Vec::new(); - let except_events = &mut Vec::new(); - - // choose/when variables - let when_branches = &mut Vec::new(); - let otherwise_events = &mut Vec::new(); - - let mut buffer = Vec::new(); - - // When you are in the top level of a try or choose block, the - // only allowable tags are attempt/except or when/otherwise. All - // other data should be eaten. 
- let mut in_try = false; - let mut in_choose = false; - - // Parse tags and build events vec - loop { - match reader.read_event_into(&mut buffer) { - // Handle tags - Ok(XmlEvent::Start(e)) if e.name() == QName(&tag.remove) => { - is_remove_tag = true; - } - - Ok(XmlEvent::End(e)) if e.name() == QName(&tag.remove) => { - if !is_remove_tag { - return unexpected_closing_tag_error(&e); - } - - is_remove_tag = false; - } - _ if is_remove_tag => continue, - - // Handle tags, and ignore the contents if they are not self-closing - Ok(XmlEvent::Empty(e)) if e.name().into_inner().starts_with(&tag.include) => { - include_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::Start(e)) if e.name().into_inner().starts_with(&tag.include) => { - open_include = true; - include_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::End(e)) if e.name().into_inner().starts_with(&tag.include) => { - if !open_include { - return unexpected_closing_tag_error(&e); - } - - open_include = false; - } - - _ if open_include => continue, - - // Ignore tags - Ok(XmlEvent::Empty(e)) if e.name().into_inner().starts_with(&tag.comment) => continue, - - // Handle tags - Ok(XmlEvent::Start(ref e)) if e.name() == QName(&tag.r#try) => { - *current_arm = Some(TryTagArms::Try); - *try_depth += 1; - in_try = true; - continue; - } - - // Handle and tags in recursion - Ok(XmlEvent::Start(ref e)) - if e.name() == QName(&tag.attempt) || e.name() == QName(&tag.except) => - { - if *current_arm != Some(TryTagArms::Try) { - return unexpected_opening_tag_error(e); - } - if e.name() == QName(&tag.attempt) { - *current_arm = Some(TryTagArms::Attempt); - do_parse( - reader, - callback, - attempt_events, - true, - try_depth, - choose_depth, - current_arm, - tag, - &ContentType::Interpolated, - )?; - } else if e.name() == QName(&tag.except) { - *current_arm = Some(TryTagArms::Except); - do_parse( - reader, - callback, - except_events, - true, - try_depth, - choose_depth, - current_arm, - tag, - 
&ContentType::Interpolated, - )?; - } - } - - Ok(XmlEvent::End(ref e)) if e.name() == QName(&tag.r#try) => { - *current_arm = None; - in_try = false; - - if *try_depth == 0 { - return unexpected_closing_tag_error(e); - } - try_end_handler(use_queue, task, attempt_events, except_events, callback)?; - *try_depth -= 1; - continue; - } - - Ok(XmlEvent::End(ref e)) - if e.name() == QName(&tag.attempt) || e.name() == QName(&tag.except) => - { - *current_arm = Some(TryTagArms::Try); - if *try_depth == 0 { - return unexpected_closing_tag_error(e); - } - return Ok(()); - } - - // Handle tags, and ignore the contents if they are not self-closing - // TODO: assign tags have a long form where the contents are interpolated and assigned to the variable - Ok(XmlEvent::Empty(e)) if e.name().into_inner().starts_with(&tag.assign) => { - assign_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::Start(e)) if e.name().into_inner().starts_with(&tag.assign) => { - open_assign = true; - assign_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::End(e)) if e.name().into_inner().starts_with(&tag.assign) => { - if !open_assign { - return unexpected_closing_tag_error(&e); - } - - open_assign = false; - } - - // Handle tags - Ok(XmlEvent::Empty(e)) if e.name().into_inner().starts_with(&tag.vars) => { - vars_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::Start(e)) if e.name().into_inner().starts_with(&tag.vars) => { - open_vars = true; - vars_tag_handler(&e, callback, task, use_queue)?; - } - - Ok(XmlEvent::End(e)) if e.name().into_inner().starts_with(&tag.vars) => { - if !open_vars { - return unexpected_closing_tag_error(&e); - } - - open_vars = false; - } - - // when/choose - Ok(XmlEvent::Start(ref e)) if e.name() == QName(&tag.choose) => { - in_choose = true; - *choose_depth += 1; - } - Ok(XmlEvent::End(ref e)) if e.name() == QName(&tag.choose) => { - in_choose = false; - *choose_depth -= 1; - choose_tag_handler(when_branches, otherwise_events, 
callback, task, use_queue)?; - } - - Ok(XmlEvent::Start(ref e)) if e.name() == QName(&tag.when) => { - if *choose_depth == 0 { - // invalid when tag outside of choose - return unexpected_opening_tag_error(e); - } - - let when_tag = parse_when(e)?; - let mut when_events = Vec::new(); - do_parse( - reader, - callback, - &mut when_events, - true, - try_depth, - choose_depth, - current_arm, - tag, - &ContentType::Interpolated, - )?; - when_branches.push((when_tag, when_events)); - } - Ok(XmlEvent::End(e)) if e.name() == QName(&tag.when) => { - if *choose_depth == 0 { - return unexpected_closing_tag_error(&e); - } - - return Ok(()); - } - - Ok(XmlEvent::Start(ref e)) if e.name() == QName(&tag.otherwise) => { - if *choose_depth == 0 { - return unexpected_opening_tag_error(e); - } - do_parse( - reader, - callback, - otherwise_events, - true, - try_depth, - choose_depth, - current_arm, - tag, - &ContentType::Interpolated, - )?; - } - Ok(XmlEvent::End(e)) if e.name() == QName(&tag.otherwise) => { - if *choose_depth == 0 { - return unexpected_closing_tag_error(&e); - } - return Ok(()); - } - - Ok(XmlEvent::Eof) => { - debug!("End of document"); - break; - } - Ok(e) => { - if in_try || in_choose { - continue; - } - - let event = if open_vars || content_type == &ContentType::Interpolated { - Event::InterpolatedContent(e.into_owned()) - } else { - Event::Content(e.into_owned()) - }; - if use_queue { - task.push(event); - } else { - callback(event)?; - } - } - _ => {} - } - } - Ok(()) -} - -/// Parses an XML/HTML document looking for ESI tags in the specified namespace -/// -/// This function reads from a buffered reader source and processes XML/HTML events, -/// calling the provided callback for each event that matches an ESI tag. -/// -/// # Arguments -/// * `namespace` - The XML namespace to use for ESI tags (e.g. 
"esi") -/// * `reader` - Buffered reader containing the XML/HTML document to parse -/// * `callback` - Function called for each matching ESI tag event -/// -/// # Returns -/// * `Result<()>` - Ok if parsing completed successfully, or Error if parsing failed -/// -/// # Example -/// ``` -/// use esi::{Reader, parse_tags}; -/// -/// let xml = r#""#; -/// let mut reader = Reader::from_str(xml); -/// let mut callback = |event| { Ok(()) }; -/// parse_tags("esi", &mut reader, &mut callback)?; -/// -/// # Ok::<(), esi::ExecutionError>(()) -/// ``` -/// # Errors -/// Returns an `ExecutionError` if there is an error reading or parsing the document. -pub fn parse_tags<'a, R>( - namespace: &str, - reader: &mut Reader, - callback: &mut dyn FnMut(Event<'a>) -> Result<()>, -) -> Result<()> -where - R: BufRead, -{ - debug!("Parsing document..."); - - // Initialize the ESI tags - let tags = TagNames::init(namespace); - // set the initial depth of nested tags - let mut try_depth = 0; - let mut choose_depth = 0; - let mut root = Vec::new(); - - let mut current_arm: Option = None; - - do_parse( - reader, - callback, - &mut root, - false, - &mut try_depth, - &mut choose_depth, - &mut current_arm, - &tags, - &ContentType::Normal, - )?; - debug!("Root: {root:?}"); - - Ok(()) -} - -fn parse_include<'a>(elem: &BytesStart) -> Result> { - let src = match elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"src") - { - Some(attr) => String::from_utf8(attr.value.to_vec()).unwrap(), - None => { - return Err(ExecutionError::MissingRequiredParameter( - String::from_utf8(elem.name().into_inner().to_vec()).unwrap(), - "src".to_string(), - )); - } - }; - - let alt = elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"alt") - .map(|attr| String::from_utf8(attr.value.to_vec()).unwrap()); - - let continue_on_error = elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"onerror") - .is_some_and(|attr| &attr.value.to_vec() == 
b"continue"); - - Ok(Tag::Include { - src, - alt, - continue_on_error, - }) -} - -fn parse_assign<'a>(elem: &BytesStart) -> Result> { - let name = match elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"name") - { - Some(attr) => String::from_utf8(attr.value.to_vec()).unwrap(), - None => { - return Err(ExecutionError::MissingRequiredParameter( - String::from_utf8(elem.name().into_inner().to_vec()).unwrap(), - "name".to_string(), - )); - } - }; - - let value = match elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"value") - { - Some(attr) => String::from_utf8(attr.value.to_vec()).unwrap(), - None => { - return Err(ExecutionError::MissingRequiredParameter( - String::from_utf8(elem.name().into_inner().to_vec()).unwrap(), - "value".to_string(), - )); - } - }; - - Ok(Tag::Assign { name, value }) -} - -fn parse_vars<'a>(elem: &BytesStart) -> Result> { - let name = elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"name") - .map(|attr| String::from_utf8(attr.value.to_vec()).unwrap()); - - Ok(Tag::Vars { name }) -} - -fn parse_when<'a>(elem: &BytesStart) -> Result> { - let test = match elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"test") - { - Some(attr) => String::from_utf8(attr.value.to_vec()).unwrap(), - None => { - return Err(ExecutionError::MissingRequiredParameter( - String::from_utf8(elem.name().into_inner().to_vec()).unwrap(), - "test".to_string(), - )); - } - }; - - let match_name = elem - .attributes() - .flatten() - .find(|attr| attr.key.into_inner() == b"matchname") - .map(|attr| String::from_utf8(attr.value.to_vec()).unwrap()); - - Ok(Tag::When { test, match_name }) -} - -// Helper function to handle the end of a tag -// If the depth is 1, the `callback` closure is called with the `Tag::Try` event -// Otherwise, a new `Tag::Try` event is pushed to the `task` vector -fn try_end_handler<'a>( - use_queue: bool, - task: &mut Vec>, - attempt_events: &mut 
Vec>, - except_events: &mut Vec>, - callback: &mut dyn FnMut(Event<'a>) -> Result<()>, -) -> Result<()> { - if use_queue { - task.push(Event::ESI(Tag::Try { - attempt_events: std::mem::take(attempt_events), - except_events: std::mem::take(except_events), - })); - } else { - callback(Event::ESI(Tag::Try { - attempt_events: std::mem::take(attempt_events), - except_events: std::mem::take(except_events), - }))?; - } - - Ok(()) -} - -// Helper function to handle tags -// If the depth is 0, the `callback` closure is called with the `Tag::Include` event -// Otherwise, a new `Tag::Include` event is pushed to the `task` vector -fn include_tag_handler<'e>( - elem: &BytesStart, - callback: &mut dyn FnMut(Event<'e>) -> Result<()>, - task: &mut Vec>, - use_queue: bool, -) -> Result<()> { - if use_queue { - task.push(Event::ESI(parse_include(elem)?)); - } else { - callback(Event::ESI(parse_include(elem)?))?; - } - - Ok(()) -} - -// Helper function to handle tags -// If the depth is 0, the `callback` closure is called with the `Tag::Assign` event -// Otherwise, a new `Tag::Assign` event is pushed to the `task` vector -fn assign_tag_handler<'e>( - elem: &BytesStart, - callback: &mut dyn FnMut(Event<'e>) -> Result<()>, - task: &mut Vec>, - use_queue: bool, -) -> Result<()> { - if use_queue { - task.push(Event::ESI(parse_assign(elem)?)); - } else { - callback(Event::ESI(parse_assign(elem)?))?; - } - - Ok(()) -} - -// Helper function to handle tags -// If the depth is 0, the `callback` closure is called with the `Tag::Assign` event -// Otherwise, a new `Tag::Vars` event is pushed to the `task` vector -fn vars_tag_handler<'e>( - elem: &BytesStart, - callback: &mut dyn FnMut(Event<'e>) -> Result<()>, - task: &mut Vec>, - use_queue: bool, -) -> Result<()> { - debug!("Handling tag"); - let tag = parse_vars(elem)?; - debug!("Parsed tag: {tag:?}"); - if use_queue { - task.push(Event::ESI(parse_vars(elem)?)); - } else { - callback(Event::ESI(parse_vars(elem)?))?; - } - - Ok(()) -} - -fn 
choose_tag_handler<'a>( - when_branches: &mut Vec<(Tag<'a>, Vec>)>, - otherwise_events: &mut Vec>, - callback: &mut dyn FnMut(Event<'a>) -> Result<()>, - task: &mut Vec>, - use_queue: bool, -) -> Result<()> { - let choose_tag = Tag::Choose { - when_branches: std::mem::take(when_branches), - otherwise_events: std::mem::take(otherwise_events), - }; - if use_queue { - task.push(Event::ESI(choose_tag)); - } else { - callback(Event::ESI(choose_tag))?; - } - - Ok(()) -} - -// Helper function return UnexpectedClosingTag error -fn unexpected_closing_tag_error(e: &T) -> Result<()> -where - T: Deref, -{ - Err(ExecutionError::UnexpectedClosingTag( - String::from_utf8_lossy(e).to_string(), - )) -} - -// Helper function return UnexpectedClosingTag error -fn unexpected_opening_tag_error(e: &T) -> Result<()> -where - T: Deref, -{ - Err(ExecutionError::UnexpectedOpeningTag( - String::from_utf8_lossy(e).to_string(), - )) -} diff --git a/esi/src/parser.rs b/esi/src/parser.rs new file mode 100644 index 0000000..2937216 --- /dev/null +++ b/esi/src/parser.rs @@ -0,0 +1,2382 @@ +use bytes::Bytes; +// Using STREAMING parsers for document structure - they return Incomplete when they need more data +// This enables TRUE bounded-memory streaming for the main document parsing +use nom::bytes::streaming as streaming_bytes; +use nom::character::streaming as streaming_char; +// Using COMPLETE parsers for expression parsing - expressions are always complete +// (they come from attribute values which are fully extracted before parsing) +use nom::bytes::complete::{tag, take_until, take_while, take_while1}; +use nom::character::complete::multispace0; + +use nom::branch::alt; +use nom::combinator::{map, map_res, not, opt, peek, recognize}; +use nom::error::Error; +use nom::multi::{fold_many0, many0, separated_list0}; +use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use nom::IResult; +use std::collections::HashMap; + +use crate::parser_types::*; + +// 
============================================================================ +// Zero-Copy Helpers +// ============================================================================ + +/// View a slice from nom parsing as a Bytes reference +/// This enables zero-copy: we calculate the slice's offset within the original +/// Bytes and return a new Bytes that references the same underlying data (just increments ref count) +#[inline] +fn slice_as_bytes(original: &Bytes, slice: &[u8]) -> Bytes { + // Calculate the offset of the slice within the original Bytes + let original_ptr = original.as_ptr() as usize; + let slice_ptr = slice.as_ptr() as usize; + + // Safety check: slice must be within original's memory range + debug_assert!( + slice_ptr >= original_ptr && slice_ptr + slice.len() <= original_ptr + original.len(), + "slice must be within original Bytes range" + ); + + let offset = slice_ptr - original_ptr; + let len = slice.len(); + + // Zero-copy: slice the original Bytes (just increments refcount) + original.slice(offset..offset + len) +} + +/// Helper for parsing loops that accumulate results +/// Handles the common pattern of calling a parser in a loop and accumulating elements +enum ParsingMode { + /// Return Incomplete if no elements parsed yet, otherwise return accumulated results + Streaming, + /// Treat Incomplete as EOF, convert remaining bytes to Text + Complete, +} + +/// Parser output that avoids Vec allocation for single elements +/// This is a key optimization: most parsers return exactly one element, +/// so we avoid the Vec allocation overhead in the common case. 
+enum ParseResult { + /// Single element (most common case - no Vec allocation) + Single(Element), + /// Multiple elements (for parsers that return variable number of elements) + Multiple(Vec), + /// No elements (for esi:comment, esi:remove that produce nothing) + Empty, +} + +impl ParseResult { + /// Append elements to an existing Vec + #[inline] + fn append_to(self, acc: &mut Vec) { + match self { + Self::Single(e) => acc.push(e), + Self::Multiple(mut v) => acc.append(&mut v), + Self::Empty => {} + } + } +} + +/// Zero-copy parse loop that threads Bytes through the parser chain +fn parse_loop<'a, F>( + original: &Bytes, + input: &'a [u8], + mut parser: F, + incomplete_strategy: &ParsingMode, +) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> +where + F: FnMut(&Bytes, &'a [u8]) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>>, +{ + let mut result = Vec::new(); + let mut remaining = input; + + loop { + match parser(original, remaining) { + Ok((rest, parse_result)) => { + parse_result.append_to(&mut result); + + // If we consumed nothing, break to avoid infinite loop + if rest.len() == remaining.len() { + return Ok((rest, result)); + } + remaining = rest; + } + Err(nom::Err::Incomplete(needed)) => { + return match incomplete_strategy { + ParsingMode::Streaming => { + // Return accumulated results or propagate Incomplete + if result.is_empty() { + Err(nom::Err::Incomplete(needed)) + } else { + Ok((remaining, result)) + } + } + ParsingMode::Complete => { + // Treat remaining bytes as text - ZERO COPY + if remaining.is_empty() { + Ok((remaining, result)) + } else { + result.push(Element::Text(slice_as_bytes(original, remaining))); + Ok((&remaining[remaining.len()..], result)) + } + } + }; + } + Err(e) => { + if result.is_empty() { + // Return a real parse error + return Err(e); + } + // Else - return what we have so far + return Ok((remaining, result)); + } + } + } +} + +// ============================================================================ +// Public APIs - 
Zero-Copy Streaming Parsers +// ============================================================================ + +/// Parse input bytes into ESI elements using TRUE STREAMING parsers +/// +/// Returns Incomplete when more data is needed - this is proper streaming behavior +/// lib.rs must handle Incomplete by reading more data into the buffer +/// ZERO-COPY: Returns Bytes slices that reference the original buffer (no copying!) +pub fn parse(input: &Bytes) -> IResult<&[u8], Vec, Error<&[u8]>> { + parse_loop(input, input.as_ref(), element, &ParsingMode::Streaming) +} + +/// Parse complete document (treats Incomplete as EOF and converts to text) +/// +/// Wrapper for complete input (tests) - treats Incomplete as "done parsing" +/// ZERO-COPY: Returns Bytes slices that reference the original buffer (no copying!) +pub fn parse_complete(input: &Bytes) -> IResult<&[u8], Vec, Error<&[u8]>> { + parse_loop(input, input.as_ref(), element, &ParsingMode::Complete) +} + +// ============================================================================ +// Helper Functions +// ============================================================================ + +/// Convert bytes to String using lossy UTF-8 conversion +#[inline] +fn bytes_to_string(bytes: &[u8]) -> String { + String::from_utf8_lossy(bytes).into_owned() +} + +// ============================================================================ +// Expression Parsing - Uses COMPLETE parsers (input is always complete) +// Expressions come from attribute values which are fully extracted before parsing +// ============================================================================ + +/// Accepts str for convenience but works on bytes internally +pub fn parse_expression(input: &str) -> IResult<&str, Expr, Error<&str>> { + let bytes = input.as_bytes(); + match expr(bytes) { + Ok((remaining_bytes, expr)) => { + let consumed = bytes.len() - remaining_bytes.len(); + Ok((&input[consumed..], expr)) + } + Err(nom::Err::Error(e)) => 
Err(nom::Err::Error(Error::new(input, e.code))), + Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(Error::new(input, e.code))), + Err(nom::Err::Incomplete(_)) => { + // Complete parsers should never return Incomplete + unreachable!("complete parsers don't return Incomplete") + } + } +} + +// Used by parse_interpolated - zero-copy with original Bytes reference +fn interpolated_text<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + recognize(streaming_bytes::take_while1(|c| { + !is_opening_bracket(c) && !is_dollar(c) + })), + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + )(input) +} + +// Complete version for attribute value parsing - doesn't return Incomplete +fn interpolated_text_complete<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + recognize(take_while1(|c: u8| !is_opening_bracket(c) && !is_dollar(c))), + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + )(input) +} + +/// Parses a string that may contain interpolated expressions like $(VAR) +/// ZERO-COPY: Accepts &Bytes and returns Bytes slices that reference the original +pub fn interpolated_content(input: &Bytes) -> IResult<&[u8], Vec, Error<&[u8]>> { + // NOTE: This function parses complete strings (like attribute values), not streaming input + // Uses fold_many0 with COMPLETE parsers to avoid Incomplete errors at string boundaries + fold_many0( + |i| { + alt((interpolated_expression, |ii| { + interpolated_text_complete(input, ii) + }))(i) + }, + Vec::new, + |mut acc: Vec, item: ParseResult| { + item.append_to(&mut acc); + acc + }, + )(input.as_ref()) +} + +/// Zero-copy element parser - dispatches to text or tag_dispatch +fn element<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + alt((|i| text(original, i), |i| tag_handler(original, i)))(input) +} + +fn 
interpolated_element<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + alt(( + |i| interpolated_text(original, i), + interpolated_expression, + |i| tag_handler(original, i), + ))(input) +} + +// Parse a sequence of interpolated elements (text + expressions + tags) +// Used for parsing content inside tags that allow nested ESI +fn tag_content<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { + fold_many0( + |i| interpolated_element(original, i), + Vec::new, + |mut acc: Vec, item: ParseResult| { + item.append_to(&mut acc); + acc + }, + )(input) +} + +/// Validates a variable name according to ESI spec: +/// - Up to 256 alphanumeric characters (A-Z, a-z, 0-9) +/// - Can include underscores (_) +/// - Cannot start with $ (dollar sign) or digit +/// - First character must be alphabetic (A-Z, a-z) +/// - Can include subscript notation with braces {} containing expressions +fn is_valid_variable_name(name: &str) -> bool { + if name.is_empty() || name.len() > 256 { + return false; + } + + // Check if there's a subscript by finding opening brace + if let Some(brace_pos) = name.find('{') { + // Has subscript - validate base name and check brace matching + let base_name = &name[..brace_pos]; + + // Validate base name strictly (alphanumeric + underscore, starting with alpha) + if !is_valid_base_variable_name(base_name) { + return false; + } + + // Check that subscript has matching closing brace + if !name.ends_with('}') { + return false; + } + + // Subscript content (between braces) can contain any characters for expressions + // We don't validate it here - expression parser will handle it + true + } else { + // No subscript - validate as a simple variable name + is_valid_base_variable_name(name) + } +} + +/// Validates a base variable name (without subscripts): +/// - Must start with alphabetic character +/// - Can only contain alphanumeric characters and underscores +fn 
is_valid_base_variable_name(name: &str) -> bool { + if name.is_empty() { + return false; + } + + let mut chars = name.chars(); + + // First character must be alphabetic + if let Some(first) = chars.next() { + if !first.is_ascii_alphabetic() { + return false; + } + } else { + return false; + } + + // Remaining characters must be alphanumeric or underscore + chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +// Parse variable name with optional subscript like "colors{0}" or "ages{joan}" +fn parse_variable_name_with_subscript(name: &str) -> (String, Option) { + if let Some(brace_pos) = name.find('{') { + if name.ends_with('}') { + let var_name = &name[..brace_pos]; + let subscript_str = &name[brace_pos + 1..name.len() - 1]; + + // Try to parse the subscript as an expression + // Check different patterns: + let subscript_expr = subscript_str.parse::().map_or_else( + |_| { + if subscript_str + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_') + { + // Bare identifier like "joan" - treat as string literal key + Some(Expr::String(Some(subscript_str.to_string()))) + } else if let Ok((_, expr)) = parse_expression(subscript_str) { + // Successfully parsed as expression (e.g., "'key'", "$(var)", complex expression) + Some(expr) + } else { + // Failed to parse - ignore subscript + None + } + }, + |num| Some(Expr::Integer(num)), + ); + + if let Some(expr) = subscript_expr { + return (var_name.to_string(), Some(expr)); + } + } + } + (name.to_string(), None) +} + +fn esi_assign<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + alt((esi_assign_short, |i| esi_assign_long(original, i)))(input) +} + +fn assign_attributes_short(attrs: HashMap) -> ParseResult { + let name = attrs.get("name").cloned().unwrap_or_default(); + + // Validate variable name according to ESI spec + if !is_valid_variable_name(&name) { + // Invalid name - silently drop this tag per ESI spec for invalid constructs + // ParseResult::Empty causes 
the parser to consume the tag but emit nothing + return ParseResult::Empty; + } + + // Parse name and optional subscript (e.g., "colors{0}" or "ages{joan}") + let (var_name, subscript) = parse_variable_name_with_subscript(&name); + + let value_str = attrs.get("value").cloned().unwrap_or_default(); + + // Per ESI spec, short form value attribute contains an expression + // Try to parse as ESI expression. If it fails, treat as string literal. + let value = match parse_expression(&value_str) { + Ok((_, expr)) => expr, + Err(_) => { + // If parsing fails (e.g., plain text), treat as a string literal + Expr::String(Some(value_str)) + } + }; + + ParseResult::Single(Element::Esi(Tag::Assign { + name: var_name, + subscript, + value, + })) +} + +/// Parse an attribute value as an ESI expression +/// Used for parsing src/alt/param values which can contain variables, functions, etc. +/// Examples: +/// "simple_string" -> Expr::String(Some("simple_string")) +/// "$(VARIABLE)" -> Expr::Variable("VARIABLE", ...) 
+/// "http://example.com?q=$(QUERY_STRING{'query'})" -> Expr::Interpolated([Text, Expr]) +fn parse_attr_as_expr(value_str: String) -> Expr { + parse_attr_as_expr_with_context(value_str, false) +} + +fn parse_attr_as_expr_with_context(value_str: String, bare_id_as_variable: bool) -> Expr { + // Fast-path: empty string + if value_str.is_empty() { + return Expr::String(Some(String::new())); + } + + // Try to parse as pure ESI expression first (variables/functions/quoted strings/integers/dict/list literals) + if let Ok((remaining, expr)) = parse_expression(&value_str) { + // Only accept if we consumed the entire string (pure expression) + if remaining.is_empty() { + return expr; + } + } + + // Special case: bare identifier (e.g., "items" for collection="items") + // Whether to treat as variable depends on context + if bare_id_as_variable { + let is_bare_identifier = value_str + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_') + && value_str + .chars() + .next() + .is_some_and(|c| c.is_ascii_alphabetic() || c == '_'); + + if is_bare_identifier { + return Expr::Variable(value_str, None, None); + } + } + + // Not a pure expression - try interpolation (mixed text + expressions) + let bytes = Bytes::from(value_str); + match interpolated_content(&bytes) { + Ok(([], elements)) => { + if elements.len() == 1 { + match elements.into_iter().next().unwrap() { + Element::Expr(expr) => expr, + Element::Text(text) => { + Expr::String(Some(String::from_utf8_lossy(&text).into_owned())) + } + _ => Expr::String(Some(String::from_utf8_lossy(&bytes).into_owned())), + } + } else if !elements.is_empty() { + Expr::Interpolated(elements) + } else { + Expr::String(Some(String::new())) + } + } + _ => Expr::String(Some(String::from_utf8_lossy(&bytes).into_owned())), + } +} + +fn assign_long(attrs: HashMap, mut content: Vec) -> ParseResult { + let name = attrs.get("name").cloned().unwrap_or_default(); + + // Validate variable name according to ESI spec + if !is_valid_variable_name(&name) 
{ + // Invalid name - silently drop this tag per ESI spec for invalid constructs + // ParseResult::Empty causes the parser to consume the tag but emit nothing + return ParseResult::Empty; + } + + // Parse name and optional subscript (e.g., "colors{0}" or "ages{joan}") + let (var_name, subscript) = parse_variable_name_with_subscript(&name); + + // Per ESI spec, long form value comes from content between tags + // Content is already parsed as Vec (can be text, expressions, etc.) + // We need to convert it to a single expression + let value = if content.is_empty() { + // Empty content - empty string + Expr::String(Some(String::new())) + } else if content.len() == 1 { + // Single element - pop to take ownership + match content.pop().expect("checked len == 1") { + Element::Expr(expr) => expr, + Element::Text(text) => { + // Try to parse the text as an expression + let text_str = String::from_utf8_lossy(text.as_ref()).to_string(); + match parse_expression(&text_str) { + Ok((_, expr)) => expr, + Err(_) => Expr::String(Some(text_str)), + } + } + _ => { + // HTML or other - treat as empty string + Expr::String(Some(String::new())) + } + } + } else { + // Multiple elements - this is a compound expression per ESI spec + // Examples: prefix$(VAR)suffix + // $(A) + $(B) + // Store the elements as-is for runtime evaluation + Expr::Interpolated(content) + }; + + ParseResult::Single(Element::Esi(Tag::Assign { + name: var_name, + subscript, + value, + })) +} + +fn esi_assign_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(b"( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + // Per ESI spec, esi:assign cannot contain nested ESI tags - only text and expressions + // Capture content first with take_until, then parse as complete + map( + tuple(( + delimited( + streaming_bytes::tag(b"".as_ref()), + streaming_bytes::tag(b""), + )), + |(attrs, content, _)| { + // Parse the 
captured content in complete mode (text + expressions only) + let elements = parse_content_complete(original, content); + assign_long(attrs, elements) + }, + )(input) +} + +fn esi_except<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + delimited( + streaming_bytes::tag(b""), + |i| tag_content(original, i), + streaming_bytes::tag(b""), + ), + |v| ParseResult::Single(Element::Esi(Tag::Except(v))), + )(input) +} + +fn esi_attempt<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + delimited( + streaming_bytes::tag(b""), + |i| tag_content(original, i), + streaming_bytes::tag(b""), + ), + |v| ParseResult::Single(Element::Esi(Tag::Attempt(v))), + )(input) +} + +// Zero-copy version used by both esi_tag and esi_tag_old (via parse_interpolated) +fn esi_try<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + let (input, _) = streaming_bytes::tag(b"")(input)?; + let (input, v) = tag_content(original, input)?; + let (input, _) = streaming_bytes::tag(b"")(input)?; + + let mut attempts = vec![]; + let mut except = None; + for element in v { + match element { + Element::Esi(Tag::Attempt(cs)) => attempts.push(cs), + Element::Esi(Tag::Except(cs)) => { + except = Some(cs); + } + _ => {} // Ignore content outside attempt/except blocks + } + } + Ok(( + input, + ParseResult::Single(Element::Esi(Tag::Try { + attempt_events: attempts, + except_events: except.unwrap_or_default(), + })), + )) +} + +fn esi_otherwise<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + delimited( + streaming_bytes::tag(b""), + |i| tag_content(original, i), + streaming_bytes::tag(b""), + ), + |content| { + // Return the Otherwise tag followed by its content elements + let mut result = vec![Element::Esi(Tag::Otherwise)]; + result.extend(content); + ParseResult::Multiple(result) + }, + 
)(input) +} + +fn esi_when<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + tuple(( + delimited( + streaming_bytes::tag(b""), + )), + |(attrs, content, _)| { + let test = attrs.get("test").cloned().unwrap_or_default(); + let match_name = attrs.get("matchname").cloned(); + + // Return the When tag followed by its content elements as a marker + let mut result = vec![Element::Esi(Tag::When { test, match_name })]; + result.extend(content); + ParseResult::Multiple(result) + }, + )(input) +} + +/// Parse ... +fn esi_foreach<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + tuple(( + delimited( + streaming_bytes::tag(b""), + )), + |(attrs, content, _)| { + let collection_str = attrs.get("collection").cloned().unwrap_or_default(); + let collection = parse_attr_as_expr_with_context(collection_str, true); + let item = attrs.get("item").cloned(); + + ParseResult::Single(Element::Esi(Tag::Foreach { + collection, + item, + content, + })) + }, + )(input) +} + +/// Parse +fn esi_break(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(b"... 
+fn esi_choose<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + let (input, _) = streaming_bytes::tag(b"")(input)?; + let (input, v) = tag_content(original, input)?; + let (input, _) = streaming_bytes::tag(b"")(input)?; + + let mut when_branches = vec![]; + let mut otherwise_events = Vec::new(); + let mut current_when: Option = None; + let mut in_otherwise = false; + + for element in v { + match element { + Element::Esi(Tag::When { test, match_name }) => { + // Save any previous when + if let Some(when_branch) = current_when.take() { + when_branches.push(when_branch); + } + in_otherwise = false; + + // Parse the test expression now, at parse time (not at eval time) + let test_expr = match parse_expression(&test) { + Ok((_, expr)) => expr, + Err(_) => { + // If parsing fails, create a simple false expression + // This matches the behavior of treating parse failures gracefully + Expr::Integer(0) + } + }; + + // Start collecting for this new when + current_when = Some(WhenBranch { + test: test_expr, + match_name, + content: Vec::new(), + }); + } + Element::Esi(Tag::Otherwise) => { + // Save any pending when + if let Some(when_branch) = current_when.take() { + when_branches.push(when_branch); + } + in_otherwise = true; + } + _ => { + // Accumulate content for the current when or otherwise + if in_otherwise { + otherwise_events.push(element); + } else if let Some(ref mut when_branch) = current_when { + when_branch.content.push(element); + } + // Content outside when/otherwise blocks is discarded (per ESI spec) + } + } + } + + // Don't forget the last when if there is one + if let Some(when_branch) = current_when { + when_branches.push(when_branch); + } + + Ok(( + input, + ParseResult::Single(Element::Esi(Tag::Choose { + when_branches, + otherwise_events, + })), + )) +} + +// Note: does NOT create a Tag::Vars element. Instead, it parses the content +// (either the body of ... 
or the name attribute of ) +// and returns the evaluated content directly as Vec. These elements (Text, Expr, Html, etc.) +// are then flattened into the main element stream and processed normally by process_elements() in lib.rs. +fn esi_vars<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + alt((esi_vars_short, |i| esi_vars_long(original, i)))(input) +} + +fn parse_vars_attributes(attrs: HashMap) -> Result { + attrs.get("name").map_or_else( + || Err("no name field in short form vars"), + |v| { + if let Ok((_, expr)) = parse_expression(v) { + Ok(ParseResult::Single(Element::Expr(expr))) + } else { + Err("failed to parse expression") + } + }, + ) +} + +fn esi_vars_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map_res( + delimited( + streaming_bytes::tag(b" Vec { + // Text in complete mode - stops at $ or < for expression/tag parsing + fn text_complete<'a>( + original: &Bytes, + input: &'a [u8], + ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + take_while1(|c| !is_dollar(c) && !is_opening_bracket(c)), + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + )(input) + } + + // HTML tag in complete mode - any tag that's NOT an ESI tag + fn html_tag_complete<'a>( + original: &Bytes, + input: &'a [u8], + ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + // Check that this is NOT an esi: tag + let (_, _) = peek(tuple((tag(b"<"), not(tag(b"esi:")))))(input)?; + + // Parse the HTML tag (simplified - just capture until >) + let (rest, html) = + recognize(tuple((tag(b"<"), take_while1(|c| c != b'>'), tag(b">"))))(input)?; + + Ok(( + rest, + ParseResult::Single(Element::Html(slice_as_bytes(original, html))), + )) + } + + // Parse content using complete parsers + let mut elements = Vec::new(); + let mut remaining = content; + + while !remaining.is_empty() { + // Try expression first (starts with $) + if let Ok((rest, result)) = 
interpolated_expression(remaining) { + result.append_to(&mut elements); + remaining = rest; + continue; + } + + // Try HTML tag (starts with < but NOT ( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + // esi:vars supports nested ESI tags (like esi:assign) per common usage patterns + let (input, _) = streaming_bytes::tag(b"")(input)?; + let (input, elements) = tag_content(original, input)?; + let (input, _) = streaming_bytes::tag(b"")(input)?; + + Ok((input, ParseResult::Multiple(elements))) +} + +fn esi_comment(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(b" IResult<&[u8], ParseResult, Error<&[u8]>> { + let (input, _) = streaming_bytes::tag(b"")(input)?; + let (input, _) = streaming_bytes::take_until(b"".as_ref())(input)?; + let (input, _) = streaming_bytes::tag(b"")(input)?; + Ok((input, ParseResult::Empty)) +} + +fn esi_text<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + delimited( + streaming_bytes::tag(b""), + streaming_bytes::take_until(b"".as_ref()), + streaming_bytes::tag(b""), + ), + |v| ParseResult::Single(Element::Text(slice_as_bytes(original, v))), + )(input) +} +fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + alt((esi_include_self_closing, esi_include_with_params))(input) +} + +fn esi_include_self_closing(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(b" IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + tuple(( + delimited( + streaming_bytes::tag(b""[..]), + ), + )), + |(mut attrs, params, _)| { + let src = parse_attr_as_expr(attrs.remove("src").unwrap_or_default()); + let alt = attrs.remove("alt").map(parse_attr_as_expr); + let continue_on_error = attrs.get("onerror").is_some_and(|s| s == "continue"); + + ParseResult::Single(Element::Esi(Tag::Include { + src, + alt, + continue_on_error, + params, 
+ })) + }, + )(input) +} + +fn esi_param(input: &[u8]) -> IResult<&[u8], (String, Expr), Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(b" IResult<&[u8], HashMap, Error<&[u8]>> { + fold_many0( + separated_pair( + preceded(streaming_char::multispace1, streaming_char::alpha1), + streaming_bytes::tag(b"="), + htmlstring, + ), + HashMap::new, + |mut acc, (k, v)| { + acc.insert(bytes_to_string(k), bytes_to_string(v)); + acc + }, + )(input) +} + +fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + alt(( + delimited( + double_quote, + streaming_bytes::take_while(|c| !is_double_quote(c)), + double_quote, + ), + delimited( + single_quote, + streaming_bytes::take_while(|c| !is_single_quote(c)), + single_quote, + ), + ))(input) +} + +// ============================================================================ +// Zero-Copy HTML/Text Parsers +// ============================================================================ +/// Helper to find and consume the closing '>' character +#[inline] +fn closing_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + streaming_bytes::tag(b">")(input) +} + +/// Helper to find and consume the closing self-closing tag characters '/>' +#[inline] +fn self_closing(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + streaming_bytes::tag(b"/>")(input) +} + +/// Helper to find and consume the opening '<' character +#[inline] +fn opening_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + streaming_bytes::tag(b"<")(input) +} + +/// Helper to find and consume the closing double quote character +#[inline] +fn double_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + streaming_bytes::tag(b"\"")(input) +} + +/// Helper to find and consume the closing single quote character +#[inline] +fn single_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + streaming_bytes::tag(b"\'")(input) +} + +#[inline] +const fn is_closing_bracket(b: u8) -> bool { + b == b'>' +} + +#[inline] 
+const fn is_double_quote(b: u8) -> bool { + b == b'\"' +} + +#[inline] +const fn is_single_quote(b: u8) -> bool { + b == b'\'' +} + +/// Check if byte can start an HTML/XML tag name (including special constructs like ".as_ref()), + streaming_bytes::tag(b"-->"), + )(input)?; + let full_comment = &start[..start.len() - rest.len()]; + Ok(( + rest, + ParseResult::Single(Element::Html(slice_as_bytes(original, full_comment))), + )) +} + +/// Helper to find closing script tag, handling any content including other closing tags +/// Looks for IResult<&[u8], &[u8], Error<&[u8]>> { + // recognize(many_till(take(1usize), peek(tag_no_case(b"( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + let start = input; + + // Parse opening tag + let (input, _) = recognize(delimited( + streaming_bytes::tag_no_case(b"( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + // Reject ESI closing tags before trying to parse + let (_, _) = peek(not(streaming_bytes::tag(b"(original: &Bytes, input: &'a [u8]) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + recognize(streaming_bytes::take_while1(|c| !is_opening_bracket(c))), + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + )(input) +} + +/// Check if byte is the opening bracket '<' +#[inline] +const fn is_opening_bracket(b: u8) -> bool { + b == b'<' +} + +/// Check if byte is a dollar sign '$' +#[inline] +const fn is_dollar(b: u8) -> bool { + b == b'$' +} +#[inline] +const fn is_alphanumeric_or_underscore(c: u8) -> bool { + c.is_ascii_alphanumeric() || c == b'_' +} + +#[inline] +const fn is_lower_alphanumeric_or_underscore(c: u8) -> bool { + c.is_ascii_lowercase() || c.is_ascii_digit() || c == b'_' +} + +fn esi_fn_name(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { + map( + preceded(tag(b"$"), take_while1(is_lower_alphanumeric_or_underscore)), + bytes_to_string, + )(input) +} + +fn esi_var_name(input: 
&[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map( + tuple(( + take_while1(is_alphanumeric_or_underscore), + opt(delimited(tag(b"{"), esi_var_key_expr, tag(b"}"))), + opt(preceded(tag(b"|"), fn_nested_argument)), + )), + |(name, key, default): (&[u8], _, _)| { + Expr::Variable( + bytes_to_string(name), + key.map(Box::new), + default.map(Box::new), + ) + }, + )(input) +} + +fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { + map( + take_while(|c| c != b'$' && c != b'{' && c != b'}' && c != b',' && c != b'"'), + bytes_to_string, + )(input) +} + +// TODO: handle escaping +fn single_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { + map( + delimited(tag(b"'"), take_while(|c| !is_single_quote(c)), tag(b"'")), + bytes_to_string, + )(input) +} +fn triple_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { + map( + delimited(tag(b"'''"), take_until("'''"), tag(b"'''")), + bytes_to_string, + )(input) +} + +fn string(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map( + alt((single_quoted_string, triple_quoted_string)), + |string: String| { + if string.is_empty() { + Expr::String(None) + } else { + Expr::String(Some(string)) + } + }, + )(input) +} + +fn var_key(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { + alt(( + single_quoted_string, + triple_quoted_string, + not_dollar_or_curlies, + ))(input) +} + +/// Parse subscript key - can be a string or a nested variable expression +fn esi_var_key_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + alt(( + // Try to parse as a variable first (e.g., $(keyVar)) + esi_variable, + // Otherwise parse as a string + map(var_key, |s: String| Expr::String(Some(s))), + ))(input) +} + +fn fn_argument(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { + let (input, mut parsed) = separated_list0( + tuple((multispace0, tag(b","), multispace0)), + fn_nested_argument, + )(input)?; + + // If the parsed list contains a single empty string element 
return an empty vec + if parsed.len() == 1 && parsed[0] == Expr::String(None) { + parsed = vec![]; + } + Ok((input, parsed)) +} + +fn fn_nested_argument(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + alt((esi_function, esi_variable, string, integer, bareword))(input) +} + +fn integer(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map_res( + recognize(tuple(( + opt(tag(b"-")), + take_while1(|c: u8| c.is_ascii_digit()), + ))), + |s: &[u8]| String::from_utf8_lossy(s).parse::().map(Expr::Integer), + )(input) +} + +fn bareword(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map( + take_while1(is_alphanumeric_or_underscore), + |name: &[u8]| Expr::Variable(bytes_to_string(name), None, None), + )(input) +} + +fn esi_function(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + let (input, parsed) = tuple(( + esi_fn_name, + delimited( + terminated(tag(b"("), multispace0), + fn_argument, + preceded(multispace0, tag(b")")), + ), + ))(input)?; + + let (name, args) = parsed; + + Ok((input, Expr::Call(name, args))) +} + +fn esi_variable(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + delimited(tag(b"$("), esi_var_name, tag(b")"))(input) +} + +fn operator(input: &[u8]) -> IResult<&[u8], Operator, Error<&[u8]>> { + alt(( + // Try longer operators first + map(tag(b"matches_i"), |_| Operator::MatchesInsensitive), + map(tag(b"matches"), |_| Operator::Matches), + map(tag(b"has_i"), |_| Operator::HasInsensitive), + map(tag(b"has"), |_| Operator::Has), + map(tag(b"=="), |_| Operator::Equals), + map(tag(b"!="), |_| Operator::NotEquals), + map(tag(b"<="), |_| Operator::LessThanOrEqual), + map(tag(b">="), |_| Operator::GreaterThanOrEqual), + map(tag(b"<"), |_| Operator::LessThan), + map(tag(b">"), |_| Operator::GreaterThan), + map(tag(b"&&"), |_| Operator::And), + map(tag(b"||"), |_| Operator::Or), + ))(input) +} + +fn interpolated_expression(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + alt(( + dict_literal, + list_literal, + 
esi_function, + esi_variable, + integer, + string, + )), + |expr| ParseResult::Single(Element::Expr(expr)), + )(input) +} + +fn dict_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map( + delimited( + tag(b"{"), + separated_list0( + tuple((multispace0, tag(b","), multispace0)), + tuple(( + delimited(multispace0, primary_expr, multispace0), + preceded(tag(b":"), delimited(multispace0, primary_expr, multispace0)), + )), + ), + preceded(multispace0, tag(b"}")), + ), + Expr::DictLiteral, + )(input) +} + +fn list_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + map( + delimited( + tag(b"["), + separated_list0( + tuple((multispace0, tag(b","), multispace0)), + delimited(multispace0, primary_expr, multispace0), + ), + preceded(multispace0, tag(b"]")), + ), + Expr::ListLiteral, + )(input) +} + +fn primary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + alt(( + // Parse negation: !expr + map( + preceded(tag(b"!"), preceded(multispace0, primary_expr)), + |expr| Expr::Not(Box::new(expr)), + ), + // Parse grouped expression: (expr) + delimited( + tag(b"("), + delimited(multispace0, expr, multispace0), + tag(b")"), + ), + // Parse dictionary literal: {key:value, key:value} + dict_literal, + // Parse list literal: [value, value] + list_literal, + // Parse basic expressions + esi_function, + esi_variable, + integer, + string, + ))(input) +} + +fn expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { + let (rest, exp) = primary_expr(input)?; + + if let Ok((rest, (operator, right_exp))) = + tuple((delimited(multispace0, operator, multispace0), expr))(rest) + { + Ok(( + rest, + Expr::Comparison { + left: Box::new(exp), + operator, + right: Box::new(right_exp), + }, + )) + } else { + Ok((rest, exp)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_choose() { + let input = b""; + let bytes = Bytes::from_static(input); + let result = parse_complete(&bytes); + match result { + Ok((rest, _)) => { + 
assert_eq!(rest.len(), 0, "Should parse completely"); + } + Err(e) => { + panic!("Parse failed with error: {:?}", e); + } + } + } + + #[test] + fn test_choose_with_when() { + let input = b"hi"; + let bytes = Bytes::from_static(input); + let result = parse_complete(&bytes); + match result { + Ok((rest, result)) => { + if rest.is_empty() { + println!("Success! Result: {:?}", result); + } else { + panic!( + "Did not parse completely. Remaining: {:?}", + String::from_utf8_lossy(rest) + ); + } + } + Err(e) => { + panic!("Parse failed with error: {:?}", e); + } + } + } + + #[test] + fn test_parse() { + let input = br#" +foo + +baz + + +hello
+
+ +should not appear + + this $(should) appear unchanged + + +should not appear + + +should not appear +hi +goodbye +should not appear + + +should not appear + +attempt 1 + +should not appear + +attempt 2 + +should not appear + +exception! + +"#; + let bytes = Bytes::from_static(input); + let result = parse_complete(&bytes); + match result { + Ok((rest, _)) => { + // Just test to make sure it parsed the whole thing + if !rest.is_empty() { + panic!( + "Failed to parse completely. Remaining: {:?}", + String::from_utf8_lossy(rest) + ); + } + } + Err(e) => { + panic!("Parse failed with error: {:?}", e); + } + } + } + #[test] + fn test_parse_script() { + let input = b""; + let bytes = Bytes::from_static(input); + let (rest, x) = html_script_tag(&bytes, input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + x, + ParseResult::Single(Element::Html(ref h)) if h.as_ref() == b"" + )); + } + #[test] + fn test_parse_script_with_src() { + let input = b""; + let bytes = Bytes::from_static(input); + let (rest, x) = html_script_tag(&bytes, input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + x, + ParseResult::Single(Element::Html(ref h)) if h.as_ref() == b"" + )); + } + #[test] + fn test_parse_esi_vars_short() { + let input = br#""#; + let bytes = Bytes::from_static(input); + let (rest, x) = esi_vars(&bytes, input).unwrap(); + assert_eq!(rest.len(), 0); + // esi_vars returns Single when parsing short form with expression + match x { + ParseResult::Single(Element::Expr(Expr::Variable(name, None, None))) => { + assert_eq!(name, "hello"); + } + ParseResult::Single(e) => { + panic!("Expected Variable expression, got {:?}", e); + } + ParseResult::Multiple(_) => { + panic!("Expected ParseResult::Single, got Multiple"); + } + ParseResult::Empty => { + panic!("Expected ParseResult::Single, got Empty"); + } + } + } + #[test] + fn test_parse_esi_vars_long() { + // can contain text, expressions, HTML, and nested ESI tags (like ) + let input = br#"hello
"#; + let bytes = Bytes::from_static(input); + let (rest, x) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!( + x, + [ + Element::Text(Bytes::from_static(b"hello")), + Element::Html(Bytes::from_static(b"
")), + ] + ); + } + + #[test] + fn test_nested_vars() { + // Nested tags ARE supported - the inner vars tag is parsed recursively + let input = br#"outerinner"#; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_complete(&bytes).unwrap(); + + assert_eq!(rest.len(), 0, "Should parse completely"); + assert_eq!( + elements, + [ + Element::Text(Bytes::from_static(b"outer")), + Element::Text(Bytes::from_static(b"inner")), + ] + ); + } + + #[test] + fn test_vars_with_expressions() { + // This is the proper use of esi:vars - text with expressions + let input = br#"Hello $(name), welcome!"#; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_complete(&bytes).unwrap(); + + assert_eq!(rest.len(), 0, "Should parse completely"); + assert_eq!(elements.len(), 3); + assert!(matches!(&elements[0], Element::Text(t) if t.as_ref() == b"Hello ")); + assert!(matches!(&elements[1], Element::Expr(_))); + assert!(matches!(&elements[2], Element::Text(t) if t.as_ref() == b", welcome!")); + } + + #[test] + fn test_assign_inside_vars() { + // Per ESI spec, can contain tags + let input = br#" + + + Result: $(xyz) +"#; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_complete(&bytes).unwrap(); + + assert_eq!(rest.len(), 0, "Should parse completely"); + + // Should have: whitespace, assign tag, whitespace, text "Result: ", expression $(xyz), whitespace + assert!( + elements.len() >= 3, + "Should have at least assign tag, text, and expression" + ); + + // Find the assign tag + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { name, .. 
}) if name == "xyz")); + assert!(has_assign, "Should contain esi:assign tag with name='xyz'"); + + // Find the expression + let has_expr = elements + .iter() + .any(|e| matches!(e, Element::Expr(Expr::Variable(name, None, None)) if name == "xyz")); + assert!(has_expr, "Should contain expression $(xyz)"); + } + + #[test] + fn test_parse_complex_expr() { + let input = br#""#; + let bytes = Bytes::from_static(input); + let (rest, x) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!( + x, + [Element::Expr(Expr::Comparison { + left: Box::new(Expr::Call( + "call".to_string(), + vec![Expr::String(Some("hello".to_string()))] + )), + operator: Operator::Matches, + right: Box::new(Expr::Variable( + "var".to_string(), + Some(Box::new(Expr::String(Some("key".to_string())))), + None + )) + })] + ); + } + + #[test] + fn test_vars_with_content() { + let input = br#" + $(QUERY_STRING{param}) + "#; + let bytes = Bytes::from_static(input); + let result = esi_vars_long(&bytes, input); + assert!( + result.is_ok(), + "esi_vars_long should parse successfully: {:?}", + result.err() + ); + let (rest, _elements) = result.unwrap(); + assert_eq!( + rest.len(), + 0, + "Parser should consume all input. Remaining: '{:?}'", + String::from_utf8_lossy(rest) + ); + } + + #[test] + fn test_exact_failing_input() { + // This is the exact input from the failing test + let input = br#" + + + $(QUERY_STRING{param}) + $(QUERY_STRING{$(keyVar)}) + + "#; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_complete(&bytes).unwrap(); + eprintln!("Chunks: {:?}", elements); + eprintln!("Remaining: {:?}", String::from_utf8_lossy(rest)); + assert_eq!( + rest.len(), + 0, + "Parser should consume all input. 
Remaining: '{:?}'", + String::from_utf8_lossy(rest) + ); + } + + #[test] + fn test_esi_vars_directly() { + let input = br#" + $(QUERY_STRING{param}) + $(QUERY_STRING{$(keyVar)}) + "#; + let bytes = Bytes::from_static(input); + let result = esi_vars(&bytes, input); + assert!(result.is_ok(), "esi_vars should parse: {:?}", result.err()); + let (rest, _) = result.unwrap(); + assert_eq!(rest.len(), 0, "Should consume all input"); + } + + #[test] + fn test_esi_tag_on_vars() { + let input = br#" + $(QUERY_STRING{param}) + "#; + let bytes = Bytes::from_static(input); + let (rest, _result) = esi_vars(&bytes, input).unwrap(); + assert_eq!(rest.len(), 0, "Parser should consume all input"); + } + + #[test] + fn test_assign_then_vars() { + // Test simple case without nested variables (which aren't supported yet) + let input = + br#"$(QUERY_STRING{param})"#; + let bytes = Bytes::from_static(input); + let (rest, _elements) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + } + + #[test] + fn test_parse_plain_text() { + let input = b"hello\nthere"; + let bytes = Bytes::from_static(input); + let (rest, x) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!(x, [Element::Text(Bytes::from_static(b"hello\nthere"))]); + } + #[test] + fn test_parse_interpolated() { + let input = b"hello $(foo)goodbye $(foo)"; + let bytes = Bytes::from_static(input); + let (rest, x) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!( + x, + [ + Element::Text(Bytes::from_static(b"hello $(foo)")), + Element::Text(Bytes::from_static(b"goodbye ")), + Element::Expr(Expr::Variable("foo".to_string(), None, None)), + ] + ); + } + #[test] + fn test_parse_examples() { + let input = include_bytes!("../../examples/esi_vars_example/src/index.html"); + let bytes = Bytes::from_static(input); + let (rest, _) = parse_complete(&bytes).unwrap(); + // just make sure it parsed the whole thing + assert_eq!(rest.len(), 0); + } + + #[test] + fn 
test_parse_equality_operators() { + let input = b"$(foo) == 'bar'"; + let (rest, result) = expr(input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + result, + Expr::Comparison { + operator: Operator::Equals, + .. + } + )); + + let input2 = b"$(foo) != 'bar'"; + let (rest, result) = expr(input2).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + result, + Expr::Comparison { + operator: Operator::NotEquals, + .. + } + )); + } + + #[test] + fn test_parse_comparison_operators() { + // Test via parsing complete ESI documents with esi:when test attributes + // which internally use parse_expression() for complete input handling + + let input1 = b"yes"; + let bytes1 = Bytes::from_static(input1); + let result1 = parse_complete(&bytes1); + assert!( + result1.is_ok(), + "Should parse < operator: {:?}", + result1.err() + ); + + let input2 = b"= 5\">yes"; + let bytes2 = Bytes::from_static(input2); + let result2 = parse_complete(&bytes2); + assert!( + result2.is_ok(), + "Should parse >= operator: {:?}", + result2.err() + ); + + // Test has operator + let input3 = b"yes"; + let bytes3 = Bytes::from_static(input3); + let result3 = parse_complete(&bytes3); + assert!( + result3.is_ok(), + "Should parse 'has' operator: {:?}", + result3.err() + ); + + // Test has_i operator + let input4 = + b"yes"; + let bytes4 = Bytes::from_static(input4); + let result4 = parse_complete(&bytes4); + assert!( + result4.is_ok(), + "Should parse 'has_i' operator: {:?}", + result4.err() + ); + } + + #[test] + fn test_parse_logical_operators() { + // With parentheses to enforce correct precedence + let input = b"($(foo) == 'bar') && ($(baz) == 'qux')"; + let (rest, result) = expr(input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + result, + Expr::Comparison { + operator: Operator::And, + .. 
+ } + )); + + let input2 = b"($(foo) == 'bar') || ($(baz) == 'qux')"; + let (rest, result) = expr(input2).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + result, + Expr::Comparison { + operator: Operator::Or, + .. + } + )); + } + + #[test] + fn test_parse_negation() { + let input = b"!$(flag)"; + let (rest, result) = expr(input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!(result, Expr::Not(_))); + + // Test negation with comparison + let input2 = b"!($(foo) == 'bar')"; + let (rest, result) = expr(input2).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!(result, Expr::Not(_))); + } + + #[test] + fn test_parse_grouped_expressions() { + let input = b"($(foo) == 'bar')"; + let (rest, result) = expr(input).unwrap(); + assert_eq!(rest.len(), 0); + assert!(matches!( + result, + Expr::Comparison { + operator: Operator::Equals, + .. + } + )); + } + + #[test] + fn test_single_quoted_attributes() { + // Test single-quoted attributes + let input = b""; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_complete(&bytes).unwrap(); + assert_eq!(rest.len(), 0, "Should parse completely"); + assert_eq!(elements.len(), 1); + if let Element::Esi(Tag::Include { src, .. 
}) = &elements[0] { + assert!(matches!(src, Expr::String(Some(s)) if s == "http://example.com/fragment")); + } else { + panic!("Expected Include tag"); + } + + // Test mixed quotes + let input2 = b""; + let bytes2 = Bytes::from_static(input2); + let (rest, elements) = parse_complete(&bytes2).unwrap(); + assert_eq!(rest.len(), 0, "Should parse completely"); + assert_eq!(elements.len(), 1); + if let Element::Esi(Tag::Assign { + name, + subscript: _, + value, + }) = &elements[0] + { + assert_eq!(name, "foo"); + assert_eq!(value, &Expr::String(Some("bar".to_string()))); + } else { + panic!("Expected Assign tag"); + } + } + + #[test] + fn test_assign_valid_variable_names() { + // Valid names + let valid_cases: Vec<&[u8]> = vec![ + b"", + b"", + b"", + b"", + b"", + b"", + ]; + + for input in valid_cases { + let bytes = Bytes::copy_from_slice(input); + let result = parse_complete(&bytes); + assert!( + result.is_ok(), + "Should parse valid name: {:?}", + std::str::from_utf8(input) + ); + let (_, elements) = result.unwrap(); + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { .. }))); + assert!( + has_assign, + "Should have Assign tag for: {:?}", + std::str::from_utf8(input) + ); + } + } + + #[test] + fn test_assign_invalid_variable_names() { + // Invalid names should be rejected (treated as empty/skipped) + let invalid_cases: Vec<&[u8]> = vec![ + b"", // starts with $ + b"", // starts with digit + b"", // starts with underscore + b"", // contains dash + b"", // contains dot + b"", // contains space + b"", // empty name + ]; + + for input in invalid_cases { + let bytes = Bytes::copy_from_slice(input); + let result = parse_complete(&bytes); + assert!( + result.is_ok(), + "Should parse (but skip invalid): {:?}", + std::str::from_utf8(input) + ); + let (_, elements) = result.unwrap(); + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { .. 
}))); + assert!( + !has_assign, + "Should NOT have Assign tag for invalid name: {:?}", + std::str::from_utf8(input) + ); + } + } + + #[test] + fn test_assign_name_length_limit() { + // Test 256 character limit + let valid_256 = format!(r#""#, "b".repeat(255)); + let bytes = Bytes::from(valid_256.clone()); + let result = parse_complete(&bytes); + assert!(result.is_ok(), "Should parse 256 char name"); + let (_, elements) = result.unwrap(); + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { .. }))); + assert!(has_assign, "Should have Assign tag for 256 char name"); + + // Test 257 characters (should be invalid) + let invalid_257 = format!(r#""#, "b".repeat(256)); + let bytes = Bytes::from(invalid_257); + let result = parse_complete(&bytes); + assert!(result.is_ok(), "Should parse (but skip)"); + let (_, elements) = result.unwrap(); + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { .. }))); + assert!(!has_assign, "Should NOT have Assign tag for 257 char name"); + } + + #[test] + fn test_assign_long_form_invalid_name() { + // Long form with invalid name should also be rejected + let input = b"test value"; + let bytes = Bytes::copy_from_slice(input); + let result = parse_complete(&bytes); + assert!(result.is_ok(), "Should parse"); + let (_, elements) = result.unwrap(); + let has_assign = elements + .iter() + .any(|e| matches!(e, Element::Esi(Tag::Assign { .. 
}))); + assert!( + !has_assign, + "Should NOT have Assign tag for invalid name in long form" + ); + } + + #[test] + fn test_assign_with_subscript() { + // Test subscript assignment parsing with bare identifier + let input = b""; + let bytes = Bytes::copy_from_slice(input); + let result = parse_complete(&bytes); + assert!(result.is_ok(), "Should parse"); + let (_, elements) = result.unwrap(); + assert_eq!(elements.len(), 1); + + match &elements[0] { + Element::Esi(Tag::Assign { + name, + subscript, + value, + }) => { + assert_eq!(name, "ages"); + assert!(subscript.is_some(), "Should have subscript"); + if let Some(sub) = subscript { + // Should be a string literal "joan" + assert!(matches!(sub, Expr::String(Some(s)) if s == "joan")); + } + assert!(matches!(value, Expr::Integer(28))); + } + _ => panic!("Expected Assign tag"), + } + + // Test with another bare identifier + let input2 = b""; + let bytes2 = Bytes::copy_from_slice(input2); + let result2 = parse_complete(&bytes2); + assert!(result2.is_ok(), "Should parse"); + let (_, elements2) = result2.unwrap(); + assert_eq!(elements2.len(), 1); + + match &elements2[0] { + Element::Esi(Tag::Assign { + name, + subscript, + value, + }) => { + assert_eq!(name, "ages"); + assert!(subscript.is_some(), "Should have subscript"); + if let Some(sub) = subscript { + // Should be a string literal "bob" + assert!( + matches!(sub, Expr::String(Some(s)) if s == "bob"), + "Subscript should be 'bob', got {:?}", + sub + ); + } + assert!(matches!(value, Expr::Integer(34))); + } + _ => panic!("Expected Assign tag"), + } + } + + #[test] + fn test_assign_with_quoted_subscript() { + // Test ESI spec-compliant subscript with quoted strings in assignment + let input = b""; + let bytes = Bytes::copy_from_slice(input); + let result = parse_complete(&bytes); + + assert!( + result.is_ok(), + "Should parse spec-compliant quoted subscript" + ); + let (_, elements) = result.unwrap(); + assert_eq!(elements.len(), 1, "Should have exactly 1 element"); + 
+ match &elements[0] { + Element::Esi(Tag::Assign { + name, + subscript, + value, + }) => { + assert_eq!(name, "ages"); + assert!(subscript.is_some(), "Should have subscript"); + if let Some(sub) = subscript { + // Should be a string literal "joan" + assert!( + matches!(sub, Expr::String(Some(s)) if s == "joan"), + "Subscript should be 'joan', got {:?}", + sub + ); + } + assert!(matches!(value, Expr::Integer(28))); + } + other => panic!("Expected Assign tag, got {:?}", other), + } + + // Test with multiple quoted subscripts + let input2 = b""; + let bytes2 = Bytes::copy_from_slice(input2); + let result2 = parse_complete(&bytes2); + assert!( + result2.is_ok(), + "Should parse assignment with quoted subscript and quoted value" + ); + } + + #[test] + fn test_unclosed_script_tag() { + // Unclosed script tag - should handle gracefully + let input = b" tag + ("", "Script opening tag, REQUIRES closing"), + ( + "