From a8333a16098be8dd2e84ba5872ef49bbb233bf0e Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Mon, 30 Mar 2026 10:04:27 +1300
Subject: [PATCH 1/4] [ML] Add EuroBERT/Jina v5 ops to graph validation
 allowlist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jina Embeddings v5 is based on EuroBERT, which uses a different
architecture from the BERT family:
- RoPE (rotary position embeddings) → aten::sin, aten::cos
- RMSNorm (instead of LayerNorm) → aten::rsqrt
- SiLU activation (instead of GELU) → aten::silu

Required for Eland PR elastic/eland#818, which adds support for
importing Jina v5 models into Elasticsearch.

Made-with: Cursor
---
 bin/pytorch_inference/CSupportedOperations.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bin/pytorch_inference/CSupportedOperations.cc b/bin/pytorch_inference/CSupportedOperations.cc
index 56dbbaa84..61229a5e0 100644
--- a/bin/pytorch_inference/CSupportedOperations.cc
+++ b/bin/pytorch_inference/CSupportedOperations.cc
@@ -41,7 +41,8 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::FORBIDDEN_OPERA
 // deepset/tinyroberta-squad2, typeform/squeezebert-mnli,
 // facebook/bart-large-mnli, valhalla/distilbart-mnli-12-6,
 // distilbert-base-uncased-finetuned-sst-2-english,
-// sentence-transformers/all-distilroberta-v1.
+// sentence-transformers/all-distilroberta-v1,
+// jinaai/jina-embeddings-v5-text-nano (EuroBERT + LoRA).
 // Eland-deployed variants of the above models (with pooling/normalization layers).
 // Additional ops from Elasticsearch integration test models
 // (PyTorchModelIT, TextExpansionQueryIT, TextEmbeddingQueryIT).
@@ -68,6 +69,7 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI
     "aten::clone"sv,
     "aten::contiguous"sv,
     "aten::copy_"sv,
+    "aten::cos"sv,
     "aten::cumsum"sv,
     "aten::detach"sv,
     "aten::div"sv,
@@ -117,10 +119,13 @@ const CSupportedOperations::TStringViewSet CSupportedOperations::ALLOWED_OPERATI
     "aten::relu"sv,
     "aten::repeat"sv,
     "aten::reshape"sv,
+    "aten::rsqrt"sv,
     "aten::rsub"sv,
     "aten::scaled_dot_product_attention"sv,
     "aten::select"sv,
     "aten::sign"sv,
+    "aten::silu"sv,
+    "aten::sin"sv,
     "aten::size"sv,
     "aten::slice"sv,
     "aten::softmax"sv,

From b12a4d494b767f78787fb70a3375b3adf751ed0f Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Mon, 30 Mar 2026 12:25:28 +1300
Subject: [PATCH 2/4] [ML] Fix graph validator tests for sin/cos now in
 allowlist

aten::sin and aten::cos are now in the allowlist (needed by
EuroBERT/Jina v5 for rotary position embeddings), so tests that
used them as example "unrecognised" ops now fail.

- Replace torch.sin with torch.logit in synthetic test modules
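- Update the malicious model tests to stop asserting on sin/cos:
  testMaliciousManyUnrecognisedOps now checks only for the ops that
  remain unrecognised (aten::tan, aten::exp), while the
  hidden-submodule and conditional-branch tests assert that at least
  one unrecognised op is reported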

Made-with: Cursor
---
 .../unittest/CModelGraphValidatorTest.cc      | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
index 5180fb403..e292b78b9 100644
--- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
+++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
@@ -259,11 +259,11 @@ BOOST_AUTO_TEST_CASE(testValidModuleWithAllowedOps) {
 }
 
 BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) {
-    // torch.sin is not in the transformer allowlist.
+    // torch.logit is not in the transformer allowlist.
     ::torch::jit::Module m("__torch__.UnknownOps");
     m.define(R"(
         def forward(self, x: Tensor) -> Tensor:
-            return torch.sin(x)
+            return torch.logit(x)
     )");
 
     auto result = CModelGraphValidator::validate(m);
@@ -271,13 +271,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithUnrecognisedOps) {
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
     BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
-    bool foundSin = false;
+    bool foundLogit = false;
     for (const auto& op : result.s_UnrecognisedOps) {
-        if (op == "aten::sin") {
-            foundSin = true;
+        if (op == "aten::logit") {
+            foundLogit = true;
         }
     }
-    BOOST_REQUIRE(foundSin);
+    BOOST_REQUIRE(foundLogit);
 }
 
 BOOST_AUTO_TEST_CASE(testModuleNodeCountPopulated) {
@@ -301,7 +301,7 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) {
     ::torch::jit::Module child("__torch__.Child");
     child.define(R"(
         def forward(self, x: Tensor) -> Tensor:
-            return torch.sin(x)
+            return torch.logit(x)
     )");
 
     ::torch::jit::Module parent("__torch__.Parent");
@@ -314,13 +314,13 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) {
     auto result = CModelGraphValidator::validate(parent);
 
     BOOST_REQUIRE(result.s_IsValid == false);
-    bool foundSin = false;
+    bool foundLogit = false;
     for (const auto& op : result.s_UnrecognisedOps) {
-        if (op == "aten::sin") {
-            foundSin = true;
+        if (op == "aten::logit") {
+            foundLogit = true;
         }
     }
-    BOOST_REQUIRE(foundSin);
+    BOOST_REQUIRE(foundLogit);
 }
 
 // --- Integration tests with malicious .pt model fixtures ---
@@ -363,34 +363,38 @@ BOOST_AUTO_TEST_CASE(testMaliciousMixedFileReader) {
 BOOST_AUTO_TEST_CASE(testMaliciousHiddenInSubmodule) {
     // Unrecognised ops buried three levels deep in nested submodules.
     // The validator must inline through all submodules to find them.
+    // The model uses aten::sin which is now allowed (EuroBERT/Jina v5),
+    // but also contains other ops that remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_hidden_in_submodule.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
 }
 
 BOOST_AUTO_TEST_CASE(testMaliciousConditionalBranch) {
     // An unrecognised op hidden inside a conditional branch. The
     // validator must recurse into prim::If blocks to detect it.
+    // The model uses aten::sin which is now allowed, but also contains
+    // other ops that remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_conditional.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.empty() == false);
 }
 
 BOOST_AUTO_TEST_CASE(testMaliciousManyUnrecognisedOps) {
-    // A model using many different unrecognised ops (sin, cos, tan, exp).
+    // A model using many different ops (sin, cos, tan, exp).
+    // sin and cos are now allowed (EuroBERT/Jina v5), but tan and exp
+    // remain unrecognised.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_many_unrecognised.pt");
     auto result = CModelGraphValidator::validate(module);
 
     BOOST_REQUIRE(result.s_IsValid == false);
     BOOST_REQUIRE(result.s_ForbiddenOps.empty());
-    BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 4);
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::sin"));
-    BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::cos"));
+    BOOST_REQUIRE(result.s_UnrecognisedOps.size() >= 2);
     BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::tan"));
     BOOST_REQUIRE(hasUnrecognisedOp(result, "aten::exp"));
 }

From 6709f94bd9f6a60fd71ce0b9ed2818e50a096584 Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Mon, 30 Mar 2026 14:18:15 +1300
Subject: [PATCH 3/4] [ML] Fix malicious hidden-submodule fixture after sin
 allowlist (use logit)

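Regenerate malicious_hidden_in_submodule.pt so that its leaf module
calls torch.logit(torch.clamp(...)). aten::logit is not in the
allowlist, so graph validation still fails now that aten::sin is
allowed for EuroBERT/Jina. Update dev-tools/generate_malicious_models.py
to match, and correct the test comments (including the path of the
generator script).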

Made-with: Cursor
---
 .../unittest/CModelGraphValidatorTest.cc      |   6 +++---
 .../malicious_hidden_in_submodule.pt          | Bin 2517 -> 2825 bytes
 dev-tools/generate_malicious_models.py        |   8 ++++++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
index e292b78b9..9c4f7d6d5 100644
--- a/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
+++ b/bin/pytorch_inference/unittest/CModelGraphValidatorTest.cc
@@ -326,7 +326,7 @@ BOOST_AUTO_TEST_CASE(testModuleWithSubmoduleInlines) {
 // --- Integration tests with malicious .pt model fixtures ---
 //
 // These load real TorchScript models that simulate attack vectors.
-// The .pt files are generated by testfiles/generate_malicious_models.py.
+// The .pt files are generated by dev-tools/generate_malicious_models.py.
 
 namespace {
 bool hasForbiddenOp(const CModelGraphValidator::SResult& result, const std::string& op) {
@@ -363,8 +363,8 @@ BOOST_AUTO_TEST_CASE(testMaliciousMixedFileReader) {
 BOOST_AUTO_TEST_CASE(testMaliciousHiddenInSubmodule) {
     // Unrecognised ops buried three levels deep in nested submodules.
     // The validator must inline through all submodules to find them.
-    // The model uses aten::sin which is now allowed (EuroBERT/Jina v5),
-    // but also contains other ops that remain unrecognised.
+    // The leaf uses aten::logit (still unrecognised) so the fixture stays
+    // invalid when aten::sin is allowed for EuroBERT/Jina v5.
     auto module = ::torch::jit::load("testfiles/malicious_models/malicious_hidden_in_submodule.pt");
     auto result = CModelGraphValidator::validate(module);
 
diff --git a/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_hidden_in_submodule.pt b/bin/pytorch_inference/unittest/testfiles/malicious_models/malicious_hidden_in_submodule.pt
index 39104c647ef007579fe16960f0521e914b29944c..180d98c88f2080f44629fc5b9ca3870aa10246ad 100644
GIT binary patch
delta 1435
zcmcaA+$lDpM@la}H7~U&u_QG<H!&wOIWxbsI6gN&B{ipbVoxkS*_WDlWG4$Ts&IV`
zwa{Q-WME)mob13T!|chxIk}KkesUcnU%fs~TlESm-JDn%qCjBpq`i5E3`APX=RNA%
zD6&XLR8?qh_mx%7yBEGX)tS4YAm@p^=)Ha0gdI8<k2p+odH(Om9Q}79rF{2PlVkM1
zmS<ESZhCYyy>4O}Q^StM)4ts)el3v5xcdH@ZS{^F+$W>8wqD83SYYb5RPAK|%hC-?
z?|$o#YLiahw(;NA&zCY*g{?F^>$P@gFRM#_&UVK&*KD+|6u((HX|Bn^#4pFDt}d84
z>#@LNrk0JFg%5WKeeaEL)qSkhP|Iu|W1Y<DbN|?RwQxSegt+Lhj-tPXzNx%!e$KjN
zv9@Iu(}p|j^{W@2;9A+o#;-cTb9x~^<Ggbbg<9`i{|HvgtJyr+-D-MU^5x2hJB0Tz
zny0SO2zu#bcuD;ydymTG^UIrSCU||S;^w-x{Ir<xx7{KYacA}Ci`%VDSH05wuS)OC
z2~(HUYnQ|$CZDtq@Mh=G=_)n11;)V}W(I};Z)O$|1`ZAmhRHsR0?fe-PO$jU#v31c
zDXB@N>G1{GIc`pN3=;$OCnoUMm-g+?yW${l?C<k;OneI$EONYci9KCfN<>vANI<0}
z(b0-$qr(ZKZ*hAzq*w2)e(?ImwuQ^x9tmm)Jdzc#%|GD1;oQ+fK~pYfrMvCV-#4>A
ztmLXe&)?<7d&G~qw?91mBhtdUrbwW@`n#O{gEr~RoC4!nFIq}cQu>~y{#9IQ99}PA
zHh)1Hd+QQ`R9m-ucP|$me;8Ksv}8%I=&VI-vF^Qx-@b_Azv80znrUm8oi10<?$sL)
zvtLwR%Q=rbsJkIWNuX3pU>b*Ku;V1Fv(2oZFRkJ-`gCt{>PeRZpD!g!o_jnbd^2~7
zd|Y&fL%441qr)6?H&2<E^3E)|Wbxiy5#RHt>Qh#J|GCCy*`?U)n%-hlZ~I)(eRTKQ
z{(9j`^{)~4_bL8l74Iq0t@U0iA8t~6yyJ74XQbF=CnK#ThxK=@k5x)fXjr(Tc+!`{
zC(cAOsYN6-U({kNaR2CgqirQq@WWi=|N8Zr&t^9~^k`gKa8Z5!+H)B<Zobj3<1Awo
zxxrPx@zb`zo$C`8eAr&PS?&JXd1uvkZ<Z`ITku0n=77(^fQReL_Oa?enA$Y)ko~P?
z%Jy&Pon3GD^R9Y*)TY(>&AhGuzjZvBKJ`C8D1AuW^uKigm_8OWO}@Y?!>rAq4o(v>
z9K0-UPRhW@R^IHu+Q_JHVPtG>VPRopZe(I+Zf<5|WMFJyY-Va~VrFb%VQFS*XaN*5
zG6NYYR?&8C@&`5v<oqwh?j?(y6g`2Z0s>56WPqhaFT5!+z?+eYfdP_+LD~_nXJ80M
zm<MF}GQ*9O!foVaW)5@a3#<^OF&q|5Y#^rE<UJhP5T^WOJ~mEWh_wvBtOKzF<U<W0
z6NF75LQpb{9b#52r=8V$pi@EM2Rnog@MZ(EmUBV)ppr4bo0Sd3V+TTzdWc#85`ZJ*

delta 1118
zcmeAayDB`PXX3Kh$+C=^lM@(KxJ<gff8E5uz_5v7av!4%vp$32<U&rx$>$h(*z^i2
z-JEnMA7s|6-#g)S)?oty*Y7{OBE25`V&-|dt8tp%tyqob9M9ekoz{u2bN8>8dv{pm
zRcx5h_j8ZWNv`2wl<S#tO7@QT8+ZN1p>I~&?77XT5XaIP=-b7l^ycA@uq$?1dnc(0
z9h3adC;R2z?U)p%R~fqBk}T>uCRKd;H|Nju(<ys|zs=w-USC-67ic7)?thqb<5A~n
zlT@d&z17^ldBK{0v0v0HCQC6q<BOZhJ<sCKUH$v(EUt@eGufWJB_}W8nd|!B-zSJ~
zNxRA8kk$31L9}&yyx}_D4|fADT%BS5Jay`Y?3jMfn+rlW=P7C}78lya_m?g3BB!S0
z>1+HGT_%ao;GR0?*|UIudu^rEZ&}rrrW*7uZMI_!@Mh<55Gz;VVq{=YVg`qrB?CV!
z)arqurk9eMRGJ=Nke%b^#LW<e0HuAt`Ij98+OC%c9bo12@Q`tc(mT<3ty<U7HH&ph
zR+huT75fxUZ#`a-xcOV1i;8*P@ARL7H+-FyLuJ>Nl|8F{e)H$!bbk&-dAswI7Ms}H
z``SN?zkKuM^2s)~R(93vJ*Ip$st=6Yc3u6Jd#mZ%=)b!!gr)>ssV=cxs_Aw473bah
z>aTO!PMw#F?d93HsJ+B9{!;3SrZ+dIEx4$+WeKy^v!F$NVUq)TjqX+7FmJwPu!hHY
zSBeH#a%JX*3#o^nCGCG>Rd!JIb=bDI(U#Lqv!<wSYJGBT(Z-w|S$Y>X$lo~3-TiY<
zy}+I)Zx22Byv-@`QQp#Hl6zCt8(n2})xTWb@XPPpmt}81y}wa#^R`pz@m0G8CluZN
zdop)U`JO+j274k30>4^E9qi@3y7xtOOBowWNASZ`sdM+!1P(CEGbuDDvatKTJ;W$u
z^&!US)2#pRp0r+n<HYowuV%W>cF||~4v%@C-B|hC`UBem#|M+H*BOhvm+6Tq+NO8y
zlz2V&XNwyNWo9#km&>hDKdx6P7c+Zp)r&{<ptzJ{Q99KJjLRUV$rspUn6()sfpMwB
z?B*m2O!{J*9oQNf_07yoEzAwgEiH|VjEv0<ERBqf4a`jq%#1B8EzK-V&CLyrjLj@8
zK*oLVOa4Cj1G@xr&JyDAl0hUH1_pg#hC=`+#>s_T3X}Oc`SgK_C%~JLiGe|cfrEns
ztOVga1_n!nCLrq;6HMP^b53)ndn{n?1WpS^RtSrYeewrRZ77eAopZ7)mw*h&eHuXB
xAZ&tSUKkrtTbxxj(0UN~!3LoNyxG`6tYS_G9~e{&0p6@&S#}0?29SD)S^&;Bt=Rwo

diff --git a/dev-tools/generate_malicious_models.py b/dev-tools/generate_malicious_models.py
index 21afe1110..fdf6ddbf1 100644
--- a/dev-tools/generate_malicious_models.py
+++ b/dev-tools/generate_malicious_models.py
@@ -48,7 +48,11 @@ def forward(self, x: Tensor) -> Tensor:
 
 
 class HiddenInSubmodule(torch.nn.Module):
-    """Hides aten::sin (unrecognised) three levels deep in submodules."""
+    """Hides aten::logit (unrecognised) three levels deep in submodules.
+
+    Uses logit+clamp instead of sin so the fixture stays invalid when
+    aten::sin is added to the allowlist for transformer models (e.g. EuroBERT).
+    """
     def __init__(self):
         super().__init__()
         self.inner = _Inner()
@@ -69,7 +73,7 @@ def forward(self, x: Tensor) -> Tensor:
 
 class _Leaf(torch.nn.Module):
     def forward(self, x: Tensor) -> Tensor:
-        return torch.sin(x)
+        return torch.logit(torch.clamp(x, 1e-6, 1.0 - 1e-6))
 
 
 class ConditionalMalicious(torch.nn.Module):

From 2dc084d7d01bfe37e0a03a116a509c3b85c897de Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Thu, 2 Apr 2026 10:18:26 +1300
Subject: [PATCH 4/4] [ML] Add Jina v5 to reference/validation models and
 enable trust_remote_code

Add jinaai/jina-embeddings-v5-text-nano to reference_models.json,
validation_models.json, and the golden reference_model_ops.json with
its 36 traced ops (verified all covered by the allowlist).

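Pass trust_remote_code=True to the tokenizer, config and model
from_pretrained calls in torchscript_utils.py so models with custom
code (like Jina v5 / EuroBERT) can be loaded by the extraction and
validation tooling. Note that the flag is passed unconditionally, so
it applies to every model these tools load and allows Python code
shipped in a model repository to run locally.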

Made-with: Cursor
---
 .../testfiles/reference_model_ops.json        | 42 +++++++++++++++++++
 .../extract_model_ops/reference_models.json   |  2 +
 .../extract_model_ops/torchscript_utils.py    |  9 ++--
 .../extract_model_ops/validation_models.json  |  2 +
 4 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json b/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json
index bdc975c53..f66a42ec7 100644
--- a/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json
+++ b/bin/pytorch_inference/unittest/testfiles/reference_model_ops.json
@@ -1007,6 +1007,48 @@
         "prim::NumToTensor"
       ]
     },
+    "jina-embeddings-v5-text-nano": {
+      "model_id": "jinaai/jina-embeddings-v5-text-nano",
+      "quantized": false,
+      "ops": [
+        "aten::Int",
+        "aten::add",
+        "aten::arange",
+        "aten::cat",
+        "aten::contiguous",
+        "aten::cos",
+        "aten::detach",
+        "aten::dropout",
+        "aten::embedding",
+        "aten::expand",
+        "aten::floor_divide",
+        "aten::linear",
+        "aten::masked_fill",
+        "aten::matmul",
+        "aten::mean",
+        "aten::mul",
+        "aten::neg",
+        "aten::pow",
+        "aten::reshape",
+        "aten::rsqrt",
+        "aten::scaled_dot_product_attention",
+        "aten::silu",
+        "aten::sin",
+        "aten::size",
+        "aten::slice",
+        "aten::sub",
+        "aten::to",
+        "aten::transpose",
+        "aten::unsqueeze",
+        "aten::view",
+        "prim::Constant",
+        "prim::GetAttr",
+        "prim::ListConstruct",
+        "prim::NumToTensor",
+        "prim::TupleConstruct",
+        "prim::TupleUnpack"
+      ]
+    },
     "qa-tinyroberta-squad2": {
       "model_id": "deepset/tinyroberta-squad2",
       "quantized": false,
diff --git a/dev-tools/extract_model_ops/reference_models.json b/dev-tools/extract_model_ops/reference_models.json
index 5170a0e2e..23368f053 100644
--- a/dev-tools/extract_model_ops/reference_models.json
+++ b/dev-tools/extract_model_ops/reference_models.json
@@ -30,6 +30,8 @@
     "elastic-eis-elser-v2-quantized": {"model_id": "elastic/eis-elser-v2", "quantized": true},
     "elastic-test-elser-v2-quantized": {"model_id": "elastic/test-elser-v2", "quantized": true},
 
+    "jina-embeddings-v5-text-nano": "jinaai/jina-embeddings-v5-text-nano",
+
     "_comment:qa-models": "Models from the Appex QA pytorch_tests suite. BART models require auto_class and config_overrides to trace correctly.",
     "qa-tinyroberta-squad2": {"model_id": "deepset/tinyroberta-squad2", "auto_class": "AutoModelForQuestionAnswering"},
     "qa-squeezebert-mnli": "typeform/squeezebert-mnli",
diff --git a/dev-tools/extract_model_ops/torchscript_utils.py b/dev-tools/extract_model_ops/torchscript_utils.py
index da8fb481b..c412ff2cf 100644
--- a/dev-tools/extract_model_ops/torchscript_utils.py
+++ b/dev-tools/extract_model_ops/torchscript_utils.py
@@ -116,11 +116,14 @@ def load_and_trace_hf_model(model_name: str, quantize: bool = False,
     overrides = config_overrides or {}
 
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name, token=token, trust_remote_code=True)
         config = AutoConfig.from_pretrained(
-            model_name, torchscript=True, token=token, **overrides)
+            model_name, torchscript=True, token=token,
+            trust_remote_code=True, **overrides)
         model = model_cls.from_pretrained(
-            model_name, config=config, token=token)
+            model_name, config=config, token=token,
+            trust_remote_code=True)
         model.eval()
     except Exception as exc:
         print(f"    LOAD ERROR: {exc}", file=sys.stderr)
diff --git a/dev-tools/extract_model_ops/validation_models.json b/dev-tools/extract_model_ops/validation_models.json
index 1b36747fd..20aaf98d1 100644
--- a/dev-tools/extract_model_ops/validation_models.json
+++ b/dev-tools/extract_model_ops/validation_models.json
@@ -31,6 +31,8 @@
     "es-cross-encoder-ms-marco": "cross-encoder/ms-marco-MiniLM-L-6-v2",
     "es-dpr-question-encoder": "facebook/dpr-question_encoder-single-nq-base",
 
+    "jina-embeddings-v5-text-nano": "jinaai/jina-embeddings-v5-text-nano",
+
     "_comment:qa-models": "Models from the Appex QA pytorch_tests suite. BART models require auto_class and config_overrides to trace correctly.",
     "qa-tinyroberta-squad2": {"model_id": "deepset/tinyroberta-squad2", "auto_class": "AutoModelForQuestionAnswering"},
     "qa-squeezebert-mnli": "typeform/squeezebert-mnli",