From c3cac9b19b791569fb7fd13169bc2fc5141e223e Mon Sep 17 00:00:00 2001
From: Saumya Tiwari <haribol274@gmail.com>
Date: Wed, 25 Mar 2026 10:45:40 +0000
Subject: [PATCH] Remove deprecated RerankCalculator and update references

---
 demos/common/export_models/export_model.py    |  94 ----
 src/BUILD                                     |   1 -
 src/rerank/BUILD                              |  28 +-
 src/rerank/rerank_calculator.cc               | 420 ------------------
 src/rerank/rerank_calculator.proto            |  32 --
 src/test/mediapipeflow_test.cpp               |   1 -
 src/test/rerank/with_params/graph.pbtxt       |  47 --
 .../rerank/with_params/invalid_graph.pbtxt    |  47 --
 .../rerank/with_params/invalid_graph_ov.pbtxt |  15 +-
 yarn.lock                                     |  47 ++
 10 files changed, 50 insertions(+), 682 deletions(-)
 delete mode 100644 src/rerank/rerank_calculator.cc
 delete mode 100644 src/rerank/rerank_calculator.proto
 delete mode 100644 src/test/rerank/with_params/graph.pbtxt
 delete mode 100644 src/test/rerank/with_params/invalid_graph.pbtxt

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 5aa81b0c81..c7785244f9 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -64,12 +64,6 @@ def add_common_arguments(parser):
 parser_embeddings_ov.add_argument('--truncate', default=False, action='store_true', help='Truncate the prompts to fit to the embeddings model', dest='truncate')
 parser_embeddings_ov.add_argument('--num_streams', default=1,type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')
 
-parser_rerank = subparsers.add_parser('rerank', help='[deprecated] export model for rerank endpoint with models split into separate, versioned directories')
-add_common_arguments(parser_rerank)
-parser_rerank.add_argument('--num_streams', default=1, type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')
-parser_rerank.add_argument('--max_doc_length', default=16000, type=int, help='Maximum length of input documents in tokens', dest='max_doc_length')
-parser_rerank.add_argument('--version', default="1", help='version of the model', dest='version')
-
 parser_rerank_ov = subparsers.add_parser('rerank_ov', help='export model for rerank endpoint with directory structure aligned with OpenVINO tools')
 add_common_arguments(parser_rerank_ov)
 parser_rerank_ov.add_argument('--num_streams', default=1, type=int, help='The number of parallel execution streams to use for the model. Use at least 2 on 2 socket CPU systems.', dest='num_streams')
@@ -190,34 +184,6 @@ def add_common_arguments(parser):
 }
 """
 
-rerank_graph_template = """input_stream: "REQUEST_PAYLOAD:input"
-output_stream: "RESPONSE_PAYLOAD:output"
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:tokenizer"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "{{model_name}}_tokenizer_model"
-    }
-  }
-}
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:rerank"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "{{model_name}}_rerank_model"
-    }
-  }
-}
-node {
-    input_side_packet: "TOKENIZER_SESSION:tokenizer"
-    input_side_packet: "RERANK_SESSION:rerank"
-    calculator: "RerankCalculator"
-    input_stream: "REQUEST_PAYLOAD:input"
-    output_stream: "RESPONSE_PAYLOAD:output"
-}
-"""
 
 text_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
 output_stream: "HTTP_RESPONSE_PAYLOAD:output"
@@ -273,24 +239,6 @@ def add_common_arguments(parser):
   }
 }"""
 
-rerank_subconfig_template = """{
-    "model_config_list": [
-    { "config": 
-	    {
-                "name": "{{model_name}}_tokenizer_model",
-                "base_path": "tokenizer"
-            }
-	},
-    { "config": 
-	    {
-                "name": "{{model_name}}_rerank_model",
-                "base_path": "rerank",
-                "target_device": "{{target_device|default("CPU", true)}}",
-                "plugin_config": { "NUM_STREAMS": "{{num_streams|default(1, true)}}" }
-            }
-	}
-   ]
-}"""
 
 image_generation_graph_template = """input_stream: "HTTP_REQUEST_PAYLOAD:input"
 output_stream: "HTTP_RESPONSE_PAYLOAD:output"
@@ -558,46 +506,6 @@ def export_rerank_model_ov(model_repository_path, source_model, model_name, prec
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
     add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
-def export_rerank_model(model_repository_path, source_model, model_name, precision, task_parameters, version, config_file_path, max_doc_length):
-    if os.path.isfile(os.path.join(model_name, 'openvino_model.xml')):
-        print("OV model is source folder. Skipping conversion.")
-        os.makedirs(os.path.join(model_repository_path, model_name, 'rerank', version), exist_ok=True)
-        os.makedirs(os.path.join(model_repository_path, model_name, 'tokenizer', version), exist_ok=True)
-        shutil.move(os.path.join(model_repository_path, model_name, 'openvino_tokenizer.xml'), os.path.join(model_repository_path, model_name, 'tokenizer', version, 'model.xml'))
-        shutil.move(os.path.join(model_repository_path, model_name, 'openvino_tokenizer.bin'), os.path.join(model_repository_path, model_name, 'tokenizer', version, 'model.bin'))
-        shutil.move(os.path.join(model_repository_path, model_name, 'openvino_model.xml'), os.path.join(model_repository_path, model_name, 'rerank', version, 'model.xml'))
-        shutil.move(os.path.join(model_repository_path, model_name, 'openvino_model.bin'), os.path.join(model_repository_path, model_name, 'rerank', version, 'model.bin'))
-    else: # assume HF model name
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            embeddings_path = os.path.join(model_repository_path, model_name, 'rerank', version)
-            print("Exporting rerank model to ",embeddings_path)
-            if not os.path.isdir(embeddings_path) or args['overwrite_models']:
-                optimum_command = "optimum-cli export openvino --disable-convert-tokenizer --model {} --task text-classification --weight-format {} {} --trust-remote-code {}".format(source_model, precision, task_parameters['extra_quantization_params'], tmpdirname)
-                if os.system(optimum_command):
-                    raise ValueError("Failed to export rerank model", source_model)
-                set_rt_info(tmpdirname, 'openvino_model.xml', 'config.json')
-                os.makedirs(embeddings_path, exist_ok=True)
-                shutil.move(os.path.join(tmpdirname, 'openvino_model.xml'), os.path.join(embeddings_path, 'model.xml'))
-                shutil.move(os.path.join(tmpdirname, 'openvino_model.bin'), os.path.join(embeddings_path, 'model.bin'))
-            tokenizer_path = os.path.join(model_repository_path, model_name,'tokenizer', version)
-            print("Exporting tokenizer to ",tokenizer_path)
-            if not os.path.isdir(tokenizer_path) or args['overwrite_models']:
-                export_rerank_tokenizer(source_model, tmpdirname, max_doc_length)
-                set_rt_info(tmpdirname, 'openvino_tokenizer.xml', 'tokenizer_config.json')
-                os.makedirs(tokenizer_path, exist_ok=True)
-                shutil.move(os.path.join(tmpdirname, 'openvino_tokenizer.xml'), os.path.join(tokenizer_path, 'model.xml'))
-                shutil.move(os.path.join(tmpdirname, 'openvino_tokenizer.bin'), os.path.join(tokenizer_path, 'model.bin'))
-    gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_graph_template)
-    graph_content = gtemplate.render(model_name=model_name, **task_parameters)
-    with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
-        f.write(graph_content)
-    print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
-    stemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_subconfig_template)
-    subconfig_content = stemplate.render(model_name=model_name, **task_parameters)
-    with open(os.path.join(model_repository_path, model_name, 'subconfig.json'), 'w') as f:
-        f.write(subconfig_content)
-    print("Created subconfig {}".format(os.path.join(model_repository_path, model_name, 'subconfig.json')))
-    add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
 
 def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams):
@@ -670,8 +578,6 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
 elif args['task'] == 'embeddings_ov':
     export_embeddings_model_ov(args['model_repository_path'], args['source_model'], args['model_name'],  args['precision'], template_parameters, args['config_file_path'], args['truncate'])
 
-elif args['task'] == 'rerank':
-    export_rerank_model(args['model_repository_path'], args['source_model'], args['model_name'] ,args['precision'], template_parameters, str(args['version']), args['config_file_path'], args['max_doc_length'])
 
 elif args['task'] == 'rerank_ov':
     export_rerank_model_ov(args['model_repository_path'], args['source_model'], args['model_name'] ,args['precision'], template_parameters, args['config_file_path'], args['max_doc_length'])
diff --git a/src/BUILD b/src/BUILD
index ea624f5e59..5f422fc736 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -581,7 +581,6 @@ ovms_cc_library(
                 "//src/image_gen:imagegen_init",
                 "//src/llm:openai_completions_api_handler",
                 "//src/embeddings:embeddingscalculator_ov",
-                "//src/rerank:rerankcalculator",
                 "//src/rerank:rerankcalculator_ov",
                 "//src/llm:llmcalculator",],
         }) + select({
diff --git a/src/rerank/BUILD b/src/rerank/BUILD
index 7f3b1a6ec9..192252efad 100644
--- a/src/rerank/BUILD
+++ b/src/rerank/BUILD
@@ -17,15 +17,7 @@
 load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
 load("//:common_settings.bzl", "ovms_cc_library")
 
-mediapipe_proto_library(
-    name = "rerank_calculator_proto", # rerank_calculator_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
-    srcs = ["rerank_calculator.proto"],
-    visibility = ["//visibility:private"],
-    deps = [
-        "@mediapipe//mediapipe/framework:calculator_options_proto",
-        "@mediapipe//mediapipe/framework:calculator_proto",
-    ],
-)
+
 
 ovms_cc_library(
     name = "rerank_servable",
@@ -45,24 +37,6 @@ mediapipe_proto_library(
     ],
 )
 
-ovms_cc_library(
-    name = "rerankcalculator",
-    srcs = ["rerank_calculator.cc"],
-    deps = [
-        "@mediapipe//mediapipe/framework:calculator_framework",
-        "@com_github_tencent_rapidjson//:rapidjson",
-        "@model_api//:model_api",
-        "//src:httppayload",
-        "//src:libhttpclientconnection",
-        "//src:libovmslogging",
-        "//src:libovmsprofiler",
-	    "rerank_calculator_cc_proto",
-        ":rerank_api_handler",
-    ],
-    visibility = ["//visibility:public"],
-    alwayslink = 1,
-)
-
 ovms_cc_library(
     name = "rerankcalculator_ov",
     srcs = ["rerank_calculator_ov.cc"],
diff --git a/src/rerank/rerank_calculator.cc b/src/rerank/rerank_calculator.cc
deleted file mode 100644
index bb731690af..0000000000
--- a/src/rerank/rerank_calculator.cc
+++ /dev/null
@@ -1,420 +0,0 @@
-//*****************************************************************************
-// Copyright 2024 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//*****************************************************************************
-#include <algorithm>
-#include <exception>
-#include <string>
-#include <unordered_map>
-#include <utility>
-
-#pragma warning(push)
-#pragma warning(disable : 6001 6385 6386 6326 6011 4309 6246 4005 4456)
-#include "absl/strings/escaping.h"
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#include "mediapipe/framework/calculator_framework.h"
-#include "mediapipe/framework/port/canonical_errors.h"
-#include "mediapipe/framework/port/ret_check.h"
-#pragma GCC diagnostic pop
-#pragma warning(pop)
-
-#include <adapters/inference_adapter.h>
-#include "src/port/rapidjson_stringbuffer.hpp"
-#include "src/port/rapidjson_writer.hpp"
-
-#include "../http_payload.hpp"
-#include "../logging.hpp"
-#include "../profiler.hpp"
-#include "src/rerank/rerank_calculator.pb.h"
-#include "src/rerank/rerank_utils.hpp"
-
-using namespace rapidjson;
-using namespace ovms;
-
-namespace mediapipe {
-
-using InputDataType = ovms::HttpPayload;
-using OutputDataType = std::string;
-
-class RerankCalculator : public CalculatorBase {
-    static const std::string INPUT_TAG_NAME;
-    static const std::string OUTPUT_TAG_NAME;
-    static constexpr size_t NUMBER_OF_SPECIAL_TOKENS = 4;
-
-    mediapipe::Timestamp timestamp{0};
-    std::chrono::time_point<std::chrono::system_clock> created;
-
-    int64_t bos_token{0};
-    int64_t eos_token{0};
-    int64_t sep_token{0};
-    int64_t pad_token{0};
-
-    uint64_t max_position_embeddings{512};
-
-    size_t max_allowed_chunks{0};  // Read from options in ::Open()
-
-protected:
-    std::shared_ptr<::InferenceAdapter> tokenizer_session{nullptr};
-    std::shared_ptr<::InferenceAdapter> rerank_session{nullptr};
-
-public:
-    static absl::Status GetContract(CalculatorContract* cc) {
-        RET_CHECK(!cc->Inputs().GetTags().empty());
-        RET_CHECK(!cc->Outputs().GetTags().empty());
-        cc->Inputs().Tag(INPUT_TAG_NAME).Set<InputDataType>();
-        cc->Outputs().Tag(OUTPUT_TAG_NAME).Set<OutputDataType>();
-        cc->InputSidePackets().Tag("TOKENIZER_SESSION").Set<std::shared_ptr<InferenceAdapter>>();
-        cc->InputSidePackets().Tag("RERANK_SESSION").Set<std::shared_ptr<InferenceAdapter>>();
-        return absl::OkStatus();
-    }
-
-    absl::Status Close(CalculatorContext* cc) final {
-        OVMS_PROFILE_FUNCTION();
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "RerankCalculator [Node: {} ] Close", cc->NodeName());
-        return absl::OkStatus();
-    }
-
-    absl::Status Open(CalculatorContext* cc) final {
-        OVMS_PROFILE_FUNCTION();
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "RerankCalculator  [Node: {}] Open start", cc->NodeName());
-        tokenizer_session = cc->InputSidePackets().Tag("TOKENIZER_SESSION").Get<std::shared_ptr<::InferenceAdapter>>();
-        rerank_session = cc->InputSidePackets().Tag("RERANK_SESSION").Get<std::shared_ptr<::InferenceAdapter>>();
-
-        const auto& options = cc->Options<RerankCalculatorOptions>();
-        this->max_allowed_chunks = options.max_allowed_chunks();
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Max allowed chunks: {}", this->max_allowed_chunks);
-
-        try {
-            // special tokens
-            this->bos_token = rerank_session->getModelConfig().at("bos_token_id").as<int64_t>();
-            this->eos_token = rerank_session->getModelConfig().at("eos_token_id").as<int64_t>();
-            if (rerank_session->getModelConfig().count("sep_token_id") == 0) {
-                this->sep_token = this->eos_token;
-            } else {
-                this->sep_token = rerank_session->getModelConfig().at("sep_token_id").as<int64_t>();
-            }
-            this->pad_token = rerank_session->getModelConfig().at("pad_token_id").as<int64_t>();
-
-            // max_position_embeddings
-            if (options.has_max_position_embeddings()) {
-                this->max_position_embeddings = options.max_position_embeddings();
-                SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Options defined max_position_embeddings: {}", this->max_position_embeddings);
-            } else {
-                auto maxPositionEmbeddingsIt = rerank_session->getModelConfig().find("max_position_embeddings");
-                if (maxPositionEmbeddingsIt != rerank_session->getModelConfig().end()) {
-                    this->max_position_embeddings = maxPositionEmbeddingsIt->second.as<int64_t>();
-                    SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Model max_position_embeddings: {}", this->max_position_embeddings);
-                } else {
-                    auto maxTrainedPositionsIt = rerank_session->getModelConfig().find("max_trained_positions");
-                    if (maxTrainedPositionsIt != rerank_session->getModelConfig().end()) {
-                        this->max_position_embeddings = maxTrainedPositionsIt->second.as<int64_t>();
-                        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Model max_position_embeddings (inherited from max_trained_positions): {}", this->max_position_embeddings);
-                    } else {
-                        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Model missing max_position_embeddings and max_trained_positions in config, using default value: {}", this->max_position_embeddings);
-                    }
-                }
-            }
-
-            // post-validation
-            if (this->max_position_embeddings <= 2 * NUMBER_OF_SPECIAL_TOKENS) {
-                SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "max_position_embeddings should be larger than 2 * NUMBER_OF_SPECIAL_TOKENS");
-                return absl::InvalidArgumentError("max_position_embeddings should be larger than 2 * NUMBER_OF_SPECIAL_TOKENS");
-            }
-        } catch (ov::AssertFailure& e) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "OpenVINO Assert Failure: {}", e.what());
-            return absl::InternalError(e.what());
-        } catch (std::out_of_range& e) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "{}", e.what());
-            return absl::InternalError(e.what());
-        } catch (...) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "Unknown error");
-            return absl::InternalError("Unknown error");
-        }
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "RerankCalculator [Node: {}] Open end", cc->NodeName());
-        return absl::OkStatus();
-    }
-
-    std::vector<int64_t> ComputeTokensForString(std::string str) const {
-        if (tokenizer_session->getInputNames().size() != 1)
-            throw std::runtime_error("Tokenizer session should have only one input");
-        if (tokenizer_session->getOutputNames().size() != 2)
-            throw std::runtime_error("Tokenizer session should have only two outputs");
-
-        auto tokenizer_input_name = tokenizer_session->getInputNames()[0];
-        ::InferenceInput tokenizer_input_map;
-        tokenizer_input_map[tokenizer_input_name] = ov::Tensor(ov::element::string, ov::Shape{1}, &str);
-        ::InferenceOutput tokenizer_output_map = tokenizer_session->infer(tokenizer_input_map);
-
-        if (tokenizer_output_map.size() != 2)
-            throw std::runtime_error("Tokenizer session should have only two outputs");
-        if (tokenizer_output_map.count("input_ids") != 1)
-            throw std::runtime_error("Tokenizer session should have input_ids output");
-        if (tokenizer_output_map.count("attention_mask") != 1)
-            throw std::runtime_error("Tokenizer session should have attention_mask output");
-
-        auto input_ids = tokenizer_output_map.at("input_ids");
-        if (input_ids.get_shape().size() != 2)
-            throw std::runtime_error("input_ids should have 2 dimensions");
-        if (input_ids.get_shape()[0] != 1)
-            throw std::runtime_error("input_ids should have 1 batch size");
-        if (input_ids.get_element_type() != ov::element::i64)
-            throw std::runtime_error("input_ids should have i64 element type");  // TODO: Add support for other precisions?
-
-        int64_t* input_ids_data = reinterpret_cast<int64_t*>(input_ids.data());
-        return std::vector<int64_t>(input_ids_data, input_ids_data + input_ids.get_shape()[1]);
-    }
-
-    std::pair<ov::Tensor, ov::Tensor> ComputeTokensForBatchedString(std::vector<std::string> strings) const {
-        if (tokenizer_session->getInputNames().size() != 1)
-            throw std::runtime_error("Tokenizer session should have only one input");
-        if (tokenizer_session->getOutputNames().size() != 2)
-            throw std::runtime_error("Tokenizer session should have only two outputs");
-
-        auto tokenizer_input_name = tokenizer_session->getInputNames()[0];
-        ::InferenceInput tokenizer_input_map;
-        tokenizer_input_map[tokenizer_input_name] = ov::Tensor(ov::element::string, ov::Shape{strings.size()}, strings.data());
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Starting inference tokenizer model");
-        ::InferenceOutput tokenizer_output_map = tokenizer_session->infer(tokenizer_input_map);
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Finished inference tokenizer model");
-
-        if (tokenizer_output_map.size() != 2)
-            throw std::runtime_error("Tokenizer session should have only two outputs");
-        if (tokenizer_output_map.count("input_ids") != 1)
-            throw std::runtime_error("Tokenizer session should have input_ids output");
-        if (tokenizer_output_map.count("attention_mask") != 1)
-            throw std::runtime_error("Tokenizer session should have attention_mask output");
-
-        auto input_ids = tokenizer_output_map.at("input_ids");
-        if (input_ids.get_shape().size() != 2)
-            throw std::runtime_error("input_ids should have 2 dimensions");
-        if (input_ids.get_shape()[0] != strings.size())
-            throw std::runtime_error("input_ids should have batch size equal to number of tokenized strings");
-        if (input_ids.get_element_type() != ov::element::i64)
-            throw std::runtime_error("input_ids should have i64 element type");
-
-        auto attention_mask = tokenizer_output_map.at("attention_mask");
-        if (attention_mask.get_shape().size() != 2)
-            throw std::runtime_error("attention_mask should have 2 dimensions");
-        if (attention_mask.get_shape()[0] != strings.size())
-            throw std::runtime_error("attention_mask should have batch size equal to number of tokenized strings");
-        if (attention_mask.get_element_type() != ov::element::i64)
-            throw std::runtime_error("attention_mask should have i64 element type");  // TODO: Add support for other precisions?
-
-        return std::make_pair(input_ids, attention_mask);
-    }
-
-    std::pair<ov::Tensor, ov::Tensor> PrepareInputsForRerankModel(const RerankHandler& handler, std::vector<size_t>& chunk_mapping) const {
-        // Validate batch size before tokenizing
-        if (handler.getDocumentsList().size() > this->max_allowed_chunks)
-            throw std::runtime_error("Number of documents exceeds max_allowed_chunks");
-        // TODO: Validate max string length for some arbitrary size
-
-        // Compute Query Tokens
-        auto query_tokens = ComputeTokensForString(handler.getQuery());
-
-        // Truncate last tokens if exceeding max_position_embeddings / 2 as mentioned in cohere doc:
-        // https://docs.cohere.com/v2/docs/reranking-best-practices#queries
-        const size_t max_query_tokens = this->max_position_embeddings / 2;
-        if (query_tokens.size() > max_query_tokens) {
-            query_tokens.resize(max_query_tokens);
-            SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Number of query tokens: {} exceeded half of max_position_embeddings: {}, truncating to {}", query_tokens.size(), this->max_position_embeddings, max_query_tokens);
-        } else {
-            SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Number of query tokens: {}", query_tokens.size());
-        }
-
-        // Compute Document Tokens
-        auto [doc_input_ids, doc_attention_mask] = ComputeTokensForBatchedString(handler.getDocumentsList());
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "\nMax position embeddings: {}\nQuery tokens: {}\nSpecial tokens: {}\nRemaining space for chunk: {}",
-            this->max_position_embeddings, query_tokens.size(), NUMBER_OF_SPECIAL_TOKENS, this->max_position_embeddings - query_tokens.size() - NUMBER_OF_SPECIAL_TOKENS);
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Number of documents: {}; with max token count: {} before chunking", doc_input_ids.get_shape()[0], doc_input_ids.get_shape()[1]);
-
-        // max_tokens_per_chunk can never be <= 0 since query_tokens.size() is at max half of max_position_embeddings
-        // and max_position_embeddings is at least 2 * NUMBER_OF_SPECIAL_TOKENS
-        size_t max_tokens_per_chunk = this->max_position_embeddings - query_tokens.size() - NUMBER_OF_SPECIAL_TOKENS;
-        ov::Tensor out_input_ids, out_attention_mask;
-        auto status = chunkDocuments(
-            doc_input_ids,
-            doc_attention_mask,
-            out_input_ids, out_attention_mask,
-            chunk_mapping, max_tokens_per_chunk,
-            this->max_allowed_chunks, this->pad_token);
-        if (!status.ok()) {
-            throw std::runtime_error(std::string{"Chunking failed: "} + std::string(status.message()));
-        }
-        doc_input_ids = out_input_ids;
-        doc_attention_mask = out_attention_mask;
-
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Number of chunks: {}; with max token count: {} after chunking", doc_input_ids.get_shape()[0], doc_input_ids.get_shape()[1]);
-
-        size_t tokens_count_of_longest_document = doc_input_ids.get_shape()[1];
-        if (tokens_count_of_longest_document > max_tokens_per_chunk)
-            throw std::runtime_error("tokens_count_of_longest_document exceeds max_tokens_per_chunk");  // should never happen
-        size_t total_tokens_count_per_batch = tokens_count_of_longest_document + NUMBER_OF_SPECIAL_TOKENS + query_tokens.size();
-        size_t batch_size = doc_input_ids.get_shape()[0];
-        if (batch_size != chunk_mapping.size())
-            throw std::runtime_error("error");  // should never happen
-
-        if (total_tokens_count_per_batch > this->max_position_embeddings)
-            throw std::runtime_error("Query tokens count + special tokens + tokens count of longest document exceeds max_position_embeddings");
-
-        auto input_ids = ov::Tensor(ov::element::i64, ov::Shape{batch_size, total_tokens_count_per_batch});
-        auto attention_mask = ov::Tensor(ov::element::i64, ov::Shape{batch_size, total_tokens_count_per_batch});
-
-        // Combine query and document tokens
-        // Schema (tokenizer must be exported without --add_special_tokens flag, we will add it manually)
-        /*
-            BOS_TOKEN  <QUERY TOKENS>  EOS_TOKEN SEP_TOKEN  <DOCUMENT_1 TOKENS>  EOS_TOKEN
-            BOS_TOKEN  <QUERY TOKENS>  EOS_TOKEN SEP_TOKEN  <DOCUMENT_2 TOKENS>  EOS_TOKEN
-            BOS_TOKEN  <QUERY TOKENS>  EOS_TOKEN SEP_TOKEN  <DOCUMENT_3 TOKENS>  EOS_TOKEN
-            BOS_TOKEN  <QUERY TOKENS>  EOS_TOKEN SEP_TOKEN  <DOCUMENT_N TOKENS>  EOS_TOKEN
-        */
-
-        for (size_t i = 0; i < batch_size; i++) {
-            int64_t* input_ids_data = reinterpret_cast<int64_t*>(input_ids.data()) + i * total_tokens_count_per_batch;
-            int64_t* attention_mask_data = reinterpret_cast<int64_t*>(attention_mask.data()) + i * total_tokens_count_per_batch;
-
-            int64_t* doc_input_ids_data = reinterpret_cast<int64_t*>(doc_input_ids.data()) + i * tokens_count_of_longest_document;
-
-            // Fill input_ids
-            input_ids_data[0] = this->bos_token;
-            std::memcpy(input_ids_data + 1, query_tokens.data(), query_tokens.size() * sizeof(int64_t));
-            input_ids_data[query_tokens.size() + 1] = this->eos_token;
-            input_ids_data[query_tokens.size() + 2] = this->sep_token;
-            std::memcpy(input_ids_data + 1 + query_tokens.size() + 2, doc_input_ids_data, tokens_count_of_longest_document * sizeof(int64_t));
-
-            input_ids_data[total_tokens_count_per_batch - 1] = this->pad_token;
-
-            auto it = std::find(doc_input_ids_data, doc_input_ids_data + tokens_count_of_longest_document, this->pad_token);
-            size_t pad_token_index = (it != doc_input_ids_data + tokens_count_of_longest_document) ? std::distance(doc_input_ids_data, it) : tokens_count_of_longest_document;
-
-            input_ids_data[1 + query_tokens.size() + 2 + pad_token_index] = this->eos_token;
-
-            // Fill attention_mask
-            std::fill(attention_mask_data, attention_mask_data + total_tokens_count_per_batch, int64_t(0));
-            std::fill(attention_mask_data, attention_mask_data + 1 + query_tokens.size() + 2 + pad_token_index + 1, int64_t(1));
-        }
-
-        return std::make_pair(input_ids, attention_mask);
-    }
-
-    std::vector<float> ComputeScoresUsingRerankModel(ov::Tensor input_ids, ov::Tensor attention_mask, const std::vector<size_t>& chunkMapping, size_t actual_batch_size) const {
-        if (rerank_session->getInputNames().size() != 2)  // TODO: Support 3 inputs with token_type_ids
-            throw std::runtime_error("Rerank model should have 2 inputs");
-        if (rerank_session->getOutputNames().size() != 1)  // There should be only one output when exported with --task text-classification
-            throw std::runtime_error("Rerank model should have 1 output");
-
-        // Validate input/output names
-        if (rerank_session->getInputNames()[0] != "input_ids" && rerank_session->getInputNames()[1] != "input_ids")
-            throw std::runtime_error("Rerank model should have input_ids input");
-        if (rerank_session->getInputNames()[0] != "attention_mask" && rerank_session->getInputNames()[1] != "attention_mask")
-            throw std::runtime_error("Rerank model should have attention_mask input");
-        if (rerank_session->getOutputNames()[0] != "logits")
-            throw std::runtime_error("Rerank model should have logits output");
-
-        if (input_ids.get_shape()[1] > this->max_position_embeddings)
-            throw std::runtime_error("exceeding max_position_embeddings");  // should never happen
-
-        ::InferenceInput rerank_input_map;
-        rerank_input_map["input_ids"] = input_ids;
-        rerank_input_map["attention_mask"] = attention_mask;
-
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Starting inference rerank model");
-        ::InferenceOutput rerank_output_map = rerank_session->infer(rerank_input_map);
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Finished inference rerank model");
-        if (rerank_output_map.size() != 1)
-            throw std::runtime_error("Rerank model results should have 1 output");
-        if (rerank_output_map.count("logits") != 1)
-            throw std::runtime_error("Rerank model results should have logits output");
-
-        auto logits = rerank_output_map.at("logits");
-
-        if (logits.get_shape().size() != 2)  // 2D tensor
-            throw std::runtime_error("Logits should be 2D tensor");
-        if (logits.get_shape()[0] != input_ids.get_shape()[0])
-            throw std::runtime_error("Batch size mismatch");
-
-        std::vector<float> scores;
-        scores.resize(actual_batch_size, 0);
-
-        size_t logits_dim = logits.get_shape()[1];
-
-        for (int i = 0; i < input_ids.get_shape()[0]; ++i) {
-            size_t score_index = chunkMapping[i];
-            if (score_index >= actual_batch_size)
-                throw std::runtime_error("score_index out of bounds");  // should never happen
-            float logit = logits_dim > 1 ? reinterpret_cast<float*>(logits.data())[i * logits_dim + 1] : reinterpret_cast<float*>(logits.data())[i];
-            float score = 1 / (1 + std::exp(-logit));
-            float current_highest_score = scores[score_index];
-            scores[score_index] = std::max(current_highest_score, score);
-        }
-
-        return scores;
-    }
-
-    absl::Status Process(CalculatorContext* cc) final {
-        OVMS_PROFILE_FUNCTION();
-        RET_CHECK(tokenizer_session != nullptr);
-        RET_CHECK(rerank_session != nullptr);
-        if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty()) {
-            return absl::InvalidArgumentError("Input is empty");
-        }
-        InputDataType payload = cc->Inputs().Tag(INPUT_TAG_NAME).Get<InputDataType>();
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Request body: {}", payload.body);
-        SPDLOG_LOGGER_DEBUG(rerank_calculator_logger, "Request uri: {}", payload.uri);
-        RerankHandler handler(*payload.parsedJson);
-        absl::Status status = handler.parseRequest();
-        if (!status.ok()) {
-            return status;
-        }
-
-        try {
-            // Prepare inputs for rerank model
-            std::vector<size_t> chunk_mapping;
-            auto [input_ids, attention_mask] = PrepareInputsForRerankModel(handler, chunk_mapping);
-
-            // Compute scores using rerank model
-            size_t batch_size = handler.getDocumentsList().size();
-            auto scores = ComputeScoresUsingRerankModel(
-                input_ids,
-                attention_mask,
-                chunk_mapping,
-                batch_size);
-
-            // Serialize scores
-            StringBuffer buffer;
-            status = handler.parseResponse(buffer, scores);
-            if (!status.ok()) {
-                return status;
-            }
-            cc->Outputs().Tag(OUTPUT_TAG_NAME).Add(new std::string(buffer.GetString()), timestamp);
-            return absl::OkStatus();
-        } catch (ov::AssertFailure& e) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "OpenVINO Assert Failure: {}", e.what());
-            return absl::InternalError(e.what());
-        } catch (std::runtime_error& e) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "runtime_error: {}", e.what());
-            return absl::InternalError(e.what());
-        } catch (...) {
-            SPDLOG_LOGGER_ERROR(rerank_calculator_logger, "Unknown error");
-            return absl::InternalError("Unknown error");
-        }
-    }
-};
-const std::string RerankCalculator::INPUT_TAG_NAME{"REQUEST_PAYLOAD"};
-const std::string RerankCalculator::OUTPUT_TAG_NAME{"RESPONSE_PAYLOAD"};
-
-REGISTER_CALCULATOR(RerankCalculator);
-
-}  // namespace mediapipe
diff --git a/src/rerank/rerank_calculator.proto b/src/rerank/rerank_calculator.proto
deleted file mode 100644
index 321348727d..0000000000
--- a/src/rerank/rerank_calculator.proto
+++ /dev/null
@@ -1,32 +0,0 @@
-//*****************************************************************************
-// Copyright 2024 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//*****************************************************************************
-
-syntax = "proto2";
-package mediapipe;
-
-import "mediapipe/framework/calculator.proto";
-
-message RerankCalculatorOptions {
-  extend mediapipe.CalculatorOptions {
-    // https://github.com/google/mediapipe/issues/634 have to be unique in app
-    // no rule to obtain this
-    optional RerankCalculatorOptions ext = 113473741;
-    }
-
-    optional uint64 max_allowed_chunks = 1 [default = 10000];  // Default taken from Cohere API documentation
-
-    optional uint64 max_position_embeddings = 2;
-}
diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp
index 55b6ab96ed..1826da8d37 100644
--- a/src/test/mediapipeflow_test.cpp
+++ b/src/test/mediapipeflow_test.cpp
@@ -3797,7 +3797,6 @@ TEST(WhitelistRegistered, MediapipeCalculatorsList) {
         "DetectionsToRectsCalculator",
         "DetectionsToRenderDataCalculator",
         "EmbeddingsCalculatorOV",
-        "RerankCalculator",
         "RerankCalculatorOV",
         "EmptyLabelCalculator",
         "EmptyLabelClassificationCalculator",
diff --git a/src/test/rerank/with_params/graph.pbtxt b/src/test/rerank/with_params/graph.pbtxt
deleted file mode 100644
index d710ba75c4..0000000000
--- a/src/test/rerank/with_params/graph.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2025 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-input_stream: "REQUEST_PAYLOAD:input"
-output_stream: "RESPONSE_PAYLOAD:output"
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:tokenizer"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "tokenizer_model"
-    }
-  }
-}
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:rerank"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "rerank_model"
-    }
-  }
-}
-node {
-    input_side_packet: "TOKENIZER_SESSION:tokenizer"
-    input_side_packet: "RERANK_SESSION:rerank"
-    calculator: "RerankCalculator"
-    input_stream: "REQUEST_PAYLOAD:input"
-    output_stream: "RESPONSE_PAYLOAD:output"
-    node_options: {
-      [type.googleapis.com / mediapipe.RerankCalculatorOptions]: {
-        max_allowed_chunks: 4
-        max_position_embeddings: 12
-      }
-    }
-}
diff --git a/src/test/rerank/with_params/invalid_graph.pbtxt b/src/test/rerank/with_params/invalid_graph.pbtxt
deleted file mode 100644
index 39d3b1be93..0000000000
--- a/src/test/rerank/with_params/invalid_graph.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2025 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-input_stream: "REQUEST_PAYLOAD:input"
-output_stream: "RESPONSE_PAYLOAD:output"
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:tokenizer"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "tokenizer_model"
-    }
-  }
-}
-node {
-  calculator: "OpenVINOModelServerSessionCalculator"
-  output_side_packet: "SESSION:rerank"
-  node_options: {
-    [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: {
-      servable_name: "rerank_model"
-    }
-  }
-}
-node {
-    input_side_packet: "TOKENIZER_SESSION:tokenizer"
-    input_side_packet: "RERANK_SESSION:rerank"
-    calculator: "RerankCalculator"
-    input_stream: "REQUEST_PAYLOAD:input"
-    output_stream: "RESPONSE_PAYLOAD:output"
-    node_options: {
-      [type.googleapis.com / mediapipe.RerankCalculatorOptions]: {
-        max_allowed_chunks: 4
-        max_position_embeddings: 8  # invalid due to number of special tokens (4) + space for query (4) = 8, no space for document
-      }
-    }
-}
diff --git a/src/test/rerank/with_params/invalid_graph_ov.pbtxt b/src/test/rerank/with_params/invalid_graph_ov.pbtxt
index 39d3b1be93..fb9ebfd5e1 100644
--- a/src/test/rerank/with_params/invalid_graph_ov.pbtxt
+++ b/src/test/rerank/with_params/invalid_graph_ov.pbtxt
@@ -32,16 +32,5 @@ node {
     }
   }
 }
-node {
-    input_side_packet: "TOKENIZER_SESSION:tokenizer"
-    input_side_packet: "RERANK_SESSION:rerank"
-    calculator: "RerankCalculator"
-    input_stream: "REQUEST_PAYLOAD:input"
-    output_stream: "RESPONSE_PAYLOAD:output"
-    node_options: {
-      [type.googleapis.com / mediapipe.RerankCalculatorOptions]: {
-        max_allowed_chunks: 4
-        max_position_embeddings: 8  # invalid due to number of special tokens (4) + space for query (4) = 8, no space for document
-      }
-    }
-}
+
+
diff --git a/yarn.lock b/yarn.lock
index c95abed3b6..964e3ed385 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -11,3 +11,50 @@
     semver "5.6.0"
     source-map-support "0.5.9"
     tsutils "3.21.0"
+
+"@bazel/worker@5.7.2":
+  version "5.7.2"
+  resolved "https://registry.yarnpkg.com/@bazel/worker/-/worker-5.7.2.tgz#43d800dc1b5a3707340a4eb0102da81c53fc6f63"
+  integrity sha512-H+auDA0QKF4mtZxKkZ2OKJvD7hGXVsVKtvcf4lbb93ur0ldpb5k810PcDxngmIGBcIX5kmyxniNTIiGFNobWTg==
+  dependencies:
+    google-protobuf "^3.6.1"
+
+buffer-from@^1.0.0:
+  version "1.1.2"
+  resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5"
+  integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==
+
+google-protobuf@^3.6.1:
+  version "3.21.4"
+  resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.21.4.tgz#2f933e8b6e5e9f8edde66b7be0024b68f77da6c9"
+  integrity sha512-MnG7N936zcKTco4Jd2PX2U96Kf9PxygAPKBug+74LHzmHXmceN16MmRcdgZv+DGef/S9YvQAfRsNCn4cjf9yyQ==
+
+semver@5.6.0:
+  version "5.6.0"
+  resolved "https://registry.yarnpkg.com/semver/-/semver-5.6.0.tgz#7e74256fbaa49c75aa7c7a205cc22799cac80004"
+  integrity sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==
+
+source-map-support@0.5.9:
+  version "0.5.9"
+  resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.9.tgz#41bc953b2534267ea2d605bccfa7bfa3111ced5f"
+  integrity sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==
+  dependencies:
+    buffer-from "^1.0.0"
+    source-map "^0.6.0"
+
+source-map@^0.6.0:
+  version "0.6.1"
+  resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.6.1.tgz#74722af32e9614e9c287a8d0bbde48b5e2f1a263"
+  integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==
+
+tslib@^1.8.1:
+  version "1.14.1"
+  resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00"
+  integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==
+
+tsutils@3.21.0:
+  version "3.21.0"
+  resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623"
+  integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA==
+  dependencies:
+    tslib "^1.8.1"