diff --git a/common_settings.bzl b/common_settings.bzl
index 2a995d59c5..c5bc6ddcc6 100644
--- a/common_settings.bzl
+++ b/common_settings.bzl
@@ -209,8 +209,6 @@ COMMON_STATIC_TEST_COPTS = select({
     "-Wall",
     "-Wno-unknown-pragmas",
     "-Werror",
-    # ov::Tensor::data method call results in deprecated warning and we use it in multiple places
-    "-Wno-deprecated-declarations",
    "-Isrc",
    "-fconcepts", # for gmock related utils
    "-fvisibility=hidden",# Needed for pybind targets
diff --git a/demos/benchmark/v3/benchmark.py b/demos/benchmark/v3/benchmark.py
index eec806da09..88c9aa8f18 100644
--- a/demos/benchmark/v3/benchmark.py
+++ b/demos/benchmark/v3/benchmark.py
@@ -438,4 +438,8 @@ async def limited_request_func(request_func_input, pbar):
 print(f"Throughput - Tokens per second: {num_tokens / benchmark_results['duration']:^,.1f}")
 print(f"Mean latency: {np.mean(benchmark_results['latencies'])*1000:.2f} ms")
 print(f"Median latency: {np.median(benchmark_results['latencies'])*1000:.2f} ms")
+# Print latency percentiles to better understand the latency distribution
+percentiles = [10, 25, 50, 75, 90, 95, 99]
+for p in percentiles:
+    print(f"{p}th percentile latency: {np.percentile(benchmark_results['latencies'], p)*1000:.2f} ms")
 print(f"Average document length: {num_tokens / len(docs)} tokens")
diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
index 5aa81b0c81..91ef8b2edb 100644
--- a/demos/common/export_models/export_model.py
+++ b/demos/common/export_models/export_model.py
@@ -101,6 +101,17 @@ def add_common_arguments(parser):
 parser_speech2text.add_argument('--enable_word_timestamps', default=False, action='store_true', help='Load model with word timestamps support.', dest='enable_word_timestamps')
 args = vars(parser.parse_args())
+
+def _default_graph_queue_size(task_name):
+    if task_name == 'image_generation':
+        return 1
+    return 'AUTO'
+
+
+def _prepend_graph_queue_directive(graph_content, task_name):
+    queue_size = _default_graph_queue_size(task_name)
+    return f"# OVMS_GRAPH_QUEUE_SIZE: {queue_size}\n{graph_content}"
+
 t2s_graph_template = """
 input_stream: "HTTP_REQUEST_PAYLOAD:input"
 output_stream: "HTTP_RESPONSE_PAYLOAD:output"
@@ -488,6 +499,7 @@ def export_text_generation_model(model_repository_path, source_model, model_name
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(text_generation_graph_template)
     print("task_parameters", task_parameters)
     graph_content = gtemplate.render(model_path=model_path, draft_model_dir_name=draft_model_dir_name, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'text_generation')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -495,7 +507,19 @@
 def export_embeddings_model_ov(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, truncate=True):
     set_max_context_length = ""
     destination_path = os.path.join(model_repository_path, model_name)
    print("Exporting embeddings model to ",destination_path)
    if not os.path.isdir(destination_path) or args['overwrite_models']:
        optimum_command = "optimum-cli export openvino --model {} --disable-convert-tokenizer --task feature-extraction --weight-format {} {} --trust-remote-code {}".format(source_model, precision, task_parameters['extra_quantization_params'], destination_path)
@@ -509,6 +533,7 @@ def export_embeddings_model_ov(model_repository_path, source_model, model_name,
         raise ValueError("Failed to export tokenizer model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(embedding_graph_ov_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'embeddings_ov')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -523,6 +548,7 @@ def export_text2speech_model(model_repository_path, source_model, model_name, pr
         raise ValueError("Failed to export text2speech model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(t2s_graph_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'text2speech')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -537,6 +563,7 @@ def export_speech2text_model(model_repository_path, source_model, model_name, pr
         raise ValueError("Failed to export speech2text model", source_model)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(s2t_graph_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'speech2text')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -553,6 +580,7 @@ def export_rerank_model_ov(model_repository_path, source_model, model_name, prec
     export_rerank_tokenizer(source_model, destination_path, max_doc_length)
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_graph_ov_template)
     graph_content = gtemplate.render(model_path="./", **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'rerank_ov')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -589,6 +617,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi
         shutil.move(os.path.join(tmpdirname, 'openvino_tokenizer.bin'), os.path.join(tokenizer_path, 'model.bin'))
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(rerank_graph_template)
     graph_content = gtemplate.render(model_name=model_name, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'rerank')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
@@ -635,6 +664,7 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
     gtemplate = jinja2.Environment(loader=jinja2.BaseLoader).from_string(image_generation_graph_template)
     graph_content = gtemplate.render(model_path=model_path, **task_parameters)
+    graph_content = _prepend_graph_queue_directive(graph_content, 'image_generation')
     with open(os.path.join(model_repository_path, model_name, 'graph.pbtxt'), 'w') as f:
         f.write(graph_content)
     print("Created graph {}".format(os.path.join(model_repository_path, model_name, 'graph.pbtxt')))
diff --git a/docs/mediapipe.md b/docs/mediapipe.md
index 73f0eb1f15..e19cc44418 100644
--- a/docs/mediapipe.md
+++ b/docs/mediapipe.md
@@ -215,6 +215,48 @@ Nodes in the MediaPipe graphs can reference both the models configured in model_
 
 Subconfig file may only contain *model_config_list* section - in the same format as in [models config file](starting_server.md).
 
+### Graph Pool (Pre-initialized Graph Queue)
+
+OpenVINO Model Server can pre-initialize a pool of MediaPipe `CalculatorGraph` instances for a graph definition. Graphs in the pool are started once during server initialization and reused across inference requests, eliminating per-request graph initialization and teardown overhead. This is especially beneficial for graphs whose calculators perform expensive setup in their `Open()` methods.
+
+#### How it works
+
+Without the graph pool (legacy behavior), each incoming request creates a new `CalculatorGraph`, calls `StartRun()` with side packets, processes the request, then tears down the graph via `CloseAllPacketSources()` and `WaitUntilDone()`.
+
+With the graph pool enabled, a fixed number of graphs are pre-initialized and kept in a queue. When a request arrives, an idle graph is acquired from the queue. After processing, the graph is returned to the queue for the next request. The graph is never torn down; instead, `WaitUntilIdle()` is called between requests and the internal timestamp is incremented.
+
+#### Configuration
+
+The graph pool size is controlled via a comment directive in the graph `.pbtxt` file:
+
+```
+# OVMS_GRAPH_QUEUE_SIZE: AUTO
+```
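+
+For example, to pin the pool to four pre-initialized graphs instead of deriving the size from the host (the value `4` here is purely illustrative):
+
+```
+# OVMS_GRAPH_QUEUE_SIZE: 4
+```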
+
+| Value | Behavior |
+|:------|:---------|
+| `AUTO` | Pool size is set to the number of hardware threads (`std::thread::hardware_concurrency()`), or 16 if detection fails |
+| Positive integer (e.g. `4`) | Pool size is set to the given value; a value above the hardware thread count is clamped to it with a warning |
+| `-1` | Graph pool disabled; the server falls back to per-request graph creation |
+| `0` | Rejected as invalid at graph load time |
+| *(directive absent)* | Default: graph pool is disabled |
+
+**Default behavior:** the graph pool stays disabled unless `OVMS_GRAPH_QUEUE_SIZE` is explicitly present in `graph.pbtxt`.
+
+**Generated graphs from exporters:**
+- `demos/common/export_models/export_model.py` and OVMS `--pull --task ...` graph export emit `OVMS_GRAPH_QUEUE_SIZE` automatically.
+- In `export_model.py`: image generation graphs use `1`, and all other graph types use `AUTO`.
+- In OVMS `--task ...` graph export: image generation graphs use `1`, and other graph types use `min(physical_cores, rest_workers)` (with the OVMS default REST worker calculation when `rest_workers` is not provided explicitly).
+
+#### Important considerations for graph developers
+
+**Stateful calculators:**
+Since graphs in the pool are reused across requests, any state held by a calculator between `Process()` calls will persist across requests. If your calculator accumulates state (e.g. counters, buffers, history), that state will carry over to the next request that reuses the same graph instance. Design your calculators to either:
+- be stateless (reset any per-request state at the beginning of each `Process()` call), or
+- explicitly handle the fact that the graph may have already processed prior requests.
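+
+Below is a minimal sketch of the first approach. The calculator name, tag names, and packet type are illustrative only (they are not part of OVMS); the point is that per-request state is cleared at the top of `Process()`, so a pooled, reused graph instance behaves like a freshly created one:
+
+```cpp
+#include <string>
+
+#include "mediapipe/framework/calculator_framework.h"
+
+namespace mediapipe {
+
+class ExampleResettingCalculator : public CalculatorBase {
+public:
+    static absl::Status GetContract(CalculatorContract* cc) {
+        cc->Inputs().Tag("INPUT").Set<std::string>();
+        cc->Outputs().Tag("OUTPUT").Set<std::string>();
+        return absl::OkStatus();
+    }
+
+    absl::Status Process(CalculatorContext* cc) final {
+        // The pool reuses this calculator instance across requests, so clear
+        // anything accumulated while serving the previous request first.
+        requestBuffer.clear();
+        if (cc->Inputs().Tag("INPUT").IsEmpty()) {
+            return absl::OkStatus();
+        }
+        requestBuffer = cc->Inputs().Tag("INPUT").Get<std::string>();
+        // ... per-request processing on requestBuffer ...
+        cc->Outputs().Tag("OUTPUT").AddPacket(
+            MakePacket<std::string>(requestBuffer).At(cc->InputTimestamp()));
+        return absl::OkStatus();
+    }
+
+private:
+    std::string requestBuffer;  // per-request state; must not leak across requests
+};
+REGISTER_CALCULATOR(ExampleResettingCalculator);
+
+}  // namespace mediapipe
+```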
+
+**Input side packets from requests are not supported:**
+When the graph pool is enabled, side packets are set once at pool construction time and cannot be overridden per request. If a client sends request parameters that would normally become input side packets (e.g. KServe request parameters other than `OVMS_MP_TIMESTAMP`), the request will be rejected with an error. If your graph relies on per-request side packets to configure calculator behavior, either disable the graph pool (`# OVMS_GRAPH_QUEUE_SIZE: -1`) or redesign the graph to accept such parameters as regular input stream packets instead of side packets.
+
 ## Deployment testing
 
 ### Debug logs
diff --git a/src/BUILD b/src/BUILD
index d3e5af3861..41477c7822 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -150,6 +150,39 @@ ovms_cc_library(
     hdrs = ["queue.hpp"],
     visibility = ["//visibility:public",],
 )
+ovms_cc_library(
+    name = "mediapipe_internal_graph_side_packets",
+    hdrs = ["mediapipe_internal/graph_side_packets.hpp"],
+    visibility = ["//visibility:public",],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graph_executor_constants",
+    hdrs = ["mediapipe_internal/graph_executor_constants.hpp"],
+    visibility = ["//visibility:public"],
+)
+ovms_cc_library(
+    name = "mediapipe_internal_graphqueue",
+    hdrs = [
+        "mediapipe_internal/graphqueue.hpp",
+        "mediapipe_internal/outputstreamobserver.hpp",
+    ], # TODO FIXME
+    srcs = ["mediapipe_internal/graphqueue.cpp"],
+    deps = [
+        "libovms_queue",
+        "libovmslogging",
+        "libovms_execution_context",
+        "libovmstimer",
+        "libovmsmetrics",
+        "model_metric_reporter",
+        "mediapipe_internal_graph_executor_constants",
+        "mediapipe_internal_graph_side_packets",
+        "//third_party:openvino",
+        "@mediapipe//mediapipe/framework:calculator_graph",
+        "//src/python:libovmspythonmodule", # TODO not split
+        "//src/llm:genai_servables", # TODO split!
+    ],
+    visibility = ["//visibility:public",],
+)
 ovms_cc_library(
     name = "libovms_ovinferrequestsqueue",
     hdrs = ["ovinferrequestsqueue.hpp"],
     visibility = ["//visibility:public",],
 )
@@ -542,6 +575,7 @@ ovms_cc_library(
         "mediapipe_internal/mediapipegraphconfig.cpp",
         "mediapipe_internal/mediapipegraphdefinition.cpp",
         "mediapipe_internal/mediapipegraphdefinition.hpp",
+        "mediapipe_internal/outputstreamobserver.hpp",
         "mediapipe_internal/mediapipegraphexecutor.cpp",
         "mediapipe_internal/mediapipegraphexecutor.hpp",
         "mediapipe_internal/packettypes.hpp",
@@ -682,6 +716,8 @@ ovms_cc_library(
     }) +
     select({
         "//conditions:default": [
+            "mediapipe_internal_graph_executor_constants",
+            "mediapipe_internal_graphqueue",
             "@mediapipe_calculators//:mediapipe_calculators", # Need this dependencies here because we use ovms/src - cannot add in ovms_dependencies because we copy src directory later in Dockerfile
             "@mediapipe//mediapipe/graphs/holistic_tracking:holistic_tracking_to_render_data",
             "@mediapipe//mediapipe/graphs/iris_tracking:iris_tracking_cpu_deps",
@@ -3016,6 +3052,7 @@ cc_library(
         ":test_test_with_temp_dir",
         "//src/graph_export:graph_export",
         "//src:libovms_server_settings",
+        "//src:libovms_systeminfo",
         "@com_google_googletest//:gtest",
     ],
     local_defines = COMMON_LOCAL_DEFINES,
diff --git a/src/capi_frontend/server_settings.hpp b/src/capi_frontend/server_settings.hpp
index 5b8a3dce54..77645dda7a 100644
--- a/src/capi_frontend/server_settings.hpp
+++ b/src/capi_frontend/server_settings.hpp
@@ -158,6 +158,7 @@ struct ExportSettings {
     std::string modelName = "";
     std::string modelPath = "./";
     std::string targetDevice = "CPU";
+    std::optional<uint32_t> restWorkers;
     std::optional<std::string> extraQuantizationParams;
     std::optional<std::string> vocoder;
     std::string precision = "int8";
diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp
index dd7141b1ce..4122bc3e14 100644
--- a/src/cli_parser.cpp
+++ b/src/cli_parser.cpp
@@ -728,6 +728,7 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
         hfSettings.exportSettings.extraQuantizationParams = result->operator[]("extra_quantization_params").as<std::string>();
     if (result->count("vocoder"))
         hfSettings.exportSettings.vocoder = result->operator[]("vocoder").as<std::string>();
+    hfSettings.exportSettings.restWorkers = serverSettings.restWorkers;
     hfSettings.downloadPath = result->operator[]("model_repository_path").as<std::string>();
     if (result->count("task")) {
         hfSettings.task = stringToEnum(result->operator[]("task").as<std::string>());
diff --git a/src/graph_export/BUILD b/src/graph_export/BUILD
index 47dc407a1e..57179bced7 100644
--- a/src/graph_export/BUILD
+++ b/src/graph_export/BUILD
@@ -29,6 +29,7 @@ ovms_cc_library(
         "@ovms//src:libovms_module",
         "@ovms//src:libovmsfilesystem",
         "@ovms//src:libovmslocalfilesystem",
+        "@ovms//src:libovms_systeminfo",
         "@com_github_tencent_rapidjson//:rapidjson",
         "@ovms//src:libovmsschema",
         "@ovms//src:libovms_version",
diff --git a/src/graph_export/graph_export.cpp b/src/graph_export/graph_export.cpp
index dadbd57777..0ca05875e1 100644
--- a/src/graph_export/graph_export.cpp
+++ b/src/graph_export/graph_export.cpp
@@ -53,6 +53,22 @@ namespace ovms {
 
 static const std::string OVMS_VERSION_GRAPH_LINE = std::string("# File created with: ") + PROJECT_NAME + std::string(" ") + PROJECT_VERSION + std::string("\n");
+static const std::string OVMS_GRAPH_QUEUE_SIZE_LINE_PREFIX = "# OVMS_GRAPH_QUEUE_SIZE: ";
+static const std::string OVMS_GRAPH_QUEUE_SIZE_AUTO = "AUTO";
+
+static std::string getDefaultGraphQueueSizeDirective(const HFSettingsImpl& hfSettings) {
+    if (hfSettings.task == IMAGE_GENERATION_GRAPH) {
+        return "1";
+    }
return OVMS_GRAPH_QUEUE_SIZE_AUTO; +} + +static std::string buildGraphHeader(const HFSettingsImpl& hfSettings) { + std::ostringstream oss; + oss << OVMS_VERSION_GRAPH_LINE; + oss << OVMS_GRAPH_QUEUE_SIZE_LINE_PREFIX << getDefaultGraphQueueSizeDirective(hfSettings) << "\n"; + return oss.str(); +} static std::string constructModelsPath(const std::string& modelPath, const std::optional& ggufFilenameOpt) { std::string modelsPath; @@ -116,7 +132,7 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -211,7 +227,7 @@ static Status createRerankGraphTemplate(const std::string& directoryPath, const auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); // Windows path creation - graph parser needs forward slashes in paths std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); @@ -255,7 +271,7 @@ static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, co auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -301,7 +317,7 @@ static Status createTextToSpeechGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -352,7 +368,7 @@ static Status createSpeechToTextGraphTemplate(const std::string& directoryPath, auto& exportSettings = hfSettings.exportSettings; std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); @@ -406,7 +422,7 @@ static Status createImageGenerationGraphTemplate(const std::string& directoryPat GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); std::ostringstream oss; - oss << OVMS_VERSION_GRAPH_LINE; + oss << buildGraphHeader(hfSettings); // clang-format off oss << R"( input_stream: "HTTP_REQUEST_PAYLOAD:input" diff --git a/src/http_frontend/http_graph_executor_impl.cpp b/src/http_frontend/http_graph_executor_impl.cpp index b970f62594..4848f3760a 100644 --- 
a/src/http_frontend/http_graph_executor_impl.cpp +++ b/src/http_frontend/http_graph_executor_impl.cpp @@ -38,6 +38,10 @@ namespace ovms { static const std::string UNUSED_REQUEST_ID = ""; +bool requestHasInputSidePackets(const HttpPayload& request) { + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, // out const HttpPayload& request) { // in diff --git a/src/http_frontend/http_graph_executor_impl.hpp b/src/http_frontend/http_graph_executor_impl.hpp index 9846b10158..205d428a1b 100644 --- a/src/http_frontend/http_graph_executor_impl.hpp +++ b/src/http_frontend/http_graph_executor_impl.hpp @@ -48,6 +48,9 @@ class PythonBackend; using HttpReaderWriter = HttpAsyncWriter; +// Checks whether the request contains user-provided input side packets. +bool requestHasInputSidePackets(const HttpPayload& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. diff --git a/src/kfs_frontend/kfs_graph_executor_impl.cpp b/src/kfs_frontend/kfs_graph_executor_impl.cpp index 034f6f0907..b5033501d9 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.cpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.cpp @@ -24,6 +24,7 @@ #include "../kfs_frontend/kfs_utils.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_executor_constants.hpp" #include "../mediapipe_internal/mediapipe_utils.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" #include "../predict_request_validation_utils.hpp" @@ -925,6 +926,7 @@ static Status createPacketAndPushIntoGraph(const std::string& name, std::shared_ } std::unique_ptr inputTensor; OVMS_RETURN_ON_FAIL(deserializeTensor(name, *request, inputTensor, pythonBackend)); + SPDLOG_TRACE("Current Timestamp before actual pushing:{}", timestamp.Value()); MP_RETURN_ON_FAIL(graph.AddPacketToInputStream( name, ::mediapipe::packet_internal::Create( @@ -1152,10 +1154,19 @@ Status createAndPushPacketsImpl( return StatusCode::OK; } +bool requestHasInputSidePackets(const KFSRequest& request) { + static const std::string TIMESTAMP_PARAM{"OVMS_MP_TIMESTAMP"}; + for (const auto& [name, valueChoice] : request.parameters()) { + if (name != TIMESTAMP_PARAM) { + return true; + } + } + return false; +} + Status deserializeInputSidePacketsFromFirstRequestImpl( std::map& inputSidePackets, const KFSRequest& request) { - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG{"py"}; for (const auto& [name, valueChoice] : request.parameters()) { SPDLOG_DEBUG("Found: {}; parameter in request for: {};", name, request.model_name()); if (name == TIMESTAMP_PARAMETER_NAME) { diff --git a/src/kfs_frontend/kfs_graph_executor_impl.hpp b/src/kfs_frontend/kfs_graph_executor_impl.hpp index cfa65b6a57..1c6e697455 100644 --- a/src/kfs_frontend/kfs_graph_executor_impl.hpp +++ b/src/kfs_frontend/kfs_graph_executor_impl.hpp @@ -36,6 +36,10 @@ namespace ovms { class PythonBackend; class Status; +// Checks whether the request contains user-provided input side packets +// (parameters other than the reserved OVMS_MP_TIMESTAMP). +bool requestHasInputSidePackets(const KFSRequest& request); + // Deserialization of parameters inside KServe gRPC request // into mediapipe Packets. // To be used by both - infer & inferStream. 
diff --git a/src/llm/BUILD b/src/llm/BUILD index ae37d936ca..5f64ad197f 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -24,6 +24,7 @@ ovms_cc_library( "//third_party:openvino", "@mediapipe//mediapipe/framework:calculator_framework", "@com_github_tencent_rapidjson//:rapidjson", + "//src:mediapipe_internal_graph_side_packets", "//src/kfserving_api:kfserving_api_cpp", "//src:libovmsprofiler", ":genai_servables", diff --git a/src/llm/http_llm_calculator.cc b/src/llm/http_llm_calculator.cc index ae6461c61a..2415ae08da 100644 --- a/src/llm/http_llm_calculator.cc +++ b/src/llm/http_llm_calculator.cc @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #pragma warning(push) @@ -27,6 +28,7 @@ #include "../http_payload.hpp" #include "../logging.hpp" +#include "../mediapipe_internal/graph_side_packets.hpp" #include "../profiler.hpp" #include "apis/openai_completions.hpp" #include "servable.hpp" @@ -36,9 +38,11 @@ using namespace ovms; namespace mediapipe { const std::string LLM_SESSION_SIDE_PACKET_TAG = "LLM_NODE_RESOURCES"; +const std::string LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG = "LLM_NODE_EXECUTION_CONTEXTS"; class HttpLLMCalculator : public CalculatorBase { std::shared_ptr servable; + std::shared_ptr executionContextHolder; std::shared_ptr executionContext; static const std::string INPUT_TAG_NAME; @@ -54,6 +58,9 @@ class HttpLLMCalculator : public CalculatorBase { cc->Inputs().Tag(INPUT_TAG_NAME).Set(); cc->Inputs().Tag(LOOPBACK_TAG_NAME).Set(); cc->InputSidePackets().Tag(LLM_SESSION_SIDE_PACKET_TAG).Set(); + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG)) { + cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Set(); + } cc->Outputs().Tag(OUTPUT_TAG_NAME).Set(); cc->Outputs().Tag(LOOPBACK_TAG_NAME).Set(); return absl::OkStatus(); @@ -72,7 +79,17 @@ class HttpLLMCalculator : public CalculatorBase { auto it = servableMap.find(cc->NodeName()); RET_CHECK(it != servableMap.end()) << "Could not find initialized LLM node named: " << cc->NodeName(); this->servable = it->second; - this->executionContext = servable->createExecutionContext(); + + if (cc->InputSidePackets().HasTag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG) && !cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).IsEmpty()) { + ovms::GenAiExecutionContextMap executionContextMap = cc->InputSidePackets().Tag(LLM_EXECUTION_CONTEXT_SIDE_PACKET_TAG).Get(); + auto contextIt = executionContextMap.find(cc->NodeName()); + RET_CHECK(contextIt != executionContextMap.end()) << "Could not find LLM execution context holder for node named: " << cc->NodeName(); + this->executionContextHolder = contextIt->second; + } + + if (!this->executionContextHolder) { + this->executionContext = servable->createExecutionContext(); + } SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "LLMCalculator [Node: {}] Open end", cc->NodeName()); return absl::OkStatus(); } @@ -81,6 +98,12 @@ class HttpLLMCalculator : public CalculatorBase { OVMS_PROFILE_FUNCTION(); RET_CHECK(this->servable != nullptr); + if (this->executionContextHolder) { + std::lock_guard lock(this->executionContextHolder->mutex); + this->executionContext = this->executionContextHolder->executionContext; + } + RET_CHECK(this->executionContext != nullptr) << "LLM execution context not initialized for node: " << cc->NodeName(); + // For cases where MediaPipe decides to trigger Process() when there are no inputs if (cc->Inputs().Tag(INPUT_TAG_NAME).IsEmpty() && 
cc->Inputs().Tag(LOOPBACK_TAG_NAME).IsEmpty()) {
             return absl::OkStatus();
diff --git a/src/logging.cpp b/src/logging.cpp
index e89fce9a07..aee9e4bc2e 100644
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -41,7 +41,7 @@ std::shared_ptr<spdlog::logger> rerank_calculator_logger = std::make_shared<spd
 std::shared_ptr<spdlog::logger> ov_logger = std::make_shared<spdlog::logger>("openvino");
 #endif
-const std::string default_pattern = "[%Y-%m-%d %T.%e][%t][%n][%l][%s:%#] %v";
+const std::string default_pattern = "[%Y-%m-%d %T.%f][%t][%n][%l][%s:%#] %v";
 
 static void set_log_level(const std::string log_level, std::shared_ptr<spdlog::logger> logger) {
     logger->set_level(spdlog::level::info);
diff --git a/src/mediapipe_internal/graph_executor_constants.hpp b/src/mediapipe_internal/graph_executor_constants.hpp
new file mode 100644
index 0000000000..55e3af7f59
--- /dev/null
+++ b/src/mediapipe_internal/graph_executor_constants.hpp
@@ -0,0 +1,35 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+namespace ovms {
+
+inline const std::string PYTHON_SESSION_SIDE_PACKET_TAG = "py";
+inline const std::string LLM_SESSION_SIDE_PACKET_TAG = "llm";
+inline const std::string LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG = "llm_ctx";
+inline const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes";
+inline const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable";
+inline const std::string RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable";
+inline const std::string STT_SESSION_SIDE_PACKET_TAG = "s2t_servable";
+inline const std::string TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable";
+inline const std::string PYTHON_SIDE_PACKET_NAME = "py";
+inline const std::string LLM_SESSION_PACKET_NAME = "llm";
+inline constexpr int64_t STARTING_TIMESTAMP_VALUE = 0;
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graph_side_packets.hpp b/src/mediapipe_internal/graph_side_packets.hpp
new file mode 100644
index 0000000000..8b67bd3bc0
--- /dev/null
+++ b/src/mediapipe_internal/graph_side_packets.hpp
@@ -0,0 +1,80 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+
+namespace ovms {
+
+// Forward declarations - only shared_ptrs are stored so full definitions are not needed
+class PythonNodeResources;
+class GenAiServable;
+struct GenAiServableExecutionContext;
+struct ImageGenerationPipelines;
+struct EmbeddingsServable;
+struct RerankServable;
+struct SttServable;
+class TtsServable;
+
+using PythonNodeResourcesMap = std::unordered_map<std::string, std::shared_ptr<PythonNodeResources>>;
+using GenAiServableMap = std::unordered_map<std::string, std::shared_ptr<GenAiServable>>;
+using RerankServableMap = std::unordered_map<std::string, std::shared_ptr<RerankServable>>;
+using SttServableMap = std::unordered_map<std::string, std::shared_ptr<SttServable>>;
+using TtsServableMap = std::unordered_map<std::string, std::shared_ptr<TtsServable>>;
+using EmbeddingsServableMap = std::unordered_map<std::string, std::shared_ptr<EmbeddingsServable>>;
+using ImageGenerationPipelinesMap = std::unordered_map<std::string, std::shared_ptr<ImageGenerationPipelines>>;
+
+struct GenAiExecutionContextHolder {
+    std::mutex mutex;
+    std::shared_ptr<GenAiServableExecutionContext> executionContext;
+};
+using GenAiExecutionContextMap = std::unordered_map<std::string, std::shared_ptr<GenAiExecutionContextHolder>>;
+
+struct GraphSidePackets {
+    PythonNodeResourcesMap pythonNodeResourcesMap;
+    GenAiServableMap genAiServableMap;
+    GenAiExecutionContextMap genAiExecutionContextMap;
+    ImageGenerationPipelinesMap imageGenPipelinesMap;
+    EmbeddingsServableMap embeddingsServableMap;
+    RerankServableMap rerankServableMap;
+    SttServableMap sttServableMap;
+    TtsServableMap ttsServableMap;
+    void clear() {
+        pythonNodeResourcesMap.clear();
+        genAiServableMap.clear();
+        genAiExecutionContextMap.clear();
+        imageGenPipelinesMap.clear();
+        embeddingsServableMap.clear();
+        rerankServableMap.clear();
+        sttServableMap.clear();
+        ttsServableMap.clear();
+    }
+    bool empty() {
+        return (pythonNodeResourcesMap.empty() &&
+                genAiServableMap.empty() &&
+                genAiExecutionContextMap.empty() &&
+                imageGenPipelinesMap.empty() &&
+                embeddingsServableMap.empty() &&
+                rerankServableMap.empty() &&
+                sttServableMap.empty() &&
+                ttsServableMap.empty());
+    }
+};
+
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.cpp b/src/mediapipe_internal/graphqueue.cpp
new file mode 100644
index 0000000000..01d04425c6
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.cpp
@@ -0,0 +1,114 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include "graphqueue.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../queue.hpp"
+#include "src/python/pythonnoderesources.hpp"
+#include "src/llm/servable.hpp"
+
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+#pragma warning(pop)
+
+#include "graph_executor_constants.hpp"
+#include "outputstreamobserver.hpp"
+namespace ovms {
+GraphQueue::GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength) :
+    Queue(streamsLength),
+    sidePacketMaps(sidePacketMaps) {
+    inferRequests.reserve(streamsLength);
+    for (auto i = 0; i < streamsLength; ++i) {
+        // Build observer map locally before constructing GraphHelper (const map)
+        std::unordered_map<std::string, std::shared_ptr<ObserverHolder>> observers;
+        for (auto& name : config.output_stream()) {
+            std::string streamName = getStreamName(name);
+            auto holder = std::make_shared<ObserverHolder>();
+            holder->current = std::make_shared<NullOutputStreamObserver>();
+            observers[streamName] = holder;
+        }
+
+        auto gh = std::make_shared<GraphHelper>(std::move(observers));
+        gh->graph = std::make_unique<::mediapipe::CalculatorGraph>();
+        gh->currentTimestamp = ::mediapipe::Timestamp(0);
+
+        auto absStatus = gh->graph->Initialize(config);
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("Graph queue initialization failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
+        }
+        for (const auto& [streamName, holder] : gh->outStreamObservers) {
+            // Lambda captures holder (shared_ptr) by value — safe regardless of map layout
+            absStatus = gh->graph->ObserveOutputStream(streamName, [holder](const ::mediapipe::Packet& packet) -> absl::Status { return holder->current->handlePacket(packet); });
+            if (!absStatus.ok()) {
+                SPDLOG_ERROR("Graph queue ObserveOutputStream failed: {}", absStatus.ToString());
+                throw std::runtime_error(absStatus.ToString());
+            }
+        }
+        for (const auto& [nodeName, _] : sidePacketMaps->genAiServableMap) {
+            gh->genAiExecutionContextMap[nodeName] = std::make_shared<GenAiExecutionContextHolder>();
+        }
+        std::map<std::string, ::mediapipe::Packet> inputSidePackets;
+#if (PYTHON_DISABLE == 0)
+        inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<PythonNodeResourcesMap>(sidePacketMaps->pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+#endif
+        inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiServableMap>(sidePacketMaps->genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<GenAiExecutionContextMap>(gh->genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<ImageGenerationPipelinesMap>(sidePacketMaps->imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<EmbeddingsServableMap>(sidePacketMaps->embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<RerankServableMap>(sidePacketMaps->rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<SttServableMap>(sidePacketMaps->sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket<TtsServableMap>(sidePacketMaps->ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE));
+        absStatus = gh->graph->StartRun(inputSidePackets);
+        if (!absStatus.ok()) {
+            SPDLOG_ERROR("Graph queue StartRun failed: {}", absStatus.ToString());
+            throw std::runtime_error(absStatus.ToString());
+        }
+        inferRequests.emplace_back(std::move(gh));
+    }
+}
+GraphQueue::~GraphQueue() {
+    for (auto& graphHelper : inferRequests) {
+        auto absStatus = graphHelper->graph->WaitUntilIdle();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue WaitUntilIdle error: {}", absStatus.ToString());
+        }
+        absStatus = graphHelper->graph->CloseAllPacketSources();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue CloseAllPacketSources error: {}", absStatus.ToString());
+        }
+        absStatus = graphHelper->graph->WaitUntilDone();
+        if (!absStatus.ok()) {
+            SPDLOG_DEBUG("Graph queue WaitUntilDone error: {}", absStatus.ToString());
+        }
+        graphHelper->graph->Cancel();
+        graphHelper->graph.reset();
+    }
+}
+}  // namespace ovms
diff --git a/src/mediapipe_internal/graphqueue.hpp b/src/mediapipe_internal/graphqueue.hpp
new file mode 100644
index 0000000000..ab80e6d095
--- /dev/null
+++ b/src/mediapipe_internal/graphqueue.hpp
@@ -0,0 +1,102 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../queue.hpp"
+
+#pragma warning(push)
+#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#include "mediapipe/framework/calculator_graph.h"
+#include "mediapipe/framework/port/status.h"
+#pragma GCC diagnostic pop
+#pragma warning(pop)
+
+#include "graph_executor_constants.hpp"
+#include "graph_side_packets.hpp"
+#include "outputstreamobserver.hpp"
+namespace ovms {
+class OutputStreamObserverI;
+class NullOutputStreamObserver;
+struct ObserverHolder;
+struct GraphHelper {
+    std::unique_ptr<::mediapipe::CalculatorGraph> graph;
+    // const after construction: keys are fixed, but observer implementations
+    // can be swapped via the mutable ObserverHolder inside each shared_ptr.
+    const std::unordered_map<std::string, std::shared_ptr<ObserverHolder>> outStreamObservers;
+    GenAiExecutionContextMap genAiExecutionContextMap;
+    ::mediapipe::Timestamp currentTimestamp;
+    GraphHelper() = default;
+    // Constructor that takes the pre-built observer map
+    GraphHelper(std::unordered_map<std::string, std::shared_ptr<ObserverHolder>>&& observers) :
+        outStreamObservers(std::move(observers)) {}
+    GraphHelper(const GraphHelper&) = delete;
+    GraphHelper& operator=(const GraphHelper&) = delete;
+    GraphHelper(GraphHelper&& gh) :
+        graph(std::move(gh.graph)),
+        outStreamObservers(std::move(const_cast<std::unordered_map<std::string, std::shared_ptr<ObserverHolder>>&>(gh.outStreamObservers))),
+        genAiExecutionContextMap(std::move(gh.genAiExecutionContextMap)),
+        currentTimestamp(gh.currentTimestamp) {}
+    GraphHelper& operator=(GraphHelper&&) = delete;
+};
+// we need to keep Graph alive during MP reload hence shared_ptr
+class GraphQueue : public Queue<std::shared_ptr<GraphHelper>> {
+public:  // XXX TODO make private? we need to access in mediapipegraphdefinition to set side packets though
+    std::shared_ptr<GraphSidePackets> sidePacketMaps;
+
+public:
+    GraphQueue(const ::mediapipe::CalculatorGraphConfig& config, std::shared_ptr<GraphSidePackets> sidePacketMaps, int streamsLength);
+    ~GraphQueue();
+};
+
+struct GraphIdGuard {
+    std::weak_ptr<GraphQueue> weakQueue;
+    const int id;
+    // shared_ptr because GraphIdGuard (and the executor holding it) must keep
+    // the GraphHelper alive even after the GraphQueue is destroyed during
+    // mediapipe graph reload/retire — the in-flight request continues using
+    // the old graph until completion.
+    std::shared_ptr<GraphHelper> gh;
+    ::mediapipe::CalculatorGraph& graph;
+    GraphIdGuard(std::shared_ptr<GraphQueue>& queue) :
+        weakQueue(queue),
+        id(queue->getIdleStream().get()),
+        gh((queue->getInferRequest(id))),
+        graph(*gh->graph) {
+    }
+    GraphIdGuard(GraphIdGuard&&) = default;
+    GraphIdGuard(const GraphIdGuard&) = delete;
+    ~GraphIdGuard() {
+        auto existingQueue = weakQueue.lock();
+        if (existingQueue)
+            existingQueue->returnStream(this->id);
+    }
+};
+}  // namespace ovms
diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp
index 2e4f3d428e..193576b416 100644
--- a/src/mediapipe_internal/mediapipegraphconfig.hpp
+++ b/src/mediapipe_internal/mediapipegraphconfig.hpp
@@ -15,7 +15,12 @@
 //*****************************************************************************
 #pragma once
 
+#include <optional>
 #include <string>
+#include <thread>
+#include <variant>
+
+#include <spdlog/spdlog.h>
 
 #pragma warning(push)
 #pragma warning(disable : 6313)
 #include
@@ -27,6 +32,22 @@
 extern const std::string DEFAULT_GRAPH_FILENAME;
 extern const std::string DEFAULT_SUBCONFIG_FILENAME;
 extern const std::string DEFAULT_MODELMESH_SUBCONFIG_FILENAME;
 
+/**
+ * @brief Tag type representing AUTO graph queue size (determined at runtime).
+ */
+struct GraphQueueAutoTag {
+    bool operator==(const GraphQueueAutoTag&) const { return true; }
+};
+
+/**
+ * @brief Represents the user's graph_queue_size setting.
+ *
+ * - std::nullopt => user did not set this field
+ * - int => user explicitly set a numeric value
+ * - GraphQueueAutoTag => user explicitly set "AUTO"
+ */
+using GraphQueueSizeValue = std::optional<std::variant<int, GraphQueueAutoTag>>;
+
 class Status;
 
 /**
@@ -69,6 +90,15 @@ class MediapipeGraphConfig {
      */
     std::string currentGraphPbTxtMD5;
 
+    /**
+     * @brief Graph queue size configuration.
+     *
+     * - std::nullopt => user did not set this field
+     * - int => user explicitly set a numeric size
+     * - GraphQueueAutoTag => user explicitly set "AUTO"
+     */
+    GraphQueueSizeValue graphQueueSize;
+
 public:
     /**
      * @brief Construct a new Mediapie Graph configuration object
@@ -206,6 +236,55 @@ class MediapipeGraphConfig {
         this->currentGraphPbTxtMD5 = currentGraphPbTxtMD5;
     }
 
+    /**
+     * @brief Get the graph queue size setting.
+     *
+     * @return const GraphQueueSizeValue& - nullopt if not set, int or GraphQueueAutoTag
+     */
+    const GraphQueueSizeValue& getGraphQueueSize() const {
+        return this->graphQueueSize;
+    }
+
+    /**
+     * @brief Set the graph queue size to an explicit numeric value.
+     */
+    void setGraphQueueSize(int size) {
+        this->graphQueueSize = size;
+    }
+
+    /**
+     * @brief Set the graph queue size to AUTO.
+     */
+    void setGraphQueueSizeAuto() {
+        this->graphQueueSize = GraphQueueAutoTag{};
+    }
+
+    /**
+     * @brief Resolve the graph queue size setting to a concrete integer.
+     *
+     * Returns:
+     *   -1 => queue creation disabled (user set -1 or not set)
+     *   >0 => explicit size or resolved AUTO
+     *
+     * Value 0 is rejected at parse time (resolveGraphQueueSize).
+     * When not set (nullopt): returns -1 (queue disabled).
+     * When AUTO: returns hardware_concurrency() or 16 as fallback.
+     */
+    int getInitialQueueSize() const {
+        if (!this->graphQueueSize.has_value()) {
+            return -1;  // not set - queue disabled by default
+        }
+        if (std::holds_alternative<GraphQueueAutoTag>(*this->graphQueueSize)) {
+            unsigned int hwThreads = std::thread::hardware_concurrency();
+            if (hwThreads == 0) {
+                SPDLOG_WARN("std::thread::hardware_concurrency() returned 0 (unknown). Falling back to graph queue size 16.");
+                return 16;
+            }
+            return static_cast<int>(hwThreads);
+        }
+        return std::get<int>(*this->graphQueueSize);
+    }
+
     bool isReloadRequired(const MediapipeGraphConfig& rhs) const;
 
     /**
diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp
index 9047765e75..7057dc5898 100644
--- a/src/mediapipe_internal/mediapipegraphdefinition.cpp
+++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp
@@ -18,31 +18,33 @@
 #include
 #include
 #include
+#include <regex>
 #include
 #include
+#include <thread>
 #include
 #include
 #include
-#include "../execution_context.hpp"
-#include "../filesystem.hpp"
-#include "../kfs_frontend/kfs_utils.hpp"
-#include "../kfs_frontend/kfs_request_utils.hpp"
-#include "../deserialization_main.hpp"
-#include "../metric.hpp"
-#include "../model_metric_reporter.hpp"
-#include "../modelmanager.hpp"
-#include "../ov_utils.hpp"
-#include "../llm/servable.hpp"
-#include "../llm/servable_initializer.hpp"
+#include "src/execution_context.hpp"
+#include "src/filesystem.hpp"
+#include "src/kfs_frontend/kfs_utils.hpp"
+#include "src/kfs_frontend/kfs_request_utils.hpp"
+#include "src/deserialization_main.hpp"
+#include "src/metric.hpp"
+#include "src/model_metric_reporter.hpp"
+#include "src/modelmanager.hpp"
+#include "src/ov_utils.hpp"
+#include "src/llm/servable.hpp"
+#include "src/llm/servable_initializer.hpp"
 #if (PYTHON_DISABLE == 0)
-#include "../python/pythonnoderesources.hpp"
+#include "src/python/pythonnoderesources.hpp"
 #endif
-#include "../status.hpp"
-#include "../stringutils.hpp"
-#include "../tensorinfo.hpp"
-#include "../timer.hpp"
-#include "../version.hpp"
+#include "src/status.hpp"
+#include "src/stringutils.hpp"
+#include "src/tensorinfo.hpp"
+#include "src/timer.hpp"
+#include "src/version.hpp"
 #include "mediapipe/framework/port/parse_text_proto.h"
 #include
"mediapipe/framework/port/status.h" #include "mediapipe_utils.hpp" @@ -54,6 +56,12 @@ #include "src/image_gen/imagegen_init.hpp" #include "src/image_gen/image_gen_calculator.pb.h" +#include "src/sidepacket_servable.hpp" +#include "src/embeddings/embeddings_servable.hpp" +#include "src/rerank/rerank_servable.hpp" +#include "src/audio/speech_to_text/s2t_servable.hpp" +#include "src/audio/text_to_speech/t2s_servable.hpp" + namespace ovms { MediapipeGraphConfig MediapipeGraphDefinition::MGC; @@ -95,6 +103,48 @@ Status MediapipeGraphDefinition::validateForConfigFileExistence() { return StatusCode::OK; } +Status MediapipeGraphDefinition::resolveGraphQueueSize() { + // 1. Explicit pbtxt directive: # OVMS_GRAPH_QUEUE_SIZE: + // Always honored regardless of env var or calculator checks. + // Value -1 disables the queue, AUTO or positive integer enables it. + // Value 0 is rejected as invalid. + static const std::regex directiveRegex( + R"((?:^|\n)\s*#\s*OVMS_GRAPH_QUEUE_SIZE\s*:\s*(\S+)\s*(?:\r?\n|$))"); + std::smatch match; + if (std::regex_search(this->chosenConfig, match, directiveRegex)) { + std::string value = match[1].str(); + if (value == "AUTO") { + this->mgconfig.setGraphQueueSizeAuto(); + return StatusCode::OK; + } + auto parsed = stoi32(value); + if (!parsed.has_value()) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: '{}'. Expected integer or 'AUTO'.", value); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + int queueSize = parsed.value(); + if (queueSize < -1 || queueSize == 0) { + SPDLOG_ERROR("Invalid OVMS_GRAPH_QUEUE_SIZE value: {}. Must be -1 (disabled) or a positive integer.", queueSize); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + if (queueSize == -1) { + SPDLOG_DEBUG("Graph queue explicitly disabled (OVMS_GRAPH_QUEUE_SIZE=-1) for mediapipe: {}", getName()); + return StatusCode::OK; + } + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads > 0 && queueSize > static_cast(maxThreads)) { + SPDLOG_WARN("OVMS_GRAPH_QUEUE_SIZE value: {} exceeds available hardware threads: {}. Clamping to {}.", queueSize, maxThreads, maxThreads); + queueSize = static_cast(maxThreads); + } + this->mgconfig.setGraphQueueSize(queueSize); + return StatusCode::OK; + } + + // 2. Default: queue disabled unless graph explicitly provides directive. + SPDLOG_DEBUG("Graph queue disabled by default for mediapipe: {}. 
Add '# OVMS_GRAPH_QUEUE_SIZE: ' directive in graph.pbtxt to enable.", getName()); + return StatusCode::OK; +} + Status MediapipeGraphDefinition::validateForConfigLoadableness() { if (chosenConfig.empty()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse empty mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); @@ -129,7 +179,7 @@ Status MediapipeGraphDefinition::dryInitializeTest() { } Status MediapipeGraphDefinition::validate(ModelManager& manager) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); - if (!this->sidePacketMaps.empty()) { + if (!this->sidePacketMaps->empty()) { SPDLOG_ERROR("Internal Error: MediaPipe definition is in unexpected state."); return StatusCode::INTERNAL_ERROR; } @@ -177,6 +227,14 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { if (!status.ok()) { return status; } + status = this->resolveGraphQueueSize(); + if (!status.ok()) { + return status; + } + status = this->initializeQueueIfRequired(); + if (!status.ok()) { + return status; + } lock.unlock(); notifier.passed = true; @@ -187,11 +245,31 @@ Status MediapipeGraphDefinition::validate(ModelManager& manager) { return StatusCode::OK; } +Status MediapipeGraphDefinition::initializeQueueIfRequired() { + int initialQueueSize = this->mgconfig.getInitialQueueSize(); + if (initialQueueSize < 0) { + SPDLOG_DEBUG("Graph queue creation disabled for mediapipe: {} (graph_queue_size={})", getName(), initialQueueSize); + return StatusCode::OK; + } + try { + this->queue = std::make_shared(this->config, this->sidePacketMaps, initialQueueSize); + } catch (const std::exception& e) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} error: {}", getName(), e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) 
{ + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create graph queue for mediapipe: {} unknown error", getName()); + return StatusCode::INTERNAL_ERROR; + } + SPDLOG_DEBUG("Created graph queue with size {} for mediapipe: {}", initialQueueSize, getName()); + return StatusCode::OK; +} + MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, const MediapipeGraphConfig& config, MetricRegistry* registry, const MetricConfig* metricConfig, PythonBackend* pythonBackend) : + sidePacketMaps(std::make_shared()), name(name), status(SCHEDULER_CLASS_NAME, this->name), pythonBackend(pythonBackend), @@ -261,11 +339,19 @@ Status MediapipeGraphDefinition::create(std::unique_ptr& return status; } SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); - - pipeline = std::make_unique(getName(), std::to_string(getVersion()), - this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, - this->sidePacketMaps, - this->pythonBackend, this->reporter.get()); + if (this->queue) { + GraphIdGuard graphIdGuard(this->queue); + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get(), std::move(graphIdGuard)); + } else { + pipeline = std::make_unique(getName(), std::to_string(getVersion()), + this->config, this->inputTypes, this->outputTypes, this->inputNames, this->outputNames, + *this->sidePacketMaps, + this->pythonBackend, this->reporter.get()); + } + SPDLOG_DEBUG("Created Mediapipe graph executor: {}", getName()); return status; } @@ -339,12 +425,15 @@ Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGr std::this_thread::sleep_for(std::chrono::microseconds(1)); } this->mgconfig = config; - this->sidePacketMaps.clear(); + this->queue.reset(); + this->sidePacketMaps = std::make_shared(); return validate(manager); } void MediapipeGraphDefinition::retire(ModelManager& manager) { - this->sidePacketMaps.clear(); + this->queue.reset(); + // now we reset shared ptr maps so ongoing executions can continue + this->sidePacketMaps.reset(); this->status.handle(RetireEvent()); } @@ -411,7 +500,7 @@ class ResourcesCleaningGuard { resources(resources) {} ~ResourcesCleaningGuard() { if (shouldCleanup) { - resources.clear(); + resources.clear(); // TODO FIXME @atobisze check } } void disableCleaning() { @@ -423,7 +512,7 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_INFO("MediapipeGraphDefinition initializing graph nodes"); for (int i = 0; i < config.node().size(); i++) { #if (PYTHON_DISABLE == 0) - auto& pythonNodeResourcesMap = this->sidePacketMaps.pythonNodeResourcesMap; + auto& pythonNodeResourcesMap = this->sidePacketMaps->pythonNodeResourcesMap; if (config.node(i).calculator() == PYTHON_NODE_CALCULATOR_NAME) { ResourcesCleaningGuard pythonResourcesCleaningGuard(pythonNodeResourcesMap); if (!config.node(i).node_options().size()) { @@ -453,7 +542,8 @@ Status MediapipeGraphDefinition::initializeNodes() { #endif // Passed to both calculators that require LLM Engine (gRPC KServe & HTTP OpenAI) if (endsWith(config.node(i).calculator(), LLM_NODE_CALCULATOR_NAME)) { - auto& genAiServableMap = this->sidePacketMaps.genAiServableMap; + auto& genAiServableMap = this->sidePacketMaps->genAiServableMap; + auto& genAiExecutionContextMap = this->sidePacketMaps->genAiExecutionContextMap; ResourcesCleaningGuard genAiServablesCleaningGuard(genAiServableMap); if 
(!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node missing options in graph: {}. ", this->name); @@ -468,6 +558,10 @@ Status MediapipeGraphDefinition::initializeNodes() { SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM node name: {} already used in graph: {}. ", nodeName, this->name); return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; } + if (genAiExecutionContextMap.find(nodeName) != genAiExecutionContextMap.end()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "LLM execution context holder for node name: {} already exists in graph: {}. ", nodeName, this->name); + return StatusCode::LLM_NODE_NAME_ALREADY_EXISTS; + } std::shared_ptr servable; Status status = initializeGenAiServable(servable, config.node(i), mgconfig.getBasePath()); if (!status.ok()) { @@ -475,11 +569,12 @@ Status MediapipeGraphDefinition::initializeNodes() { return status; } genAiServableMap.insert(std::pair>(nodeName, std::move(servable))); + genAiExecutionContextMap.insert(std::pair>(nodeName, std::make_shared())); genAiServablesCleaningGuard.disableCleaning(); } // Passed to both calculators that require Image Generation pipelines if (endsWith(config.node(i).calculator(), IMAGE_GEN_CALCULATOR_NAME)) { - auto& imageGenPipelinesMap = this->sidePacketMaps.imageGenPipelinesMap; + auto& imageGenPipelinesMap = this->sidePacketMaps->imageGenPipelinesMap; ResourcesCleaningGuard guard(imageGenPipelinesMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Image Gen node missing options in graph: {}. ", this->name); @@ -513,7 +608,7 @@ Status MediapipeGraphDefinition::initializeNodes() { guard.disableCleaning(); } if (endsWith(config.node(i).calculator(), EMBEDDINGS_NODE_CALCULATOR_NAME)) { - auto& embeddingsServableMap = this->sidePacketMaps.embeddingsServableMap; + auto& embeddingsServableMap = this->sidePacketMaps->embeddingsServableMap; ResourcesCleaningGuard embeddingsServablesCleaningGuard(embeddingsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Embeddings node missing options in graph: {}. ", this->name); @@ -546,7 +641,7 @@ Status MediapipeGraphDefinition::initializeNodes() { embeddingsServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), RERANK_NODE_CALCULATOR_NAME)) { - auto& rerankServableMap = this->sidePacketMaps.rerankServableMap; + auto& rerankServableMap = this->sidePacketMaps->rerankServableMap; ResourcesCleaningGuard rerankServablesCleaningGuard(rerankServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Rerank node missing options in graph: {}. ", this->name); @@ -569,7 +664,7 @@ Status MediapipeGraphDefinition::initializeNodes() { rerankServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), STT_NODE_CALCULATOR_NAME)) { - auto& sttServableMap = this->sidePacketMaps.sttServableMap; + auto& sttServableMap = this->sidePacketMaps->sttServableMap; ResourcesCleaningGuard sttServablesCleaningGuard(sttServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "SpeechToText node missing options in graph: {}. 
", this->name); @@ -595,7 +690,7 @@ Status MediapipeGraphDefinition::initializeNodes() { sttServablesCleaningGuard.disableCleaning(); } if (endsWith(config.node(i).calculator(), TTS_NODE_CALCULATOR_NAME)) { - auto& ttsServableMap = this->sidePacketMaps.ttsServableMap; + auto& ttsServableMap = this->sidePacketMaps->ttsServableMap; ResourcesCleaningGuard ttsServablesCleaningGuard(ttsServableMap); if (!config.node(i).node_options().size()) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "TextToSpeech node missing options in graph: {}. ", this->name); diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp index 14c9e0679f..e94e89c802 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.hpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -40,14 +40,10 @@ #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_side_packets.hpp" #include "mediapipegraphconfig.hpp" #include "packettypes.hpp" - -#include "../sidepacket_servable.hpp" -#include "../embeddings/embeddings_servable.hpp" -#include "../rerank/rerank_servable.hpp" -#include "../audio/speech_to_text/s2t_servable.hpp" -#include "../audio/text_to_speech/t2s_servable.hpp" +#include "graphqueue.hpp" namespace ovms { class MediapipeGraphDefinitionUnloadGuard; @@ -58,44 +54,6 @@ class ModelManager; class MediapipeGraphExecutor; class Status; class PythonBackend; -class PythonNodeResources; -class GenAiServable; -struct ImageGenerationPipelines; -using PythonNodeResourcesMap = std::unordered_map>; -using GenAiServableMap = std::unordered_map>; -using RerankServableMap = std::unordered_map>; -using SttServableMap = std::unordered_map>; -using TtsServableMap = std::unordered_map>; -using EmbeddingsServableMap = std::unordered_map>; -using ImageGenerationPipelinesMap = std::unordered_map>; - -struct GraphSidePackets { - PythonNodeResourcesMap pythonNodeResourcesMap; - GenAiServableMap genAiServableMap; - ImageGenerationPipelinesMap imageGenPipelinesMap; - EmbeddingsServableMap embeddingsServableMap; - RerankServableMap rerankServableMap; - SttServableMap sttServableMap; - TtsServableMap ttsServableMap; - void clear() { - pythonNodeResourcesMap.clear(); - genAiServableMap.clear(); - imageGenPipelinesMap.clear(); - embeddingsServableMap.clear(); - rerankServableMap.clear(); - sttServableMap.clear(); - ttsServableMap.clear(); - } - bool empty() { - return (pythonNodeResourcesMap.empty() && - genAiServableMap.empty() && - imageGenPipelinesMap.empty() && - embeddingsServableMap.empty() && - rerankServableMap.empty() && - sttServableMap.empty() && - ttsServableMap.empty()); - } -}; class MediapipeGraphDefinition { friend MediapipeGraphDefinitionUnloadGuard; @@ -142,7 +100,7 @@ class MediapipeGraphDefinition { static constexpr model_version_t VERSION = 1; protected: - GraphSidePackets sidePacketMaps; + std::shared_ptr sidePacketMaps; struct ValidationResultNotifier { ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : @@ -165,10 +123,13 @@ class MediapipeGraphDefinition { }; virtual Status validateForConfigFileExistence(); + Status resolveGraphQueueSize(); Status validateForConfigLoadableness(); Status setStreamTypes(); Status dryInitializeTest(); + Status initializeQueueIfRequired(); + std::string chosenConfig; static MediapipeGraphConfig MGC; const std::string name; @@ -179,7 +140,7 @@ class MediapipeGraphDefinition { PipelineDefinitionStatus status; MediapipeGraphConfig mgconfig; - ::mediapipe::CalculatorGraphConfig 
config; + ::mediapipe::CalculatorGraphConfig config; // TODO rename configs Status createInputsInfo(); Status createOutputsInfo(); @@ -209,6 +170,7 @@ class MediapipeGraphDefinition { PythonBackend* pythonBackend; std::unique_ptr reporter; + std::shared_ptr queue; }; class MediapipeGraphDefinitionUnloadGuard { diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp index 93b53fdf8e..b821d1fef1 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.cpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -19,6 +19,8 @@ #include #include +#include "graph_executor_constants.hpp" + #pragma warning(push) #pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) #pragma GCC diagnostic push @@ -28,10 +30,11 @@ #pragma warning(pop) #if (PYTHON_DISABLE == 0) -#include "../python/python_backend.hpp" +#include "src/python/python_backend.hpp" #endif -#include "../image_gen/pipelines.hpp" +#include "src/image_gen/pipelines.hpp" +#include "src/llm/servable.hpp" namespace ovms { @@ -43,14 +46,10 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : + MediapipeServableMetricReporter* mediapipeServableMetricReporter, + GraphIdGuard&& guard) : name(name), version(version), config(config), @@ -58,10 +57,11 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputTypes(std::move(outputTypes)), inputNames(std::move(inputNames)), outputNames(std::move(outputNames)), - sidePacketMaps({pythonNodeResourcesMap, llmNodeResourcesMap, {}, embeddingsServableMap, rerankServableMap, sttServableMap, ttsServableMap}), + sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), - mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), + mediapipeServableMetricReporter(mediapipeServableMetricReporter), + guard(std::move(guard)) {} MediapipeGraphExecutor::MediapipeGraphExecutor( const std::string& name, const std::string& version, @@ -82,16 +82,35 @@ MediapipeGraphExecutor::MediapipeGraphExecutor( outputNames(std::move(outputNames)), sidePacketMaps(sidePacketMaps), pythonBackend(pythonBackend), - currentStreamTimestamp(STARTING_TIMESTAMP), + currentStreamTimestamp(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)), mediapipeServableMetricReporter(mediapipeServableMetricReporter) {} -const std::string MediapipeGraphExecutor::PYTHON_SESSION_SIDE_PACKET_TAG = "py"; -const std::string MediapipeGraphExecutor::LLM_SESSION_SIDE_PACKET_TAG = "llm"; -const std::string MediapipeGraphExecutor::IMAGE_GEN_SESSION_SIDE_PACKET_TAG = "pipes"; -const std::string MediapipeGraphExecutor::EMBEDDINGS_SESSION_SIDE_PACKET_TAG = "embeddings_servable"; -const std::string MediapipeGraphExecutor::RERANK_SESSION_SIDE_PACKET_TAG = "rerank_servable"; -const std::string MediapipeGraphExecutor::STT_SESSION_SIDE_PACKET_TAG = "s2t_servable"; -const std::string MediapipeGraphExecutor::TTS_SESSION_SIDE_PACKET_TAG = "t2s_servable"; -const 
::mediapipe::Timestamp MediapipeGraphExecutor::STARTING_TIMESTAMP = ::mediapipe::Timestamp(0); +Status MediapipeGraphExecutor::initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (const auto& [nodeName, servable] : this->sidePacketMaps.genAiServableMap) { + auto it = executionContextMap.find(nodeName); + if (it == executionContextMap.end() || !it->second) { + SPDLOG_DEBUG("Missing LLM execution context holder for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + auto& holder = it->second; + std::lock_guard lock(holder->mutex); + holder->executionContext = servable->createExecutionContext(); + if (!holder->executionContext) { + SPDLOG_DEBUG("Failed to create LLM execution context for node: {}", nodeName); + return StatusCode::INTERNAL_ERROR; + } + } + return StatusCode::OK; +} + +void MediapipeGraphExecutor::resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap) { + for (auto& [_, holder] : executionContextMap) { + if (!holder) { + continue; + } + std::lock_guard lock(holder->mutex); + holder->executionContext.reset(); + } +} } // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp index c165469395..57f8b659b9 100644 --- a/src/mediapipe_internal/mediapipegraphexecutor.hpp +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -16,6 +16,7 @@ #pragma once #include #include +#include #include #include #include @@ -36,9 +37,11 @@ #include "mediapipe/framework/port/status.h" #pragma GCC diagnostic pop #pragma warning(pop) +#include "graph_executor_constants.hpp" #include "mediapipe_utils.hpp" #include "mediapipegraphdefinition.hpp" // for version in response and PythonNodeResourceMap #include "packettypes.hpp" +#include "graphqueue.hpp" namespace ovms { class PythonBackend; @@ -71,9 +74,56 @@ inline StatusCode mediapipeAbslToOvmsStatus(absl::StatusCode code) { } \ _Pragma("warning(pop)") +template +struct MyFunctor : public OutputStreamObserverI { + const std::string& requestId; + MediapipeGraphExecutor& exec; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ResponseType& response; + MyFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, MediapipeGraphExecutor& exec, const RequestType& request, ResponseType& response) : + requestId(getRequestId(request)), + exec(exec), + outputStreamName(outputStreamName), + packetType(packetType), + response(response) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~MyFunctor() = default; +}; + +template +struct StreamingFunctor : public OutputStreamObserverI { + ReaderWriterType& serverReaderWriter; + std::mutex& sendMutex; + const std::string& executorName; + const std::string& executorVersion; + const std::string outputStreamName; + mediapipe_packet_type_enum packetType; + ExecutionContext executionContext; + MediapipeServableMetricReporter* metricReporter; + StreamingFunctor(const std::string& outputStreamName, mediapipe_packet_type_enum packetType, + const std::string& executorName, const std::string& executorVersion, + ReaderWriterType& serverReaderWriter, std::mutex& sendMutex, + ExecutionContext executionContext, MediapipeServableMetricReporter* metricReporter) : + serverReaderWriter(serverReaderWriter), + sendMutex(sendMutex), + executorName(executorName), + executorVersion(executorVersion), + outputStreamName(outputStreamName), + packetType(packetType), + executionContext(executionContext), + 
metricReporter(metricReporter) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override; + ~StreamingFunctor() = default; +}; class MediapipeGraphExecutor { +public: const std::string name; const std::string version; + +private: const ::mediapipe::CalculatorGraphConfig config; stream_types_mapping_t inputTypes; stream_types_mapping_t outputTypes; @@ -86,30 +136,22 @@ class MediapipeGraphExecutor { ::mediapipe::Timestamp currentStreamTimestamp; MediapipeServableMetricReporter* mediapipeServableMetricReporter; + std::optional guard; public: - static const std::string PYTHON_SESSION_SIDE_PACKET_TAG; - static const std::string LLM_SESSION_SIDE_PACKET_TAG; - static const std::string IMAGE_GEN_SESSION_SIDE_PACKET_TAG; - static const std::string EMBEDDINGS_SESSION_SIDE_PACKET_TAG; - static const std::string RERANK_SESSION_SIDE_PACKET_TAG; - static const std::string STT_SESSION_SIDE_PACKET_TAG; - static const std::string TTS_SESSION_SIDE_PACKET_TAG; - static const ::mediapipe::Timestamp STARTING_TIMESTAMP; - - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - const GenAiServableMap& llmNodeResourcesMap, - const EmbeddingsServableMap& embeddingsServableMap, - const RerankServableMap& rerankServableMap, - const SttServableMap& sttServableMap, - const TtsServableMap& ttsServableMap, + const GraphSidePackets& sidePacketMaps, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter); - MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard); + // Constructor without graph queue (old path - graph created per-request) + MediapipeGraphExecutor(const std::string& name, + const std::string& version, + const ::mediapipe::CalculatorGraphConfig& config, stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, @@ -117,18 +159,82 @@ class MediapipeGraphExecutor { PythonBackend* pythonBackend, MediapipeServableMetricReporter* mediapipeServableMetricReporter); + Status initializeLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + + void resetLlmExecutionContexts(GenAiExecutionContextMap& executionContextMap); + template Status infer(const RequestType* request, ResponseType* response, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); SPDLOG_DEBUG("Start unary KServe request mediapipe graph: {} execution", this->name); MetricCounterGuard failedRequestsGuard(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, false)); MetricGaugeGuard currentGraphsGuard(this->mediapipeServableMetricReporter->currentGraphs.get()); + if (this->guard.has_value()) { + return inferWithQueue(request, response, executionContext, failedRequestsGuard); + } else { + return inferWithoutQueue(request, response, executionContext, failedRequestsGuard); + } + } + + template + Status inferWithQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { + 
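+        // Reviewer sketch (hedged, not part of the patch): the queue path assumes
+        // GraphIdGuard checks a pre-initialized ::mediapipe::CalculatorGraph out of a
+        // fixed-size pool and returns it on destruction, roughly:
+        //
+        //   struct GraphIdGuard {
+        //       std::shared_ptr<GraphQueue> queue;
+        //       int id;                               // pool slot checked out in ctor
+        //       ::mediapipe::CalculatorGraph& graph;  // graph owned by that slot
+        //       explicit GraphIdGuard(std::shared_ptr<GraphQueue> q) :
+        //           queue(std::move(q)), id(queue->reserveId()), graph(queue->getGraph(id)) {}
+        //       ~GraphIdGuard() { queue->returnId(id); }  // slot becomes reusable
+        //   };
+        //
+        // reserveId()/returnId()/getGraph() are assumed names; the real interface
+        // lives in graphqueue.hpp, which this diff does not show.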
::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + for (auto& name : this->outputNames) { + if (name.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", name); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->gh->outStreamObservers.at(name)->current = std::make_shared>(name, this->outputTypes.at(name), *this, *request, *response); + } + + size_t numberOfPacketsCreated = 0; + auto ovms_status = createAndPushPacketsImpl( + std::shared_ptr(request, [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated); + if (!ovms_status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + return ovms_status; + } + + if (this->inputNames.size() > numberOfPacketsCreated) { + SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", + this->inputNames.size(), numberOfPacketsCreated, this->name); + return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); + } + + failedRequestsGuard.disable(); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); + + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Received all output stream packets for graph: {}", this->name); + return StatusCode::OK; + } + + template + Status inferWithoutQueue(const RequestType* request, ResponseType* response, ExecutionContext executionContext, MetricCounterGuard& failedRequestsGuard) { ::mediapipe::CalculatorGraph graph; MP_RETURN_ON_FAIL(graph.Initialize(this->config), std::string("failed initialization of MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); - enum : unsigned int { - PROCESS, - TIMER_END2 - }; + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + enum : unsigned int { PROCESS, + TIMER_END2 }; Timer timer; timer.start(PROCESS); std::unordered_map outputPollers; @@ -148,15 +254,15 @@ class MediapipeGraphExecutor { std::map inputSidePackets; OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, *request)); #if (PYTHON_DISABLE == 0) - inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(STARTING_TIMESTAMP); + inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = 
mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); - - inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[IMAGE_GEN_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.imageGenPipelinesMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[RERANK_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.rerankServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[STT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.sttServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[TTS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.ttsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); MP_RETURN_ON_FAIL(graph.StartRun(inputSidePackets), std::string("start MediaPipe graph: ") + this->name, StatusCode::MEDIAPIPE_GRAPH_START_ERROR); @@ -165,11 +271,7 @@ class MediapipeGraphExecutor { size_t numberOfPacketsCreated = 0; auto ovms_status = createAndPushPacketsImpl( - std::shared_ptr(request, - // Custom deleter to avoid deallocation by custom holder - // Conversion to shared_ptr is required for unified deserialization method - // for first and subsequent requests - [](const RequestType*) {}), + std::shared_ptr(request, [](const RequestType*) {}), this->inputTypes, this->pythonBackend, graph, @@ -180,25 +282,20 @@ class MediapipeGraphExecutor { return ovms_status; } - // This differs from inferStream - we require user to feed all streams if (this->inputNames.size() > numberOfPacketsCreated) { SPDLOG_DEBUG("Not all input packets created. Expected: {}, Actual: {}. Aborting execution of mediapipe graph: {}", - this->inputNames.size(), - numberOfPacketsCreated, - this->name); + this->inputNames.size(), numberOfPacketsCreated, this->name); return Status(StatusCode::INVALID_NO_OF_INPUTS, "Not all input packets created"); } failedRequestsGuard.disable(); INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, true)); - // we wait idle since some calculators could hold ownership on packet content while nodes further down the graph - // can be still processing those. 
Closing packet sources triggers Calculator::Close() on nodes that do not expect - // new packets auto status = graph.WaitUntilIdle(); - if (!status.ok()) { // Collect error metric after Open() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); MP_RETURN_ON_FAIL(graph.CloseAllPacketSources(), "graph close all packet sources", StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR); @@ -226,7 +323,7 @@ class MediapipeGraphExecutor { SPDLOG_TRACE("Received all: {} packets for: {}", receivedOutputs, outputStreamName); } status = graph.WaitUntilDone(); - if (!status.ok()) { // Collect error metric after Process() + if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); @@ -245,6 +342,131 @@ class MediapipeGraphExecutor { template Status inferStream(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { OVMS_PROFILE_FUNCTION(); + if (this->guard.has_value()) { + return inferStreamWithQueue(req, serverReaderWriter, executionContext); + } else { + return inferStreamWithoutQueue(req, serverReaderWriter, executionContext); + } + } + + template + Status inferStreamWithQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { + SPDLOG_DEBUG("Start streaming mediapipe graph: {} execution (queue path)", this->name); + std::mutex sendMutex; + try { + // Graph queue does not support user-provided input side packets. + // Side packets are set at queue construction time. + if (requestHasInputSidePackets(req)) { + SPDLOG_DEBUG("Graph queue does not support user-provided input side packets. " + "Side packets are set at graph queue construction time. Graph: {}", + this->name); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + "Input side packets are not supported for graphs with queue enabled"); + } + MetricGaugeGuard currentGraphs(this->mediapipeServableMetricReporter->currentGraphs.get()); + ::mediapipe::CalculatorGraph& graph = this->guard->graph; + auto llmContextStatus = initializeLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } + + enum : unsigned int { + PROCESS, + TIMER_END2 + }; + Timer timer; + timer.start(PROCESS); + + // Swap output stream observers to streaming functors. + // Observers are already installed on the graph at queue construction time; + // we only replace the functor implementation to serialize+send to the client. + // Lifetime: sendMutex and serverReaderWriter are stack-local in this method + // and outlive all callbacks because we WaitUntilIdle() before returning. 
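+            // Illustrative sketch (assumed wiring, hedged): at queue construction an
+            // observer is installed once per output stream, delegating to a holder
+            // whose functor we swap per request, roughly:
+            //
+            //   auto holder = std::make_shared<ObserverHolder>();
+            //   holder->current = std::make_shared<NullOutputStreamObserver>();
+            //   graph.ObserveOutputStream(outputName, [holder](const ::mediapipe::Packet& p) {
+            //       return holder->current->handlePacket(p);  // dispatch to swappable observer
+            //   });
+            //
+            // ObserveOutputStream is the real MediaPipe API; the holder setup shown
+            // here is an assumption about graphqueue.hpp, which this diff omits.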
+ for (const auto& outputName : this->outputNames) { + if (outputName.empty()) { + SPDLOG_DEBUG("Creating Mediapipe graph outputs name failed for: {}", outputName); + return StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR; + } + guard->gh->outStreamObservers.at(outputName)->current = std::make_shared>( + outputName, this->outputTypes.at(outputName), + this->name, this->version, + serverReaderWriter, sendMutex, + executionContext, this->mediapipeServableMetricReporter); + } + + size_t numberOfPacketsCreated = 0; + { + OVMS_PROFILE_SCOPE("Mediapipe graph deserializing first request"); + bool isSuccess = true; + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + std::shared_ptr(&req, + [](const RequestType*) {}), + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of first request", isSuccess); + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + } + + // Read loop + auto newReq = std::make_shared(); + while (waitForNewRequest(serverReaderWriter, *newReq)) { + auto pstatus = validateSubsequentRequestImpl( + *newReq, + this->name, + this->version, + this->inputTypes); + bool isSuccess = true; + if (pstatus.ok()) { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE( + createAndPushPacketsImpl( + newReq, + this->inputTypes, + this->pythonBackend, + graph, + this->guard->gh->currentTimestamp, + numberOfPacketsCreated), + "partial deserialization of subsequent requests", isSuccess); + } else { + OVMS_WRITE_ERROR_ON_FAIL_AND_CONTINUE(std::move(pstatus), "validate subsequent requests", isSuccess); + } + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getRequestsMetric(executionContext, isSuccess)); + + if (graph.HasError()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + SPDLOG_DEBUG("Graph {}: encountered an error, stopping the execution", this->name); + break; + } + + newReq = std::make_shared(); + } + + // Do NOT CloseAllPacketSources or WaitUntilDone - graph stays alive for reuse + auto status = graph.WaitUntilIdle(); + if (!status.ok()) { + INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); + } + resetLlmExecutionContexts(this->guard->gh->genAiExecutionContextMap); + MP_RETURN_ON_FAIL(status, "graph wait until idle", mediapipeAbslToOvmsStatus(status.code())); + // Increment timestamp for next request reusing this graph from the queue + this->guard->gh->currentTimestamp = ::mediapipe::Timestamp(this->guard->gh->currentTimestamp.Value() + 1); + SPDLOG_DEBUG("Graph {}: Done streaming execution (queue path)", this->name); + + timer.stop(PROCESS); + double processTime = timer.template elapsed(PROCESS); + OBSERVE_IF_ENABLED(this->mediapipeServableMetricReporter->getProcessingTimeMetric(executionContext), processTime); + return StatusCode::OK; + } catch (...) 
{ + SPDLOG_DEBUG("Graph {}: Exception while processing MediaPipe graph (queue path)", this->name); + return Status(StatusCode::UNKNOWN_ERROR, "Exception while processing MediaPipe graph"); + } + } + + template + Status inferStreamWithoutQueue(const RequestType& req, ReaderWriterType& serverReaderWriter, ExecutionContext executionContext) { SPDLOG_DEBUG("Start MediapipeGraphExecutor::inferEx mediapipe graph: {} execution", this->name); std::mutex sendMutex; try { @@ -255,6 +477,10 @@ class MediapipeGraphExecutor { // Init MP_RETURN_ON_FAIL(graph.Initialize(this->config), "graph initialization", StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR); } + auto llmContextStatus = initializeLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); + if (!llmContextStatus.ok()) { + return llmContextStatus; + } enum : unsigned int { PROCESS, TIMER_END2 @@ -299,10 +525,11 @@ class MediapipeGraphExecutor { OVMS_RETURN_ON_FAIL(deserializeInputSidePacketsFromFirstRequestImpl(inputSidePackets, req)); #if (PYTHON_DISABLE == 0) inputSidePackets[PYTHON_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.pythonNodeResourcesMap) - .At(STARTING_TIMESTAMP); + .At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); #endif - inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(STARTING_TIMESTAMP); - inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(STARTING_TIMESTAMP); + inputSidePackets[LLM_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[LLM_EXECUTION_CONTEXT_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.genAiExecutionContextMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); + inputSidePackets[EMBEDDINGS_SESSION_SIDE_PACKET_TAG] = mediapipe::MakePacket(this->sidePacketMaps.embeddingsServableMap).At(::mediapipe::Timestamp(STARTING_TIMESTAMP_VALUE)); // Add image generation side packet in case image generation allow for streaming } @@ -380,6 +607,7 @@ class MediapipeGraphExecutor { if (!status.ok()) { INCREMENT_IF_ENABLED(this->mediapipeServableMetricReporter->getGraphErrorMetric(executionContext)); } + resetLlmExecutionContexts(this->sidePacketMaps.genAiExecutionContextMap); MP_RETURN_ON_FAIL(status, "graph wait until done", mediapipeAbslToOvmsStatus(status.code())); SPDLOG_DEBUG("Graph {}: Done execution", this->name); } @@ -394,4 +622,44 @@ class MediapipeGraphExecutor { } }; +template +absl::Status MyFunctor::handlePacket(const ::mediapipe::Packet& packet) { + auto status = onPacketReadySerializeImpl( + this->requestId, + this->exec.name, + this->exec.version, + this->outputStreamName, + this->packetType, + packet, + response); + return status.ok() ? 
absl::OkStatus() : absl::Status(absl::StatusCode::kInternal, "Failed to serialize output packet"); +} + +template <typename ReaderWriterType> +absl::Status StreamingFunctor<ReaderWriterType>::handlePacket(const ::mediapipe::Packet& packet) { + OVMS_PROFILE_SCOPE("Mediapipe Packet Ready Callback"); + try { + std::lock_guard<std::mutex> lock(sendMutex); + auto status = onPacketReadySerializeAndSendImpl( + "" /*no ids for streaming*/, + executorName, + executorVersion, + outputStreamName, + packetType, + packet, + serverReaderWriter); + if (!status.ok()) { + SPDLOG_DEBUG("Error in send packet routine: {}", status.string()); + return absl::Status(absl::StatusCode::kInternal, "Error in send packet routine"); + } + auto now = std::chrono::system_clock::now(); + auto currentTimestamp = ::mediapipe::Timestamp(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count()); + OBSERVE_IF_ENABLED(metricReporter->getRequestLatencyMetric(executionContext), (currentTimestamp - packet.Timestamp()).Microseconds()); + INCREMENT_IF_ENABLED(metricReporter->getResponsesMetric(executionContext)); + return absl::OkStatus(); + } catch (...) { + SPDLOG_DEBUG("Error occurred during packet serialization in mediapipe graph: {}", executorName); + return absl::Status(absl::StatusCode::kCancelled, "Error during packet serialization"); + } +} } // namespace ovms diff --git a/src/mediapipe_internal/outputstreamobserver.hpp b/src/mediapipe_internal/outputstreamobserver.hpp new file mode 100644 index 0000000000..5c267e4187 --- /dev/null +++ b/src/mediapipe_internal/outputstreamobserver.hpp @@ -0,0 +1,64 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../execution_context.hpp" +#include "../model_metric_reporter.hpp" +#include "../profiler.hpp" +#include "../status.hpp" +#include "../timer.hpp" +#pragma warning(push) +#pragma warning(disable : 4324 6001 6385 6386 6326 6011 4309 4005 4456 6246) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#pragma GCC diagnostic pop +#pragma warning(pop) +#include "mediapipe_utils.hpp" +#include "packettypes.hpp" +#include "graphqueue.hpp" + +namespace ovms { +class PythonBackend; +class ServableMetricReporter; +class OutputStreamObserverI { +public: + virtual absl::Status handlePacket(const ::mediapipe::Packet& packet) = 0; + virtual ~OutputStreamObserverI() = default; +}; +class NullOutputStreamObserver : public OutputStreamObserverI { +public: + NullOutputStreamObserver() = default; + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_ERROR("NullOutputStreamObserver::handlePacket called - graph observer was not replaced before execution"); + throw std::runtime_error("NullOutputStreamObserver should have been replaced before graph execution"); + } +}; +// Mutable holder for an observer, allowing the observer implementation to be +// swapped while the map that owns this holder remains const. +struct ObserverHolder { + std::shared_ptr<OutputStreamObserverI> current; +}; +} // namespace ovms diff --git a/src/python/BUILD b/src/python/BUILD index f4fd4c571e..539abaf355 100644 --- a/src/python/BUILD +++ b/src/python/BUILD @@ -75,7 +75,7 @@ ovms_cc_library( "pythonexecutorcalculator_cc_proto", "utils", ], - visibility = ["//visibility:private"], + visibility = ["//visibility:public"], # TODO: revisit whether public visibility is required here alwayslink = 1, data = ["//src/python/binding:pyovms.so"], ) diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index 16531df9b6..ff0a6461fe 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -15,16 +15,12 @@ //***************************************************************************** #include "systeminfo.hpp" -#include -#include -#include +#include #include -#include "logging.hpp" -#include "status.hpp" - namespace ovms { uint16_t getCoreCount() { - return std::thread::hardware_concurrency(); + auto cores = std::thread::hardware_concurrency(); + return cores == 0 ? 
1 : static_cast<uint16_t>(cores); } } // namespace ovms diff --git a/src/systeminfo.hpp b/src/systeminfo.hpp index 2dc66fffe9..87d91d018f 100644 --- a/src/systeminfo.hpp +++ b/src/systeminfo.hpp @@ -22,4 +22,5 @@ namespace ovms { * @return uint16_t Available number of cores in the system */ uint16_t getCoreCount(); + } // namespace ovms diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp index 7fa5a70d31..6ebaeb0e18 100644 --- a/src/test/ensemble_config_change_stress.cpp +++ b/src/test/ensemble_config_change_stress.cpp @@ -813,7 +813,8 @@ TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringMetadataLoad) { SetUpConfig(basicMediapipeConfig); bool performWholeConfigReload = true; std::set<StatusCode> requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation - std::set<StatusCode> allowedLoadResults = {}; + // Graph path change triggers real reload, briefly entering NOT_LOADED_YET state + std::set<StatusCode> allowedLoadResults = {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET}; performStressTest( &ConfigChangeStressTest::triggerKFSGetPipelineMetadataInALoop, &ConfigChangeStressTest::reloadMediapipeGraph, @@ -821,4 +822,90 @@ requiredLoadResults, allowedLoadResults); } + +class StressMediapipeQueueChanges : public StressPipelineConfigChanges { + const std::string modelName = PIPELINE_1_DUMMY_NAME; + const std::string modelInputName = "b"; + const std::string modelOutputName = "a"; + +public: + std::string getServableName() override { + return modelName; + } + void SetUp() override { + SetUpCAPIServerInstance(createStressTestPipelineOneDummyConfig()); + } +}; +TEST_F(StressMediapipeQueueChanges, AddGraphDuringPredictLoad) { + // we add another graph definition during load (queue-enabled graph) + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set<StatusCode> requiredLoadResults = {StatusCode::OK}; // we expect full continuity of operation + std::set<StatusCode> allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::addNewMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set<StatusCode> requiredLoadResults = {StatusCode::OK, + StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE}; + std::set<StatusCode> allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, RemoveModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + // With queue path, pre-initialized graphs may keep working with cached sessions + // even after model removal, so MEDIAPIPE_PRECONDITION_FAILED may not occur + std::set<StatusCode> requiredLoadResults = { + StatusCode::OK, + }; + std::set<StatusCode> allowedLoadResults = { + StatusCode::MEDIAPIPE_EXECUTION_ERROR, + StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, + StatusCode::MEDIAPIPE_PRECONDITION_FAILED, + }; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::removeMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, 
ReloadModelDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeQueueChanges, ReloadMediapipeGraphDuringPredictLoad) { + SetUpConfig(basicMediapipeQueueConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; + std::set allowedLoadResults = {}; + performStressTest( + &ConfigChangeStressTest::triggerPredictInALoop, + &ConfigChangeStressTest::reloadMediapipeQueueGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +// Status and metadata tests are not duplicated for queue fixture because +// neither status nor metadata operations exercise the graph queue path. #endif diff --git a/src/test/graph_export_test.cpp b/src/test/graph_export_test.cpp index 777792e7d3..26a1684596 100644 --- a/src/test/graph_export_test.cpp +++ b/src/test/graph_export_test.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include @@ -504,10 +505,65 @@ class GraphCreationTest : public TestWithTempDir { TestWithTempDir::TearDown(); } - // Removes # OpenVINO Model Server REPLACE_PROJECT_VERSION comment added for debug purpose in graph export at the begging of graph.pbtxt - // This string differs per build and setup - std::string removeVersionString(std::string input) { - return input.erase(0, input.find("\n") + 1); + std::string getExpectedGraphQueueSizeDirective(const ovms::HFSettingsImpl& hfSettings) const { + if (hfSettings.task == ovms::IMAGE_GENERATION_GRAPH) { + return "1"; + } + return "AUTO"; + } + + std::string createGraphAndReadContents(const ovms::HFSettingsImpl& hfSettings) { + std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; + std::unique_ptr graphExporter = std::make_unique(); + auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + if (status != ovms::StatusCode::OK) { + ADD_FAILURE() << status.string(); + return ""; + } + return GetFileContents(graphPath); + } + + void assertGraphQueueHeader(const std::string& graphContents, const ovms::HFSettingsImpl& hfSettings) { + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE: "; + auto firstLineEnd = graphContents.find("\n"); + ASSERT_NE(firstLineEnd, std::string::npos) << graphContents; + auto queueLineStart = firstLineEnd + 1; + auto queueLineEnd = graphContents.find("\n", queueLineStart); + ASSERT_NE(queueLineEnd, std::string::npos) << graphContents; + + std::string actualQueueLine = graphContents.substr(queueLineStart, queueLineEnd - queueLineStart); + ASSERT_EQ(0, actualQueueLine.rfind(queueLinePrefix, 0)) << graphContents; + std::string expectedQueueLine = queueLinePrefix + getExpectedGraphQueueSizeDirective(hfSettings); + ASSERT_EQ(expectedQueueLine, actualQueueLine) << graphContents; + } + + void assertCreatedGraphEquals(const ovms::HFSettingsImpl& hfSettings, const std::string& expectedGraphContents, bool assertVersion = false) { + std::string graphContents = createGraphAndReadContents(hfSettings); + if (assertVersion) { + ASSERT_EQ(0, graphContents.find(getVersionString())) << graphContents; + } + 
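+        // For reference (hedged reconstruction from the assertions in this fixture),
+        // a freshly exported graph.pbtxt is expected to start with two generated
+        // header lines before the template body:
+        //
+        //   # OpenVINO Model Server <build-specific version string>
+        //   # OVMS_GRAPH_QUEUE_SIZE: AUTO     <- "1" for IMAGE_GENERATION_GRAPH
+        //   input_stream: ...                 <- template body starts here
+        //
+        // Only the two header lines vary per build/task; the body is compared verbatim.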
assertGraphQueueHeader(graphContents, hfSettings); + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; + } + + // Removes generated graph header lines (version and optional queue size directive) + // which differ across build/runtime setup. + std::string removeGeneratedGraphHeaders(std::string input) { + auto firstLineEnd = input.find("\n"); + if (firstLineEnd == std::string::npos) { + return ""; + } + input.erase(0, firstLineEnd + 1); + + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE:"; + if (input.rfind(queueLinePrefix, 0) == 0) { + auto secondLineEnd = input.find("\n"); + if (secondLineEnd == std::string::npos) { + return ""; + } + input.erase(0, secondLineEnd + 1); + } + return input; } std::string getVersionString() { @@ -519,14 +575,7 @@ class GraphCreationTest : public TestWithTempDir { TEST_F(GraphCreationTest, positiveDefaultWithVersionString) { ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedDefaultGraphContents; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents, true); } TEST_F(GraphCreationTest, positiveRerankWithVersionString) { @@ -535,14 +584,7 @@ TEST_F(GraphCreationTest, positiveRerankWithVersionString) { hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedRerankGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { @@ -551,14 +593,7 @@ TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedEmbeddingsGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { @@ -566,14 +601,7 @@ TEST_F(GraphCreationTest, positiveTextToSpeechWithVersionString) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = 
std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedTextToSpeechGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveSTTWithVersionString) { @@ -581,14 +609,7 @@ TEST_F(GraphCreationTest, positiveSTTWithVersionString) { hfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedSpeechToTextGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { @@ -596,25 +617,12 @@ TEST_F(GraphCreationTest, positiveImageGenWithVersionString) { hfSettings.task = ovms::IMAGE_GENERATION_GRAPH; ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings; hfSettings.graphSettings = std::move(imageGenerationGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - std::string expected = getVersionString() + expectedImageGenerationGraphContentsDefault; - ASSERT_EQ(expected, graphContents) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault, true); } TEST_F(GraphCreationTest, positiveDefault) { ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedDefaultGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDefaultGraphContents); } TEST_F(GraphCreationTest, positiveDraftAndFuse) { @@ -624,45 +632,24 @@ TEST_F(GraphCreationTest, positiveDraftAndFuse) { graphSettings.dynamicSplitFuse = "false"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - 
ASSERT_EQ(expectedDraftAndFuseGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedDraftAndFuseGraphContents); } TEST_F(GraphCreationTest, positiveGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); } TEST_F(GraphCreationTest, WillOverwriteExistingGraphPbtxtGGUF) { this->filesToPrintInCaseOfFailure.emplace_back("graph.pbtxt"); ovms::HFSettingsImpl hfSettings; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL.gguf"; - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents); hfSettings.ggufFilename = "PRETTY_GOOD_GGUF_MODEL_Q8-00001-of-20000.gguf"; - status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGGUFGraphContents2, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGGUFGraphContents2); } TEST_F(GraphCreationTest, rerankPositiveNonDefault) { @@ -677,13 +664,7 @@ TEST_F(GraphCreationTest, rerankPositiveNonDefault) { rerankGraphSettings.maxAllowedChunks = 18; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsNonDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsNonDefault); } TEST_F(GraphCreationTest, rerankPositiveDefault) { @@ -693,13 +674,7 @@ TEST_F(GraphCreationTest, rerankPositiveDefault) { ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedRerankGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedRerankGraphContentsDefault); } TEST_F(GraphCreationTest, rerankCreatedPbtxtInvalid) { 
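Aside: a minimal standalone sketch of the header-stripping rule the refactored tests above rely on. Names here are illustrative, not the patch's API; the real helper is removeGeneratedGraphHeaders shown earlier in this fixture.

    #include <string>

    // Drop the build-specific version line and, when present, the optional
    // "# OVMS_GRAPH_QUEUE_SIZE:" directive that immediately follows it.
    std::string stripGeneratedHeaders(std::string graph) {
        auto dropFirstLine = [](std::string& s) {
            auto eol = s.find('\n');
            s.erase(0, eol == std::string::npos ? s.size() : eol + 1);
        };
        dropFirstLine(graph);  // version line is always emitted first
        if (graph.rfind("# OVMS_GRAPH_QUEUE_SIZE:", 0) == 0) {
            dropFirstLine(graph);  // queue directive only on queue-aware exports
        }
        return graph;
    }

This mirrors the two-line header contract asserted by assertGraphQueueHeader: everything after the generated lines must match the expected template verbatim.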
@@ -733,13 +708,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveNonDefault) { embeddingsGraphSettings.truncate = "true"; embeddingsGraphSettings.pooling = "LAST"; hfSettings.graphSettings = std::move(embeddingsGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContents); } TEST_F(GraphCreationTest, embeddingsPositiveDefault) { @@ -748,13 +717,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveDefault) { ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); hfSettings.exportSettings.pluginConfig.numStreams = 1; - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedEmbeddingsGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedEmbeddingsGraphContentsDefault); } TEST_F(GraphCreationTest, embeddingsCreatedPbtxtInvalid) { @@ -808,13 +771,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContents); } TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { @@ -822,13 +779,7 @@ TEST_F(GraphCreationTest, textToSpeechPositiveDefault) { hfSettings.task = ovms::TEXT_TO_SPEECH_GRAPH; ovms::TextToSpeechGraphSettingsImpl textToSpeechGraphSettings; hfSettings.graphSettings = std::move(textToSpeechGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedTextToSpeechGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedTextToSpeechGraphContentsDefault); } TEST_F(GraphCreationTest, textToSpeechCreatedPbtxtInvalid) { @@ -857,13 +808,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveNonDefault) { hfSettings.exportSettings.modelPath = "/model1/path"; hfSettings.exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(speechToTextGraphSettings); 
- std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContents); } TEST_F(GraphCreationTest, speechToTextPositiveDefault) { @@ -871,13 +816,7 @@ TEST_F(GraphCreationTest, speechToTextPositiveDefault) { hfSettings.task = ovms::SPEECH_TO_TEXT_GRAPH; ovms::SpeechToTextGraphSettingsImpl speechToTextGraphSettings; hfSettings.graphSettings = std::move(speechToTextGraphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedSpeechToTextGraphContentsDefault, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedSpeechToTextGraphContentsDefault); } TEST_F(GraphCreationTest, speechToTextCreatedPbtxtInvalid) { @@ -906,13 +845,7 @@ TEST_F(GraphCreationTest, positivePluginConfigAll) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedFullPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedFullPluginGraphContents); } TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { @@ -924,13 +857,7 @@ TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsWithResponseParser, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, expectedGraphContentsWithResponseParser); } TEST_F(GraphCreationTest, positivePluginConfigOne) { @@ -939,13 +866,7 @@ TEST_F(GraphCreationTest, positivePluginConfigOne) { hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.graphSettings = std::move(graphSettings); - std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; - std::unique_ptr graphExporter = std::make_unique(); - auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); - ASSERT_EQ(status, ovms::StatusCode::OK); - - std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedOneSettingPluginGraphContents, removeVersionString(graphContents)) << graphContents; + assertCreatedGraphEquals(hfSettings, 
 
 TEST_F(GraphCreationTest, negativeCreateFileWrongDirectoryPaths) {
@@ -1016,11 +937,8 @@ TEST_F(GraphCreationTest, positiveTextGeneration) {
     hfSettings.graphSettings = std::move(graphSettings);
     hfSettings.exportSettings.targetDevice = "NPU";
     hfSettings.exportSettings.pluginConfig.useNpuPrefixCaching = true;
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::string subconfigPath = ovms::FileSystem::appendSlash(this->directoryPath) + "subconfig.json";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
+    std::string graphContents = createGraphAndReadContents(hfSettings);
+    assertGraphQueueHeader(graphContents, hfSettings);
 }
 
 TEST_F(GraphCreationTest, imageGenerationPositiveDefault) {
@@ -1028,13 +946,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveDefault) {
     hfSettings.task = ovms::IMAGE_GENERATION_GRAPH;
     ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings;
     hfSettings.graphSettings = std::move(imageGenerationGraphSettings);
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
-
-    std::string graphContents = GetFileContents(graphPath);
-    ASSERT_EQ(expectedImageGenerationGraphContentsDefault, removeVersionString(graphContents)) << graphContents;
+    assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContentsDefault);
 }
 
 TEST_F(GraphCreationTest, imageGenerationPositiveFull) {
@@ -1050,13 +962,7 @@ TEST_F(GraphCreationTest, imageGenerationPositiveFull) {
     imageGenerationGraphSettings.defaultNumInferenceSteps = 2;
     imageGenerationGraphSettings.maxNumInferenceSteps = 3;
     hfSettings.graphSettings = std::move(imageGenerationGraphSettings);
-    std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt";
-    std::unique_ptr<ovms::GraphExport> graphExporter = std::make_unique<ovms::GraphExport>();
-    auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings);
-    ASSERT_EQ(status, ovms::StatusCode::OK);
-
-    std::string graphContents = GetFileContents(graphPath);
-    ASSERT_EQ(expectedImageGenerationGraphContents, removeVersionString(graphContents)) << graphContents;
+    assertCreatedGraphEquals(hfSettings, expectedImageGenerationGraphContents);
 }
 
 TEST_F(GraphCreationTest, pluginConfigAsString) {
     ovms::ExportSettings exportSettings;
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index 94648d0e68..5f03176a25 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -212,7 +212,7 @@ Key: content-type; Value: application/json
 }
 
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -244,7 +244,7 @@ Key: test2; Value: header
 }
 
 JSON Parser:
-{"model":"gpt","stream":false,"messages":[]}0)";
+{"model":"gpt","stream":false,"messages":[]}0)";  // non-queue path: fresh graph, poller gets first packet only
     ASSERT_EQ(response, expectedResponse);
 }
 
@@ -1777,6 +1777,98 @@ TEST_F(HttpOpenAIHandlerParsingTest, responseFormatNullValue) {
     EXPECT_FALSE(apiHandler->getResponseFormat().has_value());
 }
 
+// ==================== HttpOpenAIHandlerWithQueueTest ====================
+// Same as HttpOpenAIHandlerTest but uses config with graph_queue_size=1
+// to verify the graph pool (GraphQueue) path works correctly.
+class HttpOpenAIHandlerWithQueueTest : public ::testing::Test {
+protected:
+    ovms::Server& server = ovms::Server::instance();
+    std::unique_ptr<ovms::HttpRestApiHandler> handler;
+
+    std::unique_ptr<std::thread> t;
+    std::string port = "9173";
+
+    std::unordered_map<std::string, std::string> headers{{"content-type", "application/json"}};
+    ovms::HttpRequestComponents comp;
+    std::string endpoint = "/v3/chat/completions";
+    std::shared_ptr<MockedServerRequestInterface> writer;
+    std::shared_ptr<MockedMultiPartParser> multiPartParser;
+    std::string response;
+    ovms::HttpResponseComponents responseComponents;
+
+    void SetUpServer(const char* configPath) {
+        ::SetUpServer(this->t, this->server, this->port, configPath);
+        EnsureServerStartedWithTimeout(this->server, 5);
+        handler = std::make_unique<ovms::HttpRestApiHandler>(server, 5);
+    }
+
+    void SetUp() {
+        writer = std::make_shared<MockedServerRequestInterface>();
+        multiPartParser = std::make_shared<MockedMultiPartParser>();
+        SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json").c_str());
+        ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpoint, headers), ovms::StatusCode::OK);
+    }
+
+    void TearDown() {
+        handler.reset();
+        server.setShutdownRequest(1);
+        t->join();
+        server.setShutdownRequest(0);
+    }
+};
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, UnaryWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    )";
+
+    const std::string URI = "/v3/something";
+    ASSERT_EQ(
+        handler->dispatchToProcessor(URI, requestBody, &response, comp, responseComponents, writer, multiPartParser),
+        ovms::StatusCode::OK);
+
+    std::string expectedResponse = R"(URI: /v3/something
+Key: content-type; Value: application/json
+Body:
+
+        {
+            "model": "gpt",
+            "stream": false,
+            "messages": []
+        }
+    
+JSON Parser:
+{"model":"gpt","stream":false,"messages":[]}012345678)";
+    ASSERT_EQ(response, expectedResponse);
+}
+
+TEST_F(HttpOpenAIHandlerWithQueueTest, StreamWithQueue) {
+    std::string requestBody = R"(
+        {
+            "model": "gpt",
+            "stream": true,
+            "messages": []
+        }
+    )";
+
+    EXPECT_CALL(*writer, PartialReplyBegin(::testing::_)).WillOnce(testing::Invoke([](std::function<void()> fn) { fn(); }));
+    EXPECT_CALL(*writer, PartialReplyEnd()).Times(1);
+    // The calculator produces 9 packets (timestamps 0-8) via loopback,
+    // each containing the accumulated body + timestamp. The '8' in the body stops the loop.
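+    // (note, hedged) IsDisconnected() appears to be polled once per produced
+    // packet on the streaming path, hence the matching Times(9) expectations below.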
+ EXPECT_CALL(*writer, PartialReply(::testing::_)).Times(9); + EXPECT_CALL(*writer, IsDisconnected()).Times(9); + + ASSERT_EQ( + handler->dispatchToProcessor("/v3/completions", requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); + + // For streaming, the response body stays empty (content goes through PartialReply callbacks) + ASSERT_EQ(response, ""); +} TEST_F(HttpOpenAIHandlerParsingTest, parseChatTemplateKwargsWithBooleanValue) { std::string json = R"({ "model": "llama", diff --git a/src/test/llm/config_queue.json b/src/test/llm/config_queue.json new file mode 100644 index 0000000000..1e16802ed9 --- /dev/null +++ b/src/test/llm/config_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"lm_cb_regular_queue", + "graph_path":"/ovms/src/test/llm/lm_cb_regular_queue.pbtxt" + } + ] +} diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index 2e52e4fa59..a3def1a82b 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -174,6 +174,51 @@ std::shared_ptr LLMFlowHttpTest::cbPipe; std::shared_ptr LLMFlowHttpTest::llmExecutorWrapper; std::unique_ptr LLMFlowHttpTest::t; +class LLMFlowHttpQueueGraphTest : public ::testing::Test { +protected: + static std::unique_ptr t; + +public: + std::unique_ptr handler; + std::unordered_map headers{{"content-type", "application/json"}}; + ovms::HttpRequestComponents comp; + const std::string endpointChatCompletions = "/v3/chat/completions"; + const std::string endpointCompletions = "/v3/completions"; + std::shared_ptr writer; + std::shared_ptr multiPartParser; + std::string response; + rapidjson::Document parsedResponse; + ovms::HttpResponseComponents responseComponents; + + static void SetUpTestSuite() { + std::string port = "9173"; + ovms::Server& server = ovms::Server::instance(); + ::SetUpServer(t, server, port, getGenericFullPathForSrcTest("/ovms/src/test/llm/config_queue.json").c_str(), 60); + } + + static void TearDownTestSuite() { + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } + + void SetUp() { + writer = std::make_shared(); + multiPartParser = std::make_shared(); + ON_CALL(*writer, PartialReplyBegin(::testing::_)).WillByDefault(testing::Invoke([](std::function fn) { fn(); })); + ovms::Server& server = ovms::Server::instance(); + handler = std::make_unique(server, 5); + ASSERT_EQ(handler->parseRequestComponents(comp, "POST", endpointCompletions, headers), ovms::StatusCode::OK); + } + + void TearDown() { + handler.reset(); + } +}; + +std::unique_ptr LLMFlowHttpQueueGraphTest::t; + // --------------------------------------- OVMS LLM nodes tests /* @@ -249,6 +294,157 @@ TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJson) { } } +TEST_F(LLMFlowHttpQueueGraphTest, unaryCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "best_of": 16, + "max_tokens": 5, + "prompt": "What is OpenVINO?" 
+ } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_FALSE(choice["logprobs"].IsObject()); + ASSERT_TRUE(choice["text"].IsString()); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "text_completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, unaryChatCompletionsJsonQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" + } + ] + } + )"; + + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + for (auto& choice : parsedResponse["choices"].GetArray()) { + ASSERT_TRUE(choice["finish_reason"].IsString()); + ASSERT_TRUE(choice["message"].IsObject()); + ASSERT_TRUE(choice["message"]["content"].IsString()); + EXPECT_STREQ(choice["message"]["role"].GetString(), "assistant"); + } + + ASSERT_TRUE(parsedResponse["usage"].IsObject()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["prompt_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["completion_tokens"].IsInt()); + ASSERT_TRUE(parsedResponse["usage"].GetObject()["total_tokens"].IsInt()); + ASSERT_EQ(parsedResponse["usage"].GetObject()["completion_tokens"].GetInt(), 5); + EXPECT_STREQ(parsedResponse["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(parsedResponse["object"].GetString(), "chat.completion"); +} + +TEST_F(LLMFlowHttpQueueGraphTest, streamChatCompletionsQueueGraph) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": true, + "seed" : 1, + "max_tokens": 5, + "ignore_eos": true, + "messages": [ + { + "role": "user", + "content": "What is OpenVINO?" 
+ } + ] + } + )"; + ON_CALL(*writer, PartialReply).WillByDefault([this](std::string response) { + rapidjson::Document d; + std::string dataPrefix = "data:"; + ASSERT_STREQ(response.substr(0, dataPrefix.size()).c_str(), dataPrefix.c_str()); + size_t pos = response.find("\n"); + ASSERT_NE(pos, response.npos); + rapidjson::ParseResult parsingSucceeded = d.Parse(response.substr(dataPrefix.size(), (pos - dataPrefix.size())).c_str()); + ASSERT_EQ(parsingSucceeded.Code(), 0); + ASSERT_TRUE(d["choices"].IsArray()); + ASSERT_EQ(d["choices"].Capacity(), 1); + int i = 0; + for (auto& choice : d["choices"].GetArray()) { + if (choice["finish_reason"].IsString()) { + EXPECT_STREQ(choice["finish_reason"].GetString(), "length"); + } else { + ASSERT_TRUE(choice["finish_reason"].IsNull()); + } + ASSERT_EQ(choice["index"], i++); + ASSERT_TRUE(choice["delta"].IsObject()); + ASSERT_TRUE(choice["delta"]["content"].IsString()); + } + EXPECT_STREQ(d["model"].GetString(), "lm_cb_regular_queue"); + EXPECT_STREQ(d["object"].GetString(), "chat.completion.chunk"); + }); + ASSERT_EQ( + handler->dispatchToProcessor(endpointChatCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::PARTIAL_END); +} + +// Test that verifies graph reuse works correctly with queue size 1 +// Sends 2 sequential requests to ensure the same graph instance is reused +TEST_F(LLMFlowHttpQueueGraphTest, queueGraphReuseTwoRequests) { + std::string requestBody = R"( + { + "model": "lm_cb_regular_queue", + "stream": false, + "seed" : 1, + "max_tokens": 5, + "prompt": "What is OpenVINO?" + } + )"; + + // First request + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + + // Second request - reuses the same graph from the queue + // This validates that timestamp increment works for graph reuse + response.clear(); + ASSERT_EQ( + handler->dispatchToProcessor(endpointCompletions, requestBody, &response, comp, responseComponents, writer, multiPartParser), + ovms::StatusCode::OK); + parsedResponse.Parse(response.c_str()); + ASSERT_TRUE(parsedResponse["choices"].IsArray()); + ASSERT_EQ(parsedResponse["choices"].Capacity(), 1); + ASSERT_TRUE(parsedResponse["choices"].GetArray()[0]["text"].IsString()); + // Note: Responses may differ due to KV cache state despite same seed +} + TEST_P(LLMFlowHttpTestParameterized, unaryCompletionsJsonEchoWithCompletion) { auto params = GetParam(); // TODO: In the next step we should break this suite into smaller ones, use proper configuration instead of skipping diff --git a/src/test/llm/lm_cb_regular_queue.pbtxt b/src/test/llm/lm_cb_regular_queue.pbtxt new file mode 100644 index 0000000000..60ef13f6b7 --- /dev/null +++ b/src/test/llm/lm_cb_regular_queue.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node { + name: "llmNode1" + calculator: "HttpLLMCalculator" + input_side_packet: "LLM_NODE_RESOURCES:llm" + input_side_packet: "LLM_NODE_EXECUTION_CONTEXTS:llm_ctx" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + node_options: { + [type.googleapis.com/mediapipe.LLMCalculatorOptions]: { + models_path: "/ovms/src/test/llm_testing/HuggingFaceTB/SmolLM2-360M-Instruct" + cache_size: 1 + } + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json index 5137dbea92..d2803b795f 100644 --- a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock.json @@ -6,4 +6,4 @@ "graph_path": "/ovms/src/test/mediapipe/graph_gpt.pbtxt" } ] -} \ No newline at end of file +} diff --git a/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json new file mode 100644 index 0000000000..ea25079556 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_openai_chat_completions_mock_with_queue.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "gpt", + "graph_path": "/ovms/src/test/mediapipe/graph_gpt_with_queue.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/graph_gpt_with_queue.pbtxt b/src/test/mediapipe/graph_gpt_with_queue.pbtxt new file mode 100644 index 0000000000..43c2ef68c1 --- /dev/null +++ b/src/test/mediapipe/graph_gpt_with_queue.pbtxt @@ -0,0 +1,40 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
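+# (reviewer note, hedged) OVMS appears to read this directive from the leading
+# comment block of graph.pbtxt: a positive integer pre-creates that many graph
+# instances in the GraphQueue pool, "AUTO" derives the size from the hardware
+# thread count (oversized values are clamped), and -1 or a missing directive
+# keeps the default non-queue path; 0 and values below -1 are rejected. See the
+# MediapipeGraphQueueSizeDirective tests later in this diff.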
+# +# OVMS_GRAPH_QUEUE_SIZE: 1 +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" + +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..2a5016a7fb --- /dev/null +++ b/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2026 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
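+# (note) Queue size 16 exercises a multi-instance pool. This file is a
+# byte-identical copy of graph_queue_dummyadapterfull_dummyinputnames.pbtxt
+# (same blob hash 2a5016a7fb), presumably backing a config-reload "newpath"
+# scenario.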
+# +# OVMS_GRAPH_QUEUE_SIZE: 16 +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt new file mode 100644 index 0000000000..01521b1c08 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt @@ -0,0 +1,45 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe_framework_test.cpp b/src/test/mediapipe_framework_test.cpp index 85abfbd519..a0bd350e83 100644 --- a/src/test/mediapipe_framework_test.cpp +++ b/src/test/mediapipe_framework_test.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,8 +30,11 @@ #include "../grpcservermodule.hpp" #include "../http_rest_api_handler.hpp" #include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_internal/outputstreamobserver.hpp" #include "../mediapipe_internal/mediapipefactory.hpp" #include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../mediapipe_internal/mediapipe_utils.hpp" +#include "mediapipe/framework/thread_pool_executor.h" #include "../metric_config.hpp" #include "../metric_module.hpp" #include "../model_service.hpp" @@ -79,9 +83,385 @@ class MediapipeFrameworkTest : public TestWithTempDir { class MediapipeNegativeFrameworkTest : public MediapipeFrameworkTest { }; -// purpose of this test is to ensure there is no hang in case of one of the graph nodes -// not producing output packet +using mediapipe::Adopt; +using mediapipe::CalculatorGraphConfig; 
+using mediapipe::Packet; +using mediapipe::ParseTextProtoOrDie; +using mediapipe::Timestamp; + +#define MP_ERROR_STOP(A) \ + { \ + absStatus = A; \ + if (!absStatus.ok()) { \ + const std::string absMessage = absStatus.ToString(); \ + SPDLOG_DEBUG("{}", absMessage); \ + ASSERT_TRUE(false); \ + } \ + } +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerCheckNoInputPackets) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. + ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? 
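+    // (note) The ObserveOutputStream lambda above captures perGraphObserverFunctor
+    // by reference, so reassigning the shared_ptr later swaps packet handling on a
+    // reused graph without re-registering the observer; that is the core idea of
+    // this hot-reload POC.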
+ // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + SPDLOG_TRACE("MyFunctor observer constructed:{}", (void*)this); + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + SPDLOG_TRACE("my functor:{}", (void*)this); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + SPDLOG_TRACE("Now swap Functor, we don't have to call ObserverOutputStream"); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + // MP_ERROR_STOP(graph.AddPacketToInputStream( + // inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + // MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOC) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_benchmark.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + const std::string outputName{"output"}; + absl::Status absStatus; + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + float expVal = 13.5; + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + // now start execution + absStatus = graph.StartRun({}); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + expVal = 42; + data[0] = expVal - 1; + perGraphObserverFunctor = std::make_shared(expVal); + // now add second packet + auto inputTensor2 = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor2.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); +} +TEST_F(MediapipeFrameworkTest, HotReloadOutputStreamHandlerPOCCompare) { + // we need it only so that dummy is available via C-API + ServerGuard servGuard(getGenericFullPathForSrcTest("/ovms/src/test/configs/config_standard_dummy.json")); + std::string graph_proto = R"( + input_stream: "IN:input" + output_stream: "OUT:output" + node { + calculator: "OpenVINOModelServerSessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOModelServerSessionCalculatorOptions]: { + servable_name: "dummy" + } + } + } + node { + calculator: "OpenVINOInferenceCalculator" + input_side_packet: "SESSION:session" + input_stream: "OVTENSOR:input" + output_stream: "OVTENSOR:output" + node_options: { + [type.googleapis.com / mediapipe.OpenVINOInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "OVTENSOR" + value: "b" + } + tag_to_output_tensor_names { + key: "OVTENSOR" + value: "a" + } + } + } + } + )"; + CalculatorGraphConfig graphConfig = + ParseTextProtoOrDie(graph_proto); + const std::string inputStreamName = "input"; + const std::string outputStreamName = "output"; + // avoid creating pollers, retrieving packets etc. 
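+    // (note) This benchmark times three execution models over N = 1000 requests:
+    // timer 0: one initialized graph reused, with the observer swapped per request (new flow);
+    // timer 1: a freshly built graph plus output poller per request (current flow);
+    // timer 2: a fresh graph per request scheduled on a shared ThreadPoolExecutor.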
+ ////////////////// + // model mgmt thread + ////////////////// + // std::shared_ptr queue; + // queue = std::make_shared(graphConfig, 1); + auto datatype = ov::element::Type_t::f32; + ov::Shape shape{1, 10}; + int timestamp{0}; + float expVal = 13.5; + std::vector data{expVal - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + ovms::Timer<3> timer; + const std::string outputName{"output"}; + int N = 1000; + + absl::Status absStatus; + // here starts new case of ovms + { // new case of ovms + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + // Install NullObserver + // its not per graph but per output + std::shared_ptr perGraphObserverFunctor = std::make_shared(); + MP_ERROR_STOP(graph.ObserveOutputStream(outputStreamName, [&perGraphObserverFunctor](const ::mediapipe::Packet& packet) -> absl::Status { return perGraphObserverFunctor->handlePacket(packet); })); + // Here ends model management + // Here starts mp graph executor + // ovms::GraphIdGuard graphIdGuard(queue); // TODO timeout? + // get graphIdGuard from queue + // create FrontendAppropriateObserver + struct MyFunctor : public OutputStreamObserverI { + float expVal; + MyFunctor(float expVal) : + expVal(expVal) { + } + absl::Status handlePacket(const ::mediapipe::Packet& packet) override { + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + return absl::OkStatus(); + } + }; + absStatus = graph.StartRun({}); + { + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + timer.start(0); + for (auto i = 0; i < N; ++i) { // iter begin + perGraphObserverFunctor = std::make_shared(expVal); + auto copyOfMyFunctor = perGraphObserverFunctor; + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + MP_ERROR_STOP(graph.WaitUntilIdle()); + } // iter end + timer.stop(0); + } // end of new case ovms + { // current ovms case + timer.start(1); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + 
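+            // Full graph teardown on every iteration; this per-request
+            // setup/teardown is the cost the reused-graph flow above avoids.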
MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(1); + } + { // thread pool case + // auto sharedThreadPool = std::make_shared(std::thread::hardware_concurrency()); + auto sharedThreadPool = std::make_shared(24); + timer.start(2); + for (auto i = 0; i < N; ++i) { // iter begin + ::mediapipe::CalculatorGraph graph; + MP_ERROR_STOP(graph.SetExecutor("", sharedThreadPool)); + EXPECT_EQ(graph.Initialize(graphConfig).code(), absl::StatusCode::kOk); + auto absStatusOrPoller = graph.AddOutputStreamPoller(outputName); + MP_ERROR_STOP(graph.StartRun({})); + auto inputTensor = std::make_unique(datatype, shape, data.data()); + MP_ERROR_STOP(graph.AddPacketToInputStream( + inputStreamName, Adopt(inputTensor.release()).At(Timestamp(timestamp++)))); + ::mediapipe::Packet packet; + absStatusOrPoller.value().Next(&packet); + const ov::Tensor& outputTensor = + packet.Get(); + auto datatype = ov::element::Type_t::f32; + EXPECT_EQ(datatype, outputTensor.get_element_type()); + EXPECT_THAT(outputTensor.get_shape(), testing::ElementsAre(1, 10)); + const void* outputData = outputTensor.data(); + EXPECT_EQ(*((float*)outputData), expVal); + MP_ERROR_STOP(graph.WaitUntilIdle()); + MP_ERROR_STOP(graph.CloseAllPacketSources()); + MP_ERROR_STOP(graph.WaitUntilDone()); + } // iter end + timer.stop(2); + } // end of thread pool case + double ms = timer.elapsed(0) / 1000; + SPDLOG_DEBUG("{} iterations of new flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(1) / 1000; + SPDLOG_DEBUG("{} iterations of old flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + ms = timer.elapsed(2) / 1000; + SPDLOG_DEBUG("{} iterations of thread pool flow took:{} ms. FPS:{}", N, ms, N / ms * 1000); + SPDLOG_DEBUG("Threads: {}", std::thread::hardware_concurrency()); +} + TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { + // purpose of this test is to ensure there is no hang in case of one of the graph nodes + // not producing output packet SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_no_calc_output_stream.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -99,7 +479,7 @@ TEST_F(MediapipeNegativeFrameworkTest, NoOutputPacketProduced) { } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { - GTEST_SKIP() << "Terminate called otherwise"; + GTEST_SKIP() << "Terminate called otherwise"; // TODO FIXME check SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_process.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -116,12 +496,12 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringProcess) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } -TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { +TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { // TODO FIXME add checks to exception handling? 
SetUpServer(getGenericFullPathForSrcTest("/ovms/src/test/mediapipe/negative/config_exception_during_getcontract.json").c_str()); const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); @@ -138,9 +518,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetContract) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::UNAVAILABLE) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { @@ -161,9 +541,9 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringGetOpen) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { @@ -184,8 +564,8 @@ TEST_F(MediapipeNegativeFrameworkTest, ExceptionDuringClose) { auto status = impl.ModelInfer(nullptr, &request, &response); ASSERT_EQ(status.error_code(), grpc::StatusCode::INVALID_ARGUMENT) << status.error_message(); } catch (std::exception& e) { - SPDLOG_ERROR("ERs"); + SPDLOG_ERROR("ER: {}", e.what()); } catch (...) { - SPDLOG_ERROR("ER"); + SPDLOG_ERROR("ER: unknown exception"); } } diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp index 55b6ab96ed..19875915f7 100644 --- a/src/test/mediapipeflow_test.cpp +++ b/src/test/mediapipeflow_test.cpp @@ -232,9 +232,11 @@ class MediapipeFlowTest : public ::testing::TestWithParam { void SetUp() override { } void TearDown() { - server.setShutdownRequest(1); - t->join(); - server.setShutdownRequest(0); + if (t) { + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } } }; @@ -1724,7 +1726,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { ASSERT_EQ(it->shape_size(), 1); ASSERT_EQ(it->shape(0), stringParamValue.size()); const std::string& content = response.raw_output_contents(outputId); - SPDLOG_ERROR("Received output size:{} content:{}", content.size(), content); + SPDLOG_DEBUG("Received output size:{} content:{}", content.size(), content); EXPECT_EQ(content, stringParamValue); break; } @@ -1743,7 +1745,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(bool)); const bool castContent = *((bool*)content.data()); - SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, boolParamValue); break; } @@ -1762,7 +1764,7 @@ TEST_F(MediapipeFlowTest, InferWithParams) { const std::string& content = response.raw_output_contents(outputId); ASSERT_EQ(content.size(), sizeof(int64_t)); const int64_t castContent = *((int64_t*)content.data()); - SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + SPDLOG_DEBUG("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); EXPECT_EQ(castContent, int64ParamValue); break; 
} @@ -1991,6 +1993,28 @@ TEST(Mediapipe, MetadataDummyInputTypes) { } } } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out2" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } + node { + calculator: "OVMSOVCalculator" + input_stream: "B:in2" + output_stream: "A:out3" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } + } )"; ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; @@ -2681,13 +2705,17 @@ class MediapipeSerialization : public ::testing::Test { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, nullptr, mediapipeServableMetricReporter) {} + const GraphSidePackets& sidePackets, + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + nullptr, mediapipeServableMetricReporter, std::move(guard)) {} }; protected: std::unique_ptr reporter; + std::shared_ptr sidePackets; + std::shared_ptr queue; std::unique_ptr executor; ::inference::ModelInferResponse mp_response; void SetUp() { @@ -2700,9 +2728,11 @@ class MediapipeSerialization : public ::testing::Test { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; this->reporter = std::make_unique(nullptr, nullptr, ""); // disabled reporter - executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, this->reporter.get()); + sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + executor = std::make_unique("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, this->reporter.get(), std::move(guard)); } }; @@ -3099,7 +3129,7 @@ class MediapipeFlowStartTest : public TestWithTempDir { auto start = std::chrono::high_resolution_clock::now(); while (!isMpReady(waitForServable) && (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < SERVER_START_FROM_CONFIG_TIMEOUT_SECONDS)) { - std::this_thread::sleep_for(std::chrono::microseconds(100)); + std::this_thread::sleep_for(std::chrono::microseconds(1000)); } const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); if (!grpcModule) { @@ -4036,3 +4066,119 @@ TEST(WhitelistRegistered, MediapipeSubgraphList) { ASSERT_THAT(mediapipe::SubgraphRegistry::GetRegisteredNames(), UnorderedElementsAreArray(expected)) << readableSetError(mediapipe::SubgraphRegistry::GetRegisteredNames(), expected); } + +// --- OVMS_GRAPH_QUEUE_SIZE pbtxt directive tests --- + +// Minimal valid pbtxt that MediaPipe can parse (uses a registered test calculator) +static const char* MINIMAL_PBTXT_TEMPLATE = R"( +input_stream: "HTTP_REQUEST_PAYLOAD:input" +output_stream: "HTTP_RESPONSE_PAYLOAD:output" +node: { + calculator: "OpenAIChatCompletionsMockCalculator" + input_stream: "LOOPBACK:loopback" + input_stream: "HTTP_REQUEST_PAYLOAD:input" + 
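+  # note: deliberately no OVMS_GRAPH_QUEUE_SIZE directive inside this template;
+  # makePbtxtWithDirective() prepends one per test case below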
output_stream: "LOOPBACK:loopback" + output_stream: "HTTP_RESPONSE_PAYLOAD:output" + input_stream_info: { + tag_index: 'LOOPBACK:0', + back_edge: true + } + input_stream_handler { + input_stream_handler: "SyncSetInputStreamHandler", + options { + [mediapipe.SyncSetInputStreamHandlerOptions.ext] { + sync_set { + tag_index: "LOOPBACK:0" + } + } + } + } +} +)"; + +static std::string makePbtxtWithDirective(const std::string& directive) { + return directive + "\n" + MINIMAL_PBTXT_TEMPLATE; +} + +TEST(MediapipeGraphQueueSizeDirective, NoDirectiveMeansDisabled) { + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, MINIMAL_PBTXT_TEMPLATE); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_FALSE(mgc.getGraphQueueSize().has_value()); + // getInitialQueueSize on default mgc returns -1 + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, ExplicitPositiveValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 4"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), 4); +} + +TEST(MediapipeGraphQueueSizeDirective, DisabledExplicitly) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -1"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_EQ(def.getMediapipeGraphConfig().getInitialQueueSize(), -1); +} + +TEST(MediapipeGraphQueueSizeDirective, AutoValue) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: AUTO"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + ASSERT_EQ(status, ovms::StatusCode::OK); + EXPECT_GT(def.getMediapipeGraphConfig().getInitialQueueSize(), 0); +} + +TEST(MediapipeGraphQueueSizeDirective, ZeroRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: 0"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, NegativeBelowMinusOneRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: -2"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} + +TEST(MediapipeGraphQueueSizeDirective, ExceedsHardwareThreads) { + unsigned int maxThreads = std::thread::hardware_concurrency(); + if (maxThreads == 0) { + GTEST_SKIP() << "hardware_concurrency() returned 0, cannot test thread limit"; + } + int oversized = static_cast(maxThreads) + 1; + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: " + std::to_string(oversized)); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + // Queue size is clamped to 
hardware_concurrency with a warning, not rejected + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TEST(MediapipeGraphQueueSizeDirective, InvalidStringRejected) { + std::string pbtxt = makePbtxtWithDirective("# OVMS_GRAPH_QUEUE_SIZE: INVALID"); + ovms::MediapipeGraphConfig mgc; + DummyMediapipeGraphDefinition def("test", mgc, pbtxt); + ovms::ModelManager manager; + auto status = def.validate(manager); + EXPECT_EQ(status, ovms::StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID); +} diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..a4047680b2 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -66,10 +66,24 @@ class HfDownloaderPullHfModel : public TestWithTempDir { TestWithTempDir::TearDown(); } - // Removes # OpenVINO Model Server REPLACE_PROJECT_VERSION comment added for debug purpose in graph export at the begging of graph.pbtxt - // This string differs per build and setup - std::string removeVersionString(std::string input) { - return input.erase(0, input.find("\n") + 1); + // Removes generated graph header lines (version and optional queue size directive) + // which differ across build/runtime setup. + std::string removeGeneratedGraphHeaders(std::string input) { + auto firstLineEnd = input.find("\n"); + if (firstLineEnd == std::string::npos) { + return ""; + } + input.erase(0, firstLineEnd + 1); + + const std::string queueLinePrefix = "# OVMS_GRAPH_QUEUE_SIZE:"; + if (input.rfind(queueLinePrefix, 0) == 0) { + auto secondLineEnd = input.find("\n"); + if (secondLineEnd == std::string::npos) { + return ""; + } + input.erase(0, secondLineEnd + 1); + } + return input; } }; @@ -165,7 +179,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownload) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { @@ -189,7 +203,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, ModelOutOfOvOrg) { @@ -217,7 +231,7 @@ TEST_F(HfDownloaderPullHfModel, ModelOutOfOvOrg) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string changePath = ovms::FileSystem::joinPath({this->directoryPath, "OpenVINO"}); std::string newPath = ovms::FileSystem::joinPath({this->directoryPath, "META"}); @@ -253,7 +267,7 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStartModelOutsideOvOrg) { ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContents, removeGeneratedGraphHeaders(graphContents)) << graphContents; } TEST_F(HfDownloaderPullHfModel, DownloadDraftModel) { @@ 
-276,7 +290,7 @@ TEST_F(HfDownloaderPullHfModel, DownloadDraftModel) { ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); std::string graphContents = GetFileContents(graphPath); - ASSERT_EQ(expectedGraphContentsDraft, removeVersionString(graphContents)) << graphContents; + ASSERT_EQ(expectedGraphContentsDraft, removeGeneratedGraphHeaders(graphContents)) << graphContents; std::string basePath2 = ovms::FileSystem::joinPath({basePath, "OpenVINO-distil-small.en-int4-ov"}); std::string modelPath2 = ovms::FileSystem::appendSlash(basePath2) + "openvino_tokenizer.bin"; diff --git a/src/test/pythonnode_test.cpp b/src/test/pythonnode_test.cpp index 54c9acbfa1..6f9dc6bfa8 100644 --- a/src/test/pythonnode_test.cpp +++ b/src/test/pythonnode_test.cpp @@ -1002,10 +1002,12 @@ class MockedMediapipeGraphExecutorPy : public ovms::MediapipeGraphExecutor { stream_types_mapping_t inputTypes, stream_types_mapping_t outputTypes, std::vector inputNames, std::vector outputNames, - const PythonNodeResourcesMap& pythonNodeResourcesMap, + const GraphSidePackets& sidePackets, PythonBackend* pythonBackend, - MediapipeServableMetricReporter* mediapipeServableMetricReporter) : - MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, pythonNodeResourcesMap, {}, {}, {}, {}, {}, pythonBackend, mediapipeServableMetricReporter) {} + MediapipeServableMetricReporter* mediapipeServableMetricReporter, GraphIdGuard&& guard) : + MediapipeGraphExecutor(name, version, config, inputTypes, outputTypes, inputNames, outputNames, + sidePackets, + pythonBackend, mediapipeServableMetricReporter, std::move(guard)) {} }; TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { @@ -1014,8 +1016,10 @@ TEST_F(PythonFlowTest, SerializePyObjectWrapperToKServeResponse) { const std::vector inputNames; const std::vector outputNames; const ::mediapipe::CalculatorGraphConfig config; - PythonNodeResourcesMap pythonNodeResourcesMap; - auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, pythonNodeResourcesMap, getPythonBackend(), this->reporter.get()); + auto sidePackets = std::make_shared(); + std::shared_ptr queue = std::make_shared(config, sidePackets, 1); + GraphIdGuard guard(queue); + auto executor = MockedMediapipeGraphExecutorPy("", "", config, mapping, mapping, inputNames, outputNames, *sidePackets, getPythonBackend(), this->reporter.get(), std::move(guard)); std::string datatype = "FP32"; std::string name = "python_result"; diff --git a/src/test/streaming_test.cpp b/src/test/streaming_test.cpp index 02e7c4178a..b61d8a48ef 100644 --- a/src/test/streaming_test.cpp +++ b/src/test/streaming_test.cpp @@ -70,6 +70,35 @@ class StreamingTest : public Test { } }; +class StreamingQueueTest : public StreamingTest { +protected: + std::shared_ptr queue; + + MediapipeGraphExecutor createQueueExecutor( + const ::mediapipe::CalculatorGraphConfig& config, + stream_types_mapping_t inputTypes, + stream_types_mapping_t outputTypes, + std::vector inputNames, + std::vector outputNames, + int queueSize = 1) { + auto sidePackets = std::make_shared(); + queue = std::make_shared(config, sidePackets, queueSize); + GraphIdGuard graphIdGuard(queue); + return MediapipeGraphExecutor{ + this->name, + this->version, + config, + std::move(inputTypes), + std::move(outputTypes), + std::move(inputNames), + std::move(outputNames), + *sidePackets, + nullptr, + this->reporter.get(), + std::move(graphIdGuard)}; + } +}; + #if (PYTHON_DISABLE == 0) class PythonStreamingTest : 
 #if (PYTHON_DISABLE == 0)
 class PythonStreamingTest : public StreamingTest {
 protected:
@@ -359,7 +388,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::KFS_REQUEST}},
         {{"out", mediapipe_packet_type_enum::KFS_RESPONSE}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -416,7 +445,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // no timestamp specified, server will assign one
@@ -559,7 +588,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests with manually (client) assigned ascending order of timestamp and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 3);  // first request with timestamp 3
@@ -604,7 +633,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock only 1 request and disconnect immediately
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
@@ -621,6 +650,184 @@ node {
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
 }
 
+TEST_F(StreamingQueueTest, SingleStreamSend3Receive3AutomaticTimestamp) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Receive({{"in", 7.2f}}))
+        .WillOnce(Receive({{"in", 102.4f}}))
+        .WillOnce(Disconnect());
+
+    auto timestamp = std::make_shared<int64_t>(-1);
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 4.5f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 8.2f}}, timestamp))
+        .WillOnce(SendWithAutomaticTimestamp({{"out", 103.4f}}, timestamp));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
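All StreamingQueueTest cases pass queueSize = 1 to createQueueExecutor; a hedged reading of that last constructor argument (inferred from the fixture above, not confirmed by this diff):

    ::mediapipe::CalculatorGraphConfig config;  // parsed elsewhere
    auto sidePackets = std::make_shared<GraphSidePackets>();
    // Assumed: the third GraphQueue argument is the number of preinitialized
    // CalculatorGraph instances, i.e. how many streams may run concurrently.
    auto serialized = std::make_shared<GraphQueue>(config, sidePackets, 1);  // one at a time
    auto pooled = std::make_shared<GraphQueue>(config, sidePackets, 4);      // up to four in flight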
+TEST_F(StreamingQueueTest, SingleStreamSend1Receive3) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOne3CycleIterationsTestCalculator"
+  input_stream: "in"
+  input_stream: "signal"
+  input_stream_info: {
+    tag_index: ':1',
+    back_edge: true
+  }
+  input_stream_handler {
+    input_stream_handler: 'ImmediateInputStreamHandler'
+  }
+  output_stream: "out"
+  output_stream: "signal"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendWithTimestamp({{"out", 4.5f}}, 1))
+        .WillOnce(SendWithTimestamp({{"out", 5.5f}}, 2))
+        .WillOnce(SendWithTimestamp({{"out", 6.5f}}, 3));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ExitOnDisconnectionDuringRead) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareRequest(this->firstRequest, {});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(Disconnect());
+
+    EXPECT_CALL(this->stream, Write(_, _)).Times(0);
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
+TEST_F(StreamingQueueTest, ErrorOnDisconnectionDuringWrite) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    prepareRequest(this->firstRequest, {{"in", 3.5f}});
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(DisconnectOnWriteAndNotifyEnd(signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_EXECUTION_ERROR);
+}
+
+TEST_F(StreamingQueueTest, ErrorDuringFirstRequestDeserialization) {
+    const std::string pbTxt{R"(
+input_stream: "in"
+output_stream: "out"
+node {
+  calculator: "AddOneSingleStreamTestCalculator"
+  input_stream: "in"
+  output_stream: "out"
+}
+    )"};
+    ::mediapipe::CalculatorGraphConfig config;
+    ASSERT_TRUE(::google::protobuf::TextFormat::ParseFromString(pbTxt, &config));
+
+    auto executor = createQueueExecutor(
+        config,
+        {{"in", mediapipe_packet_type_enum::OVTENSOR}},
+        {{"out", mediapipe_packet_type_enum::OVTENSOR}},
+        {"in"},
+        {"out"},
+        1);
+
+    prepareInvalidRequest(this->firstRequest, {"in"});
+
+    std::promise<void> signalPromise;
+    std::future<void> signalFuture = signalPromise.get_future();
+
+    EXPECT_CALL(this->stream, Read(_))
+        .WillOnce(DisconnectWhenNotified(signalFuture));
+    EXPECT_CALL(this->stream, Write(_, _))
+        .WillOnce(SendErrorAndNotifyEnd(
+            Status(StatusCode::INVALID_CONTENT_SIZE).string() + std::string{" - Expected: 4 bytes; Actual: 0 bytes; input name: in; partial deserialization of first request"},
+            signalPromise));
+
+    ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::OK);
+}
+
 // PYTHON CALCULATOR CASES
 #if (PYTHON_DISABLE == 0)
 
@@ -1230,7 +1437,7 @@ node {
         {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1282,7 +1489,7 @@ node {
         {"out3", mediapipe_packet_type_enum::OVTENSOR}},
         {"in1", "in2", "in3"},
         {"out1", "out2", "out3"},
-        {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1317,7 +1524,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1351,7 +1558,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"wrong_name"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
+        {"in"}, {"wrong_name"}, {}, nullptr, this->reporter.get()};  // cannot install observer due to wrong output name (should never happen due to validation)
 
     EXPECT_CALL(this->stream, Read(_)).Times(0);
     EXPECT_CALL(this->stream, Write(_, _)).Times(0);
@@ -1376,7 +1583,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {});
     EXPECT_CALL(this->stream, Read(_))
@@ -1404,7 +1611,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
@@ -1440,7 +1647,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     ASSERT_EQ(executor.inferStream(this->firstRequest, this->stream, this->executionContext), StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR);
@@ -1463,7 +1670,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Invalid request - missing data in buffer
     prepareInvalidRequest(this->firstRequest, {"in"});  // no timestamp specified, server will assign one
@@ -1498,7 +1705,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise[3];
     std::future<void> signalFuture[3] = {
@@ -1545,7 +1752,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, 0);
     EXPECT_CALL(this->stream, Read(_))
@@ -1573,7 +1780,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});
     setRequestTimestamp(this->firstRequest, std::string("not an int"));
@@ -1608,7 +1815,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Timestamps not allowed in stream
     // Expect continuity of operation and response with error message
@@ -1650,7 +1857,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Allowed in stream
     for (auto timestamp : std::vector<::mediapipe::Timestamp>{
@@ -1686,7 +1893,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 3 requests and disconnection
     prepareRequestWithParam(this->firstRequest, {{"in", 3.5f}}, {"val", 65});  // request with parameter val
@@ -1723,7 +1930,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving the invalid request and disconnection
     // Request with invalid param py (special pythons session side packet)
@@ -1752,7 +1959,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     prepareRequest(this->firstRequest, {{"in", 3.5f}});  // missing required request param
     EXPECT_CALL(this->stream, Read(_)).Times(0);
@@ -1778,7 +1985,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     // Mock receiving 2 requests and disconnection
     prepareRequest(this->firstRequest, {{"in", 3.5f}}, std::nullopt, this->name, this->version);  // no timestamp specified, server will assign one
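The recurring shrink from six positional `{}` arguments to a single `{}` in these executor constructions reflects folding the per-type side packet maps into one aggregate passed by reference. A sketch consistent with the member accesses in the test_utils.hpp hunks below (the two named fields are taken from this diff; any further members are assumed):

    struct GraphSidePackets {
        PythonNodeResourcesMap pythonNodeResourcesMap;
        GenAiServableMap genAiServableMap;
        // ... remaining side packet maps elided ...
    };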
@@ -1812,7 +2019,7 @@ node {
         this->name, this->version, config,
         {{"in", mediapipe_packet_type_enum::OVTENSOR}},
         {{"out", mediapipe_packet_type_enum::OVTENSOR}},
-        {"in"}, {"out"}, {}, {}, {}, {}, {}, {}, nullptr, this->reporter.get()};
+        {"in"}, {"out"}, {}, nullptr, this->reporter.get()};
 
     std::promise<void> signalPromise;
     std::future<void> signalFuture = signalPromise.get_future();
diff --git a/src/test/stress_test_utils.hpp b/src/test/stress_test_utils.hpp
index ccbdd60758..740e8af1d1 100644
--- a/src/test/stress_test_utils.hpp
+++ b/src/test/stress_test_utils.hpp
@@ -50,6 +50,7 @@
 #include "../server.hpp"
 #include "../status.hpp"
 #include "../stringutils.hpp"
+#include "src/timer.hpp"
 #include "../tfs_frontend/tfs_utils.hpp"
 #include "c_api_test_utils.hpp"
 #include "test_utils.hpp"
@@ -1067,7 +1068,99 @@ static const std::string basicMediapipeConfigWithNewGraphPath = R"({
     "mediapipe_config_list": [
     {
         "name":"pipeline1Dummy",
-        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt"
+        "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames_newpath.pbtxt"
     }
     ]
 })";
+
+const std::string basicMediapipeQueueConfig = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithAddedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    },
+    {
+        "name":"pipeline2Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedGraph = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithRemovedModel = R"({
+    "model_config_list": [
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithReloadedModel = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy",
+                "nireq": 47
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames.pbtxt"
+    }
+    ]
+})";
+
+static const std::string basicMediapipeQueueConfigWithNewGraphPath = R"({
+    "model_config_list": [
+        {"config": {
+                "name": "dummy",
+                "base_path": "/ovms/src/test/dummy"
+            }
+        }
+    ],
+    "mediapipe_config_list": [
+    {
+        "name":"pipeline1Dummy",
+        "graph_path":"/ovms/src/test/mediapipe/graph_queue_dummyadapterfull_dummyinputnames_newpath.pbtxt"
+    }
+    ]
+})";
@@ -1094,9 +1187,16 @@ static void mediacreate(std::unique_ptr& executorPtr, ov
         sc = static_cast<ovms::StatusCode>(code); \
     }
 
+enum StressTimerSlot : unsigned int {
+    STRESS_LOOP,
+    CREATE,
+    EXECUTE,
+    TIMER_END
+};
+
 class ConfigChangeStressTest : public TestWithTempDir {
 protected:
-    const uint32_t loadThreadCount = 20;
+    const uint32_t loadThreadCount = 16;
     const uint32_t beforeConfigChangeLoadTimeMs = 30;
     const uint32_t afterConfigChangeLoadTimeMs = 50;
     const int stressIterationsLimit = 10000;
@@ -1291,6 +1391,12 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void addNewMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithAddedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void removeMediapipeGraph() {
         SPDLOG_INFO("{} start", __FUNCTION__);
         SetUpConfig(basicMediapipeConfigWithRemovedGraph);
@@ -1315,6 +1421,30 @@ class ConfigChangeStressTest : public TestWithTempDir {
         createConfigFileWithContent(ovmsConfig, configFilePath);
         SPDLOG_INFO("{} end", __FUNCTION__);
     }
+    void removeMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedGraph);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void removeMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithRemovedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraphUsedModel() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithReloadedModel);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
+    void reloadMediapipeQueueGraph() {
+        SPDLOG_INFO("{} start", __FUNCTION__);
+        SetUpConfig(basicMediapipeQueueConfigWithNewGraphPath);
+        createConfigFileWithContent(ovmsConfig, configFilePath);
+        SPDLOG_INFO("{} end", __FUNCTION__);
+    }
     void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) {
         ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n"
                                                                                                                      << metricOutput;
@@ -1706,6 +1836,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
         auto stressIterationsCounter = stressIterationsLimit;
         bool breakLoop = false;
         while (stressIterationsCounter-- > 0) {
+            ovms::Timer<TIMER_END> timer;
+            timer.start(STRESS_LOOP);
             auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0));
             if (true == breakLoop) {
                 SPDLOG_INFO("Ending Load");
@@ -1725,6 +1857,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             RequestType request2;
             RequestType request = preparePipelinePredictRequest(request2);
             ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(CREATE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 createPipelineStatus = this->manager->createPipeline(pipelinePtr, pipelineName, &request, &response);
 #if (MEDIAPIPE_DISABLE == 0)
@@ -1732,6 +1865,8 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediacreate(executorPtr, *(this->manager), request, response, createPipelineStatus);
 #endif
             }
+            timer.stop(CREATE);
+            SPDLOG_TRACE("Servable creation time: {} us", timer.elapsed<std::chrono::microseconds>(CREATE));
             // we need to make sure that expected status happened and still accept
             // some that could happen but we may not hit them
             EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) ||
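The instrumentation added to the stress loop follows the ovms::Timer pattern; a minimal usage sketch under the assumption that the template parameter is the slot count and that elapsed() is templated on the chrono unit, matching the "us" in the trace above (the slot names here are illustrative only):

    enum DemoTimerSlot : unsigned int { PREPARE, INFER, DEMO_TIMER_END };

    ovms::Timer<DEMO_TIMER_END> timer;  // one stopwatch per slot, assumed
    timer.start(PREPARE);
    // ... timed section ...
    timer.stop(PREPARE);
    SPDLOG_TRACE("prepare time: {} us", timer.elapsed<std::chrono::microseconds>(PREPARE));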
@@ -1743,6 +1878,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
             }
 
             ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR;
+            timer.start(EXECUTE);
             if (typeid(ServableType) == typeid(ovms::Pipeline)) {
                 executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext(
                     ovms::ExecutionContext::Interface::GRPC,
@@ -1752,6 +1888,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 mediaexec(executorPtr, *(this->manager), request, response, executePipelineStatus);
 #endif
             }
+            timer.stop(EXECUTE);
             createPipelineRetCodesCounters[executePipelineStatus.getCode()]++;
             EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) ||
                         (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end()))
@@ -1763,6 +1900,7 @@ class ConfigChangeStressTest : public TestWithTempDir {
                 SPDLOG_INFO("Earlier fail detected. Stopping execution");
                 break;
             }
+            timer.stop(STRESS_LOOP);
         }
         for (auto& [retCode, counter] : createPipelineRetCodesCounters) {
             if (counter > 0) {
diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp
index 879ab1313e..8a1e7dfd19 100644
--- a/src/test/test_utils.hpp
+++ b/src/test/test_utils.hpp
@@ -816,8 +816,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
     std::string inputConfig;
 #if (PYTHON_DISABLE == 0)
     ovms::PythonNodeResources* getPythonNodeResources(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.pythonNodeResourcesMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.pythonNodeResourcesMap)) {
+        auto it = this->sidePacketMaps->pythonNodeResourcesMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->pythonNodeResourcesMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -826,8 +826,8 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
 #endif
 
     ovms::GenAiServable* getGenAiServable(const std::string& nodeName) {
-        auto it = this->sidePacketMaps.genAiServableMap.find(nodeName);
-        if (it == std::end(this->sidePacketMaps.genAiServableMap)) {
+        auto it = this->sidePacketMaps->genAiServableMap.find(nodeName);
+        if (it == std::end(this->sidePacketMaps->genAiServableMap)) {
             return nullptr;
         } else {
             return it->second.get();
@@ -838,13 +838,15 @@ class DummyMediapipeGraphDefinition : public ovms::MediapipeGraphDefinition {
         return this->validateForConfigLoadableness();
     }
 
-    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps.genAiServableMap; }
+    ovms::GenAiServableMap& getGenAiServableMap() { return this->sidePacketMaps->genAiServableMap; }
 
     DummyMediapipeGraphDefinition(const std::string name,
         const ovms::MediapipeGraphConfig& config,
         std::string inputConfig,
         ovms::PythonBackend* pythonBackend = nullptr) :
-        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) { this->inputConfig = inputConfig; }
+        ovms::MediapipeGraphDefinition(name, config, nullptr, nullptr, pythonBackend) {
+        this->inputConfig = inputConfig;
+    }
 
     // Do not read from path - use predefined config contents
     ovms::Status validateForConfigFileExistence() override {