Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3ddabdc
WIP
atobiszei Mar 2, 2026
b5ed7d8
Inpainting/outpainting CPU
atobiszei Mar 9, 2026
c2ba9d8
Update dockerignore
atobiszei Mar 11, 2026
af51476
Demo
atobiszei Mar 13, 2026
5f89fc2
Change mask
atobiszei Mar 13, 2026
102f8af
Fix
atobiszei Mar 13, 2026
330374f
Fix: remove mask from accepted fields in text2image request options
atobiszei Mar 16, 2026
0a65e68
Merge remote-tracking branch 'origin/main' into atobisze_image_inpain…
atobiszei Mar 16, 2026
eac3932
Fix concurrent request inpainting issue, propagate quantization param…
atobiszei Mar 18, 2026
f4be12a
Merge remote-tracking branch 'origin/main' into atobisze_image_inpain…
atobiszei Mar 18, 2026
a2476ee
Add tests & review fix
atobiszei Mar 18, 2026
15dd08a
Address PR review: blocking inpainting guard, string_view optimizatio…
atobiszei Mar 18, 2026
7e392c6
Minor comment fix
atobiszei Mar 19, 2026
eb34c6e
Add LoRA adapter support for image generation
atobiszei Mar 16, 2026
9da0860
Add download commands for inpainting/outpainting demo images
atobiszei Mar 24, 2026
4d24c22
Add LoRA adapter support for image generation
atobiszei Mar 25, 2026
dec1959
Merge origin/main into atobisze_image_inpainting_lora
atobiszei Mar 25, 2026
f10b657
Fix LoRA download auth, style issues, and restore README curl commands
atobiszei Mar 25, 2026
4fe7c75
Fix Windows build: rename shadowed variable 'it' to 'member'
atobiszei Mar 25, 2026
0ef2ebe
Fix Windows path detection in LoRA CLI parser
atobiszei Mar 25, 2026
519ea61
Extract isLocalFilePath() into stringutils for cross-platform path de…
atobiszei Mar 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ When analyzing a Pull Request, follow this protocol:
- **Keep headers self-contained but minimal**: each header must compile on its own, but should not pull in transitive dependencies that callers don't need.
- **Prefer opaque types / Pimpl**: for complex implementation details, consider the Pimpl idiom to keep implementation-only types out of the public header entirely.
- **Never include a header solely for a typedef or enum**: forward-declare the enum (`enum class Foo;` in C++17) or relocate the typedef to a lightweight `fwd.hpp`-style header.
13. Be mindful when accepting `const T&` in constructors or functions that store the reference: verify that the referenced object's lifetime outlives the usage to avoid dangling references.
13. **No dangling references or temporaries bound to `const T&`**:
- Never use `const T&` parameters with default arguments that construct temporaries (e.g. `const std::string& param = ""`). This binds a reference to a temporary — use a function overload instead, or pass by value.
- When accepting `const T&` in constructors or functions that store the reference, verify that the referenced object's lifetime outlives the usage to avoid dangling references.
- Prefer overloads over default arguments for non-trivial types passed by reference.

## Build System

Expand Down
47 changes: 45 additions & 2 deletions demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def add_common_arguments(parser):
parser_image_generation.add_argument('--max_num_images_per_prompt', type=int, default=0, help='Max allowed number of images client is allowed to request for a given prompt', dest='max_num_images_per_prompt')
parser_image_generation.add_argument('--default_num_inference_steps', type=int, default=0, help='Default number of inference steps when not specified by client', dest='default_num_inference_steps')
parser_image_generation.add_argument('--max_num_inference_steps', type=int, default=0, help='Max allowed number of inference steps client is allowed to request for a given prompt', dest='max_num_inference_steps')
parser_image_generation.add_argument('--source_loras', default=None,
help='LoRA adapters to apply. Format: alias1=org1/repo1,alias2=org2/repo2@lora_file.safetensors '
'where @filename is optional and specifies which .safetensors file to use from the downloaded repo '
'(auto-detected when repo contains exactly one). Only for image_generation task.',
dest='source_loras')

parser_text2speech = subparsers.add_parser('text2speech', help='export model for text2speech endpoint')
add_common_arguments(parser_text2speech)
Expand Down Expand Up @@ -323,6 +328,9 @@ def add_common_arguments(parser):
default_num_inference_steps: {{default_num_inference_steps}},{% endif %}
{%- if max_num_inference_steps > 0 %}
max_num_inference_steps: {{max_num_inference_steps}},{% endif %}
{%- for lora in lora_adapters %}
lora_adapters { alias: "{{lora.alias}}" path: "{{lora.path}}" }
{%- endfor %}
}
}
}"""
Expand Down Expand Up @@ -600,7 +608,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi
add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))


def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams):
def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams, source_loras):
model_path = "./"
target_path = os.path.join(model_repository_path, model_name)
model_index_path = os.path.join(target_path, 'model_index.json')
Expand All @@ -613,6 +621,41 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
if os.system(optimum_command):
raise ValueError("Failed to export image generation model", source_model)

# Download and resolve LoRA adapters requested via --source_loras.
# Accepted entry formats (comma separated):
#   alias=org/repo                      -> alias given explicitly
#   alias=org/repo@file.safetensors     -> explicit alias and .safetensors file
#   org/repo[@file.safetensors]         -> alias defaults to the repo name ("repo")
lora_adapters = []
if source_loras:
    from huggingface_hub import snapshot_download
    for entry in source_loras.split(','):
        entry = entry.strip()
        if not entry:
            continue  # tolerate stray/trailing commas
        if '=' in entry:
            alias, repo_and_file = entry.split('=', 1)
        else:
            alias = None  # derived from the repo id below, after stripping any '@file' suffix
            repo_and_file = entry
        safetensors_file = ''
        if '@' in repo_and_file:
            repo, safetensors_file = repo_and_file.rsplit('@', 1)
        else:
            repo = repo_and_file
        if alias is None:
            # Default alias is the last path component of the repo id, so
            # "org/repo@file.safetensors" yields "repo" (not "repo@file.safetensors").
            alias = repo.split('/')[-1] if '/' in repo else repo
        lora_dir = os.path.join(target_path, 'loras', repo)
        if not os.path.isdir(lora_dir):
            print(f"Downloading LoRA adapter: {repo} to {lora_dir}")
            snapshot_download(repo_id=repo, local_dir=lora_dir)
        else:
            print(f"LoRA adapter directory already exists: {lora_dir}")
        if not safetensors_file:
            # Auto-detect the .safetensors file; ambiguity must be resolved by the user.
            st_files = [f for f in os.listdir(lora_dir) if f.endswith('.safetensors')]
            if len(st_files) == 0:
                raise ValueError(f"No .safetensors files found in LoRA adapter: {repo}")
            if len(st_files) > 1:
                raise ValueError(f"Multiple .safetensors files in LoRA adapter: {repo}. Use @filename to specify.")
            safetensors_file = st_files[0]
        # Path is relative to the model directory; forward slashes are used in the graph config.
        lora_path = 'loras/' + repo + '/' + safetensors_file
        lora_adapters.append({'alias': alias, 'path': lora_path})
        print(f"LoRA adapter: {alias} -> {lora_path}")
task_parameters['lora_adapters'] = lora_adapters

plugin_config = {}
assert num_streams >= 0, "num_streams should be a non-negative integer"
if num_streams > 0:
Expand Down Expand Up @@ -695,4 +738,4 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
'max_num_inference_steps',
'extra_quantization_params'
]}
export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'])
export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'], args['source_loras'])
12 changes: 12 additions & 0 deletions demos/image_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,12 @@ Output file (`edit_output.png`):

Inpainting replaces a masked region in an image based on the prompt. The `mask` is a black-and-white image where white pixels mark the area to repaint.

Download sample images:
```console
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat.png
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat_mask.png
```

![cat](./cat.png) ![cat_mask](./cat_mask.png)

::::{tab-set}
Expand Down Expand Up @@ -599,6 +605,12 @@ Outpainting extends an image beyond its original borders. Prepare two images:
- **outpaint_input.png** — the original image centered on a larger canvas (e.g. 768×768) with black borders
- **outpaint_mask.png** — white where the new content should be generated (the borders), black where the original image is

Download sample images:
```console
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_input.png
curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_mask.png
```

![outpaint_input](./outpaint_input.png) ![outpaint_mask](./outpaint_mask.png)

::::{tab-set}
Expand Down
1 change: 1 addition & 0 deletions src/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3067,6 +3067,7 @@ cc_library(
":test_light_test_utils",
":test_test_with_temp_dir",
"//src/graph_export:graph_export",
"//src/graph_export:image_generation_graph_cli_parser",
"//src:libovms_server_settings",
"@com_google_googletest//:gtest",
],
Expand Down
26 changes: 26 additions & 0 deletions src/capi_frontend/server_settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,29 @@ struct RerankGraphSettingsImpl {
uint64_t maxAllowedChunks = 10000;
};

// Origin of a LoRA adapter's weights, used to decide how the adapter is
// downloaded and how its on-disk path is resolved during graph export.
enum class LoraSourceType {
    HF_REPO,     // HuggingFace repo id (e.g. "org/repo")
    DIRECT_URL,  // direct HTTP(S) link to a .safetensors file
    LOCAL_FILE   // pre-existing file path on the local filesystem
};

// Configuration for a single LoRA adapter applied to an image-generation
// model; populated from the --source_loras CLI option.
struct LoraAdapterSettings {
    std::string alias;            // name used to reference this adapter in the graph config
    std::string sourceLora;       // HF repo, direct URL, or local file path
    std::string safetensorsFile;  // resolved filename, empty = auto-detect (HF only)
    LoraSourceType sourceType = LoraSourceType::HF_REPO;  // how sourceLora is interpreted
};

// One weighted member of a composite LoRA: pairs a previously declared
// adapter (by alias) with the blend weight to apply it at.
struct CompositeLoraComponent {
    std::string adapterAlias;  // references a LoraAdapterSettings alias
    float weight = 1.0f;       // blend weight; 1.0 is the default (full strength)
};

// A named combination of several LoRA adapters blended together; emitted as a
// composite_lora_adapters entry in the exported graph configuration.
struct CompositeLoraSettings {
    std::string alias;                              // name of the composite adapter
    std::vector<CompositeLoraComponent> components; // adapters and weights to blend
};

struct ImageGenerationGraphSettingsImpl {
std::string resolution = "";
std::string maxResolution = "";
Expand All @@ -152,6 +175,8 @@ struct ImageGenerationGraphSettingsImpl {
std::optional<uint32_t> maxNumberImagesPerPrompt;
std::optional<uint32_t> defaultNumInferenceSteps;
std::optional<uint32_t> maxNumInferenceSteps;
std::vector<LoraAdapterSettings> loraAdapters;
std::vector<CompositeLoraSettings> compositeLoraAdapters;
};

struct ExportSettings {
Expand All @@ -169,6 +194,7 @@ struct HFSettingsImpl {
std::string sourceModel = "";
std::optional<std::string> ggufFilename;
std::string downloadPath = "";
std::string sourceLoras = ""; // raw --source_loras value, parsed by image gen CLI parser
bool overwriteModels = false;
ModelDownlaodType downloadType = GIT_CLONE_DOWNLOAD;
GraphExportType task = TEXT_GENERATION_GRAPH;
Expand Down
11 changes: 9 additions & 2 deletions src/cli_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,11 @@ std::variant<bool, std::pair<int, std::string>> CLIParser::parse(int argc, char*
cxxopts::value<std::string>(), "GRPC_CHANNEL_ARGUMENTS")
("file_system_poll_wait_seconds",
"Time interval between config and model versions changes detection. Default is 1. Zero or negative value disables changes monitoring.",
cxxopts::value<uint32_t>()->default_value("1"),
cxxopts::value<uint32_t>()->default_value("1"),
"FILE_SYSTEM_POLL_WAIT_SECONDS")
("sequence_cleaner_poll_wait_minutes",
"Time interval between two consecutive sequence cleanup scans. Default is 5. Zero value disables sequence cleaner. It also sets the schedule for releasing free memory from the heap.",
cxxopts::value<uint32_t>()->default_value("5"),
cxxopts::value<uint32_t>()->default_value("5"),
"SEQUENCE_CLEANER_POLL_WAIT_MINUTES")
("custom_node_resources_cleaner_interval_seconds",
"Time interval between two consecutive resources cleanup scans. Default is 300. Zero value disables resources cleaner.",
Expand Down Expand Up @@ -213,6 +213,10 @@ std::variant<bool, std::pair<int, std::string>> CLIParser::parse(int argc, char*
"HF source model path",
cxxopts::value<std::string>(),
"HF_SOURCE")
("source_loras",
"LoRA adapters for image generation. Format: alias1=org1/repo1,alias2=org2/repo2@file.safetensors,alias3=https://url/file.safetensors,alias4=/local/path/file.safetensors",
cxxopts::value<std::string>(),
"SOURCE_LORAS")
("gguf_filename",
"Name of the GGUF file",
cxxopts::value<std::string>(),
Expand Down Expand Up @@ -715,6 +719,9 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl&
} else if (result->count("model_name")) {
hfSettings.sourceModel = result->operator[]("model_name").as<std::string>();
}
if (result->count("source_loras")) {
hfSettings.sourceLoras = result->operator[]("source_loras").as<std::string>();
}
if ((result->count("weight-format") || result->count("extra_quantization_params")) && isOptimumCliDownload(hfSettings.sourceModel, hfSettings.ggufFilename)) {
hfSettings.downloadType = OPTIMUM_CLI_DOWNLOAD;
}
Expand Down
31 changes: 31 additions & 0 deletions src/graph_export/graph_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,37 @@ node: {
max_num_inference_steps: )" << graphSettings.maxNumInferenceSteps.value();
}

for (const auto& adapter : graphSettings.loraAdapters) {
std::string loraPath;
if (adapter.sourceType == LoraSourceType::LOCAL_FILE) {
loraPath = adapter.sourceLora;
} else if (adapter.sourceType == LoraSourceType::HF_REPO) {
loraPath = "loras/" + adapter.sourceLora + "/" + adapter.safetensorsFile;
} else { // cURL direct link
loraPath = "loras/" + adapter.alias + "/" + adapter.safetensorsFile;
}
oss << R"(
lora_adapters { alias: ")" << adapter.alias << R"(" path: ")" << loraPath << R"(")";
// Only omit alpha when default (1.0) - let proto handle it
oss << R"( })";
}

for (const auto& composite : graphSettings.compositeLoraAdapters) {
oss << R"(
composite_lora_adapters {
alias: ")" << composite.alias << R"("
)";
for (const auto& component : composite.components) {
oss << R"( components { adapter_alias: ")" << component.adapterAlias << R"(")";
if (component.weight != 1.0f) {
oss << R"( weight: )" << component.weight;
}
oss << R"( }
)";
}
oss << R"( })";
}

oss << R"(
}
}
Expand Down
Loading