From bacfdb1b38ed7ee3df239f83138068715d67c908 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 12:56:54 +0200 Subject: [PATCH 1/2] Add text2vec-digitalocean vectorizer module Adds support for the new text2vec-digitalocean vectorizer. The module shape mirrors text2vec-mistral exactly (model + baseURL + vectorizeClassName), so the existing serialization path is reused unchanged. Closes #339 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Unit/TestVectorizers.cs | 79 +++++++++++++++++++ .../Configure/VectorizerFactory.cs | 21 +++++ src/Weaviate.Client/Models/Vectorizer.cs | 33 ++++++++ src/Weaviate.Client/PublicAPI.Unshipped.txt | 19 +++++ 4 files changed, 152 insertions(+) diff --git a/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs b/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs index a27dfa09..75dd301d 100644 --- a/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs +++ b/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs @@ -413,4 +413,83 @@ public void Test_Multi2VecGoogleGemini_Serializes_AudioFields_WeightedFields() Assert.Contains("\"textFields\"", json); Assert.Contains("\"videoFields\"", json); } + + /// <summary> + /// Tests that Text2VecDigitalOcean serializes baseURL and model correctly under the + /// text2vec-digitalocean module key. + /// </summary> + [Fact] + [System.Diagnostics.CodeAnalysis.SuppressMessage( + "Performance", + "CA1869:Cache and reuse 'JsonSerializerOptions' instances", + Justification = "" + )] + public void Test_Text2VecDigitalOcean_Serializes_BaseURL_And_Model() + { + // Arrange + var vc = Configure.Vector( + "default", + v => + v.Text2VecDigitalOcean( + baseURL: "https://inference.do-ai.run", + model: "qwen3-embedding-0.6b", + vectorizeCollectionName: false + ) + ); + + // Act + var dto = vc.Vectorizer?.ToDto() ??
default; + var json = JsonSerializer.Serialize( + dto, + new JsonSerializerOptions + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + WriteIndented = false, + } + ); + + // Assert + Assert.Contains("\"text2vec-digitalocean\"", json); + Assert.Contains("\"baseURL\":\"https://inference.do-ai.run\"", json); + Assert.Contains("\"model\":\"qwen3-embedding-0.6b\"", json); + Assert.Contains("\"vectorizeClassName\":false", json); + } + + /// + /// Tests that Text2VecDigitalOcean omits unset optional fields so the server can apply + /// its defaults (no baseURL, no model). + /// + [Fact] + [System.Diagnostics.CodeAnalysis.SuppressMessage( + "Performance", + "CA1869:Cache and reuse 'JsonSerializerOptions' instances", + Justification = "" + )] + public void Test_Text2VecDigitalOcean_Omits_Unset_Optionals() + { + // Arrange + var vc = Configure.Vector("default", v => v.Text2VecDigitalOcean()); + + // Act + var dto = vc.Vectorizer?.ToDto() ?? default; + var json = JsonSerializer.Serialize( + dto, + new JsonSerializerOptions + { + PropertyNamingPolicy = JsonNamingPolicy.CamelCase, + DefaultIgnoreCondition = System + .Text + .Json + .Serialization + .JsonIgnoreCondition + .WhenWritingNull, + WriteIndented = false, + } + ); + + // Assert + Assert.Contains("\"text2vec-digitalocean\"", json); + Assert.DoesNotContain("\"baseURL\"", json); + Assert.DoesNotContain("\"model\"", json); + } } diff --git a/src/Weaviate.Client/Configure/VectorizerFactory.cs b/src/Weaviate.Client/Configure/VectorizerFactory.cs index 240f3c22..59fb54f2 100644 --- a/src/Weaviate.Client/Configure/VectorizerFactory.cs +++ b/src/Weaviate.Client/Configure/VectorizerFactory.cs @@ -750,6 +750,27 @@ public VectorizerConfig Text2VecMistral( VectorizeCollectionName = vectorizeCollectionName, }; + /// + /// Creates a configuration for the text2vec-digitalocean vectorizer. + /// See the documentation + /// for detailed usage. + /// + /// The base URL where API requests should go. 
Defaults to null, which uses the server-defined default of https://inference.do-ai.run. + /// The model to use, e.g. qwen3-embedding-0.6b. Required by the server. + /// Whether to vectorize the collection name. + /// The vectorizer config + public VectorizerConfig Text2VecDigitalOcean( + string? baseURL = null, + string? model = null, + bool? vectorizeCollectionName = null + ) => + new Text2VecDigitalOcean + { + BaseURL = baseURL, + Model = model, + VectorizeCollectionName = vectorizeCollectionName, + }; + /// /// Texts the 2 vec model 2 vec using the specified inference url /// diff --git a/src/Weaviate.Client/Models/Vectorizer.cs b/src/Weaviate.Client/Models/Vectorizer.cs index 12510a98..e27ce4f9 100644 --- a/src/Weaviate.Client/Models/Vectorizer.cs +++ b/src/Weaviate.Client/Models/Vectorizer.cs @@ -1064,6 +1064,39 @@ internal Text2VecMistral() { } public bool? VectorizeCollectionName { get; set; } = null; } + /// <summary> + /// The configuration for text vectorization using the DigitalOcean module. + /// See the documentation + /// for detailed usage. + /// </summary> + [Vectorizer("text2vec-digitalocean")] + public record Text2VecDigitalOcean : VectorizerConfig + { + /// <summary> + /// Initializes a new instance of the <see cref="Text2VecDigitalOcean"/> class + /// </summary> + [JsonConstructor] + internal Text2VecDigitalOcean() { } + + /// <summary> + /// Gets or sets the base URL where API requests should go. + /// Defaults to null, which uses the server-defined default of https://inference.do-ai.run. + /// </summary> + [JsonPropertyName("baseURL")] + public string? BaseURL { get; set; } = null; + + /// <summary> + /// Gets or sets the model to use, e.g. qwen3-embedding-0.6b. Required by the server. + /// </summary> + public string? Model { get; set; } = null; + + /// <summary> + /// Gets or sets the value of the vectorize collection name + /// </summary> + [JsonPropertyName("vectorizeClassName")] + public bool? VectorizeCollectionName { get; set; } = null; + } + /// <summary> /// The configuration for text vectorization using the Model2Vec module. /// See the documentation for detailed usage.
diff --git a/src/Weaviate.Client/PublicAPI.Unshipped.txt b/src/Weaviate.Client/PublicAPI.Unshipped.txt index 7dc5c581..64a605ff 100644 --- a/src/Weaviate.Client/PublicAPI.Unshipped.txt +++ b/src/Weaviate.Client/PublicAPI.Unshipped.txt @@ -1 +1,20 @@ #nullable enable +override sealed Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Equals(Weaviate.Client.Models.VectorizerConfig? other) -> bool +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.<Clone>$() -> Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean! +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.EqualityContract.get -> System.Type! +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Equals(object? obj) -> bool +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.GetHashCode() -> int +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.PrintMembers(System.Text.StringBuilder! builder) -> bool +override Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.ToString() -> string! +static Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.operator !=(Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean? left, Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean? right) -> bool +static Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.operator ==(Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean? left, Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean? right) -> bool +virtual Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Equals(Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean? other) -> bool +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.BaseURL.get -> string? +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.BaseURL.set -> void +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Model.get -> string?
+Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Model.set -> void +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Text2VecDigitalOcean(Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean! original) -> void +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.VectorizeCollectionName.get -> bool? +Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.VectorizeCollectionName.set -> void +Weaviate.Client.VectorizerFactory.Text2VecDigitalOcean(string? baseURL = null, string? model = null, bool? vectorizeCollectionName = null) -> Weaviate.Client.Models.VectorizerConfig! From 0885c2ece21a8b7254b1b1a365b10eaa79610c1b Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 14:07:47 +0200 Subject: [PATCH 2/2] Make `model` required on Text2VecDigitalOcean factory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The server requires `model` (e.g. `qwen3-embedding-0.6b`); the factory should require it too rather than silently sending a payload the server will reject. - `VectorizerFactory.Text2VecDigitalOcean(string model, string? baseURL = null, bool? vectorizeCollectionName = null)` — `model` is now required and reordered to the first position, since C# requires non-default params before default params. - `PublicAPI.Unshipped.txt` updated to reflect the new signature (`string! model` first). - `Test_Text2VecDigitalOcean_Omits_Unset_Optionals` renamed to `Test_Text2VecDigitalOcean_Omits_Unset_BaseURL`. It now passes `model` and only asserts that `baseURL` is omitted when unset. The old "model is also omitted when unset" assertion was inconsistent with the spec (`model` is required by the server) and would no longer be reachable through the factory anyway. The `Text2VecDigitalOcean` record's `Model` property remains nullable at the type level (matching the `Text2VecMistral` record); required-ness is enforced at the factory entry point, which is the documented user-facing API. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Weaviate.Client.Tests/Unit/TestVectorizers.cs | 14 +++++++++----- src/Weaviate.Client/Configure/VectorizerFactory.cs | 4 ++-- src/Weaviate.Client/PublicAPI.Unshipped.txt | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs b/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs index 75dd301d..4ecbb419 100644 --- a/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs +++ b/src/Weaviate.Client.Tests/Unit/TestVectorizers.cs @@ -431,8 +431,8 @@ public void Test_Text2VecDigitalOcean_Serializes_BaseURL_And_Model() "default", v => v.Text2VecDigitalOcean( - baseURL: "https://inference.do-ai.run", model: "qwen3-embedding-0.6b", + baseURL: "https://inference.do-ai.run", vectorizeCollectionName: false ) ); @@ -457,7 +457,8 @@ public void Test_Text2VecDigitalOcean_Serializes_BaseURL_And_Model() /// /// Tests that Text2VecDigitalOcean omits unset optional fields so the server can apply - /// its defaults (no baseURL, no model). + /// its defaults (no baseURL). model is required by the factory so it is + /// always present. /// [Fact] [System.Diagnostics.CodeAnalysis.SuppressMessage( @@ -465,10 +466,13 @@ public void Test_Text2VecDigitalOcean_Serializes_BaseURL_And_Model() "CA1869:Cache and reuse 'JsonSerializerOptions' instances", Justification = "" )] - public void Test_Text2VecDigitalOcean_Omits_Unset_Optionals() + public void Test_Text2VecDigitalOcean_Omits_Unset_BaseURL() { // Arrange - var vc = Configure.Vector("default", v => v.Text2VecDigitalOcean()); + var vc = Configure.Vector( + "default", + v => v.Text2VecDigitalOcean(model: "qwen3-embedding-0.6b") + ); // Act var dto = vc.Vectorizer?.ToDto() ?? 
default; @@ -489,7 +493,7 @@ public void Test_Text2VecDigitalOcean_Omits_Unset_Optionals() // Assert Assert.Contains("\"text2vec-digitalocean\"", json); + Assert.Contains("\"model\":\"qwen3-embedding-0.6b\"", json); Assert.DoesNotContain("\"baseURL\"", json); - Assert.DoesNotContain("\"model\"", json); } } diff --git a/src/Weaviate.Client/Configure/VectorizerFactory.cs b/src/Weaviate.Client/Configure/VectorizerFactory.cs index 59fb54f2..c4bf056d 100644 --- a/src/Weaviate.Client/Configure/VectorizerFactory.cs +++ b/src/Weaviate.Client/Configure/VectorizerFactory.cs @@ -755,13 +755,13 @@ public VectorizerConfig Text2VecMistral( /// See the documentation /// for detailed usage. /// - /// The base URL where API requests should go. Defaults to null, which uses the server-defined default of https://inference.do-ai.run. /// The model to use, e.g. qwen3-embedding-0.6b. Required by the server. + /// The base URL where API requests should go. Defaults to null, which uses the server-defined default of https://inference.do-ai.run. /// Whether to vectorize the collection name. /// The vectorizer config public VectorizerConfig Text2VecDigitalOcean( + string model, string? baseURL = null, - string? model = null, bool? vectorizeCollectionName = null ) => new Text2VecDigitalOcean diff --git a/src/Weaviate.Client/PublicAPI.Unshipped.txt b/src/Weaviate.Client/PublicAPI.Unshipped.txt index 64a605ff..7fae55fa 100644 --- a/src/Weaviate.Client/PublicAPI.Unshipped.txt +++ b/src/Weaviate.Client/PublicAPI.Unshipped.txt @@ -17,4 +17,4 @@ Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Model.set -> void Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.Text2VecDigitalOcean(Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean! original) -> void Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.VectorizeCollectionName.get -> bool? 
Weaviate.Client.Models.Vectorizer.Text2VecDigitalOcean.VectorizeCollectionName.set -> void -Weaviate.Client.VectorizerFactory.Text2VecDigitalOcean(string? baseURL = null, string? model = null, bool? vectorizeCollectionName = null) -> Weaviate.Client.Models.VectorizerConfig! +Weaviate.Client.VectorizerFactory.Text2VecDigitalOcean(string! model, string? baseURL = null, bool? vectorizeCollectionName = null) -> Weaviate.Client.Models.VectorizerConfig!