From 1b524ebd1092ad0663c73090f9a5252012b52d0f Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 12:56:05 +0200 Subject: [PATCH 1/2] Add text2vec-digitalocean vectorizer module Adds support for the new text2vec-digitalocean vectorizer. The module shape mirrors text2vec-mistral exactly (model + baseURL + vectorizeClassName), so the existing serialization path is reused unchanged. Closes #569 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../v1/api/collections/VectorConfig.java | 48 +++++++ .../Text2VecDigitalOceanVectorizer.java | 132 ++++++++++++++++++ .../client6/v1/internal/json/JSONTest.java | 49 +++++++ 3 files changed, 229 insertions(+) create mode 100644 src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecDigitalOceanVectorizer.java diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/VectorConfig.java b/src/main/java/io/weaviate/client6/v1/api/collections/VectorConfig.java index cae805373..44eb595f6 100644 --- a/src/main/java/io/weaviate/client6/v1/api/collections/VectorConfig.java +++ b/src/main/java/io/weaviate/client6/v1/api/collections/VectorConfig.java @@ -36,6 +36,7 @@ import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecGoogleVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecHuggingFaceVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecJinaAiVectorizer; +import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecDigitalOceanVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecMistralVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecModel2VecVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecMorphVectorizer; @@ -59,6 +60,7 @@ public enum Kind implements JsonEnum { TEXT2VEC_HUGGINGFACE("text2vec-huggingface"), REF2VEC_CENTROID("ref2vec-centroid"), TEXT2VEC_JINAAI("text2vec-jinaai"), + TEXT2VEC_DIGITALOCEAN("text2vec-digitalocean"), 
TEXT2VEC_MISTRAL("text2vec-mistral"), TEXT2VEC_MORPH("text2vec-morph"), TEXT2VEC_MODEL2VEC("text2vec-model2vec"), @@ -1053,6 +1055,41 @@ public static Map.Entry text2vecJinaAi(String vectorName, return Map.entry(vectorName, Text2VecJinaAiVectorizer.of(fn)); } + /** Create a vector index with a {@code text2vec-digitalocean} vectorizer. */ + public static Map.Entry text2vecDigitalOcean() { + return text2vecDigitalOcean(VectorIndex.DEFAULT_VECTOR_NAME); + } + + /** + * Create a vector index with a {@code text2vec-digitalocean} vectorizer. + * + * @param fn Lambda expression for optional parameters. + */ + public static Map.Entry text2vecDigitalOcean( + Function> fn) { + return text2vecDigitalOcean(VectorIndex.DEFAULT_VECTOR_NAME, fn); + } + + /** + * Create a named vector index with a {@code text2vec-digitalocean} vectorizer. + * + * @param vectorName Vector name. + */ + public static Map.Entry text2vecDigitalOcean(String vectorName) { + return Map.entry(vectorName, Text2VecDigitalOceanVectorizer.of()); + } + + /** + * Create a named vector index with a {@code text2vec-digitalocean} vectorizer. + * + * @param vectorName Vector name. + * @param fn Lambda expression for optional parameters. + */ + public static Map.Entry text2vecDigitalOcean(String vectorName, + Function> fn) { + return Map.entry(vectorName, Text2VecDigitalOceanVectorizer.of(fn)); + } + /** Create a vector index with an {@code text2vec-mistral} vectorizer. */ public static Map.Entry text2vecMistral() { return text2vecMistral(VectorIndex.DEFAULT_VECTOR_NAME); @@ -1558,6 +1595,16 @@ default public Text2VecJinaAiVectorizer asText2VecJinaAi() { return _as(VectorConfig.Kind.TEXT2VEC_JINAAI); } + /** Is this an instance of {@link Text2VecDigitalOceanVectorizer}? */ + default public boolean isText2VecDigitalOcean() { + return _is(VectorConfig.Kind.TEXT2VEC_DIGITALOCEAN); + } + + /** Convert this instance to {@link Text2VecDigitalOceanVectorizer}. 
*/ + default public Text2VecDigitalOceanVectorizer asText2VecDigitalOcean() { + return _as(VectorConfig.Kind.TEXT2VEC_DIGITALOCEAN); + } + /** Is this an instance of {@link Text2VecMistralVectorizer}? */ default public boolean isText2VecMistral() { return _is(VectorConfig.Kind.TEXT2VEC_MISTRAL); @@ -1668,6 +1715,7 @@ private final void init(Gson gson) { addAdapter(gson, VectorConfig.Kind.TEXT2VEC_HUGGINGFACE, Text2VecHuggingFaceVectorizer.class); addAdapter(gson, VectorConfig.Kind.REF2VEC_CENTROID, Ref2VecCentroidVectorizer.class); addAdapter(gson, VectorConfig.Kind.TEXT2VEC_JINAAI, Text2VecJinaAiVectorizer.class); + addAdapter(gson, VectorConfig.Kind.TEXT2VEC_DIGITALOCEAN, Text2VecDigitalOceanVectorizer.class); addAdapter(gson, VectorConfig.Kind.TEXT2VEC_MISTRAL, Text2VecMistralVectorizer.class); addAdapter(gson, VectorConfig.Kind.TEXT2VEC_MORPH, Text2VecMorphVectorizer.class); addAdapter(gson, VectorConfig.Kind.TEXT2VEC_MODEL2VEC, Text2VecModel2VecVectorizer.class); diff --git a/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecDigitalOceanVectorizer.java b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecDigitalOceanVectorizer.java new file mode 100644 index 000000000..f2298b1a1 --- /dev/null +++ b/src/main/java/io/weaviate/client6/v1/api/collections/vectorizers/Text2VecDigitalOceanVectorizer.java @@ -0,0 +1,132 @@ +package io.weaviate.client6.v1.api.collections.vectorizers; + +import java.util.Arrays; +import java.util.List; +import java.util.function.Function; + +import com.google.gson.annotations.SerializedName; + +import io.weaviate.client6.v1.api.collections.Quantization; +import io.weaviate.client6.v1.api.collections.VectorConfig; +import io.weaviate.client6.v1.api.collections.VectorIndex; +import io.weaviate.client6.v1.internal.ObjectBuilder; + +public record Text2VecDigitalOceanVectorizer( + @SerializedName("baseURL") String baseUrl, + @SerializedName("model") String model, + + /** + * Weaviate defaults to 
{@code true} if the value is not provided. + * To avoid that we send "vectorizeClassName": false all the time + * and make it impossible to enable this feature, as it is deprecated. + */ + @Deprecated @SerializedName("vectorizeClassName") boolean vectorizeCollectionName, + /** Properties included in the embedding. */ + @SerializedName("properties") List sourceProperties, + /** Vector index configuration. */ + VectorIndex vectorIndex, + /** Vector quantization method. */ + Quantization quantization) implements VectorConfig { + + @Override + public VectorConfig.Kind _kind() { + return VectorConfig.Kind.TEXT2VEC_DIGITALOCEAN; + } + + @Override + public Object _self() { + return this; + } + + public static Text2VecDigitalOceanVectorizer of() { + return of(ObjectBuilder.identity()); + } + + public static Text2VecDigitalOceanVectorizer of( + Function> fn) { + return fn.apply(new Builder()).build(); + } + + /** + * Canonical constructor always sets {@link #vectorizeCollectionName} to false. + */ + public Text2VecDigitalOceanVectorizer( + String baseUrl, + String model, + + boolean vectorizeCollectionName, + List sourceProperties, + VectorIndex vectorIndex, + Quantization quantization) { + this.baseUrl = baseUrl; + this.model = model; + + this.vectorizeCollectionName = false; + this.sourceProperties = sourceProperties; + this.vectorIndex = vectorIndex; + this.quantization = quantization; + } + + public Text2VecDigitalOceanVectorizer(Builder builder) { + this( + builder.baseUrl, + builder.model, + + builder.vectorizeCollectionName, + builder.sourceProperties, + builder.vectorIndex, + builder.quantization); + } + + public static class Builder implements ObjectBuilder { + private final boolean vectorizeCollectionName = false; + private Quantization quantization; + private List sourceProperties; + private VectorIndex vectorIndex = VectorIndex.DEFAULT_VECTOR_INDEX; + + private String baseUrl; + private String model; + + public Builder baseUrl(String baseUrl) { + this.baseUrl = 
baseUrl; + return this; + } + + public Builder model(String model) { + this.model = model; + return this; + } + + /** Set the properties to include in the embedding. */ + public Builder sourceProperties(String... properties) { + return sourceProperties(Arrays.asList(properties)); + } + + /** Set the properties to include in the embedding. */ + public Builder sourceProperties(List properties) { + this.sourceProperties = properties; + return this; + } + + /** + * Override default vector index configuration. + * + * HNSW + * is the default vector index. + */ + public Builder vectorIndex(VectorIndex vectorIndex) { + this.vectorIndex = vectorIndex; + return this; + } + + public Builder quantization(Quantization quantization) { + this.quantization = quantization; + return this; + } + + public Text2VecDigitalOceanVectorizer build() { + return new Text2VecDigitalOceanVectorizer(this); + } + } +} diff --git a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java index 991fa937e..781a90223 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java @@ -55,6 +55,7 @@ import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecGoogleVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecHuggingFaceVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecJinaAiVectorizer; +import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecDigitalOceanVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecMistralVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecModel2VecVectorizer; import io.weaviate.client6.v1.api.collections.vectorizers.Text2VecMorphVectorizer; @@ -347,6 +348,54 @@ public static Object[][] testCases() { } """, }, + { + VectorConfig.class, + Text2VecDigitalOceanVectorizer.of(), + """ + { + 
"vectorIndexType": "hnsw", + "vectorIndexConfig": {}, + "vectorizer": { + "text2vec-digitalocean": { + "vectorizeClassName": false + } + } + } + """, + }, + { + VectorConfig.class, + Text2VecDigitalOceanVectorizer.of(v -> v.model("qwen3-embedding-0.6b").baseUrl("https://inference.do-ai.run")), + """ + { + "vectorIndexType": "hnsw", + "vectorIndexConfig": {}, + "vectorizer": { + "text2vec-digitalocean": { + "baseURL": "https://inference.do-ai.run", + "model": "qwen3-embedding-0.6b", + "vectorizeClassName": false + } + } + } + """, + }, + { + VectorConfig.class, + Text2VecDigitalOceanVectorizer.of(v -> v.sourceProperties("a")), + """ + { + "vectorIndexType": "hnsw", + "vectorIndexConfig": {}, + "vectorizer": { + "text2vec-digitalocean": { + "properties": ["a"], + "vectorizeClassName": false + } + } + } + """, + }, { VectorConfig.class, Text2VecModel2VecVectorizer.of(), From c4eaf1b8db4cc93ce32e5d30dd40971c0cac8d9c Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 14:54:02 +0200 Subject: [PATCH 2/2] Merge two DigitalOcean test cases per @bevzzz review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @bevzzz on #570: the `model + baseURL` test and the `sourceProperties` test both check the same thing — that a parameter is serialized correctly when set. Combine them into a single case that exercises all parameters at once. The empty/default test stays because it covers a structurally different scenario (the default `vectorizeClassName: false` payload with no other fields). This matches the leaner pattern used by other vectorizers in the same file. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../client6/v1/internal/json/JSONTest.java | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java index 781a90223..496e88e12 100644 --- a/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java +++ b/src/test/java/io/weaviate/client6/v1/internal/json/JSONTest.java @@ -365,7 +365,7 @@ public static Object[][] testCases() { }, { VectorConfig.class, - Text2VecDigitalOceanVectorizer.of(v -> v.model("qwen3-embedding-0.6b").baseUrl("https://inference.do-ai.run")), + Text2VecDigitalOceanVectorizer.of(v -> v.model("qwen3-embedding-0.6b").baseUrl("https://inference.do-ai.run").sourceProperties("a")), """ { "vectorIndexType": "hnsw", @@ -374,21 +374,6 @@ public static Object[][] testCases() { "text2vec-digitalocean": { "baseURL": "https://inference.do-ai.run", "model": "qwen3-embedding-0.6b", - "vectorizeClassName": false - } - } - } - """, - }, - { - VectorConfig.class, - Text2VecDigitalOceanVectorizer.of(v -> v.sourceProperties("a")), - """ - { - "vectorIndexType": "hnsw", - "vectorIndexConfig": {}, - "vectorizer": { - "text2vec-digitalocean": { "properties": ["a"], "vectorizeClassName": false }