Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions docs/embedding/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ embedding generation, allowing you to focus on your application logic.

## Embedding Registry

<Badge color="green">OSS</Badge>

In LanceDB OSS, you can get a supported embedding function from the registry, and then use it in your table schema.
You can get a supported embedding function from the registry, and then use it in your table schema.
Once configured, the embedding function will automatically generate embeddings when you insert data
into the table. Query-time behavior depends on SDK: Python/TypeScript can query with text directly,
while Rust examples typically compute query embeddings explicitly before vector search.
Expand Down
26 changes: 19 additions & 7 deletions docs/snippets/tables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,25 @@ export const PyCreateTableFromPydantic = "from lancedb.pydantic import LanceMode

export const PyCreateTableNestedSchema = "from lancedb.pydantic import LanceModel, Vector\n\n# --8<-- [start:tables_document_model]\nfrom pydantic import BaseModel\n\nclass Document(BaseModel):\n content: str\n source: str\n\n# --8<-- [end:tables_document_model]\n\nclass NestedSchema(LanceModel):\n id: str\n vector: Vector(1536)\n document: Document\n\ndb = tmp_db\ntbl = db.create_table(\"nested_table\", schema=NestedSchema, mode=\"overwrite\")\n";

export const PyDeleteOperation = "# delete data\npredicate = \"price = 30.0\"\ntable.delete(predicate)\n";
export const PyDeleteOperation = "# delete data\npredicate = \"id = 3\"\ntable.delete(predicate)\n";

export const PyDropColumnsMultiple = "# Remove the second temporary column\ntable.drop_columns([\"temp_col2\"])\n";

export const PyDropColumnsSingle = "# Remove the first temporary column\ntable.drop_columns([\"temp_col1\"])\n";

export const PyDropTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5}]\ndb.create_table(\"my_table\", data, mode=\"overwrite\")\n\n# Drop the table\ndb.drop_table(\"my_table\")\n";

export const PyInsertIfNotExists = "# Create example table\ntable = db.create_table(\n \"domains\",\n [\n {\"domain\": \"google.com\", \"name\": \"Google\"},\n {\"domain\": \"github.com\", \"name\": \"GitHub\"},\n ],\n mode=\"overwrite\",\n)\n\n# Prepare new data - one existing and one new record\nnew_domains = [\n {\"domain\": \"google.com\", \"name\": \"Google\"},\n {\"domain\": \"facebook.com\", \"name\": \"Facebook\"},\n]\n\n# Insert only if domain doesn't exist\ntable.merge_insert(\"domain\").when_not_matched_insert_all().execute(new_domains)\n\n# Verify count - should be 3 (original 2 plus 1 new)\nprint(f\"Total domains: {table.count_rows()}\") # 3\n";
export const PyInsertIfNotExists = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_not_matched_insert_all()\n .execute(incoming_users)\n)\n";

export const PyOpenExistingTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7}]\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\n\n# List table names\nprint(db.table_names())\n\n# Open existing table\ntbl = db.open_table(\"test_table\")\n";
export const PyMergeDeleteMissingBySource = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2, 3],\n \"name\": [\"Alice\", \"Bob\", \"Charlie\"],\n \"login_count\": [10, 20, 5],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .when_not_matched_by_source_delete()\n .execute(incoming_users)\n)\n";

export const PyMergeMatchedUpdateOnly = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .execute(incoming_users)\n)\n";

export const PyMergePartialColumns = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .execute(incoming_users)\n)\n";

export const PyMergeUpdateInsert = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n\nincoming_users = pa.table(\n {\n \"id\": [2, 3],\n \"name\": [\"Bobby\", \"Charlie\"],\n \"login_count\": [21, 5],\n }\n)\n\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .execute(incoming_users)\n)\n";

export const PyReplaceRangeOperation = "# Create example table with document chunks\ntable = db.create_table(\n \"chunks\",\n [\n {\"doc_id\": 0, \"chunk_id\": 0, \"text\": \"Hello\"},\n {\"doc_id\": 0, \"chunk_id\": 1, \"text\": \"World\"},\n {\"doc_id\": 1, \"chunk_id\": 0, \"text\": \"Foo\"},\n {\"doc_id\": 1, \"chunk_id\": 1, \"text\": \"Bar\"},\n {\"doc_id\": 2, \"chunk_id\": 0, \"text\": \"Baz\"},\n ],\n mode=\"overwrite\",\n)\n\n# New data - replacing all chunks for doc_id 1 with just one chunk\nnew_chunks = [\n {\"doc_id\": 1, \"chunk_id\": 0, \"text\": \"Zoo\"},\n]\n\n# Replace all chunks for doc_id 1\n(\n table.merge_insert([\"doc_id\"])\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .when_not_matched_by_source_delete(\"doc_id = 1\")\n .execute(new_chunks)\n)\n\n# Verify count for doc_id = 1 - should be 1\nprint(f\"Chunks for doc_id = 1: {table.count_rows('doc_id = 1')}\") # 1\n";
export const PyOpenExistingTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7}]\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\n\n# List table names\nprint(db.table_names())\n\n# Open existing table\ntbl = db.open_table(\"test_table\")\n";

export const PySchemaAddSetup = "table_name = \"schema_evolution_add_example\"\nif data is None:\n data = [\n {\n \"id\": 1,\n \"name\": \"Laptop\",\n \"price\": 1200.00,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 2,\n \"name\": \"Smartphone\",\n \"price\": 800.00,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 3,\n \"name\": \"Headphones\",\n \"price\": 150.00,\n \"vector\": np.random.random(128).tolist(),\n },\n ]\ntable = tmp_db.create_table(table_name, data, mode=\"overwrite\")\n";

Expand All @@ -76,11 +82,17 @@ export const PyTablesImports = "import lancedb\nimport numpy as np\nimport panda

export const PyTablesTzValidator = "from datetime import datetime\nfrom zoneinfo import ZoneInfo\n\nfrom lancedb.pydantic import LanceModel\nfrom pydantic import Field, ValidationError, ValidationInfo, field_validator\n\ntzname = \"America/New_York\"\ntz = ZoneInfo(tzname)\n\nclass TestModel(LanceModel):\n dt_with_tz: datetime = Field(json_schema_extra={\"tz\": tzname})\n\n @field_validator(\"dt_with_tz\")\n @classmethod\n def tz_must_match(cls, dt: datetime) -> datetime:\n assert dt.tzinfo == tz\n return dt\n\nok = TestModel(dt_with_tz=datetime.now(tz))\n\ntry:\n TestModel(dt_with_tz=datetime.now(ZoneInfo(\"Asia/Shanghai\")))\n assert 0 == 1, \"this should raise ValidationError\"\nexcept ValidationError:\n print(\"A ValidationError was raised.\")\n pass\n";

export const PyUpdateOperation = "import pandas as pd\n\n# Create a table from a pandas DataFrame\ndata = pd.DataFrame({\"x\": [1, 2, 3], \"vector\": [[1, 2], [3, 4], [5, 6]]})\ntbl = db.create_table(\"test_table\", data, mode=\"overwrite\")\n# Update the table where x = 2\ntbl.update(where=\"x = 2\", values={\"vector\": [10, 10]})\n# Get the updated table as a pandas DataFrame\ndf = tbl.to_pandas()\nprint(df)\n";
export const PyUpdateConnectCloud = "import lancedb\n\ndb = lancedb.connect(\n uri=\"db://your-project-slug\",\n api_key=\"your-api-key\",\n region=\"us-east-1\",\n)\n";

export const PyUpdateConnectLocal = "import lancedb\n\ndb = lancedb.connect(\"./data\")\n";

export const PyUpdateExampleTableSetup = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\n";

export const PyUpdateOperation = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\ntable.update(where=\"id = 2\", values={\"name\": \"Bobby\"})\n";

export const PyUpdateUsingSql = "import pandas as pd\n\n# Create a table from a pandas DataFrame\ndata = pd.DataFrame({\"x\": [1, 2, 3], \"vector\": [[1, 2], [3, 4], [5, 6]]})\ntbl = db.create_table(\"test_table\", data, mode=\"overwrite\")\n# Update all rows: increment x by 1\ntbl.update(values_sql={\"x\": \"x + 1\"})\nprint(tbl.to_pandas())\n";
export const PyUpdateOptimizeCleanup = "from datetime import timedelta\n\ntable.optimize(cleanup_older_than=timedelta(days=1))\n";

export const PyUpsertOperation = "# Create example table\nusers_table_name = \"users_example\"\ntable = db.create_table(\n users_table_name,\n [\n {\"id\": 0, \"name\": \"Alice\"},\n {\"id\": 1, \"name\": \"Bob\"},\n ],\n mode=\"overwrite\",\n)\nprint(f\"Created users table with {table.count_rows()} rows\")\n\n# Prepare data for upsert\nnew_users = [\n {\"id\": 1, \"name\": \"Bobby\"}, # Will update existing record\n {\"id\": 2, \"name\": \"Charlie\"}, # Will insert new record\n]\n\n# Upsert by id\n(\n table.merge_insert(\"id\")\n .when_matched_update_all()\n .when_not_matched_insert_all()\n .execute(new_users)\n)\n\n# Verify results - should be 3 records total\nprint(f\"Total users: {table.count_rows()}\") # 3\n";
export const PyUpdateUsingSql = "import pyarrow as pa\n\ntable = db.create_table(\n \"users_example\",\n data=pa.table(\n {\n \"id\": [1, 2],\n \"name\": [\"Alice\", \"Bob\"],\n \"login_count\": [10, 20],\n }\n ),\n mode=\"overwrite\",\n)\ntable.update(where=\"id = 2\", values_sql={\"login_count\": \"login_count + 1\"})\n";

export const PyVersioningAddData = "# Add more data\nmore_data = [\n {\n \"id\": 4,\n \"author\": \"Richard Daniel Sanchez\",\n \"quote\": \"That's the way the news goes!\",\n },\n {\"id\": 5, \"author\": \"Morty\", \"quote\": \"Aww geez, Rick!\"},\n]\ntable.add(more_data)\n";

Expand Down
Loading
Loading