From cff538895f065837208be52fcd84a3f3821a7659 Mon Sep 17 00:00:00 2001 From: prrao87 <35005448+prrao87@users.noreply.github.com> Date: Fri, 20 Feb 2026 09:59:08 -0500 Subject: [PATCH] Add Python async quickstart snippets --- docs/quickstart.mdx | 35 +++++++++++++++++++++++++++----- docs/search/full-text-search.mdx | 7 ++++++- docs/snippets/basic_usage.mdx | 2 ++ docs/snippets/connection.mdx | 6 ++++++ docs/snippets/quickstart.mdx | 9 +++++--- docs/tables/index.mdx | 12 +++++++++++ tests/py/test_basic_usage.py | 26 ++++++++++++++++++++++++ tests/py/test_connection.py | 30 +++++++++++++++++++++++++++ tests/py/test_quickstart.py | 31 ++++++++++++++++++++++++++++ 9 files changed, 149 insertions(+), 9 deletions(-) diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 0edd879..1b0cff7 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -6,8 +6,11 @@ icon: rocket --- import { PyConnect, + PyConnectAsync, PyConnectCloud, + PyConnectCloudAsync, PyConnectObjectStorage, + PyConnectObjectStorageAsync, RsConnect, RsConnectCloud, RsConnectObjectStorage, @@ -17,7 +20,9 @@ import { } from '/snippets/connection.mdx'; import { PyQuickstartCreateTable, + PyQuickstartCreateTableAsync, PyQuickstartVectorSearch1, + PyQuickstartVectorSearch1Async, PyQuickstartOutputPandas, RsQuickstartCreateTable, RsQuickstartDefineStruct, @@ -61,10 +66,14 @@ The simplest way to begin is to use LanceDB OSS. Simply import LanceDB as an emb client SDK of choice and point to a local path. - + {PyConnect} + + {PyConnectAsync} + + {TsConnect} @@ -82,10 +91,14 @@ client SDK of choice and point to a local path. You can also connect LanceDB OSS directly to object storage: - + {PyConnectObjectStorage} + + {PyConnectObjectStorageAsync} + + {TsConnectObjectStorage} @@ -105,10 +118,14 @@ along with any encessary credentials. Simply replace the local path with a remot that points to where your data is stored, and you're ready to go. - + {PyConnectCloud} + + {PyConnectCloudAsync} + + {TsConnectCloud} @@ -131,10 +148,14 @@ Note that LanceDB tables require a schema. If you don't provide one, LanceDB will infer it from the data. - + {PyQuickstartCreateTable} + + {PyQuickstartCreateTableAsync} + + {TsQuickstartCreateTable} @@ -161,10 +182,14 @@ Our query is a vector that represents a "warrior". Let's find the result that's to it! - + {PyQuickstartVectorSearch1} + + {PyQuickstartVectorSearch1Async} + + {TsQuickstartVectorSearch1} diff --git a/docs/search/full-text-search.mdx b/docs/search/full-text-search.mdx index 29b881f..2d0d28b 100644 --- a/docs/search/full-text-search.mdx +++ b/docs/search/full-text-search.mdx @@ -64,6 +64,12 @@ let tbl = db Create a full-text search index on your text column: + +In Python, this page shows the synchronous `create_fts_index(...)` form. For the +asynchronous equivalent (`await table.create_index("text", config=FTS(...))`), see +[FTS index](/indexing/fts-index). + + ```python Python icon="python" table.create_fts_index("text") @@ -1097,4 +1103,3 @@ const phraseResults = await table.query() console.log(phraseResults); ``` - diff --git a/docs/snippets/basic_usage.mdx b/docs/snippets/basic_usage.mdx index e33df7f..7d6ea72 100644 --- a/docs/snippets/basic_usage.mdx +++ b/docs/snippets/basic_usage.mdx @@ -4,6 +4,8 @@ export const PyBasicAddColumns = "table.add_columns(\n {\n \"power\": export const PyBasicAddData = "magical_characters = [\n {\n \"id\": 9,\n \"name\": \"Morgan le Fay\",\n \"role\": \"Sorceress\",\n \"description\": \"A powerful enchantress, Arthur's half-sister, and a complex figure who oscillates between aiding and opposing Camelot.\",\n \"vector\": [0.10, 0.84, 0.25, 0.70],\n \"stats\": { \"strength\": 2, \"courage\": 3, \"magic\": 5, \"wisdom\": 4 }\n },\n {\n \"id\": 10,\n \"name\": \"The Lady of the Lake\",\n \"role\": \"Mystical Guardian\",\n \"description\": \"A mysterious supernatural figure associated with Avalon, known for giving Arthur the sword Excalibur.\",\n \"vector\": [0.00, 0.90, 0.58, 0.88],\n \"stats\": { \"strength\": 2, \"courage\": 3, \"magic\": 5, \"wisdom\": 5 }\n }\n]\ntable.add(magical_characters)\n"; +export const PyBasicAsyncApi = "import lancedb\n\nasync_db = await lancedb.connect_async(uri)\nasync_table = await async_db.create_table(\n \"camelot_async\",\n data=data,\n mode=\"overwrite\",\n)\n\nquery_vector = [0.03, 0.85, 0.61, 0.90]\nasync_results = await (\n await async_table.search(query_vector)\n).limit(5).select([\"name\", \"role\", \"description\"]).to_polars()\nprint(async_results)\n"; + export const PyBasicCreateEmptyTable = "schema = pa.schema(\n [\n pa.field(\"id\", pa.uint16()),\n pa.field(\"name\", pa.string()),\n pa.field(\"role\", pa.string()),\n pa.field(\"description\", pa.string()),\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\n \"stats\",\n pa.struct(\n [\n pa.field(\"strength\", pa.int8()),\n pa.field(\"courage\", pa.int8()),\n pa.field(\"magic\", pa.int8()),\n pa.field(\"wisdom\", pa.int8()),\n ]\n ),\n ),\n ]\n)\ndb.create_table(\"camelot_pa\", schema=schema, mode=\"overwrite\")\n"; export const PyBasicCreateTable = "table = db.create_table(\"camelot\", data=data, mode=\"overwrite\")\n"; diff --git a/docs/snippets/connection.mdx b/docs/snippets/connection.mdx index 5977a33..bdb202e 100644 --- a/docs/snippets/connection.mdx +++ b/docs/snippets/connection.mdx @@ -2,10 +2,16 @@ export const PyConnect = "import lancedb\n\nuri = \"ex_lancedb\"\ndb = lancedb.connect(uri)\n"; +export const PyConnectAsync = "import lancedb\n\nuri = \"ex_lancedb\"\nasync_db = await lancedb.connect_async(uri)\n"; + export const PyConnectCloud = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n"; +export const PyConnectCloudAsync = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n"; + export const PyConnectObjectStorage = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\ndb = lancedb.connect(uri)\n"; +export const PyConnectObjectStorageAsync = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\nasync_db = await lancedb.connect_async(uri)\n"; + export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n const db = await lancedb.connect(uri);\n return db;\n}\n"; export const TsConnectCloud = "const uri = \"db://your-database-uri\";\nconst apiKey = \"your-api-key\";\nconst region = \"us-east-1\";\n"; diff --git a/docs/snippets/quickstart.mdx b/docs/snippets/quickstart.mdx index 570b8da..11479ed 100644 --- a/docs/snippets/quickstart.mdx +++ b/docs/snippets/quickstart.mdx @@ -4,6 +4,8 @@ export const PyQuickstartAddData = "more_data = [\n {\"id\": \"7\", \"text\": export const PyQuickstartCreateTable = "data = [\n {\"id\": \"1\", \"text\": \"knight\", \"vector\": [0.9, 0.4, 0.8]},\n {\"id\": \"2\", \"text\": \"ranger\", \"vector\": [0.8, 0.4, 0.7]},\n {\"id\": \"9\", \"text\": \"priest\", \"vector\": [0.6, 0.2, 0.6]},\n {\"id\": \"4\", \"text\": \"rogue\", \"vector\": [0.7, 0.4, 0.7]},\n]\ntable = db.create_table(\"adventurers\", data=data, mode=\"overwrite\")\n"; +export const PyQuickstartCreateTableAsync = "async_table = await async_db.create_table(\n \"adventurers\",\n data=data,\n mode=\"overwrite\",\n)\n"; + export const PyQuickstartCreateTableNoOverwrite = "table = db.create_table(\"adventurers\", data=data)\n"; export const PyQuickstartOpenTable = "table = db.open_table(\"adventurers\")\n"; @@ -12,8 +14,12 @@ export const PyQuickstartOutputPandas = "# Ensure you run `pip install pandas` b export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nresult = table.search(query_vector).limit(2).to_polars()\nprint(result)\n"; +export const PyQuickstartVectorSearch1Async = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nasync_result = await (await async_table.search(query_vector)).limit(2).to_polars()\nprint(async_result)\n"; + export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n"; +export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n"; + export const TsQuickstartAddData = "const moreData = [\n { id: \"7\", text: \"mage\", vector: [0.6, 0.3, 0.4] },\n { id: \"8\", text: \"bard\", vector: [0.3, 0.8, 0.4] },\n];\n\n// Add data to table\nawait table.add(moreData);\n"; export const TsQuickstartCreateTable = "const data = [\n { id: \"1\", text: \"knight\", vector: [0.9, 0.4, 0.8] },\n { id: \"2\", text: \"ranger\", vector: [0.8, 0.4, 0.7] },\n { id: \"9\", text: \"priest\", vector: [0.6, 0.2, 0.6] },\n { id: \"4\", text: \"rogue\", vector: [0.7, 0.4, 0.7] },\n];\nlet table = await db.createTable(\"adventurers\", data, { mode: \"overwrite\" });\n"; @@ -24,8 +30,6 @@ export const TsQuickstartOpenTable = "table = await db.openTable(\"adventurers\" export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n"; -export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n"; - export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n"; export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.table(results);\n"; @@ -45,4 +49,3 @@ export const RsQuickstartOutputArray = "let result: DataFrame = table\n .quer export const RsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet query_vector = [0.8, 0.3, 0.8];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\n"; export const RsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nlet query_vector = [0.7, 0.3, 0.5];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n"; - diff --git a/docs/tables/index.mdx b/docs/tables/index.mdx index a89769e..818ed74 100644 --- a/docs/tables/index.mdx +++ b/docs/tables/index.mdx @@ -15,6 +15,7 @@ import { PyBasicCreateEmptyTable, PyBasicCreateTablePandas, PyBasicCreateTablePolars, + PyBasicAsyncApi, PyBasicVectorSearch, PyBasicVectorSearchQ1, PyBasicVectorSearchQ2, @@ -64,6 +65,17 @@ explore some more table operations you'll typically need when working with Lance - **Filtered queries** that can operate on nested structs - **Interoperate with DuckDB** and run traditional SQL queries on an Arrow table (Python) + +This page uses synchronous Python snippets for readability. If your app uses `asyncio`, +the same flow works with `connect_async(...)` and `await`-based table/query calls. +Use the example below as a template, and see [Quickstart](/quickstart#python-sync-and-async-apis) +for the general mapping. + + +{PyBasicAsyncApi} + + + ## Dataset We'll work with this small dataset based on characters from the legends of Camelot. Note that diff --git a/tests/py/test_basic_usage.py b/tests/py/test_basic_usage.py index cd27e80..52e80af 100644 --- a/tests/py/test_basic_usage.py +++ b/tests/py/test_basic_usage.py @@ -169,3 +169,29 @@ def test_basic_usage(db_path_factory): db.drop_table("camelot") # --8<-- [end:basic_drop_table] assert "camelot" not in db.table_names() + + +@pytest.mark.asyncio +async def test_basic_usage_async_api(db_path_factory): + uri = db_path_factory("basic_usage_async_db") + with open(data_path, "r") as f: + data = json.load(f) + + # --8<-- [start:basic_async_api] + import lancedb + + async_db = await lancedb.connect_async(uri) + async_table = await async_db.create_table( + "camelot_async", + data=data, + mode="overwrite", + ) + + query_vector = [0.03, 0.85, 0.61, 0.90] + async_results = await ( + await async_table.search(query_vector) + ).limit(5).select(["name", "role", "description"]).to_polars() + print(async_results) + # --8<-- [end:basic_async_api] + + assert async_results.height >= 1 diff --git a/tests/py/test_connection.py b/tests/py/test_connection.py index 32195df..5762f23 100644 --- a/tests/py/test_connection.py +++ b/tests/py/test_connection.py @@ -16,6 +16,17 @@ def test_connection(): assert not Path(uri).exists() +async def connect_async_example(): + # --8<-- [start:connect_async] + import lancedb + + uri = "ex_lancedb" + async_db = await lancedb.connect_async(uri) + # --8<-- [end:connect_async] + + return async_db + + # --8<-- [start:connect_cloud] uri = "db://your-database-uri" api_key = "your-api-key" @@ -23,6 +34,13 @@ def test_connection(): # --8<-- [end:connect_cloud] +# --8<-- [start:connect_cloud_async] +uri = "db://your-database-uri" +api_key = "your-api-key" +region = "us-east-1" +# --8<-- [end:connect_cloud_async] + + def connect_object_storage_config(): # --8<-- [start:connect_object_storage] import lancedb @@ -33,3 +51,15 @@ def connect_object_storage_config(): # --8<-- [end:connect_object_storage] return db + + +async def connect_object_storage_config_async(): + # --8<-- [start:connect_object_storage_async] + import lancedb + + uri = "s3://your-bucket/path" + # You can also use "gs://your-bucket/path" or "az://your-container/path". + async_db = await lancedb.connect_async(uri) + # --8<-- [end:connect_object_storage_async] + + return async_db diff --git a/tests/py/test_quickstart.py b/tests/py/test_quickstart.py index 0e952bb..58d25c2 100644 --- a/tests/py/test_quickstart.py +++ b/tests/py/test_quickstart.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright The LanceDB Authors import lancedb +import pytest def test_quickstart(db_path_factory): uri = "quickstart_db" @@ -67,4 +68,34 @@ def test_quickstart(db_path_factory): assert results.head(1)["text"][0] == "mage" +@pytest.mark.asyncio +async def test_quickstart_async_api(db_path_factory): + db_uri = db_path_factory("quickstart_async_db") + import lancedb + async_db = await lancedb.connect_async(db_uri) + data = [ + {"id": "1", "text": "knight", "vector": [0.9, 0.4, 0.8]}, + {"id": "2", "text": "ranger", "vector": [0.8, 0.4, 0.7]}, + {"id": "9", "text": "priest", "vector": [0.6, 0.2, 0.6]}, + {"id": "4", "text": "rogue", "vector": [0.7, 0.4, 0.7]}, + ] + + # --8<-- [start:quickstart_create_table_async] + async_table = await async_db.create_table( + "adventurers", + data=data, + mode="overwrite", + ) + # --8<-- [end:quickstart_create_table_async] + + # --8<-- [start:quickstart_vector_search_1_async] + # Let's search for vectors similar to "warrior" + query_vector = [0.8, 0.3, 0.8] + + # Ensure you run `pip install polars` beforehand + async_result = await (await async_table.search(query_vector)).limit(2).to_polars() + print(async_result) + # --8<-- [end:quickstart_vector_search_1_async] + + assert async_result.head(1)["text"][0] == "knight"