Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions docs/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ icon: rocket
---
import {
PyConnect,
PyConnectAsync,
PyConnectCloud,
PyConnectCloudAsync,
PyConnectObjectStorage,
PyConnectObjectStorageAsync,
RsConnect,
RsConnectCloud,
RsConnectObjectStorage,
Expand All @@ -17,7 +20,9 @@ import {
} from '/snippets/connection.mdx';
import {
PyQuickstartCreateTable,
PyQuickstartCreateTableAsync,
PyQuickstartVectorSearch1,
PyQuickstartVectorSearch1Async,
PyQuickstartOutputPandas,
RsQuickstartCreateTable,
RsQuickstartDefineStruct,
Expand Down Expand Up @@ -61,10 +66,14 @@ The simplest way to begin is to use LanceDB OSS. Simply import LanceDB as an emb
client SDK of choice and point to a local path.

<CodeGroup >
<CodeBlock filename="Python" language="Python" icon="python">
<CodeBlock filename="Python (sync)" language="Python" icon="python">
{PyConnect}
</CodeBlock>

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyConnectAsync}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsConnect}
</CodeBlock>
Expand All @@ -82,10 +91,14 @@ client SDK of choice and point to a local path.
You can also connect LanceDB OSS directly to object storage:

<CodeGroup >
<CodeBlock filename="Python" language="Python" icon="python">
<CodeBlock filename="Python (sync)" language="Python" icon="python">
{PyConnectObjectStorage}
</CodeBlock>

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyConnectObjectStorageAsync}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsConnectObjectStorage}
</CodeBlock>
Expand All @@ -105,10 +118,14 @@ along with any necessary credentials. Simply replace the local path with a remot
that points to where your data is stored, and you're ready to go.

<CodeGroup >
<CodeBlock filename="Python" language="Python" icon="python">
<CodeBlock filename="Python (sync)" language="Python" icon="python">
{PyConnectCloud}
</CodeBlock>

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyConnectCloudAsync}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsConnectCloud}
</CodeBlock>
Expand All @@ -131,10 +148,14 @@ Note that LanceDB tables require a schema. If you don't provide one, LanceDB
will infer it from the data.

<CodeGroup >
<CodeBlock filename="Python" language="Python" icon="python">
<CodeBlock filename="Python (sync)" language="Python" icon="python">
{PyQuickstartCreateTable}
</CodeBlock>

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyQuickstartCreateTableAsync}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsQuickstartCreateTable}
</CodeBlock>
Expand All @@ -161,10 +182,14 @@ Our query is a vector that represents a "warrior". Let's find the result that's
to it!

<CodeGroup >
<CodeBlock filename="Python" language="Python" icon="python">
<CodeBlock filename="Python (sync)" language="Python" icon="python">
{PyQuickstartVectorSearch1}
</CodeBlock>

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyQuickstartVectorSearch1Async}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsQuickstartVectorSearch1}
</CodeBlock>
Expand Down
7 changes: 6 additions & 1 deletion docs/search/full-text-search.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ let tbl = db

Create a full-text search index on your text column:

<Note>
In Python, this page shows the synchronous `create_fts_index(...)` form. For the
asynchronous equivalent (`await table.create_index("text", config=FTS(...))`), see
[FTS index](/indexing/fts-index).
</Note>

<CodeGroup>
```python Python icon="python"
table.create_fts_index("text")
Expand Down Expand Up @@ -1097,4 +1103,3 @@ const phraseResults = await table.query()
console.log(phraseResults);
```
</CodeGroup>

2 changes: 2 additions & 0 deletions docs/snippets/basic_usage.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ export const PyBasicAddColumns = "table.add_columns(\n {\n \"power\":

export const PyBasicAddData = "magical_characters = [\n {\n \"id\": 9,\n \"name\": \"Morgan le Fay\",\n \"role\": \"Sorceress\",\n \"description\": \"A powerful enchantress, Arthur's half-sister, and a complex figure who oscillates between aiding and opposing Camelot.\",\n \"vector\": [0.10, 0.84, 0.25, 0.70],\n \"stats\": { \"strength\": 2, \"courage\": 3, \"magic\": 5, \"wisdom\": 4 }\n },\n {\n \"id\": 10,\n \"name\": \"The Lady of the Lake\",\n \"role\": \"Mystical Guardian\",\n \"description\": \"A mysterious supernatural figure associated with Avalon, known for giving Arthur the sword Excalibur.\",\n \"vector\": [0.00, 0.90, 0.58, 0.88],\n \"stats\": { \"strength\": 2, \"courage\": 3, \"magic\": 5, \"wisdom\": 5 }\n }\n]\ntable.add(magical_characters)\n";

export const PyBasicAsyncApi = "import lancedb\n\nasync_db = await lancedb.connect_async(uri)\nasync_table = await async_db.create_table(\n \"camelot_async\",\n data=data,\n mode=\"overwrite\",\n)\n\nquery_vector = [0.03, 0.85, 0.61, 0.90]\nasync_results = await (\n await async_table.search(query_vector)\n).limit(5).select([\"name\", \"role\", \"description\"]).to_polars()\nprint(async_results)\n";

export const PyBasicCreateEmptyTable = "schema = pa.schema(\n [\n pa.field(\"id\", pa.uint16()),\n pa.field(\"name\", pa.string()),\n pa.field(\"role\", pa.string()),\n pa.field(\"description\", pa.string()),\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\n \"stats\",\n pa.struct(\n [\n pa.field(\"strength\", pa.int8()),\n pa.field(\"courage\", pa.int8()),\n pa.field(\"magic\", pa.int8()),\n pa.field(\"wisdom\", pa.int8()),\n ]\n ),\n ),\n ]\n)\ndb.create_table(\"camelot_pa\", schema=schema, mode=\"overwrite\")\n";

export const PyBasicCreateTable = "table = db.create_table(\"camelot\", data=data, mode=\"overwrite\")\n";
Expand Down
6 changes: 6 additions & 0 deletions docs/snippets/connection.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@

export const PyConnect = "import lancedb\n\nuri = \"ex_lancedb\"\ndb = lancedb.connect(uri)\n";

export const PyConnectAsync = "import lancedb\n\nuri = \"ex_lancedb\"\nasync_db = await lancedb.connect_async(uri)\n";

export const PyConnectCloud = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n";

export const PyConnectCloudAsync = "uri = \"db://your-database-uri\"\napi_key = \"your-api-key\"\nregion = \"us-east-1\"\n";

export const PyConnectObjectStorage = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\ndb = lancedb.connect(uri)\n";

export const PyConnectObjectStorageAsync = "import lancedb\n\nuri = \"s3://your-bucket/path\"\n# You can also use \"gs://your-bucket/path\" or \"az://your-container/path\".\nasync_db = await lancedb.connect_async(uri)\n";

export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n const db = await lancedb.connect(uri);\n return db;\n}\n";

export const TsConnectCloud = "const uri = \"db://your-database-uri\";\nconst apiKey = \"your-api-key\";\nconst region = \"us-east-1\";\n";
Expand Down
9 changes: 6 additions & 3 deletions docs/snippets/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ export const PyQuickstartAddData = "more_data = [\n {\"id\": \"7\", \"text\":

export const PyQuickstartCreateTable = "data = [\n {\"id\": \"1\", \"text\": \"knight\", \"vector\": [0.9, 0.4, 0.8]},\n {\"id\": \"2\", \"text\": \"ranger\", \"vector\": [0.8, 0.4, 0.7]},\n {\"id\": \"9\", \"text\": \"priest\", \"vector\": [0.6, 0.2, 0.6]},\n {\"id\": \"4\", \"text\": \"rogue\", \"vector\": [0.7, 0.4, 0.7]},\n]\ntable = db.create_table(\"adventurers\", data=data, mode=\"overwrite\")\n";

export const PyQuickstartCreateTableAsync = "async_table = await async_db.create_table(\n \"adventurers\",\n data=data,\n mode=\"overwrite\",\n)\n";

export const PyQuickstartCreateTableNoOverwrite = "table = db.create_table(\"adventurers\", data=data)\n";

export const PyQuickstartOpenTable = "table = db.open_table(\"adventurers\")\n";
Expand All @@ -12,8 +14,12 @@ export const PyQuickstartOutputPandas = "# Ensure you run `pip install pandas` b

export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nresult = table.search(query_vector).limit(2).to_polars()\nprint(result)\n";

export const PyQuickstartVectorSearch1Async = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nasync_result = await (await async_table.search(query_vector)).limit(2).to_polars()\nprint(async_result)\n";

export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n";

export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartAddData = "const moreData = [\n { id: \"7\", text: \"mage\", vector: [0.6, 0.3, 0.4] },\n { id: \"8\", text: \"bard\", vector: [0.3, 0.8, 0.4] },\n];\n\n// Add data to table\nawait table.add(moreData);\n";

export const TsQuickstartCreateTable = "const data = [\n { id: \"1\", text: \"knight\", vector: [0.9, 0.4, 0.8] },\n { id: \"2\", text: \"ranger\", vector: [0.8, 0.4, 0.7] },\n { id: \"9\", text: \"priest\", vector: [0.6, 0.2, 0.6] },\n { id: \"4\", text: \"rogue\", vector: [0.7, 0.4, 0.7] },\n];\nlet table = await db.createTable(\"adventurers\", data, { mode: \"overwrite\" });\n";
Expand All @@ -24,8 +30,6 @@ export const TsQuickstartOpenTable = "table = await db.openTable(\"adventurers\"

export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n";

export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n";

export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.table(results);\n";
Expand All @@ -45,4 +49,3 @@ export const RsQuickstartOutputArray = "let result: DataFrame = table\n .quer
export const RsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet query_vector = [0.8, 0.3, 0.8];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\n";

export const RsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nlet query_vector = [0.7, 0.3, 0.5];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n";

12 changes: 12 additions & 0 deletions docs/tables/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
PyBasicCreateEmptyTable,
PyBasicCreateTablePandas,
PyBasicCreateTablePolars,
PyBasicAsyncApi,
PyBasicVectorSearch,
PyBasicVectorSearchQ1,
PyBasicVectorSearchQ2,
Expand Down Expand Up @@ -64,6 +65,17 @@ explore some more table operations you'll typically need when working with Lance
- **Filtered queries** that can operate on nested structs
- **Interoperate with DuckDB** and run traditional SQL queries on an Arrow table (Python)

<Note title="Python async users">
This page uses synchronous Python snippets for readability. If your app uses `asyncio`,
the same flow works with `connect_async(...)` and `await`-based table/query calls.
Use the example below as a template, and see [Quickstart](/quickstart#python-sync-and-async-apis)
for the general mapping.

<CodeBlock filename="Python (async)" language="Python" icon="python">
{PyBasicAsyncApi}
</CodeBlock>
</Note>

## Dataset

We'll work with this small dataset based on characters from the legends of Camelot. Note that
Expand Down
26 changes: 26 additions & 0 deletions tests/py/test_basic_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,29 @@ def test_basic_usage(db_path_factory):
db.drop_table("camelot")
# --8<-- [end:basic_drop_table]
assert "camelot" not in db.table_names()


@pytest.mark.asyncio
async def test_basic_usage_async_api(db_path_factory):
    """Run the async create-table and vector-search flow end to end.

    Loads the sample records from ``data_path``, creates the ``camelot_async``
    table through an async connection, runs a vector search limited to five
    rows and checks that at least one row comes back.

    The region between the ``basic_async_api`` markers is the code shown in the
    docs, so it is kept free of test-only details.
    """
    uri = db_path_factory("basic_usage_async_db")
    # JSON is UTF-8 by specification; do not rely on the platform's locale encoding.
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # --8<-- [start:basic_async_api]
    import lancedb

    async_db = await lancedb.connect_async(uri)
    async_table = await async_db.create_table(
        "camelot_async",
        data=data,
        mode="overwrite",
    )

    query_vector = [0.03, 0.85, 0.61, 0.90]
    async_results = await (
        await async_table.search(query_vector)
    ).limit(5).select(["name", "role", "description"]).to_polars()
    print(async_results)
    # --8<-- [end:basic_async_api]

    assert async_results.height >= 1
30 changes: 30 additions & 0 deletions tests/py/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,31 @@ def test_connection():
assert not Path(uri).exists()


async def connect_async_example():
    """Open the database at the path ``ex_lancedb`` through the async API.

    The code between the ``connect_async`` markers is the async connection
    snippet, so anything added inside that region also ends up in the snippet.

    Returns:
        The object produced by ``await lancedb.connect_async(uri)``.
    """
    # --8<-- [start:connect_async]
    import lancedb

    uri = "ex_lancedb"
    async_db = await lancedb.connect_async(uri)
    # --8<-- [end:connect_async]

    return async_db


# Placeholder cloud connection settings; only the marked region is the snippet.
# --8<-- [start:connect_cloud]
uri = "db://your-database-uri"
api_key = "your-api-key"
region = "us-east-1"
# --8<-- [end:connect_cloud]


# NOTE(review): this region is identical to connect_cloud above and contains no
# async connection call; it also rebinds the same module-level names. Confirm
# that an identical snippet is what the async tab is meant to show.
# --8<-- [start:connect_cloud_async]
uri = "db://your-database-uri"
api_key = "your-api-key"
region = "us-east-1"
# --8<-- [end:connect_cloud_async]


def connect_object_storage_config():
# --8<-- [start:connect_object_storage]
import lancedb
Expand All @@ -33,3 +51,15 @@ def connect_object_storage_config():
# --8<-- [end:connect_object_storage]

return db


async def connect_object_storage_config_async():
    """Open a database stored under an ``s3://`` URI through the async API.

    The code between the ``connect_object_storage_async`` markers is the async
    object-storage snippet, so anything added inside that region also ends up
    in the snippet.

    Returns:
        The object produced by ``await lancedb.connect_async(uri)``.
    """
    # --8<-- [start:connect_object_storage_async]
    import lancedb

    uri = "s3://your-bucket/path"
    # You can also use "gs://your-bucket/path" or "az://your-container/path".
    async_db = await lancedb.connect_async(uri)
    # --8<-- [end:connect_object_storage_async]

    return async_db
31 changes: 31 additions & 0 deletions tests/py/test_quickstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors

import lancedb
import pytest

def test_quickstart(db_path_factory):
uri = "quickstart_db"
Expand Down Expand Up @@ -67,4 +68,34 @@ def test_quickstart(db_path_factory):
assert results.head(1)["text"][0] == "mage"


@pytest.mark.asyncio
async def test_quickstart_async_api(db_path_factory):
    """Run the async quickstart flow: create the table, then search it.

    Creates the ``adventurers`` table through an async connection, searches for
    the two nearest vectors to the "warrior" query and checks that "knight" is
    the top hit.

    The marked regions are the async quickstart snippets. The create-table
    region includes the ``data`` definition so the snippet is self-contained,
    as the synchronous create-table snippet is.
    """
    db_uri = db_path_factory("quickstart_async_db")
    # ``lancedb`` is already imported at module level; no local import needed.
    async_db = await lancedb.connect_async(db_uri)

    # --8<-- [start:quickstart_create_table_async]
    data = [
        {"id": "1", "text": "knight", "vector": [0.9, 0.4, 0.8]},
        {"id": "2", "text": "ranger", "vector": [0.8, 0.4, 0.7]},
        {"id": "9", "text": "priest", "vector": [0.6, 0.2, 0.6]},
        {"id": "4", "text": "rogue", "vector": [0.7, 0.4, 0.7]},
    ]
    async_table = await async_db.create_table(
        "adventurers",
        data=data,
        mode="overwrite",
    )
    # --8<-- [end:quickstart_create_table_async]

    # --8<-- [start:quickstart_vector_search_1_async]
    # Let's search for vectors similar to "warrior"
    query_vector = [0.8, 0.3, 0.8]

    # Ensure you run `pip install polars` beforehand
    async_result = await (await async_table.search(query_vector)).limit(2).to_polars()
    print(async_result)
    # --8<-- [end:quickstart_vector_search_1_async]

    assert async_result.head(1)["text"][0] == "knight"
Loading