Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: CI

on:
push:
branches: [main]
pull_request:

jobs:
tests:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
cache: 'pip'
- name: Install the project dependencies
run: |
pip install --upgrade pip
make install-dev
- name: Run lint
run: |
make lint-check
- name: Run tests
run: |
make tests
34 changes: 34 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Release

on:
release:
types: [released]

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
cache: 'pip'
- name: Install pypa/build
run: pip install build --user
- name: Install the project dependencies
run: make install
- name: Build wheel and tarball
run: python -m build . --outdir dist/

pypi-publish:
name: Publish to PyPI
needs:
- build
runs-on: ubuntu-latest
permissions:
# IMPORTANT: this permission is mandatory for trusted publishing
id-token: write
steps:
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ lint-check:
ruff format . --check
ruff check **/*.py

test:
pytest -n auto
tests:
pytest -n 5
2 changes: 1 addition & 1 deletion chromacache/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .chromacache import ChromaCache
from .chromacache import *
from .embedding_functions import *
13 changes: 2 additions & 11 deletions chromacache/chromacache.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
try:
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings
except Exception:
__import__("pysqlite3")
import sys

sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import chromadb
from chromadb import EmbeddingFunction
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings


class ChromaCache:
Expand Down
10 changes: 1 addition & 9 deletions chromacache/embedding_functions/AbstractEmbeddingFunction.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
try:
__import__("pysqlite3")
import sys

sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
except Exception:
pass

from abc import ABC, abstractmethod

import tiktoken
Expand Down Expand Up @@ -70,7 +62,7 @@ def __call__(self, input: Documents) -> Embeddings:
"""Wrapper that truncates the documents, encodes them

Args:
documents (Documents): List of documents
input (Documents): List of documents

Returns:
Embeddings: the encoded sentences
Expand Down
10 changes: 10 additions & 0 deletions chromacache/embedding_functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,13 @@
from .OpenAIEmbeddingFunction import OpenAIEmbeddingFunction
from .SentenceTransformerEmbeddingFunction import SentenceTransformerEmbeddingFunction
from .VoyageAIEmbeddingFunction import VoyageAIEmbeddingFunction

__all__ = [
"AbstractEmbeddingFunction",
"CohereEmbeddingFunction",
"LaserEmbeddingFunction",
"MistralAIEmbeddingFunction",
"OpenAIEmbeddingFunction",
"SentenceTransformerEmbeddingFunction",
"VoyageAIEmbeddingFunction",
]
19 changes: 11 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
requires = ["setuptools>=69.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
exclude = ["tests"]

[project]
name = "chromacache"
version = "0.0.1"
Expand All @@ -22,12 +25,15 @@ dependencies = [
"voyageai>=0.1.6",
"tiktoken>=0.5.2",
"openai>=1.6.1",
"chromadb>=0.4.21",
"chromadb>=0.5.3",
"python-dotenv>=1.0.1",
"cohere==4.41",
"laser_encoders==0.0.1",
"sentence-transformers==2.2.2",
"mistralai==0.0.12"
"cohere>=4.41",
"laser_encoders>=0.0.1",
"sentence-transformers>=2.2.2",
"mistralai>=0.0.12",
"fairseq>=0.12",
"hydra-core==1.3.2",
"omegaconf==2.3.0"
]

[project.urls]
Expand All @@ -36,9 +42,6 @@ Homepage = "https://github.com/Lyon-NLP/chroma_cache"
[project.optional-dependencies]
dev = ["ruff>=0.0.254", "pytest", "pytest-xdist"]

[tool.setuptools.packages.find]
exclude = ["tests", "results"]

[tool.ruff]
target-version = "py38"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_truncate_documents(tokenizer, sentences):
)

assert len(truncated_sentences) == len(sentences)

tokenized_truncated_sentences = [tokenizer.encode(s) for s in truncated_sentences]
for truncated_sentence in tokenized_truncated_sentences:
assert len(truncated_sentence) <= max_token_length