From 750a72b571fc0b56a01360501a79cf2980239314 Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Sun, 23 Jun 2024 23:09:25 +0200 Subject: [PATCH 1/7] add CI and release workflows --- .github/workflows/ci.yml | 28 ++++++++++++++++++++++++++++ .github/workflows/release.yml | 33 +++++++++++++++++++++++++++++++++ pyproject.toml | 6 +++--- 3 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..114f750 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,28 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +jobs: + tests: + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12"] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install the project dependencies + run: make install-dev + - name: Run lint + run: | + make lint-check + - name: Run tests + run: | + make test \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..914aa14 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,33 @@ +name: Release + +on: + release: + types: [released] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install pypa/build + run: pip install build --user + - name: Install the project dependencies + run: make install + - name: Build wheel and tarball + run: python -m build . 
--outdir dist/ + + pypi-publish: + name: Publish to PyPI + needs: + - build + runs-on: ubuntu-latest + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write + steps: + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 70ff350..3c430b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [build-system] -requires = ["setuptools>=69.0"] +requires = ["setuptools>=69.0", "wheel"] build-backend = "setuptools.build_meta" -[tool.setuptools.packages] -find = {} +[tool.setuptools.packages.find] +exclude = ["tests"] [project] name = "chromacache" From 6018110cc49c2f0fdfa1a0e965eb7567101f824b Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 10:42:09 +0200 Subject: [PATCH 2/7] Add linting --- chromacache/__init__.py | 2 +- .../embedding_functions/AbstractEmbeddingFunction.py | 2 +- chromacache/embedding_functions/__init__.py | 7 ------- .../embedding_functions/test_AbstractEmbeddingFunction.py | 2 +- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/chromacache/__init__.py b/chromacache/__init__.py index 128c0bf..90e0536 100644 --- a/chromacache/__init__.py +++ b/chromacache/__init__.py @@ -1,2 +1,2 @@ -from .chromacache import ChromaCache +from .chromacache import * from .embedding_functions import * diff --git a/chromacache/embedding_functions/AbstractEmbeddingFunction.py b/chromacache/embedding_functions/AbstractEmbeddingFunction.py index a01ff07..9432453 100644 --- a/chromacache/embedding_functions/AbstractEmbeddingFunction.py +++ b/chromacache/embedding_functions/AbstractEmbeddingFunction.py @@ -70,7 +70,7 @@ def __call__(self, input: Documents) -> Embeddings: """Wrapper that truncates the documents, encodes them Args: - documents (Documents): List of documents + input (Documents): List of documents Returns: Embeddings: the encoded sentences diff --git 
a/chromacache/embedding_functions/__init__.py b/chromacache/embedding_functions/__init__.py index b7a8116..e69de29 100644 --- a/chromacache/embedding_functions/__init__.py +++ b/chromacache/embedding_functions/__init__.py @@ -1,7 +0,0 @@ -from .AbstractEmbeddingFunction import AbstractEmbeddingFunction -from .CohereEmbeddingFunction import CohereEmbeddingFunction -from .LaserEmbeddingFunction import LaserEmbeddingFunction -from .MistralAIEmbeddingFunction import MistralAIEmbeddingFunction -from .OpenAIEmbeddingFunction import OpenAIEmbeddingFunction -from .SentenceTransformerEmbeddingFunction import SentenceTransformerEmbeddingFunction -from .VoyageAIEmbeddingFunction import VoyageAIEmbeddingFunction diff --git a/tests/embedding_functions/test_AbstractEmbeddingFunction.py b/tests/embedding_functions/test_AbstractEmbeddingFunction.py index 578f1c0..6b2e76d 100644 --- a/tests/embedding_functions/test_AbstractEmbeddingFunction.py +++ b/tests/embedding_functions/test_AbstractEmbeddingFunction.py @@ -29,7 +29,7 @@ def test_truncate_documents(tokenizer, sentences): ) assert len(truncated_sentences) == len(sentences) - + tokenized_truncated_sentences = [tokenizer.encode(s) for s in truncated_sentences] for truncated_sentence in tokenized_truncated_sentences: assert len(truncated_sentence) <= max_token_length From 78143b026f93fec27fb498877e76d1af5da390d8 Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 10:45:03 +0200 Subject: [PATCH 3/7] Add cache for dependencies --- .github/workflows/ci.yml | 6 +++--- .github/workflows/release.yml | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 114f750..8da43e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,15 +9,15 @@ jobs: tests: strategy: matrix: - os: [ubuntu-latest] - python-version: ["3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - uses: 
actions/checkout@v4 - name: Install Python uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: '3.10' + cache: 'pip' - name: Install the project dependencies run: make install-dev - name: Run lint diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 914aa14..6d5c0d9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,6 +13,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" + cache: "pip" - name: Install pypa/build run: pip install build --user - name: Install the project dependencies From 290cb64bd6adcc8d137985be5845fd0203877f1b Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 10:58:51 +0200 Subject: [PATCH 4/7] upgrade chroma --- chromacache/chromacache.py | 13 ++----------- .../AbstractEmbeddingFunction.py | 8 -------- pyproject.toml | 10 +++++----- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/chromacache/chromacache.py b/chromacache/chromacache.py index ba4cf28..36e73e8 100644 --- a/chromacache/chromacache.py +++ b/chromacache/chromacache.py @@ -1,14 +1,5 @@ -try: - import chromadb - from chromadb import Documents, EmbeddingFunction, Embeddings -except Exception: - __import__("pysqlite3") - import sys - - sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") - - import chromadb - from chromadb import EmbeddingFunction +import chromadb +from chromadb import Documents, EmbeddingFunction, Embeddings class ChromaCache: diff --git a/chromacache/embedding_functions/AbstractEmbeddingFunction.py b/chromacache/embedding_functions/AbstractEmbeddingFunction.py index 9432453..3652b52 100644 --- a/chromacache/embedding_functions/AbstractEmbeddingFunction.py +++ b/chromacache/embedding_functions/AbstractEmbeddingFunction.py @@ -1,11 +1,3 @@ -try: - __import__("pysqlite3") - import sys - - sys.modules["sqlite3"] = sys.modules.pop("pysqlite3") -except Exception: - pass - from abc import ABC, abstractmethod import 
tiktoken diff --git a/pyproject.toml b/pyproject.toml index ccd1bf6..2cc01af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,12 +25,12 @@ dependencies = [ "voyageai>=0.1.6", "tiktoken>=0.5.2", "openai>=1.6.1", - "chromadb>=0.4.21", + "chromadb>=0.5.3", "python-dotenv>=1.0.1", - "cohere==4.41", - "laser_encoders==0.0.1", - "sentence-transformers==2.2.2", - "mistralai==0.0.12" + "cohere>=4.41", + "laser_encoders>=0.0.1", + "sentence-transformers>=2.2.2", + "mistralai>=0.0.12" ] [project.urls] From dffa2e643305c024dac2f3d036caae7254d82820 Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 11:13:19 +0200 Subject: [PATCH 5/7] Fix dependencies and tests --- .github/workflows/ci.yml | 4 ++-- .github/workflows/release.yml | 6 +++--- Makefile | 4 ++-- chromacache/embedding_functions/__init__.py | 17 +++++++++++++++++ pyproject.toml | 5 ++++- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8da43e3..58d7a4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' cache: 'pip' @@ -25,4 +25,4 @@ jobs: make lint-check - name: Run tests run: | - make test \ No newline at end of file + make tests \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6d5c0d9..53f4169 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,10 +10,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.10" - cache: "pip" + python-version: '3.10' + cache: 'pip' - name: Install pypa/build run: pip install build --user - name: Install the project dependencies diff --git a/Makefile b/Makefile index efebc18..b1aced4 100644 --- a/Makefile 
+++ b/Makefile @@ -12,5 +12,5 @@ lint-check: ruff format . --check ruff check **/*.py -test: - pytest -n auto \ No newline at end of file +tests: + pytest -n 5 \ No newline at end of file diff --git a/chromacache/embedding_functions/__init__.py b/chromacache/embedding_functions/__init__.py index e69de29..6862128 100644 --- a/chromacache/embedding_functions/__init__.py +++ b/chromacache/embedding_functions/__init__.py @@ -0,0 +1,17 @@ +from .AbstractEmbeddingFunction import AbstractEmbeddingFunction +from .CohereEmbeddingFunction import CohereEmbeddingFunction +from .LaserEmbeddingFunction import LaserEmbeddingFunction +from .MistralAIEmbeddingFunction import MistralAIEmbeddingFunction +from .OpenAIEmbeddingFunction import OpenAIEmbeddingFunction +from .SentenceTransformerEmbeddingFunction import SentenceTransformerEmbeddingFunction +from .VoyageAIEmbeddingFunction import VoyageAIEmbeddingFunction + +__all__ = [ + "AbstractEmbeddingFunction", + "CohereEmbeddingFunction", + "LaserEmbeddingFunction", + "MistralAIEmbeddingFunction", + "OpenAIEmbeddingFunction", + "SentenceTransformerEmbeddingFunction", + "VoyageAIEmbeddingFunction", +] diff --git a/pyproject.toml b/pyproject.toml index 2cc01af..1d5f79a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,10 @@ dependencies = [ "cohere>=4.41", "laser_encoders>=0.0.1", "sentence-transformers>=2.2.2", - "mistralai>=0.0.12" + "mistralai>=0.0.12", + "fairseq==0.12.3.1", + "hydra-core==1.3.2", + "omegaconf==2.3.0" ] [project.urls] From b977f2c3821a6dd1971a6327c29709bc98eeab6f Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 11:18:47 +0200 Subject: [PATCH 6/7] Fix fairseq dependency --- .github/workflows/ci.yml | 4 +++- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58d7a4f..ddc62d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,9 @@ jobs: python-version: '3.10' cache: 
'pip' - name: Install the project dependencies - run: make install-dev + run: | + pip install --upgrade pip + make install-dev - name: Run lint run: | make lint-check diff --git a/pyproject.toml b/pyproject.toml index 1d5f79a..5f91568 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "laser_encoders>=0.0.1", "sentence-transformers>=2.2.2", "mistralai>=0.0.12", - "fairseq==0.12.3.1", + "fairseq==0.12.3", "hydra-core==1.3.2", "omegaconf==2.3.0" ] From 66b98e08bd31396b7c15737e0eef54ef41a8a3a9 Mon Sep 17 00:00:00 2001 From: Imene Kerboua Date: Mon, 24 Jun 2024 11:19:11 +0200 Subject: [PATCH 7/7] Fix fairseq dependency --- .github/workflows/ci.yml | 4 +++- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58d7a4f..ddc62d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,9 @@ jobs: python-version: '3.10' cache: 'pip' - name: Install the project dependencies - run: make install-dev + run: | + pip install --upgrade pip + make install-dev - name: Run lint run: | make lint-check diff --git a/pyproject.toml b/pyproject.toml index 1d5f79a..a8c7096 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "laser_encoders>=0.0.1", "sentence-transformers>=2.2.2", "mistralai>=0.0.12", - "fairseq==0.12.3.1", + "fairseq>=0.12", "hydra-core==1.3.2", "omegaconf==2.3.0" ]