Merged
14 changes: 7 additions & 7 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -16,16 +16,16 @@ jobs:

- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: pip
cache-dependency-path: pyproject.toml
python-version: "3.10.19"

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install wheel setuptools
pip install -e .
pip install mkdocs mkdocs-material "mkdocstrings[python]" mkdocs-autorefs
uv pip install --system -e ".[docs]"

- name: Build
run: mkdocs build
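The new install step builds from a `docs` extra instead of listing packages inline. A hypothetical sketch of what such an extra in `pyproject.toml` could look like, mirroring the packages the old pip-based step installed explicitly (the exact entry in the repo may differ):

```toml
# Hypothetical [project.optional-dependencies] entry backing `.[docs]`,
# reconstructed from the packages the removed pip step installed.
[project.optional-dependencies]
docs = [
    "mkdocs",
    "mkdocs-material",
    "mkdocstrings[python]",
    "mkdocs-autorefs",
]
```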
21 changes: 21 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,21 @@
name: Pre-commit

on:
pull_request:
push:
branches: [main, development]

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- name: Check out repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10.19'

- name: Run pre-commit
uses: pre-commit/action@v3.0.1
41 changes: 17 additions & 24 deletions .github/workflows/tests.yml
@@ -11,39 +11,32 @@ jobs:
tests:
runs-on: ubuntu-latest
strategy:
fail-fast: true
fail-fast: false
matrix:
python-version: ["3.9"]
python-version: ["3.10", "3.11"]
timeout-minutes: 30
defaults:
run:
shell: bash -l {0}
steps:
- name: Check out repository
uses: actions/checkout@v4

# - uses: pdm-project/setup-pdm@v3
# name: Set up PDM
# with:
# python-version: ${{ matrix.python-version }}
# cache: true

- name: Setup Mambaforge
uses: conda-incubator/setup-miniconda@v3
- name: Set up Python
uses: actions/setup-python@v5
with:
miniforge-variant: Mambaforge
miniforge-version: latest
use-mamba: true
python-version: ${{ matrix.python-version }}
conda-channels: anaconda, conda-forge
activate-environment: test

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y openbabel libfftw3-dev
Comment on lines +32 to +35
Remove openbabel from system dependencies—it's not needed for the current test suite.

LocalEnv representation (which requires OpenBabel) is not covered by the tests (test_imports.py and test_xtal2pot.py only test basic tokenizers and structures). Since the test suite doesn't exercise this feature, installing OpenBabel adds unnecessary overhead to CI. If needed later for integration testing, add it back with a comment explaining its purpose.
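If the reviewer's suggestion is adopted, the step would shrink to something like the following sketch (keeping `libfftw3-dev`, which the review says to leave intact):

```yaml
# Hypothetical revision of the "Install system dependencies" step with
# openbabel dropped, per the review comment above; libfftw3-dev stays.
- name: Install system dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y libfftw3-dev
```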



- name: Install dependencies
run: |
mamba install -c conda-forge openbabel fftw -y
pip install -e ".[dev]"
pip install pyxtal
pip install "numpy<2.0"

uv pip install --system -e ".[dev]"

- name: Test
run: pytest tests
26 changes: 26 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,26 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
exclude: ^mkdocs\.yml$
- id: check-added-large-files
args: ['--maxkb=1000']
- id: check-merge-conflict
- id: check-toml
- id: debug-statements

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.4
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

- repo: https://github.com/PyCQA/docformatter
rev: v1.7.6
hooks:
- id: docformatter
args: [--in-place, --config, ./pyproject.toml]
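The `exclude: ^mkdocs\.yml$` line in the config above is a regex matched against each file's repo-relative path; mkdocs-material configs commonly contain custom YAML tags (e.g. `!!python/name:`) that a plain YAML loader rejects, which is presumably why the file is skipped. A small standalone sketch of the matching behaviour (`is_checked` is a hypothetical helper, not part of pre-commit):

```python
import re

# The exclude pattern from the check-yaml hook, anchored to the full
# repo-relative path, so only the top-level mkdocs.yml is skipped.
EXCLUDE = re.compile(r"^mkdocs\.yml$")

def is_checked(path: str) -> bool:
    """Return True if the check-yaml hook would lint this file (sketch)."""
    return path.endswith((".yml", ".yaml")) and not EXCLUDE.match(path)

print(is_checked("mkdocs.yml"))               # excluded
print(is_checked(".pre-commit-config.yaml"))  # linted
print(is_checked("docs/mkdocs.yml"))          # anchored regex: still linted
```

Note the anchors: without `^` and `$`, the pattern would also skip any nested file whose path contains `mkdocs.yml`.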
2 changes: 1 addition & 1 deletion LICENSE
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
56 changes: 38 additions & 18 deletions README.md
@@ -18,27 +18,31 @@
</p>


MatText is a framework for text-based materials modeling. It supports

- conversion of crystal structures into text representations
sourcery-ai[bot] marked this conversation as resolved.
- transformations of crystal structures for sensitivity analyses
- decoding of text representations to crystal structures
- tokenization of text-representation of crystal structures
- pre-training, finetuning and testing of language models on text-representations of crystal structures
- analysis of language models trained on text-representations of crystal structures



## Local Installation

We recommend that you create a virtual conda environment on your computer in which you install the dependencies for this package. To do so head over to [Miniconda](https://docs.conda.io/en/latest/miniconda.html) and follow the installation instructions there.
**Requirements:**
- Python 3.10 or 3.11 (tested and supported)
- [uv](https://docs.astral.sh/uv/) package manager (recommended)

We recommend using [uv](https://docs.astral.sh/uv/) for fast and reliable Python package management. To install uv, follow the [installation instructions](https://docs.astral.sh/uv/getting-started/installation/).

<!-- ### Install latest release

### Install latest release

```bash
pip install mattext
``` -->
uv pip install git+https://github.com/lamalab-org/mattext.git
```

### Install development version

@@ -49,16 +49,32 @@ git clone https://github.com/lamalab-org/mattext.git
cd mattext
```

Create a virtual environment and install:

```bash
uv venv --python 3.10
source .venv/bin/activate # On Windows: .venv\Scripts\activate
uv pip install -e ".[dev]"
```

Install pre-commit hooks (optional, for development):

```bash
pip install -e .
pre-commit install
```

If you want to use the Local Env representation, you will also need to install OpenBabel, e.g. using
If you want to use the Local Env representation, you will also need to install OpenBabel. You can install it via conda/mamba:

```bash
conda install openbabel -c conda-forge
```

or on Ubuntu/Debian:

```bash
sudo apt-get install openbabel
```

## Getting started

### Converting crystals into text
@@ -94,18 +94,18 @@ requested_text_reps = text_rep.get_requested_text_reps(requested_reps)
python main.py -cn=pretrain model=pretrain_example +model.representation=composition +model.dataset_type=pretrain30k +model.context_length=32
```

### Running a benchmark

```bash
python main.py -cn=benchmark model=benchmark_example +model.dataset_type=filtered +model.representation=composition +model.dataset=perovskites +model.checkpoint=path/to/checkpoint
```

The `+` symbol before a configuration key indicates that you are adding a new key-value pair to the configuration. This is useful when you want to specify parameters that are not part of the default configuration.

To override the existing default configuration, use `++`, for e.g., `++model.pretrain.training_arguments.per_device_train_batch_size=32`. Refer to the [docs](https://lamalab-org.github.io/MatText/) for more examples and advanced ways to use the configs with config groups.
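The `+`/`++` convention described above can be mimicked with a toy dict-based sketch. This is an illustration of the rule the paragraph states, not Hydra's actual override grammar or implementation:

```python
# Toy illustration of Hydra-style "+" (add) vs "++" (force) overrides.
# apply_override is a hypothetical helper for demonstration only.
def apply_override(cfg: dict, key: str, value):
    if key.startswith("++"):       # force: add the key or overwrite it
        cfg[key[2:]] = value
    elif key.startswith("+"):      # add-only: error if the key already exists
        k = key[1:]
        if k in cfg:
            raise KeyError(f"'{k}' already set; use '++{k}' to override")
        cfg[k] = value
    else:                          # plain override: the key must exist
        if key not in cfg:
            raise KeyError(f"'{key}' not in config; use '+{key}' to add it")
        cfg[key] = value
    return cfg

cfg = {"model.representation": "cif_p1"}
apply_override(cfg, "+model.dataset", "perovskites")          # new key
apply_override(cfg, "++model.representation", "composition")  # force override
```

In real Hydra, a plain override of a missing key fails with a similar error, which is what makes `+` and `++` necessary for keys outside the default config.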
issue (typo): Adjust "for e.g." to the standard "e.g.".

"For e.g." is nonstandard; use "e.g.," instead: use ++, e.g., ++model.pretrain...

Suggested change
To override the existing default configuration, use `++`, for e.g., `++model.pretrain.training_arguments.per_device_train_batch_size=32`. Refer to the [docs](https://lamalab-org.github.io/MatText/) for more examples and advanced ways to use the configs with config groups.
To override the existing default configuration, use `++`, e.g., `++model.pretrain.training_arguments.per_device_train_batch_size=32`. Refer to the [docs](https://lamalab-org.github.io/MatText/) for more examples and advanced ways to use the configs with config groups.

### Using data

The MatText datasets can be easily obtained from [HuggingFace](https://huggingface.co/datasets/n0w0f/MatText), for example

@@ -123,19 +143,19 @@ Contributions, whether filing an issue, making a pull request, or forking, are a

## 👋 Attribution

### Citation

If you use MatText in your work, please cite

```
@misc{alampara2024mattextlanguagemodelsneed,
title={MatText: Do Language Models Need More than Text & Scale for Materials Modeling?},
author={Nawaf Alampara and Santiago Miret and Kevin Maik Jablonka},
year={2024},
eprint={2406.17295},
archivePrefix={arXiv},
primaryClass={cond-mat.mtrl-sci}
url={https://arxiv.org/abs/2406.17295},
}
```

Comment on lines 150 to 159 (CodeRabbit) — ⚠️ Potential issue | 🟡 Minor: the citation code block should declare a language (`bibtex`), and `primaryClass={cond-mat.mtrl-sci}` is missing a trailing comma before the `url` field, which is invalid BibTeX.

@@ -146,4 +166,4 @@ The code in this package is licensed under the MIT License.

### 💰 Funding

This project has been supported by the [Carl Zeiss Foundation](https://www.carl-zeiss-stiftung.de/en/) as well as Intel and Merck.
2 changes: 1 addition & 1 deletion conf/archived_experiments/config-hydra.yaml
@@ -6,7 +6,7 @@ hydra:
sweep:
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
subdir: ${hydra.job.override_dirname}

launcher:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
submitit_folder: ${hydra.sweep.dir}/.submitit/%j
3 changes: 1 addition & 2 deletions conf/archived_experiments/config-potential.yaml
@@ -6,7 +6,7 @@
sweep:
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
subdir: ${hydra.job.override_dirname}

launcher:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
submitit_folder: ${hydra.sweep.dir}/.submitit/%j
@@ -51,4 +51,3 @@

- name: potential_run
tasks: [potential]

3 changes: 1 addition & 2 deletions conf/archived_experiments/config-sft.yaml
@@ -6,7 +6,7 @@
sweep:
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
subdir: ${hydra.job.override_dirname}

launcher:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
submitit_folder: ${hydra.sweep.dir}/.submitit/%j
@@ -51,4 +51,3 @@

# - name: potential_run
# tasks: [potential]

3 changes: 1 addition & 2 deletions conf/archived_experiments/config-smiles.yaml
@@ -6,7 +6,7 @@
sweep:
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
subdir: ${hydra.job.override_dirname}

launcher:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
submitit_folder: ${hydra.sweep.dir}/.submitit/%j
@@ -51,4 +51,3 @@

# - name: potential_run
# tasks: [potential]

3 changes: 1 addition & 2 deletions conf/archived_experiments/config.yaml
@@ -6,7 +6,7 @@
sweep:
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
subdir: ${hydra.job.override_dirname}

launcher:
_target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
submitit_folder: ${hydra.sweep.dir}/.submitit/%j
@@ -51,4 +51,3 @@

# - name: potential_run
# tasks: [potential]

1 change: 0 additions & 1 deletion conf/archived_experiments/santiago/cifp1.yaml
@@ -10,4 +10,3 @@ model:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: /home/so87pot/n0w0f/santiago_ckpt/cif_p1_pt_30k/checkpoint-23000

2 changes: 1 addition & 1 deletion conf/archived_experiments/santiago_100k/cifp1.yaml
@@ -9,4 +9,4 @@ model:
training_arguments:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: /home/so87pot/n0w0f/santiago_ckpt/cif_p1_pt_100k/checkpoint-26000
1 change: 0 additions & 1 deletion conf/archived_experiments/testing_perturb_100/cifp1.yaml
@@ -10,4 +10,3 @@ model:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: ft_100k_mb_small

@@ -4,11 +4,11 @@ model:
logging:
wandb_project: perturb_1


finetune:
model_name: ft_100k_mb_small
context_length: 1024
training_arguments:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: ft_100k_mb_small
@@ -3,12 +3,11 @@ model:
representation: crystal_llm_rep
logging:
wandb_project: perturb_1

finetune:
model_name: ft_100k_mb_small
context_length: 512
training_arguments:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: ft_100k_mb_small

1 change: 0 additions & 1 deletion conf/archived_experiments/testing_perturb_300/cifp1.yaml
@@ -10,4 +10,3 @@ model:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: ft_300k_mb_small

@@ -4,11 +4,11 @@ model:
logging:
wandb_project: perturb_1


finetune:
model_name: ft_300k_mb_small
context_length: 1024
training_arguments:
per_device_train_batch_size: 64
path:
pretrained_checkpoint: ft_300k_mb_small