diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..41b0232 Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..7147277 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,34 @@ +name: Tests + +on: + push: + branches: [ main, master ] + pull_request: + branches: [ main, master ] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - run: pip install ruff + - run: ruff check pepdata/ + + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - run: | + pip install pytest pytest-cov + pip install -r requirements.txt + pip install . + - run: pytest tests/ diff --git a/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/.hypothesis/unicode_data/13.0.0/charmap.json.gz new file mode 100644 index 0000000..d975496 Binary files /dev/null and b/.hypothesis/unicode_data/13.0.0/charmap.json.gz differ diff --git a/lint.sh b/lint.sh index 386fa76..7d7bc32 100755 --- a/lint.sh +++ b/lint.sh @@ -1,9 +1,6 @@ #!/bin/bash set -o errexit -find pepdata test -name '*.py' \ - | xargs pylint \ - --errors-only \ - --disable=print-statement +ruff check pepdata/ tests/ -echo 'Passes pylint check' +echo 'Passes ruff check' diff --git a/pepdata/.DS_Store b/pepdata/.DS_Store new file mode 100644 index 0000000..0a224be Binary files /dev/null and b/pepdata/.DS_Store differ diff --git a/pepdata/amino_acid.py b/pepdata/amino_acid.py index 53aea1e..3c2aa4c 100644 --- a/pepdata/amino_acid.py +++ b/pepdata/amino_acid.py @@ -11,8 +11,6 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import - class AminoAcid(object): def __init__( self, full_name, short_name, letter, contains=None): diff --git a/pepdata/amino_acid_alphabet.py b/pepdata/amino_acid_alphabet.py index 1789b0d..07150db 100644 --- a/pepdata/amino_acid_alphabet.py +++ b/pepdata/amino_acid_alphabet.py @@ -15,8 +15,6 @@ Quantify amino acids by their physical/chemical properties """ -from __future__ import print_function, division, absolute_import - import numpy as np from .amino_acid import AminoAcid diff --git a/pepdata/amino_acid_properties.py b/pepdata/amino_acid_properties.py index 394cc08..47306d9 100644 --- a/pepdata/amino_acid_properties.py +++ b/pepdata/amino_acid_properties.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from .amino_acid_alphabet import letter_to_index """ diff --git a/pepdata/blosum.py b/pepdata/blosum.py index a58e18b..8b5b036 100644 --- a/pepdata/blosum.py +++ b/pepdata/blosum.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from os.path import join from .static_data import MATRIX_DIR diff --git a/pepdata/chou_fasman.py b/pepdata/chou_fasman.py index 59cbbe4..54172ce 100644 --- a/pepdata/chou_fasman.py +++ b/pepdata/chou_fasman.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from .amino_acid_alphabet import amino_acid_name_indices # Chou-Fasman of structural properties from diff --git a/pepdata/common.py b/pepdata/common.py index bd0dd40..5d64beb 100644 --- a/pepdata/common.py +++ b/pepdata/common.py @@ -13,8 +13,6 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import - import numpy as np def transform_peptide(peptide, property_dict): diff --git a/pepdata/iedb/alleles.py b/pepdata/iedb/alleles.py index 09617d2..210a90b 100644 --- a/pepdata/iedb/alleles.py +++ b/pepdata/iedb/alleles.py @@ -10,7 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import from collections import namedtuple import os import xml @@ -63,7 +62,7 @@ class and source organism. continue name = name_element.text - synonyms = set([]) + synonyms = set() for synonym_element in allele.iterfind("Synonyms"): for synonym in synonym_element.text.split(","): synonyms.add(synonym.strip()) diff --git a/pepdata/iedb/memoize.py b/pepdata/iedb/memoize.py index 93ef44a..84e61b5 100644 --- a/pepdata/iedb/memoize.py +++ b/pepdata/iedb/memoize.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from functools import wraps def _prepare_memoization_key(args, kwargs): diff --git a/pepdata/iedb/mhc.py b/pepdata/iedb/mhc.py index b3843b7..9cbaaf9 100644 --- a/pepdata/iedb/mhc.py +++ b/pepdata/iedb/mhc.py @@ -1,3 +1,5 @@ +from __future__ import annotations + # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import import logging import os diff --git a/pepdata/iedb/tcell.py b/pepdata/iedb/tcell.py index b946e87..dbe2b6d 100644 --- a/pepdata/iedb/tcell.py +++ b/pepdata/iedb/tcell.py @@ -1,3 +1,5 @@ +from __future__ import annotations + # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +13,6 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import import logging import os @@ -24,15 +25,9 @@ from .common import bad_amino_acids, cache from .columns import ( get_assay_method, - get_assay_num_tested, - get_assay_response_measured, - get_assay_units, get_host_name, get_mhc_allele, - get_mhc_assay, get_mhc_class, - get_epitope_source_organism, - get_epitope_type, get_epitope_name, ) @@ -119,10 +114,10 @@ def load_dataframe( encoding="latin-1") mhc = get_mhc_allele(df) - mhc_class = get_mhc_class(df) + mhc_class_series = get_mhc_class(df) epitopes = get_epitope_name(df) organism = get_host_name(df) - assay_method = get_assay_method(df) + assay_method_series = get_assay_method(df) # Sometimes the IEDB seems to put in an extra comma in the @@ -177,23 +172,19 @@ def load_dataframe( # "HLA-Class I,allele undetermined" # or # "Class I,allele undetermined" -] - - if hla: - mask &= df[mhc_allele_column_key].str.contains(hla, na=False) - if exclude_hla: - mask &= ~(df[mhc_allele_column_key].str.contains(exclude_hla, na=False)) + if mhc_pattern: + mask &= mhc.str.contains(mhc_pattern, na=False) - if assay_group: - mask &= df[assay_group_column_key].str.contains(assay_group) + if exclude_mhc: + mask &= ~(mhc.str.contains(exclude_mhc, na=False)) - if assay_method: - mask &= df[assay_method_column_key].str.contains(assay_method) + if assay_method is not None and assay_method_series is not None: + mask &= assay_method_series.str.contains(assay_method, na=False) if peptide_length: assert peptide_length > 0 - mask &= df[epitope_column_key].str.len() == peptide_length + mask &= epitopes.str.len() == peptide_length df = df[mask] diff --git a/pepdata/peptide_vectorizer.py b/pepdata/peptide_vectorizer.py index 05cc9de..86fa6a7 100644 --- a/pepdata/peptide_vectorizer.py +++ b/pepdata/peptide_vectorizer.py @@ -13,8 +13,6 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import - import numpy as np from sklearn.feature_extraction.text import CountVectorizer from sklearn.preprocessing import normalize @@ -28,7 +26,7 @@ def make_count_vectorizer(reduced_alphabet, max_ngram): return CountVectorizer( analyzer='char', ngram_range=(1, max_ngram), - dtype=np.float, + dtype=np.float64, preprocessor=preprocessor) class PeptideVectorizer(object): diff --git a/pepdata/pmbec.py b/pepdata/pmbec.py index 529d6c1..d97b8b6 100644 --- a/pepdata/pmbec.py +++ b/pepdata/pmbec.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import from os.path import join from .static_data import MATRIX_DIR diff --git a/pepdata/reduced_alphabet.py b/pepdata/reduced_alphabet.py index c257333..f7121ae 100644 --- a/pepdata/reduced_alphabet.py +++ b/pepdata/reduced_alphabet.py @@ -18,8 +18,6 @@ Peterson, Kondev, et al. http://www.rpgroup.caltech.edu/publications/Peterson2008.pdf """ -from __future__ import print_function, division, absolute_import - def dict_from_list(groups): aa_to_group = {} for i, group in enumerate(groups): diff --git a/pepdata/residue_contact_energies.py b/pepdata/residue_contact_energies.py index 3bb133e..d5961c2 100644 --- a/pepdata/residue_contact_energies.py +++ b/pepdata/residue_contact_energies.py @@ -10,8 +10,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from os.path import join from .amino_acid_alphabet import canonical_amino_acid_letters, dict_to_amino_acid_matrix diff --git a/pepdata/static_data.py b/pepdata/static_data.py index 7c69805..ee803cf 100644 --- a/pepdata/static_data.py +++ b/pepdata/static_data.py @@ -11,7 +11,6 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import from os.path import dirname, realpath, join PACKAGE_DIR = dirname(realpath(__file__)) diff --git a/pepdata/version.py b/pepdata/version.py index f8aa15b..8b75a2c 100644 --- a/pepdata/version.py +++ b/pepdata/version.py @@ -1,4 +1,4 @@ -__version__ = "1.2.0" +__version__ = "1.2.2" def print_version(): diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a50892f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[build-system] +requires = ["setuptools>=64"] +build-backend = "setuptools.build_meta" + +[project] +name = "pepdata" +requires-python = ">=3.9" +authors = [{name="Alex Rubinsteyn", email="alex.rubinsteyn@mssm.edu"}] +description = "Immunological peptide datasets and amino acid properties" +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +readme = "README.md" +dynamic = ["version", "dependencies"] + +[tool.setuptools.dynamic] +version = {attr = "pepdata.version.__version__"} +dependencies = {file = ["requirements.txt"]} + +[tool.setuptools.packages.find] +exclude = ["test", "test.*"] + +[project.urls] +"Homepage" = "https://github.com/openvax/pepdata" +"Bug Tracker" = "https://github.com/openvax/pepdata/issues" + +[tool.ruff.lint] +select = ["E", "F"] +ignore = ["F821", "E501", "F841", "E731", "E741", "E722", "E721"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] +"test*/*" = ["F401"] diff --git a/requirements.txt b/requirements.txt index 321e6fa..251113e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy>=1.7 +numpy>=2.0.0,<3.0.0 scipy>=0.9 pandas>=0.17 scikit-learn>=0.14.1 diff --git a/setup.py b/setup.py deleted file mode 100644 index 1a011ae..0000000 --- a/setup.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2014-2018. Mount Sinai School of Medicine -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import print_function, division, absolute_import -import os -import re - -from setuptools import setup, find_packages - -readme_dir = os.path.dirname(__file__) -readme_path = os.path.join(readme_dir, 'README.md') - -try: - with open(readme_path, 'r') as f: - readme_markdown = f.read() -except: - print("Failed to load README file") - readme_markdown = "" - -try: - import pypandoc - readme_restructured = pypandoc.convert(readme_markdown, to='rst', format='md') -except: - readme_restructured = readme_markdown - print("Conversion of long_description from markdown to reStructuredText failed, skipping...") - -with open('pepdata/__init__.py', 'r') as f: - version = re.search( - r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', - f.read(), - re.MULTILINE).group(1) - -if __name__ == '__main__': - setup( - name='pepdata', - version=version, - description="Immunological peptide datasets and amino acid properties", - author="Alex Rubinsteyn", - author_email="alex.rubinsteyn@mssm.edu", - url="https://github.com/openvax/pepdata", - license="http://www.apache.org/licenses/LICENSE-2.0.html", - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Environment :: Console', - 'Operating System :: OS Independent', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - ], - install_requires=[ - 'numpy>=1.7', - 'scipy>=0.9', - 'pandas>=0.17', - 'scikit-learn>=0.14.1', - 'progressbar33', - 'biopython>=1.65', - 'datacache>=0.4.4', - 'lxml', - ], - long_description=readme_restructured, - packages=find_packages(exclude="test"), - include_package_data=True - ) diff --git a/tests/test_amino_acids.py b/tests/test_amino_acids.py index d335068..9b29073 100644 --- a/tests/test_amino_acids.py +++ b/tests/test_amino_acids.py @@ -1,4 +1,3 @@ -from nose.tools import eq_ from pepdata.amino_acid_alphabet import ( canonical_amino_acids, canonical_amino_acid_letters, @@ -13,7 +12,7 @@ def test_canonical_amino_acids_letters(): assert len(canonical_amino_acid_letters) == 20 assert "X" not in canonical_amino_acid_letters expected_letters = [aa.letter for aa in canonical_amino_acids] - eq_(expected_letters, canonical_amino_acid_letters) + assert expected_letters == canonical_amino_acid_letters def test_extended_amino_acids(): assert len(extended_amino_acids) > 20 @@ -23,4 +22,4 @@ def test_extended_amino_acids_letters(): assert "X" in extended_amino_acid_letters assert "J" in extended_amino_acid_letters expected_letters = [aa.letter for aa in extended_amino_acids] - eq_(expected_letters, extended_amino_acid_letters) + assert expected_letters == extended_amino_acid_letters diff --git a/tests/test_iedb_alleles.py b/tests/test_iedb_alleles.py index 9cc40bd..b1e7f8f 100644 --- a/tests/test_iedb_alleles.py +++ b/tests/test_iedb_alleles.py @@ -11,33 +11,30 @@ # limitations under the License. -from __future__ import print_function, division, absolute_import - -from nose.tools import eq_ from pepdata import iedb def test_iedb_human_class1_allele(): allele_dict = iedb.alleles.load_alleles_dict() allele = allele_dict["HLA-C*07:02"] - eq_(allele.mhc_class, "I") - eq_(allele.locus, "C") + assert allele.mhc_class == "I" + assert allele.locus == "C" def test_iedb_human_class2_allele(): allele_dict = iedb.alleles.load_alleles_dict() allele = allele_dict["HLA-DRA*01:01/DRB1*04:04"] - eq_(allele.mhc_class, "II") - eq_(allele.locus, "DR") + assert allele.mhc_class == "II" + assert allele.locus == "DR" def test_iedb_mouse_class1_allele(): allele_dict = iedb.alleles.load_alleles_dict() allele = allele_dict["H-2-Ds"] - eq_(allele.mhc_class, "I") - eq_(allele.locus, "D") + assert allele.mhc_class == "I" + assert allele.locus == "D" def test_iedb_mouse_class2_allele(): allele_dict = iedb.alleles.load_alleles_dict() allele = allele_dict["H-2-IAq"] - eq_(allele.mhc_class, "II") - eq_(allele.locus, "IA") + assert allele.mhc_class == "II" + assert allele.locus == "IA" diff --git a/tests/test_iedb_mhc.py b/tests/test_iedb_mhc.py index 4f467af..f9ce353 100644 --- a/tests/test_iedb_mhc.py +++ b/tests/test_iedb_mhc.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import - from pepdata import iedb def test_mhc_hla_a2(): diff --git a/tests/test_ngram.py b/tests/test_ngram.py index c8ad3e5..da90c2d 100644 --- a/tests/test_ngram.py +++ b/tests/test_ngram.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function, division, absolute_import -from six.moves import cPickle +import pickle from pepdata import PeptideVectorizer