diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index 1ba1998..0000000 --- a/.editorconfig +++ /dev/null @@ -1,12 +0,0 @@ -# Editor configuration, see https://editorconfig.org -root = true - -[*] -charset = utf-8 -indent_style = space -indent_size = 2 -insert_final_newline = true -trim_trailing_whitespace = true - -[*.md] -max_line_length = off diff --git a/.flake8 b/.flake8 deleted file mode 100644 index f34fd6a..0000000 --- a/.flake8 +++ /dev/null @@ -1,13 +0,0 @@ -[flake8] -# Rule definitions: http://flake8.pycqa.org/en/latest/user/error-codes.html -# D203: 1 blank line required before class docstring -# W503: line break before binary operator -# W504: line break after binary operator -# F401: file imported but not used -# F841: local variable is assigned to but never used -exclude = __pycache__,node_modules,.git,.pytest_cache,docs -ignore = D203,W503,W504,F401 -max-complexity = 24 -max-line-length = 120 -per-file-ignores = - codonPython/tests/file_utils_test.py:F841 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3a84a0f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: CI +on: + push: + branches: + - main + pull_request: + branches: + - main + workflow_dispatch: +jobs: + ci-pipeline: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . + pip install pytest build 
+ - name: Running pytest + id: pytest + run: | + python -m pytest + - name: Test build package + run: python -m build \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index efa8a4a..0000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -language: python - -python: - - 3.6 - -install: - - sudo apt-get install unixodbc-dev - - pip install -r requirements.txt - - pip install codecov - - pip install pytest pytest-cov - -script: - - pytest --cov=./ - -after_success: - - codecov - - cd docs && make html - -deploy: - provider: pages - skip_cleanup: true - github_token: $githubtoken - local_dir: docs/build/html - keep_history: true diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 4e4470f..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,5 +0,0 @@ -recursive-include codonPython *.py - -include LICENSE -include README.md -# instructions about what to include in the distribution build \ No newline at end of file diff --git a/README.md b/README.md index 9269f87..f5fa278 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,8 @@ # National Reusable Code Library 🌍 +[![CI](https://github.com/nhsengland/reusable-code-library/actions/workflows/ci.yml/badge.svg)](https://github.com/nhsengland/reusable-code-library/actions/workflows/ci.yml) +![Static Badge](https://img.shields.io/badge/status-development-blue) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Code Standard: RAP](https://img.shields.io/badge/code%20standard-RAP-000099.svg)](https://nhsdigital.github.io/rap-community-of-practice/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ## What is the `National Reusable Code Library`? 
diff --git a/codonPython/ODS_lookup.py b/codonPython/ODS_lookup.py deleted file mode 100644 index e67df2c..0000000 --- a/codonPython/ODS_lookup.py +++ /dev/null @@ -1,97 +0,0 @@ -import requests -from typing import Dict, Iterable, Callable, List, Optional -import pandas as pd -import numpy as np - - -def query_api(code: str) -> Dict: - """Query the ODS (organisation data service) API for a single org code - and return the full JSON result. Full API docs can be found here: - https://digital.nhs.uk/services/organisation-data-service/guidance-for-developers/organisation-endpoint - - Parameters - ---------- - code : str - 3 character organization code. - - Returns - ---------- - dict - The data returned from the API. - - Examples - --------- - >>> result = query_api("X26") - >>> result["Organisation"]["Name"] - 'NHS DIGITAL' - >>> result["Organisation"]["GeoLoc"]["Location"]["AddrLn1"] - '1 TREVELYAN SQUARE' - """ - if not isinstance(code, str): - raise ValueError(f"ODS code must be a string, received {type(code)}") - - response = requests.get( - f"https://directory.spineservices.nhs.uk/ORD/2-0-0/organisations/{code}" - ).json() - if "errorCode" in response: - error_code = response["errorCode"] - error_text = response["errorText"] - raise ValueError( - f"API query failed with code {error_code} and text '{error_text}'." - ) - return response - - -def get_addresses(codes: Iterable[str]) -> pd.DataFrame: - """Query the ODS (organisation data service) API for a series of - org codes and return a data frame containing names and addresses. - Invalid codes will cause a message to be printed but will - otherwise be ignored, as an incomplete merge table is more - useful than no table at all. - - Parameters - ---------- - codes : list, ndarray or pd.Series - 3 character organization codes to retrieve information for. - - Returns - ---------- - DataFrame - Address information for the given org codes. 
- - Examples - --------- - >>> result = get_addresses(pd.Series(["X26"])) - >>> result.reindex(columns=sorted(result.columns)) - Org_AddrLn1 Org_Code Org_Country Org_Name Org_PostCode Org_Town - 0 1 TREVELYAN SQUARE X26 ENGLAND NHS Digital LS1 6AE LEEDS - """ - - # Internal helper function to take the full result of a query - # and extract the relevant fields - def extract_data(api_result: Dict, code: str) -> Dict[str, str]: - org_info = api_result["Organisation"] - org_name = org_info["Name"] - org_address = org_info["GeoLoc"]["Location"] - result = { - "Org_Code": code, - "Org_Name": org_name.title().replace("Nhs", "NHS"), - **{f"Org_{k}": v for k, v in org_address.items() if k != "UPRN"}, - } - return result - - # Remove duplicate values - to_query = set(codes) - if np.nan in to_query: - # 'NaN' is actually a valid code but we don't want it for null values - to_query.remove(np.nan) - - result = [] - for code in to_query: - try: - api_result = query_api(code) - result.append(extract_data(api_result, code)) - except ValueError as e: - print(f"No result for ODS code {code}. 
{e}") - continue - return pd.DataFrame(result) diff --git a/codonPython/SQL_connections.py b/codonPython/SQL_connections.py deleted file mode 100644 index f2cac3e..0000000 --- a/codonPython/SQL_connections.py +++ /dev/null @@ -1,33 +0,0 @@ -''' Author(s): Sam Hollings -Desc: this module contains SQL_alchemy engines to connect to commonly used databases''' - -from sqlalchemy import create_engine - - -def conn_dss(): - '''Returns sqlalchemy Engine to connect to the DSS 2008 server (DMEDSS) DSS_CORPORATE database ''' - engine = create_engine('mssql+pyodbc://DMEDSS/DSS_CORPORATE?driver=SQL+Server') - return engine - - -def conn_dss2016uat(): - '''Returns sqlalchemy Engine to connect to the DSS 2016 server (UAT) (DSSUAT) DSS_CORPORATE database ''' - conn = create_engine('mssql+pyodbc://DSSUAT/DSS_CORPORATE?driver=SQL+Server') - return conn - - -def conn_dummy(path=r''): - '''connect to the sqlite3 database in memory, or at specified path - parameters - ---------- - path : string - The location and file in which the database for conn_dummy will be stored. Default is memory (RAM) - ''' - - conn_string = 'sqlite://' - if path != '': - path = '/' + path - - conn = create_engine(r'{0}{1}'.format(conn_string, path)) - - return conn diff --git a/codonPython/age_bands.py b/codonPython/age_bands.py deleted file mode 100644 index ce1e28b..0000000 --- a/codonPython/age_bands.py +++ /dev/null @@ -1,87 +0,0 @@ -import math - - -def age_band_5_years(age: int) -> str: - """ - Place age into appropriate 5 year band - - This function takes the age supplied as an argument and returns a string - representing the relevant 5 year banding. 
- - Parameters - ---------- - age : int - Age of the person - - Returns - ------- - out : str - The 5 year age band - - Examples - -------- - >>> age_band_5_years(3) - '0-4' - >>> age_band_5_years(None) - 'Age not known' - >>> age_band_5_years(95) - '90 and over' - """ - - if age is None: - return "Age not known" - - if age >= 90: - if age >= 150: - raise ValueError("The age input: {} is too large.".format(age)) - else: - return "90 and over" - elif age < 0: - raise ValueError("The age input: {} is too low.".format(age)) - else: - lowerbound = 5 * int(math.floor(age / 5)) - upperbound = lowerbound + 4 - return "{}-{}".format(lowerbound, upperbound) - - -def age_band_10_years(age: int) -> str: - """ - Place age into appropriate 10 year band - - This function takes the age supplied as an argument and returns a string - representing the relevant 10 year banding. - - Parameters - ---------- - age : int - Age of the person - - Returns - ------- - out : str - The 10 year age band - - Examples - -------- - >>> age_band_10_years(3) - '0-9' - >>> age_band_10_years(None) - 'Age not known' - >>> age_band_10_years(95) - '90 and over' - """ - - if age is None: - return "Age not known" - - if age >= 90: - if age >= 150: - raise ValueError("The age input: {} is too large.".format(age)) - else: - return "90 and over" - elif age < 0: - raise ValueError("The age input: {} is too low.".format(age)) - else: - lowerbound = 10 * int(math.floor(age / 10)) - upperbound = lowerbound + 9 - return "{}-{}".format(lowerbound, upperbound) diff --git a/codonPython/file_utils.py b/codonPython/file_utils.py deleted file mode 100644 index 83ae2cc..0000000 --- a/codonPython/file_utils.py +++ /dev/null @@ -1,332 +0,0 @@ -import pandas as pd -import os - - -def file_search(path=".", doctype="csv", like=[""], strict=False): - """ - This function creates a list of all files of a certain type, satisfying the criteria outlined - in like = [...] parameter. 
The function only searches for files in the specified folder - of the current working directory that is set by the user. - - Parameters - ----------- - path : string - Path to a folder in the current working directory - default = '.', i.e. current working directory folder - doctype : string - Document format to search for - e.g. 'csv' or 'xlsx' - default = 'csv' - like : list - A list of words to filter the file search on - default = [''], i.e. no filter - strict : bool - Set True to search for filenames containing all words from 'like' list ( - default = False - - Returns - ------- - list - - Examples - ------- - >>> file_search(doctype = 'md') - ['README.md', 'CONTRIBUTING.md'] - - >>> file_search(doctype = 'md', like = ['READ']) - ['README.md'] - - """ - - if not isinstance(path, str): - raise ValueError("Please input path as a string") - elif not isinstance(doctype, str): - raise ValueError("Please input doctype as a string") - elif not isinstance(like, list): - raise ValueError("Please input like as a list") - elif not isinstance(strict, bool): - raise ValueError("Please input strict as a bool") - else: - pass - - list_of_files = [] - - if strict is False: - for file in os.listdir(path): - if (file.split(".")[-1] == doctype) & (any(x in file for x in like)): - list_of_files.append(file) - else: - for file in os.listdir(path): - if (file.split(".")[-1] == doctype) & (all(x in file for x in like)): - list_of_files.append(file) - - return list_of_files - - -def import_files( - path=".", doctype="csv", sheet="Sheet1", subdir=False, like=[""], strict=False -): - """ - This function imports all documents of a given format to a dictionary - and returns this dictionary, keeping original file names. - - Parameters - ---------- - path : string - Path to a folder in the current working directory - default = '.', i.e. current working directory folder - doctype : string - Document format to search for - e.g. 
'csv' or 'xlsx' - default = 'csv' - sheet : string - Sheet name of the xlsx file - default = 'Sheet1' - subdir : bool - True to allow download all files, including the subdirectories - default = False - like : list - A list of words to filter the file search on - default = [''], i.e. no filter - strict : bool - Set True to search for filenames containing all words from 'like' list - default = False - - Returns - ------- - out : dict - - Examples - -------- - - '>>> import_files()' - - File Data_AprF_2019 is successfully imported - - File Data_AugF_2019 is successfully imported - - File Data_JulF_2019 is successfully imported - - File Data_JunF_2019_v1 is successfully imported - - File Data_MayF_2019 is successfully imported - - File Data_SepP_2019 is successfully imported - - '>>> import_files(like = ['Aug','Sep'])' - - File Data_AugF_2019 is successfully imported - - File Data_SepP_2019 is successfully imported - - - """ - - if not isinstance(path, str): - raise ValueError("Please input path as a string") - elif not isinstance(doctype, str): - raise ValueError("Please input doctype as a string") - elif not isinstance(sheet, str): - raise ValueError("Please input sheet as a string") - elif not isinstance(subdir, bool): - raise ValueError("Please input subdir as a bool") - elif not isinstance(like, list): - raise ValueError("Please input like as a list") - elif not isinstance(strict, bool): - raise ValueError("Please input strict as a bool") - else: - pass - - dict_files = {} - if subdir is True: - - for r, d, f in os.walk(path): - for file in f: - b = any(x in file for x in like) - if strict is True: - b = all(x in file for x in like) - if (file.split(".")[-1] == doctype) & (b is True): - k = file.strip("." 
+ doctype) - try: - name = os.path.join(r, file) - print("\nImporting " + k + "...", end="", flush=True) - if doctype == "csv": - dict_files[name.strip(".\\").strip(".csv")] = pd.read_csv( - name - ) - print("\rFile " + k + " is successfully imported") - else: - dict_files[ - name.strip(".\\").strip(".xlsx") - ] = pd.read_excel(name, sheet_name=sheet) - print("\rFile " + k + " is successfully imported") - except Exception as ex: - raise (ex) - else: - for file in os.listdir(path): - b = any(x in file for x in like) - if strict is True: - b = all(x in file for x in like) - - if (file.split(".")[-1] == doctype) & (b is True): - k = file.strip("." + doctype) - try: - name = os.path.join(path, file) - print("\nImporting " + k + "...", end="", flush=True) - if doctype == "csv": - dict_files[k] = pd.read_csv(name) - print("\rFile " + k + " is successfully imported") - else: - dict_files[k] = pd.read_excel(name, sheet_name=sheet) - print("\rFile " + k + " is successfully imported") - except Exception as ex: - raise (ex) - - return dict_files - - -def compare(x, y, names=["x", "y"], dups=False, same=False, comment=False): - """ - This function returns a dictionary with: - - 1. Same values between data frames x and y - 2. Values in x, not in y - 3. Values in y, not in x - - (optional): - (4) Duplicates of x - (5) Duplicates of y - (6) Boolean of whether x and y are the same - - Parameters - ---------- - x : pandas.DataFrame - DataFrame #1 - y : pandas.DataFrame - DataFrame #2 - names : list - a list of user preferred file names - e.g. ['File1', 'File2'] - default = ['x','y'] - dups : bool - True to include duplicates check for each file - default = False - same : bool - True to activate. Outputs True if DataFrames are the same - default = False - comment : bool - True to activate. Prints out statistics of the compariosn results - e.g. 
number of same valeus, number of duplicates, number of outliers and whether the DataFrames are the same - default = False - - Returns - ------- - out : dict - - Examples - -------- - - '>>> c = compare(df1, df2, names = ['df1','df2'], dups = True, same = True, comment =True)' - - There are 133891 same values - There are 16531 outliers in df1 - There are 20937 outliers in df2 - There are 48704 duplicates in df1 - There are 0 duplicates in df2 - The DataFrames are not the same - - '>>> c = compare(df2, df2, names = ['df2','df2'], dups = True, same = True, comment =True)' - - There are 154444 same values - There are 0 outliers in df2 - There are 0 outliers in df2 - There are 0 duplicates in df2 - There are 0 duplicates in df2 - The DataFrames are the same - """ - - if not isinstance(x, pd.DataFrame): - raise ValueError("Please input x as a pandas.DataFrame") - elif not isinstance(y, pd.DataFrame): - raise ValueError("Please input y as a pandas.DataFrame") - elif not isinstance(names, list): - raise ValueError("Please input names as a list") - elif not isinstance(dups, bool): - raise ValueError("Please input dups as a bool") - elif not isinstance(same, bool): - raise ValueError("Please input same as a bool") - elif not isinstance(comment, bool): - raise ValueError("Please input comment as a bool") - - dict_temp = {} - - try: - dict_temp["same_values"] = pd.merge( - x.drop_duplicates(), y.drop_duplicates(), how="inner" - ) - except Exception as ex: - raise (ex) - try: - dict_temp[names[0] + "_not_" + names[1]] = pd.concat( - [x, dict_temp["same_values"]], ignore_index=True - ).drop_duplicates(keep=False) - dict_temp[names[1] + "_not_" + names[0]] = pd.concat( - [y, dict_temp["same_values"]], ignore_index=True - ).drop_duplicates(keep=False) - except Exception as ex: - raise (ex) - - if dups is True: - try: - dict_temp[names[0] + "_dups"] = x[x.duplicated()] - dict_temp[names[1] + "_dups"] = y[y.duplicated()] - except Exception as ex: - raise (ex) - if same is True: - 
try: - if (x.shape == y.shape) & (x.shape == dict_temp["same_values"].shape): - dict_temp["Same"] = True - else: - dict_temp["Same"] = False - except Exception as ex: - raise (ex) - try: - if comment is True: - print( - "\nThere are " + str(dict_temp["same_values"].shape[0]) + " same values" - ) - print( - "There are " - + str(dict_temp[names[0] + "_not_" + names[1]].shape[0]) - + " outliers in " - + str(names[0]) - ) - print( - "There are " - + str(dict_temp[names[1] + "_not_" + names[0]].shape[0]) - + " outliers in " - + str(names[1]) - ) - if dups is True: - print( - "There are " - + str(dict_temp[names[0] + "_dups"].shape[0]) - + " duplicates in " - + names[0] - ) - print( - "There are " - + str(dict_temp[names[1] + "_dups"].shape[0]) - + " duplicates in " - + names[1] - ) - if same is True: - if dict_temp["Same"] is True: - s = "the same" - else: - s = "not the same" - print("DataFrames are " + s) - except Exception as ex: - raise (ex) - - return dict_temp diff --git a/codonPython/mesh/__init__.py b/codonPython/mesh/__init__.py deleted file mode 100644 index 3b24788..0000000 --- a/codonPython/mesh/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .mesh import MESHConnection, generate_authorization -from .exceptions import ( - MESHAuthenticationError, - MESHDownloadErrors, - MESHInvalidRecipient, - MESHMessageAlreadyDownloaded, - MESHMessageMissing, - MESHMultipleMatches, - MESHUnknownError, -) diff --git a/codonPython/mesh/exceptions.py b/codonPython/mesh/exceptions.py deleted file mode 100644 index 9ad7c0c..0000000 --- a/codonPython/mesh/exceptions.py +++ /dev/null @@ -1,56 +0,0 @@ -from requests.exceptions import ConnectionError - - -class MESHAuthenticationError(ConnectionError): - """The MESH request authentication was invalid""" - - @property - def msg(self): - return "Invalid authentication" - - -class MESHMessageMissing(ConnectionError): - """The message requested does not exist""" - - @property - def msg(self): - return "Message does not exist" - - 
-class MESHMessageAlreadyDownloaded(ConnectionError): - """The MESH request has already been downloaded""" - - @property - def msg(self): - return "Message already downloaded" - - -class MESHDownloadErrors(Exception): - """There were errors downloading MESH messages""" - - def __init__(self, exceptions): - self.exceptions = exceptions - - -class MESHInvalidRecipient(ConnectionError): - """The recipient is unknown or otherwise invalid""" - - @property - def msg(self): - return "Invalid recipient" - - -class MESHMultipleMatches(ConnectionError): - """There are multiple messages with the provided local ID""" - - @property - def msg(self): - return "Multiple messages found" - - -class MESHUnknownError(ConnectionError): - """There was an unknown error with the connection""" - - @property - def msg(self): - return "Unknown" diff --git a/codonPython/mesh/mesh.py b/codonPython/mesh/mesh.py deleted file mode 100644 index a590595..0000000 --- a/codonPython/mesh/mesh.py +++ /dev/null @@ -1,879 +0,0 @@ -import platform -from dataclasses import dataclass -from datetime import datetime -from .exceptions import ( - MESHAuthenticationError, - MESHDownloadErrors, - MESHInvalidRecipient, - MESHMessageAlreadyDownloaded, - MESHMessageMissing, - MESHMultipleMatches, - MESHUnknownError, -) -from gzip import compress, decompress -from hashlib import md5 -from hmac import new as hmac -from math import ceil -from os import path -from uuid import uuid4 -import logging -from typing import Generator, Union - -import requests as r - - -@dataclass -class MESHConnection: - """Class for handling MESH API interactions. 
- - Parameters - ---------- - mailbox : string - The MESH ID of the mailbox this client is for - password : string - The password to this mailbox - api_shared_key : string - The shared API key for the MESH environment the mailbox is in - cert_loc : string - Path to the MESH API certificate location - key_loc : string - Path to the MESH API certificate private key location - base_ca_loc : string - Path to the base MESH certificate authority certificate bundle. - Set to False to disable inbound SSL checks if necessary - root_url : string, default = "https://mesh-sync.national.ncrs.nhs.uk" - Root MESH URL. Default value is the live MESH service - org : string, default = "NHS Digital" - Name of organisation owning the mailbox - """ - - mailbox: str - password: str - api_shared_key: str - cert_loc: str - key_loc: str - base_ca_loc: str - root_url: str = "https://mesh-sync.national.ncrs.nhs.uk" - org: str = "NHS Digital" - - def check_authentication(self) -> bool: - """ - Check authentication with the MESH API. 
- This should be done at the start of any session (per the API docs) - - Returns - ---------- - bool - Indicates if authentication was successful or not - - Raises - ---------- - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.check_authentication() #doctest: +SKIP - True - """ - resp = r.post( - f"{self.root_url}/messageexchange/{self.mailbox}", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ), - "Mex-ClientVersion": f"pyMESHAPI0.1a", - "Mex-OSArchitecture": platform.machine(), - "Mex-OSName": platform.system(), - "Mex-OSVersion": platform.version(), - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - return False - if resp.status_code == 200: - return True - raise MESHUnknownError(response=resp) - - def send_file( - self, - dest_mailbox: str, - message_location: str, - workflow_id: str, - message_subject: str = None, - message_id: str = None, - process_id: str = None, - compress_message: bool = True, - encrypted: bool = False, - ): - """ - Send a file to the MESH API. - This will automatically chunk the message if required, splitting into chunks at 80MB (MESH API has a - chunk size limit of 100MB). If required, this will also compress the message before transmission using - gzip. - - Parameters - ---------- - dest_mailbox : string - MESH Mailbox ID of the recipient - message_location : string - Path to the readable file to send as a message - workflow_id : string - DTS Workflow ID - message_subject : string, default = None - Optional subject line to use for the message, for SMTP (email) messages. - message_id : string, default = None - Optional local identifier for the message. Required to track the message later. - process_id : string, default = None - Optional process ID for the MESH message. Currently not used in MESH, but included to ensure - future compatibility. 
- compress_message : boolean, default = True - Indicates if the message should be compressed. If true, then the message will be compressed - using gzip before sending to MESH. - encrypted : boolean, default = False - Indicates if the file to send has been encrypted. This is solely used to pass a flag to MESH - and does not encrypt the file or otherwise alter processing. - - Returns - ---------- - dict - Dictionary of returned values from the MESH API - - * messageID (str): value of the MESH internal ID assigned to the sent message - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. - MESHInvalidRecipient - The mailbox ID provided is not a valid recipient for this message - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.send_file("TEST", 'c:/test/test.txt', 'test_flow') #doctest: +SKIP - {'messageID': '20200211115928515346_9359E2'} - """ - with open(message_location, "rb") as file: - message = file.read() - filename = path.basename(message_location) - return self.send_message( - dest_mailbox=dest_mailbox, - message=message, - filename=filename, - workflow_id=workflow_id, - message_subject=message_subject, - message_id=message_id, - process_id=process_id, - compress_message=compress_message, - encrypted=encrypted, - ) - - def send_message( - self, - dest_mailbox: str, - message: bytes, - filename: str, - workflow_id: str, - message_subject: str = None, - message_id: str = None, - process_id: str = None, - compress_message: bool = True, - encrypted: bool = False, - ): - """ - Send a message to the MESH API. - This will automatically chunk the message if required, splitting into chunks at 80MB (MESH API has a - chunk size limit of 100MB). If required, this will also compress the message before transmission using - gzip. 
- - Parameters - ---------- - dest_mailbox : string - MESH Mailbox ID of the recipient - message : bytes - Bytes representation of the file to transmit - filename : string - Original filename for the message being transmitted - workflow_id : string - DTS Workflow ID - message_subject : string, default = None - Optional subject line to use for the message, for SMTP (email) messages. - message_id : string, default = None - Optional local identifier for the message. Required to track the message later. - process_id : string, default = None - Optional process ID for the MESH message. Currently not used in MESH, but included to ensure - future compatibility. - compress_message : boolean, default = True - Indicates if the message should be compressed. If true, then the message will be compressed - using gzip before sending to MESH. - encrypted : boolean, default = False - Indicates if the file to send has been encrypted. This is solely used to pass a flag to MESH - and does not encrypt the file or otherwise alter processing. - - Returns - ---------- - dict - Dictionary of returned values from the MESH API - - * messageID (str): value of the MESH internal ID assigned to the sent message - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHInvalidRecipient - The mailbox ID provided is not a valid recipient for this message - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.send_message("TEST", b'test', 'test.txt', 'test_flow') #doctest: +SKIP - {'messageID': '20200211115928515346_9359E2'} - """ - checksum = md5(message).hexdigest() - if compress_message: - message = compress(message) - - headers = { - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ), - "Content-Type": "application/octet-stream", - "Mex-From": self.mailbox, - "Mex-To": dest_mailbox, - "Mex-WorkflowID": workflow_id, - "Mex-Filename": filename, - "Mex-MessageType": "DATA", - "Mex-Version": "1.0", - "Mex-Checksum": f"md5 {checksum}", - } - - if process_id is not None: - headers["Mex-ProcessID"] = process_id - if message_id is not None: - headers["Mex-LocalID"] = message_id - if compress_message: - headers["Mex-Content-Compressed"] = "Y" - headers["Content-Encoding"] = "gzip" - if encrypted: - headers["Mex-Content-Encrypted"] = "Y" - if message_subject is not None: - headers["Mex-Subject"] = message_subject - if len(message) > 80000000: - headers["Mex-Chunk-Range"] = f"1:{ceil(len(message)/80000000)}" - - if len(message) > 80000000: - resp = r.post( - url=f"{self.root_url}/messageexchange/{self.mailbox}/outbox", - data=message[0:80000000], - headers=headers, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code == 417: - raise MESHInvalidRecipient(response=resp) - if resp.status_code != 202: - raise MESHUnknownError(response=resp) - message_id = resp.json()["messageID"] - for chunk in range(2, ceil(len(message) / 80000000) + 1): - self._send_message_chunk( - message_id=message_id, - message_chunk=message[(chunk - 1) * 80000000:chunk * 80000000], - chunk_no=chunk, - chunk_range=ceil(len(message) / 80000000), - 
compressed=compress_message, - ) - else: - resp = r.post( - url=f"{self.root_url}/messageexchange/{self.mailbox}/outbox", - data=message, - headers=headers, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code == 417: - raise MESHInvalidRecipient(response=resp) - if resp.status_code != 202: - raise MESHUnknownError(response=resp) - - return resp.json() - - def _send_message_chunk( - self, - message_id: str, - message_chunk: bytes, - chunk_no: int, - chunk_range: int, - compressed: bool = True, - ) -> None: - """ - Send a message chunk to the MESH API. - This is expected to only be called by the send_message method. - - Parameters - ---------- - message_id : string - The internal MESH ID of the message to upload a chunk for - message_chunk : bytes - The data to send in this chunk - chunk_no : integer - The number of the chunk to upload - chunk_range : integer - How many chunks there are to upload in total - compressed : boolean, default = True - Is the message compressed? - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client._send_message_chunk("20200211115754892283_BC7B68", b'test', 2) #doctest: +SKIP - """ - headers = { - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ), - "Mex-From": self.mailbox, - "Content-Type": "application/octet-stream", - "Mex-Chunk-Range": f"{chunk_no}:{chunk_range}", - } - if compressed: - headers["Content-Encoding"] = "gzip" - resp = r.post( - url=f"{self.root_url}/messageexchange/{self.mailbox}/outbox/{message_id}/{chunk_no}", - data=message_chunk, - headers=headers, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code != 202: - raise MESHUnknownError(response=resp) - - def check_message_status(self, message_id: str) -> dict: - """ - Check status of a sent message. - - Parameters - ---------- - message_id : string - The local message ID, eg. as provided to send_message. Does NOT work with MESH Message IDs, only - the local ID optionally provided on sending the message. - - Returns - ---------- - dict - The full response from the MESH API for this local ID. For details, consult the MESH API documentation - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHMultipleMatches - There are multiple messages in the outbox with this local ID - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.check_message_status(test) #doctest: +SKIP - {"statusSuccess": ...} - """ - resp = r.get( - url=f"{self.root_url}/messageexchange/{self.mailbox}/outbox/tracking/{message_id}", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ) - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - # There is an error in the API itself - in case of multiple match - # will send an error page with status 200 instead of 300 - if (resp.status_code == 300) or ( - resp.text - == "300: Multiple Choices300: Multiple Choices" - ): - raise MESHMultipleMatches(response=resp) - if resp.status_code == 404: - raise MESHMessageMissing(response=resp) - if resp.status_code != 200: - raise MESHUnknownError(response=resp) - return resp.json() - - def check_inbox(self) -> list: - """ - Determine the MESH IDs of the contents of the inbox. - This will return at most 500 entries, owing to the limitations of the API. - - Returns - ---------- - list - The MESH IDs of the messages in the inbox (str) - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.check_inbox() #doctest: +SKIP - ["20200211115754892283_BC7B68", "20200211115928515346_9359E2"] - """ - resp = r.get( - url=f"{self.root_url}/messageexchange/{self.mailbox}/inbox", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ) - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code != 200: - raise MESHUnknownError(response=resp) - return resp.json()["messages"] - - def check_inbox_count(self) -> int: - """ - Determine how many messages are in the MESH mailbox to download. - - Returns - ---------- - int - The number of messages ready to download - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.check_inbox_count() #doctest: +SKIP - 2 - """ - resp = r.get( - url=f"{self.root_url}/messageexchange/{self.mailbox}/count", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ) - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code != 200: - raise MESHUnknownError(response=resp) - return resp.json()["count"] - - def check_and_download( - self, save_folder: str = None, recursive: bool = True - ) -> Union[Generator[dict, None, None], None]: - """ - Download all messages in the inbox. 
- This will automatically handle reconstructing chunked messages, and automatically decompress any messages - which have Content-Encoding value of gzip. - WARNING: each downloaded message will be fully reconstructed and decompressed if needed. This may cause - issue for machines with very limited memory if there are very large files to download. - - If save_folder is provided, then downloaded files will be saved into that folder with their original filenames - (and non-delivery receipts will be saved there). This may cause issue if there are multiple files with the - same filename. - - If no save_folder is provided, then this function will return a generator which will yield each message in turn. - When the generator yields a message, it will send an acknowledgement to the MESH API for the previous - message; it is important that processing of the messages be complete and any required final outputs saved - before this - once acknowledged a message cannot be downloaded from MESH again. - - Parameters - ---------- - save_folder : string, default = None - If provided, the folder to save all downloaded files to when this function is called. The function - will not yield intermediate results. - - * For data files, the file will be saved in this folder with its original filename. - * For non-delivery reports, there will be a file created in the folder with filename - 'Non delivery report: (MESH message ID of failed delivery).txt', and with - content 'Message not delivered. All known details below' followed by the full - dictionary of headers from the download response. - - If not provided, then this function will instead yield results as documented below. - recursive : boolean, default = True - If true, then this method will be called recursively so long as there are more than 500 messages - in the inbox, the maximum number of messages the MESH API will provide IDs for at once. 
If false, - then only one call will be made to retrieve inbox contents, and at most 500 messages will be downloaded. - - Yields - ---------- - dict - Dictionary of details about the downloaded file. - - * filename (str): Filename of the original file (if provided). - * contents (bytes): Contents of the file (reconstructed and decompressed if necessary). - * headers (dict): Dictionary of headers returned by MESH on the initial download request. - For full details see the MESH API documentation. - * datafile (boolean): Indicates if this was a data file or a non-delivery report. - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing the inbox. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. - MESHUnknownError - There was an unexpected return status from the MESH API when accessing the inbox - MESHDownloadErrors - There were errors during the download process. This exception has the attribute 'exceptions', - which contains a full list of messages which generated exceptions, along with the exception. - This is only raised after completion of all non-error downloads, and downloads which raise - an exception are not acknowledged to the MESH API. 
- - Examples - ---------- - >>> client.check_and_download("C:/Test Folder/") #doctest: +SKIP - >>> for message in client.check_and_download(): #doctest: +SKIP - >>> print(message) #doctest: +SKIP - {'filename': 'test.txt', 'contents': b'test_message', 'headers': {...}, datafile: True} - {'filename': 'test2.txt', 'contents': b'test_message_2', 'headers': {...}, datafile: True} - {'filename': None, 'contents': b'', 'headers': {'Mex-Linkedmsgid': '1234567890', ...}, datafile: False} - """ - if save_folder is None: - return self._check_download_generator(recursive) - else: - self._check_download_save(save_folder, recursive) - - def _check_download_generator(self, recursive: bool) -> Generator[dict, None, None]: - """Internal only - generator to return for check_and_download""" - message_ids = self.check_inbox() - exceptions = [] - if recursive: - repeat_needed = self.check_inbox_count() > 500 - for message_id in message_ids: - try: - yield self.download_message(message_id, save_folder=None) - except Exception as e: - exceptions.append((message_id, e)) - else: - self.ack_download_message(message_id) - # Force termination if there are enough messages failing to download that they fill the inbox - # Reduces risk of infinite loops - if len(exceptions) >= 500: - raise MESHDownloadErrors(exceptions) - if recursive and repeat_needed: - try: - for msg in self._check_download_generator(recursive=True): - yield msg - except MESHDownloadErrors as e: - exceptions.extend(e.exceptions) - if exceptions: - raise MESHDownloadErrors(exceptions) - - def _check_download_save(self, save_folder: str, recursive: bool) -> None: - """Internal only - function to save results for check_and_download""" - message_ids = self.check_inbox() - exceptions = [] - if recursive: - repeat_needed = self.check_inbox_count() > 500 - for message_id in message_ids: - try: - self.download_message(message_id, save_folder) - except Exception as e: - exceptions.append((message_id, e)) - else: - 
self.ack_download_message(message_id) - # Force termination if there are enough messages failing to download that they fill the inbox - # Reduces risk of infinite loops - if len(exceptions) >= 500: - raise MESHDownloadErrors(exceptions) - if recursive and repeat_needed: - try: - self._check_download_save(save_folder, recursive=True) - except MESHDownloadErrors as e: - exceptions.extend(e.exceptions) - if exceptions: - raise MESHDownloadErrors(exceptions) - - def download_message(self, message_id: str, save_folder: str = None) -> dict: - """ - Request a message from the MESH API. - This will automatically handle reconstructing chunked messages, and automatically decompress any messages - which have Content-Encoding value of gzip. - WARNING: the full, reconstructed message will be held in memory, including after decompression. This may - cause problems, if you are using the API to download very large files on a machine with very limited memory. - - Parameters - ---------- - message_id : string - The internal MESH ID of the message to download - save_folder : string, default = None - Optional, the folder to save the downloaded message to. If not provided, then the files are not saved. - - * For data files, the file will be saved in this folder with its original filename. - * For non-delivery reports, there will be a file created in the folder with filename - 'Non delivery report: (MESH message ID of failed delivery).txt', and with - content 'Message not delivered. All known details below' followed by the full - dictionary of headers from the download response. - - Returns - ---------- - dict - Dictionary of details about the downloaded file. - - * filename (str): Filename of the original file (if provided). - * contents (bytes): Contents of the file (reconstructed and decompressed if necessary). - * headers (dict): Dictionary of headers returned by MESH on the initial download request. - For full details see the MESH API documentation. 
- * datafile (boolean): Indicates if this was a data file or a non-delivery report. - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. - MESHMessageMissing - There is no message with the provided message ID in the mailbox - MESHMessageAlreadyDownloaded - The message with the provided message ID has already been downloaded and acknowledged - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.download_message("20200211115754892283_BC7B68", "C:/Test Folder/") #doctest: +SKIP - {'filename': 'test.txt', 'contents': b'test_message', 'headers': {'Mex-Filename': 'test.txt', ...}, data: True} - >>> client.download_message("20200211115754892283_BC7B69") #doctest: +SKIP - {'filename': None, 'contents': b'', 'headers': {'Mex-Linkedmsgid': '1234567890', ...}, data: False} - """ - resp = r.get( - url=f"{self.root_url}/messageexchange/{self.mailbox}/inbox/{message_id}", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ), - "Accept-Encoding": "gzip", - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - stream=True, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - elif resp.status_code == 404: - raise MESHMessageMissing(response=resp) - elif resp.status_code == 410: - raise MESHMessageAlreadyDownloaded(response=resp) - elif resp.status_code == 206: - core_data = resp.raw.data - chunk_count = int(resp.headers["Mex-Chunk-Range"][2:]) - for chunk in range(2, chunk_count + 1): - core_data += self._download_message_chunk(message_id, chunk) - elif resp.status_code == 200: - core_data = resp.raw.data - else: - raise MESHUnknownError(response=resp) - - # If this header exists, the message is a non delivery report - if ("Mex-Linkedmsgid" in 
resp.headers) or ( - resp.headers["Mex-MessageType"] == "REPORT" - ): - logging.info( - f"Non delivery report for message {resp.headers['Mex-Linkedmsgid']}" - ) - if save_folder is not None: - with open( - path.join( - save_folder, - f"Non delivery report: {resp.headers['Mex-Linkedmsgid']}.txt", - ), - "w", - ) as file: - file.write( - "Message not delivered. All known details below\n" - + str(resp.headers) - ) - return { - "filename": resp.headers.get("Mex-Filename"), - "contents": resp.content, - "headers": resp.headers, - "datafile": False, - } - - if ("Content-Encoding" in resp.headers) and ( - resp.headers["Content-Encoding"] == "gzip" - ): - core_data = decompress(core_data) - - if save_folder is not None: - with open( - path.join(save_folder, resp.headers["Mex-Filename"]), "wb" - ) as file: - file.write(core_data) - return { - "filename": resp.headers["Mex-Filename"], - "contents": core_data, - "headers": resp.headers, - "datafile": True, - } - - def _download_message_chunk(self, message_id: str, chunk_no: int) -> bytes: - """ - Request a message chunk from the MESH API. - This is expected to only be called by the download_message method. - - Parameters - ---------- - message_id : string - The internal MESH ID of the message to download a chunk from - chunk_no : integer - The number of the chunk to download - - Returns - ---------- - bytes - The raw content of the downloaded chunk - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHMessageMissing - There is no message with the provided message ID in the mailbox - MESHMessageAlreadyDownloaded - The message with the provided message ID has already been downloaded and acknowledged - MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client._download_message_chunk("20200211115754892283_BC7B68", 1) #doctest: +SKIP - b'test_message' - """ - resp = r.get( - url=f"{self.root_url}/messageexchange/{self.mailbox}/inbox/{message_id}/{chunk_no}", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ), - "Accept-Encoding": "gzip", - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - stream=True, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - elif resp.status_code == 404: - raise MESHMessageMissing(response=resp) - elif resp.status_code == 410: - raise MESHMessageAlreadyDownloaded(response=resp) - elif resp.status_code in (200, 206): - return resp.raw.data - else: - raise MESHUnknownError(response=resp) - - def ack_download_message(self, message_id: str) -> None: - """ - Send acknowledgement to the MESH API that a message has finished downloading. - This should only be done after the message has successfully been saved - - once sent, the message is remvoed from the MESH server. - Per the API, this must be sent once a message has been successfully processed. - - Parameters - ---------- - message_id : string - The internal MESH ID of the downloaded message - - Raises - ---------- - MESHAuthenticationError - There was an authentication error accessing this page. Either the SSL certificate used is invalid, - or the client provided the wrong Mailbox ID, Password, or Shared Key. 
- MESHUnknownError - There was an unexpected return status from the MESH API - - Examples - ---------- - >>> client.ack_download_message("20200211115754892283_BC7B68") #doctest: +SKIP - """ - resp = r.put( - url=f"{self.root_url}/messageexchange/{self.mailbox}/inbox/{message_id}/status/acknowledged", - headers={ - "Authorization": generate_authorization( - self.mailbox, self.password, self.api_shared_key - ) - }, - cert=(self.cert_loc, self.key_loc), - verify=self.base_ca_loc, - ) - if resp.status_code == 403: - raise MESHAuthenticationError(response=resp) - if resp.status_code != 200: - raise MESHUnknownError(response=resp) - - -def generate_authorization(mailbox: str, password: str, api_shared_key: str) -> str: - """ - Generate an authorization string as specified by the MESH API documentation v1.14 - - Parameters - ---------- - mailbox : string - The mailbox ID to generate authorization for - password : string - The password for the mailbox - api_shared_key : string - The shared API key for the MESH environment the request is being made to - - Returns - ---------- - string - The generated authentication string - - Examples - ---------- - >>> generate_authorization("TEST_BOX", "TEST_PW", "TEST_KEY") #doctest: +SKIP - "NHSMESH TEST_BOX:ccd54b96-ee41-4d34-9700-7f9ec63d0720:1:202002120857:763 ... 872c" - >>> generate_authorization("NEW_BOX", "NEW_PW", "TEST_KEY") #doctest: +SKIP - "NHSMESH NEW_BOX:662c4ffa-c85c-4858-bae8-7327e09aeeb5:1:202002120858:7f1 ... 
0d95" - """ - nonce = uuid4() - time = datetime.now().strftime("%Y%m%d%H%M") - hash_out = hmac( - api_shared_key.encode(), - msg=f"{mailbox}:{nonce}:1:{password}:{time}".encode("utf8"), - digestmod="sha256", - ).hexdigest() - return f"NHSMESH {mailbox}:{nonce}:1:{time}:{hash_out}" diff --git a/codonPython/mesh/tests/conftest.py b/codonPython/mesh/tests/conftest.py deleted file mode 100644 index 98825e9..0000000 --- a/codonPython/mesh/tests/conftest.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - - -def mock_generate_authorization(*args): - return "xxxauthorizationxxx" - - -@pytest.fixture -def mesh_connection(monkeypatch): - import codonPython.mesh as mesh - - monkeypatch.setattr( - mesh.mesh, "generate_authorization", mock_generate_authorization - ) - - return mesh.MESHConnection( - mailbox="TestMailboxId", - password="secret_password", - api_shared_key="api_shared_key", - cert_loc="keys/mesh.cert", - key_loc="keys/mesh.key", - base_ca_loc="keys/mesh.ca-bundle", - root_url="http://root", - ) diff --git a/codonPython/mesh/tests/test_ack_download_message.py b/codonPython/mesh/tests/test_ack_download_message.py deleted file mode 100644 index cdc19d9..0000000 --- a/codonPython/mesh/tests/test_ack_download_message.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest - -import codonPython.mesh as mesh - - -def test_Ack_ValidRequest_CallsOnce(requests_mock, mesh_connection): - requests_mock.put( - url="http://root/messageexchange/TestMailboxId/inbox/1/status/acknowledged", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=200, - ) - mesh_connection.ack_download_message("1") - assert requests_mock.call_count == 1 - - -def test_Ack_403_RaisesAuthError(requests_mock, mesh_connection): - requests_mock.put( - url="http://root/messageexchange/TestMailboxId/inbox/1/status/acknowledged", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - 
mesh_connection.ack_download_message("1") - - -def test_Ack_400_RaisesUnknownError(requests_mock, mesh_connection): - requests_mock.put( - url="http://root/messageexchange/TestMailboxId/inbox/1/status/acknowledged", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.ack_download_message("1") diff --git a/codonPython/mesh/tests/test_check_and_download.py b/codonPython/mesh/tests/test_check_and_download.py deleted file mode 100644 index ee00a79..0000000 --- a/codonPython/mesh/tests/test_check_and_download.py +++ /dev/null @@ -1,374 +0,0 @@ -from itertools import chain -from os import path - -import pytest - -import codonPython.mesh as mesh - - -def mock_download(message_id, save_folder=None): - if save_folder is not None: - with open(path.join(save_folder, str(message_id)), "w") as file: - file.write(str(message_id)) - return { - "filename": message_id, - "contents": message_id, - "headers": {}, - "datafile": True, - } - - -def mock_download_fail_auth(message_id, save_folder=None): - raise mesh.MESHAuthenticationError - - -def mock_download_fail_gone(message_id, save_folder=None): - raise mesh.MESHMessageAlreadyDownloaded - - -def mock_download_chooser_factory(auth_ids, gone_ids): - def mock(message_id, save_folder=None): - if message_id in auth_ids: - return mock_download_fail_auth(message_id, save_folder) - if message_id in gone_ids: - return mock_download_fail_gone(message_id, save_folder) - return mock_download(message_id, save_folder) - - return mock - - -def mock_inbox_factory(outputs_list): - output_iter = (out for out in outputs_list) - - def mock_output(*args, **kwargs): - try: - return next(output_iter) - except StopIteration: - return [] - - return mock_output - - -def mock_count_factory(counts_list): - output_iter = (out for out in counts_list) - - def mock_output(*args, **kwargs): - try: - return next(output_iter) - except StopIteration: - return 0 - - return 
mock_output - - -class Tracker: - def __init__(self): - self.count = 0 - self.data = [] - - def inc(self, *args, **kwargs): - self.count += 1 - self.data.append((args, kwargs)) - - -@pytest.fixture -def track_ack(monkeypatch, mesh_connection): - tracker = Tracker() - monkeypatch.setattr(mesh_connection, "ack_download_message", tracker.inc) - return tracker - - -@pytest.fixture -def patch_valid(monkeypatch, mesh_connection): - monkeypatch.setattr(mesh_connection, "download_message", mock_download) - monkeypatch.setattr(mesh_connection, "check_inbox_count", mock_count_factory([3])) - monkeypatch.setattr( - mesh_connection, "check_inbox", mock_inbox_factory([["1", "2", "3"]]) - ) - return mesh_connection - - -def test_CheckDownload_DownloadsCorrectSave(patch_valid, track_ack, tmpdir): - p = tmpdir.mkdir("dl") - out = patch_valid.check_and_download(save_folder=str(p), recursive=False) - assert out is None - assert p.join("1").read() == "1" - assert p.join("2").read() == "2" - assert p.join("3").read() == "3" - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {})] - - -def test_CheckDownload_DownloadsCorrectGenerator(patch_valid, track_ack): - out = patch_valid.check_and_download(save_folder=None, recursive=False) - msg = next(out) - assert msg == {"filename": "1", "contents": "1", "headers": {}, "datafile": True} - msg = next(out) - assert msg == {"filename": "2", "contents": "2", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {})] - msg = next(out) - assert msg == {"filename": "3", "contents": "3", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {}), (("2",), {})] - with pytest.raises(StopIteration): - msg = next(out) - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {})] - - -@pytest.fixture -def patch_recurse(monkeypatch, mesh_connection): - monkeypatch.setattr(mesh_connection, "download_message", mock_download) - monkeypatch.setattr( - mesh_connection, "check_inbox_count", 
mock_count_factory([501, 501, 1]) - ) - monkeypatch.setattr( - mesh_connection, - "check_inbox", - mock_inbox_factory([["1", "2", "3"], ["4"], ["5"]]), - ) - return mesh_connection - - -def test_CheckDownload_NoRecurseSave(patch_recurse, track_ack, tmpdir): - p = tmpdir.mkdir("dl") - out = patch_recurse.check_and_download(save_folder=str(p), recursive=False) - assert out is None - assert p.join("1").read() == "1" - assert p.join("2").read() == "2" - assert p.join("3").read() == "3" - assert p.join("4").exists() is False - assert p.join("5").exists() is False - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {})] - - -def test_CheckDownload_RecurseSave(patch_recurse, track_ack, tmpdir): - p = tmpdir.mkdir("dl") - out = patch_recurse.check_and_download(save_folder=str(p), recursive=True) - assert out is None - assert p.join("1").read() == "1" - assert p.join("2").read() == "2" - assert p.join("3").read() == "3" - assert p.join("4").read() == "4" - assert p.join("5").read() == "5" - assert track_ack.data == [ - (("1",), {}), - (("2",), {}), - (("3",), {}), - (("4",), {}), - (("5",), {}), - ] - - -def test_CheckDownload_NoRecurseGen(patch_recurse, track_ack): - out = patch_recurse.check_and_download(save_folder=None, recursive=False) - msg = next(out) - assert msg == {"filename": "1", "contents": "1", "headers": {}, "datafile": True} - msg = next(out) - assert msg == {"filename": "2", "contents": "2", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {})] - msg = next(out) - assert msg == {"filename": "3", "contents": "3", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {}), (("2",), {})] - with pytest.raises(StopIteration): - msg = next(out) - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {})] - - -def test_CheckDownload_RecurseGen(patch_recurse, track_ack): - out = patch_recurse.check_and_download(save_folder=None, recursive=True) - msg = next(out) - assert msg == {"filename": "1", "contents": 
"1", "headers": {}, "datafile": True} - msg = next(out) - assert msg == {"filename": "2", "contents": "2", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {})] - msg = next(out) - assert msg == {"filename": "3", "contents": "3", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {}), (("2",), {})] - msg = next(out) - assert msg == {"filename": "4", "contents": "4", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {})] - msg = next(out) - assert msg == {"filename": "5", "contents": "5", "headers": {}, "datafile": True} - assert track_ack.data == [(("1",), {}), (("2",), {}), (("3",), {}), (("4",), {})] - with pytest.raises(StopIteration): - msg = next(out) - assert track_ack.data == [ - (("1",), {}), - (("2",), {}), - (("3",), {}), - (("4",), {}), - (("5",), {}), - ] - - -@pytest.fixture -def patch_errors(monkeypatch, mesh_connection): - monkeypatch.setattr( - mesh_connection, - "download_message", - mock_download_chooser_factory(["1", "2", "6"], ["3", "4", "9"]), - ) - monkeypatch.setattr( - mesh_connection, "check_inbox_count", mock_count_factory([501, 501, 1]) - ) - monkeypatch.setattr( - mesh_connection, - "check_inbox", - mock_inbox_factory([["1", "2", "3", "4", "5"], ["6", "7"], ["8", "9"]]), - ) - return mesh_connection - - -def test_CheckDownload_ErrorsNoRecurseSave(patch_errors, track_ack, tmpdir): - p = tmpdir.mkdir("dl") - with pytest.raises(mesh.MESHDownloadErrors) as exc: - patch_errors.check_and_download(save_folder=str(p), recursive=False) - assert exc.value.exceptions[0][0] == "1" - assert type(exc.value.exceptions[0][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[1][0] == "2" - assert type(exc.value.exceptions[1][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[2][0] == "3" - assert type(exc.value.exceptions[2][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[3][0] == "4" - assert 
type(exc.value.exceptions[3][1]) == mesh.MESHMessageAlreadyDownloaded - assert len(exc.value.exceptions) == 4 - - assert p.join("1").exists() is False - assert p.join("2").exists() is False - assert p.join("3").exists() is False - assert p.join("4").exists() is False - assert p.join("5").read() == "5" - assert track_ack.data == [(("5",), {})] - - -def test_CheckDownload_ErrorsNoRecurseGen(patch_errors, track_ack): - out = patch_errors.check_and_download(save_folder=None, recursive=False) - msg = next(out) - assert msg == {"filename": "5", "contents": "5", "headers": {}, "datafile": True} - with pytest.raises(mesh.MESHDownloadErrors) as exc: - msg = next(out) - assert exc.value.exceptions[0][0] == "1" - assert type(exc.value.exceptions[0][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[1][0] == "2" - assert type(exc.value.exceptions[1][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[2][0] == "3" - assert type(exc.value.exceptions[2][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[3][0] == "4" - assert type(exc.value.exceptions[3][1]) == mesh.MESHMessageAlreadyDownloaded - assert len(exc.value.exceptions) == 4 - - assert track_ack.data == [(("5",), {})] - - -def test_CheckDownload_ErrorsRecurseSave(patch_errors, track_ack, tmpdir): - p = tmpdir.mkdir("dl") - with pytest.raises(mesh.MESHDownloadErrors) as exc: - patch_errors.check_and_download(save_folder=str(p), recursive=True) - assert exc.value.exceptions[0][0] == "1" - assert type(exc.value.exceptions[0][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[1][0] == "2" - assert type(exc.value.exceptions[1][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[2][0] == "3" - assert type(exc.value.exceptions[2][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[3][0] == "4" - assert type(exc.value.exceptions[3][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[4][0] == "6" - assert 
type(exc.value.exceptions[4][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[5][0] == "9" - assert type(exc.value.exceptions[5][1]) == mesh.MESHMessageAlreadyDownloaded - assert len(exc.value.exceptions) == 6 - - assert p.join("1").exists() is False - assert p.join("2").exists() is False - assert p.join("3").exists() is False - assert p.join("4").exists() is False - assert p.join("5").read() == "5" - assert p.join("6").exists() is False - assert p.join("7").read() == "7" - assert p.join("8").read() == "8" - assert p.join("9").exists() is False - assert track_ack.data == [(("5",), {}), (("7",), {}), (("8",), {})] - - -def test_CheckDownload_ErrorsRecurseGen(patch_errors, track_ack): - out = patch_errors.check_and_download(save_folder=None, recursive=True) - msg = next(out) - assert msg == {"filename": "5", "contents": "5", "headers": {}, "datafile": True} - msg = next(out) - assert msg == {"filename": "7", "contents": "7", "headers": {}, "datafile": True} - assert track_ack.data == [(("5",), {})] - msg = next(out) - assert msg == {"filename": "8", "contents": "8", "headers": {}, "datafile": True} - assert track_ack.data == [(("5",), {}), (("7",), {})] - with pytest.raises(mesh.MESHDownloadErrors) as exc: - msg = next(out) - assert exc.value.exceptions[0][0] == "1" - assert type(exc.value.exceptions[0][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[1][0] == "2" - assert type(exc.value.exceptions[1][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[2][0] == "3" - assert type(exc.value.exceptions[2][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[3][0] == "4" - assert type(exc.value.exceptions[3][1]) == mesh.MESHMessageAlreadyDownloaded - assert exc.value.exceptions[4][0] == "6" - assert type(exc.value.exceptions[4][1]) == mesh.MESHAuthenticationError - assert exc.value.exceptions[5][0] == "9" - assert type(exc.value.exceptions[5][1]) == mesh.MESHMessageAlreadyDownloaded - assert 
len(exc.value.exceptions) == 6 - - assert track_ack.data == [(("5",), {}), (("7",), {}), (("8",), {})] - - -@pytest.fixture -def patch_many_errors(monkeypatch, mesh_connection): - # This is for testing the early abort of the recursion if we hit 500 failed messages in one fetch - # Failed messages will not be acknowledged, and will thus stay in the MESH system - # If the issue is inherent to the message, and the inbox is full of messages with these issues - # then we could enter an infinite loop without this abort - monkeypatch.setattr( - mesh_connection, - "download_message", - mock_download_chooser_factory( - list(chain(range(300), range(500, 800))), list(range(1000, 1500)) - ), - ) - monkeypatch.setattr( - mesh_connection, "check_inbox_count", mock_count_factory([501, 501, 501, 1]) - ) - monkeypatch.setattr( - mesh_connection, - "check_inbox", - mock_inbox_factory( - [range(500), range(500, 1000), range(1000, 1500), range(1500, 1501)] - ), - ) - return mesh_connection - - -def test_CheckDownload_ErrorsEarlyTerminateSave(patch_many_errors, tmpdir, track_ack): - p = tmpdir.mkdir("dl") - with pytest.raises(mesh.MESHDownloadErrors) as exc: - patch_many_errors.check_and_download(save_folder=p, recursive=True) - assert len(exc.value.exceptions) == 1100 - for e, index in zip( - exc.value.exceptions, chain(range(300), range(500, 800), range(1000, 1500)) - ): - assert e[0] == index - assert len(p.listdir()) == 400 - - -def test_CheckDownload_ErrorsEarlyTerminateGen(patch_many_errors, track_ack): - with pytest.raises(mesh.MESHDownloadErrors) as exc: - for msg, index in zip( - patch_many_errors.check_and_download(save_folder=None, recursive=True), - chain(range(300, 500), range(800, 1000)), - ): - assert msg == { - "filename": index, - "contents": index, - "headers": {}, - "datafile": True, - } - assert len(exc.value.exceptions) == 1100 - for e, index in zip( - exc.value.exceptions, chain(range(300), range(500, 800), range(1000, 1500)) - ): - assert e[0] == index diff --git 
a/codonPython/mesh/tests/test_check_authentication.py b/codonPython/mesh/tests/test_check_authentication.py deleted file mode 100644 index 60dbbf3..0000000 --- a/codonPython/mesh/tests/test_check_authentication.py +++ /dev/null @@ -1,51 +0,0 @@ -import pytest - -import codonPython.mesh as mesh - - -def test_CheckAuthentication_ValidRequest_ReturnsTrue(requests_mock, mesh_connection): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId", status_code=200, - ) - test_check_authentication = mesh_connection.check_authentication() - assert requests_mock.call_count == 1 - assert test_check_authentication - - -def test_CheckAuthentication_HasRequiredHeaders(requests_mock, mesh_connection): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=200, - ) - mesh_connection.check_authentication() - assert requests_mock.call_count == 1 - assert all( - header in requests_mock.request_history[0].headers - for header in [ - "Mex-ClientVersion", - "Mex-OSArchitecture", - "Mex-OSName", - "Mex-OSVersion", - ] - ) - - -def test_CheckAuthentication_403StatusCode_ReturnsFalse(requests_mock, mesh_connection): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId", status_code=403, - ) - test_check_authentication = mesh_connection.check_authentication() - assert requests_mock.call_count == 1 - assert not test_check_authentication - - -def test_CheckAuthentication_400StatusCode_ReturnsUnknownError( - requests_mock, mesh_connection -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId", status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.check_authentication() - assert requests_mock.call_count == 1 diff --git a/codonPython/mesh/tests/test_check_inbox.py b/codonPython/mesh/tests/test_check_inbox.py deleted file mode 100644 index ba1ba0c..0000000 --- a/codonPython/mesh/tests/test_check_inbox.py +++ /dev/null @@ 
-1,39 +0,0 @@ -import pytest - -import codonPython.mesh as mesh - - -def test_CheckInbox_ValidRequest_ReturnsJson(requests_mock, mesh_connection): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=200, - json={"messages": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, - ) - test_check_inbox_count = mesh_connection.check_inbox() - assert requests_mock.call_count == 1 - assert test_check_inbox_count == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - - -def test_CheckInbox_403StatusCode_ReturnsAuthenticationError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.check_inbox() - assert requests_mock.call_count == 1 - - -def test_CheckInbox_400StatusCode_ReturnsUnknownError(requests_mock, mesh_connection): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.check_inbox() - assert requests_mock.call_count == 1 diff --git a/codonPython/mesh/tests/test_check_inbox_count.py b/codonPython/mesh/tests/test_check_inbox_count.py deleted file mode 100644 index bb5eaa6..0000000 --- a/codonPython/mesh/tests/test_check_inbox_count.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest - -import codonPython.mesh as mesh - - -def test_CheckInboxCount_ValidRequest_ReturnsJson(requests_mock, mesh_connection): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/count", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=200, - json={"count": 100}, - ) - test_check_inbox_count = mesh_connection.check_inbox_count() - assert test_check_inbox_count == 100 - assert requests_mock.call_count == 1 - - 
-def test_CheckInboxCount_403StatusCode_ReturnsAuthenticationError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/count", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.check_inbox_count() - assert requests_mock.call_count == 1 - - -def test_CheckInboxCount_400StatusCode_RaisesUnknownError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/count", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.check_inbox_count() - assert requests_mock.call_count == 1 diff --git a/codonPython/mesh/tests/test_check_message_status.py b/codonPython/mesh/tests/test_check_message_status.py deleted file mode 100644 index 001cfe7..0000000 --- a/codonPython/mesh/tests/test_check_message_status.py +++ /dev/null @@ -1,105 +0,0 @@ -import pytest -import codonPython.mesh as mesh - - -@pytest.fixture -def base_params(): - return { - "message_id": "1", - } - - -@pytest.fixture -def base_headers(): - return { - "Authorization": "xxxauthorizationxxx", - } - - -def test_CheckMessage_403_RaisesAuthenticationError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.check_message_status(**base_params) - - -def test_CheckMessage_404_RaisesMissingError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=404, - ) - with pytest.raises(mesh.MESHMessageMissing): - 
mesh_connection.check_message_status(**base_params) - - -def test_CheckMessage_400_RaisesUnknownError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.check_message_status(**base_params) - - -def test_CheckMessage_300_RaisesMultipleError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=300, - ) - with pytest.raises(mesh.MESHMultipleMatches): - mesh_connection.check_message_status(**base_params) - - -# Due to errors in the API, test for a 300 error sent with code 200 -def test_CheckMessage_Fake300_RaisesMultipleError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=200, - text="300: Multiple Choices300: Multiple Choices", - ) - with pytest.raises(mesh.MESHMultipleMatches): - mesh_connection.check_message_status(**base_params) - - -def test_CheckMessage_Valid_RequestsOnce( - mesh_connection, requests_mock, base_params, base_headers -): - resp = {"test": "true"} - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=200, - json=resp, - ) - mesh_connection.check_message_status(**base_params) - assert requests_mock.call_count == 1 - - -def test_CheckMessage_Valid_ReturnsJSON( - mesh_connection, requests_mock, base_params, base_headers -): - resp = {"test": "true"} - requests_mock.get( - 
url=f"http://root/messageexchange/TestMailboxId/outbox/tracking/{base_params['message_id']}", - request_headers=base_headers, - status_code=200, - json=resp, - ) - assert mesh_connection.check_message_status(**base_params) == resp diff --git a/codonPython/mesh/tests/test_download_message.py b/codonPython/mesh/tests/test_download_message.py deleted file mode 100644 index 2fbed55..0000000 --- a/codonPython/mesh/tests/test_download_message.py +++ /dev/null @@ -1,284 +0,0 @@ -import pytest - -import codonPython.mesh as mesh - - -@pytest.fixture -def base_params(): - return { - "message_id": "1", - } - - -def test_DownloadMessage_SimpleFileReturnsCorrect( - requests_mock, mesh_connection, base_params, tmpdir -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}", - request_headers={ - "Authorization": "xxxauthorizationxxx", - "Accept-Encoding": "gzip", - }, - status_code=200, - headers={"Mex-FileName": "test.txt", "Mex-MessageType": "DATA"}, - text="test", - ) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test", - "headers": {"Mex-FileName": "test.txt", "Mex-MessageType": "DATA"}, - "datafile": True, - } - p = tmpdir.mkdir("save") - base_params["save_folder"] = str(p) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test", - "headers": {"Mex-FileName": "test.txt", "Mex-MessageType": "DATA"}, - "datafile": True, - } - assert p.join("test.txt").read() == "test" - - -def test_DownloadMessage_ZipFileReturnsCorrect( - requests_mock, mesh_connection, base_params, tmpdir -): - import gzip - - message = gzip.compress(b"test") - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}", - request_headers={ - "Authorization": "xxxauthorizationxxx", - "Accept-Encoding": "gzip", - }, - status_code=200, - headers={ - "Mex-FileName": "test.txt", - "Content-Encoding": 
"gzip", - "Mex-MessageType": "DATA", - }, - content=message, - ) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test", - "headers": { - "Mex-FileName": "test.txt", - "Content-Encoding": "gzip", - "Mex-MessageType": "DATA", - }, - "datafile": True, - } - p = tmpdir.mkdir("save") - base_params["save_folder"] = str(p) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test", - "headers": { - "Mex-FileName": "test.txt", - "Content-Encoding": "gzip", - "Mex-MessageType": "DATA", - }, - "datafile": True, - } - assert p.join("test.txt").read() == "test" - - -def test_DownloadMessage_NonDeliveryReturnsCorrect( - requests_mock, mesh_connection, base_params, tmpdir -): - headers = { - "Mex-Linkedmsgid": "1", - "Mex-MessageType": "REPORT", - } - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}", - request_headers={ - "Authorization": "xxxauthorizationxxx", - "Accept-Encoding": "gzip", - }, - status_code=200, - headers=headers, - ) - assert mesh_connection.download_message(**base_params) == { - "filename": None, - "contents": b'', - "headers": headers, - "datafile": False, - } - p = tmpdir.mkdir("save") - base_params["save_folder"] = str(p) - assert mesh_connection.download_message(**base_params) == { - "filename": None, - "contents": b'', - "headers": headers, - "datafile": False, - } - assert p.join( - "Non delivery report: 1.txt" - ).read() == "Message not delivered. 
All known details below\n" + str(headers) - - -def test_DownloadMessage_ChunkedFileReturnsCorrect( - requests_mock, mesh_connection, base_params, tmpdir -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}", - request_headers={ - "Authorization": "xxxauthorizationxxx", - "Accept-Encoding": "gzip", - }, - status_code=206, - headers={ - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - }, - text="test-", - ) - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/2", - status_code=206, - headers={"Mex-Chunk-Range": "2:3"}, - text="test2-", - ) - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/3", - status_code=200, - headers={"Mex-Chunk-Range": "3:3"}, - text="test3", - ) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test-test2-test3", - "headers": { - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - }, - "datafile": True, - } - p = tmpdir.mkdir("save") - base_params["save_folder"] = str(p) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test-test2-test3", - "headers": { - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - }, - "datafile": True, - } - assert p.join("test.txt").read() == "test-test2-test3" - - -def test_DownloadMessage_ChunkedZipFileReturnsCorrect( - requests_mock, mesh_connection, base_params, tmpdir -): - import gzip - from math import floor - - message = gzip.compress(b"test-test2-test3") - split = floor(len(message) / 3) - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}", - status_code=206, - headers={ - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - "Content-Encoding": 
"gzip", - }, - content=message[:split], - ) - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/2", - status_code=206, - headers={"Mex-Chunk-Range": "2:3"}, - content=message[split:split * 2], - ) - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/3", - status_code=200, - headers={"Mex-Chunk-Range": "3:3"}, - content=message[split * 2:], - ) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test-test2-test3", - "headers": { - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - "Content-Encoding": "gzip", - }, - "datafile": True, - } - p = tmpdir.mkdir("save") - base_params["save_folder"] = str(p) - assert mesh_connection.download_message(**base_params) == { - "filename": "test.txt", - "contents": b"test-test2-test3", - "headers": { - "Mex-FileName": "test.txt", - "Mex-MessageType": "DATA", - "Mex-Chunk-Range": "1:3", - "Content-Encoding": "gzip", - }, - "datafile": True, - } - assert p.join("test.txt").read() == "test-test2-test3" - - -def test_DownloadMessage_403StatusCode_ReturnsAuthenticationError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox/8", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.download_message(message_id=8, save_folder="save_folder") - assert requests_mock.call_count == 1 - - -def test_DownloadMessage_404StatusCode_ReturnsMessageDoesNotExistError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox/9", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=404, - ) - with pytest.raises(mesh.MESHMessageMissing): - mesh_connection.download_message(message_id=9, save_folder="save_folder") - assert 
requests_mock.call_count == 1 - - -def test_DownloadMessage_410StatusCode_ReturnsMessageAlreadyDownloadedError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox/10", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=410, - ) - with pytest.raises(mesh.MESHMessageAlreadyDownloaded): - mesh_connection.download_message(message_id=10, save_folder="save_folder") - assert requests_mock.call_count == 1 - - -def test_DownloadMessage_400StatusCode_RaisesUnknownError( - requests_mock, mesh_connection -): - requests_mock.get( - url="http://root/messageexchange/TestMailboxId/inbox/10", - request_headers={"Authorization": "xxxauthorizationxxx"}, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.download_message(message_id=10, save_folder="save_folder") - assert requests_mock.call_count == 1 diff --git a/codonPython/mesh/tests/test_download_message_chunk.py b/codonPython/mesh/tests/test_download_message_chunk.py deleted file mode 100644 index 81d9739..0000000 --- a/codonPython/mesh/tests/test_download_message_chunk.py +++ /dev/null @@ -1,101 +0,0 @@ -import pytest -import codonPython.mesh as mesh - - -@pytest.fixture -def base_params(): - return { - "message_id": "1", - "chunk_no": 2, - } - - -@pytest.fixture -def base_headers(): - return {"Authorization": "xxxauthorizationxxx", "Accept-Encoding": "gzip"} - - -def test_DownloadMessageChunk_403_RaisesAuthenticationError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection._download_message_chunk(**base_params) - - -def test_DownloadMessageChunk_404_RaisesMissingError( - mesh_connection, requests_mock, base_params, base_headers -): - 
requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=404, - ) - with pytest.raises(mesh.MESHMessageMissing): - mesh_connection._download_message_chunk(**base_params) - - -def test_DownloadMessageChunk_410_RaisesGoneError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=410, - ) - with pytest.raises(mesh.MESHMessageAlreadyDownloaded): - mesh_connection._download_message_chunk(**base_params) - - -def test_DownloadMessageChunk_400_RaisesUnknownError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection._download_message_chunk(**base_params) - - -def test_DownloadMessageChunk_Valid_SentOnce( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=200, - text="test", - ) - mesh_connection._download_message_chunk(**base_params) - assert requests_mock.call_count == 1 - - -def test_DownloadMessageChunk_206_NoRaise( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=206, - text="test", - ) - mesh_connection._download_message_chunk(**base_params) - assert requests_mock.call_count == 1 - - -def 
test_DownloadMessageChunk_ReturnsCorrect( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.get( - url=f"http://root/messageexchange/TestMailboxId/inbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=200, - text="test", - ) - assert mesh_connection._download_message_chunk(**base_params) == b"test" diff --git a/codonPython/mesh/tests/test_generate_authorization.py b/codonPython/mesh/tests/test_generate_authorization.py deleted file mode 100644 index a31d337..0000000 --- a/codonPython/mesh/tests/test_generate_authorization.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -import re - -import codonPython.mesh as mesh - -mailbox = "(Test_Mailbox|)" -nonce = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}" -time = "[0-9]{12}" -hash_out = "[0-9a-z]{64}" -auth_regex = re.compile(f"NHSMESH {mailbox}:{nonce}:1:{time}:{hash_out}") - - -class Test_generate_authorization: - def test_generate_authorization(self): - mailbox = "Test_Mailbox" - password = "Secret_Password" - api_shared_key = "api_shared_key" - test_generate_authorization = mesh.generate_authorization( - mailbox, password, api_shared_key - ) - assert re.match(auth_regex, test_generate_authorization,) - - def test_generate_authorization_with_blank_mailbox(self): - mailbox = "" - password = "Secret_Password" - api_shared_key = "api_shared_key" - test_generate_authorization = mesh.generate_authorization( - mailbox, password, api_shared_key - ) - assert re.match(auth_regex, test_generate_authorization,) - - def test_generate_authorization_with_blank_password(self): - mailbox = "Test_Mailbox" - password = "" - api_shared_key = "api_shared_key" - test_generate_authorization = mesh.generate_authorization( - mailbox, password, api_shared_key - ) - assert re.match(auth_regex, test_generate_authorization,) - - def test_generate_authorization_with_blank_api_key(self): - mailbox = "Test_Mailbox" - password = "Secret_Password" - 
api_shared_key = "" - test_generate_authorization = mesh.generate_authorization( - mailbox, password, api_shared_key - ) - assert re.match(auth_regex, test_generate_authorization,) diff --git a/codonPython/mesh/tests/test_send_file.py b/codonPython/mesh/tests/test_send_file.py deleted file mode 100644 index 04e8b15..0000000 --- a/codonPython/mesh/tests/test_send_file.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest - - -@pytest.fixture -def make_params(tmpdir): - p = tmpdir.mkdir("folder").join("test.txt") - p.write("test") - params = { - "dest_mailbox": "TESTMB", - "message_location": str(p), - "workflow_id": "TESTWF", - "message_subject": "TESTSUB", - "message_id": "TESTID", - "process_id": "TESTPROC", - "compress_message": True, - "encrypted": True, - } - return params - - -def track_args(**kwargs): - return kwargs - - -@pytest.fixture -def patch_message(mesh_connection, monkeypatch): - monkeypatch.setattr(mesh_connection, "send_message", track_args) - return mesh_connection - - -def test_SendFile_HandlesParams(patch_message, make_params): - params = patch_message.send_file(**make_params) - assert params == { - "dest_mailbox": "TESTMB", - "message": b"test", - "filename": "test.txt", - "workflow_id": "TESTWF", - "message_subject": "TESTSUB", - "message_id": "TESTID", - "process_id": "TESTPROC", - "compress_message": True, - "encrypted": True, - } diff --git a/codonPython/mesh/tests/test_send_message.py b/codonPython/mesh/tests/test_send_message.py deleted file mode 100644 index c1c5528..0000000 --- a/codonPython/mesh/tests/test_send_message.py +++ /dev/null @@ -1,285 +0,0 @@ -import pytest -import codonPython.mesh as mesh - - -@pytest.fixture -def base_params(): - return { - "dest_mailbox": "TESTMB", - "message": b"TEST", - "filename": "TEST.txt", - "workflow_id": "TESTWF", - } - - -@pytest.fixture -def base_headers(): - return { - "Authorization": "xxxauthorizationxxx", - "Content-Type": "application/octet-stream", - "Mex-From": "TestMailboxId", - "Mex-To": 
"TESTMB", - "Mex-WorkflowId": "TESTWF", - "Mex-FileName": "TEST.txt", - "Mex-MessageType": "DATA", - "Mex-Version": "1.0", - } - - -def test_SendMessage_403_RaisesAuthenticationError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessage_417_RaisesRecipientError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=417, - ) - with pytest.raises(mesh.MESHInvalidRecipient): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessage_400_RaisesUnknownError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessage_ValidHash( - mesh_connection, requests_mock, base_params, base_headers -): - import hashlib - - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - checksum = hashlib.md5(base_params["message"]).hexdigest() - assert requests_mock.call_count == 1 - assert requests_mock.request_history[0].headers["Mex-Checksum"] == f"md5 {checksum}" - - -def test_SendMessage_AbsentOptional_Skipped( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - 
request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert not any( - header in requests_mock.request_history[0].headers - for header in [ - "Mex-ProcessID", - "Mex-LocalID", - "Mex-Subject", - "Mex-Content-Encrypted", - ] - ) - - -def test_SendMessage_PresentSubject_Included( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["message_subject"] = "TESTSUB" - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert requests_mock.request_history[0].headers["Mex-Subject"] == "TESTSUB" - - -def test_SendMessage_PresentMessageID_Included( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["message_id"] = "TESTMSG" - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert requests_mock.request_history[0].headers["Mex-LocalID"] == "TESTMSG" - - -def test_SendMessage_PresentProcess_Included( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["process_id"] = "TESTPROC" - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert requests_mock.request_history[0].headers["Mex-ProcessID"] == "TESTPROC" - - -def test_SendMessage_Encrypted_Included( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["encrypted"] = True - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - 
status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert "Mex-Content-Encrypted" in requests_mock.request_history[0].headers - - -def test_compress_if_set(mesh_connection, requests_mock, base_params, base_headers): - import gzip - - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - expected_message = gzip.compress(base_params["message"]) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert "Mex-Content-Compressed" in requests_mock.request_history[0].headers - assert requests_mock.request_history[0].headers["Content-Encoding"] == "gzip" - assert requests_mock.request_history[0].body == expected_message - - -def test_no_compress_if_not_set( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["compress_message"] = False - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=202, - json={}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert "Mex-Content-Compressed" not in requests_mock.request_history[0].headers - assert "Content-Encoding" not in requests_mock.request_history[0].headers - assert requests_mock.request_history[0].body == base_params["message"] - - -class Tracker: - def __init__(self): - self.count = 0 - self.data = [] - - def inc(self, **kwargs): - self.count += 1 - self.data.append(kwargs) - - -def test_chunk_massive_file( - mesh_connection, requests_mock, base_params, base_headers, monkeypatch -): - chunks_sent = Tracker() - monkeypatch.setattr(mesh_connection, "_send_message_chunk", chunks_sent.inc) - base_params["compress_message"] = False - base_params["message"] = ("x" * 200000000).encode() - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - 
status_code=202, - json={"messageID": "1"}, - ) - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - assert requests_mock.request_history[0].headers["Mex-Chunk-Range"] == "1:3" - assert requests_mock.request_history[0].body == base_params["message"][0:80000000] - assert chunks_sent.count == 2 - assert ( - chunks_sent.data[0]["message_chunk"] - == base_params["message"][80000000:160000000] - ) - assert ( - chunks_sent.data[1]["message_chunk"] - == base_params["message"][160000000:240000000] - ) - assert chunks_sent.data[0]["message_id"] == "1" - assert chunks_sent.data[1]["message_id"] == "1" - assert chunks_sent.data[0]["chunk_no"] == 2 - assert chunks_sent.data[1]["chunk_no"] == 3 - - -def test_SendMessage_403_RaisesAuthenticationError_MassiveFile( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=403, - ) - base_params["message"] = ("x" * 200000000).encode() - base_params["compress_message"] = False - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessage_417_RaisesRecipientError_MassiveFile( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=417, - ) - base_params["message"] = ("x" * 200000000).encode() - base_params["compress_message"] = False - with pytest.raises(mesh.MESHInvalidRecipient): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessage_400_RaisesUnknownError_MassiveFile( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url="http://root/messageexchange/TestMailboxId/outbox", - request_headers=base_headers, - status_code=400, - ) - 
base_params["message"] = ("x" * 200000000).encode() - base_params["compress_message"] = False - with pytest.raises(mesh.MESHUnknownError): - mesh_connection.send_message(**base_params) - assert requests_mock.call_count == 1 diff --git a/codonPython/mesh/tests/test_send_message_chunk.py b/codonPython/mesh/tests/test_send_message_chunk.py deleted file mode 100644 index b8a09ad..0000000 --- a/codonPython/mesh/tests/test_send_message_chunk.py +++ /dev/null @@ -1,83 +0,0 @@ -import pytest -import codonPython.mesh as mesh - - -@pytest.fixture -def base_params(): - return { - "message_id": "1", - "message_chunk": b"TEST", - "chunk_no": 2, - "chunk_range": 3, - } - - -@pytest.fixture -def base_headers(): - return { - "Authorization": "xxxauthorizationxxx", - "Content-Type": "application/octet-stream", - "Mex-From": "TestMailboxId", - "Mex-Chunk-Range": "2:3", - } - - -def test_SendMessageChunk_403_RaisesAuthenticationError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url=f"http://root/messageexchange/TestMailboxId/outbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=403, - ) - with pytest.raises(mesh.MESHAuthenticationError): - mesh_connection._send_message_chunk(**base_params) - - -def test_SendMessageChunk_400_RaisesUnknownError( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url=f"http://root/messageexchange/TestMailboxId/outbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=400, - ) - with pytest.raises(mesh.MESHUnknownError): - mesh_connection._send_message_chunk(**base_params) - - -def test_SendMessageChunk_Valid_SentOnce( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url=f"http://root/messageexchange/TestMailboxId/outbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=202, - ) - 
mesh_connection._send_message_chunk(**base_params) - assert requests_mock.call_count == 1 - - -def test_SendMessageChunk_Compressed_CorrectHeaders( - mesh_connection, requests_mock, base_params, base_headers -): - requests_mock.post( - url=f"http://root/messageexchange/TestMailboxId/outbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=202, - ) - mesh_connection._send_message_chunk(**base_params) - assert requests_mock.request_history[0].headers["Content-Encoding"] == "gzip" - - -def test_SendMessageChunk_NotCompressed_CorrectHeaders( - mesh_connection, requests_mock, base_params, base_headers -): - base_params["compressed"] = False - requests_mock.post( - url=f"http://root/messageexchange/TestMailboxId/outbox/{base_params['message_id']}/{base_params['chunk_no']}", - request_headers=base_headers, - status_code=202, - ) - mesh_connection._send_message_chunk(**base_params) - assert "Content-Encoding" not in requests_mock.request_history[0].headers diff --git a/codonPython/nhsd_colours.py b/codonPython/nhsd_colours.py deleted file mode 100644 index 8258e79..0000000 --- a/codonPython/nhsd_colours.py +++ /dev/null @@ -1,111 +0,0 @@ -import seaborn as sns -import random - - -def nhsd_colours(): - """Returns a dictionary full of the different official NHSD colours from the - style guide: - https://digital.nhs.uk/about-nhs-digital/corporate-information-and-documents/nhs-digital-style-guidelines/how-we-look/colour-palette - - Parameters - ---------- - None - - Returns - -------- - colour_dict : dict (Python dictionary) - A dictionary containing sets of official NHS Digital branding colours - (Hexidecimal format) and fonts. 
- """ - - nhsd_chart_colours = ["#005EB8", "#71CCEF", "#84919C", "#003087", "#D0D5D6"] - nhsd_chart_background = {"chart_grey_3": "#F8F8F8", "white": "#FFFFFF"} - nhsd_core_colours = { - "white": "#ffffff", - "white_tints": ["#f9fafb", "#f3f5f6", "#edeff1", "#def2e5"], - "nhs_blue": "#005eb8", - "blue_tints": ["#337EC6", "#ACCAE8", "#D4E4F3", "#E6EFF8"], - "nhs_dark_grey": "#425563", - "grey_tints": [ - "#687784", - "#98A4AD", - "#B3BBC1", - "#DFE2E5", - "#EDEFF1", - "#F3F5F6", - "#F9FAFB", - ], - "nhs_mild_grey": "#768692", - "nhs_warm_yellow": "#FFB81C", - "warm_yellow_tints": ["#FFE8B4", "#FFF1CC", "#FFF8E8"], - } - nhsd_font = ["Frutiger Light", "Frutiger Roman"] - nhsd_font_backup = ["Arial"] - colour_dict = { - "chart": nhsd_chart_colours, - "chart_background": nhsd_chart_background, - "core": nhsd_core_colours, - "font": nhsd_font, - "font_backup": nhsd_font_backup, - } - return colour_dict - - -def nhsd_seaborn_style(): - """Sets the seaborn style to be inline with NHSD guidlines. This means your - graphs in Seaborn, or in Matplotlib will come out looking as per the NHSD - style guide. Simply run this function. 
- - Parameters - ---------- - None - - Returns - ---------- - None""" - nhs_colours = nhsd_colours() - chart_background = nhs_colours["chart_background"] - font_backup = nhs_colours["font_backup"] - chart_colours = nhs_colours["chart"] - - additional_colours = ( - nhsd_colours()["core"]["blue_tints"] - + nhsd_colours()["core"]["grey_tints"] - + nhsd_colours()["core"]["nhs_warm_yellow"] - + nhsd_colours()["core"]["warm_yellow_tints"] - ) - random.shuffle(additional_colours) - nhs_colours = chart_colours + additional_colours - - sns.set_palette(nhs_colours) - - seaborn_style_dict = { - "axes.axisbelow": True, - "axes.edgecolor": ".8", - "axes.facecolor": chart_background["chart_grey_3"], - "axes.grid": True, - "axes.labelcolor": ".15", - "axes.spines.bottom": False, # no spines - "axes.spines.left": False, # no spines - "axes.spines.right": False, # no spines - "axes.spines.top": False, # no spines - "figure.facecolor": chart_background["chart_grey_3"], - "font.family": ["sans-serif"], - "font.sans-serif": font_backup, - "grid.color": ".8", - "grid.linestyle": "-", - "image.cmap": "rocket", - "lines.solid_capstyle": "round", - "patch.edgecolor": "w", - "patch.force_edgecolor": True, - "text.color": ".15", - "xtick.bottom": False, - "xtick.color": ".15", - "xtick.direction": "out", - "xtick.top": False, - "ytick.color": ".15", - "ytick.direction": "out", - "ytick.left": False, - "ytick.right": False, - } - sns.set_style("whitegrid", seaborn_style_dict) diff --git a/codonPython/tableFromSql.py b/codonPython/tableFromSql.py deleted file mode 100644 index 01aa3d4..0000000 --- a/codonPython/tableFromSql.py +++ /dev/null @@ -1,88 +0,0 @@ -import sqlalchemy -from sqlalchemy import create_engine -import pandas as pd - - -def tableFromSql( - server: str, - database: str, - table_name: str, - user: str = "", - password: str = "", - schema: str = None, - index_col: str = None, - coerce_float: bool = True, - parse_dates: list = None, - columns: list = None, - chunksize: int = 
None, -): - """ - Returns a SQL table in a DataFrame. - - Convert a table stored in SQL Server 2016 into a pandas dataframe. - Uses sqlalchemy and pandas. - - Parameters - ---------- - server : string - Name of the SQL server - database : string - Name of the SQL database - user : string, default: "" - If verification is required, name of the user - password : string, default: "" - If verification is required, password of the user - table_name : string - Name of SQL table in database. - schema : string, default : None - Name of SQL schema in database to query (if database flavor supports this). Uses - default schema if None (default). - index_col : string or list of strings, default : None - Column(s) to set as index(MultiIndex). - coerce_float : boolean, default : True - Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) - to floating point. Can result in loss of Precision. - parse_dates : list or dict, default : None - - List of column names to parse as dates. - - Dict of {column_name: format string} where format string is strftime compatible in - case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing - integer timestamps. - - Dict of {column_name: arg dict}, where the arg dict corresponds to the keyword - arguments of pandas.to_datetime() Especially useful with databases without native - Datetime support, such as SQLite. - columns : list, default : None - List of column names to select from SQL table - chunksize : int, default : None - If specified, returns an iterator where chunksize is the number of rows to include - in each chunk. 
- - Returns - ---------- - pd.DataFrame - Dataframe of the table requested from sql server - - Examples - --------- - # >>> tableFromSql("myServer2", "myDatabase2", "myTable2") - # pd.DataFrame - # >>> tableFromSql("myServer", "myDatabase", "myTable", schema="specialSchema", columns=["col_1", "col_3"]) - # pd.DataFrame - """ - - try: - uri = "mssql+pyodbc://{}:{}@{}/{}?driver=SQL Server Native Client 11.0".format( - user, password, server, database - ) - engine = create_engine(uri) - return pd.read_sql_table( - table_name, - engine, - schema=schema, - index_col=index_col, - coerce_float=coerce_float, - parse_dates=parse_dates, - columns=columns, - chunksize=chunksize, - ) - except Exception as error: - raise error diff --git a/codonPython/tests/ODS_test.py b/codonPython/tests/ODS_test.py deleted file mode 100644 index ba05796..0000000 --- a/codonPython/tests/ODS_test.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest -import numpy as np -from codonPython import ODS_lookup - - -def test_successful_query(): - NHSD_code = "X26" - result = ODS_lookup.query_api(NHSD_code) - assert result["Organisation"]["Name"] == "NHS DIGITAL" - - -def test_unsuccessful_query(): - invalid_code = "ASDF" - with pytest.raises(ValueError): - ODS_lookup.query_api(invalid_code) - - -def test_wrong_type(): - invalid_code = 0 - with pytest.raises(ValueError): - ODS_lookup.query_api(invalid_code) - - -def test_unsuccessful_address_query(): - invalid_code = ["ASDF", np.nan, None] - result = ODS_lookup.get_addresses(invalid_code) - assert result.empty diff --git a/codonPython/tests/SQL_connections_test.py b/codonPython/tests/SQL_connections_test.py deleted file mode 100644 index 9ad8fe1..0000000 --- a/codonPython/tests/SQL_connections_test.py +++ /dev/null @@ -1,15 +0,0 @@ -'''test script for SQL_connections -- test the connections can run a dummy script (SELECT 1 as [Code], 'test' as [Name])''' -import pandas as pd -import pytest -import codonPython.SQL_connections as conn - - 
-@pytest.mark.parametrize("connection", - [conn.conn_dummy(), - conn.conn_dummy('test.db') - ]) -def test_select1(connection): - result = pd.read_sql("""SELECT 1 as [Code], 'Test' as [Name]""", connection).iloc[0, 0] - expected = pd.DataFrame([{'Code': 1, 'Name': 'Test'}]).iloc[0, 0] - assert result == expected diff --git a/codonPython/tests/age_bands_test.py b/codonPython/tests/age_bands_test.py deleted file mode 100644 index f0200c5..0000000 --- a/codonPython/tests/age_bands_test.py +++ /dev/null @@ -1,116 +0,0 @@ -from codonPython import age_bands -import numpy as np -import math -import pytest - - -@pytest.mark.parametrize( - "age, expected", - [ - (0, "0-4"), - (1, "0-4"), - (12, "10-14"), - (23, "20-24"), - (34, "30-34"), - (35, "35-39"), - (46, "45-49"), - (57, "55-59"), - (68, "65-69"), - (79, "75-79"), - (90, "90 and over"), - ], -) -def test_age_band_5_years_BAU(age, expected): - assert expected == age_bands.age_band_5_years(age) - - -def test_age_band_5_years_typeErrors(): - with pytest.raises(TypeError): - age_bands.age_band_5_years("age") - - -@pytest.mark.parametrize("age", [np.nan, math.inf, -3, 343, -0.1]) -def test_age_band_5_years_valueErrors(age): - with pytest.raises(ValueError): - age_bands.age_band_5_years(age) - - -@pytest.mark.parametrize("age, expected", [(None, "Age not known")]) -def test_age_band_5_years_edgeCases(age, expected): - assert expected == age_bands.age_band_5_years(age) - - -@pytest.mark.parametrize( - "age, expected", - [ - (0.1, "0-4"), - (1.2, "0-4"), - (12.3, "10-14"), - (23.4, "20-24"), - (34.5, "30-34"), - (35.6, "35-39"), - (46.7, "45-49"), - (57.8, "55-59"), - (68.9, "65-69"), - (79.0, "75-79"), - (90.1, "90 and over"), - ], -) -def test_age_band_5_years_BAU_floats(age, expected): - assert expected == age_bands.age_band_5_years(age) - - -@pytest.mark.parametrize( - "age, expected", - [ - (0, "0-9"), - (1, "0-9"), - (12, "10-19"), - (23, "20-29"), - (34, "30-39"), - (35, "30-39"), - (46, "40-49"), - (57, "50-59"), - 
(68, "60-69"), - (79, "70-79"), - (90, "90 and over"), - ], -) -def test_age_band_10_years_BAU(age, expected): - assert expected == age_bands.age_band_10_years(age) - - -def test_age_band_10_years_typeErrors(): - with pytest.raises(TypeError): - age_bands.age_band_10_years("age") - - -@pytest.mark.parametrize("age", [np.nan, math.inf, -3, 343, -0.1]) -def test_age_band_10_years_valueErrors(age): - with pytest.raises(ValueError): - age_bands.age_band_10_years(age) - - -@pytest.mark.parametrize("age, expected", [(None, "Age not known")]) -def test_age_band_10_years_edgeCases(age, expected): - assert expected == age_bands.age_band_10_years(age) - - -@pytest.mark.parametrize( - "age, expected", - [ - (0.1, "0-9"), - (1.2, "0-9"), - (12.3, "10-19"), - (23.4, "20-29"), - (34.5, "30-39"), - (35.6, "30-39"), - (46.7, "40-49"), - (57.8, "50-59"), - (68.9, "60-69"), - (79.0, "70-79"), - (90.1, "90 and over"), - ], -) -def test_age_band_10_years_BAU_floats(age, expected): - assert expected == age_bands.age_band_10_years(age) diff --git a/codonPython/tests/check_consistent_measures_test.py b/codonPython/tests/check_consistent_measures_test.py deleted file mode 100644 index 0ab7e2c..0000000 --- a/codonPython/tests/check_consistent_measures_test.py +++ /dev/null @@ -1,115 +0,0 @@ -from codonPython.validation.check_consistent_measures import check_consistent_measures -import pandas as pd -import numpy as np -import pytest - - -@pytest.mark.parametrize( - "data, geography_col, measure_col, measures_set, expected", - [ - ( - pd.DataFrame( - { - "Geog": [ - "National", - "National", - "Region", - "Region", - "Local", - "Local", - ], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "Value_Unsuppressed": [4, 2, 2, 1, 2, 1], - } - ), - "Geog", - "measure", - set({"m1", "m2"}), - True, - ), - ( - pd.DataFrame( - { - "Geog": [ - "National", - "National", - "Region", - "Region", - "Local", - "Local", - ], - "measure": ["m1", "m2", "m1", "m3", "m1", "m2"], - "Value_Unsuppressed": [4, 
2, 2, 1, 2, 1], - } - ), - "Geog", - "measure", - set({"m1", "m2"}), - False, - ), - ], -) -def test_each_org_levels_BAU(data, geography_col, measure_col, measures_set, expected): - assert expected == check_consistent_measures( - data, geography_col, measure_col, measures_set - ) - - -@pytest.mark.parametrize( - "data, geography_col, measure_col, measures_set", - [ - ( - pd.DataFrame( - { - "Geog": [ - "National", - "National", - "Region", - "Region", - "Local", - "Local", - ], - "measure": ["m1", "m2", "m1", np.nan, "m1", "m2"], - "Value_Unsuppressed": [4, 2, 2, 1, 2, 1], - } - ), - "Geog", - "measure", - set({"m1", "m2"}), - ) - ], -) -def test_each_org_levels_valueErrors_measure_col( - data, geography_col, measure_col, measures_set -): - with pytest.raises(ValueError): - check_consistent_measures(data, geography_col, measure_col, measures_set) - - -@pytest.mark.parametrize( - "data, geography_col, measure_col, measures_set", - [ - ( - pd.DataFrame( - { - "Geog": [ - "National", - "National", - "Region", - "Region", - "Local", - "Local", - ], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "Value_Unsuppressed": [4, 2, 2, 1, 2, 1], - } - ), - "Global", - "measure", - set({"m1", "m2"}), - ) - ], -) -def test_each_geography_col_keyError(data, geography_col, measure_col, measures_set): - with pytest.raises(KeyError): - check_consistent_measures(data, geography_col, measure_col, measures_set) diff --git a/codonPython/tests/check_consistent_submissions_test.py b/codonPython/tests/check_consistent_submissions_test.py deleted file mode 100644 index 49c30d5..0000000 --- a/codonPython/tests/check_consistent_submissions_test.py +++ /dev/null @@ -1,132 +0,0 @@ -from codonPython.validation.check_consistent_submissions import check_consistent_submissions -import pandas as pd -import numpy as np -import pytest - - -@pytest.mark.parametrize( - "data, national_geog_level, geography_col, submissions_col, measure_col, expected", - [ - ( - pd.DataFrame( - { - "Geog": ["N", "N", 
"Region", "Region", "Local", "Local"], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "submissions": [4, 2, 2, 1, 2, 1], - } - ), - "N", - "Geog", - "submissions", - "measure", - True, - ), - ( - pd.DataFrame( - { - "Org_Level": [ - "National", - "National", - "Region", - "Region", - "Local", - "Local", - ], - "Measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "Value_Unsuppressed": [4, 2, 3, 1, 2, 1], - } - ), - "National", - "Org_Level", - "Value_Unsuppressed", - "Measure", - False, - ), - ], -) -def test_each_consistent_measure_BAU( - data, national_geog_level, geography_col, submissions_col, measure_col, expected -): - assert expected == check_consistent_submissions( - data, national_geog_level, geography_col, submissions_col, measure_col - ) - - -@pytest.mark.parametrize( - "data, national_geog_level, geography_col, submissions_col, measure_col", - [ - ( - pd.DataFrame( - { - "Geog": ["N", "N", "Region", "Region", "Local", "Local"], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "submissions": [4, 2, 2, 1, 2, 1], - } - ), - 1, - "Geog", - "submissions", - "measure", - ), - ( - pd.DataFrame( - { - "Geog": ["N", "N", "Region", "Region", "Local", "Local"], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "submissions": [4, 2, 2, 1, 2, 1], - } - ), - "N", - False, - "submissions", - "measure", - ), - ( - pd.DataFrame( - { - "Geog": ["N", "N", "Region", "Region", "Local", "Local"], - "measure": ["m1", "m2", "m2", "m2", "m1", "m2"], - "submissions": [4, 2, 2, 1, 2, 1], - } - ), - "N", - "Geog", - 4.2, - "measure", - ), - ], -) -def test_each_consistent_submissions_valueErrors( - data, national_geog_level, geography_col, submissions_col, measure_col -): - with pytest.raises(ValueError): - check_consistent_submissions( - data, national_geog_level, geography_col, submissions_col, measure_col - ) - - -@pytest.mark.parametrize( - "data, national_geog_level, geography_col, submissions_col, measure_col", - [ - ( - pd.DataFrame( - { - "Geog": ["N", "N", 
"Region", "Region", "Local", "Local"], - "measure": ["m1", "m2", "m1", "m2", "m1", "m2"], - "submissions": [4, 2, 2, 1, 2, 1], - } - ), - "N", - "Geog", - "submissions", - "measurez", - ) - ], -) -def test_each_consistent_submissions_colError( - data, national_geog_level, geography_col, submissions_col, measure_col -): - with pytest.raises(KeyError): - check_consistent_submissions( - data, national_geog_level, geography_col, submissions_col, measure_col - ) diff --git a/codonPython/tests/check_nat_val_test.py b/codonPython/tests/check_nat_val_test.py deleted file mode 100644 index 2e311ba..0000000 --- a/codonPython/tests/check_nat_val_test.py +++ /dev/null @@ -1,108 +0,0 @@ -from codonPython.validation.check_nat_val import check_nat_val -import pytest -import pandas as pd - - -df = pd.DataFrame( - { - "Breakdown": [ - "National", - "CCG", - "CCG", - "Provider", - "Provider", - "National", - "CCG", - "CCG", - "Provider", - "Provider", - "National", - "CCG", - "CCG", - "Provider", - "Provider", - ], - "measure": [ - "m1", - "m1", - "m1", - "m1", - "m1", - "m2", - "m2", - "m2", - "m2", - "m2", - "m3", - "m3", - "m3", - "m3", - "m3", - ], - "Value_Unsuppressed": [9, 4, 5, 3, 6, 11, 2, 9, 7, 4, 9, 5, 4, 6, 3], - } -) - - -@pytest.mark.parametrize( - "df, breakdown_col, measure_col, value_col, nat_val, expected", - [(df, "Breakdown", "measure", "Value_Unsuppressed", "National", True)], -) -def test_BAU(df, breakdown_col, measure_col, value_col, nat_val, expected): - assert ( - check_nat_val( - df, - breakdown_col=breakdown_col, - measure_col=measure_col, - value_col=value_col, - nat_val=nat_val, - ) - == expected - ) - - -@pytest.mark.parametrize( - "df, breakdown_col, measure_col, value_col, nat_val", - [ - (df, "Breakdown", 23, "Value_Unsuppressed", "National"), # Not a string - (df, 0.1, "Measure", "Value_Unsuppressed", "National"), # Not a string - ( - df, - "Breakdown", - "Measure", - pd.DataFrame({"wrong": [1, 2, 3]}), # Not a string - "National", - ), - ( - df, - 
"Breakdown", - "Measure", - "Value_Unsuppressed", - set({"m1", "m2"}), # Not a string - ), - ], -) -def test_ValueErrors(df, breakdown_col, measure_col, value_col, nat_val): - with pytest.raises(ValueError): - check_nat_val( - df, - breakdown_col=breakdown_col, - measure_col=measure_col, - value_col=value_col, - nat_val=nat_val, - ) - - -@pytest.mark.parametrize( - "df, breakdown_col, measure_col, value_col, nat_val", - [(df, "Breakdown", "measure", "Wrong_Column", "National")], -) -def test_KeyErrors(df, breakdown_col, measure_col, value_col, nat_val): - with pytest.raises(KeyError): - check_nat_val( - df, - breakdown_col=breakdown_col, - measure_col=measure_col, - value_col=value_col, - nat_val=nat_val, - ) diff --git a/codonPython/tests/check_null_test.py b/codonPython/tests/check_null_test.py deleted file mode 100644 index 1a64e33..0000000 --- a/codonPython/tests/check_null_test.py +++ /dev/null @@ -1,33 +0,0 @@ -from codonPython.validation.check_null import check_null -import numpy as np -import pandas as pd -import pytest - -testdata = pd.DataFrame( - { - "col1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "col2": [11, 12, 13, 14, 15, np.nan, np.nan, 18, 19, 20], - } -) - - -@pytest.mark.parametrize( - "dataframe, columns_to_be_checked, expected", - [(testdata.iloc[:5, :], ["col1", "col2"], 0), (testdata, ["col2"], 2)], -) -def test_BAU(dataframe, columns_to_be_checked, expected): - assert check_null(dataframe, columns_to_be_checked) == expected - - -@pytest.mark.parametrize("dataframe, columns_to_be_checked", [(testdata, 0.01)]) -def test_ValueError(dataframe, columns_to_be_checked): - with pytest.raises(ValueError): - check_null(dataframe, columns_to_be_checked) - - -@pytest.mark.parametrize( - "dataframe, columns_to_be_checked", [(testdata, ["wrong_column"])] -) -def test_KeyError(dataframe, columns_to_be_checked): - with pytest.raises(KeyError): - check_null(dataframe, columns_to_be_checked) diff --git a/codonPython/tests/file_utils_test.py 
b/codonPython/tests/file_utils_test.py deleted file mode 100644 index 12effb2..0000000 --- a/codonPython/tests/file_utils_test.py +++ /dev/null @@ -1,311 +0,0 @@ -from codonPython.file_utils import compare -from codonPython.file_utils import file_search -from codonPython.file_utils import import_files -import numpy as np -import pytest -import pandas as pd - -df1 = pd.DataFrame( - { - "A": [1, 5, 6, 1, 8, 5, 9], - "B": [2, 8, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 5, 9], - "D": [2, 8, 5, 2, 4, 6, 2], - "E": [1, 2, 6, 1, 3, 5, 5], - } -) - -df2 = pd.DataFrame( - { - "A": [1, 5, 6, 1, 9, 5, 9], - "B": [2, 9, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 35, 9], - "D": [2, 8, 7, 2, 4, 6, 2], - "E": [1, 2, 46, 1, 3, 8, 5], - } -) - -dict_test = { - "same_values": pd.DataFrame( - np.array([[1, 2, 3, 2, 1], [9, 5, 9, 2, 5]]), columns=["A", "B", "C", "D", "E"] - ), - "df1_not_df2": pd.DataFrame( - np.array([[5, 8, 4, 8, 2], [6, 5, 5, 5, 6], [8, 21, 1, 4, 3], [5, 3, 5, 6, 5]]), - columns=["A", "B", "C", "D", "E"], - ), - "df2_not_df1": pd.DataFrame( - np.array( - [[5, 9, 4, 8, 2], [6, 5, 5, 7, 46], [9, 21, 1, 4, 3], [5, 3, 35, 6, 8]] - ), - columns=["A", "B", "C", "D", "E"], - ), - "df1_dups": pd.DataFrame( - np.array([[1, 2, 3, 2, 1]]), columns=["A", "B", "C", "D", "E"] - ), - "df2_dups": pd.DataFrame( - np.array([[1, 2, 3, 2, 1]]), columns=["A", "B", "C", "D", "E"] - ), - "Same": False, -} - - -@pytest.mark.parametrize( - "x, y, names, dups, same, expected", - [ - ( - pd.DataFrame( - { - "A": [1, 5, 6, 1, 8, 5, 9], - "B": [2, 8, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 5, 9], - "D": [2, 8, 5, 2, 4, 6, 2], - "E": [1, 2, 6, 1, 3, 5, 5], - } - ), - pd.DataFrame( - { - "A": [1, 5, 6, 1, 9, 5, 9], - "B": [2, 9, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 35, 9], - "D": [2, 8, 7, 2, 4, 6, 2], - "E": [1, 2, 46, 1, 3, 8, 5], - } - ), - ["df1", "df2"], - True, - True, - dict_test, - ) - ], -) -def test_compare_BAU(x, y, names, dups, same, expected): - dict_test1 = compare(x, y, 
names=["df1", "df2"], dups=True, same=True) - for i in expected.keys(): - if i == "Same": - assert dict_test1[i] == expected[i] - else: - for j in expected[i]: - list_test1 = list(dict_test1[i][j]) - list_exp = list(expected[i][j]) - assert list_test1 == list_exp - - -@pytest.mark.parametrize( - "doctype, like, strict, expected", [("md", ["README"], True, ["README.md"])] -) -def test_file_search_BAU(doctype, like, strict, expected): - assert file_search(doctype=doctype, like=like, strict=strict) == expected - - -@pytest.mark.parametrize("expected", [({})]) -def test_import_files_BAU(expected): - assert import_files() == expected - - -@pytest.mark.parametrize("subdir, expected", [(True, {})]) -def test_import_files_BAU_2(subdir, expected): - assert import_files(subdir=subdir) == expected - - -@pytest.mark.parametrize("strict,subdir, expected", [(True, True, {})]) -def test_import_files_BAU_3(strict, subdir, expected): - assert import_files(strict=strict, subdir=subdir) == expected - - -# ----------------Console output------------------------- - - -@pytest.mark.parametrize( - "x, y, names, dups, same, comment", - [ - ( - pd.DataFrame( - { - "A": [1, 5, 6, 1, 8, 5, 9], - "B": [2, 8, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 5, 9], - "D": [2, 8, 5, 2, 4, 6, 2], - "E": [1, 2, 6, 1, 3, 5, 5], - } - ), - pd.DataFrame( - { - "A": [1, 5, 6, 1, 9, 5, 9], - "B": [2, 9, 5, 2, 21, 3, 5], - "C": [3, 4, 5, 3, 1, 35, 9], - "D": [2, 8, 7, 2, 4, 6, 2], - "E": [1, 2, 46, 1, 3, 8, 5], - } - ), - ["df1", "df2"], - True, - True, - True, - ) - ], -) -def test_compare_console(x, y, names, dups, same, comment, capsys): - dict_test1 = compare( - x, y, names=["df1", "df2"], dups=True, same=True, comment=comment - ) - captured = capsys.readouterr() - assert ( - captured.out - == "\nThere are " - + str(dict_test1["same_values"].shape[0]) - + " same values\nThere are " - + str(dict_test1[names[0] + "_not_" + names[1]].shape[0]) - + " outliers in " - + str(names[0]) - + "\nThere are " - + 
str(dict_test1[names[1] + "_not_" + names[0]].shape[0]) - + " outliers in " - + str(names[1]) - + "\nThere are " - + str(dict_test1[names[0] + "_dups"].shape[0]) - + " duplicates in " - + str(names[0]) - + "\nThere are " - + str(dict_test1[names[1] + "_dups"].shape[0]) - + " duplicates in " - + str(names[1]) - + "\nDataFrames are not the same\n" - ) - -# -------------ValueError tests----------------- - -# -------------File Search---------------------- - - -@pytest.mark.parametrize("like", [("txt")]) -def test_file_search_ValueError_1(like): - - with pytest.raises(ValueError): - - file_search(like=like) - - -@pytest.mark.parametrize("path", [(1)]) -def test_file_search_ValueError_2(path): - - with pytest.raises(ValueError): - - file_search(path=path) - - -@pytest.mark.parametrize("doctype", [(["txt"])]) -def test_file_search_ValueError_3(doctype): - - with pytest.raises(ValueError): - - file_search(doctype=doctype) - - -@pytest.mark.parametrize("strict", [("True")]) -def test_file_search_ValueError_4(strict): - - with pytest.raises(ValueError): - - file_search(strict=strict) - - -# -----------------Import files------------------------- - - -@pytest.mark.parametrize("like", [("txt")]) -def test_import_files_ValueError_1(like): - - with pytest.raises(ValueError): - - import_files(like=like) - - -@pytest.mark.parametrize("subdir", [("True")]) -def test_import_files_ValueError_2(subdir): - - with pytest.raises(ValueError): - - import_files(subdir=subdir) - - -@pytest.mark.parametrize("doctype", [(["txt"])]) -def test_import_files_ValueError_3(doctype): - - with pytest.raises(ValueError): - - import_files(doctype=doctype) - - -@pytest.mark.parametrize("sheet", [(1)]) -def test_import_files_ValueError_4(sheet): - - with pytest.raises(ValueError): - - import_files(sheet=sheet) - - -@pytest.mark.parametrize("path", [(["Desktop"])]) -def test_import_files_ValueError_5(path): - - with pytest.raises(ValueError): - - import_files(path=path) - - 
-@pytest.mark.parametrize("strict", [("True")]) -def test_import_files_ValueError_6(strict): - - with pytest.raises(ValueError): - - import_files(strict=strict) - - -# ---------------Compare-------------------------- - - -@pytest.mark.parametrize("names", [("txt")]) -def test_compare_ValueError_1(names): - - with pytest.raises(ValueError): - - compare(df1, df2, names=names) - - -@pytest.mark.parametrize("x", [([1, 2, 3])]) -def test_compare_ValueError_2(x): - - with pytest.raises(ValueError): - - compare(x, df2, names=["x", "df2"]) - - -@pytest.mark.parametrize("dups", [("True")]) -def test_compare_ValueError_3(dups): - - with pytest.raises(ValueError): - - compare(df1, df2, names=["df1", "df2"], dups=dups) - - -@pytest.mark.parametrize("same", [("True")]) -def test_compare_ValueError_4(same): - - with pytest.raises(ValueError): - - compare(df1, df2, names=["df1", "df2"], same=same) - - -@pytest.mark.parametrize("comment", [("True")]) -def test_compare_ValueError_5(comment): - - with pytest.raises(ValueError): - - compare(df1, df2, names=["df1", "df2"], comment=comment) - - -@pytest.mark.parametrize("y", [([1, 2, 3])]) -def test_compare_ValueError_6(y): - - with pytest.raises(ValueError): - - compare(df1, y, names=["df1", "y"]) diff --git a/codonPython/tests/tolerance_test.py b/codonPython/tests/tolerance_test.py deleted file mode 100644 index 04e2fa7..0000000 --- a/codonPython/tests/tolerance_test.py +++ /dev/null @@ -1,159 +0,0 @@ -from codonPython.validation.tolerance import check_tolerance -import numpy as np -import pandas as pd -import pandas.util.testing as pdt -import pytest - -testdata = [ - pd.Series([1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242]), - pd.Series([1, 2, 3, 4, 5, 5.5, 6, 6.5, 7]), -] - - -@pytest.mark.parametrize( - "t, y, to_exclude, poly_features, alpha, parse_dates, expected", - [ - ( - *testdata, - 2, - [1, 2], - 0.05, - False, - pd.DataFrame( - { - "t": [1241, 1242, 1241, 1242], - "yhat_u": [ - 8.11380197739608, - 
9.051653693670929, - 7.127135023632205, - 7.735627110021585, - ], - "yobs": [6.5, 7.0, 6.5, 7.0], - "yhat": [ - 7.214285714285714, - 8.071428571428573, - 6.500000000000002, - 6.821428571428574, - ], - "yhat_l": [ - 6.31476945117535, - 7.091203449186216, - 5.872864976367799, - 5.907230032835563, - ], - "polynomial": [1, 1, 2, 2], - } - ), - ), - ( - *testdata, - 2, - [3], - 0.05, - False, - pd.DataFrame( - { - "t": [1241, 1242], - "yhat_u": [6.753927165005773, 7.214574732953706], - "yobs": [6.5, 7.0], - "yhat": [6.0000000000000036, 5.571428571428576], - "yhat_l": [5.2460728349942345, 3.928282409903445], - "polynomial": [3, 3], - } - ), - ), - ( - pd.Series( - [ # Check dates - "2012-05-16", - "2012-05-17", - "2012-05-18", - "2012-05-19", - "2012-05-20", - "2012-05-21", - "2012-05-22", - "2012-05-23", - "2012-05-24", - ] - ), - pd.Series([1, 2, 3, 4, 5, 5.5, 6, 6.5, 7]), - 2, - [3], - 0.05, - True, - pd.DataFrame( - { - "t": ["2012-05-23", "2012-05-24"], - "yhat_u": [6.753927165005773, 7.214574732953706], - "yobs": [6.5, 7.0], - "yhat": [6.0000000000000036, 5.571428571428576], - "yhat_l": [5.2460728349942345, 3.928282409903445], - "polynomial": [3, 3], - } - ), - ), - ], -) -def test_tolerance_checking_BAU( - t, y, to_exclude, poly_features, alpha, parse_dates, expected -): - obtained = check_tolerance( - t, - y, - to_exclude=to_exclude, - poly_features=poly_features, - alpha=alpha, - parse_dates=parse_dates, - ) - pdt.assert_frame_equal(expected, obtained) - - -@pytest.mark.parametrize( - "t, y, to_exclude, poly_features, alpha", - [ - (*testdata, 2, "flamingo", 0.05), # This should be a list - (*testdata, 2, [2], "flamingo"), # Needs to be int - (*testdata, 2, [2], 42), # Needs to be between 0 and 1 - (*testdata, "flamingo", [2], 0.05), # Needs to be int - ], -) -def test_ValueErrors(t, y, to_exclude, poly_features, alpha): - with pytest.raises(ValueError): - check_tolerance( - t, y, to_exclude=to_exclude, poly_features=poly_features, alpha=alpha - ) - - 
-@pytest.mark.parametrize( - "t, y, to_exclude, poly_features, alpha", - [ - (*testdata, 2, [42], 0.05), # Elements in the list should be between 0 and 4 - ( - *testdata, - 42, # Can't have to_exclude making your sample size smaller than 4 - [2], - 0.05, - ), - ( - pd.Series( - [1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, np.nan] - ), # Missing t value - pd.Series([1, 2, 3, 4, 5, 5.5, 6, 6.5, 7]), - 2, - [2], - 0.05, - ), - ( - pd.Series([1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242]), - pd.Series([1, 2, 3, 4, 5, 5.5, 6, 6.5, np.nan]), # Missing y value - 2, - [2], - 0.05, - ), - ], -) -def test_AssertionErrors(t, y, to_exclude, poly_features, alpha): - with pytest.raises(AssertionError): - check_tolerance( - t, y, to_exclude=to_exclude, poly_features=poly_features, alpha=alpha - ) diff --git a/codonPython/validation/check_consistent_measures.py b/codonPython/validation/check_consistent_measures.py deleted file mode 100644 index ea68796..0000000 --- a/codonPython/validation/check_consistent_measures.py +++ /dev/null @@ -1,70 +0,0 @@ -import pandas as pd -import numpy as np - - -def check_consistent_measures( - data, - geography_col: str = "Org_Level", - measure_col: str = "Measure", - measures_set: set = set(), -) -> bool: - """ - Check every measure is in every geography level. - - Parameters - ---------- - data : pd.DataFrame - DataFrame of data to check. - geography_col : str, default = "Org_Level" - Column name for the geography level. - measure_col : str, default = "Measure" - Column name for measure - measures_set : set, default = set() - Set of measures that should be in every geography level. If empty, the existing - global set is presumed to be correct. - - Returns - ------- - bool - Whether the checks have been passed. - - Examples - -------- - >>> check_consistent_measures( - ... pd.DataFrame({ - ... "Geog" : ["National" ,"National", "Region", "Region", "Local", "Local",], - ... "measure" : ["m1", "m2", "m1", "m2", "m1", "m2",], - ... 
"Value_Unsuppressed" : [4, 2, 2, 1, 2, 1,], - ... }), - ... geography_col = "Geog", - ... measure_col = "measure", - ... measures_set = set({"m1", "m2"}), - ... ) - True - >>> check_consistent_measures( - ... pd.DataFrame({ - ... "Org_Level" : ["National" ,"National", "Region", "Region", "Local", "Local",], - ... "Measure" : ["m1", "m3", "m1", "m2", "m1", "m2",], - ... "Value_Unsuppressed" : [4, 2, 2, 1, 2, 1,], - ... }) - ... ) - False - """ - - if data.isna().any(axis=None): - raise ValueError( - f"Missing values at locations {list(map(tuple, np.argwhere(data.isna().values)))}" - ) - if not isinstance(geography_col, str) or not isinstance(measure_col, str): - raise ValueError("Please input strings for column indexes.") - if not isinstance(measures_set, set): - raise ValueError("Please input a set object for measures") - if geography_col not in data.columns or measure_col not in data.columns: - raise KeyError("Check column names correspond to the DataFrame.") - - # Every geography level should have the same set of measures as the global set. - global_set = measures_set if measures_set else set(data[measure_col].unique()) - subsets = data.groupby(geography_col).agg({measure_col: "unique"}) - subset_agreement = all(set(x) == global_set for x in subsets[measure_col]) - - return subset_agreement diff --git a/codonPython/validation/check_consistent_submissions.py b/codonPython/validation/check_consistent_submissions.py deleted file mode 100644 index ebaaa48..0000000 --- a/codonPython/validation/check_consistent_submissions.py +++ /dev/null @@ -1,82 +0,0 @@ -import pandas as pd - - -def check_consistent_submissions( - data, - national_geog_level: str = "National", - geography_col: str = "Org_Level", - submissions_col: str = "Value_Unsuppressed", - measure_col: str = "Measure", -) -> bool: - """ - Check total submissions for each measure are the same across all geography levels - except national. 
- - Parameters - ---------- - data : pd.DataFrame - DataFrame of data to check. - national_geog_level : str, default = "National" - Geography level code for national values. - geography_col : str, default = "Org_Level" - Column name for the geography level. - submissions_col : str, default = "Value_Unsuppressed" - Column name for the submissions count. - measure_col : str, default = "Measure" - Column name for measure. - - Returns - ------- - bool - Whether the checks have been passed. - - Examples - -------- - >>> check_consistent_submissions( - ... pd.DataFrame({ - ... "Geog" : ["N" ,"N", "Region", "Region", "Local", "Local",], - ... "measure" : ["m1", "m2", "m1", "m2", "m1", "m2",], - ... "submissions" : [4, 2, 2, 1, 2, 1,], - ... }), - ... national_geog_level = "N", - ... geography_col = "Geog", - ... submissions_col = "submissions", - ... measure_col = "measure", - ... ) - True - >>> check_consistent_submissions( - ... pd.DataFrame({ - ... "Org_Level" : ["National" ,"National", "Region", "Region", "Local", "Local",], - ... "Measure" : ["m1", "m2", "m1", "m2", "m1", "m2",], - ... "Value_Unsuppressed" : [4, 2, 3, 1, 2, 1,], - ... }) - ... ) - False - """ - - if ( - not isinstance(submissions_col, str) - or not isinstance(measure_col, str) - or not isinstance(geography_col, str) - or not isinstance(national_geog_level, str) - ): - raise ValueError( - "Please input strings for column names and national geography level." - ) - if ( - submissions_col not in data.columns - or measure_col not in data.columns - or geography_col not in data.columns - ): - raise KeyError("Check column names correspond to the DataFrame.") - - # All non-national measures should have only one unique submission number for each - # geography level. 
- submissions_by_measure = ( - data[data[geography_col] != national_geog_level] - .groupby(measure_col) - .agg({submissions_col: "nunique"}) - ) - result = (submissions_by_measure[submissions_col] == 1).all() - - return result diff --git a/codonPython/validation/check_nat_val.py b/codonPython/validation/check_nat_val.py deleted file mode 100644 index 56d0501..0000000 --- a/codonPython/validation/check_nat_val.py +++ /dev/null @@ -1,101 +0,0 @@ -import pandas as pd - - -def check_nat_val( - df: pd.DataFrame, - breakdown_col: str = "Breakdown", - measure_col: str = "Measure", - value_col: str = "Value_Unsuppressed", - nat_val: str = "National", -) -> bool: - """ - Check national value less than or equal to sum of breakdowns. - - This function checks that the national value is less than or equal to the - sum of each organisation level breakdown. - This function does not apply to values which are averages. - This function does not apply to values which are percentages calculated - from the numerator and denominator. - - Parameters - ---------- - df : pandas.DataFrame - DataFrame of data to check. - breakdown_col : str, default = "Breakdown" - Column name for the breakdown level. - measure_col : str, default = "Measure" - Column name for measures - value_col : str, default = "Value_Unsuppressed" - Column name for values - nat_val : str, default = "National" - Value in breakdown column denoting national values - Returns - ------- - bool - Whether the checks have been passed. - - Examples - -------- - >>> check_nat_val( - ... df = pd.DataFrame({ - ... "Breakdown" : ['National', 'CCG', 'CCG', 'Provider', 'Provider', - ... 'National' ,'CCG', 'CCG', 'Provider', 'Provider','National' ,'CCG', 'CCG', - ... 'Provider', 'Provider',], - ... "Measure" : ['m1', 'm1', 'm1', 'm1', 'm1', 'm2', 'm2', 'm2', 'm2', - ... 'm2', 'm3', 'm3', 'm3', 'm3', 'm3',], - ... "Value_Unsuppressed" : [9, 4, 5, 3, 6, 11, 2, 9, 7, 4, 9, 5, 4, 6, - ... 3], - ... }), - ... breakdown_col = "Breakdown", - ... 
measure_col = "Measure", - ... value_col = "Value_Unsuppressed", - ... nat_val = "National", - ... ) - True - >>> check_nat_val( - ... df = pd.DataFrame({ - ... "Breakdown" : ['National', 'CCG', 'CCG', 'Provider', 'Provider', - ... 'National' ,'CCG', 'CCG', 'Provider', 'Provider','National' ,'CCG', 'CCG', - ... 'Provider', 'Provider',], - ... "Measure" : ['m1', 'm1', 'm1', 'm1', 'm1', 'm2', 'm2', 'm2', 'm2', - ... 'm2', 'm3', 'm3', 'm3', 'm3', 'm3',], - ... "Value_Unsuppressed" : [18, 4, 5, 3, 6, 11, 2, 9, 7, 4, 9, 5, 4, 6, - ... 3], - ... }), - ... breakdown_col = "Breakdown", - ... measure_col = "Measure", - ... value_col = "Value_Unsuppressed", - ... nat_val = "National", - ... ) - False - """ - - if ( - not isinstance(breakdown_col, str) - or not isinstance(measure_col, str) - or not isinstance(value_col, str) - ): - raise ValueError("Please input strings for column indexes.") - if not isinstance(nat_val, str): - raise ValueError("Please input strings for value indexes.") - if ( - breakdown_col not in df.columns - or measure_col not in df.columns - or value_col not in df.columns - ): - raise KeyError("Check column names correspond to the DataFrame.") - # aggregate values by measure and breakdown - grouped = ( - df.groupby([measure_col, breakdown_col]).agg({value_col: sum}).reset_index() - ) - national = grouped.loc[grouped[breakdown_col] == nat_val].reset_index() - non_national = grouped.loc[grouped[breakdown_col] != nat_val].reset_index() - # check values are less than or equal to national value for each measure - join = pd.merge( - non_national, national, left_on=measure_col, right_on=measure_col, how="left" - ) - left = value_col + "_x" - right = value_col + "_y" - join["Check"] = join[right] <= join[left] - result = all(join["Check"]) - return result diff --git a/codonPython/validation/check_null.py b/codonPython/validation/check_null.py deleted file mode 100644 index 37333b6..0000000 --- a/codonPython/validation/check_null.py +++ /dev/null @@ -1,46 +0,0 @@ 
-import numpy -import pandas as pd - - -def check_null(dataframe: pd.DataFrame, columns_to_be_checked: list) -> int: - """ - Checks a pandas dataframe for null values - - This function takes a pandas dataframe supplied as an argument and returns a integer value - representing any null values found within the columns to check. - - Parameters - ---------- - data : pandas.DataFrame - Dataframe to read - columns_to_be_checked: list - Given dataframe columns to be checked for null values - - Returns - ------- - out : int - The number of null values found in the given columns - - Examples - -------- - >>> check_null(dataframe = pd.DataFrame({'col1': [1,2], 'col2': [3,4]}),columns_to_be_checked = ['col1', 'col2']) - 0 - >>> check_null(dataframe = pd.DataFrame({'col1': [1,numpy.nan], 'col2': [3,4]}),columns_to_be_checked = ['col1']) - 1 - """ - - if not isinstance(columns_to_be_checked, list): - raise ValueError("Please make sure that all your columns passed are strings") - - for eachCol in columns_to_be_checked: - if eachCol not in dataframe.columns: - raise KeyError( - "Please check the column names correspond to values in the DataFrame." 
- ) - - null_count = 0 - for eachColumn in columns_to_be_checked: - prev_null_count = null_count - null_count = prev_null_count + (len(dataframe) - dataframe[eachColumn].count()) - - return null_count diff --git a/codonPython/validation/tolerance.py b/codonPython/validation/tolerance.py deleted file mode 100644 index b3f0d14..0000000 --- a/codonPython/validation/tolerance.py +++ /dev/null @@ -1,139 +0,0 @@ -import numpy as np -import pandas as pd -from datetime import datetime -from sklearn.preprocessing import StandardScaler, PolynomialFeatures -from sklearn.pipeline import make_pipeline -import statsmodels.api as sm -from statsmodels.sandbox.regression.predstd import wls_prediction_std - - -def check_tolerance( - t, - y, - to_exclude: int = 1, - poly_features: list = [1, 2], - alpha: float = 0.05, - parse_dates: bool = False, - predict_all: bool = False, -) -> pd.DataFrame: - """ - Check that some future values are within a weighted least squares confidence interval. - - Parameters - ---------- - t : pd.Series - N explanatory time points of shape (N, 1). - y : pd.Series - The corresponding response variable values to X, of shape (N, 1). - to_exclude : int, default = 1 - How many of the last y values will have their tolerances checked. - poly_features : list, default = [1, 2] - List of degrees of polynomial basis to fit to the data. One model will be - produced for each number in the list, eg. the default will fit a linear and - a second degree polynomial to the data and return both sets of results. - alpha : float, default = 0.05 - Alpha parameter for the weighted least squares confidence interval. - parse_dates : bool, default = True - Set to true to parse string dates in t - predict_all : bool, default = False - Set to true to show predictions for all points of the dataset. 
- - - Returns - ------- - pd.DataFrame - DataFrame containing: - "t" : Value for t - "yhat_u" : Upper condfidence interval for y - "yobs" : Observed value for y - "yhat" : Predicted value for y - "yhat_l" : Lower confidence interval for y - "polynomial": Max polynomial of model fit to the data - - - Examples - -------- - >>> check_tolerance( - ... t = pd.Series([1001,1002,1003,1004,1005,1006]), - ... y = pd.Series([2,3,4,4.5,5,5.1]), - ... to_exclude = 2, - ... ) - t yhat_u yobs yhat yhat_l polynomial - 0 1005 6.817413 5.0 5.500 4.182587 1 - 1 1006 7.952702 5.1 6.350 4.747298 1 - 2 1005 9.077182 5.0 4.875 0.672818 2 - 3 1006 13.252339 5.1 4.975 -3.302339 2 - """ - - if not isinstance(poly_features, list): - raise ValueError( - "Please input a list of integers from 0 to 4 for poly_features." - ) - assert all( - 0 <= degree <= 4 for degree in poly_features - ), "Please ensure all numbers in poly_features are from 0 to 4." - if not isinstance(alpha, float) or 0 > alpha >= 1: - raise ValueError("Please input a float between 0 and 1 for alpha.") - if not isinstance(to_exclude, int): - raise ValueError( - "Please input an integer between 1 and your sample size for to_exclude." - ) - assert ( - len(t) - to_exclude - ) >= 4, """The sample size for your model is smaller than 4. This will not produce a good - model. Either reduce to_exclude or increase your sample size to continue.""" - assert y.notna().all(), f"""Your sample contains missing or infinite values for y at locations - {list(map(tuple, np.where(np.isnan(y))))}. Exclude these values to continue.""" - assert t.notna().all(), f"""Your sample contains missing or infinite values for t at locations - {list(map(tuple, np.where(np.isnan(t))))}. Exclude these values to continue.""" - - # Convert date strings to numeric variables for the model - if parse_dates: - t_numeric = pd.to_datetime(t) - t_numeric = (t_numeric - datetime(1970, 1, 1)).apply(lambda x: x.days) - - # Sort data by t increasing. t_ is for internal use. 
- idx = np.argsort(t_numeric.values) if parse_dates else np.argsort(t.values) - t_ = t_numeric[idx] if parse_dates else t[idx] - t = t[idx] - y = y[idx] - - results = pd.DataFrame() - for degree in poly_features: - transforms = make_pipeline(StandardScaler(), PolynomialFeatures(degree=degree)) - - # Fit transforms to training data only, apply to all data. - fitted_transforms = transforms.fit(t_[:-to_exclude].values.reshape(-1, 1)) - t_scaled = fitted_transforms.transform(t_.values.reshape(-1, 1)) - - t_train, y_train = t_scaled[:-to_exclude, :], y[:-to_exclude] - t_predict, y_predict, t_orig = ( - t_scaled if predict_all else t_scaled[-to_exclude:, :], - y if predict_all else y[-to_exclude:], - t if predict_all else t[-to_exclude:], - ) - - # Fit ordinary least squares model to the training data, then predict for the - # prediction data. - model = sm.OLS(y_train, t_train).fit() - yhat = model.predict(t_predict) - - # Calculate prediction intervals of fitted model. - _, yhat_l, yhat_u = wls_prediction_std(model, t_predict, alpha=alpha) - - # Store model results in master frame - results = results.append( - pd.DataFrame( - { - "t": t_orig, - "yhat_u": yhat_u, - "yobs": y_predict, - "yhat": yhat, - "yhat_l": yhat_l, - "polynomial": degree, - } - ), - ignore_index=True, - ) - - return results diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9131894 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,53 @@ +[build-system] +requires = [ + "setuptools>=64", + #"setuptools-scm>=8" +] +build-backend = "setuptools.build_meta" + +[project] +name = "nhs_reusable_code_library" +version = "0.1.0a6" +description = "Common NHS Python (& mainly specifically PySpark) functions for data processing, validations, cleansing and analysis" +authors = [{name = "Sam Hollings", email = "sam.hollings1@nhs.net"}] +readme = "README.md" +keywords = ["PySpark", "cleansing", "validation", "data engineering"] +license = {file = "LICENCE.md"} +classifiers = ["Programming 
Language :: Python :: 3"] +dependencies = [ + "inflection", + "pyspark", + "pandas", + "pytest", + "pytest-spark", + "black", + "isort", + "build", + "sphinx", + "sphinx-rtd-theme", + #"setuptools-scm", +] +requires-python = ">=3.6" +# dynamic = ['version'] - can't get this to work in github actions with setuptools-scm as the github action only pulls the latest commit and no tag info + +[project.urls] +Repository = "https://github.com/nhsengland/reusable-code-library" +Issues = "https://github.com/nhsengland/reusable-code-library/issues" + +# `isort` configurations +[tool.isort] +profile = "black" + +# `pytest` configurations +[tool.pytest.ini_options] +addopts = [ + "-v", + "--doctest-modules", + "--doctest-report=cdiff", + "--doctest-continue-on-failure" +] +doctest_optionflags = "NORMALIZE_WHITESPACE" +testpaths = [ + "tests", + "src" +] \ No newline at end of file diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 9b2c778..0000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# content of pytest.ini -[pytest] -doctest_encoding = utf8 -addopts = --doctest-modules \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 93f5b3a..0000000 --- a/requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -numpy>=1.16.0 -scipy>=0.19.0 -pandas>=0.24.0 -sqlalchemy>=1.3.12 -pyodbc -scikit-learn>=0.21.2 -statsmodels>=0.10.0 -seaborn>=0.9.0 -sphinx==2.2.2 -sphinx-rtd-theme>=0.4.3 -flake8>=3.7.9 -m2r>=0.2.1 -requests>=2.22.0 -requests-mock>=1.7.0 -dataclasses>=0.7; python_version == '3.6' diff --git a/setup.py b/setup.py deleted file mode 100644 index 358008e..0000000 --- a/setup.py +++ /dev/null @@ -1,16 +0,0 @@ -from setuptools import setup, find_packages - -with open('requirements.txt') as f: - requirements = f.read().splitlines() - -setup( - name='codonPython', - version='0.2.3.1', - license='BSD', - packages=find_packages(), - install_requires=requirements, - author='NHS Digital DIS Team', - 
author_email='paul.ellingham@nhs.net', - url='https://digital.nhs.uk/data-and-information', - description='This is a first attempt at how our package will work.' -) diff --git a/codonPython/__init__.py b/src/nhs_reusable_code_library/__init__.py similarity index 100% rename from codonPython/__init__.py rename to src/nhs_reusable_code_library/__init__.py diff --git a/codonPython/nhsNumber.py b/src/nhs_reusable_code_library/nhsNumber.py similarity index 100% rename from codonPython/nhsNumber.py rename to src/nhs_reusable_code_library/nhsNumber.py diff --git a/codonPython/suppression.py b/src/nhs_reusable_code_library/suppression.py similarity index 100% rename from codonPython/suppression.py rename to src/nhs_reusable_code_library/suppression.py diff --git a/codonPython/tests/dateValidator_test.py b/src/nhs_reusable_code_library/tests/dateValidator_test.py similarity index 92% rename from codonPython/tests/dateValidator_test.py rename to src/nhs_reusable_code_library/tests/dateValidator_test.py index 44a0a1e..2facbca 100644 --- a/codonPython/tests/dateValidator_test.py +++ b/src/nhs_reusable_code_library/tests/dateValidator_test.py @@ -1,4 +1,4 @@ -from codonPython.validation import dateValidator +from nhs_reusable_code_library.validation import dateValidator import pytest diff --git a/codonPython/tests/nhsNumber_test.py b/src/nhs_reusable_code_library/tests/nhsNumber_test.py similarity index 91% rename from codonPython/tests/nhsNumber_test.py rename to src/nhs_reusable_code_library/tests/nhsNumber_test.py index c1b966a..fbce3f6 100644 --- a/codonPython/tests/nhsNumber_test.py +++ b/src/nhs_reusable_code_library/tests/nhsNumber_test.py @@ -1,4 +1,4 @@ -from codonPython.nhsNumber import nhsNumberGenerator, nhsNumberValidator +from nhs_reusable_code_library.nhsNumber import nhsNumberGenerator, nhsNumberValidator import pytest import random diff --git a/codonPython/tests/suppression_test.py b/src/nhs_reusable_code_library/tests/suppression_test.py similarity index 86% 
rename from codonPython/tests/suppression_test.py rename to src/nhs_reusable_code_library/tests/suppression_test.py index 9e8f04f..295e842 100644 --- a/codonPython/tests/suppression_test.py +++ b/src/nhs_reusable_code_library/tests/suppression_test.py @@ -1,4 +1,4 @@ -from codonPython.suppression import central_suppression_method +from nhs_reusable_code_library.suppression import central_suppression_method import pytest diff --git a/codonPython/validation/dateValidator.py b/src/nhs_reusable_code_library/validation/dateValidator.py similarity index 100% rename from codonPython/validation/dateValidator.py rename to src/nhs_reusable_code_library/validation/dateValidator.py