From 2576504b74b2e4f29d4d4ba0b82a00cd5a70b871 Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 15:35:13 -0700 Subject: [PATCH 1/6] loosen --- pyproject.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0177ed0..8a0cad0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,25 +30,25 @@ dependencies = [ "boto3>=1.26.0,<2.0.0", "click~=8.1.0", "httpx~=0.23", - "platformdirs", + "platformdirs~=4.0", "pydantic~=1.10.0", "python-dateutil>=2.7,<3", - "tenacity~=8.2", + "tenacity~=9.0", ] dynamic = ["description", "version"] name = "syncsparkpy" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8,<3.13" [project.optional-dependencies] dev = [ - "Sphinx==4.3.0", - "deepdiff==6.3.0", - "pytest-asyncio==0.21.0", - "pytest-env==0.8.1", - "pytest==7.2.0", - "respx==0.20.1", - "ruff==0.7.2", + "deepdiff~=6.3", + "pytest-asyncio~=0.21", + "pytest-env~=0.8", + "pytest~=7.2", + "respx~=0.20", + "ruff~=0.7", + "sphinx~=5.3", ] [tool.hatch.version] From 1afa59dfddf4e236a6d494388b714682c6e2a2eb Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 15:36:10 -0700 Subject: [PATCH 2/6] workflows --- .github/workflows/pull-request.yml | 4 ++-- .github/workflows/release.yml | 15 ++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 5a1af3a..7e43af3 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -13,10 +13,10 @@ jobs: with: fetch-depth: 0 - - name: Set up Python 3.7.x + - name: Set up Python 3.8.x uses: actions/setup-python@v4 with: - python-version: "3.7.16" + python-version: "3.8.19" - name: Install dependencies run: pip install .[dev] diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 65783a6..9b5bb69 100644 --- a/.github/workflows/release.yml +++ 
b/.github/workflows/release.yml @@ -7,12 +7,12 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - release-latest: - permissions: - id-token: write # to verify the deployment originates from an appropriate source - contents: write # To allow pushing tags/etc. + release-latest: + permissions: + id-token: write # to verify the deployment originates from an appropriate source + contents: write # To allow pushing tags/etc. # Specify runner + deployment step runs-on: ubuntu-22.04 @@ -22,7 +22,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v4 with: - python-version: "3.7.16" + python-version: "3.8.19" - name: Install dependencies run: pip install . - name: version-tag @@ -59,7 +59,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.7.16" + python-version: "3.8.19" - name: Install dependencies run: pip install .[dev] - name: Build docs @@ -70,14 +70,13 @@ jobs: path: docs/_build/html release-docs: - # Add a dependency to the build job needs: build-docs # Grant GITHUB_TOKEN the permissions required to make a Pages deployment permissions: - pages: write # to deploy to Pages - id-token: write # to verify the deployment originates from an appropriate source + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source # Deploy to the github-pages environment environment: From 8351fedd6193c8d2ac44fa48687d20d469b0585e Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 15:37:47 -0700 Subject: [PATCH 3/6] valid python --- .github/workflows/pull-request.yml | 2 +- .github/workflows/release.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 7e43af3..2cc36ef 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python 3.8.x uses: actions/setup-python@v4 with: - 
python-version: "3.8.19" + python-version: "3.8.18" - name: Install dependencies run: pip install .[dev] diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9b5bb69..3c3e637 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -20,7 +20,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v4 with: - python-version: "3.8.19" + python-version: "3.8.18" - name: Install dependencies run: pip install . - name: version-tag @@ -57,7 +57,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.8.19" + python-version: "3.8.18" - name: Install dependencies run: pip install .[dev] - name: Build docs From 344981cd049d5a3e8fa5885e75c0ed24765b55f9 Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 15:42:46 -0700 Subject: [PATCH 4/6] ruff fixes --- pyproject.toml | 2 +- sync/_databricks.py | 6 ++---- sync/clients/__init__.py | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8a0cad0..7900e12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dev = [ "pytest-env~=0.8", "pytest~=7.2", "respx~=0.20", - "ruff~=0.7", + "ruff<=1", "sphinx~=5.3", ] diff --git a/sync/_databricks.py b/sync/_databricks.py index 19c93f4..d4e9c54 100644 --- a/sync/_databricks.py +++ b/sync/_databricks.py @@ -684,11 +684,9 @@ def get_recommendation_cluster( # Recommendations to set these appropriately. Since we may recommend a Static cluster (i.e. 
a cluster # with `num_workers`) for a cluster that was originally autoscaled, we want to make sure to remove this # prior configuration - if "num_workers" in cluster: - del cluster["num_workers"] + cluster.pop("num_workers", None) - if "autoscale" in cluster: - del cluster["autoscale"] + cluster.pop("autoscale", None) recommendation_cluster = deep_update(cluster, recommendation["configuration"]) diff --git a/sync/clients/__init__.py b/sync/clients/__init__.py index c1a7b96..ab02c50 100644 --- a/sync/clients/__init__.py +++ b/sync/clients/__init__.py @@ -47,6 +47,7 @@ def __init__(self, client: Union[httpx.Client, httpx.AsyncClient]): self._client: Union[httpx.Client, httpx.AsyncClient] = client def _send_request(self, request: httpx.Request) -> httpx.Response: + response = None try: for attempt in Retrying( stop=stop_after_attempt(20), From f517b10c35e14ff19180b3c533c9e1cab5848ba7 Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 15:44:38 -0700 Subject: [PATCH 5/6] ignore rule --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7900e12..9f1d074 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,9 +73,9 @@ pythonpath = ["."] [tool.ruff] exclude = ["artifacts/*"] line-length = 100 -target-version = "py37" +target-version = "py38" [tool.ruff.lint] -ignore = ["E501"] +ignore = ["B903", "E501"] preview = true select = ["ASYNC", "B", "C9", "E", "F", "I", "PLE", "RUF", "TID", "UP", "W"] @@ -96,7 +96,7 @@ max-complexity = 10 [tool.pyright] pythonPlatform = "All" -pythonVersion = "3.7" +pythonVersion = "3.8" reportUnnecessaryTypeIgnoreComment = "error" typeCheckingMode = "standard" useLibraryCodeForTypes = false From d7878cc563d11a50b16e4386b1d48b1c6e8dc14d Mon Sep 17 00:00:00 2001 From: Samantha Hughes Date: Tue, 11 Mar 2025 16:05:03 -0700 Subject: [PATCH 6/6] pydantic --- pyproject.toml | 3 +- sync/config.py | 19 +++++++++--- sync/models.py | 80 
++++++++++++++++++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9f1d074..23778d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "click~=8.1.0", "httpx~=0.23", "platformdirs~=4.0", - "pydantic~=1.10.0", + "pydantic-settings~=2.0", + "pydantic~=2.0", "python-dateutil>=2.7,<3", "tenacity~=9.0", ] diff --git a/sync/config.py b/sync/config.py index a652295..228b1ce 100644 --- a/sync/config.py +++ b/sync/config.py @@ -8,7 +8,8 @@ from urllib.parse import urlparse import boto3 as boto -from pydantic import BaseSettings, Extra, Field, validator +from pydantic import Extra, Field, validator +from pydantic_settings import BaseSettings CREDENTIALS_FILE = "credentials" CONFIG_FILE = "config" @@ -33,7 +34,11 @@ class APIKey(BaseSettings): class Config: @classmethod def customise_sources(cls, init_settings, env_settings, file_secret_settings): - return (init_settings, env_settings, json_config_settings_source(CREDENTIALS_FILE)) + return ( + init_settings, + env_settings, + json_config_settings_source(CREDENTIALS_FILE), + ) class Configuration(BaseSettings): @@ -44,13 +49,19 @@ class Config: @classmethod def customise_sources(cls, init_settings, env_settings, file_secret_settings): - return (init_settings, env_settings, json_config_settings_source(CONFIG_FILE)) + return ( + init_settings, + env_settings, + json_config_settings_source(CONFIG_FILE), + ) class DatabricksConf(BaseSettings): host: str = Field(..., env="DATABRICKS_HOST") token: str = Field(..., env="DATABRICKS_TOKEN") - aws_region_name: str = Field(boto.client("s3").meta.region_name, env="DATABRICKS_AWS_REGION") + aws_region_name: str = Field( + boto.client("s3").meta.region_name, env="DATABRICKS_AWS_REGION" + ) @validator("host") def validate_host(cls, host): diff --git a/sync/models.py b/sync/models.py index e3f2019..4f5df8c 100644 --- a/sync/models.py +++ b/sync/models.py @@ -57,7 +57,9 @@ 
class AccessReportLine: class AccessReport(List[AccessReportLine]): def __str__(self): - return "\n".join(f"{line.name}\n {line.status}: {line.message}" for line in self) + return "\n".join( + f"{line.name}\n {line.status}: {line.message}" for line in self + ) def add_boto_method_call( self, @@ -95,15 +97,15 @@ def __str__(self): class ProjectError(Error): - code: str = Field("Project Error", const=True) + code: str = "Project Error" class RecommendationError(Error): - code: str = Field("Recommendation Error", const=True) + code: str = "Recommendation Error" class SubmissionError(Error): - code: str = Field("Submission Error", const=True) + code: str = "Submission Error" @unique @@ -139,13 +141,13 @@ class AzureDatabricksClusterReport(DatabricksClusterReport): class DatabricksError(Error): - code: str = Field("Databricks Error", const=True) + code: str = "Databricks Error" class MissingOrIncompleteEventlogError(Error): dbfs_eventlog_file_size: Union[int, None] = None - code: str = Field("Retryable Databricks Error", const=True) - message: str = Field("Event log was missing or incomplete. Please retry.", const=True) + code: str = "Retryable Databricks Error" + message: str = "Event log was missing or incomplete. Please retry." 
class DatabricksAPIError(Error): @@ -162,8 +164,8 @@ def validate_error(cls, values): class Response(GenericModel, Generic[DataType]): - result: Union[DataType, None] - error: Union[Error, None] + result: Union[DataType, None] = None + error: Union[Error, None] = None @validator("error", always=True) def check_consistency(cls, err, values): @@ -265,9 +267,13 @@ class AwsRegionEnum(str, Enum): "Statement": [ { "Effect": "Allow", - "Principal": {"AWS": "arn:aws:iam::533267411813:role/sync-computing-collector"}, + "Principal": { + "AWS": "arn:aws:iam::533267411813:role/sync-computing-collector" + }, "Action": "sts:AssumeRole", - "Condition": {"StringEquals": {"sts:ExternalId": "PLACEHOLDER_EXTERNAL_ID"}}, + "Condition": { + "StringEquals": {"sts:ExternalId": "PLACEHOLDER_EXTERNAL_ID"} + }, } ], } @@ -276,15 +282,15 @@ class AwsRegionEnum(str, Enum): class AwsHostedIAMInstructions(BaseModel): step_1_prompt: str = "Step 1: Copy the JSON and paste in AWS IAM Permissions page:" step_1_value: str = json.dumps(IAMRoleRequiredPermissions) - step_2_prompt: str = ( - "Step 2: Copy the JSON and paste in AWS IAM Trust relationships page with External ID:" - ) + step_2_prompt: str = "Step 2: Copy the JSON and paste in AWS IAM Trust relationships page with External ID:" external_id: str @property def step_2_value(self) -> str: policy = copy.deepcopy(IAMRoleTrustPolicy) - policy["Statement"][0]["Condition"]["StringEquals"]["sts:ExternalId"] = self.external_id + policy["Statement"][0]["Condition"]["StringEquals"]["sts:ExternalId"] = ( + self.external_id + ) return json.dumps(policy) @@ -299,16 +305,26 @@ class ComputeProviderHostedValues(BaseModel): class CreateWorkspaceConfig(BaseModel): workspace_id: str = Field(..., description="Unique identifier for the workspace") databricks_host: str = Field(..., description="Databricks service host URL") - databricks_token: str = Field(..., description="Authentication token for Databricks service") - sync_api_key_id: str = Field(..., 
description="API Key ID for synchronization service") - sync_api_key_secret: str = Field(..., description="API Key secret for synchronization service") - instance_profile_arn: Optional[str] = Field(None, description="AWS instance profile ARN") + databricks_token: str = Field( + ..., description="Authentication token for Databricks service" + ) + sync_api_key_id: str = Field( + ..., description="API Key ID for synchronization service" + ) + sync_api_key_secret: str = Field( + ..., description="API Key secret for synchronization service" + ) + instance_profile_arn: Optional[str] = Field( + None, description="AWS instance profile ARN" + ) webhook_id: Optional[str] = Field(None, description="Webhook ID for notifications") databricks_plan_type: DatabricksPlanType = Field( DatabricksPlanType.STANDARD, description="Plan type for Databricks deployment" ) aws_region: Optional[str] = Field(None, description="AWS region if applicable") - cluster_policy_id: Optional[str] = Field(None, description="Cluster policy ID for Databricks") + cluster_policy_id: Optional[str] = Field( + None, description="Cluster policy ID for Databricks" + ) collection_type: WorkspaceCollectionTypeEnum = Field( ..., description="Type of hosting for the workspace" ) @@ -318,10 +334,18 @@ class CreateWorkspaceConfig(BaseModel): compute_provider: ComputeProvider = Field( ..., description="Cloud provider for compute resources" ) - external_id: Optional[str] = Field(None, description="External ID for AWS configurations") - aws_iam_role_arn: Optional[str] = Field(None, description="AWS IAM role ARN if needed") - azure_tenant_id: Optional[str] = Field(None, description="Azure tenant ID if using Azure") - azure_client_id: Optional[str] = Field(None, description="Azure client ID if using Azure") + external_id: Optional[str] = Field( + None, description="External ID for AWS configurations" + ) + aws_iam_role_arn: Optional[str] = Field( + None, description="AWS IAM role ARN if needed" + ) + azure_tenant_id: 
Optional[str] = Field( + None, description="Azure tenant ID if using Azure" + ) + azure_client_id: Optional[str] = Field( + None, description="Azure client ID if using Azure" + ) azure_client_secret: Optional[str] = Field( None, description="Azure client secret if using Azure" ) @@ -352,7 +376,9 @@ def check_aws_iam_role_arn(cls, aws_iam_role_arn, values): compute_provider = values.get("compute_provider") if values.get("collection_type") == WorkspaceCollectionTypeEnum.HOSTED: if compute_provider == ComputeProvider.AWS and not aws_iam_role_arn: - raise ValueError("AWS IAM Role ARN is required for AWS compute provider") + raise ValueError( + "AWS IAM Role ARN is required for AWS compute provider" + ) return aws_iam_role_arn @validator("compute_provider", pre=False) @@ -367,7 +393,9 @@ def check_azure_hosted_fields(cls, compute_provider, values): "azure_client_secret", "azure_subscription_id", ] - missing_fields = [field for field in required_fields if not values.get(field)] + missing_fields = [ + field for field in required_fields if not values.get(field) + ] if missing_fields: raise ValueError( f"Missing required fields for Azure compute provider: "