From e860969a7b5e16d82d65e165e36246a0df6b6e65 Mon Sep 17 00:00:00 2001 From: Matthew Rutherford Date: Fri, 4 Apr 2025 10:21:37 -0400 Subject: [PATCH 1/3] Added support for Azure Dev Ops and Azure Dev Ops Server --- README.md | 12 +- scraper/azuredevops/__init__.py | 215 ++++++++++++++++++++++++++++++++ scraper/azuredevops/models.py | 16 +++ scraper/code_gov/__init__.py | 21 ++++ scraper/code_gov/models.py | 54 +++++++- 5 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 scraper/azuredevops/__init__.py create mode 100644 scraper/azuredevops/models.py diff --git a/README.md b/README.md index 4c89191..5db65aa 100644 --- a/README.md +++ b/README.md @@ -118,13 +118,23 @@ The basic structure is: ```jsonc "TFS": [ { - "url": "https://tfs.internal", // Base URL for a Team Foundation Server (TFS) or Visual Studio Team Services (VSTS) or Azure DevOps instance + "url": "https://tfs.internal", // Base URL for a Team Foundation Server (TFS) or Visual Studio Team Services (VSTS) "token": null, // Private token for accessing this TFS instance "exclude": [ ... ] // List of projects / repositories to exclude from inventory } ] ``` +```jsonc +"AzureDevOps": [ + { + "url": "https://dev.azure.com", // Base URL for an Azure Dev Ops Server or Azure Dev Ops Cloud instance + "token": null, // Personal Access Token for accessing this ADO instance + "apiVersion": "", // API Version + "exclude": [ ... ] // List of projects to exclude from inventory + } +] +``` ## License diff --git a/scraper/azuredevops/__init__.py b/scraper/azuredevops/__init__.py new file mode 100644 index 0000000..21f334d --- /dev/null +++ b/scraper/azuredevops/__init__.py @@ -0,0 +1,215 @@ +import logging +import os +import requests +import base64 +import re + +from scraper.azuredevops.models import AzureDevOpsCollection, AzureDevOpsProject +from typing import List + +logger = logging.getLogger(__name__) + +class AzureDevOpsClient: + def __init__(self, baseurl, api_version, token=None): + self.baseurl = baseurl + self.api_version = api_version + self.is_cloud_ado = "dev.azure.com" in baseurl + self.session = self._create_client_session(token) + + def get_projects_metadata(self) -> List[AzureDevOpsProject]: + """ + Get metadata for all projects + """ + collections = self._get_all_collections() + return self._get_all_projects(collections) + + def _create_client_session(self, token): + """ + Creates the Azure DevOps Client Context with the provided token. + If no token is provided, it will look for the ADO_API_TOKEN environment variable. + """ + if token is None: + token = os.environ.get("ADO_API_TOKEN", None) + + if token is None: + raise RuntimeError("Azure Dev Ops Token was not provided.") + + session = requests.Session() + auth_string = f":{token}" + encoded_auth = base64.b64encode(auth_string.encode('ascii')).decode('ascii') + session.headers.update({ + 'Authorization': f'Basic {encoded_auth}', + 'Accept': 'application/json' + }) + return session + + def _get_all_collections(self) -> List[AzureDevOpsCollection]: + """ + Get all collections from the Azure DevOps API. + """ + collections = [] + + if self.is_cloud_ado: + # For cloud Azure DevOps, get all organizations from the API + profile_url = f"https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version={self.api_version}" + profile_response = self.session.get(profile_url) + + if profile_response.status_code == 200: + profile = profile_response.json() + + # Get user's organizations/accounts + accounts_url = f"https://app.vssps.visualstudio.com/_apis/accounts?memberId={profile['id']}&api-version={self.api_version}" + accounts_response = self.session.get(accounts_url) + + if accounts_response.status_code == 200: + accounts_json = accounts_response.json() + + if accounts_json.get('value') and len(accounts_json['value']) > 0: + for org in accounts_json['value']: + collections.append(AzureDevOpsCollection( + id=org['accountId'], + name=org['accountName'], + url=f"https://dev.azure.com/{org['accountName']}" + )) + logger.debug(f"Found cloud organization: {org['accountName']}") + else: + logger.warning("No organizations found with your access token.") + + # Fallback: Try to extract organization from baseAddress + org_name = self.baseurl.rstrip('/').split('/')[-1] + if org_name and org_name != "dev.azure.com": + collections.append(AzureDevOpsCollection( + id=org_name, + name=org_name, + url=f"https://dev.azure.com/{org_name}" + )) + logger.debug(f"Using organization from base address: {org_name}") + else: + raise RuntimeError(f"Failed to retrieve organizations. Status Code: {accounts_response.status_code} Response: {accounts_response.text}") + else: + logger.warning(f"Failed to retrieve user profile: {profile_response.status_code} Response: {profile_response.text}") + logger.warning("Falling back to base address for organization extraction.") + # Fallback: Try to extract organization from baseAddress + org_name = self.baseurl.rstrip('/').split('/')[-1] + if org_name and org_name != "dev.azure.com": + collections.append(AzureDevOpsCollection( + id=org_name, + name=org_name, + url=f"https://dev.azure.com/{org_name}" + )) + logger.debug(f"Using organization from base address: {org_name}") + else: + raise RuntimeError("Could not determine organization. Please specify organization in the baseurl.") + else: + # For on-premises, get collections via API + collections_url = f"{self.baseurl}/_apis/projectcollections?api-version={self.api_version}" + collections_response = self.session.get(collections_url) + + if collections_response.status_code == 200: + collections_json = collections_response.json() + for collection in collections_json.get('value', []): + collections.append(AzureDevOpsCollection( + id=collection['id'], + name=collection['name'], + url=collection['url'] + )) + else: + raise RuntimeError(f"Failed to retrieve collections. Status Code: {collections_response.status_code} Response: {collections_response.text}") + + logger.debug(f"Found {len(collections)} collections/organizations") + return collections + + def _get_web_url_from_api_url(self, api_url, project_name): + """ + Convert an API URL to a web-accessible URL + + Parameters: + api_url (str): API URL for the project + project_name (str): Name of the project + + Returns: + str: Web URL for the project + """ + if self.is_cloud_ado: + # For cloud ADO, convert URL like: + # https://dev.azure.com/org-name/_apis/projects/project-id + # to: https://dev.azure.com/org-name/project-name + match = re.search(r'https://dev\.azure\.com/([^/]+)', api_url) + if match: + org_name = match.group(1) + return f"https://dev.azure.com/{org_name}/{project_name}" + else: + # For on-premises ADO, convert URL like: + # https://server/collection/_apis/projects/project-id + # to: https://server/collection/project-name + base_url = api_url.split('/_apis/projects')[0] + return f"{base_url}/{project_name}" + + def _get_repo_web_url(self, api_url, project_name): + """ + Generate web-accessible URL for repositories page + + Parameters: + api_url (str): API URL for the project + project_name (str): Name of the project + + Returns: + str: Web URL for the project's repositories page + """ + project_web_url = self._get_web_url_from_api_url(api_url, project_name) + return f"{project_web_url}/_git" + + def _get_all_projects(self, collections: List[AzureDevOpsCollection] = None) -> List[AzureDevOpsProject]: + """ + Get all projects from the provided collections or from all collections if none are provided + + Parameters: + collections (List[AzureDevOpsCollection]): List of collections to get projects from + """ + if collections is None: + collections = self._get_all_collections() + + projects = [] + for collection in collections: + collection_url = f"https://dev.azure.com/{collection.name}" if self.is_cloud_ado else f"{self.baseurl}/{collection.name}" + logger.debug('Getting projects from collection: %s', collection_url) + + top = 100 + project_skip = 0 + total_projects = 0 + has_more_projects = True + + while has_more_projects: + url = f"{collection_url}/_apis/projects?$top={top}&$skip={project_skip}&api-version={self.api_version}&includeCapabilities=true" + + response = self.session.get(url) + if response.status_code != 200: + raise RuntimeError(f"Failed to get projects: {response.status_code}") + + result = response.json() + for project in result.get('value', []): + project_api_url = project.get('url') + project_name = project.get('name') + + project_web_url = self._get_web_url_from_api_url(project_api_url, project_name) + repo_web_url = self._get_repo_web_url(project_api_url, project_name) + + projects.append(AzureDevOpsProject( + project_id=project.get('id'), + project_name=project_name, + project_description=project.get('description') or "", + project_url=project_web_url, + repo_url=repo_web_url, + project_create_time="", # Not provided in API response + project_last_update_time=project.get('lastUpdateTime'), + collection_or_org_name = collection.name + )) + + count = len(result.get('value', [])) + total_projects += count + project_skip += top + + has_more_projects = count == top + + return projects + diff --git a/scraper/azuredevops/models.py b/scraper/azuredevops/models.py new file mode 100644 index 0000000..8978d0b --- /dev/null +++ b/scraper/azuredevops/models.py @@ -0,0 +1,16 @@ +class AzureDevOpsCollection: + def __init__(self, id="", name="", url=""): + self.id = id + self.name = name + self.url = url + +class AzureDevOpsProject: + def __init__(self, project_id="", project_name="", project_description="", project_url="", repo_url="", project_create_time="", project_last_update_time="", collection_or_org_name = ""): + self.project_id = project_id + self.project_name = project_name + self.project_description = project_description + self.project_url = project_url + self.repo_url = repo_url + self.project_create_time = project_create_time + self.project_last_update_time = project_last_update_time + self.collection_or_org_name = collection_or_org_name \ No newline at end of file diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py index f5e0a77..d8bc350 100644 --- a/scraper/code_gov/__init__.py +++ b/scraper/code_gov/__init__.py @@ -6,6 +6,7 @@ from scraper import bitbucket, doecode, github, gitlab, tfs from scraper.code_gov.models import Metadata, Project from scraper.github import gov_orgs +from scraper.azuredevops import AzureDevOpsClient logger = logging.getLogger(__name__) @@ -128,6 +129,26 @@ def process_config(config): ) code_gov_metadata["releases"].append(code_gov_project) + # parse config for AzureDevOps repositories + ado_instances = config.get("AzureDevOps", []) + for instance in ado_instances: + url = instance.get("url") + token = instance.get("token", None) + api_version = instance.get("apiVersion", "6.1-preview") + excluded = instance.get("exclude", []) + + ado_client = AzureDevOpsClient(url, api_version, token) + projects = ado_client.get_projects_metadata() + for project in projects: + if project.project_name in excluded: + logger.info("Excluding: %s", project.project_name) + continue + + code_gov_project = Project.from_ado( + project, labor_hours=compute_labor_hours + ) + code_gov_metadata["releases"].append(code_gov_project) + # Handle parsing of DOE CODE records doecode_config = config.get("DOE CODE", {}) diff --git a/scraper/code_gov/models.py b/scraper/code_gov/models.py index 02cfe21..bf1e66b 100644 --- a/scraper/code_gov/models.py +++ b/scraper/code_gov/models.py @@ -12,6 +12,8 @@ from scraper.github.util import _license_obj from scraper.util import _prune_dict_null_str, labor_hours_from_url +from scraper.azuredevops.models import AzureDevOpsProject + logger = logging.getLogger(__name__) POLICY_START_DATE = date_parse("2016-08-08T00:00:00Z") @@ -598,7 +600,7 @@ def from_tfs(klass, tfs_project, labor_hours=True): project["description"] = tfs_project.projectInfo.description - project["vcs"] = "TFS/AzureDevOps" + project["vcs"] = "TFS" project["permissions"]["license"] = None @@ -629,3 +631,53 @@ def from_tfs(klass, tfs_project, labor_hours=True): _prune_dict_null_str(project) return project + + @classmethod + def from_ado(klass, ado_project: AzureDevOpsProject, labor_hours=True): + """ + Creates CodeGovProject object from AzureDevOps Instance + """ + project = klass() + project_web_url = "" + + # -- REQUIRED FIELDS -- + project["name"] = ado_project.project_name + + project["repositoryURL"] = requote_uri(ado_project.repo_url) + + project["homepageURL"] = requote_uri(ado_project.project_url) + + project["description"] = ado_project.project_description + + project["vcs"] = "AzureDevOps" + + project["permissions"]["license"] = None + + project["tags"] = [] + + if labor_hours: + logger.debug("Sorry labor hour calculation not currently supported.") + # project['laborHours'] = labor_hours_from_url(project['repositoryURL']) + else: + project["laborHours"] = 0 + + last_update_time_as_date = date_parse(ado_project.project_last_update_time) + if last_update_time_as_date < POLICY_START_DATE: + project["permissions"]["usageType"] = "exemptByPolicyDate" + else: + project["permissions"]["usageType"] = "exemptByAgencyMission" + project["permissions"][ + "exemptionText" + ] = "This source code resides on a private server and has not been properly evaluated for releaseability." + + project["contact"] = {"email": "", "URL": project_web_url} + + project["date"] = { + "lastModified": last_update_time_as_date.isoformat(), + "created": "", + "metadataLastUpdated": "", + } + + _prune_dict_null_str(project) + + return project \ No newline at end of file From 064fb162ef1c1a699d5df8a7e446f1188f274087 Mon Sep 17 00:00:00 2001 From: Ian Lee Date: Thu, 17 Apr 2025 06:07:41 -0700 Subject: [PATCH 2/3] Blackened files --- scraper/azuredevops/__init__.py | 205 ++++++++++++++++++-------------- scraper/azuredevops/models.py | 15 ++- scraper/code_gov/__init__.py | 2 +- scraper/code_gov/models.py | 6 +- 4 files changed, 136 insertions(+), 92 deletions(-) diff --git a/scraper/azuredevops/__init__.py b/scraper/azuredevops/__init__.py index 21f334d..c2b408a 100644 --- a/scraper/azuredevops/__init__.py +++ b/scraper/azuredevops/__init__.py @@ -9,6 +9,7 @@ logger = logging.getLogger(__name__) + class AzureDevOpsClient: def __init__(self, baseurl, api_version, token=None): self.baseurl = baseurl @@ -22,7 +23,7 @@ def get_projects_metadata(self) -> List[AzureDevOpsProject]: """ collections = self._get_all_collections() return self._get_all_projects(collections) - + def _create_client_session(self, token): """ Creates the Azure DevOps Client Context with the provided token. @@ -30,103 +31,124 @@ def _create_client_session(self, token): """ if token is None: token = os.environ.get("ADO_API_TOKEN", None) - + if token is None: raise RuntimeError("Azure Dev Ops Token was not provided.") - + session = requests.Session() auth_string = f":{token}" - encoded_auth = base64.b64encode(auth_string.encode('ascii')).decode('ascii') - session.headers.update({ - 'Authorization': f'Basic {encoded_auth}', - 'Accept': 'application/json' - }) + encoded_auth = base64.b64encode(auth_string.encode("ascii")).decode("ascii") + session.headers.update( + {"Authorization": f"Basic {encoded_auth}", "Accept": "application/json"} + ) return session - + def _get_all_collections(self) -> List[AzureDevOpsCollection]: """ Get all collections from the Azure DevOps API. """ collections = [] - + if self.is_cloud_ado: # For cloud Azure DevOps, get all organizations from the API profile_url = f"https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version={self.api_version}" profile_response = self.session.get(profile_url) - + if profile_response.status_code == 200: profile = profile_response.json() - + # Get user's organizations/accounts accounts_url = f"https://app.vssps.visualstudio.com/_apis/accounts?memberId={profile['id']}&api-version={self.api_version}" accounts_response = self.session.get(accounts_url) - + if accounts_response.status_code == 200: accounts_json = accounts_response.json() - - if accounts_json.get('value') and len(accounts_json['value']) > 0: - for org in accounts_json['value']: - collections.append(AzureDevOpsCollection( - id=org['accountId'], - name=org['accountName'], - url=f"https://dev.azure.com/{org['accountName']}" - )) - logger.debug(f"Found cloud organization: {org['accountName']}") + + if accounts_json.get("value") and len(accounts_json["value"]) > 0: + for org in accounts_json["value"]: + collections.append( + AzureDevOpsCollection( + id=org["accountId"], + name=org["accountName"], + url=f"https://dev.azure.com/{org['accountName']}", + ) + ) + logger.debug( + f"Found cloud organization: {org['accountName']}" + ) else: logger.warning("No organizations found with your access token.") - + # Fallback: Try to extract organization from baseAddress - org_name = self.baseurl.rstrip('/').split('/')[-1] + org_name = self.baseurl.rstrip("/").split("/")[-1] if org_name and org_name != "dev.azure.com": - collections.append(AzureDevOpsCollection( - id=org_name, - name=org_name, - url=f"https://dev.azure.com/{org_name}" - )) - logger.debug(f"Using organization from base address: {org_name}") + collections.append( + AzureDevOpsCollection( + id=org_name, + name=org_name, + url=f"https://dev.azure.com/{org_name}", + ) + ) + logger.debug( + f"Using organization from base address: {org_name}" + ) else: - raise RuntimeError(f"Failed to retrieve organizations. Status Code: {accounts_response.status_code} Response: {accounts_response.text}") + raise RuntimeError( + f"Failed to retrieve organizations. Status Code: {accounts_response.status_code} Response: {accounts_response.text}" + ) else: - logger.warning(f"Failed to retrieve user profile: {profile_response.status_code} Response: {profile_response.text}") - logger.warning("Falling back to base address for organization extraction.") + logger.warning( + f"Failed to retrieve user profile: {profile_response.status_code} Response: {profile_response.text}" + ) + logger.warning( + "Falling back to base address for organization extraction." + ) # Fallback: Try to extract organization from baseAddress - org_name = self.baseurl.rstrip('/').split('/')[-1] + org_name = self.baseurl.rstrip("/").split("/")[-1] if org_name and org_name != "dev.azure.com": - collections.append(AzureDevOpsCollection( - id=org_name, - name=org_name, - url=f"https://dev.azure.com/{org_name}" - )) + collections.append( + AzureDevOpsCollection( + id=org_name, + name=org_name, + url=f"https://dev.azure.com/{org_name}", + ) + ) logger.debug(f"Using organization from base address: {org_name}") else: - raise RuntimeError("Could not determine organization. Please specify organization in the baseurl.") + raise RuntimeError( + "Could not determine organization. Please specify organization in the baseurl." + ) else: # For on-premises, get collections via API collections_url = f"{self.baseurl}/_apis/projectcollections?api-version={self.api_version}" collections_response = self.session.get(collections_url) - + if collections_response.status_code == 200: collections_json = collections_response.json() - for collection in collections_json.get('value', []): - collections.append(AzureDevOpsCollection( - id=collection['id'], - name=collection['name'], - url=collection['url'] - )) + for collection in collections_json.get("value", []): + collections.append( + AzureDevOpsCollection( + id=collection["id"], + name=collection["name"], + url=collection["url"], + ) + ) else: - raise RuntimeError(f"Failed to retrieve collections. Status Code: {collections_response.status_code} Response: {collections_response.text}") + raise RuntimeError( + f"Failed to retrieve collections. Status Code: {collections_response.status_code} Response: {collections_response.text}" + ) logger.debug(f"Found {len(collections)} collections/organizations") return collections - + def _get_web_url_from_api_url(self, api_url, project_name): """ Convert an API URL to a web-accessible URL - + Parameters: api_url (str): API URL for the project project_name (str): Name of the project - + Returns: str: Web URL for the project """ @@ -134,7 +156,7 @@ def _get_web_url_from_api_url(self, api_url, project_name): # For cloud ADO, convert URL like: # https://dev.azure.com/org-name/_apis/projects/project-id # to: https://dev.azure.com/org-name/project-name - match = re.search(r'https://dev\.azure\.com/([^/]+)', api_url) + match = re.search(r"https://dev\.azure\.com/([^/]+)", api_url) if match: org_name = match.group(1) return f"https://dev.azure.com/{org_name}/{project_name}" @@ -142,37 +164,43 @@ def _get_web_url_from_api_url(self, api_url, project_name): # For on-premises ADO, convert URL like: # https://server/collection/_apis/projects/project-id # to: https://server/collection/project-name - base_url = api_url.split('/_apis/projects')[0] + base_url = api_url.split("/_apis/projects")[0] return f"{base_url}/{project_name}" - + def _get_repo_web_url(self, api_url, project_name): """ Generate web-accessible URL for repositories page - + Parameters: api_url (str): API URL for the project project_name (str): Name of the project - + Returns: str: Web URL for the project's repositories page """ project_web_url = self._get_web_url_from_api_url(api_url, project_name) return f"{project_web_url}/_git" - - def _get_all_projects(self, collections: List[AzureDevOpsCollection] = None) -> List[AzureDevOpsProject]: + + def _get_all_projects( + self, collections: List[AzureDevOpsCollection] = None + ) -> List[AzureDevOpsProject]: """ Get all projects from the provided collections or from all collections if none are provided - + Parameters: collections (List[AzureDevOpsCollection]): List of collections to get projects from """ if collections is None: collections = self._get_all_collections() - + projects = [] for collection in collections: - collection_url = f"https://dev.azure.com/{collection.name}" if self.is_cloud_ado else f"{self.baseurl}/{collection.name}" - logger.debug('Getting projects from collection: %s', collection_url) + collection_url = ( + f"https://dev.azure.com/{collection.name}" + if self.is_cloud_ado + else f"{self.baseurl}/{collection.name}" + ) + logger.debug("Getting projects from collection: %s", collection_url) top = 100 project_skip = 0 @@ -181,35 +209,40 @@ def _get_all_projects(self, collections: List[AzureDevOpsCollection] = None) -> while has_more_projects: url = f"{collection_url}/_apis/projects?$top={top}&$skip={project_skip}&api-version={self.api_version}&includeCapabilities=true" - + response = self.session.get(url) if response.status_code != 200: - raise RuntimeError(f"Failed to get projects: {response.status_code}") - + raise RuntimeError( + f"Failed to get projects: {response.status_code}" + ) + result = response.json() - for project in result.get('value', []): - project_api_url = project.get('url') - project_name = project.get('name') - - project_web_url = self._get_web_url_from_api_url(project_api_url, project_name) + for project in result.get("value", []): + project_api_url = project.get("url") + project_name = project.get("name") + + project_web_url = self._get_web_url_from_api_url( + project_api_url, project_name + ) repo_web_url = self._get_repo_web_url(project_api_url, project_name) - - projects.append(AzureDevOpsProject( - project_id=project.get('id'), - project_name=project_name, - project_description=project.get('description') or "", - project_url=project_web_url, - repo_url=repo_web_url, - project_create_time="", # Not provided in API response - project_last_update_time=project.get('lastUpdateTime'), - collection_or_org_name = collection.name - )) - - count = len(result.get('value', [])) + + projects.append( + AzureDevOpsProject( + project_id=project.get("id"), + project_name=project_name, + project_description=project.get("description") or "", + project_url=project_web_url, + repo_url=repo_web_url, + project_create_time="", # Not provided in API response + project_last_update_time=project.get("lastUpdateTime"), + collection_or_org_name=collection.name, + ) + ) + + count = len(result.get("value", [])) total_projects += count project_skip += top - - has_more_projects = count == top - return projects + has_more_projects = count == top + return projects diff --git a/scraper/azuredevops/models.py b/scraper/azuredevops/models.py index 8978d0b..515c565 100644 --- a/scraper/azuredevops/models.py +++ b/scraper/azuredevops/models.py @@ -4,8 +4,19 @@ def __init__(self, id="", name="", url=""): self.name = name self.url = url + class AzureDevOpsProject: - def __init__(self, project_id="", project_name="", project_description="", project_url="", repo_url="", project_create_time="", project_last_update_time="", collection_or_org_name = ""): + def __init__( + self, + project_id="", + project_name="", + project_description="", + project_url="", + repo_url="", + project_create_time="", + project_last_update_time="", + collection_or_org_name="", + ): self.project_id = project_id self.project_name = project_name self.project_description = project_description @@ -13,4 +24,4 @@ def __init__(self, project_id="", project_name="", project_description="", proje self.repo_url = repo_url self.project_create_time = project_create_time self.project_last_update_time = project_last_update_time - self.collection_or_org_name = collection_or_org_name \ No newline at end of file + self.collection_or_org_name = collection_or_org_name diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py index d8bc350..0e83646 100644 --- a/scraper/code_gov/__init__.py +++ b/scraper/code_gov/__init__.py @@ -143,7 +143,7 @@ def process_config(config): if project.project_name in excluded: logger.info("Excluding: %s", project.project_name) continue - + code_gov_project = Project.from_ado( project, labor_hours=compute_labor_hours ) diff --git a/scraper/code_gov/models.py b/scraper/code_gov/models.py index bf1e66b..5eb837f 100644 --- a/scraper/code_gov/models.py +++ b/scraper/code_gov/models.py @@ -631,7 +631,7 @@ def from_tfs(klass, tfs_project, labor_hours=True): _prune_dict_null_str(project) return project - + @classmethod def from_ado(klass, ado_project: AzureDevOpsProject, labor_hours=True): """ @@ -660,7 +660,7 @@ def from_ado(klass, ado_project: AzureDevOpsProject, labor_hours=True): # project['laborHours'] = labor_hours_from_url(project['repositoryURL']) else: project["laborHours"] = 0 - + last_update_time_as_date = date_parse(ado_project.project_last_update_time) if last_update_time_as_date < POLICY_START_DATE: project["permissions"]["usageType"] = "exemptByPolicyDate" @@ -680,4 +680,4 @@ def from_ado(klass, ado_project: AzureDevOpsProject, labor_hours=True): _prune_dict_null_str(project) - return project \ No newline at end of file + return project From af46697c0aac8908fcdaae97214d91f9a3778b9b Mon Sep 17 00:00:00 2001 From: Ian Lee Date: Thu, 17 Apr 2025 06:27:05 -0700 Subject: [PATCH 3/3] Fixed isort and markdownlint test errors --- README.md | 1 + scraper/azuredevops/__init__.py | 7 ++++--- scraper/code_gov/__init__.py | 2 +- scraper/code_gov/models.py | 3 +-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5db65aa..be2f030 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ The basic structure is: } ] ``` + ```jsonc "AzureDevOps": [ { diff --git a/scraper/azuredevops/__init__.py b/scraper/azuredevops/__init__.py index c2b408a..c1b1a00 100644 --- a/scraper/azuredevops/__init__.py +++ b/scraper/azuredevops/__init__.py @@ -1,11 +1,12 @@ +import base64 import logging import os -import requests -import base64 import re +from typing import List + +import requests from scraper.azuredevops.models import AzureDevOpsCollection, AzureDevOpsProject -from typing import List logger = logging.getLogger(__name__) diff --git a/scraper/code_gov/__init__.py b/scraper/code_gov/__init__.py index 0e83646..e430a8e 100644 --- a/scraper/code_gov/__init__.py +++ b/scraper/code_gov/__init__.py @@ -4,9 +4,9 @@ import logging from scraper import bitbucket, doecode, github, gitlab, tfs +from scraper.azuredevops import AzureDevOpsClient from scraper.code_gov.models import Metadata, Project from scraper.github import gov_orgs -from scraper.azuredevops import AzureDevOpsClient logger = logging.getLogger(__name__) diff --git a/scraper/code_gov/models.py b/scraper/code_gov/models.py index 5eb837f..5af1e9d 100644 --- a/scraper/code_gov/models.py +++ b/scraper/code_gov/models.py @@ -9,11 +9,10 @@ import gitlab from requests.utils import requote_uri +from scraper.azuredevops.models import AzureDevOpsProject from scraper.github.util import _license_obj from scraper.util import _prune_dict_null_str, labor_hours_from_url -from scraper.azuredevops.models import AzureDevOpsProject - logger = logging.getLogger(__name__) POLICY_START_DATE = date_parse("2016-08-08T00:00:00Z")