Skip to content

Commit 744911f

Browse files
authored
Merge branch 'main' into CM-58022-cycode-guardrails-support-cursor-scan-via-hooks
2 parents 14afe21 + 26d13d2 commit 744911f

File tree

20 files changed

+499
-11
lines changed

20 files changed

+499
-11
lines changed

cycode/cli/apps/report/sbom/repository_url/repository_url_command.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
from cycode.cli.utils.get_api_client import get_report_cycode_client
99
from cycode.cli.utils.progress_bar import SbomReportProgressBarSection
1010
from cycode.cli.utils.sentry import add_breadcrumb
11+
from cycode.cli.utils.url_utils import sanitize_repository_url
12+
from cycode.logger import get_logger
13+
14+
logger = get_logger('Repository URL Command')
1115

1216

1317
def repository_url_command(
@@ -28,8 +32,13 @@ def repository_url_command(
2832
start_scan_time = time.time()
2933
report_execution_id = -1
3034

35+
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
36+
sanitized_uri = sanitize_repository_url(uri)
37+
if sanitized_uri != uri:
38+
logger.debug('Sanitized repository URL to remove credentials')
39+
3140
try:
32-
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri)
41+
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=sanitized_uri)
3342
report_execution_id = report_execution.id
3443

3544
create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format)

cycode/cli/apps/scan/code_scanner.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
import time
23
from platform import platform
34
from typing import TYPE_CHECKING, Callable, Optional
@@ -21,6 +22,7 @@
2122
from cycode.cli.files_collector.sca.sca_file_collector import add_sca_dependencies_tree_documents_if_needed
2223
from cycode.cli.files_collector.zip_documents import zip_documents
2324
from cycode.cli.models import CliError, Document, LocalScanResult
25+
from cycode.cli.utils.path_utils import get_absolute_path, get_path_by_os
2426
from cycode.cli.utils.progress_bar import ScanProgressBarSection
2527
from cycode.cli.utils.scan_batch import run_parallel_batched_scan
2628
from cycode.cli.utils.scan_utils import (
@@ -53,6 +55,21 @@ def scan_disk_files(ctx: typer.Context, paths: tuple[str, ...]) -> None:
5355
paths,
5456
is_cycodeignore_allowed=is_cycodeignore_allowed_by_scan_config(ctx),
5557
)
58+
59+
# Add an empty entrypoint.cycode document at the root path to mark the scan root (only for a single path that is a directory)
60+
if len(paths) == 1:
61+
root_path = paths[0]
62+
absolute_root_path = get_absolute_path(root_path)
63+
if os.path.isdir(absolute_root_path):
64+
entrypoint_path = get_path_by_os(os.path.join(absolute_root_path, consts.CYCODE_ENTRYPOINT_FILENAME))
65+
entrypoint_document = Document(
66+
entrypoint_path,
67+
'', # Empty file content
68+
is_git_diff_format=False,
69+
absolute_path=entrypoint_path,
70+
)
71+
documents.append(entrypoint_document)
72+
5673
add_sca_dependencies_tree_documents_if_needed(ctx, scan_type, documents)
5774
scan_documents(ctx, documents, get_scan_parameters(ctx, paths))
5875
except Exception as e:

cycode/cli/apps/scan/remote_url_resolver.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from cycode.cli import consts
44
from cycode.cli.utils.git_proxy import git_proxy
55
from cycode.cli.utils.shell_executor import shell
6+
from cycode.cli.utils.url_utils import sanitize_repository_url
67
from cycode.logger import get_logger
78

89
logger = get_logger('Remote URL Resolver')
@@ -102,7 +103,11 @@ def _try_get_git_remote_url(path: str) -> Optional[str]:
102103
repo = git_proxy.get_repo(path, search_parent_directories=True)
103104
remote_url = repo.remotes[0].config_reader.get('url')
104105
logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'repo_path': repo.working_dir})
105-
return remote_url
106+
# Sanitize URL to remove any embedded credentials/tokens before returning
107+
sanitized_url = sanitize_repository_url(remote_url)
108+
if sanitized_url != remote_url:
109+
logger.debug('Sanitized repository URL to remove credentials')
110+
return sanitized_url
106111
except Exception as e:
107112
logger.debug('Failed to get Git remote URL. Probably not a Git repository', exc_info=e)
108113
return None
@@ -124,7 +129,9 @@ def get_remote_url_scan_parameter(paths: tuple[str, ...]) -> Optional[str]:
124129
# - len(paths)*2 Plastic SCM subprocess calls
125130
remote_url = _try_get_any_remote_url(path)
126131
if remote_url:
127-
remote_urls.add(remote_url)
132+
# URLs are already sanitized in _try_get_git_remote_url, but sanitize again as a safety measure
133+
sanitized_url = sanitize_repository_url(remote_url)
134+
remote_urls.add(sanitized_url)
128135

129136
if len(remote_urls) == 1:
130137
# we are resolving remote_url only if all paths belong to the same repo (identical remote URLs),

cycode/cli/consts.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
IAC_SCAN_SUPPORTED_FILE_PREFIXES = ('dockerfile', 'containerfile')
1919

2020
CYCODEIGNORE_FILENAME = '.cycodeignore'
21+
CYCODE_ENTRYPOINT_FILENAME = 'entrypoint.cycode'
2122

2223
SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE = (
2324
'.DS_Store',
@@ -269,6 +270,7 @@
269270

270271
# git consts
271272
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'
273+
COMMIT_RANGE_ALL_COMMITS = '--all'
272274
GIT_HEAD_COMMIT_REV = 'HEAD'
273275
GIT_EMPTY_TREE_OBJECT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
274276
EMPTY_COMMIT_SHA = '0000000000000000000000000000000000000000'

cycode/cli/files_collector/commit_range_documents.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,10 +351,10 @@ def calculate_pre_push_commit_range(push_update_details: str) -> Optional[str]:
351351
return f'{merge_base}..{local_object_name}'
352352

353353
logger.debug('Failed to find merge base with any default branch')
354-
return '--all'
354+
return consts.COMMIT_RANGE_ALL_COMMITS
355355
except Exception as e:
356356
logger.debug('Failed to get repo for pre-push commit range calculation: %s', exc_info=e)
357-
return '--all'
357+
return consts.COMMIT_RANGE_ALL_COMMITS
358358

359359
# If deleting a branch (local_object_name is all zeros), no need to scan
360360
if local_object_name == consts.EMPTY_COMMIT_SHA:
@@ -448,9 +448,25 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Opt
448448
- 'commit' (interpreted as 'commit..HEAD')
449449
- '..to' (interpreted as 'HEAD..to')
450450
- 'from..' (interpreted as 'from..HEAD')
451+
- '--all' (interpreted as 'first_commit..HEAD' to scan all commits)
451452
"""
452453
repo = git_proxy.get_repo(path)
453454

455+
# Handle '--all' special case: scan all commits from first to HEAD
456+
# Usually represents an empty remote repository
457+
if commit_range == consts.COMMIT_RANGE_ALL_COMMITS:
458+
try:
459+
head_commit = repo.rev_parse(consts.GIT_HEAD_COMMIT_REV).hexsha
460+
all_commits = repo.git.rev_list('--reverse', head_commit).strip()
461+
if all_commits:
462+
first_commit = all_commits.splitlines()[0]
463+
return first_commit, head_commit, '..'
464+
logger.warning("No commits found for range '%s'", commit_range)
465+
return None, None, None
466+
except Exception as e:
467+
logger.warning("Failed to parse commit range '%s'", commit_range, exc_info=e)
468+
return None, None, None
469+
454470
separator = '..'
455471
if '...' in commit_range:
456472
from_spec, to_spec = commit_range.split('...', 1)

cycode/cli/files_collector/models/in_memory_zip.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@ def append(self, filename: str, unique_id: Optional[str], content: str) -> None:
2626
if unique_id:
2727
filename = concat_unique_id(filename, unique_id)
2828

29-
self.zip.writestr(filename, content)
29+
# Encode content to bytes with error handling to handle surrogate characters
30+
# that cannot be encoded to UTF-8. Use 'replace' to replace invalid characters
31+
# with '?' (when encoding, 'replace' emits a question mark; U+FFFD is used only when decoding).
32+
content_bytes = content.encode('utf-8', errors='replace')
33+
self.zip.writestr(filename, content_bytes)
3034

3135
def close(self) -> None:
3236
self.zip.close()

cycode/cli/files_collector/sca/maven/restore_maven_dependencies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def is_project(self, document: Document) -> bool:
2424
return path.basename(document.path).split('/')[-1] == BUILD_MAVEN_FILE_NAME
2525

2626
def get_commands(self, manifest_file_path: str) -> list[list[str]]:
27-
command = ['mvn', 'org.cyclonedx:cyclonedx-maven-plugin:2.7.4:makeAggregateBom', '-f', manifest_file_path]
27+
command = ['mvn', 'org.cyclonedx:cyclonedx-maven-plugin:2.9.1:makeAggregateBom', '-f', manifest_file_path]
2828

2929
maven_settings_file = self.ctx.obj.get('maven_settings_file')
3030
if maven_settings_file:

cycode/cli/printers/tables/table_printer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from cycode.cli.printers.tables.table_printer_base import TablePrinterBase
99
from cycode.cli.printers.utils import is_git_diff_based_scan
1010
from cycode.cli.printers.utils.detection_ordering.common_ordering import sort_and_group_detections_from_scan_result
11-
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
11+
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
1212

1313
if TYPE_CHECKING:
1414
from cycode.cli.models import LocalScanResult
@@ -96,6 +96,8 @@ def _enrich_table_with_detection_code_segment_values(
9696
if not self.show_secret:
9797
violation = obfuscate_text(violation)
9898

99+
violation = sanitize_text_for_encoding(violation)
100+
99101
table.add_cell(LINE_NUMBER_COLUMN, str(detection_line))
100102
table.add_cell(COLUMN_NUMBER_COLUMN, str(detection_column))
101103
table.add_cell(VIOLATION_LENGTH_COLUMN, f'{violation_length} chars')

cycode/cli/printers/utils/code_snippet_syntax.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from cycode.cli import consts
66
from cycode.cli.console import _SYNTAX_HIGHLIGHT_THEME
77
from cycode.cli.printers.utils import is_git_diff_based_scan
8-
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text
8+
from cycode.cli.utils.string_utils import get_position_in_line, obfuscate_text, sanitize_text_for_encoding
99

1010
if TYPE_CHECKING:
1111
from cycode.cli.models import Document
@@ -72,6 +72,7 @@ def _get_code_snippet_syntax_from_file(
7272
code_lines_to_render.append(line_content)
7373

7474
code_to_render = '\n'.join(code_lines_to_render)
75+
code_to_render = sanitize_text_for_encoding(code_to_render)
7576
return _get_syntax_highlighted_code(
7677
code=code_to_render,
7778
lexer=Syntax.guess_lexer(document.path, code=code_to_render),
@@ -94,6 +95,7 @@ def _get_code_snippet_syntax_from_git_diff(
9495
violation = line_content[detection_position_in_line : detection_position_in_line + violation_length]
9596
line_content = line_content.replace(violation, obfuscate_text(violation))
9697

98+
line_content = sanitize_text_for_encoding(line_content)
9799
return _get_syntax_highlighted_code(
98100
code=line_content,
99101
lexer='diff',

cycode/cli/printers/utils/rich_helpers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from rich.panel import Panel
66

77
from cycode.cli.console import console
8+
from cycode.cli.utils.string_utils import sanitize_text_for_encoding
89

910
if TYPE_CHECKING:
1011
from rich.console import RenderableType
@@ -20,8 +21,9 @@ def get_panel(renderable: 'RenderableType', title: str) -> Panel:
2021

2122

2223
def get_markdown_panel(markdown_text: str, title: str) -> Panel:
24+
sanitized_text = sanitize_text_for_encoding(markdown_text.strip())
2325
return get_panel(
24-
Markdown(markdown_text.strip()),
26+
Markdown(sanitized_text),
2527
title=title,
2628
)
2729

0 commit comments

Comments
 (0)