diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 961f501..c30f40a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,7 +1,10 @@ name: check on: - push: pull_request: + branches: [main] + pull_request_target: + types: [opened, synchronize, reopened] + branches: [main] workflow_dispatch: jobs: @@ -15,9 +18,9 @@ jobs: with: version: latest - - uses: golangci/golangci-lint-action@v4 + uses: golangci/golangci-lint-action@v7 with: - version: latest + version: v2.2.0 - run: go test ./... shell: bash diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 34680c7..c6c2ca4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,25 +4,27 @@ on: - '*' name: "Create release" + +permissions: + contents: write + jobs: goreleaser: runs-on: ubuntu-latest steps: - - - name: Checkout + - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 - - - uses: kevincobain2000/action-gobrew@v2 + - name: Set up Go + uses: actions/setup-go@v5 with: - version: latest - - - name: Run GoReleaser - uses: goreleaser/goreleaser-action@v5 + go-version: stable + - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 with: distribution: goreleaser - version: latest + version: "~> v2" args: release --clean env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.golangci.yaml b/.golangci.yaml index fe58133..7614753 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,24 +1,36 @@ +version: "2" linters: - # Disable all linters. - # Default: false - disable-all: true - # Enable specific linter - # https://golangci-lint.run/usage/linters/#enabled-by-default + default: none enable: - - errcheck - - gosimple - - govet - - ineffassign - - staticcheck - dupl + - errcheck - errorlint - - exportloopref - goconst - gocritic - gocyclo - goprintffuncname - gosec + - govet + - ineffassign - prealloc - revive - - stylecheck + - staticcheck - whitespace + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 60ba558..c4cb37d 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -1,9 +1,11 @@ +version: 2 + before: hooks: - go mod tidy + builds: - - - env: + - env: - CGO_ENABLED=0 goos: - linux @@ -12,6 +14,11 @@ builds: goarch: - amd64 - arm64 + ldflags: + - -s -w + - -X main.version={{.Version}} + - -X main.commit={{.Commit}} + - -X main.date={{.Date}} archives: - format: binary diff --git a/cache_example.go b/cache_example.go new file mode 100644 index 0000000..495763e --- /dev/null +++ b/cache_example.go @@ -0,0 +1,71 @@ +package main + +import ( + "fmt" + "time" +) + +// Example of how to use the improved directory cache with performance monitoring +func ExampleCacheUsage() { + // Create custom cache configuration + cacheConfig := &CacheConfig{ + TTL: 2 * time.Minute, // Longer TTL for better performance + CleanupInterval: 1 * time.Minute, // More frequent cleanup + MaxEntries: 500, // Smaller cache for this example + EnablePeriodicCleanup: true, + } + + // Create cache with custom configuration + fs := &DefaultFileSystem{} + cache := NewDirCache(cacheConfig, fs) + defer cache.Close() // Important: close to stop cleanup goroutine + + // Test some directory checks + testDirs := []string{ + "/tmp", + "/etc", + "/var", + "/usr/bin", + "/nonexistent/dir", + } + + fmt.Println("Testing directory cache performance...") + + // First round - cache misses + start := time.Now() + for _, dir := range testDirs { + exists := cache.IsDirectoryNotEmpty(dir) + fmt.Printf("Directory %s exists and not empty: %v\n", dir, exists) + } + firstRoundTime := time.Since(start) + + // Second round - cache hits (should be much faster) + start = time.Now() + for _, dir := range testDirs { + exists := cache.IsDirectoryNotEmpty(dir) + fmt.Printf("Directory %s exists and not empty: %v (cached)\n", dir, exists) + } + secondRoundTime := time.Since(start) + + // Get and display cache statistics + stats := cache.GetStats() + fmt.Printf("\n=== Cache Performance Statistics ===\n") + fmt.Printf("Cache Hits: %d\n", stats.Hits) + fmt.Printf("Cache Misses: %d\n", stats.Misses) + fmt.Printf("Hit Ratio: %.2f%%\n", stats.HitRatio()*100) + fmt.Printf("Total Entries: %d\n", stats.TotalSize) + fmt.Printf("Evictions: %d\n", stats.Evictions) + + fmt.Printf("\n=== Performance Improvement ===\n") + fmt.Printf("First round (cache misses): %v\n", firstRoundTime) + fmt.Printf("Second round (cache hits): %v\n", secondRoundTime) + if secondRoundTime.Nanoseconds() > 0 { + speedup := float64(firstRoundTime.Nanoseconds()) / float64(secondRoundTime.Nanoseconds()) + fmt.Printf("Speedup: %.1fx faster with cache\n", speedup) + } +} + +// Uncomment to run the example: +// func main() { +// ExampleCacheUsage() +// } \ No newline at end of file diff --git a/main.go b/main.go index 72921f2..3e671fe 100644 --- a/main.go +++ b/main.go @@ -1,38 +1,939 @@ package main import ( + "context" + "errors" "fmt" - "log" + "net/url" "os" "os/exec" + "os/signal" "path/filepath" "regexp" + "runtime" + "sort" "strings" + "sync" + "sync/atomic" + "syscall" + "time" "github.com/spf13/pflag" ) +// Dependencies interfaces for better testability and modularity + +// FileSystem abstracts file system operations for better testability +type FileSystem interface { + Open(name string) (*os.File, error) + Stat(name string) (os.FileInfo, error) + MkdirAll(path string, perm os.FileMode) error + UserHomeDir() (string, error) + Getenv(key string) string +} + +// CommandRunner abstracts command execution for better testability +type CommandRunner interface { + LookPath(file string) (string, error) + Run(ctx context.Context, name string, args ...string) error + RunWithOutput(ctx context.Context, name string, args ...string) ([]byte, error) +} + +// GitCloner abstracts git cloning operations for better testability +type GitCloner interface { + Clone(ctx context.Context, repository, targetDir string, quiet, shallow bool) error +} + +// DirectoryChecker abstracts directory existence checking for better testability +type DirectoryChecker interface { + IsNotEmpty(name string) bool +} + +// Environment abstracts environment operations +type Environment interface { + UserHomeDir() (string, error) + Getenv(key string) string +} + +// Dependencies holds all external dependencies for the application +type Dependencies struct { + FS FileSystem + CmdRun CommandRunner + GitClone GitCloner + DirCheck DirectoryChecker + Env Environment +} + +// Default implementations for production use + +// DefaultFileSystem provides real file system operations +type DefaultFileSystem struct{} + +func (fs *DefaultFileSystem) Open(name string) (*os.File, error) { + return os.Open(name) +} + +func (fs *DefaultFileSystem) Stat(name string) (os.FileInfo, error) { + return os.Stat(name) +} + +func (fs *DefaultFileSystem) MkdirAll(path string, perm os.FileMode) error { + return os.MkdirAll(path, perm) +} + +func (fs *DefaultFileSystem) UserHomeDir() (string, error) { + return os.UserHomeDir() +} + +func (fs *DefaultFileSystem) Getenv(key string) string { + return os.Getenv(key) +} + +// DefaultCommandRunner provides real command execution +type DefaultCommandRunner struct{} + +func (cr *DefaultCommandRunner) LookPath(file string) (string, error) { + return exec.LookPath(file) +} + +func (cr *DefaultCommandRunner) Run(ctx context.Context, name string, args ...string) error { + cmd := exec.CommandContext(ctx, name, args...) + return cmd.Run() +} + +func (cr *DefaultCommandRunner) RunWithOutput(ctx context.Context, name string, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, name, args...) + return cmd.Output() +} + +// DefaultGitCloner provides real git cloning functionality +type DefaultGitCloner struct{} + +func (gc *DefaultGitCloner) Clone(_ context.Context, repository, targetDir string, quiet, shallow bool) error { + return secureGitClone(repository, targetDir, quiet, shallow) +} + +// DefaultDirectoryChecker provides real directory checking functionality +type DefaultDirectoryChecker struct { + cache *DirCache +} + +func NewDefaultDirectoryChecker(fs FileSystem) *DefaultDirectoryChecker { + return &DefaultDirectoryChecker{ + cache: NewDirCache(DefaultCacheConfig(), fs), + } +} + +func NewDirectoryCheckerWithConfig(fs FileSystem, config *CacheConfig) *DefaultDirectoryChecker { + return &DefaultDirectoryChecker{ + cache: NewDirCache(config, fs), + } +} + +func (dc *DefaultDirectoryChecker) IsNotEmpty(name string) bool { + return dc.cache.IsDirectoryNotEmpty(name) +} + +// DefaultEnvironment provides real environment operations +type DefaultEnvironment struct{} + +func (env *DefaultEnvironment) UserHomeDir() (string, error) { + return os.UserHomeDir() +} + +func (env *DefaultEnvironment) Getenv(key string) string { + return os.Getenv(key) +} + +// NewDefaultDependencies creates a new Dependencies instance with default implementations +func NewDefaultDependencies() *Dependencies { + fs := &DefaultFileSystem{} + return &Dependencies{ + FS: fs, + CmdRun: &DefaultCommandRunner{}, + GitClone: &DefaultGitCloner{}, + DirCheck: NewDefaultDirectoryChecker(fs), + Env: &DefaultEnvironment{}, + } +} + +// NewDependenciesWithCacheConfig creates a new Dependencies instance with custom cache configuration +func NewDependenciesWithCacheConfig(cacheConfig *CacheConfig) *Dependencies { + fs := &DefaultFileSystem{} + return &Dependencies{ + FS: fs, + CmdRun: &DefaultCommandRunner{}, + GitClone: &DefaultGitCloner{}, + DirCheck: NewDirectoryCheckerWithConfig(fs, cacheConfig), + Env: &DefaultEnvironment{}, + } +} + var ( - version = "0.3.4" + version = "dev" commit = "none" date = "unknown" ) -var r = regexp.MustCompile(`^(?:.*://)?(?:[^@]+@)?([^:/]+)(?::\d+)?[/:]?(.*)$`) +// Config holds the configuration for the application +type Config struct { + ShowCommandHelp bool + ShowVersionInfo bool + Quiet bool + ShallowClone bool + Workers int + RepositoryArgs []string + Dependencies *Dependencies + CacheConfig *CacheConfig +} -func main() { - var showCommandHelp, showVersionInfo, quiet bool - pflag.BoolVarP(&showCommandHelp, "help", "h", false, "Show this help message and exit") - pflag.BoolVarP(&showVersionInfo, "version", "v", false, "Show the version number and exit") - pflag.BoolVarP(&quiet, "quiet", "q", false, "Suppress output") +// ProcessingResult holds the result of repository processing +type ProcessingResult struct { + LastSuccessfulProjectDir string + ProcessedCount int + FailedCount int +} + +// RepositoryJob represents a job for cloning a repository +type RepositoryJob struct { + Repository string + Index int // Original position in the arguments list +} + +// WorkerResult represents the result of processing a repository job +type WorkerResult struct { + Job RepositoryJob + ProjectDir string + Success bool + Error error +} + +// WorkerPool manages parallel repository cloning +type WorkerPool struct { + config *Config + jobs chan RepositoryJob + results chan WorkerResult + done chan struct{} + shutdown chan struct{} + workerCount int32 // Track active workers for graceful shutdown +} + +// NewWorkerPool creates a new worker pool for parallel repository cloning +func NewWorkerPool(config *Config) *WorkerPool { + return &WorkerPool{ + config: config, + jobs: make(chan RepositoryJob, len(config.RepositoryArgs)), + results: make(chan WorkerResult, len(config.RepositoryArgs)), + done: make(chan struct{}), + shutdown: make(chan struct{}), + } +} + +// worker is the worker goroutine that processes repository cloning jobs +func (wp *WorkerPool) worker(workerID int) { + atomic.AddInt32(&wp.workerCount, 1) + defer atomic.AddInt32(&wp.workerCount, -1) + + for { + select { + case job, ok := <-wp.jobs: + if !ok { + return // Jobs channel is closed + } + wp.processJob(job, workerID) + case <-wp.shutdown: + return // Shutdown requested + } + } +} + +// processJob processes a single repository cloning job +func (wp *WorkerPool) processJob(job RepositoryJob, _ int) { + result := WorkerResult{ + Job: job, + Success: false, + } + + // Security: Validate repository URL before processing + if err := validateRepositoryURL(job.Repository); err != nil { + result.Error = fmt.Errorf("invalid repository URL '%s': %w", job.Repository, err) + wp.results <- result + return + } + + projectDir, err := getProjectDir(job.Repository, wp.config.Dependencies.Env) + if err != nil { + result.Error = fmt.Errorf("failed to determine project directory for '%s': %w", job.Repository, err) + wp.results <- result + return + } + + // Set project directory in result for potential success case + result.ProjectDir = projectDir + + // Check if directory already exists and is not empty + if wp.config.Dependencies.DirCheck.IsNotEmpty(projectDir) { + if !wp.config.Quiet { + // Thread-safe output using stderr + fmt.Fprintf(os.Stderr, "repository already exists: %s\n", projectDir) + } + result.Success = true + wp.results <- result + return + } + + // Create parent directory + if err := wp.config.Dependencies.FS.MkdirAll(filepath.Dir(projectDir), 0750); err != nil { + result.Error = fmt.Errorf("failed create directory: %w", err) + wp.results <- result + return + } + + // Security: Use secure git clone with validated arguments + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + if err := wp.config.Dependencies.GitClone.Clone(ctx, job.Repository, filepath.Dir(projectDir), wp.config.Quiet, wp.config.ShallowClone); err != nil { + result.Error = fmt.Errorf("failed clone repository '%s': %w", job.Repository, err) + wp.results <- result + return + } + + // Clone was successful + result.Success = true + if !wp.config.Quiet { + fmt.Fprintln(os.Stderr) + } + wp.results <- result +} + +// Start starts the worker pool and processes all repositories +func (wp *WorkerPool) Start() *ProcessingResult { + // Start workers + for i := 0; i < wp.config.Workers; i++ { + go wp.worker(i) + } + + // Send jobs to workers + go func() { + defer close(wp.jobs) + for i, repository := range wp.config.RepositoryArgs { + job := RepositoryJob{ + Repository: strings.TrimSpace(repository), + Index: i, + } + select { + case wp.jobs <- job: + case <-wp.shutdown: + return // Shutdown requested, stop sending jobs + } + } + }() + + // Collect results + result := &ProcessingResult{} + processedCount := 0 + expectedJobs := len(wp.config.RepositoryArgs) + + for processedCount < expectedJobs { + select { + case workerResult := <-wp.results: + result.ProcessedCount++ + processedCount++ + + if workerResult.Success { + result.LastSuccessfulProjectDir = workerResult.ProjectDir + } else { + result.FailedCount++ + if workerResult.Error != nil { + prnt(workerResult.Error.Error()) + } + } + case <-wp.shutdown: + // Graceful shutdown requested + wp.gracefulShutdown() + // Continue collecting results for already started jobs + for processedCount < result.ProcessedCount { + workerResult := <-wp.results + processedCount++ + if !workerResult.Success { + result.FailedCount++ + } + } + return result + } + } + + // Signal completion and wait for workers to finish + close(wp.done) + wp.waitForWorkers() + + return result +} + +// gracefulShutdown signals all workers to shut down +func (wp *WorkerPool) gracefulShutdown() { + close(wp.shutdown) +} + +// waitForWorkers waits for all workers to finish gracefully +func (wp *WorkerPool) waitForWorkers() { + // Wait with timeout to avoid hanging indefinitely + timeout := time.After(30 * time.Second) + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + if atomic.LoadInt32(&wp.workerCount) == 0 { + return // All workers have finished + } + case <-timeout: + // Force shutdown after timeout + return + } + } +} + +// StartWithSignalHandling starts the worker pool with signal handling for graceful shutdown +func (wp *WorkerPool) StartWithSignalHandling() *ProcessingResult { + // Set up signal handling + signalChan := make(chan os.Signal, 1) + signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) + + // Start worker pool in a goroutine + resultChan := make(chan *ProcessingResult, 1) + go func() { + resultChan <- wp.Start() + }() + + // Wait for either completion or signal + select { + case result := <-resultChan: + // Normal completion + signal.Stop(signalChan) + return result + case sig := <-signalChan: + // Signal received, initiate graceful shutdown + if !wp.config.Quiet { + fmt.Fprintf(os.Stderr, "\nReceived signal %v, initiating graceful shutdown...\n", sig) + } + wp.gracefulShutdown() + + // Wait for result with timeout + select { + case result := <-resultChan: + if !wp.config.Quiet { + fmt.Fprintf(os.Stderr, "Graceful shutdown completed.\n") + } + return result + case <-time.After(45 * time.Second): + // Force shutdown if graceful shutdown takes too long + if !wp.config.Quiet { + fmt.Fprintf(os.Stderr, "Graceful shutdown timeout, forcing exit.\n") + } + os.Exit(1) + return nil // Never reached + } + } +} + +// getDefaultWorkers returns the default number of workers based on CPU count +func getDefaultWorkers() int { + cpuCount := runtime.NumCPU() + if cpuCount > 4 { + return 4 + } + return cpuCount +} + +// RegexType represents different types of repository URL patterns +type RegexType int + +const ( + RegexHTTPS RegexType = iota + RegexSSH + RegexGit + RegexGeneric +) + +// Specialized regex pools for different URL patterns +var ( + // HTTPS URLs (https://github.com/user/repo.git) + httpsRegexPool = sync.Pool{ + New: func() any { + return regexp.MustCompile(`^https?://([^/]+)/(.+?)(?:\.git)?/?$`) + }, + } + + // SSH URLs (git@github.com:user/repo.git) + sshRegexPool = sync.Pool{ + New: func() any { + return regexp.MustCompile(`^(?:ssh://)?([^@]+)@([^:]+):(.+?)(?:\.git)?/?$`) + }, + } + + // Git protocol URLs (git://github.com/user/repo.git) + gitRegexPool = sync.Pool{ + New: func() any { + return regexp.MustCompile(`^git://([^/]+)/(.+?)(?:\.git)?/?$`) + }, + } + + // Generic fallback regex pool (original pattern) + genericRegexPool = sync.Pool{ + New: func() any { + return regexp.MustCompile(`^(?:.*://)?(?:[^@]+@)?([^:/]+)(?::\d+)?[/:]?(.*)$`) + }, + } +) + +// RegexPoolStats tracks usage statistics for regex pools +type RegexPoolStats struct { + HTTPSUsage int64 + SSHUsage int64 + GitUsage int64 + GenericUsage int64 + CacheHits int64 + CacheMisses int64 + mutex sync.RWMutex +} + +var regexStats = &RegexPoolStats{} + +// GetRegexStats returns current regex usage statistics +func GetRegexStats() RegexPoolStats { + regexStats.mutex.RLock() + defer regexStats.mutex.RUnlock() + // Return a copy to avoid returning the mutex + return RegexPoolStats{ + HTTPSUsage: regexStats.HTTPSUsage, + SSHUsage: regexStats.SSHUsage, + GitUsage: regexStats.GitUsage, + GenericUsage: regexStats.GenericUsage, + CacheHits: regexStats.CacheHits, + CacheMisses: regexStats.CacheMisses, + // Don't copy the mutex + } +} + +// incrementUsage atomically increments usage counter for specific regex type +func (stats *RegexPoolStats) incrementUsage(regexType RegexType) { + stats.mutex.Lock() + defer stats.mutex.Unlock() + + switch regexType { + case RegexHTTPS: + stats.HTTPSUsage++ + case RegexSSH: + stats.SSHUsage++ + case RegexGit: + stats.GitUsage++ + case RegexGeneric: + stats.GenericUsage++ + } +} + +// incrementCacheHit atomically increments cache hit counter +func (stats *RegexPoolStats) incrementCacheHit() { + stats.mutex.Lock() + defer stats.mutex.Unlock() + stats.CacheHits++ +} + +// incrementCacheMiss atomically increments cache miss counter +func (stats *RegexPoolStats) incrementCacheMiss() { + stats.mutex.Lock() + defer stats.mutex.Unlock() + stats.CacheMisses++ +} + +// CacheConfig holds configuration parameters for directory cache +type CacheConfig struct { + TTL time.Duration + CleanupInterval time.Duration + MaxEntries int + EnablePeriodicCleanup bool +} + +// DefaultCacheConfig returns default cache configuration +func DefaultCacheConfig() *CacheConfig { + return &CacheConfig{ + TTL: 60 * time.Second, // Increased from 30s to 1 minute + CleanupInterval: 5 * time.Minute, + MaxEntries: 1000, + EnablePeriodicCleanup: true, + } +} + +type cacheEntry struct { + exists bool + timestamp time.Time + lastAccess time.Time +} + +// CacheStats holds statistics about cache performance +type CacheStats struct { + Hits int64 + Misses int64 + Evictions int64 + TotalSize int64 +} + +// CachePerformance provides cache performance metrics +func (stats *CacheStats) HitRatio() float64 { + total := stats.Hits + stats.Misses + if total == 0 { + return 0.0 + } + return float64(stats.Hits) / float64(total) +} + +type DirCache struct { + cache map[string]cacheEntry + mutex sync.RWMutex + config *CacheConfig + fs FileSystem + stats CacheStats + stopCleanup chan struct{} + cleanupOnce sync.Once +} + +var dirCache = NewDirCache(DefaultCacheConfig(), &DefaultFileSystem{}) + +// Security validation functions + +var ( + // Allowed URL schemes for git repositories + allowedSchemes = map[string]bool{ + "https": true, + "http": true, + "ssh": true, + "git": true, + } + + // Dangerous characters that could be used for command injection + dangerousChars = regexp.MustCompile(`[;&|$\x60<>(){}[\]!*?]`) + + // Valid hostname pattern - more restrictive than RFC but safer + validHostname = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*$`) + + // Valid path characters for Git repositories + validRepoPath = regexp.MustCompile(`^[a-zA-Z0-9._/-]+$`) +) + +// validateRepositoryURL performs comprehensive security validation of repository URLs +func validateRepositoryURL(repo string) error { + if repo == "" { + return errors.New("repository URL cannot be empty") + } + + // Check for dangerous characters that could indicate command injection + if dangerousChars.MatchString(repo) { + return errors.New("repository URL contains dangerous characters") + } + + // Handle SSH URLs like git@github.com:user/repo.git + if strings.Contains(repo, "@") && strings.Contains(repo, ":") && !strings.Contains(repo, "://") { + return validateSSHURL(repo) + } + + // Parse as regular URL + parsedURL, err := url.Parse(repo) + if err != nil { + return fmt.Errorf("invalid URL format: %w", err) + } + + // Validate scheme + if parsedURL.Scheme != "" && !allowedSchemes[parsedURL.Scheme] { + return fmt.Errorf("unsupported URL scheme: %s", parsedURL.Scheme) + } + + // Validate hostname + if parsedURL.Host != "" && !validHostname.MatchString(parsedURL.Host) { + return fmt.Errorf("invalid hostname: %s", parsedURL.Host) + } + + // Validate path for traversal attacks + if err := validatePath(parsedURL.Path); err != nil { + return err + } + + return nil +} + +// validateSSHURL validates SSH-style Git URLs (git@host:path) +func validateSSHURL(repo string) error { + parts := strings.SplitN(repo, "@", 2) + if len(parts) != 2 { + return errors.New("invalid SSH URL format") + } + + hostPath := parts[1] + hostPathParts := strings.SplitN(hostPath, ":", 2) + if len(hostPathParts) != 2 { + return errors.New("invalid SSH URL format - missing colon separator") + } + + host := hostPathParts[0] + path := hostPathParts[1] + + // Validate hostname + if !validHostname.MatchString(host) { + return fmt.Errorf("invalid hostname in SSH URL: %s", host) + } + + // Validate path + if err := validatePath(path); err != nil { + return err + } + + return nil +} + +// validatePath checks for path traversal attacks and invalid characters +func validatePath(path string) error { + if path == "" { + return nil // Empty path is acceptable + } + + // Check for path traversal attempts + if strings.Contains(path, "..") { + return errors.New("path traversal detected in URL") + } + + // Check for absolute paths that could escape intended directory + if len(path) > 0 && path[0] == '/' { + // Remove leading slash for validation but allow it + path = path[1:] + } + + // Allow tilde for user directories but validate the rest + if len(path) > 0 && path[0] == '~' { + path = path[1:] + if len(path) > 0 && path[0] == '/' { + path = path[1:] + } + } + + // Validate remaining path characters (allow common Git repo path characters) + if path != "" && !validRepoPath.MatchString(path) { + return fmt.Errorf("invalid characters in repository path: %s", path) + } + + return nil +} + +// secureGitClone performs git clone with additional security measures including timeout +func secureGitClone(repository, targetDir string, quiet, shallow bool) error { + // Double-check validation (defense in depth) + if err := validateRepositoryURL(repository); err != nil { + return fmt.Errorf("security validation failed: %w", err) + } + + // Validate target directory to prevent directory traversal + cleanTargetDir := filepath.Clean(targetDir) + if strings.Contains(cleanTargetDir, "..") { + return errors.New("target directory contains path traversal") + } + + // Create context with timeout to prevent hanging operations + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + // Create command with explicit arguments and timeout context (no shell interpretation) + args := []string{"clone"} + if shallow { + args = append(args, "--depth=1") + } + args = append(args, "--", repository) + cmd := exec.CommandContext(ctx, "git", args...) + + // Set working directory + cmd.Dir = cleanTargetDir + + // Configure output + if !quiet { + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + } + + // Execute with timeout protection + if err := cmd.Run(); err != nil { + if ctx.Err() == context.DeadlineExceeded { + return fmt.Errorf("git clone operation timed out after 10 minutes: %s", repository) + } + return err + } + + return nil +} + +// parseArgs parses command line arguments and returns configuration +func parseArgs() (*Config, error) { + cacheConfig := DefaultCacheConfig() + config := &Config{ + Dependencies: NewDependenciesWithCacheConfig(cacheConfig), + CacheConfig: cacheConfig, + Workers: getDefaultWorkers(), + } + + pflag.BoolVarP(&config.ShowCommandHelp, "help", "h", false, "Show this help message and exit") + pflag.BoolVarP(&config.ShowVersionInfo, "version", "v", false, "Show the version number and exit") + pflag.BoolVarP(&config.Quiet, "quiet", "q", false, "Suppress output") + pflag.BoolVarP(&config.ShallowClone, "shallow", "s", false, "Perform shallow clone with --depth=1") + pflag.IntVarP(&config.Workers, "workers", "w", getDefaultWorkers(), "Number of parallel workers for cloning") pflag.Parse() - if showCommandHelp { + config.RepositoryArgs = pflag.Args() + + // Validate workers count + if config.Workers < 1 { + return nil, errors.New("workers count must be at least 1") + } + if config.Workers > 32 { + return nil, errors.New("workers count cannot exceed 32") + } + + // Validate that we have arguments (unless help or version is requested) + if !config.ShowCommandHelp && !config.ShowVersionInfo && len(config.RepositoryArgs) == 0 { + return nil, errors.New("no repository URLs provided") + } + + // Validate each argument is not empty + for i, arg := range config.RepositoryArgs { + if strings.TrimSpace(arg) == "" { + return nil, fmt.Errorf("argument %d is empty", i+1) + } + } + + return config, nil +} + +// validateDependencies checks if required dependencies are available +func validateDependencies(deps *Dependencies) error { + // Check git availability with better error message + if _, err := deps.CmdRun.LookPath("git"); err != nil { + return errors.New("git command not found in PATH. Please install git: https://git-scm.com/downloads") + } + return nil +} + +// processRepositories processes all repository arguments using worker pool and returns the result +func processRepositories(config *Config) *ProcessingResult { + // For single repository or single worker, use sequential processing to avoid overhead + if len(config.RepositoryArgs) == 1 || config.Workers == 1 { + return processRepositoriesSequential(config) + } + + // Use worker pool for multiple repositories with multiple workers + wp := NewWorkerPool(config) + return wp.StartWithSignalHandling() +} + +// processRepositoriesSequential processes repositories sequentially (fallback for single repo/worker) +func processRepositoriesSequential(config *Config) *ProcessingResult { + result := &ProcessingResult{} + + for _, arg := range config.RepositoryArgs { + repository := strings.TrimSpace(arg) + result.ProcessedCount++ + + // Security: Validate repository URL before processing + if err := validateRepositoryURL(repository); err != nil { + prnt("invalid repository URL '%s': %s", repository, err) + result.FailedCount++ + continue + } + + projectDir, err := getProjectDir(repository, config.Dependencies.Env) + if err != nil { + prnt("failed to determine project directory for '%s': %s", repository, err) + result.FailedCount++ + continue + } + + // Check if directory already exists and is not empty + if ok := config.Dependencies.DirCheck.IsNotEmpty(projectDir); ok { + if !config.Quiet { + prnt("repository already exists: %s", projectDir) + } + // Still consider this successful for output purposes + result.LastSuccessfulProjectDir = projectDir + continue + } + + // Create parent directory + if err := config.Dependencies.FS.MkdirAll(filepath.Dir(projectDir), 0750); err != nil { + prnt("failed create directory: %s", err) + result.FailedCount++ + continue + } + + // Security: Use secure git clone with validated arguments + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + if err := config.Dependencies.GitClone.Clone(ctx, repository, filepath.Dir(projectDir), config.Quiet, config.ShallowClone); err != nil { + prnt("failed clone repository '%s': %s", repository, err) + result.FailedCount++ + continue + } + + // Clone was successful + result.LastSuccessfulProjectDir = projectDir + if !config.Quiet { + fmt.Fprintln(os.Stderr) + } + } + + return result +} + +// printSummary prints the final summary and handles output/exit logic +func printSummary(config *Config, result *ProcessingResult) { + // Print summary if multiple repositories were processed + if result.ProcessedCount > 1 && !config.Quiet { + successCount := result.ProcessedCount - result.FailedCount + prnt("processed %d repositories: %d successful, %d failed", + result.ProcessedCount, successCount, result.FailedCount) + } + + // Print the last successfully processed project directory + if result.LastSuccessfulProjectDir != "" { + abs, err := filepath.Abs(result.LastSuccessfulProjectDir) + if err != nil { + prnt("failed to get absolute path for %s: %s", result.LastSuccessfulProjectDir, err) + fmt.Println(result.LastSuccessfulProjectDir) // fallback to relative path + } else { + fmt.Println(abs) + } + } else { + // No successful repositories processed + if !config.Quiet { + prnt("no repositories were successfully processed") + } + os.Exit(1) + } +} + +func main() { + // Parse command line arguments + config, err := parseArgs() + if err != nil { + prnt("error: %s", err) + usage() + os.Exit(1) + } + + // Handle help command + if config.ShowCommandHelp { usage() return } - if showVersionInfo { + // Handle version command + if config.ShowVersionInfo { if commit != "none" { fmt.Printf("gclone version %s, commit %s, built at %s\n", version, commit, date) } else { @@ -41,109 +942,527 @@ func main() { return } - if _, err := exec.LookPath("git"); err != nil { - prnt("git not found") + // Validate dependencies + if err := validateDependencies(config.Dependencies); err != nil { + prnt("error: %s", err) os.Exit(1) } - var projectDir string - for _, arg := range pflag.Args() { - repository := arg - projectDir = getProjectDir(repository) + // Process repositories + result := processRepositories(config) - if ok := isDirectoryNotEmpty(projectDir); ok { - continue + // Print summary and handle exit + printSummary(config, result) +} + +// URLCache provides caching for normalized URLs to avoid repeated parsing +type URLCache struct { + cache map[string]string + mutex sync.RWMutex + maxEntries int +} + +var urlCache = &URLCache{ + cache: make(map[string]string), + maxEntries: 1000, +} + +// Get retrieves a cached normalized URL +func (uc *URLCache) Get(key string) (string, bool) { + uc.mutex.RLock() + defer uc.mutex.RUnlock() + value, exists := uc.cache[key] + return value, exists +} + +// Set stores a normalized URL in cache +func (uc *URLCache) Set(key, value string) { + uc.mutex.Lock() + defer uc.mutex.Unlock() + + // Simple eviction strategy: clear cache when full + if len(uc.cache) >= uc.maxEntries { + uc.cache = make(map[string]string) + } + uc.cache[key] = value +} + +// Clear removes all entries from the URLCache for test isolation +func (uc *URLCache) Clear() { + uc.mutex.Lock() + defer uc.mutex.Unlock() + uc.cache = make(map[string]string) +} + +// detectRegexType determines the best regex pattern for the given URL +func detectRegexType(repo string) RegexType { + if len(repo) > 8 && (repo[:7] == "https://" || repo[:7] == "http://") { + return RegexHTTPS + } + if strings.Contains(repo, "@") && strings.Contains(repo, ":") && !strings.Contains(repo, "://") { + return RegexSSH + } + if len(repo) > 6 && repo[:6] == "git://" { + return RegexGit + } + return RegexGeneric +} + +// getRegexFromPool gets the appropriate regex from pool based on URL type +func getRegexFromPool(regexType RegexType) (*regexp.Regexp, func(*regexp.Regexp)) { + switch regexType { + case RegexHTTPS: + r := httpsRegexPool.Get().(*regexp.Regexp) + return r, func(regex *regexp.Regexp) { httpsRegexPool.Put(regex) } + case RegexSSH: + r := sshRegexPool.Get().(*regexp.Regexp) + return r, func(regex *regexp.Regexp) { sshRegexPool.Put(regex) } + case RegexGit: + r := gitRegexPool.Get().(*regexp.Regexp) + return r, func(regex *regexp.Regexp) { gitRegexPool.Put(regex) } + default: + r := genericRegexPool.Get().(*regexp.Regexp) + return r, func(regex *regexp.Regexp) { genericRegexPool.Put(regex) } + } +} + +// normalize normalizes the given repository string and returns the parsed repository URL. +// Enhanced with security validation, specialized regex patterns, and caching. +// Returns error instead of empty string for better error handling. +func normalize(repo string) (string, error) { + if repo == "" { + return "", errors.New("repository URL is empty") + } + + // Check cache first + if cached, exists := urlCache.Get(repo); exists { + regexStats.incrementCacheHit() + return cached, nil + } + regexStats.incrementCacheMiss() + + // Detect the best regex pattern for this URL + regexType := detectRegexType(repo) + regexStats.incrementUsage(regexType) + + // Get appropriate regex from pool + r, putBack := getRegexFromPool(regexType) + defer putBack(r) + + var host, path string + + // Handle different URL patterns + switch regexType { + case RegexHTTPS, RegexGit: + match := r.FindStringSubmatch(repo) + if len(match) != 3 { + return "", errors.New("failed to parse HTTPS/Git repository URL format") + } + host, path = match[1], match[2] + + case RegexSSH: + match := r.FindStringSubmatch(repo) + if len(match) != 4 { + return "", errors.New("failed to parse SSH repository URL format") } + host, path = match[2], match[3] + + default: // RegexGeneric + match := r.FindStringSubmatch(repo) + if len(match) != 3 { + return "", errors.New("failed to parse repository URL format") + } + host, path = match[1], match[2] + } - if err := os.MkdirAll(filepath.Dir(projectDir), 0750); err != nil { - prnt("failed create directory: %s", err) + // Security: Validate host component + if !validHostname.MatchString(host) { + return "", fmt.Errorf("invalid hostname: %s", host) + } + + // Security: Sanitize path to prevent traversal attacks + sanitizedPath, err := sanitizePathWithError(path) + if err != nil { + return "", fmt.Errorf("invalid repository path: %w", err) + } + + // Security: Validate final path doesn't contain dangerous patterns + if strings.Contains(sanitizedPath, "..") || strings.Contains(sanitizedPath, "//") { + return "", errors.New("repository path contains dangerous patterns") + } + + result := filepath.Join(host, sanitizedPath) + + // Cache the result + urlCache.Set(repo, result) + + return result, nil +} + +// sanitizePathWithError cleans and validates repository paths against security threats +// Returns error for better error handling instead of empty string +func sanitizePathWithError(path string) (string, error) { + if path == "" { + return "", nil // Empty path is acceptable + } + + originalPath := path + + // Remove dangerous prefixes and suffixes with optimized string operations + // Use string slicing to avoid multiple allocations + for { + originalLen := len(path) + + // Remove leading slashes and tildes + if len(path) > 0 && (path[0] == '/' || path[0] == '~') { + path = path[1:] continue } - - cmd := exec.Command("git", "clone", repository) - if !quiet { - cmd.Stdout = os.Stderr - cmd.Stderr = os.Stderr + + // Remove trailing slashes + if len(path) > 0 && path[len(path)-1] == '/' { + path = path[:len(path)-1] + continue } - cmd.Dir = filepath.Dir(projectDir) - if err := cmd.Run(); err != nil { - prnt("failed clone repository: %s", err) + + // Remove .git suffix + if len(path) >= 4 && path[len(path)-4:] == ".git" { + path = path[:len(path)-4] continue } - if !quiet { - fmt.Fprintln(os.Stderr) + + // If no changes were made, break + if len(path) == originalLen { + break } } - // Print latest project directory - abs, _ := filepath.Abs(projectDir) - fmt.Println(abs) + // Security: Check for path traversal attempts + if strings.Contains(path, "..") { + return "", fmt.Errorf("path traversal detected: %s", originalPath) + } + + // Security: Remove consecutive slashes + for strings.Contains(path, "//") { + path = strings.ReplaceAll(path, "//", "/") + } + + // Security: Validate path contains only safe characters + if path != "" && !validRepoPath.MatchString(path) { + return "", fmt.Errorf("path contains invalid characters: %s", originalPath) + } + + // Security: Ensure path doesn't start with dangerous patterns + if len(path) >= 2 { + firstChar := path[0] + if (firstChar == '.' || firstChar == '-' || firstChar == '_') && path[1] == '/' { + return "", fmt.Errorf("path starts with dangerous pattern '%c/': %s", firstChar, originalPath) + } + } + + return path, nil } -// normalize normalizes the given repository string and returns the parsed repository URL. -func normalize(repo string) string { - match := r.FindStringSubmatch(repo) - if len(match) != 3 { +// sanitizePath cleans and validates repository paths against security threats +func sanitizePath(path string) string { + if path == "" { return "" } - path := match[2] - path = strings.TrimPrefix(path, "/") - path = strings.TrimPrefix(path, "~") - path = strings.TrimPrefix(path, "/") - path = strings.TrimSuffix(path, "/") - path = strings.TrimSuffix(path, ".git") - return filepath.Join(match[1], path) + // Remove dangerous prefixes and suffixes with optimized string operations + // Use string slicing to avoid multiple allocations + for { + originalLen := len(path) + + // Remove leading slashes and tildes + if len(path) > 0 && (path[0] == '/' || path[0] == '~') { + path = path[1:] + continue + } + + // Remove trailing slashes + if len(path) > 0 && path[len(path)-1] == '/' { + path = path[:len(path)-1] + continue + } + + // Remove .git suffix + if len(path) >= 4 && path[len(path)-4:] == ".git" { + path = path[:len(path)-4] + continue + } + + // If no changes were made, break + if len(path) == originalLen { + break + } + } + + // Security: Check for path traversal attempts + if strings.Contains(path, "..") { + return "" + } + + // Security: Remove consecutive slashes + for strings.Contains(path, "//") { + path = strings.ReplaceAll(path, "//", "/") + } + + // Security: Validate path contains only safe characters + if path != "" && !validRepoPath.MatchString(path) { + return "" + } + + // Security: Ensure path doesn't start with dangerous patterns + if len(path) >= 2 { + firstChar := path[0] + if (firstChar == '.' || firstChar == '-' || firstChar == '_') && path[1] == '/' { + return "" + } + } + + return path } // getProjectDir returns the project directory based on the given repository URL. // It retrieves the GIT_PROJECT_DIR environment variable and normalizes it. // If the GIT_PROJECT_DIR starts with "~", it replaces it with the user's home directory. // The normalized repository URL is then joined with the GIT_PROJECT_DIR to form the project directory path. -// The project directory path is returned as a string. -func getProjectDir(repository string) string { - gitProjectDir := os.Getenv("GIT_PROJECT_DIR") - if strings.HasPrefix(gitProjectDir, "~") { - homeDir, err := os.UserHomeDir() +// Returns error for better error handling instead of empty string. +func getProjectDir(repository string, env Environment) (string, error) { + gitProjectDir := env.Getenv("GIT_PROJECT_DIR") + + if len(gitProjectDir) > 0 && gitProjectDir[0] == '~' { + homeDir, err := env.UserHomeDir() if err != nil { - log.Fatal(err) + return "", fmt.Errorf("failed to get user home directory: %w", err) } gitProjectDir = filepath.Join(homeDir, gitProjectDir[1:]) } - return filepath.Join(gitProjectDir, normalize(repository)) + normalizedRepo, err := normalize(repository) + if err != nil { + return "", fmt.Errorf("failed to normalize repository URL: %w", err) + } + + // Security: Validate and clean the final path + projectDir := filepath.Join(gitProjectDir, normalizedRepo) + cleanedPath := filepath.Clean(projectDir) + + // Security: Ensure the path doesn't escape the base directory + if gitProjectDir != "" { + cleanGitProjectDir := filepath.Clean(gitProjectDir) + if len(cleanedPath) < len(cleanGitProjectDir) || cleanedPath[:len(cleanGitProjectDir)] != cleanGitProjectDir { + return "", errors.New("security: path traversal detected in project directory") + } + } + + return cleanedPath, nil } -// isDirectoryNotEmpty checks if the specified directory is not empty. +// isDirectoryNotEmptyRaw checks if the specified directory is not empty without caching. // It uses the Readdirnames function to get the directory contents without loading full FileInfo // structures for each entry. If there are any entries, it returns true. Otherwise, it returns false. -func isDirectoryNotEmpty(name string) bool { - f, err := os.Open(name) +func isDirectoryNotEmptyRaw(name string, fs FileSystem) bool { + f, err := fs.Open(name) if err != nil { return false } - defer f.Close() - _, err = f.Readdirnames(1) - return err == nil + names, err := f.Readdirnames(1) + f.Close() // Direct call without defer for better performance + + return err == nil && len(names) > 0 +} + +// NewDirCache creates a new directory cache with the given configuration +func NewDirCache(config *CacheConfig, fs FileSystem) *DirCache { + if config == nil { + config = DefaultCacheConfig() + } + + dc := &DirCache{ + cache: make(map[string]cacheEntry), + config: config, + fs: fs, + stopCleanup: make(chan struct{}), + } + + // Start periodic cleanup if enabled + if config.EnablePeriodicCleanup { + go dc.startPeriodicCleanup() + } + + return dc +} + +// Close stops the cache cleanup routine +func (dc *DirCache) Close() { + dc.cleanupOnce.Do(func() { + close(dc.stopCleanup) + }) +} + +// startPeriodicCleanup runs periodic cleanup of expired cache entries +func (dc *DirCache) startPeriodicCleanup() { + ticker := time.NewTicker(dc.config.CleanupInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + dc.cleanup() + case <-dc.stopCleanup: + return + } + } +} + +// cleanup removes expired entries from the cache +func (dc *DirCache) cleanup() { + now := time.Now() + dc.mutex.Lock() + defer dc.mutex.Unlock() + + evictionCount := 0 + for key, entry := range dc.cache { + if now.Sub(entry.timestamp) > dc.config.TTL { + delete(dc.cache, key) + evictionCount++ + } + } + + dc.stats.Evictions += int64(evictionCount) + dc.stats.TotalSize = int64(len(dc.cache)) +} + +// GetStats returns current cache statistics +func (dc *DirCache) GetStats() CacheStats { + dc.mutex.RLock() + defer dc.mutex.RUnlock() + + stats := dc.stats + stats.TotalSize = int64(len(dc.cache)) + return stats +} + +// Clear removes all entries from the cache +func (dc *DirCache) Clear() { + dc.mutex.Lock() + defer dc.mutex.Unlock() + + dc.cache = make(map[string]cacheEntry) + dc.stats = CacheStats{} +} + +// IsDirectoryNotEmpty checks if the specified directory is not empty with caching. +func (dc *DirCache) IsDirectoryNotEmpty(name string) bool { + now := time.Now() + + // Try to get from cache first (optimistic read) + dc.mutex.RLock() + if entry, ok := dc.cache[name]; ok { + if now.Sub(entry.timestamp) < dc.config.TTL { + // Cache hit - we need to upgrade to write lock to update lastAccess + dc.mutex.RUnlock() + dc.mutex.Lock() + // Double-check after acquiring write lock (entry might have been evicted) + if entry, ok := dc.cache[name]; ok && now.Sub(entry.timestamp) < dc.config.TTL { + entry.lastAccess = now + dc.cache[name] = entry + dc.stats.Hits++ + dc.mutex.Unlock() + return entry.exists + } + dc.mutex.Unlock() + // Entry was evicted or expired during lock upgrade, fall through to miss handling + } else { + dc.mutex.RUnlock() + } + } else { + dc.mutex.RUnlock() + } + + // Cache miss or expired entry - check directory + exists := isDirectoryNotEmptyRaw(name, dc.fs) + + // Update cache with new entry + dc.mutex.Lock() + dc.cache[name] = cacheEntry{ + exists: exists, + timestamp: now, + lastAccess: now, + } + dc.stats.Misses++ + + // Check if cache size exceeds limit and evict LRU entries if needed + if dc.config.MaxEntries > 0 && len(dc.cache) > dc.config.MaxEntries { + dc.evictLRU() + } + + dc.mutex.Unlock() + + return exists +} + +// evictLRU removes the least recently used entries to stay within MaxEntries limit +func (dc *DirCache) evictLRU() { + // Find entries to evict (remove 10% of cache when limit is exceeded) + targetSize := int(float64(dc.config.MaxEntries) * 0.9) + toEvict := len(dc.cache) - targetSize + + if toEvict <= 0 { + return + } + + // Create slice of entries with their keys for sorting + type entryWithKey struct { + key string + lastAccess time.Time + } + + entries := make([]entryWithKey, 0, len(dc.cache)) + for key, entry := range dc.cache { + entries = append(entries, entryWithKey{key: key, lastAccess: entry.lastAccess}) + } + + // Sort by last access time (oldest first) using efficient sort.Slice + sort.Slice(entries, func(i, j int) bool { + return entries[i].lastAccess.Before(entries[j].lastAccess) + }) + + // Remove oldest entries + for i := 0; i < toEvict && i < len(entries); i++ { + delete(dc.cache, entries[i].key) + dc.stats.Evictions++ + } +} + +// isDirectoryNotEmpty is a wrapper that uses the global cache. +func isDirectoryNotEmpty(name string) bool { + return dirCache.IsDirectoryNotEmpty(name) } // Usage prints the usage of the program. func usage() { - fmt.Println("usage: gclone [-h] [-v] [REPOSITORY]") + fmt.Println("usage: gclone [-h] [-v] [-q] [-s] [-w WORKERS] [REPOSITORY]") fmt.Println() fmt.Println("positional arguments:") - fmt.Println(" REPOSITORY Repository URL") + fmt.Println(" REPOSITORY Repository URL") fmt.Println() fmt.Println("optional arguments:") - fmt.Println(" -h, --help Show this help message and exit") - fmt.Println(" -v, --version Show the version number and exit") + fmt.Println(" -h, --help Show this help message and exit") + fmt.Println(" -v, --version Show the version number and exit") + fmt.Println(" -q, --quiet Suppress output") + fmt.Println(" -s, --shallow Perform shallow clone with --depth=1") + fmt.Printf(" -w, --workers Number of parallel workers (default: %d)\n", getDefaultWorkers()) fmt.Println() fmt.Println("environment variables:") - fmt.Println(" GIT_PROJECT_DIR Directory to clone repositories") + fmt.Println(" GIT_PROJECT_DIR Directory to clone repositories") fmt.Println() - fmt.Println("example:") + fmt.Println("examples:") fmt.Println(" GIT_PROJECT_DIR=\"$HOME/src\" gclone https://github.com/user/repo") + fmt.Println(" gclone -w 8 https://github.com/user/repo1 https://github.com/user/repo2") } func prnt(format string, args ...any) { diff --git a/main_test.go b/main_test.go index 8ad8211..ef7f9cd 100644 --- a/main_test.go +++ b/main_test.go @@ -1,7 +1,13 @@ package main import ( + "context" + "errors" + "fmt" + "os" "path/filepath" + "strings" + "sync" "testing" ) @@ -75,7 +81,13 @@ func Test_getProjectDir(t *testing.T) { t.Setenv("HOME", tt.homeVar) t.Setenv("GIT_PROJECT_DIR", tt.gitProjectDir) - if got := getProjectDir(tt.repository); got != tt.want { + env := &DefaultEnvironment{} + got, err := getProjectDir(tt.repository, env) + if err != nil { + t.Errorf("getProjectDir() unexpected error = %v", err) + return + } + if got != tt.want { t.Errorf("getProjectDir() = %v, want %v", got, tt.want) } }) @@ -163,10 +175,806 @@ func Test_normalize(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotRepo := normalize(tt.args.repository) + gotRepo, err := normalize(tt.args.repository) + if err != nil { + t.Errorf("normalize() unexpected error = %v", err) + return + } if gotRepo != tt.wantRepo { - t.Errorf("parse() gotRepo = %v, want %v", gotRepo, tt.wantRepo) + t.Errorf("normalize() gotRepo = %v, want %v", gotRepo, tt.wantRepo) } }) } } + +// Benchmark functions +func BenchmarkNormalize(b *testing.B) { + repository := "https://github.com/user/repo" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +func BenchmarkIsDirectoryNotEmpty(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0600); err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + isDirectoryNotEmpty(nonEmptyDir) + } +} + +func BenchmarkIsDirectoryNotEmptyRaw(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0600); err != nil { + b.Fatal(err) + } + + fs := &DefaultFileSystem{} + b.ResetTimer() + for i := 0; i < b.N; i++ { + isDirectoryNotEmptyRaw(nonEmptyDir, fs) + } +} + +func BenchmarkIsDirectoryNotEmptyCache(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0600); err != nil { + b.Fatal(err) + } + + // Clear cache before benchmark + dirCache.Clear() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dirCache.IsDirectoryNotEmpty(nonEmptyDir) // This will benefit from caching after first call + } +} + +// New comprehensive benchmarks for performance measurement + +// BenchmarkNormalizeHTTPS benchmarks HTTPS URL parsing +func BenchmarkNormalizeHTTPS(b *testing.B) { + repository := "https://github.com/user/repo.git" + // Clear cache before benchmark + urlCache.Clear() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +// BenchmarkNormalizeSSH benchmarks SSH URL parsing +func BenchmarkNormalizeSSH(b *testing.B) { + repository := "git@github.com:user/repo.git" + // Clear cache before benchmark + urlCache.Clear() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +// BenchmarkNormalizeGit benchmarks Git protocol URL parsing +func BenchmarkNormalizeGit(b *testing.B) { + repository := "git://github.com/user/repo.git" + // Clear cache before benchmark + urlCache.Clear() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +// BenchmarkNormalizeCached benchmarks cached URL parsing +func BenchmarkNormalizeCached(b *testing.B) { + repository := "https://github.com/user/repo.git" + // Warm up cache + _, _ = normalize(repository) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +// BenchmarkNormalizeMixed benchmarks mixed URL types +func BenchmarkNormalizeMixed(b *testing.B) { + repositories := []string{ + "https://github.com/user/repo1.git", + "git@github.com:user/repo2.git", + "git://github.com/user/repo3.git", + "https://gitlab.com/user/repo4.git", + "git@gitlab.com:user/repo5.git", + } + // Clear cache before benchmark + urlCache.Clear() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repositories[i%len(repositories)]) + } +} + +// BenchmarkSanitizePathOptimized benchmarks optimized path sanitization +func BenchmarkSanitizePathOptimized(b *testing.B) { + path := "///~user//repo//.git///" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = sanitizePath(path) + } +} + +// BenchmarkDetectRegexType benchmarks regex type detection +func BenchmarkDetectRegexType(b *testing.B) { + urls := []string{ + "https://github.com/user/repo.git", + "git@github.com:user/repo.git", + "git://github.com/user/repo.git", + "github.com/user/repo", + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = detectRegexType(urls[i%len(urls)]) + } +} + +// Security tests +func TestValidateRepositoryURL(t *testing.T) { + tests := []struct { + name string + url string + wantErr bool + errMsg string + }{ + { + name: "valid https URL", + url: "https://github.com/user/repo.git", + wantErr: false, + }, + { + name: "valid SSH URL", + url: "git@github.com:user/repo.git", + wantErr: false, + }, + { + name: "command injection attempt", + url: "https://github.com/user/repo.git; rm -rf /", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "path traversal in URL", + url: "https://github.com/../../../etc/passwd", + wantErr: true, + errMsg: "path traversal", + }, + { + name: "invalid scheme", + url: "ftp://github.com/user/repo", + wantErr: true, + errMsg: "unsupported URL scheme", + }, + { + name: "empty URL", + url: "", + wantErr: true, + errMsg: "cannot be empty", + }, + { + name: "backticks for command substitution", + url: "https://github.com/user/`whoami`.git", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "pipe character", + url: "https://github.com/user/repo | cat /etc/passwd", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "invalid hostname", + url: "https://github..com/user/repo", + wantErr: true, + errMsg: "invalid hostname", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateRepositoryURL(tt.url) + if tt.wantErr { + if err == nil { + t.Errorf("validateRepositoryURL() expected error but got none for URL: %s", tt.url) + return + } + if !strings.Contains(err.Error(), tt.errMsg) { + t.Errorf("validateRepositoryURL() error = %v, expected to contain %v", err, tt.errMsg) + } + } else if err != nil { + t.Errorf("validateRepositoryURL() unexpected error = %v for URL: %s", err, tt.url) + } + }) + } +} + +func TestNormalizeSecurity(t *testing.T) { + tests := []struct { + name string + input string + expected string // Empty string means should be rejected + }{ + { + name: "normal repo", + input: "https://github.com/user/repo", + expected: "github.com/user/repo", + }, + { + name: "path traversal attempt", + input: "https://github.com/../../../etc/passwd", + expected: "", // Should be rejected + }, + { + name: "double slash", + input: "https://github.com//user//repo", + expected: "github.com/user/repo", + }, + { + name: "invalid hostname", + input: "https://github..com/user/repo", + expected: "", // Should be rejected + }, + { + name: "dangerous path start with dot", + input: "https://github.com/./repo", + expected: "", // Should be rejected + }, + { + name: "invalid characters in path", + input: "https://github.com/user/repo