diff --git a/src/current/CREATE_PORTABLE_ARCHIVE.md b/src/current/CREATE_PORTABLE_ARCHIVE.md new file mode 100644 index 00000000000..08c8b52f11d --- /dev/null +++ b/src/current/CREATE_PORTABLE_ARCHIVE.md @@ -0,0 +1,190 @@ +# Creating a Portable CockroachDB Documentation Archive + +This guide shows how to create a fully portable, offline documentation archive for any CockroachDB version that works with **any folder name** and has **working navigation**. + +## What You'll Get + +- **Portable Archive**: Works when renamed to any folder name +- **Dynamic Navigation**: Automatically detects archive location +- **Comprehensive Sidebars**: Full navigation on every page +- **Fully Offline**: No internet connection required +- **Version-Specific**: Contains only docs relevant to the target version + +## Prerequisites + +- Jekyll site built and ready: `bundle exec jekyll build` +- Python 3.x installed +- BeautifulSoup4: `pip install beautifulsoup4` +- Requests: `pip install requests` + +## Quick Start + +### Create Archive for a Single Version + +```bash +# Navigate to the docs source directory +cd src/current + +# Create archive for any version (e.g., v23.1, v24.2, v25.4) +python3 create_single_archive.py v23.1 +``` + +This will create `cockroachdb-docs-v23.1-offline.zip` containing the complete offline documentation. + +### Create Archives for Multiple Versions + +```bash +# Create archives for multiple versions (default: v20.2, v21.1, v21.2, v22.1, v22.2) +python3 create_all_archives_fixed.py +``` + +## The 14-Step Archive Creation Process + +The `create_single_archive.py` script automates the following steps: + +1. **Create base archive** - `snapshot_relative.py` +2. **Apply navigation fixes** - `fix_navigation_quick.py` +3. **Fix version placeholders** - Dynamic script for target version +4. **Remove non-target sidebars** - Keep only target version sidebar +5. **Clean target sidebar** - Remove references to newer versions +6. 
**Fix JavaScript sidebar** - `fix_js_sidebar_final.py` +7. **Fix remaining references** - `fix_remaining_v25_refs.py` +8. **Create advisories directory** - For security advisories JSON +9. **Copy advisories JSON** - From `_site/docs/advisories/internal/` +10. **Fix incomplete sidebars** - `fix_incomplete_sidebars.py` +11. **Make navigation dynamic** - `make_navigation_dynamic_v2.py` +12. **Fix root navigation** - `fix_root_navigation.py` +13. **Fix broken sidebar links** - `fix_broken_sidebar_links.py` +14. **Fix final broken links** - `fix_final_broken_links.py` + +## Output Structure + +``` +offline_snap/ (or any name you choose) +├── index.html # Root landing page +├── {version}/ # Version-specific documentation +│ ├── index.html +│ └── [documentation pages] +├── cockroachcloud/ # CockroachCloud docs +├── advisories/ # Security advisories +├── releases/ # Release notes +├── molt/ # MOLT migration tool docs +├── css/ # Stylesheets +├── js/ # JavaScript +├── images/ # Images +│ └── {version}/ # Version-specific images +├── fonts/ # Localized Google Fonts +└── _internal/ # Internal assets + └── sidebar-{version}.html # Navigation sidebar +``` + +## Required Scripts + +### Main Scripts + +| Script | Purpose | +|--------|---------| +| `create_single_archive.py` | Creates a single version archive (recommended) | +| `create_all_archives_fixed.py` | Creates archives for multiple versions | +| `snapshot_relative.py` | Core archiver that creates the base structure | + +### Supporting Scripts (14-step process) + +| Script | Purpose | +|--------|---------| +| `fix_navigation_quick.py` | Basic navigation fixes | +| `fix_js_sidebar_final.py` | Remove newer version references from JavaScript | +| `fix_remaining_v25_refs.py` | Final URL cleanup | +| `fix_incomplete_sidebars.py` | Ensures all pages have comprehensive sidebar | +| `make_navigation_dynamic_v2.py` | Makes navigation work with any folder name | +| `fix_root_navigation.py` | Fixes navigation from root index.html | +| 
`fix_broken_sidebar_links.py` | Removes broken links from sidebars | +| `fix_final_broken_links.py` | Final pass for remaining broken links | + +## Features + +### Dynamic Folder Detection + +The archive can be renamed to any folder name and navigation will continue to work: + +```javascript +// The JavaScript automatically detects the archive folder: +// Works with: my-docs/, cockroachdb-archive/, custom_name/, etc. + +// Method 1: Look for _internal folder pattern +var internalMatch = currentPath.match(/\/([^\/]+)\/_internal\//); + +// Method 2: Look for known directory structure +var archiveMatch = currentPath.match(/\/([^\/]+)\/(v\d+\.\d+|cockroachcloud|releases)/); +``` + +### Cross-Directory Navigation + +- Navigate between version docs, cockroachcloud, advisories, and releases +- Proper relative path calculation from any page +- Sidebar works identically on all pages + +## Usage Instructions + +### Opening the Archive + +```bash +# Extract the archive +unzip cockroachdb-docs-v23.1-offline.zip + +# Open in browser (from within archive directory) +cd offline_snap +open index.html + +# Or use full path +open /path/to/offline_snap/index.html +``` + +### Sharing the Archive + +1. Share the zip file +2. User can extract and rename to anything: `my-docs/`, `cockroach-archive/`, etc. +3. 
Navigation will work automatically with the new name + +## Troubleshooting + +### Jekyll Build Missing + +```bash +# Ensure _site directory exists +bundle exec jekyll build +``` + +### Navigation Issues + +- **Problem**: Links go to wrong location +- **Solution**: Open `index.html` from within the archive directory + +### Folder Renaming Issues + +- **Problem**: Navigation breaks after renaming +- **Solution**: The `make_navigation_dynamic_v2.py` script should have been run during creation + +### Missing Sidebars + +- **Problem**: Some pages have minimal sidebars +- **Solution**: Run `fix_incomplete_sidebars.py` on the archive + +## Version Support + +The scripts support creating archives for any CockroachDB version: + +- v2.1, v19.1, v19.2, v20.1, v20.2 +- v21.1, v21.2 +- v22.1, v22.2 +- v23.1, v23.2 +- v24.1, v24.2, v24.3 +- v25.1, v25.2, v25.3, v25.4 +- v26.1 + +## Notes + +- Archives grow in size with newer versions due to more documentation +- Each archive is typically 100-200MB +- Archives are self-contained and work completely offline +- Navigation auto-detects the archive folder name for portability diff --git a/src/current/README_ARCHIVE_CREATION.md b/src/current/README_ARCHIVE_CREATION.md new file mode 100644 index 00000000000..72e00ba2507 --- /dev/null +++ b/src/current/README_ARCHIVE_CREATION.md @@ -0,0 +1,188 @@ +# CockroachDB Offline Documentation Archive Creation Guide + +This guide explains how to create offline documentation archives for specific CockroachDB versions. + +## Prerequisites + +Before creating archives, ensure you have: + +1. **Jekyll Build Directory**: `_site/` directory with built documentation + - Run Jekyll build if not present: `bundle exec jekyll build` + +2. **Python 3**: Required for all scripts + +3. **Supporting Scripts**: All scripts listed in the "Required Scripts" section below + +4. 
**Disk Space**: Each archive is 100-200MB; ensure adequate space is available + +## Quick Start + +### Create Archives for Multiple Versions + +```bash +# Create archives for the default versions (v20.2 through v22.2) +python3 create_all_archives_fixed.py +``` + +This will create archives for the default versions: v20.2, v21.1, v21.2, v22.1, v22.2 + +### Create Archive for a Single Version + +```bash +# Create archive for a specific version +python3 create_single_archive.py v23.1 +``` + +## The 14-Step Archive Creation Process + +Each archive goes through the following steps: + +1. **Create base archive** - `snapshot_relative.py` +2. **Apply navigation fixes** - `fix_navigation_quick.py` +3. **Fix version placeholders** - Dynamic script +4. **Remove non-target sidebars** - Shell command +5. **Clean target sidebar** - Python logic +6. **Fix JavaScript sidebar** - `fix_js_sidebar_final.py` +7. **Fix remaining references** - `fix_remaining_v25_refs.py` +8. **Create advisories directory** - Shell command +9. **Copy advisories JSON** - Shell command +10. **Fix incomplete sidebars** - `fix_incomplete_sidebars.py` +11. **Make navigation dynamic** - `make_navigation_dynamic_v2.py` +12. **Fix root navigation** - `fix_root_navigation.py` +13. **Fix broken sidebar links** - `fix_broken_sidebar_links.py` +14. 
**Fix final broken links** - `fix_final_broken_links.py` + +## Required Scripts + +### Main Scripts +- **`create_all_archives_fixed.py`** - Creates multiple version archives with all fixes +- **`create_single_archive.py`** - Creates a single version archive +- **`make_navigation_dynamic_v2.py`** - Makes navigation work with any folder name (version-aware) + +### Supporting Scripts (14-step process) +- **`snapshot_relative.py`** - Creates the initial archive structure +- **`fix_navigation_quick.py`** - Applies initial navigation fixes +- **`fix_js_sidebar_final.py`** - Fixes JavaScript sidebar functionality +- **`fix_remaining_v25_refs.py`** - Removes references to newer versions +- **`fix_incomplete_sidebars.py`** - Completes sidebar HTML structure +- **`fix_root_navigation.py`** - Fixes navigation for root-level files +- **`fix_broken_sidebar_links.py`** - Repairs broken links in sidebars +- **`fix_final_broken_links.py`** - Final pass to fix any remaining broken links + +## Common Issues and Solutions + +### Issue 1: Navigation Links Go to System Paths +**Symptom**: Links resolve to `file:///Users/username/Documents/index.html` instead of staying in archive + +**Cause**: The `make_navigation_dynamic.py` script has a hardcoded version (v19.2) in the pattern + +**Solution**: Use `make_navigation_dynamic_v2.py` which accepts the target version as a parameter + +### Issue 2: Sidebar Shows Newer Versions +**Symptom**: Archive for v20.1 shows links to v25.x in sidebar + +**Cause**: Sidebar cleaning step didn't remove all newer version references + +**Solution**: The script automatically removes references to versions newer than the target + +### Issue 3: Archive Doesn't Work When Renamed +**Symptom**: Navigation breaks when archive folder is renamed from `offline_snap` + +**Cause**: Hardcoded folder name in navigation JavaScript + +**Solution**: `make_navigation_dynamic_v2.py` makes the navigation detect any folder name + +### Issue 4: JavaScript Syntax Errors 
+**Symptom**: Browser console shows syntax errors, navigation completely broken + +**Cause**: Missing arguments in JavaScript `replace()` calls + +**Solution**: The scripts automatically fix these syntax errors during creation + +## Archive Structure + +Each archive contains: +``` +offline_snap/ +├── _internal/ +│ └── sidebar-vX.Y.html # Version-specific sidebar +├── v[version]/ # Version-specific documentation +├── releases/ # Release notes +├── advisories/ # Security advisories +├── cockroachcloud/ # Cloud documentation +├── molt/ # MOLT documentation +└── index.html # Main entry point +``` + +## Testing Archives + +1. **Extract the archive**: + ```bash + unzip cockroachdb-docs-v20.1-offline.zip + ``` + +2. **Open in browser**: + ```bash + open offline_snap/index.html + ``` + +3. **Test navigation**: + - Click "Docs Home" - should stay within archive + - Click version-specific links - should navigate correctly + - Check that sidebar shows only appropriate versions + +## Version Support + +The scripts support creating archives for any CockroachDB version. Common versions: +- v2.1, v19.2, v20.1, v20.2 +- v21.1, v21.2 +- v22.1, v22.2 +- v23.1, v23.2 +- v24.1, v24.2, v24.3 +- v25.1, v25.2, v25.3, v25.4 +- v26.1 + +## Advanced Usage + +### Customizing the Archive Creation + +Edit `create_all_archives_fixed.py` to: +- Change which versions are created (modify the `versions` list) +- Adjust the cleaning logic for sidebars +- Add additional fix steps + +### Manual Navigation Fix + +If you need to fix navigation in an existing archive: + +```bash +# Extract archive +unzip cockroachdb-docs-vX.Y-offline.zip + +# Apply navigation fix with correct version +python3 make_navigation_dynamic_v2.py offline_snap vX.Y + +# Re-create archive +zip -r cockroachdb-docs-vX.Y-offline.zip offline_snap/ +``` + +## Troubleshooting + +### Scripts Not Found +Ensure all supporting scripts are in the same directory as the main scripts. 
#!/usr/bin/env python3
"""
Create all documentation archives (v20.2, v21.1, v21.2, v22.1, v22.2)
with FIXED navigation that properly detects the version
"""
import re
import shutil
import subprocess
import sys
import time
from pathlib import Path

# Working directory that every step of the process reads and rewrites.
ARCHIVE_DIR = Path("offline_snap")

# Accepted version format; it is interpolated into shell commands and into
# generated Python source, so nothing looser than vX.Y is allowed through.
_VERSION_RE = re.compile(r"v(\d+)\.(\d+)")

# Upper bounds used when enumerating "newer" versions for sidebar cleaning.
_MAX_MAJOR = 26
_MAX_MINOR = 4

# An older "Method 2" detection block whose regex still uses the unescaped
# `[^/]` character class and some (possibly different) version number.
_LEGACY_METHOD2_BLOCK = re.compile(
    r'// Method 2: Look for archive structure.*?var archiveMatch = currentPath\.match\(/\\/\(\[\^/\]\+\)\\/\(docs\|v\d+\\\.\d+\|releases\|advisories\|cockroachcloud\|molt\)\\//\);',
    re.DOTALL,
)
_LEGACY_ARCHIVE_MATCH = re.compile(
    r'var archiveMatch = currentPath\.match\(/\\/\(\[\^/\]\+\)\\/\(docs\|v[\d\.\\]+\|releases\|advisories\|cockroachcloud\|molt\)\\//\);'
)


def run_cmd(cmd, description):
    """Run *cmd* through the shell and report whether it exited with status 0.

    Args:
        cmd: Shell command line (shell features such as ``||`` are used by callers).
        description: Human-readable label printed before the command runs.

    Returns:
        True when the exit status is 0, False otherwise. Failures only print a
        warning (suppressed when the command contains "verify"); they never raise.
    """
    print(f"  {description}...")
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    failed = result.returncode != 0
    if failed and "verify" not in cmd.lower():
        detail = result.stderr[:200] if result.stderr else 'Command had issues but continuing'
        print(f"    Warning: {detail}")
    return not failed


def _archive_match_js(version):
    """Return the JS ``archiveMatch`` statement for *version* (dot regex-escaped)."""
    # Computed outside any f-string: a backslash inside an f-string expression
    # is a SyntaxError on Python < 3.12.
    escaped = version.replace(".", "\\.")
    return (
        "var archiveMatch = currentPath.match(/\\/([^\\/]+)\\/(docs|"
        + escaped
        + "|releases|advisories|cockroachcloud|molt)\\//);"
    )


def _old_detection_pattern(version):
    """Compile a pattern matching the two-method detection block for *version*.

    Lines are joined with ``\\s*`` so the match does not depend on how the
    JavaScript happens to be indented inside each page, and the trailing text of
    the "Method 2" comment is ignored.
    """
    parts = [
        re.escape("// Method 1: Look for _internal folder pattern"),
        re.escape("var internalMatch = currentPath.match(/\\/([^\\/]+)\\/_internal\\//);"),
        re.escape("if (internalMatch) {"),
        re.escape("archiveFolder = internalMatch[1];"),
        re.escape("offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/');"),
        re.escape("} else {"),
        re.escape("// Method 2: Look for archive structure") + r"[^\n]*",
        re.escape(_archive_match_js(version)),
        re.escape("if (archiveMatch) {"),
        re.escape("archiveFolder = archiveMatch[1];"),
        re.escape("offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/');"),
        re.escape("}"),
        re.escape("}"),
    ]
    return re.compile(r"\s*".join(parts))


def _new_detection_js(version, indent="        "):
    """Return the replacement detection block that handles nested known directories."""
    lines = [
        "// Archive detection - handles nested known directories",
        "var offlineSnapIndex = -1;",
        "var archiveFolder = '';",
        "",
        "// Split path into parts for analysis",
        "var pathParts = currentPath.split('/').filter(function(p) { return p; });",
        "",
        "// List of known directories in our documentation structure",
        "var knownDirs = ['" + version + "', 'cockroachcloud', 'releases', 'advisories', 'molt', '_internal'];",
        "",
        "// Check if the path contains any known directories",
        "var hasKnownDir = false;",
        "var firstKnownDirIndex = -1;",
        "for (var j = 0; j < pathParts.length; j++) {",
        "    if (knownDirs.indexOf(pathParts[j]) !== -1) {",
        "        hasKnownDir = true;",
        "        if (firstKnownDirIndex === -1) {",
        "            firstKnownDirIndex = j;",
        "        }",
        "    }",
        "}",
        "",
        "if (!hasKnownDir && pathParts.length > 0) {",
        "    // We're likely at root - the archive folder is the parent of this file",
        "    // For /path/to/offline_snap/index.html, get 'offline_snap'",
        "    archiveFolder = pathParts[pathParts.length - 2];",
        "    if (archiveFolder && archiveFolder.indexOf('.html') === -1) {",
        "        offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/');",
        "    }",
        "} else if (firstKnownDirIndex > 0) {",
        "    // The archive folder is the parent of the first known directory",
        "    // For /path/offline_snap/releases/" + version + "/, archive is 'offline_snap'",
        "    archiveFolder = pathParts[firstKnownDirIndex - 1];",
        "    offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/');",
        "}",
    ]
    first, *rest = lines
    # The first line inherits the indentation already present in the page.
    return "\n".join([first] + [indent + line if line else "" for line in rest])


def fix_navigation_in_archive(version):
    """Apply the navigation fixes to every HTML file under ``offline_snap``.

    Three rewrites are attempted per file:
      1. repair the ``replace()`` call that is missing its second argument;
      2. retarget an older ``archiveMatch`` regex to *version*;
      3. swap the two-method detection block for the nested-directory version
         (only when the page has no ``var knownDirs`` yet).

    Files are rewritten only when their content changed. Files that are not
    valid UTF-8 are skipped with a warning instead of aborting the whole run.
    """
    print(f"  Applying navigation fixes for {version}...")

    broken_replace = f"url = url.replace(/^stable\\//, ).replace(/\\/stable\\//, '/{version}/');"
    fixed_replace = f"url = url.replace(/^stable\\//, '{version}/').replace(/\\/stable\\//, '/{version}/');"
    archive_match_js = _archive_match_js(version)
    old_detection = _old_detection_pattern(version)
    new_detection = _new_detection_js(version)

    fixed_count = 0
    for html_file in ARCHIVE_DIR.rglob("*.html"):
        try:
            original = html_file.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            print(f"    Warning: skipping non-UTF-8 file {html_file}")
            continue

        # Fix 1: JavaScript syntax error
        content = original.replace(broken_replace, fixed_replace)

        # Fix 2a: retarget an older archiveMatch regex to this version.
        # Function replacements keep backslashes literal (no template escapes).
        if _LEGACY_METHOD2_BLOCK.search(content):
            content = _LEGACY_ARCHIVE_MATCH.sub(lambda _m: archive_match_js, content)

        # Fix 2b: nested directory handling
        if 'var knownDirs' not in content and 'archiveMatch = currentPath.match' in content:
            content = old_detection.sub(lambda _m: new_detection, content)

        if content != original:
            html_file.write_text(content, encoding="utf-8")
            fixed_count += 1

    print(f"    Fixed {fixed_count} files")


def _newer_versions(version):
    """List every vX.Y label newer than *version*, up to v26.4 (empty if unparsable)."""
    parsed = _VERSION_RE.fullmatch(version)
    if parsed is None:
        return []
    major, minor = int(parsed.group(1)), int(parsed.group(2))
    later_majors = [
        f"v{maj}.{mnr}"
        for maj in range(major + 1, _MAX_MAJOR + 1)
        for mnr in range(1, _MAX_MINOR + 1)
    ]
    later_minors = [f"v{major}.{mnr}" for mnr in range(minor + 1, _MAX_MINOR + 1)]
    return later_majors + later_minors


def _strip_newer_version_links(content, version):
    """Remove sidebar anchors and list items that mention a version newer than *version*."""
    newer = _newer_versions(version)
    if not newer:
        return content
    # Escaped so "." is literal; (?!\d) keeps v23.2 from matching inside v23.20.
    newer_re = "(?:" + "|".join(re.escape(v) for v in newer) + r")(?!\d)"
    # NOTE(review): the patterns in the reviewed source had lost their opening
    # tag and ran from the first ">" to the version; <a>/<li> is assumed here --
    # confirm against the sidebar markup.
    anchor = re.compile(rf"<a\b[^>]*{newer_re}[^>]*>.*?</a>", re.DOTALL)
    # The tempered dot stops at the next "<li", so only the innermost item that
    # contains the version is dropped, not every item preceding it.
    list_item = re.compile(
        rf"<li\b[^>]*>(?:(?!<li\b).)*?{newer_re}.*?</li>", re.DOTALL
    )
    return list_item.sub("", anchor.sub("", content))


def create_version_archive(version):
    """Build ``cockroachdb-docs-<version>-offline.zip`` via the 14-step process.

    Side effects: deletes and recreates ``offline_snap``, rewrites
    ``snapshot_relative.py`` in place so it targets *version*, and replaces any
    existing ZIP of the same name.

    Returns:
        True when a fresh ZIP exists afterwards, False on an invalid version, a
        missing ``snapshot_relative.py``, a failed base snapshot, or a missing ZIP.
    """
    if not _VERSION_RE.fullmatch(version):
        print(f"Error: invalid version {version!r}; expected format vX.Y (e.g., v23.1)")
        return False

    print(f"\n{'='*60}")
    print(f"🚀 Creating {version} archive with FIXED navigation")
    print('='*60)

    # Clean up
    if ARCHIVE_DIR.exists():
        shutil.rmtree(ARCHIVE_DIR)

    # Point snapshot_relative.py at this version
    print(f"📝 Setting up for {version}...")
    snapshot_file = Path("snapshot_relative.py")
    if not snapshot_file.exists():
        print("❌ Error: snapshot_relative.py not found!")
        return False
    snapshot_content = snapshot_file.read_text(encoding="utf-8")
    snapshot_content = re.sub(r'sidebar-v[\d.]+\.html', f'sidebar-{version}.html', snapshot_content)
    snapshot_content = re.sub(r'TARGET_VERSION = "[^"]*"', f'TARGET_VERSION = "{version}"', snapshot_content)
    snapshot_file.write_text(snapshot_content, encoding="utf-8")

    print(f"\n📚 Running the 14-step archive creation process for {version}...")

    # Step 1: every later step needs the snapshot, so stop if it failed.
    if not run_cmd("python3 snapshot_relative.py", "Step 1: Creating base archive"):
        print("❌ Failed to create base archive")
        return False
    if not ARCHIVE_DIR.exists():
        print("❌ Error: offline_snap directory was not created")
        return False

    # Step 2: Apply navigation fixes
    run_cmd("python3 fix_navigation_quick.py", "Step 2: Applying navigation fixes")

    # Step 3: Fix version placeholders with a generated, version-specific script
    print("  Step 3: Fixing version placeholders...")
    fix_versions_script = f"""#!/usr/bin/env python3
from pathlib import Path
import re

for html in Path("offline_snap").rglob("*.html"):
    content = html.read_text(encoding="utf-8")
    # Replace any v2.1 references with {version}
    content = re.sub(r'/v2\\.1/', '/{version}/', content)
    content = re.sub(r'"v2\\.1/', '"{version}/', content)
    content = re.sub(r"'v2\\.1/", "'{version}/", content)
    content = re.sub(r'v2\\.1\\.html', '{version}.html', content)
    content = re.sub(r'sidebar-v2\\.1', 'sidebar-{version}', content)
    # Replace ${{VERSION}} with {version}
    content = content.replace('${{VERSION}}', '{version}')
    # Replace /stable/ with /{version}/
    content = re.sub(r'/stable/', '/{version}/', content)
    html.write_text(content, encoding="utf-8")
print("Fixed version placeholders")
"""
    temp_script = Path(f"fix_{version.replace('.', '_')}_versions.py")
    temp_script.write_text(fix_versions_script, encoding="utf-8")
    try:
        run_cmd(f"python3 {temp_script.name}", "  Running version fix")
    finally:
        # Only step 3 needs the script; remove it even if the step blows up.
        temp_script.unlink(missing_ok=True)

    # Step 4: Remove non-target sidebars
    run_cmd(f'find offline_snap/_internal -name "sidebar-v*.html" ! -name "sidebar-{version}.html" -delete',
            "Step 4: Removing other version sidebars")

    # Step 5: Clean target version sidebar
    print(f"  Step 5: Cleaning {version} sidebar...")
    sidebar_file = ARCHIVE_DIR / "_internal" / f"sidebar-{version}.html"
    if sidebar_file.exists():
        sidebar_html = sidebar_file.read_text(encoding="utf-8")
        sidebar_file.write_text(
            _strip_newer_version_links(sidebar_html, version), encoding="utf-8"
        )

    # Steps 6-14: Run remaining fix scripts
    run_cmd("python3 fix_js_sidebar_final.py", "Step 6: Fixing JavaScript sidebar")
    run_cmd("python3 fix_remaining_v25_refs.py", "Step 7: Fixing remaining references")
    run_cmd("mkdir -p offline_snap/advisories/internal", "Step 8: Creating advisories directory")
    run_cmd("cp _site/docs/advisories/internal/advisories.json offline_snap/advisories/internal/ 2>/dev/null || true",
            "Step 9: Copying advisories JSON")
    run_cmd("python3 fix_incomplete_sidebars.py", "Step 10: Fixing incomplete sidebars")

    # Step 11: CRITICAL - use make_navigation_dynamic_v2.py with the version parameter
    print(f"  Step 11: Making navigation dynamic with correct version ({version})...")
    run_cmd(f"python3 make_navigation_dynamic_v2.py offline_snap {version}",
            "  Making navigation dynamic with version-specific detection")

    run_cmd("python3 fix_root_navigation.py", "Step 12: Fixing root navigation")
    run_cmd("python3 fix_broken_sidebar_links.py", "Step 13: Fixing broken sidebar links")
    run_cmd("python3 fix_final_broken_links.py", "Step 14: Fixing final broken links")

    # Apply additional navigation fixes
    fix_navigation_in_archive(version)

    # Create ZIP
    print(f"\n📦 Creating ZIP archive for {version}...")
    zip_name = f"cockroachdb-docs-{version}-offline.zip"
    zip_file = Path(zip_name)
    # `zip -r` updates an existing archive in place; drop any stale one so the
    # existence check below proves this run produced the file.
    zip_file.unlink(missing_ok=True)
    run_cmd(f"zip -r {zip_name} offline_snap/ -q", "Creating ZIP file")

    # Check size
    if not zip_file.exists():
        print(f"❌ Failed to create {zip_name}")
        return False
    size_mb = zip_file.stat().st_size / (1024 * 1024)
    print(f"✅ Created: {zip_name} ({size_mb:.1f} MB)")
    return True


def main():
    """Create archives for the default versions and print a summary.

    Exits with status 1 when at least one version failed.
    """
    versions = ["v20.2", "v21.1", "v21.2", "v22.1", "v22.2"]
    successful = []
    failed = []

    print("="*60)
    print("📚 Creating Documentation Archives with FIXED Navigation")
    print(f"   Versions: {', '.join(versions)}")
    print("="*60)

    start_time = time.time()

    for version in versions:
        outcome = successful if create_version_archive(version) else failed
        outcome.append(version)

    # Clean up
    if ARCHIVE_DIR.exists():
        shutil.rmtree(ARCHIVE_DIR)

    # Summary
    elapsed = time.time() - start_time
    print("\n" + "="*60)
    print("📊 Archive Creation Summary")
    print("="*60)
    print(f"✅ Successful: {', '.join(successful) if successful else 'None'}")
    if failed:
        print(f"❌ Failed: {', '.join(failed)}")
    print(f"⏱️  Total time: {elapsed:.1f} seconds")
    print("="*60)

    # List created files
    print("\n📁 Created Archives:")
    for version in successful:
        zip_file = Path(f"cockroachdb-docs-{version}-offline.zip")
        if zip_file.exists():
            size_mb = zip_file.stat().st_size / (1024 * 1024)
            print(f"   • {zip_file.name}: {size_mb:.1f} MB")

    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main()
1024) + print(f" • {zip_file.name}: {size_mb:.1f} MB") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/current/create_single_archive.py b/src/current/create_single_archive.py new file mode 100644 index 00000000000..321e5296d22 --- /dev/null +++ b/src/current/create_single_archive.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Create a single documentation archive for a specific CockroachDB version +Usage: python3 create_single_archive.py +Example: python3 create_single_archive.py v23.1 +""" +import subprocess +import shutil +import re +import sys +from pathlib import Path +import time + +def run_cmd(cmd, description): + """Run a shell command""" + print(f" {description}...") + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + if result.returncode != 0 and "verify" not in cmd.lower(): + print(f" Warning: {result.stderr[:200] if result.stderr else 'Command had issues but continuing'}") + return result.returncode == 0 + +def fix_navigation_in_archive(version): + """Apply all navigation fixes to the archive""" + print(f" Applying navigation fixes for {version}...") + + fixed_count = 0 + for html_file in Path("offline_snap").rglob("*.html"): + content = html_file.read_text() + original = content + + # Fix 1: JavaScript syntax error + content = content.replace( + f"url = url.replace(/^stable\\//, ).replace(/\\/stable\\//, '/{version}/');", + f"url = url.replace(/^stable\\//, '{version}/').replace(/\\/stable\\//, '/{version}/');" + ) + + # Fix 2: Archive detection with nested directory handling + if 'var knownDirs' not in content and 'archiveMatch = currentPath.match' in content: + # Apply the nested directory fix + old_detection = """ // Method 1: Look for _internal folder pattern + var internalMatch = currentPath.match(/\\/([^\\/]+)\\/_internal\\//); + if (internalMatch) { + archiveFolder = internalMatch[1]; + offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/'); + } else { + // Method 2: Look 
for archive structure (docs, """ + version.replace('.', '\\.') + """, releases, etc.) + var archiveMatch = currentPath.match(/\\/([^\\/]+)\\/(docs|""" + version.replace('.', '\\.') + """|releases|advisories|cockroachcloud|molt)\\//); + if (archiveMatch) { + archiveFolder = archiveMatch[1]; + offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/'); + } + }""" + + new_detection = """ // Archive detection - handles nested known directories + var offlineSnapIndex = -1; + var archiveFolder = ''; + + // Split path into parts for analysis + var pathParts = currentPath.split('/').filter(function(p) { return p; }); + + // List of known directories in our documentation structure + var knownDirs = ['""" + version + """', 'cockroachcloud', 'releases', 'advisories', 'molt', '_internal']; + + // Check if the path contains any known directories + var hasKnownDir = false; + var firstKnownDirIndex = -1; + for (var j = 0; j < pathParts.length; j++) { + if (knownDirs.indexOf(pathParts[j]) !== -1) { + hasKnownDir = true; + if (firstKnownDirIndex === -1) { + firstKnownDirIndex = j; + } + } + } + + if (!hasKnownDir && pathParts.length > 0) { + // We're likely at root - the archive folder is the parent of this file + // For /path/to/offline_snap/index.html, get 'offline_snap' + archiveFolder = pathParts[pathParts.length - 2]; + if (archiveFolder && archiveFolder.indexOf('.html') === -1) { + offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/'); + } + } else if (firstKnownDirIndex > 0) { + // The archive folder is the parent of the first known directory + // For /path/offline_snap/releases/""" + version + """/, archive is 'offline_snap' + archiveFolder = pathParts[firstKnownDirIndex - 1]; + offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/'); + }""" + + content = content.replace(old_detection, new_detection) + + if content != original: + html_file.write_text(content) + fixed_count += 1 + + print(f" Fixed {fixed_count} files") + +def 
create_version_archive(version): + """Create archive for a specific version""" + print(f"\n{'='*60}") + print(f"🚀 Creating {version} archive") + print('='*60) + + # Clean up any existing offline_snap + if Path("offline_snap").exists(): + shutil.rmtree("offline_snap") + + # Step 1: Modify snapshot_relative.py for this version + print(f"📝 Setting up for {version}...") + + snapshot_file = Path("snapshot_relative.py") + if not snapshot_file.exists(): + print(f"❌ Error: snapshot_relative.py not found!") + return False + + snapshot_content = snapshot_file.read_text() + + # Update version references + snapshot_content = re.sub(r'sidebar-v[\d.]+\.html', f'sidebar-{version}.html', snapshot_content) + snapshot_content = re.sub(r'TARGET_VERSION = "[^"]*"', f'TARGET_VERSION = "{version}"', snapshot_content) + + snapshot_file.write_text(snapshot_content) + + # Step 2: Run the 14-step process + print(f"\n📚 Running the 14-step archive creation process for {version}...") + + # Step 1: Create base archive + if not run_cmd("python3 snapshot_relative.py", "Step 1: Creating base archive"): + print("❌ Failed to create base archive") + return False + + # Check if offline_snap was created + if not Path("offline_snap").exists(): + print("❌ Error: offline_snap directory was not created") + return False + + # Step 2: Apply navigation fixes + run_cmd("python3 fix_navigation_quick.py", "Step 2: Applying navigation fixes") + + # Step 3: Fix version placeholders + print(" Step 3: Fixing version placeholders...") + fix_versions_script = f"""#!/usr/bin/env python3 +from pathlib import Path +import re + +for html in Path("offline_snap").rglob("*.html"): + content = html.read_text() + # Replace any v2.1 references with {version} + content = re.sub(r'/v2\\.1/', '/{version}/', content) + content = re.sub(r'"v2\\.1/', '"{version}/', content) + content = re.sub(r"'v2\\.1/", "'{version}/", content) + content = re.sub(r'v2\\.1\\.html', '{version}.html', content) + content = re.sub(r'sidebar-v2\\.1', 
'sidebar-{version}', content) + # Replace ${{VERSION}} with {version} + content = content.replace('${{VERSION}}', '{version}') + # Replace /stable/ with /{version}/ + content = re.sub(r'/stable/', '/{version}/', content) + html.write_text(content) +print("Fixed version placeholders") +""" + temp_script = Path(f"fix_{version.replace('.', '_')}_versions.py") + temp_script.write_text(fix_versions_script) + run_cmd(f"python3 {temp_script.name}", " Running version fix") + + # Step 4: Remove non-target sidebars + run_cmd(f'find offline_snap/_internal -name "sidebar-v*.html" ! -name "sidebar-{version}.html" -delete', + "Step 4: Removing other version sidebars") + + # Step 5: Clean target version sidebar + print(f" Step 5: Cleaning {version} sidebar...") + sidebar_file = Path(f"offline_snap/_internal/sidebar-{version}.html") + if sidebar_file.exists(): + content = sidebar_file.read_text() + # Get the major.minor version number + major_minor = '.'.join(version[1:].split('.')[:2]) if version.startswith('v') else version + try: + major = int(major_minor.split('.')[0]) + minor = int(major_minor.split('.')[1]) + + # Remove references to newer versions + newer = [] + for maj in range(major + 1, 27): # Up to v26 + for min in range(1, 5): # Up to .4 + newer.append(f"v{maj}.{min}") + # Also remove newer minor versions of same major + for min in range(minor + 1, 5): + newer.append(f"v{major}.{min}") + + for v in newer: + content = re.sub(f']*{v}[^>]*>.*?', '', content, flags=re.DOTALL) + content = re.sub(f']*>.*?{v}.*?', '', content, flags=re.DOTALL) + + sidebar_file.write_text(content) + except (ValueError, IndexError): + print(f" Warning: Could not parse version {version} for sidebar cleaning") + + # Steps 6-14: Run remaining fix scripts + run_cmd("python3 fix_js_sidebar_final.py", "Step 6: Fixing JavaScript sidebar") + run_cmd("python3 fix_remaining_v25_refs.py", "Step 7: Fixing remaining references") + run_cmd("mkdir -p offline_snap/advisories/internal", "Step 8: Creating 
advisories directory") + run_cmd("cp _site/docs/advisories/internal/advisories.json offline_snap/advisories/internal/ 2>/dev/null || true", + "Step 9: Copying advisories JSON") + run_cmd("python3 fix_incomplete_sidebars.py", "Step 10: Fixing incomplete sidebars") + + # Step 11: Use the version-aware make_navigation_dynamic_v2.py + print(f" Step 11: Making navigation dynamic with correct version ({version})...") + if Path("make_navigation_dynamic_v2.py").exists(): + run_cmd(f"python3 make_navigation_dynamic_v2.py offline_snap {version}", + " Making navigation dynamic with version-specific detection") + else: + run_cmd(f"python3 make_navigation_dynamic.py offline_snap", + " Making navigation dynamic (legacy)") + + run_cmd("python3 fix_root_navigation.py", "Step 12: Fixing root navigation") + run_cmd("python3 fix_broken_sidebar_links.py", "Step 13: Fixing broken sidebar links") + run_cmd("python3 fix_final_broken_links.py", "Step 14: Fixing final broken links") + + # Apply additional navigation fixes + fix_navigation_in_archive(version) + + # Create ZIP + print(f"\n📦 Creating ZIP archive for {version}...") + zip_name = f"cockroachdb-docs-{version}-offline.zip" + run_cmd(f"zip -r {zip_name} offline_snap/ -q", "Creating ZIP file") + + # Cleanup temporary script + temp_script.unlink(missing_ok=True) + + # Check size + zip_file = Path(zip_name) + if zip_file.exists(): + size_mb = zip_file.stat().st_size / (1024 * 1024) + print(f"✅ Created: {zip_name} ({size_mb:.1f} MB)") + return True + else: + print(f"❌ Failed to create {zip_name}") + return False + +def main(): + """Main function""" + if len(sys.argv) != 2: + print("Usage: python3 create_single_archive.py ") + print("Example: python3 create_single_archive.py v23.1") + sys.exit(1) + + version = sys.argv[1] + + # Validate version format + if not version.startswith('v') or '.' 
def fix_sidebar_links(html_content, archive_path, target_version="v19.2"):
    """Rewrite sidebar links that cannot resolve inside the offline archive.

    Three passes are applied, in order:

    1. Quoted ``v25.3/...`` links are redirected to ``target_version``.
    2. Query strings are stripped from quoted ``*.html?...`` URLs (the
       Terraform provisioning page is sent to the Cloud quickstart instead).
    3. Any remaining quoted ``v23.x``/``v24.x``/``v25.x`` link is redirected
       to ``target_version``.

    Args:
        html_content: Full text of one HTML page.
        archive_path: Root folder of the archive (``str`` or ``Path``); only
            used to probe whether a redirect target exists on disk.
        target_version: Version folder the archive was built for. Defaults to
            ``"v19.2"``, which reproduces the historical behaviour. Links that
            already point at ``target_version`` are never rewritten, so an
            archive built for e.g. ``v23.1`` keeps its own links intact.

    Returns:
        ``(fixed_content, changes_made)`` where ``changes_made`` is a list of
        human-readable descriptions, one per rewritten link.
    """
    archive_path = Path(archive_path)
    fixed_content = html_content
    changes_made = []

    def retarget(prefix, page):
        # Rebuild the quoted link under the target version, keeping the
        # optional leading slash exactly as it appeared in the source.
        return f'"{prefix}{target_version}/{page}"'

    # 1. Redirect v25.3 links to their equivalent in the target version.
    def replace_v25(match):
        prefix, page = match.groups()
        changes_made.append(f"v25.3/{page} -> {target_version}/{page}")
        if not (archive_path / f"{target_version}/{page}").exists():
            # Extensionless links: prefer "<page>.html" when that file exists.
            base_name = page.replace('.html', '')
            if (archive_path / f"{target_version}/{base_name}.html").exists():
                return retarget(prefix, f"{base_name}.html")
        # Fall back to the target version anyway (better than a v25.3 link
        # that is guaranteed to be missing from the archive).
        return retarget(prefix, page)

    if target_version != "v25.3":
        fixed_content = re.sub(r'"(/?)v25\.3/([^"]+)"', replace_v25, fixed_content)

    # 2. Query parameters are meaningless for files opened from disk.
    def strip_query(match):
        url = match.group(1)
        # Special case for terraform provisioning - redirect to a related page
        if 'provision-a-cluster-with-terraform' in url:
            changes_made.append(f"{match.group(0)} -> cockroachcloud/quickstart.html")
            return '"/cockroachcloud/quickstart.html"'
        changes_made.append(f"Stripped query params from {url}")
        return f'"{url}"'

    fixed_content = re.sub(r'"([^"]+\.html)\?[^"]*"', strip_query, fixed_content)

    # 3. Redirect any other v23.x / v24.x / v25.x link, except links that
    #    already point at the version this archive was built for.
    def replace_other_versions(match):
        prefix, version, page = match.groups()
        if version == target_version:
            return match.group(0)
        changes_made.append(f"v2x.x/{page} -> {target_version}/{page}")
        return retarget(prefix, page)

    fixed_content = re.sub(
        r'"(/?)(v2[345]\.\d+)/([^"]+)"', replace_other_versions, fixed_content
    )

    return fixed_content, changes_made
def fix_non_existent_links(html_content, archive_path, target_version="v19.2"):
    """Redirect links to pages that are missing from the archived version.

    Two passes are applied:

    1. A curated map sends known-missing pages to their closest equivalent.
       A redirect is applied only when the old page is really absent from
       the archive AND the replacement page is present.
    2. Every remaining quoted ``<target_version>/<page>.html`` link whose
       file is missing is redirected by keyword (examples/demos -> index,
       security -> security.html, kubernetes/k8s -> the Kubernetes
       orchestration page). Any ``#anchor`` on such a link is dropped because
       it would not exist on the replacement page. Links that match no
       keyword are left unchanged.

    Args:
        html_content: Full text of one HTML page.
        archive_path: Root folder of the archive (``str`` or ``Path``).
        target_version: Version folder to inspect. Defaults to ``"v19.2"``.

    Returns:
        ``(fixed_content, changes_made)`` where ``changes_made`` lists one
        description per applied redirect.
    """
    archive_path = Path(archive_path)
    fixed_content = html_content
    changes_made = []

    # Missing page -> best alternative, both relative to the version folder.
    redirect_pages = {
        'example-apps.html': 'build-an-app-with-cockroachdb.html',
        'kubernetes-overview.html': 'orchestrate-cockroachdb-with-kubernetes.html',
        'demo-cockroachdb-resilience.html': 'demo-fault-tolerance-and-recovery.html',
        'sso-sql.html': 'authentication.html',
        'security-reference/transport-layer-security.html': 'security.html',
        'hashicorp-integration.html': 'orchestration.html',
        'cockroachdb-feature-availability.html': 'enterprise-licensing.html',
    }

    for old_page, new_page in redirect_pages.items():
        old_url = f"{target_version}/{old_page}"
        new_url = f"{target_version}/{new_page}"
        # Never redirect away from a page that exists, nor towards one that
        # does not: either would replace a working link with a worse one.
        if (archive_path / old_url).exists() or not (archive_path / new_url).exists():
            continue
        # Links appear both relative ("v19.2/x.html") and rooted ("/v19.2/x.html").
        for prefix in ('', '/'):
            needle = f'"{prefix}{old_url}"'
            if needle in fixed_content:
                fixed_content = fixed_content.replace(needle, f'"{prefix}{new_url}"')
                changes_made.append(f"{old_url} -> {new_url}")

    # (keywords, replacement page, label used in the change log)
    keyword_fallbacks = (
        (('example', 'demo'), 'index.html', 'index'),
        (('security',), 'security.html', 'security.html'),
        (
            ('kubernetes', 'k8s'),
            'orchestrate-cockroachdb-with-kubernetes.html',
            'orchestrate-cockroachdb-with-kubernetes.html',
        ),
    )

    url_pattern = rf'"(/?){re.escape(target_version)}/([^"#]+)(#[^"]+)?"'

    def check_and_fix(match):
        slash, page = match.group(1), match.group(2)
        missing = not (archive_path / f"{target_version}/{page}").exists()
        if missing and page.endswith('.html'):
            base_name = page.replace('.html', '')
            for keywords, replacement, label in keyword_fallbacks:
                if any(word in base_name for word in keywords):
                    changes_made.append(f"Redirected {page} to {label}")
                    return f'"{slash}{target_version}/{replacement}"'
        return match.group(0)  # Keep original if it exists or can't be fixed

    fixed_content = re.sub(url_pattern, check_and_fix, fixed_content)

    return fixed_content, changes_made
def fix_page_sidebar(file_path, comprehensive_sidebar):
    """Swap a page's inline ``const sidebar = {...};`` for the full sidebar.

    The page is searched for the first ``const sidebar = {`` marker and the
    first ``};`` that follows it; everything from the marker through that
    terminator is replaced with ``comprehensive_sidebar``.

    Returns True when the file was rewritten, and False when the page has no
    sidebar object, the object is unterminated, or any error occurred (the
    error is printed, not raised).
    """
    marker = 'const sidebar = {'
    terminator = '};'
    try:
        page = file_path.read_text(encoding='utf-8')

        before, found, after = page.partition(marker)
        if not found:
            return False

        # The marker itself cannot contain or begin the terminator, so
        # searching the remainder is the same as searching from the marker.
        close_at = after.find(terminator)
        if close_at < 0:
            return False

        rebuilt = before + comprehensive_sidebar + after[close_at + len(terminator):]
        file_path.write_text(rebuilt, encoding='utf-8')
        return True

    except Exception as e:
        print(f"❌ Error fixing {file_path}: {e}")
        return False
def fix_file(file_path, versions_to_remove=None):
    """Strip sidebar entries that reference unwanted documentation versions.

    For every version, four quoted forms are deleted from the file text:
    ``"vX.Y/..."`` and ``'vX.Y/...'`` URL strings, and ``"vX.Y": "..."`` /
    ``'vX.Y': '...'`` key/value pairs, each with an optional trailing comma.
    Stray commas left behind (``,,``, ``,]``, ``,}``, ``[,``, ``{,``) are
    then collapsed.

    Args:
        file_path: ``Path`` of the HTML file to rewrite in place.
        versions_to_remove: Iterable of version strings such as ``"v25.3"``.
            Defaults to every release from v20.1 through v25.3, i.e. anything
            newer than v19.2. Pass an explicit list when the archive targets
            a different version so its own links are preserved.

    Returns:
        True if the file content changed and was written back, else False
        (including when an error occurred; the error is printed).
    """
    if versions_to_remove is None:
        # Historical default: anything newer than v19.2.
        versions_to_remove = (
            'v25.3', 'v25.2', 'v25.1',
            'v24.3', 'v24.2', 'v24.1',
            'v23.2', 'v23.1',
            'v22.2', 'v22.1',
            'v21.2', 'v21.1',
            'v20.2', 'v20.1',
        )

    try:
        content = file_path.read_text(encoding='utf-8')
        original_content = content

        for version in versions_to_remove:
            # Escape the version so "." only matches a literal dot; left
            # unescaped, "v25.3" would also match strings such as "v25x3".
            ver = re.escape(version)
            patterns = (
                rf'"{ver}/[^"]*",?\s*',           # "v25.1/page.html",
                rf"'{ver}/[^']*',?\s*",           # 'v25.1/page.html',
                rf'"{ver}"\s*:\s*"[^"]*",?\s*',   # "v25.1": "something",
                rf"'{ver}'\s*:\s*'[^']*',?\s*",   # 'v25.1': 'something',
            )
            for pattern in patterns:
                content = re.sub(pattern, '', content)

        # Clean up any leftover commas and formatting issues
        content = re.sub(r',\s*,', ',', content)    # double commas
        content = re.sub(r',\s*\]', ']', content)   # trailing comma before ]
        content = re.sub(r',\s*\}', '}', content)   # trailing comma before }
        content = re.sub(r'\[\s*,', '[', content)   # leading comma after [
        content = re.sub(r'\{\s*,', '{', content)   # leading comma after {

        if content != original_content:
            file_path.write_text(content, encoding='utf-8')
            return True
        return False

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return False
relative, just return + } else if (sidebar.baseUrl.startsWith('file://')) { + // Legacy absolute path - convert to relative + url = url; + } else { + url = sidebar.baseUrl + url; + }''' + + # Insert our bulletproof logic BEFORE the baseUrl logic + new_pattern = '''// Clean up any double slashes + url = url.replace(/\/+/g, '/'); + + // BULLETPROOF offline navigation fix + var currentPath = window.location.pathname; + var offlineSnapIndex = currentPath.indexOf('/offline_snap/'); + if (offlineSnapIndex !== -1) { + // We're in the offline snap - calculate relative path to target + var currentFromSnap = currentPath.substring(offlineSnapIndex + '/offline_snap/'.length); + var currentParts = currentFromSnap.split('/').filter(function(p) { return p; }); + + // Remove the current filename to get directory path + currentParts.pop(); + + // Calculate how many ../ we need to get to offline_snap root + var upLevels = currentParts.length; + var upPath = ''; + for (var i = 0; i < upLevels; i++) { + upPath += '../'; + } + + // Target path is always relative to offline_snap root + url = upPath + url; + } + + // Use relative path for portability + // Don't prepend baseUrl for relative navigation + if (!sidebar.baseUrl || sidebar.baseUrl === '') { + // Already relative, just return + } else if (sidebar.baseUrl.startsWith('file://')) { + // Legacy absolute path - convert to relative + url = url; + } else { + url = sidebar.baseUrl + url; + }''' + + if old_pattern in content: + new_content = content.replace(old_pattern, new_pattern) + file_path.write_text(new_content, encoding='utf-8') + return True + else: + return False + + except Exception as e: + print(f"❌ Error fixing {file_path}: {e}") + return False + +def main(): + """Apply the quick navigation fix""" + if not OFFLINE_SNAP.exists(): + print(f"❌ Offline snap directory not found: {OFFLINE_SNAP}") + return + + print("🚀 Applying QUICK navigation fix to generated files...") + + fixed_count = 0 + total_count = 0 + + # Find all HTML 
def fix_file(file_path, versions_to_remove=None):
    """Remove rooted URL entries (``"/vX.Y/..."``) for unwanted versions.

    Each quoted URL that starts with ``/<version>/`` is deleted together
    with an optional trailing comma and whitespace, for both double- and
    single-quoted strings. Afterwards empty ``"urls"`` arrays are normalised
    to ``"urls": []`` and dangling commas next to ``[`` or ``]`` are removed.

    Args:
        file_path: ``Path`` of the HTML file to rewrite in place.
        versions_to_remove: Iterable of version strings such as ``"v25.1"``.
            Defaults to every release from v20.1 through v25.3. Pass an
            explicit list when the archive targets a different version.

    Returns:
        True if the file content changed and was written back, else False
        (including when an error occurred; the error is printed).
    """
    if versions_to_remove is None:
        versions_to_remove = (
            'v25.3', 'v25.2', 'v25.1',
            'v24.3', 'v24.2', 'v24.1',
            'v23.2', 'v23.1',
            'v22.2', 'v22.1',
            'v21.2', 'v21.1',
            'v20.2', 'v20.1',
        )

    try:
        content = file_path.read_text(encoding='utf-8')
        original_content = content

        for version in versions_to_remove:
            # Escape the version so its "." is a literal dot rather than a
            # regex wildcard (otherwise "/v25.3/" also matches "/v25x3/").
            ver = re.escape(version)
            patterns = (
                rf'"/{ver}/[^"]*"(?:\s*,)?\s*',   # "/v25.1/page.html",
                rf"'/{ver}/[^']*'(?:\s*,)?\s*",   # '/v25.1/page.html',
            )
            for pattern in patterns:
                content = re.sub(pattern, '', content)

        # Clean up empty arrays and trailing commas
        content = re.sub(r'"urls":\s*\[\s*\]', '"urls": []', content)
        content = re.sub(r',\s*\]', ']', content)
        content = re.sub(r'\[\s*,', '[', content)

        if content != original_content:
            file_path.write_text(content, encoding='utf-8')
            return True
        return False

    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        return False
- 2; i >= 0; i--) { + var part = pathParts[i + 1]; + // Check if this part is one of our known directories + if (part === 'v19.2' || part === 'cockroachcloud' || + part === 'releases' || part === 'advisories' || + part === 'molt' || part === '_internal' || part === 'docs') { + // The previous part is our archive folder + if (pathParts[i]) { + archiveFolder = pathParts[i]; + offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/'); + break; + } + } + }""" + + improved_detection = """// Dynamic archive folder detection - FIXED FOR ROOT + var offlineSnapIndex = -1; + var archiveFolder = ''; + + // Split the path and look for the archive folder + var pathParts = currentPath.split('/'); + + // Special handling for root-level files (index.html at archive root) + // Check if current file is at root by looking for subdirectories in same folder + var isRootFile = false; + + // If the path doesn't contain any of our known directories, we might be at root + var hasKnownDir = false; + for (var j = 0; j < pathParts.length; j++) { + if (pathParts[j] === 'v19.2' || pathParts[j] === 'cockroachcloud' || + pathParts[j] === 'releases' || pathParts[j] === 'advisories' || + pathParts[j] === 'molt' || pathParts[j] === '_internal') { + hasKnownDir = true; + break; + } + } + + if (!hasKnownDir && pathParts.length > 0) { + // We're likely at root - the archive folder is the parent of this file + archiveFolder = pathParts[pathParts.length - 2] || pathParts[pathParts.length - 1]; + if (archiveFolder && archiveFolder.indexOf('.html') === -1) { + offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/'); + isRootFile = true; + } + } + + // If not a root file, use the standard detection + if (!isRootFile) { + for (var i = pathParts.length - 2; i >= 0; i--) { + var part = pathParts[i + 1]; + // Check if this part is one of our known directories + if (part === 'v19.2' || part === 'cockroachcloud' || + part === 'releases' || part === 'advisories' || + part === 'molt' || 
part === '_internal' || part === 'docs') { + // The previous part is our archive folder + if (pathParts[i]) { + archiveFolder = pathParts[i]; + offlineSnapIndex = currentPath.lastIndexOf('/' + archiveFolder + '/'); + break; + } + } + } + }""" + + new_content = content.replace(broken_detection, improved_detection) + + if new_content != content: + file_path.write_text(new_content, encoding='utf-8') + return True + + return False + + except Exception as e: + print(f"Error processing {file_path}: {e}") + return False + +def main(): + # Find archive folder + archive_folders = ['my_dynamic_archive', 'test_portable_docs', 'offline_snap'] + archive_path = None + + for folder in archive_folders: + if Path(folder).exists(): + archive_path = Path(folder) + break + + if not archive_path: + print("❌ No archive folder found") + return + + print(f"🔧 Fixing root navigation in {archive_path}") + + # Process root-level HTML files only + root_files = [f for f in archive_path.glob("*.html")] + + fixed_count = 0 + for html_file in root_files: + if fix_root_navigation(html_file): + fixed_count += 1 + print(f" ✅ Fixed: {html_file.name}") + + print(f"\n✅ Fixed {fixed_count} root-level files") + print("📁 Root navigation should now work correctly") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/current/make_navigation_dynamic.py b/src/current/make_navigation_dynamic.py new file mode 100644 index 00000000000..da0552f1f20 --- /dev/null +++ b/src/current/make_navigation_dynamic.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +""" +Make archive navigation work with any folder name by replacing hardcoded offline_snap references +""" +import os +import re +import sys +from pathlib import Path + +def make_navigation_dynamic(file_path): + """Replace hardcoded offline_snap references with dynamic folder detection""" + try: + content = file_path.read_text(encoding='utf-8') + + # Pattern 1: Replace the hardcoded indexOf('/offline_snap/') with dynamic detection + 
pattern1 = r"var offlineSnapIndex = currentPath\.indexOf\('/offline_snap/'\);" + replacement1 = '''// Dynamic archive folder detection + var offlineSnapIndex = -1; + var archiveFolder = ''; + + // Method 1: Look for _internal folder pattern + var internalMatch = currentPath.match(/\\/([^\\/]+)\\/_internal\\//); + if (internalMatch) { + archiveFolder = internalMatch[1]; + offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/'); + } else { + // Method 2: Look for archive structure (docs, v19.2, releases, etc.) + var archiveMatch = currentPath.match(/\\/([^\\/]+)\\/(docs|v19\\.2|releases|advisories|cockroachcloud|molt)\\//); + if (archiveMatch) { + archiveFolder = archiveMatch[1]; + offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/'); + } + }''' + + # Pattern 2: Replace the hardcoded substring calculation + pattern2 = r"var currentFromSnap = currentPath\.substring\(offlineSnapIndex \+ '/offline_snap/'\.length\);" + replacement2 = "var currentFromSnap = currentPath.substring(offlineSnapIndex + ('/' + archiveFolder + '/').length);" + + # Apply replacements + new_content = re.sub(pattern1, replacement1, content, flags=re.MULTILINE) + new_content = re.sub(pattern2, replacement2, new_content, flags=re.MULTILINE) + + # Also fix comments that mention "offline_snap root" + new_content = new_content.replace('// Calculate how many ../ we need to get to offline_snap root', + '// Calculate how many ../ we need to get to archive root') + new_content = new_content.replace('// Target path is always relative to offline_snap root', + '// Target path is always relative to archive root') + + if new_content != content: + file_path.write_text(new_content, encoding='utf-8') + return True + + return False + + except Exception as e: + print(f"Error processing {file_path}: {e}") + return False + +def main(): + if len(sys.argv) > 1: + archive_path = Path(sys.argv[1]) + else: + archive_path = Path("offline_snap") + + if not archive_path.exists(): + print(f"❌ Archive 
def make_navigation_dynamic(file_path, target_version=None):
    """Replace hardcoded ``offline_snap`` lookups with runtime folder detection.

    The page's inline JavaScript locates the archive root with
    ``currentPath.indexOf('/offline_snap/')``. This rewrites that statement
    (and the matching ``substring`` computation) so the archive folder name is
    detected at runtime from the URL, which lets the archive be renamed.

    Args:
        file_path: ``Path`` of the HTML file to rewrite in place.
        target_version: Version folder name (e.g. ``"v20.2"``) to recognise
            as an archive sub-directory. When omitted, a generic
            ``v<digits>.<digits>`` pattern is emitted instead.

    Returns:
        True if the file changed and was written back, else False (including
        when an error occurred; the error is printed).
    """
    try:
        content = file_path.read_text(encoding='utf-8')

        # This text is spliced into a JavaScript regex literal, so it is a
        # JS pattern, not a Python one.
        if target_version:
            # Use the specific version provided
            version_pattern = target_version.replace('.', '\\.')
        else:
            # Generic pattern that matches any version (v<digits>.<digits>)
            version_pattern = r'v\d+\.\d+'

        # Pattern 1: the hardcoded indexOf('/offline_snap/') lookup
        pattern1 = r"var offlineSnapIndex = currentPath\.indexOf\('/offline_snap/'\);"
        replacement1 = f'''// Dynamic archive folder detection
        var offlineSnapIndex = -1;
        var archiveFolder = '';

        // Method 1: Look for _internal folder pattern
        var internalMatch = currentPath.match(/\\/([^\\/]+)\\/_internal\\//);
        if (internalMatch) {{
            archiveFolder = internalMatch[1];
            offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/');
        }} else {{
            // Method 2: Look for archive structure (docs, {version_pattern}, releases, etc.)
            var archiveMatch = currentPath.match(/\\/([^\\/]+)\\/(docs|{version_pattern}|releases|advisories|cockroachcloud|molt)\\//);
            if (archiveMatch) {{
                archiveFolder = archiveMatch[1];
                offlineSnapIndex = currentPath.indexOf('/' + archiveFolder + '/');
            }}
        }}'''

        # Pattern 2: the hardcoded substring calculation
        pattern2 = r"var currentFromSnap = currentPath\.substring\(offlineSnapIndex \+ '/offline_snap/'\.length\);"
        replacement2 = "var currentFromSnap = currentPath.substring(offlineSnapIndex + ('/' + archiveFolder + '/').length);"

        # The replacements are passed as callables so re.sub inserts them
        # verbatim. As a template string, the "\d" in the generic version
        # pattern is rejected by re.sub as a bad escape, which made every
        # call without target_version fail.
        new_content = re.sub(pattern1, lambda _match: replacement1, content)
        new_content = re.sub(pattern2, lambda _match: replacement2, new_content)

        # Also fix comments that mention "offline_snap root"
        new_content = new_content.replace(
            '// Calculate how many ../ we need to get to offline_snap root',
            '// Calculate how many ../ we need to get to archive root',
        )
        new_content = new_content.replace(
            '// Target path is always relative to offline_snap root',
            '// Target path is always relative to archive root',
        )

        if new_content != content:
            file_path.write_text(new_content, encoding='utf-8')
            return True

        return False

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False
Archive folder {archive_path} not found!") + return + + print(f"🔧 Making navigation dynamic in: {archive_path}") + if target_version: + print(f"📌 Using specific version: {target_version}") + else: + print(f"📌 Using generic version pattern (matches any vX.Y)") + + # Find all HTML files + html_files = list(archive_path.rglob("*.html")) + + fixed_count = 0 + total_files = len(html_files) + + for i, html_file in enumerate(html_files): + if i % 100 == 0 and i > 0: + print(f"Progress: {i}/{total_files} ({i/total_files*100:.1f}%)") + + if make_navigation_dynamic(html_file, target_version): + fixed_count += 1 + + print(f"✅ Made navigation dynamic in {fixed_count} HTML files") + print(f"🎯 Archive can now be renamed to any folder name!") + print(f"📁 Navigation will auto-detect the archive folder and work correctly") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/current/snapshot_relative.py b/src/current/snapshot_relative.py new file mode 100644 index 00000000000..54c5c51eefb --- /dev/null +++ b/src/current/snapshot_relative.py @@ -0,0 +1,1632 @@ +#!/usr/bin/env python3 +""" +Complete Offline Documentation Archiver for Jekyll CockroachDB Documentation +HYBRID VERSION - Combines vibrant sidebar styling, professional homepage, optimized assets, and improved navigation logic +""" +import re +import shutil +import requests +import os +import sys +from pathlib import Path +from urllib.parse import urlparse, urljoin +from bs4 import BeautifulSoup +import json +from datetime import datetime +import hashlib + +# Configuration +JEKYLL_ROOT = Path.cwd() +SITE_ROOT = JEKYLL_ROOT / "_site" +DOCS_ROOT = SITE_ROOT / "docs" +OUTPUT_ROOT = JEKYLL_ROOT / "offline_snap" + +# The pre-rendered sidebar file +SIDEBAR_HTML_PATH = DOCS_ROOT / "_internal" / "sidebar-v22.2.html" + +TARGET_VERSION = "v22.2" + +# Common pages to include +COMMON_PAGES = [ + "index.html", + "cockroachcloud/*.html", + "releases/*.html", + "advisories/*.html", + "molt/*.html", # Include 
def check_file_exists(self, url):
    """Report whether a sidebar URL resolves to a file under DOCS_ROOT.

    External links, anchors, ``mailto:``/``javascript:`` links, empty values
    and the site root are always treated as existing. Anything under
    ``cockroachcloud/``, ``releases/`` or ``advisories/`` is accepted even
    when no matching file is found. Any unexpected error is logged at DEBUG
    level and reported as "does not exist".
    """
    try:
        skip_prefixes = ('http://', 'https://', '#', 'mailto:', 'javascript:')
        if not url or url.startswith(skip_prefixes):
            # External/anchor links are always valid
            return True

        target = str(url).strip()

        # The root index always exists
        if target in ['/', '', 'index', 'index.html']:
            return True

        # Drop the docs prefix, then any leading slashes
        if target.startswith('/docs/'):
            target = target[6:]
        elif target.startswith('docs/'):
            target = target[5:]
        target = target.lstrip('/')

        # Map the "stable" alias onto the target version
        target = target.replace('/stable/', f'/{TARGET_VERSION}/')
        target = target.replace('stable/', f'{TARGET_VERSION}/')
        if target == 'stable':
            target = TARGET_VERSION

        # Expand the ${VERSION} placeholder
        target = target.replace('${VERSION}', TARGET_VERSION)

        # Candidate on-disk spellings, tried in this order
        candidates = [target]
        last_segment = target.split('/')[-1]
        if target and not target.endswith('.html') and '.' not in last_segment:
            candidates.append(target + '.html')
        if target and not target.endswith('/'):
            candidates.append(target + '/index.html')
        if target.endswith('/'):
            candidates.append(target.rstrip('/') + '.html')

        for candidate in candidates:
            if not candidate:
                continue
            try:
                if (DOCS_ROOT / candidate).exists():
                    return True
            except Exception:
                continue

        # Non-versioned directories are accepted without verifying the file;
        # everything else is considered missing.
        permissive_dirs = ('cockroachcloud/', 'releases/', 'advisories/')
        return any(marker in target for marker in permissive_dirs)

    except Exception as e:
        # If there's any error checking, log it and assume false to be safe
        self.log(f"Error checking file existence for {url}: {e}", "DEBUG")
        return False
def js_to_json(self, js_text):
    """Convert a JavaScript object/array literal into JSON text.

    The conversion is heuristic and works in two passes:

    1. Line by line: strip ``//`` comments that are outside string literals,
       replace inline ``function (...) {...}`` values with ``null`` and
       quote a bare property name at the start of a line.
    2. Character by character: turn single-quoted strings into double-quoted
       ones (escaping embedded ``"`` and unescaping ``\\'``) and drop a
       trailing comma in front of ``}`` or ``]``, including when the closing
       bracket sits on a later line. Text inside string literals is never
       treated as structure.

    Finally the bare word ``undefined`` is replaced with ``null``.

    Args:
        js_text: JavaScript source of the literal.

    Returns:
        The JSON text, or ``""`` when the input is empty/blank or conversion
        fails (the failure is logged at WARNING level).
    """

    def strip_line_comment(line):
        # Cut the line at the first "//" that is not inside a quoted string.
        if '//' not in line:
            return line
        open_quote = None
        for pos, ch in enumerate(line):
            if open_quote is None:
                if ch in ('"', "'"):
                    open_quote = ch
                elif ch == '/' and line[pos + 1:pos + 2] == '/':
                    return line[:pos].rstrip()
            elif ch == open_quote and (pos == 0 or line[pos - 1] != '\\'):
                open_quote = None
        return line

    try:
        if not js_text or not js_text.strip():
            return ""

        # First pass - line-oriented fixes
        fixed_lines = []
        for raw_line in js_text.split('\n'):
            line = strip_line_comment(raw_line)

            # Remove function definitions
            line = re.sub(r':\s*function\s*\([^)]*\)\s*\{[^}]*\}', ': null', line)

            # Fix unquoted property names ONLY at start of line
            stripped = line.strip()
            if stripped and ':' in stripped and not stripped.startswith(('"', '[', '{')):
                match = re.match(r'^(\s*)([a-zA-Z_$][a-zA-Z0-9_$]*)(\s*:\s*)(.*)', line)
                if match:
                    indent, prop_name, colon_part, rest = match.groups()
                    line = f'{indent}"{prop_name}"{colon_part}{rest}'

            fixed_lines.append(line)

        text = '\n'.join(fixed_lines)

        # Second pass - quote normalisation and trailing-comma removal
        out = []
        open_quote = None  # None outside strings, otherwise '"' or "'"
        i = 0
        length = len(text)
        while i < length:
            ch = text[i]
            if open_quote is None:
                if ch in ('"', "'"):
                    # Every string literal is emitted double-quoted
                    open_quote = ch
                    out.append('"')
                elif ch in '}]':
                    # Drop a dangling comma before the closing bracket, even
                    # if whitespace or newlines separate the two.
                    back = len(out) - 1
                    while back >= 0 and out[back].isspace():
                        back -= 1
                    if back >= 0 and out[back] == ',':
                        del out[back]
                    out.append(ch)
                else:
                    out.append(ch)
            elif ch == '\\':
                # Escape sequence: consume the escaped character as well
                following = text[i + 1] if i + 1 < length else ''
                if following == "'":
                    # \' is legal JavaScript but not legal JSON
                    out.append("'")
                else:
                    out.append(ch)
                    if following:
                        out.append(following)
                if following:
                    i += 1
            elif ch == open_quote:
                open_quote = None
                out.append('"')
            elif ch == '"':
                # A double quote inside a single-quoted string must be escaped
                out.append('\\"')
            else:
                out.append(ch)
            i += 1

        result = ''.join(out)

        # Handle undefined (note: this is a plain word match, it does not
        # distinguish string contents from structure)
        result = re.sub(r'\bundefined\b', 'null', result)

        return result

    except Exception as e:
        # Callers treat an empty string as "could not convert"
        self.log(f"Error in js_to_json: {e}", "WARNING")
        return ""
def load_sidebar(self):
    """Load the pre-rendered sidebar HTML and prepare it for offline use.

    Reads ``SIDEBAR_HTML_PATH`` (falling back to the target version's sidebar
    under ``DOCS_ROOT/_internal`` and ``SITE_ROOT/_internal``), then:

    * removes sidebar entries whose targets do not exist and adds the number
      removed to ``self.total_broken_urls``;
    * replaces the ``isVersionDirectory`` JavaScript function with one that
      accepts only the target version and ``stable``;
    * strips Ask AI / Kapa widgets and search controls;
    * rewrites internal ``<a href>`` values: ``stable`` becomes the target
      version and the ``/docs/`` prefix and leading slashes are removed.

    The result is stored in ``self.sidebar_html``.

    Returns:
        True when a sidebar was found and processed, False otherwise (a
        warning is logged).
    """
    self.log(f"Loading sidebar from: {SIDEBAR_HTML_PATH}")

    if SIDEBAR_HTML_PATH.exists():
        self.sidebar_html = SIDEBAR_HTML_PATH.read_text(encoding="utf-8")
    else:
        # Try alternative locations. The file name follows TARGET_VERSION so
        # the fallback keeps working when the target version changes.
        sidebar_name = f"sidebar-{TARGET_VERSION}.html"
        alt_paths = [
            DOCS_ROOT / "_internal" / sidebar_name,
            SITE_ROOT / "_internal" / sidebar_name,
        ]

        for alt_path in alt_paths:
            if alt_path.exists():
                self.log(f"Found sidebar at: {alt_path}", "SUCCESS")
                self.sidebar_html = alt_path.read_text(encoding="utf-8")
                break

    if not self.sidebar_html:
        self.log("Sidebar not found", "WARNING")
        return False

    # Clean the sidebar using our working method
    self.log("Cleaning sidebar data (removing broken links)...")
    cleaned_sidebar, removed_count = self.clean_sidebar_in_html(self.sidebar_html)
    self.sidebar_html = cleaned_sidebar
    self.total_broken_urls += removed_count

    # Simplify isVersionDirectory so it recognises only the target version.
    # A callable replacement keeps the JavaScript text literal.
    version_check_js = (
        'isVersionDirectory: function (d) { return d === "'
        + TARGET_VERSION
        + '" || d === "stable"; }'
    )
    self.sidebar_html = re.sub(
        r'isVersionDirectory:\s*function\s*\([^}]*\{[^}]*\}',
        lambda _match: version_check_js,
        self.sidebar_html
    )

    # Clean the sidebar HTML of any Ask AI elements
    sidebar_soup = BeautifulSoup(self.sidebar_html, "html.parser")

    # Remove Ask AI elements from sidebar
    remove_selectors = [
        '.ask-ai', '#ask-ai', '[data-ask-ai]', '.kapa-widget',
        '[class*="kapa"]', '[id*="kapa"]', 'script[src*="kapa"]',
        '[class*="ask-ai"]', '[id*="ask-ai"]',
        # Remove search elements that won't work offline
        '.search', '#search', '.search-bar', '.search-input', '.search-form',
        '[class*="search"]', '[id*="search"]', 'input[type="search"]',
        '.algolia-search', '.docsearch', '[class*="docsearch"]',
        # Target forms and inputs with search-related attributes
        'form[action*="search"]', 'input[placeholder*="Search" i]',
        'input[placeholder*="search" i]', 'input[name="query"]',
        'form[action="/docs/search"]', 'form[action*="/search"]'
    ]

    for selector in remove_selectors:
        for elem in sidebar_soup.select(selector):
            elem.decompose()

    # Remove scripts that might initialize Ask AI
    for script in sidebar_soup.find_all('script'):
        if script.string and any(term in script.string.lower() for term in ['kapa', 'askai', 'ask-ai']):
            script.decompose()

    # Pre-process sidebar links to normalize paths
    for a in sidebar_soup.find_all('a', href=True):
        href = a.get('href')

        # Skip if no href or external links
        if not href or href.startswith(('http://', 'https://', '#', 'mailto:')):
            continue

        # First handle stable -> target version
        if 'stable' in href:
            href = href.replace('/stable/', f'/{TARGET_VERSION}/')
            href = href.replace('stable/', f'{TARGET_VERSION}/')
            if href == 'stable':
                href = TARGET_VERSION

        # Remove /docs/ prefix if present (but keep everything after)
        if href.startswith('/docs/'):
            href = href[6:]
        elif href.startswith('docs/'):
            href = href[5:]

        # Remove leading slash
        href = href.lstrip('/')

        # Update the href
        a['href'] = href

    self.sidebar_html = str(sidebar_soup)
    return True
"DEBUG") + + # COMPREHENSIVE FIX: Replace the entire URL processing section + # Look for the pattern that indicates URL processing + + # First try the exact broken line + broken_line = 'url = sidebar.baseUrl + url.replace("/index.html", "").replace(".html", "");' + + if broken_line in comprehensive_sidebar_js: + working_replacement = '''// Remove /docs/ prefix if present + url = url.replace(/^\\/docs\\//, '').replace(/^docs\\//, ''); + + // Handle root/home URLs + if (url === '/' || url === '' || url === 'index' || url === 'index.html') { + var currentPath = window.location.pathname; + var pathMatch = currentPath.match(/(cockroachcloud|v19\\.2|releases|advisories)\\/[^\\/]+$/); + if (pathMatch) { + url = '../index.html'; + } else { + url = 'index.html'; + } + } else { + if (url.startsWith('/')) { + url = url.substring(1); + } + url = url.replace(/^stable\\//, 'v2.1/').replace(/\\/stable\\//, '/v2.1/'); + + var currentPath = window.location.pathname; + + // BULLETPROOF offline navigation fix + var offlineSnapIndex = currentPath.indexOf('/offline_snap/'); + if (offlineSnapIndex !== -1) { + // We're in the offline snap - calculate relative path to target + var currentFromSnap = currentPath.substring(offlineSnapIndex + '/offline_snap/'.length); + var currentParts = currentFromSnap.split('/').filter(function(p) { return p; }); + + // Remove the current filename to get directory path + currentParts.pop(); + + // Calculate how many ../ we need to get to offline_snap root + var upLevels = currentParts.length; + var upPath = ''; + for (var i = 0; i < upLevels; i++) { + upPath += '../'; + } + + // Target path is always relative to offline_snap root + url = upPath + url; + } + } + url = url.replace(/\\/+/g, '/'); + url = sidebar.baseUrl + url;''' + + comprehensive_sidebar_js = comprehensive_sidebar_js.replace(broken_line, working_replacement) + self.log("✅ Successfully replaced broken URL processing line", "SUCCESS") + else: + # The comprehensive sidebar doesn't have the 
problematic line + # Instead, we need to replace the simple URL assignment + simple_assignment = 'url = sidebar.baseUrl + url;' + + if simple_assignment in comprehensive_sidebar_js: + # We need to insert the directory logic BEFORE this assignment + enhanced_replacement = '''// BULLETPROOF offline navigation fix + var currentPath = window.location.pathname; + + // Find the offline_snap directory position + var offlineSnapIndex = currentPath.indexOf('/offline_snap/'); + if (offlineSnapIndex !== -1) { + // We're in the offline snap - calculate relative path to target + var currentFromSnap = currentPath.substring(offlineSnapIndex + '/offline_snap/'.length); + var currentParts = currentFromSnap.split('/').filter(function(p) { return p; }); + + // Remove the current filename to get directory path + currentParts.pop(); + + // Calculate how many ../ we need to get to offline_snap root + var upLevels = currentParts.length; + var upPath = ''; + for (var i = 0; i < upLevels; i++) { + upPath += '../'; + } + + // Target path is always relative to offline_snap root + url = upPath + url; + } + url = sidebar.baseUrl + url;''' + + comprehensive_sidebar_js = comprehensive_sidebar_js.replace(simple_assignment, enhanced_replacement) + self.log("✅ Enhanced comprehensive sidebar with same-directory navigation logic", "SUCCESS") + else: + self.log("⚠️ Could not find URL assignment pattern to enhance", "WARNING") + self.log("✅ Fixed comprehensive sidebar URL processing for offline use", "SUCCESS") + fixed_sidebar = comprehensive_sidebar_js + else: + # Fallback to original processing + self.log("🔍 No broken URL processing found, using standard fix", "DEBUG") + fixed_sidebar = self.fix_sidebar_javascript(comprehensive_sidebar_js) + + cleaned_sidebar, removed_count = self.clean_sidebar_in_html(fixed_sidebar) + if removed_count > 0: + self.log(f"Cleaned {removed_count} broken URLs from comprehensive sidebar", "DEBUG") + fixed_sidebar = cleaned_sidebar + + # Store it + 
def copy_selective_assets(self):
    """Copy the assets the offline archive needs into OUTPUT_ROOT.

    Copies, when the source directory exists:

    * ``css``, ``js`` and ``img`` from ``SITE_ROOT``;
    * ``css``, ``js`` and ``_internal`` from ``DOCS_ROOT`` (merged into the
      same destination directories);
    * everything under ``DOCS_ROOT/images`` except files inside the image
      directory of another version (``TARGET_VERSION`` and ``v2.1`` are
      kept);
    * ``images``, ``css``, ``js`` and ``_internal`` from the target version's
      own directory.
    """
    self.log("\n--- Copying Selective Assets ---")

    # Copy global assets (always needed)
    for asset_dir in ["css", "js", "img"]:
        src = SITE_ROOT / asset_dir
        if src.exists():
            shutil.copytree(src, OUTPUT_ROOT / asset_dir, dirs_exist_ok=True)
            self.log(f"Copied global {asset_dir}/", "SUCCESS")

    # Copy docs-specific assets (base level)
    for asset_dir in ["css", "js", "_internal"]:
        src = DOCS_ROOT / asset_dir
        if src.exists():
            shutil.copytree(src, OUTPUT_ROOT / asset_dir, dirs_exist_ok=True)
            self.log(f"Copied docs {asset_dir}/", "SUCCESS")

    # Handle images selectively - include target version, v2.1 (legacy), and global images
    images_src = DOCS_ROOT / "images"
    if images_src.exists():
        images_dst = OUTPUT_ROOT / "images"
        images_dst.mkdir(parents=True, exist_ok=True)

        kept_versions = {TARGET_VERSION, 'v2.1'}  # v2.1 images are always included for legacy support
        copied_count = 0
        skipped_count = 0

        for img_file in images_src.rglob("*"):
            if not img_file.is_file():
                continue

            rel_path = img_file.relative_to(images_src)
            top_level = rel_path.parts[0] if rel_path.parts else ""

            # Skip images that belong to another version. Only names shaped
            # like "v<major>.<minor>" count as version directories, so
            # ordinary files or folders that merely start with "v" are kept.
            if re.fullmatch(r'v\d+\.\d+', top_level) and top_level not in kept_versions:
                skipped_count += 1
                continue

            # Copy allowed images
            dst_file = images_dst / rel_path
            dst_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(img_file, dst_file)
            copied_count += 1

        self.log(f"Images: copied {copied_count}, skipped {skipped_count} version-specific files", "SUCCESS")

    # Copy version-specific assets only for TARGET_VERSION
    version = TARGET_VERSION
    version_src = DOCS_ROOT / version
    if version_src.exists():
        # Copy version-specific images if they exist
        version_images = version_src / "images"
        if version_images.exists():
            shutil.copytree(version_images, OUTPUT_ROOT / version / "images", dirs_exist_ok=True)
            self.log(f"Copied {version}/images/", "SUCCESS")

        # Copy other version-specific assets
        for asset_type in ["css", "js", "_internal"]:
            version_asset = version_src / asset_type
            if version_asset.exists():
                shutil.copytree(version_asset, OUTPUT_ROOT / version / asset_type, dirs_exist_ok=True)
                self.log(f"Copied {version}/{asset_type}/", "SUCCESS")
2: Find and replace the URL processing logic + # Look for the specific URL processing pattern in the JavaScript + url_processing_pattern = r'(if \(!/\^https\?:/.test\(url\)\) \{\s*url = sidebar\.baseUrl \+ url\.replace\([^}]+\}\s*return url;)' + + # More robust pattern that captures the entire URL processing block + # Fixed pattern to match comprehensive sidebar format exactly + better_pattern = r'(const urls = \(item\.urls \|\| \[\]\)\.map\(function \(url\) \{[\s\S]*?)(if \(!/\^https\?:/.test\(url\)\) \{[\s\S]*?url = sidebar\.baseUrl \+ url\.replace\([^}]+\}[\s\S]*?)(return url;[\s\S]*?\}\);)' + + def replace_url_processing(match): + start_part = match.group(1) + end_part = match.group(3) + + # Simplified URL processing for offline file:// URLs with absolute baseUrl + new_processing = r'''if (!/^https?:/.test(url)) { + // Remove /docs/ prefix if present + url = url.replace(/^\/docs\//, '').replace(/^docs\//, ''); + + // Remove leading slash to make it relative + if (url.startsWith('/')) { + url = url.substring(1); + } + + // Handle stable -> v2.1 conversion + url = url.replace(/^stable\//, 'v2.1/').replace(/\/stable\//, '/v2.1/'); + + // Handle root/home URLs + if (url === '' || url === 'index' || url === 'index.html') { + url = 'index.html'; + } + + // Clean up any double slashes + url = url.replace(/\/+/g, '/'); + + // Use relative path for portability + // Don't prepend baseUrl for relative navigation + if (!sidebar.baseUrl || sidebar.baseUrl === '') { + // Already relative, just return + } else if (sidebar.baseUrl.startsWith('file://')) { + // Legacy absolute path - convert to relative + url = url; + } else { + url = sidebar.baseUrl + url; + } + }''' + + return start_part + new_processing + end_part + + # Try to apply the replacement - use global replacement to catch all instances + new_html = html + matches_found = 0 + def count_replacements(match): + nonlocal matches_found + matches_found += 1 + return replace_url_processing(match) + + new_html = 
re.sub(better_pattern, count_replacements, html, flags=re.DOTALL) + + if matches_found > 0: + self.log(f"✅ Applied comprehensive URL processing replacement ({matches_found} matches)", "SUCCESS") + else: + self.log("⚠️ Comprehensive URL processing pattern not found", "WARNING") + + # If that didn't work, try direct replacement of the .html stripping pattern + # This is the most important fix for comprehensive sidebar + if new_html == html: + # Direct pattern matching for comprehensive sidebar format - handle spacing + new_html = re.sub( + r'url\s*=\s*sidebar\.baseUrl\s*\+\s*url\.replace\s*\(\s*"/index\.html"\s*,\s*""\s*\)\.replace\s*\(\s*"\.html"\s*,\s*""\s*\)\s*;', + 'url = sidebar.baseUrl + url.replace("/index.html", ""); // Keep .html for offline', + html + ) + if new_html != html: + self.log("Applied direct .html preservation fix to comprehensive sidebar", "DEBUG") + + # Also fix the .html stripping issue - replace the line that removes .html extensions + # The main pattern we need to fix is: + # url = sidebar.baseUrl + url.replace("/index.html", "").replace(".html", ""); + + # FINAL FIX: Simple string replacement to ensure .html extensions are preserved + old_text = 'url = sidebar.baseUrl + url.replace("/index.html", "").replace(".html", "");' + new_text = 'url = sidebar.baseUrl + url.replace("/index.html", ""); // Keep .html for offline' + + # Apply the fix regardless of previous replacements + new_html = new_html.replace(old_text, new_text) + + if old_text in html and old_text not in new_html: + self.log("✅ Fixed .html stripping with simple string replacement", "SUCCESS") + elif old_text in html: + self.log("⚠️ Failed to replace .html stripping pattern", "WARNING") + else: + self.log("ℹ️ No .html stripping pattern found to fix", "INFO") + + # If the complex pattern didn't match, try a simpler approach + if new_html == html: + self.log("Trying simple pattern replacement as fallback", "DEBUG") + # Simple pattern - just replace the specific problematic line + 
simple_pattern = r'url = sidebar\.baseUrl \+ url\.replace\([^}]+\}' + + simple_replacement = r'''// Custom offline URL processing + url = url.replace(/^\/docs\//, '').replace(/^docs\//, ''); + + // Handle root/home URLs + if (url === '/' || url === '' || url === 'index' || url === 'index.html') { + var currentPath = window.location.pathname; + var pathMatch = currentPath.match(/(cockroachcloud|v19\.2|releases|advisories)\/[^\/]+$/); + if (pathMatch) { + url = '../index.html'; + } else { + url = 'index.html'; + } + } else { + var currentPath = window.location.pathname; + var currentDir = ''; + + var pathMatch = currentPath.match(/(cockroachcloud|v19\.2|releases|advisories)\/[^\/]+$/); + if (pathMatch) { + currentDir = pathMatch[1]; + } else { + var pathParts = currentPath.split('/').filter(function(part) { return part; }); + for (var i = pathParts.length - 2; i >= 0; i--) { + if (pathParts[i] === 'cockroachcloud' || pathParts[i] === 'v2.1' || + pathParts[i] === 'releases' || pathParts[i] === 'advisories') { + currentDir = pathParts[i]; + break; + } + } + } + + if (url.startsWith('/')) { + url = url.substring(1); + } + + url = url.replace(/^stable\//, 'v2.1/').replace(/\/stable\//, '/v2.1/'); + + // Handle cross-directory URLs (releases, cockroachcloud, advisories) + if (url.startsWith('releases/') || url.startsWith('cockroachcloud/') || url.startsWith('advisories/')) { + // These should go up from v2.1 directory to the root level + if (currentDir === 'v2.1') { + url = '../' + url; + } + } + + // BULLETPROOF offline navigation fix + var offlineSnapIndex = currentPath.indexOf('/offline_snap/'); + if (offlineSnapIndex !== -1) { + // We're in the offline snap - calculate relative path to target + var currentFromSnap = currentPath.substring(offlineSnapIndex + '/offline_snap/'.length); + var currentParts = currentFromSnap.split('/').filter(function(p) { return p; }); + + // Remove the current filename to get directory path + currentParts.pop(); + + // Calculate how many 
../ we need to get to offline_snap root + var upLevels = currentParts.length; + var upPath = ''; + for (var i = 0; i < upLevels; i++) { + upPath += '../'; + } + + // Target path is always relative to offline_snap root + url = upPath + url; + } + } + + url = url.replace(/\/+/g, '/'); + // Keep .html extensions for offline use + }''' + + new_html = re.sub(simple_pattern, simple_replacement, html, flags=re.DOTALL) + + # Also fix the .html stripping issue - handle both patterns + new_html = re.sub( + r'url = url\.replace\("/index\.html", ""\)\.replace\("\.html", ""\);', + 'url = url.replace("/index.html", ""); // Keep .html for offline', + new_html + ) + new_html = re.sub( + r'url = sidebar\.baseUrl \+ url\.replace\("/index\.html", ""\)\.replace\("\.html", ""\);', + 'url = sidebar.baseUrl + url.replace("/index.html", ""); // Keep .html for offline', + new_html + ) + + # Debug output + if new_html != html: + self.log("Successfully replaced JavaScript URL processing", "DEBUG") + else: + self.log("Warning: JavaScript URL processing replacement may have failed", "WARNING") + + return new_html + + def get_vibrant_sidebar_styles(self, prefix): + """Return vibrant sidebar styles with #6933FF purple branding (FROM SCRIPT 1)""" + return f'''''' + + def process_html_file(self, src_path): + """Process a single HTML file with vibrant sidebar styling""" + import re # Import at the top to avoid UnboundLocalError + try: + rel_path = src_path.relative_to(DOCS_ROOT) + dst_path = OUTPUT_ROOT / rel_path + + # Calculate depth and prefix + depth = len(rel_path.parent.parts) + prefix = "../" * depth + + # Read content + html = src_path.read_text(encoding="utf-8") + + # Extract comprehensive sidebar from cockroachcloud pages FIRST (if not already done) + if not self.comprehensive_sidebar_html and 'cockroachcloud' in str(rel_path): + self.extract_comprehensive_sidebar(html) + + # SIMPLE APPROACH: If we have comprehensive sidebar, replace it. Otherwise use original logic. 
+ if self.comprehensive_sidebar_html: + # Find and replace the sidebar JavaScript with our comprehensive version + sidebar_pattern = r'const sidebar = \{[\s\S]*?\};' + match = re.search(sidebar_pattern, html, flags=re.DOTALL) + if match: + # Use simple string replacement to avoid regex escape issues + original_sidebar = match.group(0) + + # FINAL FIX: Apply URL processing fix to comprehensive sidebar before applying it + fixed_comprehensive_sidebar = self.comprehensive_sidebar_html + + # Fix the .html stripping issue in the comprehensive sidebar + broken_line = 'url = sidebar.baseUrl + url.replace("/index.html", "").replace(".html", "");' + fixed_line = 'url = sidebar.baseUrl + url.replace("/index.html", ""); // Keep .html for offline' + + if broken_line in fixed_comprehensive_sidebar: + fixed_comprehensive_sidebar = fixed_comprehensive_sidebar.replace(broken_line, fixed_line) + self.log("🔧 Fixed .html stripping in comprehensive sidebar", "SUCCESS") + + # The simple fix above should be sufficient + + html = html.replace(original_sidebar, fixed_comprehensive_sidebar) + self.log(f"Applied comprehensive sidebar to {rel_path}", "DEBUG") + + # CRITICAL: Apply sidebar fixes AFTER comprehensive sidebar replacement + html = self.fix_sidebar_javascript(html) + + # Debug: check if "/" URL is present in replaced content + if '"/"' in self.comprehensive_sidebar_html: + self.log("✓ Root URL '/' found in comprehensive sidebar", "DEBUG") + else: + self.log("⚠ Root URL '/' NOT found in comprehensive sidebar", "WARNING") + else: + # No sidebar JS found, continue with normal processing + html = self.fix_sidebar_javascript(html) + cleaned_html, removed_count = self.clean_sidebar_in_html(html) + if removed_count > 0: + self.total_broken_urls += removed_count + html = cleaned_html + else: + # ORIGINAL LOGIC: Fix sidebar JavaScript BEFORE other processing + html = self.fix_sidebar_javascript(html) + + # Clean embedded sidebar JavaScript + cleaned_html, removed_count = 
self.clean_sidebar_in_html(html) + if removed_count > 0: + self.total_broken_urls += removed_count + html = cleaned_html + + # Inject sidebar HTML if available (ORIGINAL LOGIC) + if self.sidebar_html: + sidebar_to_inject = self.sidebar_html + # Try to inject into ul#sidebar first + ul_replaced = re.sub( + r"(]*id=\"sidebar\"[^>]*>)([^<]*)()", + rf"\1{sidebar_to_inject}\3", + html, + flags=re.IGNORECASE | re.DOTALL, + ) + + # If ul replacement worked, use it + if ul_replaced != html: + html = ul_replaced + else: + # Fallback to div#sidebar + html = re.sub( + r"(
]*>)(\s*?
)", + rf"\1{sidebar_to_inject}\2", + html, + flags=re.IGNORECASE, + ) + + # Parse with BeautifulSoup for additional cleanup + soup = BeautifulSoup(html, "html.parser") + + # Remove Ask AI widget and other unwanted elements + remove_selectors = [ + '.ask-ai', '#ask-ai', '[data-ask-ai]', '.ai-widget', '.kapa-widget', + 'script[src*="kapa"]', '#kapa-widget-container', '.kapa-trigger', + '.kapa-ai-button', '[class*="kapa"]', '[id*="kapa"]', + 'div[data-kapa-widget]', 'button[aria-label*="AI"]', + '[class*="ask-ai"]', '[id*="ask-ai"]', + 'iframe[src*="kapa"]', 'iframe[id*="kapa"]', + '.version-switcher', '#version-switcher', '.version-dropdown', + '.feedback-widget', '#feedback-widget', '[id*="feedback"]', + '.helpful-widget', '.page-helpful', + 'script[src*="googletagmanager"]', 'script[src*="google-analytics"]', + 'script[src*="segment"]', 'script[src*="heap"]', + # Remove search elements that won't work offline + '.search', '#search', '.search-bar', '.search-input', '.search-form', + '[class*="search"]', '[id*="search"]', 'input[type="search"]', + '.algolia-search', '.docsearch', '[class*="docsearch"]', + # Target forms and inputs with search-related attributes + 'form[action*="search"]', 'input[placeholder*="Search" i]', + 'input[placeholder*="search" i]', 'input[name="query"]', + 'form[action="/docs/search"]', 'form[action*="/search"]' + ] + + for selector in remove_selectors: + for elem in soup.select(selector): + elem.decompose() + + # Remove any script tags that contain kapa or AI-related code + for script in soup.find_all('script'): + if script.string and any(term in script.string.lower() for term in ['kapa', 'askai', 'ask-ai', 'aiwidget']): + script.decompose() + + # Remove any iframes that might be Ask AI related + for iframe in soup.find_all('iframe'): + src = iframe.get('src', '') + if src and any(term in src.lower() for term in ['kapa', 'ask', 'ai']): + iframe.decompose() + + # Fix any remaining anchor tags without href attributes + for a in 
soup.find_all('a'): + if not a.get('href'): + # Remove anchor tags without href or set a placeholder + if a.get_text().strip(): + # Convert to span if it has text content + span = soup.new_tag('span') + span.string = a.get_text() + a.replace_with(span) + else: + # Remove empty anchor tags + a.decompose() + + # Convert back to string + html = str(soup) + + # Clean up various path patterns + html = re.sub( + r"(src|href)=\"([^\"?]+)\?[^\" ]+\"", + lambda m: f'{m.group(1)}="{m.group(2)}"', + html, + ) + + # Fix various path patterns + html = re.sub(r'(href|src)="/docs/stable/', rf'\1="{TARGET_VERSION}/', html) + html = re.sub(r'(href|src)="docs/stable/', rf'\1="{TARGET_VERSION}/', html) + html = re.sub(r'(href|src)="/docs/(v\d+\.\d+/[^"]+)"', r'\1="\2"', html) + html = re.sub(r'(href|src)="docs/(v\d+\.\d+/[^"]+)"', r'\1="\2"', html) + html = re.sub(r'(href|src)="/docs/([^v][^"]+)"', r'\1="\2"', html) + html = re.sub(r'(href|src)="docs/([^v][^"]+)"', r'\1="\2"', html) + html = re.sub(r'(href|src)="/(?!/)([^"]+)"', r'\1="\2"', html) + + # Fix asset paths + for asset in ["css", "js", "images", "_internal"]: + html = re.sub( + rf"(src|href)=[\"']/{asset}/([^\"']+)[\"']", + rf'\1="{asset}/\2"', + html, + ) + + html = re.sub(r"(src|href)=[\"']/?img/([^\"']+)[\"']", r'\1="img/\2"', html) + html = re.sub(r"(src|href|xlink:href)=[\"']/?docs/images/([^\"']+)[\"']", r'\1="images/\2"', html) + + # Replace Google Fonts + html = re.sub( + r"]+fonts\.googleapis\.com[^>]+>", + f'', + html, + ) + + # Apply relative prefixes to asset paths + for asset in ["css", "js", "images", "_internal", "img"]: + html = re.sub( + rf'(src|href)="({asset}/[^"]+)"', + rf'\1="{prefix}\2"', + html, + ) + + # Inject navigation dependencies + nav_deps = f''' + + +''' + + html = re.sub(r"", nav_deps + "\n", html, flags=re.IGNORECASE) + + # Add vibrant sidebar styles (FROM SCRIPT 1) + offline_styles = self.get_vibrant_sidebar_styles(prefix) + html = re.sub(r"", offline_styles + "\n", html, 
flags=re.IGNORECASE) + + # Simple navgoco initialization (FROM SCRIPT 1) + nav_init = """""" + + html = re.sub(r"", nav_init + "\n", html, flags=re.IGNORECASE) + + # Write output + dst_path.parent.mkdir(parents=True, exist_ok=True) + dst_path.write_text(html, encoding="utf-8") + + self.processed_files.add(str(rel_path)) + + except Exception as e: + self.log(f"Error processing {src_path}: {e}", "ERROR") + self.log(f"Error type: {type(e).__name__}", "ERROR") + self.log(f"Error details: {str(e)}", "ERROR") + # Continue processing other files instead of crashing + import traceback + traceback.print_exc() + + def fix_css_images(self): + """Fix image paths in CSS files""" + self.log("Fixing CSS image paths...") + + for css_file in (OUTPUT_ROOT / "css").rglob("*.css"): + try: + content = css_file.read_text(encoding="utf-8") + + # Fix various image URL patterns + content = re.sub( + r"url\((['\"]?)/?docs/images/([^)\"']+)\1\)", + r"url(\1../images/\2\1)", + content, + ) + content = re.sub( + r"url\((['\"]?)images/([^)\"']+)\1\)", + r"url(\1../images/\2\1)", + content, + ) + + css_file.write_text(content, encoding="utf-8") + + except Exception as e: + self.log(f"Error fixing CSS {css_file}: {e}", "WARNING") + + def download_google_fonts(self): + """Download and localize Google Fonts""" + self.log("Downloading Google Fonts...") + + fonts_dir = OUTPUT_ROOT / "fonts" + fonts_dir.mkdir(exist_ok=True) + + try: + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} + css_response = requests.get(FONTS_CSS_URL, headers=headers, timeout=10) + css_response.raise_for_status() + css_content = css_response.text + + font_urls = set(re.findall(r"url\((https://fonts\.gstatic\.com/[^\)]+)\)", css_content)) + + for url in font_urls: + try: + font_response = requests.get(url, headers=headers, timeout=10) + font_response.raise_for_status() + + parsed = urlparse(url) + font_path = parsed.path.lstrip("/") + dst = fonts_dir / font_path + 
dst.parent.mkdir(parents=True, exist_ok=True) + dst.write_bytes(font_response.content) + + css_content = css_content.replace(url, f"../fonts/{font_path}") + + except Exception as e: + self.log(f"Failed to download font from {url}: {e}", "WARNING") + + (OUTPUT_ROOT / "css" / "google-fonts.css").write_text(css_content, encoding="utf-8") + self.log("Google Fonts localized", "SUCCESS") + + except Exception as e: + self.log(f"Error downloading fonts: {e}", "ERROR") + fallback = """/* Fallback fonts */ +body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, sans-serif; } +code, pre { font-family: Consolas, Monaco, "Courier New", monospace; }""" + (OUTPUT_ROOT / "css" / "google-fonts.css").write_text(fallback) + + def create_professional_index_page(self): + """Add archived banner to existing index.html""" + index_path = OUTPUT_ROOT / "index.html" + + # Check if there's already an index.html file from the Jekyll build + if index_path.exists(): + # Read the existing content + html_content = index_path.read_text(encoding="utf-8") + + # Add the banner CSS to the head + banner_css = '''''' + + # Add the banner HTML + banner_html = ''' +
+
+

+ 📚 This is an archived version of the CockroachDB documentation. + View the latest documentation +

+
+
''' + + # Insert CSS before + html_content = html_content.replace('', banner_css + '\n') + + # Insert banner HTML after + html_content = html_content.replace('', '\n' + banner_html) + + # Write back the modified content + index_path.write_text(html_content, encoding="utf-8") + self.log("Added archived banner to existing index.html", "SUCCESS") + else: + self.log("No existing index.html found to modify", "WARNING") + + def build(self): + """Main build process with hybrid optimizations""" + print("\n" + "="*60) + print("🚀 COCKROACHDB OFFLINE DOCUMENTATION ARCHIVER (HYBRID+)") + print("="*60) + + # Verify paths + self.log(f"Jekyll Root: {JEKYLL_ROOT}") + self.log(f"Site Root: {SITE_ROOT}") + self.log(f"Docs Root: {DOCS_ROOT}") + self.log(f"Output: {OUTPUT_ROOT}") + self.log(f"Target Version: {TARGET_VERSION}") + + if not SITE_ROOT.exists(): + self.log("Site root not found! Run 'jekyll build' first.", "ERROR") + return False + + # Clean output directory + if OUTPUT_ROOT.exists(): + self.log("Cleaning existing output directory...") + shutil.rmtree(OUTPUT_ROOT) + OUTPUT_ROOT.mkdir(parents=True) + + # Use selective asset copying (FROM SCRIPT 2) + self.copy_selective_assets() + + # Ensure critical navigation assets + self.log("\n--- Ensuring Navigation Assets ---") + self.ensure_asset( + "jquery.min.js", + [DOCS_ROOT / "js" / "jquery.min.js", SITE_ROOT / "js" / "jquery.min.js"], + "https://code.jquery.com/jquery-3.6.3.min.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + "jquery.cookie.min.js", + [DOCS_ROOT / "js" / "jquery.cookie.min.js", SITE_ROOT / "js" / "jquery.cookie.min.js"], + "https://cdnjs.cloudflare.com/ajax/libs/jquery-cookie/1.4.1/jquery.cookie.min.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + "jquery.navgoco.min.js", + [DOCS_ROOT / "js" / "jquery.navgoco.min.js", SITE_ROOT / "js" / "jquery.navgoco.min.js"], + "https://raw.githubusercontent.com/tefra/navgoco/master/src/jquery.navgoco.js", + OUTPUT_ROOT / "js" + ) + self.ensure_asset( + 
"jquery.navgoco.css", + [DOCS_ROOT / "css" / "jquery.navgoco.css", SITE_ROOT / "css" / "jquery.navgoco.css"], + "https://raw.githubusercontent.com/tefra/navgoco/master/src/jquery.navgoco.css", + OUTPUT_ROOT / "css" + ) + + # Load sidebar + self.log("\n--- Loading Sidebar ---") + self.load_sidebar() + + # Process HTML files with stricter version filtering (FROM SCRIPT 2) + self.log("\n--- Processing HTML Files ---") + + files_to_process = [] + + # Only target version files + version_dir = DOCS_ROOT / TARGET_VERSION + if version_dir.exists(): + files_to_process.extend(list(version_dir.rglob("*.html"))) + self.log(f"Found {len(files_to_process)} files in {TARGET_VERSION}/", "SUCCESS") + + # Common pages (but exclude other version directories) + for pattern in COMMON_PAGES: + if '*' in pattern: + for file_path in DOCS_ROOT.glob(pattern): + # Skip other version directories + rel_path = file_path.relative_to(DOCS_ROOT) + if (rel_path.parts and + rel_path.parts[0].startswith('v') and + rel_path.parts[0] != TARGET_VERSION): + continue + files_to_process.append(file_path) + else: + file_path = DOCS_ROOT / pattern + if file_path.exists(): + files_to_process.append(file_path) + + # Remove duplicates and filter out unwanted versions + filtered_files = [] + for file_path in set(files_to_process): + rel_path = file_path.relative_to(DOCS_ROOT) + # Skip files from other version directories + if (rel_path.parts and + rel_path.parts[0].startswith('v') and + rel_path.parts[0] != TARGET_VERSION): + continue + filtered_files.append(file_path) + + files_to_process = filtered_files + self.log(f"Total files to process (after version filtering): {len(files_to_process)}") + + # Process each file with better error handling (FROM SCRIPT 2) + processed_count = 0 + error_count = 0 + + for i, file_path in enumerate(files_to_process, 1): + try: + if i % 25 == 0: + self.log(f"Progress: {i}/{len(files_to_process)} ({i*100//len(files_to_process)}%)") + + self.process_html_file(file_path) + 
processed_count += 1 + + except Exception as e: + error_count += 1 + self.log(f"Failed to process {file_path}: {e}", "ERROR") + # Continue with next file instead of crashing + continue + + self.log(f"Successfully processed {processed_count} files, {error_count} errors", "SUCCESS") + + # Final cleanup steps + self.log("\n--- Final Steps ---") + self.fix_css_images() + self.download_google_fonts() + self.create_professional_index_page() # FROM SCRIPT 2 + + # Enhanced summary + print("\n" + "="*60) + self.log("HYBRID ARCHIVE COMPLETE!", "SUCCESS") + self.log(f"Output directory: {OUTPUT_ROOT.resolve()}") + self.log(f"Total files: {len(self.processed_files)}") + self.log(f"Total broken URLs removed: {self.total_broken_urls}", "SUCCESS") + + # Navigation summary + if self.comprehensive_sidebar_html: + self.log("✅ Comprehensive sidebar extracted and applied to all pages", "SUCCESS") + else: + self.log("⚠️ No comprehensive sidebar found - using original individual processing", "WARNING") + + self.log("🟣 Vibrant #6933FF sidebar styling", "SUCCESS") + self.log("🏠 Professional homepage with archived banner", "SUCCESS") + self.log("🔗 ORIGINAL working navigation logic restored", "SUCCESS") + self.log("⚡ Selective asset copying for reduced size", "SUCCESS") + self.log("🔧 Robust error handling and progress reporting", "SUCCESS") + self.log("✅ JavaScript URL processing: ORIGINAL working version", "SUCCESS") + self.log("✅ Filtered out non-v2.1 version links (v25.1, v24.x, etc.)", "SUCCESS") + self.log("✅ Broken sidebar links removed from comprehensive sidebar", "SUCCESS") + + print(f"\n🎉 Hybrid offline site built in {OUTPUT_ROOT}") + print(f"\n📦 To test: open {OUTPUT_ROOT}/index.html in your browser") + print(f"\n🟣 Vibrant purple sidebar + professional homepage + improved navigation logic") + print(f"\n⚡ Optimized assets - excluded non-{TARGET_VERSION} files") + print(f"\n🔧 {self.total_broken_urls} broken sidebar URLs cleaned up") + print(f"\n✨ Best features from all scripts 
combined!") + + return True + + +def main(): + """Main entry point""" + try: + archiver = OfflineArchiver() + success = archiver.build() + sys.exit(0 if success else 1) + except KeyboardInterrupt: + print("\n\nArchiving cancelled by user.") + sys.exit(1) + except Exception as e: + print(f"\n❌ Fatal error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file