Skip to content

Commit efb550a

Browse files
committed
Improve code formatting, language detection, and PDF processing efficiency
- Add comprehensive language detection for .env files and Solidity contracts
- Fix code block formatting issues in generated documents
- Remove redundant file information from document headers
- Add chunked PDF processing to prevent memory exhaustion on large repositories
- Implement PDF merging with pdfunite/pdftk for scalable document generation
- Add configurable resource limits for Pandoc operations
- Preserve future-ready API methods with allow(dead_code) annotations

Key improvements:
* Solidity files now properly detected as 'solidity' instead of 'javascript'
* Environment files (.env, .env.local, etc.) get bash syntax highlighting
* Fixed spaced text rendering by removing problematic LaTeX listings option
* Large repositories can now generate PDFs through individual file processing
* Memory-efficient processing prevents system resource exhaustion
* Enhanced error handling for timeout and memory limit scenarios
1 parent a8a64d6 commit efb550a

6 files changed

Lines changed: 349 additions & 39 deletions

File tree

src/config.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ impl Default for IgnoreConfig {
108108

109109
impl Config {
110110
/// Load configuration from a file, falling back to defaults if not found
111+
#[allow(dead_code)]
111112
pub fn load_from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
112113
let config_path = path.as_ref();
113114

@@ -123,6 +124,7 @@ impl Config {
123124
}
124125

125126
/// Load configuration from the current directory or user's home directory
127+
#[allow(dead_code)]
126128
pub fn load_default() -> Result<Self> {
127129
// Try to load from current directory first
128130
let local_config = Path::new("scrollcast.toml");
@@ -143,6 +145,7 @@ impl Config {
143145
}
144146

145147
/// Save configuration to a file
148+
#[allow(dead_code)]
146149
pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
147150
let content = toml::to_string_pretty(self)
148151
.context("Failed to serialize configuration")?;
@@ -152,6 +155,7 @@ impl Config {
152155
}
153156

154157
/// Create a sample configuration file
158+
#[allow(dead_code)]
155159
pub fn create_sample_config<P: AsRef<Path>>(path: P) -> Result<()> {
156160
let sample_config = Config {
157161
output: OutputConfig {
@@ -197,6 +201,7 @@ impl Config {
197201
}
198202

199203
/// Get theme mode as enum
204+
#[allow(dead_code)]
200205
pub fn get_theme_mode(&self) -> ThemeMode {
201206
match self.theme.mode.as_str() {
202207
"dark" => ThemeMode::Dark,
@@ -205,11 +210,13 @@ impl Config {
205210
}
206211

207212
/// Get output directory path
213+
#[allow(dead_code)]
208214
pub fn get_output_dir(&self) -> PathBuf {
209215
PathBuf::from(&self.output.folder)
210216
}
211217

212218
/// Get output filename with fallback
219+
#[allow(dead_code)]
213220
pub fn get_output_filename(&self, fallback: &str) -> String {
214221
self.output.filename
215222
.as_ref()
@@ -218,6 +225,7 @@ impl Config {
218225
}
219226

220227
/// Ensure output directory exists
228+
#[allow(dead_code)]
221229
pub fn ensure_output_dir(&self) -> Result<PathBuf> {
222230
let output_dir = self.get_output_dir();
223231

src/file_processor.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ impl FileProcessor {
3939
}
4040
}
4141

42+
#[allow(dead_code)]
4243
pub fn with_ignore_config(mut self, config: IgnoreConfig) -> Self {
4344
self.ignore_config = config;
4445
self
@@ -54,6 +55,7 @@ impl FileProcessor {
5455
self
5556
}
5657

58+
#[allow(dead_code)]
5759
pub fn load_ignore_config_from_path<P: AsRef<Path>>(mut self, path: P) -> Result<Self> {
5860
let ignore_file_path = path.as_ref().join("scrollcast.ignore");
5961
if ignore_file_path.exists() {

src/main.rs

Lines changed: 105 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,11 @@ async fn main() -> Result<()> {
283283
let temp_markdown = temp_dir.join(format!("{}_temp.md", repo_name));
284284

285285
if needs_chunking {
286-
process_files_in_chunks(files, repo_name, effective_chunk_size, &temp_markdown, include_toc, verbose, memory_limit, max_file_size_mb).await
286+
process_files_in_chunks(&files, repo_name, effective_chunk_size, &temp_markdown, include_toc, verbose, memory_limit, max_file_size_mb).await
287287
.context("Failed to process files in chunks")?;
288288
} else {
289289
let markdown_generator = MarkdownGenerator::new(include_toc, true);
290-
let markdown_content = markdown_generator.generate_markdown(files, repo_name)
290+
let markdown_content = markdown_generator.generate_markdown(&files, repo_name)
291291
.context("Failed to generate markdown")?;
292292
fs::write(&temp_markdown, &markdown_content)
293293
.context("Failed to write temporary markdown file")?;
@@ -302,20 +302,42 @@ async fn main() -> Result<()> {
302302
}
303303

304304
// Configure Pandoc
305+
let use_chunked_pdf = matches!(output_format, OutputFormat::Pdf) &&
306+
(files.len() > 10 || total_size > 1_000_000);
307+
308+
if use_chunked_pdf && verbose {
309+
println!("📦 Using chunked PDF processing for better memory efficiency");
310+
}
311+
305312
let pandoc_config = PandocConfig {
306313
output_format,
307314
highlight_style: theme,
308315
include_toc,
309316
syntax_definitions: Vec::new(),
317+
use_chunked_pdf,
310318
};
311319

312320
let converter = PandocConverter::new(pandoc_config)
313321
.context("Failed to initialize Pandoc converter")?;
314322

315323
// Convert to final format
316324
println!("{}", "🔄 Converting to final format...".color(Color::Cyan));
317-
converter.convert_markdown_to_document(&temp_markdown, output_path, verbose).await
318-
.context("Failed to convert markdown to final format")?;
325+
326+
if use_chunked_pdf {
327+
// For chunked PDF processing, we need individual markdown files
328+
let temp_dir = std::env::temp_dir();
329+
let chunk_markdowns = generate_individual_markdowns(&files, repo_name, include_toc, &temp_dir, verbose)?;
330+
converter.convert_markdown_chunks_to_pdf(&chunk_markdowns, output_path, verbose).await
331+
.context("Failed to convert markdown chunks to PDF")?;
332+
333+
// Clean up temporary markdown files
334+
for markdown_file in &chunk_markdowns {
335+
let _ = fs::remove_file(markdown_file);
336+
}
337+
} else {
338+
converter.convert_markdown_to_document(&temp_markdown, output_path, verbose).await
339+
.context("Failed to convert markdown to final format")?;
340+
}
319341

320342
// Keep temporary file for debugging
321343
// let _ = fs::remove_file(&temp_markdown);
@@ -341,7 +363,7 @@ async fn main() -> Result<()> {
341363
}
342364

343365
async fn process_files_in_chunks(
344-
files: Vec<FileInfo>,
366+
files: &[FileInfo],
345367
repo_name: &str,
346368
chunk_size: usize,
347369
output_path: &Path,
@@ -360,7 +382,7 @@ async fn process_files_in_chunks(
360382
// Add table of contents for all files
361383
if include_toc {
362384
final_markdown.push_str("## Table of Contents\n\n");
363-
for file in &files {
385+
for file in files {
364386
let sanitized_path = file.path.replace(['/', '\\'], "-").replace('.', "-");
365387
let escaped_path = escape_markdown_special_chars(&file.path);
366388
final_markdown.push_str(&format!("- [{}](#{sanitized_path})\n", escaped_path));
@@ -371,7 +393,7 @@ async fn process_files_in_chunks(
371393
// Add file tree
372394
final_markdown.push_str("## File Structure\n\n");
373395
final_markdown.push_str("```\n");
374-
for file in &files {
396+
for file in files {
375397
final_markdown.push_str(&format!("{}\n", file.path));
376398
}
377399
final_markdown.push_str("```\n\n");
@@ -382,7 +404,7 @@ async fn process_files_in_chunks(
382404
// Process files in chunks
383405
let chunks: Vec<&[FileInfo]> = files.chunks(chunk_size).collect();
384406
let total_chunks = chunks.len();
385-
let mut global_page_number = 1; // Start after title/TOC page
407+
let mut _global_page_number = 1; // Start after title/TOC page
386408
let mut file_counter = 0;
387409

388410
for (chunk_index, chunk) in chunks.iter().enumerate() {
@@ -394,7 +416,7 @@ async fn process_files_in_chunks(
394416
// Process each file in the chunk
395417
for file in chunk.iter() {
396418
file_counter += 1;
397-
global_page_number += 1; // Each file gets a new page
419+
_global_page_number += 1; // Each file gets a new page
398420

399421
if verbose {
400422
sys.refresh_memory();
@@ -432,8 +454,7 @@ async fn process_files_in_chunks(
432454

433455
// Add file header with page numbers
434456
final_markdown.push_str(&format!("### {} {{#{sanitized_path}}}\n\n", escaped_path));
435-
final_markdown.push_str(&format!("**File:** {} | **Size:** {} | **File #{} | Page {}**\n\n",
436-
escaped_path, format_file_size(file.size), file_counter, global_page_number));
457+
final_markdown.push_str(&format!("**Size:** {}\n\n", format_file_size(file.size)));
437458

438459
if let Some(language) = &file.language {
439460
final_markdown.push_str(&format!("```{}\n", language));
@@ -443,15 +464,12 @@ async fn process_files_in_chunks(
443464

444465
final_markdown.push_str(&processed_content);
445466

446-
if !file.content.ends_with('\n') {
467+
// Ensure there's always a newline before closing backticks
468+
if !processed_content.ends_with('\n') {
447469
final_markdown.push('\n');
448470
}
449471

450472
final_markdown.push_str("```\n\n");
451-
452-
// Add file info with page numbering
453-
final_markdown.push_str(&format!("*File size: {} | File #{} of {} | Page {}*\n\n",
454-
format_file_size(file.size), file_counter, files.len(), global_page_number));
455473
final_markdown.push_str("---\n\n");
456474
}
457475

@@ -594,6 +612,77 @@ fn list_themes() -> Result<()> {
594612
Ok(())
595613
}
596614

615+
fn generate_individual_markdowns(files: &[FileInfo], repo_name: &str, include_toc: bool, temp_dir: &Path, verbose: bool) -> Result<Vec<PathBuf>> {
616+
let mut markdown_files = Vec::new();
617+
618+
// Generate title page markdown
619+
let title_markdown = temp_dir.join("scrollcast_title.md");
620+
let mut title_content = String::new();
621+
title_content.push_str(&format!("# {}\n\n", repo_name));
622+
title_content.push_str(&format!("Generated on: {}\n\n", chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC")));
623+
624+
if include_toc {
625+
title_content.push_str("## Table of Contents\n\n");
626+
for file in files {
627+
let escaped_path = escape_markdown_special_chars(&file.path);
628+
title_content.push_str(&format!("- {}\n", escaped_path));
629+
}
630+
title_content.push_str("\n");
631+
}
632+
633+
// Add file tree
634+
title_content.push_str("## File Structure\n\n");
635+
title_content.push_str("```\n");
636+
for file in files {
637+
title_content.push_str(&format!("{}\n", file.path));
638+
}
639+
title_content.push_str("```\n\n");
640+
641+
fs::write(&title_markdown, &title_content)
642+
.context("Failed to write title markdown")?;
643+
markdown_files.push(title_markdown);
644+
645+
if verbose {
646+
println!(" 📄 Generated title page");
647+
}
648+
649+
// Generate individual file markdowns
650+
for (index, file) in files.iter().enumerate() {
651+
let file_markdown = temp_dir.join(format!("scrollcast_file_{}.md", index));
652+
let mut file_content = String::new();
653+
654+
let sanitized_path = file.path.replace(['/', '\\'], "-").replace('.', "-");
655+
let escaped_path = escape_markdown_special_chars(&file.path);
656+
file_content.push_str(&format!("## {} {{#{sanitized_path}}}\n\n", escaped_path));
657+
file_content.push_str(&format!("**Size:** {}\n\n", format_file_size(file.size)));
658+
659+
if let Some(language) = &file.language {
660+
file_content.push_str(&format!("```{}\n", language));
661+
} else {
662+
file_content.push_str("```\n");
663+
}
664+
665+
let processed_content = process_content_for_latex(&file.content);
666+
file_content.push_str(&processed_content);
667+
668+
if !processed_content.ends_with('\n') {
669+
file_content.push('\n');
670+
}
671+
672+
file_content.push_str("```\n\n");
673+
674+
fs::write(&file_markdown, &file_content)
675+
.context("Failed to write file markdown")?;
676+
markdown_files.push(file_markdown);
677+
678+
if verbose {
679+
println!(" 📄 Generated markdown for: {}", file.path);
680+
}
681+
}
682+
683+
Ok(markdown_files)
684+
}
685+
597686
fn list_languages() -> Result<()> {
598687
println!("{}", "Supported programming languages:".color(Color::Blue).bold());
599688

src/markdown_generator.rs

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ impl MarkdownGenerator {
2424
}
2525
}
2626

27-
pub fn generate_markdown(&self, files: Vec<FileInfo>, repo_name: &str) -> Result<String> {
27+
pub fn generate_markdown(&self, files: &[FileInfo], repo_name: &str) -> Result<String> {
2828
let mut markdown = String::new();
2929

3030
// Title and metadata
@@ -34,7 +34,7 @@ impl MarkdownGenerator {
3434
// Table of contents
3535
if self.include_toc {
3636
markdown.push_str("## Table of Contents\n\n");
37-
for file in &files {
37+
for file in files {
3838
let sanitized_path = file.path.replace(['/', '\\'], "-").replace('.', "-");
3939
let escaped_path = self.escape_markdown_special_chars(&file.path);
4040
markdown.push_str(&format!("- [{}](#{sanitized_path})\n", escaped_path));
@@ -53,20 +53,19 @@ impl MarkdownGenerator {
5353
// File contents
5454
markdown.push_str("## File Contents\n\n");
5555

56-
let total_files = files.len();
57-
let mut global_page_number = 1; // Start after title/TOC page
56+
let _total_files = files.len();
57+
let mut _global_page_number = 1; // Start after title/TOC page
5858

59-
for (file_index, file) in files.into_iter().enumerate() {
60-
let file_counter = file_index + 1;
61-
global_page_number += 1; // Each file gets a new page
59+
for (file_index, file) in files.iter().enumerate() {
60+
let _file_counter = file_index + 1;
61+
_global_page_number += 1; // Each file gets a new page
6262

6363
// Add page break before each file (except the first one)
6464
markdown.push_str("\n\\newpage\n\n");
6565
let sanitized_path = file.path.replace(['/', '\\'], "-").replace('.', "-");
6666
let escaped_path = self.escape_markdown_special_chars(&file.path);
6767
markdown.push_str(&format!("### {} {{#{sanitized_path}}}\n\n", escaped_path));
68-
markdown.push_str(&format!("**File:** {} | **Size:** {} | **File #{} | Page {}**\n\n",
69-
escaped_path, MarkdownGenerator::format_file_size(file.size), file_counter, global_page_number));
68+
markdown.push_str(&format!("**Size:** {}\n\n", MarkdownGenerator::format_file_size(file.size)));
7069

7170
if let Some(language) = &file.language {
7271
markdown.push_str(&format!("```{}\n", language));
@@ -78,15 +77,12 @@ impl MarkdownGenerator {
7877
let processed_content = self.process_content_for_latex(&file.content);
7978
markdown.push_str(&processed_content);
8079

81-
if !file.content.ends_with('\n') {
80+
// Ensure there's always a newline before closing backticks
81+
if !processed_content.ends_with('\n') {
8282
markdown.push('\n');
8383
}
8484

8585
markdown.push_str("```\n\n");
86-
87-
// Add file info with page numbering
88-
markdown.push_str(&format!("*File size: {} | File #{} of {} | Page {}*\n\n",
89-
MarkdownGenerator::format_file_size(file.size), file_counter, total_files, global_page_number));
9086
markdown.push_str("---\n\n");
9187
}
9288

@@ -135,6 +131,20 @@ impl MarkdownGenerator {
135131

136132
pub fn detect_language(file_path: &str) -> Option<String> {
137133
let path = Path::new(file_path);
134+
135+
// Handle special cases first
136+
if let Some(file_name) = path.file_name() {
137+
let file_name_str = file_name.to_string_lossy();
138+
// Handle .env files and variants like .env.local, .env.production, etc.
139+
if file_name_str.starts_with(".env") {
140+
return Some("bash".to_string());
141+
}
142+
// Handle Dockerfile variants
143+
if file_name_str.to_lowercase().starts_with("dockerfile") {
144+
return Some("dockerfile".to_string());
145+
}
146+
}
147+
138148
let extension = path.extension()?.to_str()?;
139149

140150
let language = match extension.to_lowercase().as_str() {
@@ -169,7 +179,8 @@ impl MarkdownGenerator {
169179
"r" => "r",
170180
"sql" => "sql",
171181
"dockerfile" => "dockerfile",
172-
"sol" => "javascript", // Use JavaScript highlighting for Solidity as fallback
182+
"env" => "bash", // Environment files use shell-like syntax
183+
"sol" => "solidity",
173184
"vy" => "python", // Vyper (use python highlighting as fallback)
174185
"move" => "rust", // Move language (use rust as fallback)
175186
_ => return None,
@@ -201,7 +212,7 @@ impl MarkdownGenerator {
201212
let mut current_line = String::new();
202213
let chars: Vec<char> = line.chars().collect();
203214

204-
for (i, &ch) in chars.iter().enumerate() {
215+
for (_i, &ch) in chars.iter().enumerate() {
205216
current_line.push(ch);
206217

207218
// Break at 100 characters or at natural breakpoints

0 commit comments

Comments
 (0)