diff --git a/.codetesting/AnalysisReport_20260125_220624_678.md b/.codetesting/AnalysisReport_20260125_220624_678.md deleted file mode 100644 index bef7b7d5..00000000 --- a/.codetesting/AnalysisReport_20260125_220624_678.md +++ /dev/null @@ -1,20 +0,0 @@ -# Test Failures due possible code bugs - -## Tests.csproj - Text_Grab.Utilities.UnitTests.WindowsAiUtilitiesTests.CleanRegexResult_OnlyOpeningFence_ReturnsPattern -- **Confidence**: High -- **Test File**: Tests\Utilities\WindowsAiUtilitiesTests.cs -- **Bug Location**: Text-Grab\Utilities\WindowsAiUtilities.cs@588-592 - -### Analysis -The production code has a logical error in the CleanRegexResult method. The Where clause at lines 588-592 filters out lines starting with 'Pattern:' (case-insensitive) BEFORE the Select clause at lines 593-601 can remove the 'pattern:' prefix. When the input is '```\npattern: [a-z]+', after removing the opening fence, we have 'pattern: [a-z]+'. This line gets filtered out by the Where clause because it starts with 'Pattern:' (case-insensitive), so the Select clause never gets a chance to remove the prefix. The method then returns the cleaned text as-is ('pattern: [a-z]+') instead of the extracted pattern ('[a-z]+'). The fix is to remove the filtering of 'Pattern:', 'Regex:', and 'Expression:' from the Where clause (lines 590-592), allowing the Select clause to handle prefix removal. - -### Suggested Fix -In the CleanRegexResult method at D:\source\TheJoeFin\Text-Grab\Text-Grab\Utilities\WindowsAiUtilities.cs, remove lines 590-592 from the Where clause. The Where clause should only filter out comment lines (lines starting with '//' or '#'), not descriptor lines like 'Regex:', 'Pattern:', or 'Expression:', since the subsequent Select clause is designed to handle removing these prefixes. The corrected Where clause should be: - -```csharp -.Where(line => !line.StartsWith("//", StringComparison.Ordinal) && - !line.StartsWith('#')) -``` - -This allows lines with 'pattern:', 'regex:', or 'expression:' prefixes to reach the Select clause where the prefixes are properly removed. - diff --git a/.github/workflows/Release.yml b/.github/workflows/Release.yml index db3fba66..79f9c39c 100644 --- a/.github/workflows/Release.yml +++ b/.github/workflows/Release.yml @@ -36,7 +36,7 @@ jobs: build: runs-on: windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup .NET uses: actions/setup-dotnet@v5 @@ -231,25 +231,25 @@ jobs: } - name: Upload build artifact (x64 framework-dependent) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-x64-framework-dependent path: ${{ env.BUILD_X64 }} - name: Upload build artifact (x64 self-contained) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-x64-self-contained path: ${{ steps.compute.outputs.archive_x64_sc }} - name: Upload build artifact (ARM64 framework-dependent) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-arm64-framework-dependent path: ${{ env.BUILD_ARM64 }} - name: Upload build artifact (ARM64 self-contained) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-arm64-self-contained path: ${{ steps.compute.outputs.archive_arm64_sc }} diff --git a/.github/workflows/buildDev.yml b/.github/workflows/buildDev.yml index 71e04cf9..ae14fbb8 100644 --- a/.github/workflows/buildDev.yml +++ b/.github/workflows/buildDev.yml @@ -17,7 +17,7 @@ jobs: build: runs-on: windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup .NET uses: actions/setup-dotnet@v5 with: @@ -33,7 +33,7 @@ jobs: run: dotnet publish ${{ env.PROJECT_PATH }} -c Release --self-contained -r win-x64 -p:PublishSingleFile=true -p:EnableMsixTooling=true -o publish - name: Upload artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab path: .\publish diff --git a/Tests/ClipboardUtilitiesTests.cs b/Tests/ClipboardUtilitiesTests.cs new file mode 100644 index 00000000..f88de72c --- /dev/null +++ b/Tests/ClipboardUtilitiesTests.cs @@ -0,0 +1,137 @@ +using Text_Grab.Utilities; + +namespace Tests; + +public class ClipboardUtilitiesTests +{ + private const string SampleCfHtml = """ + Version:1.0 + StartHTML:00000097 + EndHTML:00002353 + StartFragment:00000153 + EndFragment:00002320 + + + + + + + + + + + + + + + + + + +
MonthIntSeason
January1Winter
February2Winter
+ + + """; + + [Fact] + public void ConvertHtmlToTabSeparated_ParsesBasicTable() + { + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(SampleCfHtml); + + string[] lines = result.Split('\n'); + Assert.Equal(3, lines.Length); + Assert.Equal("Month\tInt\tSeason", lines[0]); + Assert.Equal("January\t1\tWinter", lines[1]); + Assert.Equal("February\t2\tWinter", lines[2]); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesBrTag() + { + string html = """ + + +
4
A
Spring
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + Assert.Equal("4 A\tSpring", result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_ReturnsEmptyWhenNoTable() + { + string html = "

No table here

"; + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + Assert.Empty(result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_DecodesHtmlEntities() + { + string html = """ + + +
A & B<tag>
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + Assert.Equal("A & B\t", result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesThElements() + { + string html = """ + + + +
NameValue
Foo42
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Name\tValue", lines[0]); + Assert.Equal("Foo\t42", lines[1]); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesColspan() + { + string html = """ + + + +
MergedRight
ABC
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Merged\tMerged\tRight", lines[0]); + Assert.Equal("A\tB\tC", lines[1]); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesRowspan() + { + string html = """ + + + +
TallTop
Bottom
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Tall\tTop", lines[0]); + Assert.Equal("Tall\tBottom", lines[1]); + } +} diff --git a/Tests/EditTextWindowSpreadsheetTests.cs b/Tests/EditTextWindowSpreadsheetTests.cs index 89eadb6b..c8f96203 100644 --- a/Tests/EditTextWindowSpreadsheetTests.cs +++ b/Tests/EditTextWindowSpreadsheetTests.cs @@ -35,6 +35,65 @@ public void ClearSpreadsheetCellValues_ClearsOnlyRequestedCells() Assert.Equal(string.Empty, dataTable.Rows[1][2]); } + [Fact] + public void TryCutSpreadsheetCellValues_CopiesThenClearsRequestedCells() + { + DataTable dataTable = new(); + dataTable.Columns.Add("A", typeof(string)); + dataTable.Columns.Add("B", typeof(string)); + dataTable.Columns.Add("C", typeof(string)); + dataTable.Rows.Add("a1", "b1", "c1"); + dataTable.Rows.Add("a2", "b2", "c2"); + + string clipboardText = string.Empty; + + bool didCut = EditTextWindow.TryCutSpreadsheetCellValues( + dataTable, + [ + (1, 2), + (0, 1), + (1, 0), + (0, 1), + (-1, 0), + (5, 5) + ], + text => + { + clipboardText = text; + return true; + }); + + Assert.True(didCut); + Assert.Equal("b1" + Environment.NewLine + "a2\tc2", clipboardText); + Assert.Equal("a1", dataTable.Rows[0][0]); + Assert.Equal(string.Empty, dataTable.Rows[0][1]); + Assert.Equal("c1", dataTable.Rows[0][2]); + Assert.Equal(string.Empty, dataTable.Rows[1][0]); + Assert.Equal("b2", dataTable.Rows[1][1]); + Assert.Equal(string.Empty, dataTable.Rows[1][2]); + } + + [Fact] + public void TryCutSpreadsheetCellValues_DoesNotClearWhenClipboardCopyFails() + { + DataTable dataTable = new(); + dataTable.Columns.Add("A", typeof(string)); + dataTable.Columns.Add("B", typeof(string)); + dataTable.Rows.Add("a1", "b1"); + + bool didCut = EditTextWindow.TryCutSpreadsheetCellValues( + dataTable, + [ + (0, 0), + (0, 1) + ], + _ => false); + + Assert.False(didCut); + Assert.Equal("a1", dataTable.Rows[0][0]); + Assert.Equal("b1", dataTable.Rows[0][1]); + } + [Fact] public void BuildSpreadsheetSelectionText_IncludesOnlySelectedCells() { diff --git a/Tests/FilesIoTests.cs b/Tests/FilesIoTests.cs index 6fbb5403..6560b7ca 100644 --- a/Tests/FilesIoTests.cs +++ b/Tests/FilesIoTests.cs @@ -1,4 +1,6 @@ -using System.Drawing; +using System.Drawing; +using System.IO; +using System.Windows; using Text_Grab; using Text_Grab.Models; using Text_Grab.Utilities; @@ -107,4 +109,91 @@ public void GetEditorModeForPath_UsesFileExtension(string path, EtwEditorMode ex { Assert.Equal(expectedMode, IoUtilities.GetEditorModeForPath(path)); } + + [Theory] + [InlineData(@"C:\Temp\scan.png", OpenContentKind.Image)] + [InlineData(@"C:\Temp\scan.PDF", OpenContentKind.PdfDocument)] + [InlineData(@"C:\Temp\notes.txt", OpenContentKind.TextFile)] + public void GetOpenContentKindForPath_ClassifiesVisualDocumentsAndText(string path, OpenContentKind expectedKind) + { + Assert.Equal(expectedKind, IoUtilities.GetOpenContentKindForPath(path)); + } + + [Theory] + [InlineData(".png", true)] + [InlineData(".PDF", true)] + [InlineData(".txt", false)] + [InlineData("", false)] + public void IsVisualDocumentFileExtension_RecognizesImagesAndPdf(string extension, bool expected) + { + Assert.Equal(expected, IoUtilities.IsVisualDocumentFileExtension(extension)); + } + + [Fact] + public void GetVisualDocumentFilter_IncludesPdfSupport() + { + string filter = FileUtilities.GetVisualDocumentFilter(); + + Assert.Contains("Image and PDF files|", filter); + Assert.Contains("PDF files|*.pdf", filter); + Assert.Contains("Image files|", filter); + } + + [Fact] + public void GetOpenDocumentFilter_IncludesVisualAndTextOptions() + { + string filter = FileUtilities.GetOpenDocumentFilter(); + + Assert.Contains("Supported documents|", filter); + Assert.Contains("Image and PDF files|", filter); + Assert.Contains("Spreadsheet documents|*.csv;*.tsv;*.tab", filter); + Assert.Contains("Markdown documents|*.md;*.markdown", filter); + Assert.Contains("Text documents (*.txt)|*.txt", filter); + Assert.Contains("All files (*.*)|*.*", filter); + } + + [WpfFact] + public void GetDroppedFilePaths_ReturnsExistingFilesOnly() + { + string firstPath = Path.GetTempFileName(); + string secondPath = Path.GetTempFileName(); + string missingPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.txt"); + DataObject dataObject = new(DataFormats.FileDrop, new[] { firstPath, missingPath, secondPath }); + + try + { + IReadOnlyList paths = App.GetDroppedFilePaths(dataObject); + + Assert.Equal([firstPath, secondPath], paths); + } + finally + { + File.Delete(firstPath); + File.Delete(secondPath); + } + } + + [WpfFact] + public void GetDroppedFileEffect_ReturnsCopyWhenExistingFilesAreDropped() + { + string path = Path.GetTempFileName(); + DataObject dataObject = new(DataFormats.FileDrop, new[] { path }); + + try + { + Assert.Equal(DragDropEffects.Copy, App.GetDroppedFileEffect(dataObject)); + } + finally + { + File.Delete(path); + } + } + + [WpfFact] + public void GetDroppedFileEffect_ReturnsNoneWhenNoFilesCanBeOpened() + { + DataObject dataObject = new(DataFormats.Text, "hello"); + + Assert.Equal(DragDropEffects.None, App.GetDroppedFileEffect(dataObject)); + } } diff --git a/Tests/PdfDocumentRendererTests.cs b/Tests/PdfDocumentRendererTests.cs new file mode 100644 index 00000000..8d00801c --- /dev/null +++ b/Tests/PdfDocumentRendererTests.cs @@ -0,0 +1,89 @@ +using Text_Grab.Utilities; +using UglyToad.PdfPig.Core; +using Windows.Media.Ocr; + +namespace Tests; + +public class PdfDocumentRendererTests +{ + [Fact] + public void GetRenderDimensions_DoublesTypicalPdfPageSize() + { + (uint width, uint height) = PdfDocumentRenderer.GetRenderDimensions(612, 792); + + Assert.Equal(1224u, width); + Assert.Equal(1584u, height); + } + + [Fact] + public void GetRenderDimensions_ClampsToOcrEngineLimit() + { + (uint width, uint height) = PdfDocumentRenderer.GetRenderDimensions(5000, 2500); + + Assert.True(Math.Max(width, height) <= OcrEngine.MaxImageDimension); + Assert.True(width > height); + } + + [Fact] + public void GetRenderDimensions_InvalidSize_ReturnsSinglePixel() + { + (uint width, uint height) = PdfDocumentRenderer.GetRenderDimensions(0, -1); + + Assert.Equal(1u, width); + Assert.Equal(1u, height); + } + + [Fact] + public void ConvertPdfRectToImageRect_MapsPdfCoordinatesToRenderedBitmapSpace() + { + PdfRectangle pdfRect = new(10, 20, 60, 80); + + Windows.Foundation.Rect imageRect = PdfDocumentRenderer.ConvertPdfRectToImageRect(pdfRect, 100, 100, 200, 200); + + Assert.Equal(20, imageRect.X); + Assert.Equal(40, imageRect.Y); + Assert.Equal(100, imageRect.Width); + Assert.Equal(120, imageRect.Height); + } + + [Fact] + public void GroupWordsIntoLines_GroupsNearbyWordsIntoSingleLine() + { + IReadOnlyList lines = PdfDocumentRenderer.GroupWordsIntoLines( + [ + (new Windows.Foundation.Rect(10, 10, 20, 12), "Hello"), + (new Windows.Foundation.Rect(35, 11, 25, 12), "world"), + (new Windows.Foundation.Rect(12, 40, 30, 12), "Again") + ]); + + Assert.Collection( + lines, + firstLine => + { + Assert.Equal("Hello world", firstLine.Text); + Assert.True(firstLine.IsNativeText); + Assert.Equal(10, firstLine.SourceRect.X); + Assert.Equal(10, firstLine.SourceRect.Y); + Assert.Equal(50, firstLine.SourceRect.Width); + Assert.Equal(13, firstLine.SourceRect.Height); + }, + secondLine => Assert.Equal("Again", secondLine.Text)); + } + + [Fact] + public void ShouldIncludeOcrLine_OnlyReturnsTrueWhenImageOverlapIsMeaningful() + { + Windows.Foundation.Rect sourceRect = new(0, 0, 10, 10); + + bool shouldIncludeFromLargeOverlap = PdfDocumentRenderer.ShouldIncludeOcrLine( + sourceRect, + [new Windows.Foundation.Rect(5, 5, 10, 10)]); + + bool shouldIgnoreFromSmallOverlap = PdfDocumentRenderer.ShouldIncludeOcrLine( + sourceRect, + [new Windows.Foundation.Rect(8, 8, 10, 10)]); + + Assert.True(shouldIncludeFromLargeOverlap); + Assert.False(shouldIgnoreFromSmallOverlap); + } +} diff --git a/Text-Grab/App.xaml.cs b/Text-Grab/App.xaml.cs index 85738b95..415ec3fa 100644 --- a/Text-Grab/App.xaml.cs +++ b/Text-Grab/App.xaml.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.Linq; using System.Threading.Tasks; using System.Windows; using System.Windows.Markup; @@ -74,6 +75,49 @@ public static void DefaultLaunch() SetTheme(); } + public static async Task OpenFileWithPickerAsync(bool isQuiet = false) + { + OpenFileDialog openFileDialog = new() + { + Filter = FileUtilities.GetOpenDocumentFilter(), + Title = "Open File", + CheckFileExists = true, + InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + }; + + if (openFileDialog.ShowDialog() == true) + await TryToOpenFilePathAsync(openFileDialog.FileName, isQuiet); + } + + public static DragDropEffects GetDroppedFileEffect(IDataObject? dataObject) + { + return GetDroppedFilePaths(dataObject).Any() + ? DragDropEffects.Copy + : DragDropEffects.None; + } + + public static IReadOnlyList GetDroppedFilePaths(IDataObject? dataObject) + { + if (dataObject is null || !dataObject.GetDataPresent(DataFormats.FileDrop, true)) + return []; + + + if (dataObject.GetData(DataFormats.FileDrop, true) is not string[] paths || paths.Length == 0) + return []; + + return [.. paths.Where(File.Exists)]; + } + + public static async Task TryToOpenDroppedFilesAsync(IDataObject? dataObject, bool isQuiet = false) + { + bool openedAny = false; + + foreach (string path in GetDroppedFilePaths(dataObject)) + openedAny = await TryToOpenFilePathAsync(path, isQuiet) || openedAny; + + return openedAny; + } + public static void SetTheme(object? sender = null, EventArgs? e = null) { bool gotTheme = Enum.TryParse(_defaultSettings.AppTheme.ToString(), true, out AppTheme currentAppTheme); @@ -240,7 +284,7 @@ private static async Task HandleStartupArgs(string[] args) } else { - Debug.WriteLine("--grabframe flag specified but no valid image file path provided"); + Debug.WriteLine("--grabframe flag specified but no valid image or PDF file path provided"); // Fall through to default launch behavior } } @@ -265,7 +309,7 @@ private static async Task HandleStartupArgs(string[] args) return true; } - bool openedFile = await TryToOpenFile(currentArgument, isQuiet); + bool openedFile = await TryToOpenFilePathAsync(currentArgument, isQuiet); if (openedFile) return true; @@ -305,7 +349,7 @@ private static void ShowAndSetFirstRun() _defaultSettings.Save(); } - private static async Task TryToOpenFile(string possiblePath, bool isQuiet) + public static async Task TryToOpenFilePathAsync(string possiblePath, bool isQuiet = false) { if (!File.Exists(possiblePath)) return false; @@ -318,7 +362,7 @@ private static async Task TryToOpenFile(string possiblePath, bool isQuiet) false, false); } - else if (IoUtilities.IsImageFile(possiblePath)) + else if (IoUtilities.IsVisualDocumentFile(possiblePath)) { GrabFrame gf = new(possiblePath); gf.Show(); @@ -329,6 +373,7 @@ private static async Task TryToOpenFile(string possiblePath, bool isQuiet) EditTextWindow manipulateTextWindow = new(); manipulateTextWindow.OpenPath(possiblePath); manipulateTextWindow.Show(); + manipulateTextWindow.Activate(); } return true; } diff --git a/Text-Grab/Controls/FindAndReplaceWindow.xaml b/Text-Grab/Controls/FindAndReplaceWindow.xaml index a6677039..8935d419 100644 --- a/Text-Grab/Controls/FindAndReplaceWindow.xaml +++ b/Text-Grab/Controls/FindAndReplaceWindow.xaml @@ -289,8 +289,7 @@ - - + @@ -302,23 +301,18 @@ + Text="{Binding LocationDisplay}" /> - diff --git a/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs b/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs index 851735d3..f04bd253 100644 --- a/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs +++ b/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs @@ -59,6 +59,8 @@ public FindAndReplaceWindow() #region Properties + private bool IsSpreadsheetSearch => textEditWindow?.IsSpreadsheetMode is true; + public List FindResults { get; set; } = []; public string StringFromWindow @@ -85,6 +87,8 @@ public EditTextWindow? TextEditWindow public void SearchForText() { + if (IsSpreadsheetSearch) { SearchSpreadsheetCells(); return; } + FindResults.Clear(); ResultsListView.ItemsSource = null; @@ -180,6 +184,61 @@ public void SearchForText() } } + private Regex? BuildCurrentRegex() + { + string rawPattern = FindTextBox.Text; + if (string.IsNullOrEmpty(rawPattern)) return null; + + if (rawPattern.StartsWith('^') && rawPattern.EndsWith('$') && rawPattern.Length > 2) + rawPattern = rawPattern[1..^1]; + + if (UsePatternCheckBox.IsChecked is false && ExactMatchCheckBox.IsChecked is bool matchExactly) + rawPattern = rawPattern.EscapeSpecialRegexChars(matchExactly); + + RegexOptions options = RegexOptions.None; + if (ExactMatchCheckBox.IsChecked is not true) options |= RegexOptions.IgnoreCase; + if (UsePatternCheckBox.IsChecked is true) options |= RegexOptions.IgnorePatternWhitespace; + + try { return new Regex(rawPattern, options, TimeSpan.FromSeconds(5)); } + catch { return null; } + } + + private void SearchSpreadsheetCells() + { + FindResults.Clear(); + ResultsListView.ItemsSource = null; + Matches = null; + + if (textEditWindow is null || string.IsNullOrWhiteSpace(FindTextBox.Text)) + { + MatchesText.Text = "0 Matches"; + return; + } + + Regex? regex = BuildCurrentRegex(); + if (regex is null) { MatchesText.Text = "0 Matches"; return; } + + textEditWindow.CommitSpreadsheetAndSync(); + + List results; + try { results = textEditWindow.SearchSpreadsheetCells(regex); } + catch (RegexMatchTimeoutException) { MatchesText.Text = "Regex timeout"; return; } + + FindResults.AddRange(results); + if (FindResults.Count == 0) { MatchesText.Text = "0 Matches"; return; } + + MatchesText.Text = FindResults.Count == 1 ? "1 Match" : $"{FindResults.Count} Matches"; + ResultsListView.IsEnabled = true; + ResultsListView.ItemsSource = FindResults; + + FindResult first = FindResults[0]; + if (this.IsFocused && first.RowIndex.HasValue && first.ColumnIndex.HasValue) + { + textEditWindow.NavigateToSpreadsheetCell(first.RowIndex.Value, first.ColumnIndex.Value); + this.Focus(); + } + } + public void ShouldCloseWithThisETW(EditTextWindow etw) { if (textEditWindow is not null && etw == textEditWindow) @@ -200,6 +259,12 @@ private void PrecisionSlider_Tick(object? sender, EventArgs? e) private void CopyMatchesCmd_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(FindTextBox.Text); + return; + } + if (Matches is null || Matches.Count < 1 || string.IsNullOrEmpty(FindTextBox.Text)) e.CanExecute = false; else @@ -208,9 +273,9 @@ private void CopyMatchesCmd_CanExecute(object sender, CanExecuteRoutedEventArgs private void CopyMatchesCmd_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || textEditWindow is null - || Matches.Count < 1) + if (textEditWindow is null) return; + + if (!IsSpreadsheetSearch && (Matches is null || Matches.Count < 1)) return; StringBuilder stringBuilder = new(); @@ -230,6 +295,12 @@ private void CopyMatchesCmd_Executed(object sender, ExecutedRoutedEventArgs e) private void DeleteAll_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(FindTextBox.Text); + return; + } + if (Matches is not null && Matches.Count > 1 && !string.IsNullOrEmpty(FindTextBox.Text)) e.CanExecute = true; else @@ -238,24 +309,41 @@ private void DeleteAll_CanExecute(object sender, CanExecuteRoutedEventArgs e) private async void DeleteAll_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || Matches.Count < 1 - || textEditWindow is null) + if (textEditWindow is null) return; + + if (IsSpreadsheetSearch) + { + if (FindResults.Count == 0) return; + SetWindowToLoading(); + Regex? regex = BuildCurrentRegex(); + if (regex is null) { ResetWindowLoading(); return; } + IList selection = ResultsListView.SelectedItems; + List targets = selection.Count >= 2 + ? [.. selection.Cast()] + : [.. ResultsListView.Items.Cast()]; + await Task.Run(() => Dispatcher.Invoke(() => + textEditWindow.ReplaceInSpreadsheetCells(targets, string.Empty, regex))); + SearchForText(); + ResetWindowLoading(); + return; + } + + if (Matches is null || Matches.Count < 1) return; SetWindowToLoading(); - IList selection = ResultsListView.SelectedItems; + IList selection2 = ResultsListView.SelectedItems; StringBuilder stringBuilderOfText = new(textEditWindow.PassedTextControl.Text); await Task.Run(() => { - if (selection.Count < 2) - selection = ResultsListView.Items; + if (selection2.Count < 2) + selection2 = ResultsListView.Items; - for (int j = selection.Count - 1; j >= 0; j--) + for (int j = selection2.Count - 1; j >= 0; j--) { - if (selection[j] is not FindResult selectedResult) + if (selection2[j] is not FindResult selectedResult) continue; stringBuilderOfText.Remove(selectedResult.Index, selectedResult.Length); @@ -270,6 +358,8 @@ await Task.Run(() => private void EditTextBoxChanged(object sender, TextChangedEventArgs e) { + if (IsSpreadsheetSearch) return; + ChangeFindTextTimer.Stop(); if (textEditWindow is not null) StringFromWindow = textEditWindow.PassedTextControl.Text; @@ -279,6 +369,8 @@ private void EditTextBoxChanged(object sender, TextChangedEventArgs e) private void ExtractPattern_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) { e.CanExecute = false; return; } + if (textEditWindow is not null && textEditWindow.PassedTextControl.SelectedText.Length > 0) e.CanExecute = true; @@ -410,6 +502,12 @@ private void OptionsChangedRefresh(object sender, RoutedEventArgs e) private void Replace_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(ReplaceTextBox.Text); + return; + } + if (string.IsNullOrEmpty(ReplaceTextBox.Text) || Matches is null || Matches.Count < 1) @@ -420,10 +518,21 @@ private void Replace_CanExecute(object sender, CanExecuteRoutedEventArgs e) private void Replace_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || textEditWindow is null - || ResultsListView.Items.Count is 0) + if (textEditWindow is null || ResultsListView.Items.Count is 0) + return; + + if (IsSpreadsheetSearch) + { + if (ResultsListView.SelectedIndex == -1) ResultsListView.SelectedIndex = 0; + if (ResultsListView.SelectedItem is not FindResult fr) return; + Regex? regex = BuildCurrentRegex(); + if (regex is null) return; + textEditWindow.ReplaceInSpreadsheetCells([fr], ReplaceTextBox.Text, regex); + SearchForText(); return; + } + + if (Matches is null) return; if (ResultsListView.SelectedIndex == -1) ResultsListView.SelectedIndex = 0; @@ -439,26 +548,44 @@ private void Replace_Executed(object sender, ExecutedRoutedEventArgs e) private async void ReplaceAll_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || Matches.Count < 1 - || textEditWindow is null) + if (textEditWindow is null) return; + + if (IsSpreadsheetSearch) + { + if (FindResults.Count == 0) return; + SetWindowToLoading(); + Regex? regex = BuildCurrentRegex(); + if (regex is null) { ResetWindowLoading(); return; } + IList selection = ResultsListView.SelectedItems; + List targets = selection.Count >= 2 + ? [.. selection.Cast()] + : [.. ResultsListView.Items.Cast()]; + string replaceWith = ReplaceTextBox.Text; + await Task.Run(() => Dispatcher.Invoke(() => + textEditWindow.ReplaceInSpreadsheetCells(targets, replaceWith, regex))); + SearchForText(); + ResetWindowLoading(); + return; + } + + if (Matches is null || Matches.Count < 1) return; SetWindowToLoading(); StringBuilder stringBuilder = new(textEditWindow.PassedTextControl.Text); - IList selection = ResultsListView.SelectedItems; + IList selection2 = ResultsListView.SelectedItems; string newText = ReplaceTextBox.Text; await Task.Run(() => { - if (selection.Count < 2) - selection = ResultsListView.Items; + if (selection2.Count < 2) + selection2 = ResultsListView.Items; - for (int j = selection.Count - 1; j >= 0; j--) + for (int j = selection2.Count - 1; j >= 0; j--) { - if (selection[j] is not FindResult selectedResult) + if (selection2[j] is not FindResult selectedResult) continue; stringBuilder.Remove(selectedResult.Index, selectedResult.Length); @@ -486,15 +613,21 @@ private void SetWindowToLoading() private void ResultsListView_SelectionChanged(object sender, SelectionChangedEventArgs e) { - if (ResultsListView.SelectedItem is not FindResult selectedResult) + if (ResultsListView.SelectedItem is not FindResult selectedResult || textEditWindow is null) return; - if (textEditWindow is not null) + if (IsSpreadsheetSearch) { - textEditWindow.PassedTextControl.Focus(); - textEditWindow.PassedTextControl.Select(selectedResult.Index, selectedResult.Length); + if (selectedResult.RowIndex.HasValue && selectedResult.ColumnIndex.HasValue) + textEditWindow.NavigateToSpreadsheetCell( + selectedResult.RowIndex.Value, selectedResult.ColumnIndex.Value); this.Focus(); + return; } + + textEditWindow.PassedTextControl.Focus(); + textEditWindow.PassedTextControl.Select(selectedResult.Index, selectedResult.Length); + this.Focus(); } private void SetExtraOptionsVisibility(Visibility optionsVisibility) diff --git a/Text-Grab/Controls/NotifyIconWindow.xaml b/Text-Grab/Controls/NotifyIconWindow.xaml index bd6e13f7..946db309 100644 --- a/Text-Grab/Controls/NotifyIconWindow.xaml +++ b/Text-Grab/Controls/NotifyIconWindow.xaml @@ -37,6 +37,14 @@ + + + + + Canvas.GetLeft(this); + private set => Canvas.SetLeft(this, value); + } + + public double Top + { + get => Canvas.GetTop(this); + private set => Canvas.SetTop(this, value); + } + + public string Text { get; } + + public bool WasRegionSelected { get; set; } + + public void ApplyLayout(Rect bounds) + { + Width = Math.Max(1, bounds.Width + 2); + Height = Math.Max(1, bounds.Height + 2); + Left = Math.Max(0, bounds.X - 1); + Top = Math.Max(0, bounds.Y - 1); + + if (Child is TextBlock textBlock) + { + textBlock.FontSize = Math.Max(1, bounds.Height * 0.75); + textBlock.LineHeight = Math.Max(1, bounds.Height); + } + } + + public void Deselect() + { + IsSelected = false; + Background = Brushes.Transparent; + BorderBrush = Brushes.Transparent; + BorderThickness = new Thickness(0); + } + + public bool IntersectsWith(Rect rectToCheck) + { + Rect overlayRect = new(Left, Top, Width, Height); + return rectToCheck.IntersectsWith(overlayRect); + } + + public void Select() + { + IsSelected = true; + Background = DefaultHighlightBrush; + BorderBrush = DefaultBorderBrush; + BorderThickness = new Thickness(1); + } +} diff --git a/Text-Grab/Controls/ZoomBorder.cs b/Text-Grab/Controls/ZoomBorder.cs index dc29ee82..e03e9e92 100644 --- a/Text-Grab/Controls/ZoomBorder.cs +++ b/Text-Grab/Controls/ZoomBorder.cs @@ -1,10 +1,11 @@ -using System.Linq; +using System.Linq; using System.Windows; using System.Windows.Controls; using System.Windows.Input; using System.Windows.Media; +using System.Windows.Media.Media3D; -// From StackOverFlow: +// From StackOverFlow: // https://stackoverflow.com/questions/741956/pan-zoom-image // Answered by https://stackoverflow.com/users/282801/wies%c5%82aw-%c5%a0olt%c3%a9s // Read on 2024-05-02 @@ -15,6 +16,7 @@ namespace Text_Grab.Controls; public class ZoomBorder : Border { private UIElement? child = null; + private bool isPanning = false; private Point origin; private Point start; @@ -41,6 +43,10 @@ public override UIElement Child public bool CanZoom { get; set; } = true; + public bool IsSpacePanModifierPressed { get; set; } = false; + + public bool RequireSpaceToPan { get; set; } = false; + public void Initialize(UIElement element) { child = element; @@ -55,18 +61,9 @@ public void Initialize(UIElement element) child.RenderTransform = group; child.RenderTransformOrigin = new Point(0.0, 0.0); MouseWheel += Child_MouseWheel; - MouseLeftButtonDown += Child_MouseLeftButtonDown; - MouseLeftButtonUp += Child_MouseLeftButtonUp; - PreviewMouseDown += ZoomBorder_PreviewMouseDown; - MouseMove += Child_MouseMove; - PreviewMouseRightButtonDown += new MouseButtonEventHandler( - Child_PreviewMouseRightButtonDown); - } - - private void ZoomBorder_PreviewMouseDown(object sender, MouseButtonEventArgs e) - { - if (e.MiddleButton == MouseButtonState.Pressed) - Reset(); + AddHandler(Mouse.PreviewMouseDownEvent, new MouseButtonEventHandler(Child_PreviewMouseDown), true); + AddHandler(Mouse.PreviewMouseUpEvent, new MouseButtonEventHandler(Child_PreviewMouseUp), true); + AddHandler(Mouse.PreviewMouseMoveEvent, new MouseEventHandler(Child_MouseMove), true); } public void Reset() @@ -84,9 +81,43 @@ public void Reset() tt.X = 0.0; tt.Y = 0.0; + isPanning = false; + ReleaseMouseCapture(); + Cursor = Cursors.Arrow; CanPan = false; } + private bool IsPanGestureActive() => + !RequireSpaceToPan || IsSpacePanModifierPressed || Keyboard.IsKeyDown(Key.Space); + + private bool BlocksPanFromSource(object? originalSource) + { + DependencyObject? current = originalSource switch + { + DependencyObject dependencyObject => dependencyObject, + null => null, + _ => null + }; + + while (current is not null) + { + if (current is TextBox) + return true; + + if (current is PdfTextLineOverlay) + return !IsPanGestureActive(); + + current = current switch + { + Visual visual => VisualTreeHelper.GetParent(visual), + Visual3D visual3D => VisualTreeHelper.GetParent(visual3D), + _ => null + }; + } + + return false; + } + private void Child_MouseWheel(object sender, MouseWheelEventArgs e) { if (child is null || !CanZoom) @@ -115,45 +146,68 @@ private void Child_MouseWheel(object sender, MouseWheelEventArgs e) CanPan = true; } - private void Child_MouseLeftButtonDown(object sender, MouseButtonEventArgs e) + private void Child_PreviewMouseDown(object sender, MouseButtonEventArgs e) { - if (child is null) + if (e.ChangedButton == MouseButton.Middle) + { + Reset(); + e.Handled = true; return; + } + + if (e.ChangedButton != MouseButton.Left) + return; + + if (child is null + || GetScaleTransform(child) is not ScaleTransform st + || st.ScaleX == 1.0 + || !CanPan + || !IsPanGestureActive() + || BlocksPanFromSource(e.OriginalSource)) + { + return; + } TranslateTransform tt = GetTranslateTransform(child); start = e.GetPosition(this); origin = new Point(tt.X, tt.Y); + + bool captured = CaptureMouse(); + if (!captured) + return; + + isPanning = true; Cursor = Cursors.Hand; - // child.CaptureMouse(); + e.Handled = true; } - private void Child_MouseLeftButtonUp(object sender, MouseButtonEventArgs e) + private void Child_PreviewMouseUp(object sender, MouseButtonEventArgs e) { - if (child is null) + if (e.ChangedButton != MouseButton.Left || child is null || !isPanning) return; - child.ReleaseMouseCapture(); + isPanning = false; + ReleaseMouseCapture(); Cursor = Cursors.Arrow; - } - - private void Child_PreviewMouseRightButtonDown(object sender, MouseButtonEventArgs e) - { + e.Handled = true; } private void Child_MouseMove(object sender, MouseEventArgs e) { - if (e.OriginalSource is TextBox) + if (!isPanning && BlocksPanFromSource(e.OriginalSource)) return; if (child is null || GetScaleTransform(child) is not ScaleTransform st || st.ScaleX == 1.0 - || Mouse.LeftButton == MouseButtonState.Released + || !isPanning || !CanPan || KeyboardExtensions.IsShiftDown() || KeyboardExtensions.IsCtrlDown()) { - child?.ReleaseMouseCapture(); + isPanning = false; + ReleaseMouseCapture(); + Cursor = Cursors.Arrow; return; } @@ -161,5 +215,6 @@ private void Child_MouseMove(object sender, MouseEventArgs e) Vector v = start - e.GetPosition(this); tt.X = origin.X - v.X; tt.Y = origin.Y - v.Y; + e.Handled = true; } } diff --git a/Text-Grab/Enums.cs b/Text-Grab/Enums.cs index 52824fa6..4ed5a1f5 100644 --- a/Text-Grab/Enums.cs +++ b/Text-Grab/Enums.cs @@ -33,6 +33,7 @@ public enum OpenContentKind Image = 0, TextFile = 1, Directory = 2, + PdfDocument = 3, } public enum OcrEngineKind diff --git a/Text-Grab/Models/FindResult.cs b/Text-Grab/Models/FindResult.cs index 7a083c6e..4b7da09f 100644 --- a/Text-Grab/Models/FindResult.cs +++ b/Text-Grab/Models/FindResult.cs @@ -13,11 +13,22 @@ public class FindResult public string PreviewRight { get; set; } = ""; - public int Length + public int Length => Text.Length; + + public int? RowIndex { get; set; } + + public int? ColumnIndex { get; set; } + + public string CellAddress { get { - return Text.Length; + if (RowIndex is null || ColumnIndex is null) return string.Empty; + string colLabel = EditTextTableDocument.GetSpreadsheetColumnLabel(ColumnIndex.Value); + return $"Cell: {colLabel}{RowIndex.Value + 1}"; } } + + public string LocationDisplay => + CellAddress.Length > 0 ? CellAddress : $"At index: {Index}"; } diff --git a/Text-Grab/Pages/GeneralSettings.xaml b/Text-Grab/Pages/GeneralSettings.xaml index 80794c77..2e326eb6 100644 --- a/Text-Grab/Pages/GeneralSettings.xaml +++ b/Text-Grab/Pages/GeneralSettings.xaml @@ -236,11 +236,11 @@ Checked="AddToContextMenuCheckBox_Checked" Unchecked="AddToContextMenuCheckBox_Unchecked"> - Add "Grab text with Text Grab" to right-click menu for image files + Add "Grab text with Text Grab" to right-click menu for image and PDF files - Right-click on PNG, JPG, BMP, GIF, or TIFF files to quickly grab text. + Right-click on supported image files or PDFs to quickly grab text. @@ -254,11 +254,11 @@ Checked="RegisterOpenWithCheckBox_Checked" Unchecked="RegisterOpenWithCheckBox_Unchecked"> - Register Text Grab as an "Open with" app for image files + Register Text Grab as an "Open with" app for image and PDF files - Opens images directly in Grab Frame when using "Open with" from File Explorer. + Opens supported images and PDFs directly in Grab Frame when using "Open with" from File Explorer. @@ -237,9 +238,12 @@ VerticalAlignment="Top"> + Width="{Binding ActualWidth, + ElementName=SubMenuBorder}" + Height="{Binding ActualHeight, + ElementName=SubMenuBorder}" + Fill="{Binding Background, + ElementName=SubMenuBorder}" /> @@ -469,9 +474,12 @@ VerticalAlignment="Top"> + Width="{Binding ActualWidth, + ElementName=SubMenuBorder}" + Height="{Binding ActualHeight, + ElementName=SubMenuBorder}" + Fill="{Binding Background, + ElementName=SubMenuBorder}" /> + Visibility="{Binding HeadersVisibility, + ConverterParameter={x:Static DataGridHeadersVisibility.Row}, + Converter={x:Static DataGrid.HeadersVisibilityConverter}, + RelativeSource={RelativeSource AncestorType={x:Type DataGrid}}}" /> diff --git a/Text-Grab/Styles/ListViewScrollFix.xaml b/Text-Grab/Styles/ListViewScrollFix.xaml index 3dad6541..aa605b6f 100644 --- a/Text-Grab/Styles/ListViewScrollFix.xaml +++ b/Text-Grab/Styles/ListViewScrollFix.xaml @@ -26,12 +26,18 @@ VerticalScrollBarVisibility="Hidden"> + Value="{Binding Path=HorizontalOffset, + RelativeSource={RelativeSource TemplatedParent}, + Mode=OneWay}" /> + Value="{Binding Path=VerticalOffset, + RelativeSource={RelativeSource TemplatedParent}, + Mode=OneWay}" /> + Data="{Binding Content, + RelativeSource={RelativeSource TemplatedParent}}"> diff --git a/Text-Grab/Text-Grab.csproj b/Text-Grab/Text-Grab.csproj index 16f1fad4..c1512207 100644 --- a/Text-Grab/Text-Grab.csproj +++ b/Text-Grab/Text-Grab.csproj @@ -64,6 +64,7 @@ + diff --git a/Text-Grab/Utilities/ClipboardUtilities.cs b/Text-Grab/Utilities/ClipboardUtilities.cs index 833e09ba..e9499b7d 100644 --- a/Text-Grab/Utilities/ClipboardUtilities.cs +++ b/Text-Grab/Utilities/ClipboardUtilities.cs @@ -1,6 +1,10 @@ using System; +using System.Collections.Generic; using System.IO; +using System.Linq; +using System.Net; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Windows; using System.Windows.Media; @@ -127,6 +131,240 @@ private static string CleanTeamsBase64Image(string dirtyTeamsString) return sb.ToString(); } + public static bool TryGetHtmlTableAsTabSeparated(out string tabSeparated) + { + tabSeparated = string.Empty; + try + { + if (!System.Windows.Clipboard.ContainsData(System.Windows.DataFormats.Html)) + return false; + + string htmlData = System.Windows.Clipboard.GetData(System.Windows.DataFormats.Html) as string ?? string.Empty; + if (string.IsNullOrEmpty(htmlData)) + return false; + + string result = ConvertHtmlToTabSeparated(htmlData); + if (string.IsNullOrEmpty(result)) + return false; + + tabSeparated = result; + return true; + } + catch + { + return false; + } + } + + internal static string ConvertHtmlToTabSeparated(string cfHtml) + { + string fragment = ExtractHtmlFragment(cfHtml); + List> table = ParseHtmlTableToGrid(fragment); + if (table.Count == 0) + return string.Empty; + + StringBuilder sb = new(); + for (int r = 0; r < table.Count; r++) + { + if (r > 0) sb.Append('\n'); + sb.Append(string.Join("\t", table[r])); + } + return sb.ToString(); + } + + private static string ExtractHtmlFragment(string cfHtml) + { + int startPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + if (startPos < 0) + startPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + + int endPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + if (endPos < 0) + endPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + + if (startPos >= 0 && endPos > startPos) + { + int fragmentStart = cfHtml.IndexOf("-->", startPos) + 3; + return cfHtml[fragmentStart..endPos]; + } + + // Fall back to byte-offset headers (StartFragment:/EndFragment:) + const string startKey = "StartFragment:"; + const string endKey = "EndFragment:"; + int sfIdx = cfHtml.IndexOf(startKey, StringComparison.OrdinalIgnoreCase); + int efIdx = cfHtml.IndexOf(endKey, StringComparison.OrdinalIgnoreCase); + + if (sfIdx >= 0 && efIdx >= 0) + { + int sfNumStart = sfIdx + startKey.Length; + int sfLineEnd = cfHtml.IndexOf('\n', sfNumStart); + int efNumStart = efIdx + endKey.Length; + int efLineEnd = cfHtml.IndexOf('\n', efNumStart); + + if (sfLineEnd > sfNumStart && efLineEnd > efNumStart + && int.TryParse(cfHtml[sfNumStart..sfLineEnd].Trim(), out int sfOff) + && int.TryParse(cfHtml[efNumStart..efLineEnd].Trim(), out int efOff) + && sfOff >= 0 && efOff > sfOff && efOff <= cfHtml.Length) + { + return cfHtml[sfOff..efOff]; + } + } + + return cfHtml; + } + + private static List> ParseHtmlTableToGrid(string html) + { + List> result = []; + int tableStart = html.IndexOf("", StringComparison.OrdinalIgnoreCase); + tableEnd = tableEnd >= 0 ? tableEnd + 8 : html.Length; + + string tableHtml = html[tableStart..tableEnd]; + + // Tracks cells that span into future rows: col -> (remaining rows to fill, cell content) + Dictionary rowspanMap = []; + + int pos = 0; + while (pos < tableHtml.Length) + { + int rowStart = tableHtml.IndexOf("", rowStart, StringComparison.OrdinalIgnoreCase); + rowEnd = rowEnd >= 0 ? rowEnd + 5 : tableHtml.Length; + + List<(string Text, int ColSpan, int RowSpan)> parsedCells = + ParseHtmlRowCells(tableHtml[rowStart..rowEnd]); + + if (parsedCells.Count > 0 || rowspanMap.Count > 0) + { + // Build a sparse column map for this row + Dictionary rowData = []; + + // Apply rowspan carry-overs from previous rows first + foreach (int col in rowspanMap.Keys.OrderBy(k => k).ToList()) + { + (int rem, string content) = rowspanMap[col]; + rowData[col] = content; + if (rem > 1) + rowspanMap[col] = (rem - 1, content); + else + rowspanMap.Remove(col); + } + + // Place each parsed cell in the next free column(s) + int nextFreeCol = 0; + foreach ((string text, int colspan, int rowspan) in parsedCells) + { + // Advance past columns already occupied by rowspan carry-overs + while (rowData.ContainsKey(nextFreeCol)) + nextFreeCol++; + + for (int cs = 0; cs < colspan; cs++) + rowData[nextFreeCol + cs] = text; + + if (rowspan > 1) + for (int cs = 0; cs < colspan; cs++) + rowspanMap[nextFreeCol + cs] = (rowspan - 1, text); + + nextFreeCol += colspan; + } + + if (rowData.Count > 0) + { + int colCount = rowData.Keys.Max() + 1; + List row = []; + for (int c = 0; c < colCount; c++) + row.Add(rowData.TryGetValue(c, out string? cell) ? cell : string.Empty); + result.Add(row); + } + } + + pos = rowEnd; + } + + return result; + } + + private static List<(string Text, int ColSpan, int RowSpan)> ParseHtmlRowCells(string rowHtml) + { + List<(string, int, int)> cells = []; + int pos = 0; + + while (pos < rowHtml.Length) + { + int tdPos = rowHtml.IndexOf("= 0 && (thPos < 0 || tdPos <= thPos)) + { + cellStart = tdPos; + endTag = ""; + } + else + { + cellStart = thPos; + endTag = ""; + } + + int openEnd = rowHtml.IndexOf('>', cellStart); + if (openEnd < 0) break; + + string tagAttributes = rowHtml[(cellStart + 3)..openEnd]; + int colspan = ParseSpanAttribute(tagAttributes, "colspan"); + int rowspan = ParseSpanAttribute(tagAttributes, "rowspan"); + + int contentStart = openEnd + 1; + int contentEnd = rowHtml.IndexOf(endTag, contentStart, StringComparison.OrdinalIgnoreCase); + contentEnd = contentEnd >= 0 ? contentEnd : rowHtml.Length; + + cells.Add((CleanHtmlCellContent(rowHtml[contentStart..contentEnd]), colspan, rowspan)); + pos = contentEnd + endTag.Length; + } + + return cells; + } + + private static int ParseSpanAttribute(string tagAttributes, string attributeName) + { + int attrPos = tagAttributes.IndexOf(attributeName, StringComparison.OrdinalIgnoreCase); + if (attrPos < 0) return 1; + + int eqPos = tagAttributes.IndexOf('=', attrPos + attributeName.Length); + if (eqPos < 0) return 1; + + int valueStart = eqPos + 1; + while (valueStart < tagAttributes.Length && tagAttributes[valueStart] is ' ' or '"' or '\'') + valueStart++; + + int valueEnd = valueStart; + while (valueEnd < tagAttributes.Length && char.IsDigit(tagAttributes[valueEnd])) + valueEnd++; + + if (valueEnd == valueStart) return 1; + + return int.TryParse(tagAttributes[valueStart..valueEnd], out int span) && span >= 1 ? span : 1; + } + + private static string CleanHtmlCellContent(string html) + { + if (string.IsNullOrEmpty(html)) + return string.Empty; + + html = Regex.Replace(html, @"", " ", RegexOptions.IgnoreCase); + html = Regex.Replace(html, @"<[^>]*>", string.Empty); + html = WebUtility.HtmlDecode(html); + + return html.Trim(); + } + private static string base64ImageExtension(ref string base64String) { // Copied this portion of the code from https://github.com/veler/DevToys diff --git a/Text-Grab/Utilities/ContextMenuUtilities.cs b/Text-Grab/Utilities/ContextMenuUtilities.cs index 4088aab2..7ef899fb 100644 --- a/Text-Grab/Utilities/ContextMenuUtilities.cs +++ b/Text-Grab/Utilities/ContextMenuUtilities.cs @@ -6,7 +6,7 @@ namespace Text_Grab.Utilities; /// /// Utility class for managing Windows context menu integration. -/// Adds "Grab text with Text Grab" and "Open in Grab Frame" options to the right-click context menu for image files. +/// Adds "Grab text with Text Grab" and "Open in Grab Frame" options to the right-click context menu for supported visual documents. /// internal static class ContextMenuUtilities { @@ -16,22 +16,17 @@ internal static class ContextMenuUtilities private const string GrabFrameDisplayText = "Open in Grab Frame"; /// - /// Supported image file extensions for context menu integration. + /// Supported image and PDF file extensions for context menu integration. /// - private static readonly string[] ImageExtensions = + private static readonly string[] VisualDocumentExtensions = [ - ".png", - ".jpg", - ".jpeg", - ".bmp", - ".gif", - ".tiff", - ".tif" + .. IoUtilities.ImageExtensions, + .. IoUtilities.PdfExtensions ]; /// - /// Adds Text Grab to the Windows context menu for image files. - /// This allows users to right-click on an image and select "Grab text with Text Grab" or "Open in Grab Frame". + /// Adds Text Grab to the Windows context menu for supported visual documents. + /// This allows users to right-click a file and select "Grab text with Text Grab" or "Open in Grab Frame". /// /// When the method returns false, contains an error message describing the failure. /// True if registration was successful, false otherwise. @@ -48,7 +43,7 @@ public static bool AddToContextMenu(out string? errorMessage) try { - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { RegisterGrabTextContextMenu(extension, executablePath); RegisterGrabFrameContextMenu(extension, executablePath); @@ -70,7 +65,7 @@ public static bool AddToContextMenu(out string? errorMessage) } /// - /// Removes Text Grab from the Windows context menu for image files. + /// Removes Text Grab from the Windows context menu for supported visual documents. /// /// When the method returns false, contains an error message describing the failure. /// True if removal was successful, false otherwise. @@ -79,7 +74,7 @@ public static bool RemoveFromContextMenu(out string? errorMessage) errorMessage = null; try { - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { UnregisterContextMenuForExtension(extension, GrabTextRegistryKeyName); UnregisterContextMenuForExtension(extension, GrabFrameRegistryKeyName); @@ -109,7 +104,7 @@ public static bool IsRegisteredInContextMenu() try { // Check if at least one extension has the context menu registered - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { string keyPath = GetShellKeyPath(extension, GrabTextRegistryKeyName); using RegistryKey? key = Registry.CurrentUser.OpenSubKey(keyPath); @@ -186,7 +181,7 @@ private static void RegisterGrabFrameContextMenu(string extension, string execut throw new InvalidOperationException($"Could not create command registry key for {extension}"); } - // --grabframe flag opens the image in GrabFrame instead of EditTextWindow + // --grabframe flag opens the visual document in GrabFrame instead of EditTextWindow commandKey.SetValue(string.Empty, $"\"{executablePath}\" --grabframe \"%1\""); } } diff --git a/Text-Grab/Utilities/FileUtilities.cs b/Text-Grab/Utilities/FileUtilities.cs index 73bae77d..c84033a9 100644 --- a/Text-Grab/Utilities/FileUtilities.cs +++ b/Text-Grab/Utilities/FileUtilities.cs @@ -3,6 +3,7 @@ using System.Drawing; using System.Drawing.Imaging; using System.IO; +using System.Linq; using System.Text; using System.Threading.Tasks; using Windows.Storage; @@ -31,27 +32,45 @@ public class FileUtilities /// Modified by Joseph Finney public static string GetImageFilter() { - string imageExtensions = string.Empty; - string separator = ""; - ImageCodecInfo[] codecs = ImageCodecInfo.GetImageEncoders(); - Dictionary imageFilters = []; - foreach (ImageCodecInfo codec in codecs) + string imageExtensions = GetImageExtensionsFilterPattern(); + return string.IsNullOrEmpty(imageExtensions) ? string.Empty : $"Image files|{imageExtensions}"; + } + + public static string GetVisualDocumentFilter() + { + string pdfExtensions = GetExtensionsFilterPattern(IoUtilities.PdfExtensions); + string combinedExtensions = GetVisualDocumentFilterPattern(); + string imageFilter = GetImageFilter(); + + return string.Join("|", new[] { - if (codec.FilenameExtension is not string extension) - continue; + $"Image and PDF files|{combinedExtensions}", + $"PDF files|{pdfExtensions}", + imageFilter + }); + } - imageExtensions = $"{imageExtensions}{separator}{extension.ToLower()}"; - separator = ";"; - imageFilters.Add($"{codec.FormatDescription} files ({extension.ToLower()})", extension.ToLower()); - } - string result = string.Empty; - separator = ""; + public static string GetOpenDocumentFilter() + { + string spreadsheetExtensions = GetExtensionsFilterPattern(IoUtilities.SpreadsheetExtensions); + string markdownExtensions = GetExtensionsFilterPattern(IoUtilities.MarkdownExtensions); + string supportedExtensions = string.Join(";", new[] + { + GetVisualDocumentFilterPattern(), + spreadsheetExtensions, + markdownExtensions, + "*.txt" + }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); - if (!string.IsNullOrEmpty(imageExtensions)) + return string.Join("|", new[] { - result += $"{separator}Image files|{imageExtensions}"; - } - return result; + $"Supported documents|{supportedExtensions}", + GetVisualDocumentFilter(), + $"Spreadsheet documents|{spreadsheetExtensions}", + $"Markdown documents|{markdownExtensions}", + "Text documents (*.txt)|*.txt", + "All files (*.*)|*.*" + }); } public static string GetPathToLocalFile(string imageRelativePath) @@ -99,6 +118,40 @@ public static Task SaveTextFile(string textContent, string filename, FileS return SaveTextFileUnpackaged(textContent, filename, storageKind); } + private static string GetImageExtensionsFilterPattern() + { + string imageExtensions = string.Empty; + string separator = string.Empty; + ImageCodecInfo[] codecs = ImageCodecInfo.GetImageEncoders(); + Dictionary imageFilters = []; + + foreach (ImageCodecInfo codec in codecs) + { + if (codec.FilenameExtension is not string extension) + continue; + + imageExtensions = $"{imageExtensions}{separator}{extension.ToLower()}"; + separator = ";"; + imageFilters.Add($"{codec.FormatDescription} files ({extension.ToLower()})", extension.ToLower()); + } + + return imageExtensions; + } + + private static string GetExtensionsFilterPattern(IEnumerable extensions) + { + return string.Join(";", extensions.Select(extension => $"*{extension}")); + } + + private static string GetVisualDocumentFilterPattern() + { + return string.Join(";", new[] + { + GetImageExtensionsFilterPattern(), + GetExtensionsFilterPattern(IoUtilities.PdfExtensions) + }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); + } + private static async Task GetImageFilePackaged(string fileName, FileStorageKind storageKind) { StorageFolder folder = await GetStorageFolderPackaged(fileName, storageKind); diff --git a/Text-Grab/Utilities/ImplementAppOptions.cs b/Text-Grab/Utilities/ImplementAppOptions.cs index 50ec062e..125255b7 100644 --- a/Text-Grab/Utilities/ImplementAppOptions.cs +++ b/Text-Grab/Utilities/ImplementAppOptions.cs @@ -8,7 +8,11 @@ namespace Text_Grab.Utilities; internal class ImplementAppOptions { - private static readonly string[] ImageExtensions = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff", ".tif", ".webp", ".ico"]; + private static readonly string[] SupportedOpenWithExtensions = + [ + .. IoUtilities.ImageExtensions, + .. IoUtilities.PdfExtensions + ]; public static async Task ImplementStartupOption(bool startupOnLogin) { @@ -60,8 +64,8 @@ public static void RegisterAsImageOpenWithApp() iconKey?.SetValue("", $"\"{executablePath}\",0"); } - // Register Text Grab in OpenWithProgids for each image extension - foreach (string ext in ImageExtensions) + // Register Text Grab in OpenWithProgids for each supported visual document extension + foreach (string ext in SupportedOpenWithExtensions) { string extKey = $@"SOFTWARE\Classes\{ext}\OpenWithProgids"; using RegistryKey? key = Registry.CurrentUser.CreateSubKey(extKey); @@ -80,7 +84,7 @@ public static void RegisterAsImageOpenWithApp() using RegistryKey? supportedTypes = key.CreateSubKey("SupportedTypes"); if (supportedTypes is not null) { - foreach (string ext in ImageExtensions) + foreach (string ext in SupportedOpenWithExtensions) supportedTypes.SetValue(ext, ""); } @@ -108,7 +112,7 @@ public static void UnregisterAsImageOpenWithApp() Registry.CurrentUser.DeleteSubKeyTree(@"SOFTWARE\Classes\Text-Grab.Image", false); // Remove OpenWithProgids entries for each extension - foreach (string ext in ImageExtensions) + foreach (string ext in SupportedOpenWithExtensions) { string extKey = $@"SOFTWARE\Classes\{ext}\OpenWithProgids"; using RegistryKey? key = Registry.CurrentUser.OpenSubKey(extKey, true); diff --git a/Text-Grab/Utilities/IoUtilities.cs b/Text-Grab/Utilities/IoUtilities.cs index b05e90cc..698bf16b 100644 --- a/Text-Grab/Utilities/IoUtilities.cs +++ b/Text-Grab/Utilities/IoUtilities.cs @@ -11,6 +11,7 @@ namespace Text_Grab.Utilities; public class IoUtilities { public static readonly List ImageExtensions = [".png", ".bmp", ".jpg", ".jpeg", ".tiff", ".gif", ".tif", ".webp", ".ico"]; + public static readonly List PdfExtensions = [".pdf"]; public static readonly List MarkdownExtensions = [".md", ".markdown"]; public static readonly List SpreadsheetExtensions = [".csv", ".tsv", ".tab"]; @@ -30,6 +31,35 @@ public static bool IsImageFileExtension(string extension) return ImageExtensions.Contains(extension.ToLowerInvariant()); } + public static bool IsPdfFile(string path) + { + if (string.IsNullOrWhiteSpace(path) || !File.Exists(path)) + return false; + + return IsPdfFileExtension(Path.GetExtension(path)); + } + + public static bool IsPdfFileExtension(string extension) + { + if (string.IsNullOrWhiteSpace(extension)) + return false; + + return PdfExtensions.Contains(extension.ToLowerInvariant()); + } + + public static bool IsVisualDocumentFile(string path) + { + if (string.IsNullOrWhiteSpace(path) || !File.Exists(path)) + return false; + + return IsVisualDocumentFileExtension(Path.GetExtension(path)); + } + + public static bool IsVisualDocumentFileExtension(string extension) + { + return IsImageFileExtension(extension) || IsPdfFileExtension(extension); + } + public static bool IsMarkdownFileExtension(string extension) { if (string.IsNullOrWhiteSpace(extension)) @@ -59,15 +89,28 @@ public static EtwEditorMode GetEditorModeForPath(string? path) return EtwEditorMode.Text; } + public static OpenContentKind GetOpenContentKindForPath(string? path) + { + string extension = Path.GetExtension(path ?? string.Empty); + + if (IsPdfFileExtension(extension)) + return OpenContentKind.PdfDocument; + + if (IsImageFileExtension(extension)) + return OpenContentKind.Image; + + return OpenContentKind.TextFile; + } + public static async Task<(string TextContent, OpenContentKind SourceKindOfContent)> GetContentFromPath(string pathOfFileToOpen, bool isMultipleFiles = false, ILanguage? language = null) { StringBuilder stringBuilder = new(); - OpenContentKind openContentKind = OpenContentKind.Image; + OpenContentKind openContentKind = GetOpenContentKindForPath(pathOfFileToOpen); if (isMultipleFiles) stringBuilder.AppendLine(pathOfFileToOpen); - if (ImageExtensions.Contains(Path.GetExtension(pathOfFileToOpen).ToLower())) + if (openContentKind is OpenContentKind.Image or OpenContentKind.PdfDocument) { try { diff --git a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs index b1d9405a..0097bc59 100644 --- a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs +++ b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs @@ -1,5 +1,4 @@ using Markdig; -using Markdig.Extensions.Tables; using Markdig.Extensions.TaskLists; using Markdig.Syntax; using Markdig.Syntax.Inlines; @@ -25,21 +24,14 @@ namespace Text_Grab.Utilities; -public static class MarkdownDocumentUtilities +public static partial class MarkdownDocumentUtilities { - private static readonly Regex LiveBlockTriggerRegex = new( - @"^\s{0,3}(#{1,6}|>+|[-+*]|\d+[.)])$", - RegexOptions.Compiled); - private static readonly Regex LiveInlinePromotionRegex = new( - @"(^|\s)\[( |x|X)\](\s|$)|(\*\*|__)(?=\S).+?\4|(?+\s|[-+*]\s|\d+[.)]\s|```|~~~|---\s*$|___\s*$|\*\*\*\s*$)|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)|(^|\n)\|.+\|\s*$", - RegexOptions.Compiled | RegexOptions.Multiline); + private static readonly Regex LiveBlockTriggerRegex = LiveBlockTrigger(); + private static readonly Regex LiveInlinePromotionRegex = LiveInlinePromotion(); + private static readonly Regex MarkdownPatternRegex = MarkdownPattern(); + private static readonly MarkdownPipeline MarkdownPipeline = new MarkdownPipelineBuilder() - .UseAutoLinks() - .UsePipeTables() - .UseTaskLists() + .UseAdvancedExtensions() .Build(); private enum MarkdownBlockRole @@ -492,8 +484,8 @@ private static void WriteTable(StringBuilder builder, WpfTable table) if (firstGroup is null || firstGroup.Rows.Count == 0) return; - List rows = firstGroup.Rows.Cast().ToList(); - List headerCells = rows[0].Cells.Cast().Select(SerializeTableCell).ToList(); + List rows = [.. firstGroup.Rows.Cast()]; + List headerCells = [.. rows[0].Cells.Cast().Select(SerializeTableCell)]; builder.Append(ApplyQuotePrefix($"| {string.Join(" | ", headerCells)} |", quotePrefix)); builder.AppendLine(); @@ -506,7 +498,7 @@ private static void WriteTable(StringBuilder builder, WpfTable table) foreach (WpfTableRow row in dataRows) { builder.AppendLine(); - List rowCells = row.Cells.Cast().Select(SerializeTableCell).ToList(); + List rowCells = [.. row.Cells.Cast().Select(SerializeTableCell)]; builder.Append(ApplyQuotePrefix($"| {string.Join(" | ", rowCells)} |", quotePrefix)); } } @@ -834,4 +826,14 @@ private static string GetSourceSlice(string source, MarkdownObject markdownObjec private static string GetCodeFenceInfo(DependencyObject element) => (string)element.GetValue(CodeFenceInfoProperty); private static void SetIsTableHeader(DependencyObject element, bool value) => element.SetValue(IsTableHeaderProperty, value); private static bool GetIsTableHeader(DependencyObject element) => (bool)element.GetValue(IsTableHeaderProperty); + + + [GeneratedRegex(@"^\s{0,3}(#{1,6}|>+|[-+*]|\d+[.)])$", RegexOptions.Compiled)] + private static partial Regex LiveBlockTrigger(); + + [GeneratedRegex(@"(^|\s)\[( |x|X)\](\s|$)|(\*\*|__)(?=\S).+?\4|(?+\s|[-+*]\s|\d+[.)]\s|```|~~~|---\s*$|___\s*$|\*\*\*\s*$)|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)|(^|\n)\|.+\|\s*$", RegexOptions.Multiline | RegexOptions.Compiled)] + private static partial Regex MarkdownPattern(); } diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index eed11f63..4b88756d 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -540,8 +540,15 @@ public static string GetStringFromOcrOutputs(List outputs) public static async Task OcrAbsoluteFilePathAsync(string absolutePath, ILanguage? language = null) { - Bitmap bmp = LoadBitmapFromFile(absolutePath); language ??= LanguageUtilities.GetCurrentInputLanguage(); + + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(absolutePath))) + { + using PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(absolutePath); + return await pdfDocument.ExtractTextAsync(language); + } + + using Bitmap bmp = LoadBitmapFromFile(absolutePath); return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, language)); } @@ -657,8 +664,16 @@ public static async Task OcrFile(string path, ILanguage? selectedLanguag string ocrText; if (options.GrabTemplate is GrabTemplate grabTemplate) { - Bitmap bmp = LoadBitmapFromFile(path); - ocrText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, bmp, selectedLanguage); + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(path))) + { + using PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(path); + ocrText = await pdfDocument.ExtractTextAsync(selectedLanguage, grabTemplate); + } + else + { + using Bitmap bmp = LoadBitmapFromFile(path); + ocrText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, bmp, selectedLanguage); + } } else ocrText = await OcrAbsoluteFilePathAsync(path, selectedLanguage); diff --git a/Text-Grab/Utilities/PdfDocumentRenderer.cs b/Text-Grab/Utilities/PdfDocumentRenderer.cs new file mode 100644 index 00000000..6e27cd11 --- /dev/null +++ b/Text-Grab/Utilities/PdfDocumentRenderer.cs @@ -0,0 +1,451 @@ +using System; +using System.Collections.Generic; +using System.Drawing; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Media.Imaging; +using Text_Grab.Interfaces; +using Text_Grab.Models; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.Core; +using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor; +using Windows.Graphics.Imaging; +using Windows.Storage; +using Windows.Storage.Streams; +using OcrEngine = Windows.Media.Ocr.OcrEngine; +using PigPdfDocument = UglyToad.PdfPig.PdfDocument; +using PdfPageRenderOptions = Windows.Data.Pdf.PdfPageRenderOptions; +using WinPdfDocument = Windows.Data.Pdf.PdfDocument; +using WinPdfPage = Windows.Data.Pdf.PdfPage; + +namespace Text_Grab.Utilities; + +internal sealed class PdfPageContent +{ + public PdfPageContent( + int pageIndex, + BitmapSource renderedPage, + IReadOnlyList nativeLines, + IReadOnlyList imageRegions) + { + PageIndex = pageIndex; + RenderedPage = renderedPage; + NativeLines = nativeLines; + ImageRegions = imageRegions; + } + + public bool HasNativeText => NativeLines.Count > 0; + + public IReadOnlyList ImageRegions { get; } + + public IReadOnlyList NativeLines { get; } + + public int PageIndex { get; } + + public BitmapSource RenderedPage { get; } +} + +internal sealed class PdfPageTextLine +{ + public PdfPageTextLine(Windows.Foundation.Rect sourceRect, string text, bool isNativeText) + { + SourceRect = sourceRect; + Text = text; + IsNativeText = isNativeText; + } + + public bool IsNativeText { get; } + + public Windows.Foundation.Rect SourceRect { get; } + + public string Text { get; } +} + +internal sealed class PdfDocumentRenderer : IDisposable +{ + private const double DefaultRenderScale = 2.0; + private const int MaxCachedPages = 10; + private readonly WinPdfDocument renderDocument; + private readonly PigPdfDocument textDocument; + private readonly Dictionary pageCache = []; + private readonly LinkedList cacheOrder = new(); + + private PdfDocumentRenderer(string filePath, WinPdfDocument renderDocument, PigPdfDocument textDocument) + { + FilePath = filePath; + this.renderDocument = renderDocument; + this.textDocument = textDocument; + } + + public string FilePath { get; } + + public int PageCount => (int)renderDocument.PageCount; + + public void Dispose() + { + textDocument.Dispose(); + } + + public async Task ExtractTextAsync(ILanguage? language = null, GrabTemplate? grabTemplate = null) + { + ILanguage resolvedLanguage = language ?? LanguageUtilities.GetCurrentInputLanguage(); + StringBuilder extractedText = new(); + + for (int pageIndex = 0; pageIndex < PageCount; pageIndex++) + { + string pageText; + if (grabTemplate is not null) + { + BitmapSource pageImage = await RenderPageAsync(pageIndex); + using Bitmap pageBitmap = ImageMethods.BitmapSourceToBitmap(pageImage); + pageText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, pageBitmap, resolvedLanguage); + } + else + { + IReadOnlyList lines = await GetSelectableLinesAsync(pageIndex, resolvedLanguage); + pageText = string.Join(Environment.NewLine, lines.Select(line => line.Text)); + } + + if (string.IsNullOrWhiteSpace(pageText)) + continue; + + if (extractedText.Length > 0) + extractedText.AppendLine().AppendLine(); + + extractedText.Append(pageText.Trim()); + } + + return extractedText.ToString(); + } + + public async Task GetPageContentAsync(int pageIndex) + { + ValidatePageIndex(pageIndex); + + if (pageCache.TryGetValue(pageIndex, out PdfPageContent? cachedPage)) + { + cacheOrder.Remove(pageIndex); + cacheOrder.AddLast(pageIndex); + return cachedPage; + } + + WinPdfPage renderPage = renderDocument.GetPage((uint)pageIndex); + try + { + BitmapImage renderedPage = await RenderPageBitmapAsync(renderPage); + Page textPage = textDocument.GetPage(pageIndex + 1); + + List nativeLines = ExtractNativeLines(textPage, renderedPage.PixelWidth, renderedPage.PixelHeight); + List imageRegions = ExtractImageRegions(textPage, renderedPage.PixelWidth, renderedPage.PixelHeight); + + PdfPageContent pageContent = new(pageIndex, renderedPage, nativeLines, imageRegions); + + if (pageCache.Count >= MaxCachedPages && cacheOrder.First is LinkedListNode oldest) + { + pageCache.Remove(oldest.Value); + cacheOrder.RemoveFirst(); + } + + pageCache[pageIndex] = pageContent; + cacheOrder.AddLast(pageIndex); + return pageContent; + } + finally + { + (renderPage as IDisposable)?.Dispose(); + } + } + + public async Task> GetSelectableLinesAsync(int pageIndex, ILanguage? language = null) + { + PdfPageContent pageContent = await GetPageContentAsync(pageIndex); + ILanguage resolvedLanguage = language ?? LanguageUtilities.GetCurrentInputLanguage(); + + if (!pageContent.HasNativeText) + return await GetOcrLinesAsync(pageContent.RenderedPage, resolvedLanguage); + + if (pageContent.ImageRegions.Count == 0) + return pageContent.NativeLines; + + List combinedLines = [.. pageContent.NativeLines]; + IReadOnlyList nativeRects = [.. pageContent.NativeLines.Select(l => l.SourceRect)]; + IReadOnlyList imageOcrLines = await GetOcrLinesAsync( + pageContent.RenderedPage, + resolvedLanguage, + sourceRect => ShouldIncludeOcrLine(sourceRect, pageContent.ImageRegions) + && !ShouldIncludeOcrLine(sourceRect, nativeRects)); + + combinedLines.AddRange(imageOcrLines); + return SortLines(combinedLines); + } + + public async Task RenderPageAsync(int pageIndex) + { + PdfPageContent pageContent = await GetPageContentAsync(pageIndex); + return pageContent.RenderedPage; + } + + public static async Task LoadAsync(string filePath) + { + if (!IoUtilities.IsPdfFileExtension(Path.GetExtension(filePath))) + throw new InvalidOperationException("The provided path is not a PDF document."); + + string absolutePath = Path.GetFullPath(filePath); + StorageFile storageFile = await StorageFile.GetFileFromPathAsync(absolutePath); + WinPdfDocument renderDocument = await WinPdfDocument.LoadFromFileAsync(storageFile); + PigPdfDocument textDocument = PigPdfDocument.Open(absolutePath); + + return new PdfDocumentRenderer(absolutePath, renderDocument, textDocument); + } + + internal static Windows.Foundation.Rect ConvertPdfRectToImageRect( + PdfRectangle pdfRect, + double pageWidthPoints, + double pageHeightPoints, + double renderedWidth, + double renderedHeight) + { + if (pageWidthPoints <= 0 || pageHeightPoints <= 0 || renderedWidth <= 0 || renderedHeight <= 0) + return new Windows.Foundation.Rect(0, 0, 0, 0); + + PdfPoint[] points = + [ + pdfRect.TopLeft, + pdfRect.TopRight, + pdfRect.BottomLeft, + pdfRect.BottomRight + ]; + + List xs = []; + List ys = []; + + foreach (PdfPoint point in points) + { + double x = (double)point.X / pageWidthPoints * renderedWidth; + double y = (1 - ((double)point.Y / pageHeightPoints)) * renderedHeight; + xs.Add(x); + ys.Add(y); + } + + double left = xs.Min(); + double top = ys.Min(); + double right = xs.Max(); + double bottom = ys.Max(); + + return new Windows.Foundation.Rect(left, top, Math.Max(0, right - left), Math.Max(0, bottom - top)); + } + + internal static IReadOnlyList GroupWordsIntoLines(IEnumerable<(Windows.Foundation.Rect SourceRect, string Text)> words) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> orderedWords = [.. words + .Where(word => !string.IsNullOrWhiteSpace(word.Text) && word.SourceRect.Width > 0 && word.SourceRect.Height > 0) + .OrderBy(word => word.SourceRect.Y) + .ThenBy(word => word.SourceRect.X)]; + + if (orderedWords.Count == 0) + return []; + + List> groups = []; + + foreach ((Windows.Foundation.Rect SourceRect, string Text) word in orderedWords) + { + if (groups.Count == 0) + { + groups.Add([word]); + continue; + } + + List<(Windows.Foundation.Rect SourceRect, string Text)> currentGroup = groups[^1]; + Windows.Foundation.Rect currentBounds = GetBounds(currentGroup.Select(item => item.SourceRect)); + double currentCenterY = currentBounds.Y + (currentBounds.Height / 2); + double wordCenterY = word.SourceRect.Y + (word.SourceRect.Height / 2); + double lineHeight = Math.Max(currentBounds.Height, word.SourceRect.Height); + double maxGap = lineHeight * 6; + double horizontalGap = Math.Max(0, word.SourceRect.X - currentBounds.Right); + bool sameBaseline = Math.Abs(wordCenterY - currentCenterY) <= lineHeight * 0.6; + + if (sameBaseline && horizontalGap <= maxGap) + currentGroup.Add(word); + else + groups.Add([word]); + } + + List lines = []; + foreach (List<(Windows.Foundation.Rect SourceRect, string Text)> group in groups) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> orderedGroup = [.. group.OrderBy(item => item.SourceRect.X)]; + Windows.Foundation.Rect lineBounds = GetBounds(orderedGroup.Select(item => item.SourceRect)); + string text = string.Join(" ", orderedGroup.Select(item => item.Text.Trim())); + lines.Add(new PdfPageTextLine(lineBounds, text, isNativeText: true)); + } + + return SortLines(lines); + } + + internal static (uint Width, uint Height) GetRenderDimensions(double pageWidth, double pageHeight, double scaleFactor = DefaultRenderScale) + { + if (!double.IsFinite(pageWidth) || pageWidth <= 0 || !double.IsFinite(pageHeight) || pageHeight <= 0) + return (1, 1); + + double scaledWidth = Math.Max(1, pageWidth * scaleFactor); + double scaledHeight = Math.Max(1, pageHeight * scaleFactor); + double maxDimension = Math.Max(scaledWidth, scaledHeight); + + if (maxDimension > OcrEngine.MaxImageDimension) + { + double scaleDownRatio = OcrEngine.MaxImageDimension / maxDimension; + scaledWidth *= scaleDownRatio; + scaledHeight *= scaleDownRatio; + } + + return ((uint)Math.Max(1, Math.Round(scaledWidth)), (uint)Math.Max(1, Math.Round(scaledHeight))); + } + + internal static bool ShouldIncludeOcrLine(Windows.Foundation.Rect sourceRect, IReadOnlyList imageRegions) + { + if (sourceRect.Width <= 0 || sourceRect.Height <= 0) + return false; + + double sourceArea = sourceRect.Width * sourceRect.Height; + if (sourceArea <= 0) + return false; + + foreach (Windows.Foundation.Rect imageRegion in imageRegions) + { + double intersectionLeft = Math.Max(sourceRect.Left, imageRegion.Left); + double intersectionTop = Math.Max(sourceRect.Top, imageRegion.Top); + double intersectionRight = Math.Min(sourceRect.Right, imageRegion.Right); + double intersectionBottom = Math.Min(sourceRect.Bottom, imageRegion.Bottom); + + double intersectionWidth = Math.Max(0, intersectionRight - intersectionLeft); + double intersectionHeight = Math.Max(0, intersectionBottom - intersectionTop); + double intersectionArea = intersectionWidth * intersectionHeight; + + if (intersectionArea / sourceArea >= 0.25) + return true; + } + + return false; + } + + private static PdfPageRenderOptions CreateRenderOptions(WinPdfPage page) + { + (uint width, uint height) = GetRenderDimensions(page.Size.Width, page.Size.Height); + + return new PdfPageRenderOptions + { + BackgroundColor = new Windows.UI.Color { A = byte.MaxValue, R = byte.MaxValue, G = byte.MaxValue, B = byte.MaxValue }, + BitmapEncoderId = Windows.Graphics.Imaging.BitmapEncoder.PngEncoderId, + DestinationWidth = width, + DestinationHeight = height, + IsIgnoringHighContrast = true + }; + } + + private static List ExtractImageRegions(Page textPage, int renderedWidth, int renderedHeight) + { + return [.. textPage.GetImages() + .Select(image => ConvertPdfRectToImageRect(image.BoundingBox, (double)textPage.Width, (double)textPage.Height, renderedWidth, renderedHeight)) + .Where(rect => rect.Width > 0 && rect.Height > 0)]; + } + + private static List ExtractNativeLines(Page textPage, int renderedWidth, int renderedHeight) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> words = [.. textPage + .GetWords(NearestNeighbourWordExtractor.Instance) + .Where(word => !string.IsNullOrWhiteSpace(word.Text)) + .Select(word => ( + SourceRect: ConvertPdfRectToImageRect(word.BoundingBox, (double)textPage.Width, (double)textPage.Height, renderedWidth, renderedHeight), + Text: word.Text.Trim())) + .Where(word => word.SourceRect.Width > 0 && word.SourceRect.Height > 0)]; + + return [.. GroupWordsIntoLines(words)]; + } + + private static Windows.Foundation.Rect GetBounds(IEnumerable rects) + { + List rectList = [.. rects.Where(rect => rect.Width > 0 && rect.Height > 0)]; + if (rectList.Count == 0) + return new Windows.Foundation.Rect(0, 0, 0, 0); + + double left = rectList.Min(rect => rect.Left); + double top = rectList.Min(rect => rect.Top); + double right = rectList.Max(rect => rect.Right); + double bottom = rectList.Max(rect => rect.Bottom); + + return new Windows.Foundation.Rect(left, top, Math.Max(0, right - left), Math.Max(0, bottom - top)); + } + + private async Task> GetOcrLinesAsync( + BitmapSource renderedPage, + ILanguage language, + Func? sourceRectPredicate = null) + { + using Bitmap bitmap = ImageMethods.BitmapSourceToBitmap(renderedPage); + (IOcrLinesWords? ocrResult, double scale) = await OcrUtilities.GetOcrResultFromBitmapAsync(bitmap, language); + if (ocrResult is null || ocrResult.Lines.Length == 0) + return []; + + return ConvertOcrLines(ocrResult, scale, language, sourceRectPredicate); + } + + private static IReadOnlyList ConvertOcrLines( + IOcrLinesWords ocrResult, + double scale, + ILanguage language, + Func? sourceRectPredicate) + { + List lines = []; + bool isSpaceJoiningLanguage = language.IsSpaceJoining(); + + foreach (IOcrLine ocrLine in ocrResult.Lines) + { + StringBuilder textBuilder = new(); + ocrLine.GetTextFromOcrLine(isSpaceJoiningLanguage, textBuilder); + textBuilder.RemoveTrailingNewlines(); + + string lineText = textBuilder.ToString(); + if (string.IsNullOrWhiteSpace(lineText)) + continue; + + Windows.Foundation.Rect scaledRect = ocrLine.BoundingBox; + Windows.Foundation.Rect sourceRect = new( + scaledRect.X / scale, + scaledRect.Y / scale, + scaledRect.Width / scale, + scaledRect.Height / scale); + + if (sourceRectPredicate is not null && !sourceRectPredicate(sourceRect)) + continue; + + lines.Add(new PdfPageTextLine(sourceRect, lineText.Trim(), isNativeText: false)); + } + + return SortLines(lines); + } + + private static List SortLines(IEnumerable lines) + { + return [.. lines.OrderBy(line => line.SourceRect.Y).ThenBy(line => line.SourceRect.X)]; + } + + private static async Task RenderPageBitmapAsync(WinPdfPage page) + { + using InMemoryRandomAccessStream renderedStream = new(); + PdfPageRenderOptions renderOptions = CreateRenderOptions(page); + + await page.RenderToStreamAsync(renderedStream, renderOptions); + renderedStream.Seek(0); + + using Bitmap renderedBitmap = ImageMethods.GetBitmapFromIRandomAccessStream(renderedStream); + return ImageMethods.BitmapToImageSource(renderedBitmap); + } + + private void ValidatePageIndex(int pageIndex) + { + if (pageIndex < 0 || pageIndex >= PageCount) + throw new ArgumentOutOfRangeException(nameof(pageIndex), pageIndex, "Page index is outside the document bounds."); + } +} diff --git a/Text-Grab/Views/EditTextWindow.xaml b/Text-Grab/Views/EditTextWindow.xaml index 73352783..1172eee6 100644 --- a/Text-Grab/Views/EditTextWindow.xaml +++ b/Text-Grab/Views/EditTextWindow.xaml @@ -83,7 +83,10 @@ - + + + + @@ -293,9 +296,7 @@ x:Name="AddRemoveAtMenuItem" Click="AddRemoveAtMenuItem_Click" Header="_Add, Remove, Limit..." /> - + - + imageFiles = [.. files.Where(x => IoUtilities.ImageExtensions.Contains(Path.GetExtension(x).ToLower()))]; + List imageFiles = [.. files.Where(x => IoUtilities.IsVisualDocumentFileExtension(Path.GetExtension(x).ToLower()))]; if (imageFiles.Count == 0) { - PassedTextControl.AppendText($"{folderPath} contains no images"); + PassedTextControl.AppendText($"{folderPath} contains no images or PDFs"); return; } @@ -294,7 +293,7 @@ public async Task OcrAllImagesInFolder(string folderPath, OcrDirectoryOptions op { PassedTextControl.AppendText(folderPath); PassedTextControl.AppendText(Environment.NewLine); - PassedTextControl.AppendText($"{imageFiles.Count} images found"); + PassedTextControl.AppendText($"{imageFiles.Count} files found"); if (!string.IsNullOrEmpty(tesseractLanguageTag)) { @@ -343,14 +342,14 @@ public async Task OcrAllImagesInFolder(string folderPath, OcrDirectoryOptions op if (options.OutputFooter) { PassedTextControl.AppendText(Environment.NewLine); - PassedTextControl.AppendText($"----- COMPLETED OCR OF {imageFiles.Count} images"); + PassedTextControl.AppendText($"----- COMPLETED OCR OF {imageFiles.Count} files"); } } catch (OperationCanceledException) { PassedTextControl.AppendText(Environment.NewLine); int countCompleted = ocrFileResults.Where(r => r.OcrResult is not null).Count(); - PassedTextControl.AppendText($"----- CANCELLED OCR OF {ocrFileResults.Count - countCompleted}, Completed {countCompleted} images"); + PassedTextControl.AppendText($"----- CANCELLED OCR OF {ocrFileResults.Count - countCompleted}, Completed {countCompleted} files"); } finally { @@ -599,15 +598,7 @@ private void CopySpreadsheetRowsMenuItem_Click(object sender, RoutedEventArgs e) private void CopySpreadsheetSelectionMenuItem_Click(object sender, RoutedEventArgs e) { - List<(int RowIndex, int ColumnIndex)> selectedCellCoordinates = GetSelectedSpreadsheetCellCoordinates(); - if (selectedCellCoordinates.Count == 0) - return; - - string selectionText = BuildSpreadsheetSelectionText(spreadsheetTable, selectedCellCoordinates); - if (string.IsNullOrEmpty(selectionText)) - return; - - TrySetClipboardText(selectionText); + _ = TryCopySpreadsheetSelectionToClipboard(GetSelectedSpreadsheetCellCoordinates()); } private void AddSpreadsheetColumnMenuItem_Click(object sender, RoutedEventArgs e) @@ -1107,6 +1098,24 @@ private void SpreadsheetDataGrid_PreviewKeyDown(object sender, System.Windows.In return; } + if (e.Key == Key.X + && (Keyboard.IsKeyDown(Key.LeftCtrl) || Keyboard.IsKeyDown(Key.RightCtrl)) + && !IsSpreadsheetCellEditorFocused()) + { + e.Handled = true; + _ = TryCutSelectedSpreadsheetCellValues(); + return; + } + + if (e.Key == Key.V + && (Keyboard.IsKeyDown(Key.LeftCtrl) || Keyboard.IsKeyDown(Key.RightCtrl)) + && !IsSpreadsheetCellEditorFocused()) + { + e.Handled = true; + PasteIntoSpreadsheet(); + return; + } + if (e.Key != Key.Enter || SpreadsheetDataGrid.CurrentCell.Column is null) return; @@ -1225,6 +1234,83 @@ internal static void ClearSpreadsheetCellValues(DataTable dataTable, IEnumerable } } + internal static bool TryCutSpreadsheetCellValues( + DataTable dataTable, + IEnumerable<(int RowIndex, int ColumnIndex)> cellCoordinates, + Func trySetClipboardText) + { + ArgumentNullException.ThrowIfNull(dataTable); + ArgumentNullException.ThrowIfNull(cellCoordinates); + ArgumentNullException.ThrowIfNull(trySetClipboardText); + + string selectionText = BuildSpreadsheetSelectionText(dataTable, cellCoordinates); + if (string.IsNullOrEmpty(selectionText) || !trySetClipboardText(selectionText)) + return false; + + ClearSpreadsheetCellValues(dataTable, cellCoordinates); + return true; + } + + private void PasteIntoSpreadsheet() + { + string clipboardText; + try + { + if (!ClipboardUtilities.TryGetHtmlTableAsTabSeparated(out clipboardText)) + clipboardText = System.Windows.Clipboard.GetText(); + } + catch (Exception ex) + { + Debug.WriteLine($"PasteIntoSpreadsheet: clipboard read failed. {ex.Message}"); + return; + } + + if (string.IsNullOrEmpty(clipboardText)) + return; + + int startRow = Math.Max(0, SpreadsheetDataGrid.Items.IndexOf(SpreadsheetDataGrid.CurrentItem)); + int startCol = Math.Max(0, SpreadsheetDataGrid.CurrentCell.Column?.DisplayIndex ?? 0); + + // Parse clipboard text into a 2D array of cell values + string[] lines = clipboardText.Split('\n'); + List pastedRows = []; + foreach (string line in lines) + pastedRows.Add(line.TrimEnd('\r').Split('\t')); + + // Remove trailing empty row artifact produced by a final newline in copied table text + while (pastedRows.Count > 1 && pastedRows[^1].Length == 1 && pastedRows[^1][0].Length == 0) + pastedRows.RemoveAt(pastedRows.Count - 1); + + if (pastedRows.Count == 0) + return; + + int maxPastedCols = pastedRows.Max(row => row.Length); + + ApplySpreadsheetDocumentChange(document => + { + // Expand the document to fit the pasted data if necessary + int requiredRows = startRow + pastedRows.Count; + int requiredCols = startCol + maxPastedCols; + document.RowCount = Math.Max(document.RowCount, requiredRows); + document.ColumnCount = Math.Max(document.ColumnCount, requiredCols); + document.MinimumRowCount = Math.Max(document.MinimumRowCount, requiredRows); + document.MinimumColumnCount = Math.Max(document.MinimumColumnCount, requiredCols); + document.EnsureMinimumSize(); + + // Write values into the target cells + for (int r = 0; r < pastedRows.Count; r++) + { + int targetRow = startRow + r; + for (int c = 0; c < pastedRows[r].Length; c++) + { + int targetCol = startCol + c; + if (targetRow < document.Rows.Count && targetCol < document.Rows[targetRow].Count) + document.Rows[targetRow][targetCol] = pastedRows[r][c]; + } + } + }, startRow, startCol); + } + internal static string BuildSpreadsheetSelectionText( DataTable dataTable, IEnumerable<(int RowIndex, int ColumnIndex)> cellCoordinates) @@ -1444,6 +1530,24 @@ private void SpreadsheetUndoCanExecute(object sender, CanExecuteRoutedEventArgs e.Handled = true; } + private void SpreadsheetCopyCanExecute(object sender, CanExecuteRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + e.CanExecute = GetSelectedSpreadsheetCellCoordinates().Count > 0; + e.Handled = true; + } + + private void SpreadsheetPasteCanExecute(object sender, CanExecuteRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + e.CanExecute = true; + e.Handled = true; + } + private void SpreadsheetRedoCanExecute(object sender, CanExecuteRoutedEventArgs e) { if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) @@ -1483,6 +1587,33 @@ private void SpreadsheetRedoExecuted(object sender, ExecutedRoutedEventArgs e) e.Handled = true; } + private void SpreadsheetCopyExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + _ = TryCopySpreadsheetSelectionToClipboard(GetSelectedSpreadsheetCellCoordinates()); + e.Handled = true; + } + + private void SpreadsheetCutExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + _ = TryCutSelectedSpreadsheetCellValues(); + e.Handled = true; + } + + private void SpreadsheetPasteExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + PasteIntoSpreadsheet(); + e.Handled = true; + } + private bool IsSpreadsheetCellEditorFocused() { if (Keyboard.FocusedElement is not DependencyObject focusedElement) @@ -1643,17 +1774,43 @@ private void TrackSpreadsheetColumnWidth(DataGridColumn column) DependencyPropertyDescriptor.FromProperty(DataGridColumn.WidthProperty, typeof(DataGridColumn))?.AddValueChanged(column, SpreadsheetColumnWidthChanged); } - private void TrySetClipboardText(string text) + private bool TrySetClipboardText(string text) { try { System.Windows.Clipboard.SetDataObject(text, true); + return true; } catch { + return false; } } + private bool TryCopySpreadsheetSelectionToClipboard(IEnumerable<(int RowIndex, int ColumnIndex)> cellCoordinates) + { + string selectionText = BuildSpreadsheetSelectionText(spreadsheetTable, cellCoordinates); + return !string.IsNullOrEmpty(selectionText) && TrySetClipboardText(selectionText); + } + + private bool TryCutSelectedSpreadsheetCellValues() + { + List<(int RowIndex, int ColumnIndex)> selectedCellCoordinates = GetSelectedSpreadsheetCellCoordinates(); + if (selectedCellCoordinates.Count == 0) + return false; + + CommitSpreadsheetEditsAndCapturePendingHistory(); + SpreadsheetUndoState? beforeChange = CreateCurrentSpreadsheetUndoState(syncFromTable: true); + + if (!TryCutSpreadsheetCellValues(spreadsheetTable, selectedCellCoordinates, TrySetClipboardText)) + return false; + + SyncSpreadsheetDocumentFromTable(); + RecordSpreadsheetUndoChange(beforeChange, CreateCurrentSpreadsheetUndoState(syncFromTable: false)); + UpdateLineAndColumnText(); + return true; + } + private void UpdateSpreadsheetModeUi() { bool isSpreadsheetMode = editorMode == EtwEditorMode.Spreadsheet; @@ -1886,6 +2043,7 @@ internal async void OpenPath(string pathOfFileToOpen, bool isMultipleFiles = fal finally { isLoadingOpenedFile = false; + SyncTextFromActiveEditor(); SetOpenedFileState(shouldTrackOpenedFile ? pathOfFileToOpen : null); } } @@ -2360,6 +2518,88 @@ public string GetSelectedTextOrAllText() return textToModify; } + public bool IsSpreadsheetMode => editorMode == EtwEditorMode.Spreadsheet; + + public void CommitSpreadsheetAndSync() + { + CommitSpreadsheetEditsAndCapturePendingHistory(); + SyncSpreadsheetDocumentFromTable(writeText: false); + } + + public void NavigateToSpreadsheetCell(int rowIndex, int columnIndex) + { + Dispatcher.BeginInvoke( + () => FocusSpreadsheetCell(rowIndex, columnIndex, beginEdit: false), + DispatcherPriority.Background); + } + + public List SearchSpreadsheetCells(Regex pattern) + { + if (tableDocument is null) return []; + tableDocument.EnsureMinimumSize(); + List results = []; + int count = 1; + + for (int row = 0; row < tableDocument.RowCount; row++) + { + List rowData = tableDocument.Rows[row]; + for (int col = 0; col < tableDocument.ColumnCount; col++) + { + string cellValue = col < rowData.Count ? rowData[col] ?? string.Empty : string.Empty; + foreach (Match m in pattern.Matches(cellValue)) + { + int previewStart = Math.Max(0, m.Index - 12); + int previewEnd = Math.Min(cellValue.Length, m.Index + m.Length + 12); + results.Add(new FindResult + { + RowIndex = row, + ColumnIndex = col, + Index = m.Index, + Text = m.Value.MakeStringSingleLine(), + PreviewLeft = cellValue[previewStart..m.Index], + PreviewRight = cellValue[(m.Index + m.Length)..previewEnd], + Count = count++ + }); + } + } + } + return results; + } + + public void ReplaceInSpreadsheetCells( + IEnumerable targets, + string replaceWith, + Regex pattern) + { + CommitSpreadsheetEditsAndCapturePendingHistory(); + SyncSpreadsheetDocumentFromTable(writeText: false); + + if (tableDocument is null) return; + + SpreadsheetUndoState? beforeState = CreateCurrentSpreadsheetUndoState(syncFromTable: false); + + IEnumerable<(int RowIndex, int ColumnIndex, string Value)> updates = targets + .Where(r => r.RowIndex.HasValue && r.ColumnIndex.HasValue) + .GroupBy(r => (r.RowIndex!.Value, r.ColumnIndex!.Value)) + .Select(g => + { + int row = g.Key.Item1, col = g.Key.Item2; + string oldValue = row < tableDocument.Rows.Count && col < tableDocument.Rows[row].Count + ? tableDocument.Rows[row][col] ?? string.Empty : string.Empty; + + HashSet indicesToReplace = [.. g.Select(r => r.Index)]; + string newValue = pattern.Replace(oldValue, m => + indicesToReplace.Contains(m.Index) ? m.Result(replaceWith) : m.Value); + + return (RowIndex: row, ColumnIndex: col, Value: newValue); + }); + + SetSpreadsheetDocumentCellValues(tableDocument, updates); + RebuildSpreadsheetTable(); + UpdateTextFromSpreadsheetDocument(); + RecordSpreadsheetUndoChange(beforeState, CreateCurrentSpreadsheetUndoState(syncFromTable: false)); + } + private IEnumerable GetSelectedOrAllTextSegmentsForEdit() { if (editorMode == EtwEditorMode.Spreadsheet) @@ -2732,6 +2972,7 @@ private void LoadGrabTemplateMenuItems(MenuItem grabTemplateMenuItem) Header = "(None)", IsCheckable = true, IsChecked = previouslySelected is null, + StaysOpenOnClick = true, }; noneItem.Click += GrabTemplateMenuItem_Click; grabTemplateMenuItem.Items.Add(noneItem); @@ -2744,6 +2985,7 @@ private void LoadGrabTemplateMenuItems(MenuItem grabTemplateMenuItem) IsCheckable = true, IsChecked = previouslySelected?.Id == template.Id, Tag = template, + StaysOpenOnClick = true, }; templateMenuItem.Click += GrabTemplateMenuItem_Click; grabTemplateMenuItem.Items.Add(templateMenuItem); @@ -2844,6 +3086,7 @@ private async void LoadLanguageMenuItems(MenuItem captureMenuItem) Tag = language, IsCheckable = true, IsChecked = i == selectedIndex, + StaysOpenOnClick = true, }; languageMenuItem.Click += LanguageMenuItem_Click; captureMenuItem.Items.Add(languageMenuItem); @@ -3084,7 +3327,7 @@ private void OpenFileMenuItem_Click(object sender, RoutedEventArgs e) { // Set filter for file extension and default file extension DefaultExt = ".txt", - Filter = OpenDocumentFilter, + Filter = FileUtilities.GetOpenDocumentFilter(), DefaultDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) }; @@ -3404,7 +3647,13 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul { try { - string textFromClipboard = await dataPackageView.GetTextAsync(); + string textFromClipboard; + if (editorMode == EtwEditorMode.Text + && ClipboardUtilities.TryGetHtmlTableAsTabSeparated(out string htmlTableText)) + textFromClipboard = htmlTableText; + else + textFromClipboard = await dataPackageView.GetTextAsync(); + System.Windows.Application.Current.Dispatcher.Invoke(new Action(() => { AddCopiedTextToTextBox(textFromClipboard); })); } catch (Exception ex) @@ -3940,6 +4189,9 @@ private void SetupRoutedCommands() { _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Undo, SpreadsheetUndoExecuted, SpreadsheetUndoCanExecute)); _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Redo, SpreadsheetRedoExecuted, SpreadsheetRedoCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Cut, SpreadsheetCutExecuted, SpreadsheetCopyCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Copy, SpreadsheetCopyExecuted, SpreadsheetCopyCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Paste, SpreadsheetPasteExecuted, SpreadsheetPasteCanExecute)); RoutedCommand newFullscreenGrab = new(); _ = newFullscreenGrab.InputGestures.Add(new KeyGesture(Key.F, ModifierKeys.Control)); diff --git a/Text-Grab/Views/FirstRunWindow.xaml b/Text-Grab/Views/FirstRunWindow.xaml index 145532bd..b6bf5ebb 100644 --- a/Text-Grab/Views/FirstRunWindow.xaml +++ b/Text-Grab/Views/FirstRunWindow.xaml @@ -296,7 +296,7 @@ • The Grab Frame is a window which can be moved or resized. It stays on top of other windows and will read all of the text within the border. • Click or drag to select Word Borders then add them to the clipboard by clicking "Grab". - • Drop an image onto the Grab Frame to view the image and copy text. + • Drop an image or PDF onto the Grab Frame to view it and copy text. • Pause the Grab Frame and scroll to zoom in on a piece of text. • Edit each line to correct any errors and fix up the results to be perfect. • Table mode will draw a grid around the lines to be pasted into a table easily. diff --git a/Text-Grab/Views/GrabFrame.xaml b/Text-Grab/Views/GrabFrame.xaml index e6275d84..4bdd3675 100644 --- a/Text-Grab/Views/GrabFrame.xaml +++ b/Text-Grab/Views/GrabFrame.xaml @@ -129,7 +129,9 @@ x:Name="IsTopmostMenuItem" Header="Keep Grab Frame On Top" IsCheckable="True" - IsChecked="{Binding Topmost, ElementName=GrabFrameWindow, Mode=TwoWay}" /> + IsChecked="{Binding Topmost, + ElementName=GrabFrameWindow, + Mode=TwoWay}" /> @@ -289,31 +291,41 @@ Checked="AspectRationMI_Checked" Header="Maintain Aspect Ratio" IsCheckable="True" - IsChecked="{Binding IsChecked, ElementName=AspectRationMI, Mode=TwoWay}" + IsChecked="{Binding IsChecked, + ElementName=AspectRationMI, + Mode=TwoWay}" Unchecked="AspectRationMI_Checked" /> + IsChecked="{Binding IsChecked, + ElementName=FreezeToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=TableToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=EditToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=EditTextToggleButton, + Mode=TwoWay}" /> + + Visibility="{Binding Visibility, + ElementName=SearchBox, + Mode=OneWay}" /> + + + + + movingWordBordersDictionary = []; private IOcrLinesWords? ocrResultOfWindow; @@ -92,6 +97,8 @@ public partial class GrabFrame : Window private int totalWordsToTranslate = 0; private int translatedWordsCount = 0; private CancellationTokenSource? translationCancellationTokenSource; + private readonly List pdfTextLineOverlays = []; + private CancellationTokenSource? _pdfPageNavCts; private const string TargetLanguageMenuHeader = "Target Language"; #endregion Fields @@ -114,9 +121,9 @@ public GrabFrame(HistoryInfo historyInfo) } /// - /// Creates a GrabFrame and loads the specified image file. + /// Creates a GrabFrame and loads the specified image or PDF file. /// - /// The path to the image file to load. + /// The path to the file to load. public GrabFrame(string imagePath) { StandardInitialize(); @@ -126,11 +133,11 @@ public GrabFrame(string imagePath) // Validate the path before loading if (string.IsNullOrEmpty(imagePath)) { - Debug.WriteLine("GrabFrame: Empty image path provided"); + Debug.WriteLine("GrabFrame: Empty file path provided"); Loaded += async (s, e) => await new Wpf.Ui.Controls.MessageBox { Title = "Text Grab", - Content = "No image file path was provided.", + Content = "No file path was provided.", CloseButtonText = "OK" }.ShowDialogAsync(); return; @@ -141,17 +148,17 @@ public GrabFrame(string imagePath) if (!File.Exists(absolutePath)) { - Debug.WriteLine($"GrabFrame: Image file not found: {absolutePath}"); + Debug.WriteLine($"GrabFrame: File not found: {absolutePath}"); Loaded += async (s, e) => await new Wpf.Ui.Controls.MessageBox { Title = "Text Grab", - Content = $"Image file not found:\n{absolutePath}", + Content = $"File not found:\n{absolutePath}", CloseButtonText = "OK" }.ShowDialogAsync(); return; } - Loaded += async (s, e) => await TryLoadImageFromPath(absolutePath); + Loaded += async (s, e) => await TryLoadDocumentFromPath(absolutePath); } /// @@ -202,7 +209,7 @@ private async Task LoadTemplateForEditing(GrabTemplate template) if (!string.IsNullOrEmpty(template.SourceImagePath) && File.Exists(template.SourceImagePath)) { isStaticImageSource = true; - await TryLoadImageFromPath(template.SourceImagePath); + await TryLoadDocumentFromPath(template.SourceImagePath); reDrawTimer.Stop(); } else @@ -544,6 +551,94 @@ private void ShowFrameMessage(string message) frameMessageTimer.Start(); } + private void ClearLoadedPdfDocument() + { + _pdfPageNavCts?.Cancel(); + _pdfPageNavCts?.Dispose(); + _pdfPageNavCts = null; + _loadedPdfDocument?.Dispose(); + _loadedPdfDocument = null; + _currentPdfPageContent = null; + _currentPdfPageIndex = -1; + SetSpacePanModifierState(false); + UpdateZoomPanMode(); + SetScrollBehaviorMenuItems(); + UpdatePdfPageNavigation(); + } + + private async Task ChangePdfPageAsync(int delta) + { + if (_loadedPdfDocument is null) + return; + + int targetPageIndex = _currentPdfPageIndex + delta; + if (targetPageIndex < 0 || targetPageIndex >= _loadedPdfDocument.PageCount) + return; + + await ShowPdfPageAsync(targetPageIndex); + } + + private async Task ShowPdfPageAsync(int pageIndex) + { + if (_loadedPdfDocument is null) + return; + + CancellationTokenSource? previousCts = _pdfPageNavCts; + _pdfPageNavCts = new CancellationTokenSource(); + CancellationToken ct = _pdfPageNavCts.Token; + previousCts?.Cancel(); + previousCts?.Dispose(); + + try + { + reDrawTimer.Stop(); + ResetGrabFrame(); + await Task.Delay(300, ct); + + if (_loadedPdfDocument is null || ct.IsCancellationRequested) + return; + + _currentPdfPageContent = await _loadedPdfDocument.GetPageContentAsync(pageIndex); + frameContentImageSource = _currentPdfPageContent.RenderedPage; + hasLoadedImageSource = true; + isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; + _currentImagePath = _loadedPdfDocument.FilePath; + _currentPdfPageIndex = pageIndex; + FreezeToggleButton.IsChecked = true; + FreezeGrabFrame(); + MainZoomBorder.CanZoom = true; + FreezeToggleButton.Visibility = Visibility.Collapsed; + UpdatePdfPageNavigation(); + SwitchToOcrFallbackIfUiAutomation(); + + reDrawTimer.Start(); + } + catch (OperationCanceledException) + { + // Navigation superseded by a newer request — no-op + } + } + + private void UpdatePdfPageNavigation() + { + bool isPdfLoaded = _loadedPdfDocument is not null; + PdfPagePanel.Visibility = isPdfLoaded ? Visibility.Visible : Visibility.Collapsed; + + if (!isPdfLoaded || _currentPdfPageIndex < 0) + { + PdfPageTextBlock.Text = string.Empty; + PreviousPdfPageButton.IsEnabled = false; + NextPdfPageButton.IsEnabled = false; + return; + } + + PdfPageTextBlock.Text = $"Page {_currentPdfPageIndex + 1} / {_loadedPdfDocument!.PageCount}"; + PreviousPdfPageButton.IsEnabled = _currentPdfPageIndex > 0; + NextPdfPageButton.IsEnabled = _currentPdfPageIndex < _loadedPdfDocument.PageCount - 1; + } + /// /// When a static image is loaded and the active language is UI Automation (Direct Text), /// silently switch to the OCR fallback language so no warning is shown. @@ -624,6 +719,7 @@ public TextBox? DestinationTextBox public bool IsEditingAnyWordBorders => wordBorders.Any(x => x.IsEditing); public bool IsFreezeMode { get; set; } = false; public bool IsFromEditWindow => destinationTextBox is not null; + private bool IsPdfDocumentLoaded => _loadedPdfDocument is not null; public bool IsWordEditMode { get; set; } = true; public bool ShouldSaveOnClose { get; set; } = true; @@ -637,6 +733,39 @@ public static bool CheckKey(VirtualKeyCodes code) return (GetKeyState(code) & 0xFF00) == 0xFF00; } + private static FrameworkElement? GetInteractionSurface(object? sender) => sender as FrameworkElement; + + private bool IsPdfTextInteraction(object? sender) => ReferenceEquals(sender, PdfTextCanvas); + + private bool IsZoomPanGestureActive => + MainZoomBorder.CanPan + && !KeyboardExtensions.IsShiftDown() + && !KeyboardExtensions.IsCtrlDown() + && (!MainZoomBorder.RequireSpaceToPan || isSpacePanModifierDown || Keyboard.IsKeyDown(Key.Space)); + + private bool CanUseSpacePanModifier => + MainZoomBorder.RequireSpaceToPan + && MainZoomBorder.CanPan + && !IsEditingAnyWordBorders + && Keyboard.FocusedElement is not TextBox and not RichTextBox; + + private void SetSpacePanModifierState(bool isDown) + { + isSpacePanModifierDown = isDown; + MainZoomBorder.IsSpacePanModifierPressed = isDown; + } + + private void MoveKeyboardFocusFromButtonBase() + { + if (MainZoomBorder.CanPan && Keyboard.FocusedElement is ButtonBase) + RectanglesCanvas.Focus(); + } + + private void UpdateZoomPanMode() + { + MainZoomBorder.RequireSpaceToPan = true; + } + public HistoryInfo AsHistoryItem() { System.Drawing.Bitmap? bitmap = ImageMethods.ImageSourceToBitmap(frameContentImageSource); @@ -1249,12 +1378,44 @@ private void CheckSelectBorderIntersections(bool finalCheck = false) wordBorder.WasRegionSelected = false; } + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + if (rectSelect.IntersectsWith(new Rect(pdfTextLine.Left, pdfTextLine.Top, pdfTextLine.Width, pdfTextLine.Height))) + { + clickedEmptySpace = false; + + if (!smallSelection) + { + pdfTextLine.Select(); + pdfTextLine.WasRegionSelected = true; + } + else if (!finalCheck) + { + if (pdfTextLine.IsSelected) + pdfTextLine.Deselect(); + else + pdfTextLine.Select(); + pdfTextLine.WasRegionSelected = false; + } + } + else if (pdfTextLine.WasRegionSelected && !smallSelection) + { + pdfTextLine.Deselect(); + } + + if (finalCheck) + pdfTextLine.WasRegionSelected = false; + } + if (clickedEmptySpace && smallSelection && finalCheck) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); } if (finalCheck) @@ -1324,6 +1485,13 @@ private void ClearRenderedWordBorders() { RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); + } + + private void ClearRenderedPdfTextLines() + { + PdfTextCanvas.Children.Clear(); + pdfTextLineOverlays.Clear(); } private IReadOnlyCollection? GetUiAutomationExcludedHandles() @@ -1384,6 +1552,28 @@ private void AddRenderedWordBorder(WordBorder wordBorderBox) }); } + private PdfTextLineOverlay CreatePdfTextLineOverlay(Windows.Foundation.Rect sourceRect, double sourceScale, string text, DpiScale dpi) + { + Rect displayRect = new( + sourceRect.X / (dpi.DpiScaleX * sourceScale), + sourceRect.Y / (dpi.DpiScaleY * sourceScale), + sourceRect.Width / (dpi.DpiScaleX * sourceScale), + sourceRect.Height / (dpi.DpiScaleY * sourceScale)); + + PdfTextLineOverlay overlay = new(text); + overlay.ApplyLayout(displayRect); + return overlay; + } + + private void AddRenderedPdfTextLine(PdfTextLineOverlay overlay) + { + if (!IsOcrValid) + return; + + pdfTextLineOverlays.Add(overlay); + _ = PdfTextCanvas.Children.Add(overlay); + } + private Task DrawRectanglesAroundWords(string searchWord = "") { return CurrentLanguage is UiAutomationLang @@ -1396,6 +1586,12 @@ private async Task DrawOcrRectanglesAsync(string searchWord = "") if (isDrawing || IsDragOver) return; + if (_currentPdfPageContent?.HasNativeText is true) + { + await DrawPdfRectanglesAsync(searchWord); + return; + } + isDrawing = true; IsOcrValid = true; @@ -1524,6 +1720,71 @@ private async Task DrawOcrRectanglesAsync(string searchWord = "") } } + private async Task DrawPdfRectanglesAsync(string searchWord = "") + { + if (isDrawing || IsDragOver || _loadedPdfDocument is null || _currentPdfPageContent is null || _currentPdfPageIndex < 0) + return; + + isDrawing = true; + IsOcrValid = true; + windowFrameImageScale = 1; + ocrResultOfWindow = null; + + if (string.IsNullOrWhiteSpace(searchWord)) + searchWord = SearchBox.Text; + + ClearRenderedWordBorders(); + + if (frameContentImageSource is not BitmapSource) + { + isDrawing = false; + reDrawTimer.Start(); + return; + } + + DpiScale dpi = VisualTreeHelper.GetDpi(this); + SyncRectanglesCanvasSizeToImage(); + isSpaceJoining = CurrentLanguage!.IsSpaceJoining(); + + IReadOnlyList pageLines = await _loadedPdfDocument.GetSelectableLinesAsync(_currentPdfPageIndex, CurrentLanguage); + + foreach (PdfPageTextLine pageLine in pageLines) + { + string lineText = pageLine.Text; + if (!pageLine.IsNativeText) + { + if (DefaultSettings.CorrectErrors) + lineText = lineText.TryFixEveryWordLetterNumberErrors(); + + if (DefaultSettings.CorrectToLatin) + lineText = lineText.ReplaceGreekOrCyrillicWithLatin(); + } + + if (CurrentLanguage!.IsRightToLeft() && !pageLine.IsNativeText) + { + StringBuilder sb = new(lineText); + sb.ReverseWordsForRightToLeft(); + sb.RemoveTrailingNewlines(); + lineText = sb.ToString(); + } + + PdfTextLineOverlay overlay = CreatePdfTextLineOverlay(pageLine.SourceRect, 1, lineText, dpi); + AddRenderedPdfTextLine(overlay); + } + + if (DefaultSettings.TryToReadBarcodes) + TryToReadBarcodes(dpi); + + isDrawing = false; + reSearchTimer.Start(); + + if (isTranslationEnabled && WindowsAiUtilities.CanDeviceUseWinAI()) + { + translationTimer.Stop(); + translationTimer.Start(); + } + } + private async Task DrawUiAutomationRectanglesAsync(string searchWord = "") { if (isDrawing || IsDragOver) @@ -1719,6 +1980,8 @@ private void Escape_Keyed(object sender, ExecutedRoutedEventArgs e) SearchBox.Text = ""; else if (RectanglesCanvas.Children.Count > 0) ResetGrabFrame(); + else if (PdfTextCanvas.Children.Count > 0) + ResetGrabFrame(); else Close(); } @@ -1768,6 +2031,7 @@ private void FreezeGrabFrame() Background = new SolidColorBrush(Colors.DimGray); RectanglesBorder.Background.Opacity = 0; IsFreezeMode = true; + UpdateZoomPanMode(); if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen) MainZoomBorder.CanZoom = true; @@ -1790,6 +2054,7 @@ private void SyncRectanglesCanvasSizeToImage() if (double.IsFinite(sourceWidth) && sourceWidth > 0) { GrabFrameImage.Width = sourceWidth; + PdfTextCanvas.Width = sourceWidth; RectanglesCanvas.Width = sourceWidth; TemplateRegionOverlayCanvas.Width = sourceWidth; } @@ -1797,6 +2062,7 @@ private void SyncRectanglesCanvasSizeToImage() if (double.IsFinite(sourceHeight) && sourceHeight > 0) { GrabFrameImage.Height = sourceHeight; + PdfTextCanvas.Height = sourceHeight; RectanglesCanvas.Height = sourceHeight; TemplateRegionOverlayCanvas.Height = sourceHeight; } @@ -1806,6 +2072,12 @@ private async void FreezeMI_Click(object sender, RoutedEventArgs e) { if (IsFreezeMode) { + if (IsPdfDocumentLoaded) + { + FreezeToggleButton.IsChecked = true; + return; + } + FreezeToggleButton.IsChecked = false; UnfreezeGrabFrame(); ResetGrabFrame(); @@ -1827,6 +2099,8 @@ private void FreezeToggleButton_Click(object? sender = null, RoutedEventArgs? e { if (FreezeToggleButton.IsChecked is bool freezeMode && freezeMode) FreezeGrabFrame(); + else if (IsPdfDocumentLoaded) + FreezeToggleButton.IsChecked = true; else UnfreezeGrabFrame(); } @@ -1964,14 +2238,20 @@ private void GrabFrameWindow_Closing(object sender, System.ComponentModel.Cancel Singleton.Instance.SaveToHistory(this); historyItem?.ClearTransientImage(); + ClearLoadedPdfDocument(); FrameText = ""; wordBorders.Clear(); + pdfTextLineOverlays.Clear(); UpdateFrameText(); } private void GrabFrameWindow_Deactivated(object? sender, EventArgs e) { + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; + SetSpacePanModifierState(false); + if (!IsWordEditMode && !IsFreezeMode) { ResetGrabFrame(); @@ -2011,7 +2291,7 @@ private async void GrabFrameWindow_Drop(object sender, DragEventArgs e) frameContentImageSource = null; isStaticImageSource = true; - await TryLoadImageFromPath(fileName); + await TryLoadDocumentFromPath(fileName); IsDragOver = false; @@ -2118,6 +2398,14 @@ private void HandlePreviewMouseWheel(object sender, MouseWheelEventArgs e) if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen && IsFreezeMode) return; // ZoomBorder handles scroll when frozen + if (IsPdfDocumentLoaded) + { + // ZoomBorder handles the scroll and sets CanPan=true synchronously after we return. + // Defer a focus check so ButtonBase never holds focus while panning is possible. + Dispatcher.InvokeAsync(MoveKeyboardFocusFromButtonBase, DispatcherPriority.Input); + return; + } + e.Handled = true; double aspectRatio = (Height - 66) / (Width - 4); @@ -2161,6 +2449,16 @@ private void InvertSelection(object? sender = null, RoutedEventArgs? e = null) else wordBorder.Select(); } + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + if (pdfTextLine.IsSelected) + pdfTextLine.Deselect(); + else + pdfTextLine.Select(); + } + + UpdateFrameText(); } private void LanguagesComboBox_MouseDown(object sender, MouseButtonEventArgs e) @@ -2351,7 +2649,7 @@ private async void OpenImageMenuItem_Click(object? sender = null, RoutedEventArg Microsoft.Win32.OpenFileDialog dlg = new() { // Set filter for file extension and default file extension - Filter = FileUtilities.GetImageFilter() + Filter = FileUtilities.GetVisualDocumentFilter() }; bool? result = dlg.ShowDialog(); @@ -2359,7 +2657,7 @@ private async void OpenImageMenuItem_Click(object? sender = null, RoutedEventArg if (result is false || !File.Exists(dlg.FileName)) return; - await TryLoadImageFromPath(dlg.FileName); + await TryLoadDocumentFromPath(dlg.FileName); reDrawTimer.Start(); } @@ -2386,6 +2684,7 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul frameContentImageSource = clipboardImage; } + ClearLoadedPdfDocument(); hasLoadedImageSource = true; isStaticImageSource = true; frozenUiAutomationSnapshot = null; @@ -2405,8 +2704,14 @@ private async void RateAndReview_Click(object sender, RoutedEventArgs e) private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) { + bool isPdfTextInteraction = IsPdfTextInteraction(sender); + FrameworkElement interactionSurface = isPdfTextInteraction + ? (e.OriginalSource as FrameworkElement ?? PdfTextCanvas) + : (GetInteractionSurface(sender) ?? RectanglesCanvas); + reDrawTimer.Stop(); - GrabBTN.Focus(); + if (!MainZoomBorder.CanPan) + GrabBTN.Focus(); if (e.RightButton == MouseButtonState.Pressed) { @@ -2422,13 +2727,17 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) return; } - if (!KeyboardExtensions.IsShiftDown() && !KeyboardExtensions.IsCtrlDown()) + bool shouldPanInsteadOfSelect = IsPdfDocumentLoaded + ? IsZoomPanGestureActive + : IsZoomPanGestureActive && !isPdfTextInteraction; + + if (shouldPanInsteadOfSelect) return; } isSelecting = true; clickedPoint = e.GetPosition(RectanglesCanvas); - RectanglesCanvas.CaptureMouse(); + interactionSurface.CaptureMouse(); selectBorder.Height = 1; selectBorder.Width = 1; @@ -2439,8 +2748,11 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) e.Handled = true; isMiddleDown = true; - ResetGrabFrame(); - UnfreezeGrabFrame(); + if (!IsPdfDocumentLoaded) + { + ResetGrabFrame(); + UnfreezeGrabFrame(); + } return; } @@ -2460,12 +2772,17 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) { + FrameworkElement interactionSurface = GetInteractionSurface(sender) ?? RectanglesCanvas; + bool isPdfTextInteraction = IsPdfTextInteraction(sender); + if (IsCtrlDown) - RectanglesCanvas.Cursor = Cursors.Cross; + interactionSurface.Cursor = Cursors.Cross; else if (MainZoomBorder.CanPan) - RectanglesCanvas.Cursor = Cursors.SizeAll; + interactionSurface.Cursor = (IsPdfDocumentLoaded || !isPdfTextInteraction) && IsZoomPanGestureActive + ? Cursors.SizeAll + : Cursors.Arrow; else - RectanglesCanvas.Cursor = null; + interactionSurface.Cursor = null; if (!isSelecting && !isMiddleDown && movingWordBordersDictionary.Count == 0) return; @@ -2473,8 +2790,9 @@ private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) isMiddleDown = e.MiddleButton == MouseButtonState.Pressed; if (MainZoomBorder.CanPan - && !KeyboardExtensions.IsShiftDown() - && !KeyboardExtensions.IsCtrlDown()) + && (IsPdfDocumentLoaded + ? IsZoomPanGestureActive + : (IsZoomPanGestureActive && !isPdfTextInteraction))) { isSelecting = false; return; @@ -2522,12 +2840,13 @@ private void RectanglesCanvas_MouseUp(object sender, MouseButtonEventArgs e) { isSelecting = false; CursorClipper.UnClipCursor(); - RectanglesCanvas.ReleaseMouseCapture(); + Mouse.Captured?.ReleaseMouseCapture(); if (e.ChangedButton == MouseButton.Middle && scrollBehavior != ScrollBehavior.Zoom) { isMiddleDown = false; - FreezeGrabFrame(); + if (!IsPdfDocumentLoaded) + FreezeGrabFrame(); reDrawTimer.Start(); return; } @@ -2676,6 +2995,9 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); MatchesTXTBLK.Text = $"0 Matches"; UpdateFrameText(); return; @@ -2697,6 +3019,9 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); UpdateFrameText(); MatchesTXTBLK.Text = $"Search Error"; return; @@ -2716,6 +3041,17 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) else wb.Deselect(); } + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + int numberOfMatchesInLine = regex.Count(pdfTextLine.Text); + numberOfMatches += numberOfMatchesInLine; + + if (numberOfMatchesInLine > 0) + pdfTextLine.Select(); + else + pdfTextLine.Deselect(); + } } UpdateFrameText(); @@ -2796,6 +3132,11 @@ private void SelectAllWordBorders(object? sender = null, RoutedEventArgs? e = nu { foreach (WordBorder wordBorder in wordBorders) wordBorder.Select(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Select(); + + UpdateFrameText(); } private void SetGrabFrameUserSettings() @@ -3209,11 +3550,23 @@ private void TableToggleButton_Click(object? sender = null, RoutedEventArgs? e = UpdateFrameText(); } + private async Task TryLoadDocumentFromPath(string path) + { + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(path))) + { + await TryLoadPdfFromPath(path); + return; + } + + await TryLoadImageFromPath(path); + } + private async Task TryLoadImageFromPath(string path) { Uri fileURI = new(path); try { + ClearLoadedPdfDocument(); ResetGrabFrame(); await Task.Delay(300); BitmapImage droppedImage = new(); @@ -3249,6 +3602,29 @@ private async Task TryLoadImageFromPath(string path) } } + private async Task TryLoadPdfFromPath(string path) + { + try + { + ClearLoadedPdfDocument(); + _loadedPdfDocument = await PdfDocumentRenderer.LoadAsync(path); + _currentImagePath = Path.GetFullPath(path); + await ShowPdfPageAsync(0); + } + catch (Exception ex) + { + ClearLoadedPdfDocument(); + hasLoadedImageSource = false; + UnfreezeGrabFrame(); + await new Wpf.Ui.Controls.MessageBox + { + Title = "Text Grab", + Content = $"Failed to open PDF.{Environment.NewLine}{ex.Message}", + CloseButtonText = "OK" + }.ShowDialogAsync(); + } + } + private void TryToAlphaMenuItem_Click(object sender, RoutedEventArgs e) { List wbToEdit = SelectedWordBorders(); @@ -3283,10 +3659,12 @@ private void TryToNumberMenuItem_Click(object sender, RoutedEventArgs e) UndoRedo.EndTransaction(); } - private void TryToPlaceTable() + private List TryToPlaceTable() { RemoveTableLines(); + List wbInfos = [.. wordBorders.Select(wb => new WordBorderInfo(wb))]; + Point windowPosition = this.GetAbsolutePosition(); DpiScale dpi = VisualTreeHelper.GetDpi(this); System.Drawing.Rectangle rectCanvasSize = new() @@ -3300,8 +3678,6 @@ private void TryToPlaceTable() try { AnalyzedResultTable = new(); - // Convert UI controls to model-only infos - List wbInfos = [.. wordBorders.Select(wb => new WordBorderInfo(wb))]; AnalyzedResultTable.AnalyzeAsTable(wbInfos, rectCanvasSize); if (AnalyzedResultTable.TableLines is not null) RectanglesCanvas.Children.Add(AnalyzedResultTable.TableLines); @@ -3310,6 +3686,8 @@ private void TryToPlaceTable() { Debug.WriteLine(ex.Message); } + + return wbInfos; } private void TryToReadBarcodes(DpiScale dpi) @@ -3426,7 +3804,11 @@ private void UndoExecuted(object sender, ExecutedRoutedEventArgs e) private void UnfreezeGrabFrame() { + if (IsPdfDocumentLoaded) + return; + reDrawTimer.Stop(); + ClearLoadedPdfDocument(); hasLoadedImageSource = false; isStaticImageSource = false; frozenUiAutomationSnapshot = null; @@ -3441,6 +3823,7 @@ private void UnfreezeGrabFrame() FreezeToggleButton.Visibility = Visibility.Visible; Background = new SolidColorBrush(Colors.Transparent); IsFreezeMode = false; + UpdateZoomPanMode(); if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen) MainZoomBorder.CanZoom = false; @@ -3448,26 +3831,77 @@ private void UnfreezeGrabFrame() reDrawTimer.Start(); } - private void UpdateFrameText() + private async void PreviousPdfPageButton_Click(object sender, RoutedEventArgs e) { - string[] selectedWbs = [.. wordBorders - .OrderBy(b => b.Top) - .Where(w => w.IsSelected) - .Select(t => t.Word)]; + await ChangePdfPageAsync(-1); + } + + private async void NextPdfPageButton_Click(object sender, RoutedEventArgs e) + { + await ChangePdfPageAsync(1); + } + + private void AppendPositionedTextLines( + StringBuilder stringBuilder, + IEnumerable<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> lines) + { + List<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> orderedLines = + [.. lines + .Where(line => !string.IsNullOrWhiteSpace(line.Text)) + .OrderBy(line => line.Top) + .ThenBy(line => line.Left)]; + + if (orderedLines.Count == 0) + return; + stringBuilder.Append(orderedLines[0].Text); + for (int i = 1; i < orderedLines.Count; i++) + { + (double Top, double Left, double Height, string Text, bool AllowParagraphJoin) previousLine = orderedLines[i - 1]; + (double Top, double Left, double Height, string Text, bool AllowParagraphJoin) currentLine = orderedLines[i]; + + bool shouldJoinParagraph = + DefaultSettings.ParagraphDetection + && isSpaceJoining + && previousLine.AllowParagraphJoin + && currentLine.AllowParagraphJoin + && OcrUtilities.IsWrappedParagraph(previousLine.Top, previousLine.Height, currentLine.Top, currentLine.Height); + + if (shouldJoinParagraph) + stringBuilder.Append(' '); + else + stringBuilder.AppendLine(); + + stringBuilder.Append(currentLine.Text); + } + } + + private void UpdateFrameText() + { StringBuilder stringBuilder = new(); + List<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> selectedLines = + [.. wordBorders + .Where(w => w.IsSelected) + .Select(w => (w.Top, w.Left, w.Height, w.Word, AllowParagraphJoin: false)) + .Concat(pdfTextLineOverlays + .Where(line => line.IsSelected) + .Select(line => (line.Top, line.Left, line.Height, line.Text, AllowParagraphJoin: true)))]; - if (TableToggleButton.IsChecked is true) + if (TableToggleButton.IsChecked is true && wordBorders.Count > 0) { - TryToPlaceTable(); - // Build table text via model-only API - List infos = [.. wordBorders.Select(wb => new WordBorderInfo(wb))]; + List infos = TryToPlaceTable(); ResultTable.GetTextFromTabledWordBorders(stringBuilder, infos, isSpaceJoining); } else { - if (selectedWbs.Length > 0) - stringBuilder.AppendJoin(Environment.NewLine, selectedWbs); + if (selectedLines.Count > 0) + AppendPositionedTextLines(stringBuilder, selectedLines); + else if (pdfTextLineOverlays.Count > 0) + AppendPositionedTextLines( + stringBuilder, + wordBorders + .Select(w => (w.Top, w.Left, w.Height, w.Word, AllowParagraphJoin: false)) + .Concat(pdfTextLineOverlays.Select(line => (line.Top, line.Left, line.Height, line.Text, AllowParagraphJoin: true)))); else AppendWordBordersWithParagraphDetection(stringBuilder); } @@ -3525,6 +3959,19 @@ private void Window_LocationChanged(object? sender, EventArgs e) private void Window_PreviewKeyDown(object sender, KeyEventArgs e) { + if (e.Key == Key.Space) + { + // Cancel any pending grace-period clear when Space is pressed + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; + if (CanUseSpacePanModifier) + { + SetSpacePanModifierState(true); + e.Handled = true; + return; + } + } + if (!wasAltHeld && (e.SystemKey == Key.LeftAlt || e.SystemKey == Key.RightAlt)) { RectanglesCanvas.Opacity = 0.1; @@ -3550,6 +3997,29 @@ private void Window_PreviewKeyDown(object sender, KeyEventArgs e) private void Window_PreviewKeyUp(object sender, KeyEventArgs e) { + if (e.Key == Key.Space) + { + // Keep the pan modifier active for a short grace period after Space is released. + // Users commonly release Space a split-second before clicking to start a pan, + // so clearing immediately makes the gesture feel broken. + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = new DispatcherTimer { Interval = TimeSpan.FromMilliseconds(300) }; + _spacePanGraceTimer.Tick += (_, _) => + { + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; + if (!Keyboard.IsKeyDown(Key.Space)) + SetSpacePanModifierState(false); + }; + _spacePanGraceTimer.Start(); + + if (CanUseSpacePanModifier) + { + e.Handled = true; + return; + } + } + if (wasAltHeld && (e.SystemKey == Key.LeftAlt || e.SystemKey == Key.RightAlt)) { RectanglesCanvas.Opacity = 1; @@ -3591,9 +4061,12 @@ private void ResetViewMenuItem_Click(object sender, RoutedEventArgs e) private void ShowWordBordersMenuItem_Click(object sender, RoutedEventArgs e) { - RectanglesCanvas.Visibility = ShowWordBordersMenuItem.IsChecked is true + Visibility overlayVisibility = ShowWordBordersMenuItem.IsChecked is true ? Visibility.Visible : Visibility.Hidden; + + RectanglesCanvas.Visibility = overlayVisibility; + PdfTextCanvas.Visibility = overlayVisibility; } private void OverlayOpacityMenuItem_Click(object sender, RoutedEventArgs e) @@ -3749,6 +4222,9 @@ private void SetScrollBehaviorMenuItems() default: break; } + + if (IsPdfDocumentLoaded) + MainZoomBorder.CanZoom = true; } private void InvertColorsMI_Click(object sender, RoutedEventArgs e) @@ -3823,6 +4299,7 @@ private void AutoContrastMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3870,6 +4347,7 @@ private void BrightenMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3917,6 +4395,7 @@ private void DarkenMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3964,6 +4443,7 @@ private void GrayscaleMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame();