From 6f93d78dcbdeab8ee7b3a58fe0ac91bd0f0095a8 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 25 Apr 2026 18:38:31 -0500 Subject: [PATCH 01/22] Add PDF OCR support and context menu integration - Introduce OpenContentKind.PdfDocument and PDF extension helpers - Support PDFs in context menu and "Open With" registration - Add FileUtilities.GetVisualDocumentFilter for images/PDFs - Route PDF files to new PdfDocumentRenderer for native text and OCR - Refactor to treat images and PDFs as visual documents - No breaking changes to existing image workflows --- Text-Grab/Enums.cs | 1 + Text-Grab/Utilities/ContextMenuUtilities.cs | 29 +- Text-Grab/Utilities/FileUtilities.cs | 53 ++- Text-Grab/Utilities/ImplementAppOptions.cs | 14 +- Text-Grab/Utilities/IoUtilities.cs | 47 ++- Text-Grab/Utilities/OcrUtilities.cs | 21 +- Text-Grab/Utilities/PdfDocumentRenderer.cs | 435 ++++++++++++++++++++ 7 files changed, 555 insertions(+), 45 deletions(-) create mode 100644 Text-Grab/Utilities/PdfDocumentRenderer.cs diff --git a/Text-Grab/Enums.cs b/Text-Grab/Enums.cs index 52824fa6..4ed5a1f5 100644 --- a/Text-Grab/Enums.cs +++ b/Text-Grab/Enums.cs @@ -33,6 +33,7 @@ public enum OpenContentKind Image = 0, TextFile = 1, Directory = 2, + PdfDocument = 3, } public enum OcrEngineKind diff --git a/Text-Grab/Utilities/ContextMenuUtilities.cs b/Text-Grab/Utilities/ContextMenuUtilities.cs index 4088aab2..7ef899fb 100644 --- a/Text-Grab/Utilities/ContextMenuUtilities.cs +++ b/Text-Grab/Utilities/ContextMenuUtilities.cs @@ -6,7 +6,7 @@ namespace Text_Grab.Utilities; /// /// Utility class for managing Windows context menu integration. -/// Adds "Grab text with Text Grab" and "Open in Grab Frame" options to the right-click context menu for image files. +/// Adds "Grab text with Text Grab" and "Open in Grab Frame" options to the right-click context menu for supported visual documents. 
/// internal static class ContextMenuUtilities { @@ -16,22 +16,17 @@ internal static class ContextMenuUtilities private const string GrabFrameDisplayText = "Open in Grab Frame"; /// - /// Supported image file extensions for context menu integration. + /// Supported image and PDF file extensions for context menu integration. /// - private static readonly string[] ImageExtensions = + private static readonly string[] VisualDocumentExtensions = [ - ".png", - ".jpg", - ".jpeg", - ".bmp", - ".gif", - ".tiff", - ".tif" + .. IoUtilities.ImageExtensions, + .. IoUtilities.PdfExtensions ]; /// - /// Adds Text Grab to the Windows context menu for image files. - /// This allows users to right-click on an image and select "Grab text with Text Grab" or "Open in Grab Frame". + /// Adds Text Grab to the Windows context menu for supported visual documents. + /// This allows users to right-click a file and select "Grab text with Text Grab" or "Open in Grab Frame". /// /// When the method returns false, contains an error message describing the failure. /// True if registration was successful, false otherwise. @@ -48,7 +43,7 @@ public static bool AddToContextMenu(out string? errorMessage) try { - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { RegisterGrabTextContextMenu(extension, executablePath); RegisterGrabFrameContextMenu(extension, executablePath); @@ -70,7 +65,7 @@ public static bool AddToContextMenu(out string? errorMessage) } /// - /// Removes Text Grab from the Windows context menu for image files. + /// Removes Text Grab from the Windows context menu for supported visual documents. /// /// When the method returns false, contains an error message describing the failure. /// True if removal was successful, false otherwise. @@ -79,7 +74,7 @@ public static bool RemoveFromContextMenu(out string? 
errorMessage) errorMessage = null; try { - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { UnregisterContextMenuForExtension(extension, GrabTextRegistryKeyName); UnregisterContextMenuForExtension(extension, GrabFrameRegistryKeyName); @@ -109,7 +104,7 @@ public static bool IsRegisteredInContextMenu() try { // Check if at least one extension has the context menu registered - foreach (string extension in ImageExtensions) + foreach (string extension in VisualDocumentExtensions) { string keyPath = GetShellKeyPath(extension, GrabTextRegistryKeyName); using RegistryKey? key = Registry.CurrentUser.OpenSubKey(keyPath); @@ -186,7 +181,7 @@ private static void RegisterGrabFrameContextMenu(string extension, string execut throw new InvalidOperationException($"Could not create command registry key for {extension}"); } - // --grabframe flag opens the image in GrabFrame instead of EditTextWindow + // --grabframe flag opens the visual document in GrabFrame instead of EditTextWindow commandKey.SetValue(string.Empty, $"\"{executablePath}\" --grabframe \"%1\""); } } diff --git a/Text-Grab/Utilities/FileUtilities.cs b/Text-Grab/Utilities/FileUtilities.cs index 73bae77d..cfe0edc7 100644 --- a/Text-Grab/Utilities/FileUtilities.cs +++ b/Text-Grab/Utilities/FileUtilities.cs @@ -3,6 +3,7 @@ using System.Drawing; using System.Drawing.Imaging; using System.IO; +using System.Linq; using System.Text; using System.Threading.Tasks; using Windows.Storage; @@ -31,27 +32,23 @@ public class FileUtilities /// Modified by Joseph Finney public static string GetImageFilter() { - string imageExtensions = string.Empty; - string separator = ""; - ImageCodecInfo[] codecs = ImageCodecInfo.GetImageEncoders(); - Dictionary imageFilters = []; - foreach (ImageCodecInfo codec in codecs) - { - if (codec.FilenameExtension is not string extension) - continue; + string imageExtensions = GetImageExtensionsFilterPattern(); + return 
string.IsNullOrEmpty(imageExtensions) ? string.Empty : $"Image files|{imageExtensions}"; + } - imageExtensions = $"{imageExtensions}{separator}{extension.ToLower()}"; - separator = ";"; - imageFilters.Add($"{codec.FormatDescription} files ({extension.ToLower()})", extension.ToLower()); - } - string result = string.Empty; - separator = ""; + public static string GetVisualDocumentFilter() + { + string imageExtensions = GetImageExtensionsFilterPattern(); + string pdfExtensions = string.Join(";", IoUtilities.PdfExtensions.Select(extension => $"*{extension}")); + string combinedExtensions = string.Join(";", new[] { imageExtensions, pdfExtensions }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); + string imageFilter = GetImageFilter(); - if (!string.IsNullOrEmpty(imageExtensions)) + return string.Join("|", new[] { - result += $"{separator}Image files|{imageExtensions}"; - } - return result; + $"Image and PDF files|{combinedExtensions}", + $"PDF files|{pdfExtensions}", + imageFilter + }); } public static string GetPathToLocalFile(string imageRelativePath) @@ -99,6 +96,26 @@ public static Task SaveTextFile(string textContent, string filename, FileS return SaveTextFileUnpackaged(textContent, filename, storageKind); } + private static string GetImageExtensionsFilterPattern() + { + string imageExtensions = string.Empty; + string separator = string.Empty; + ImageCodecInfo[] codecs = ImageCodecInfo.GetImageEncoders(); + Dictionary imageFilters = []; + + foreach (ImageCodecInfo codec in codecs) + { + if (codec.FilenameExtension is not string extension) + continue; + + imageExtensions = $"{imageExtensions}{separator}{extension.ToLower()}"; + separator = ";"; + imageFilters.Add($"{codec.FormatDescription} files ({extension.ToLower()})", extension.ToLower()); + } + + return imageExtensions; + } + private static async Task GetImageFilePackaged(string fileName, FileStorageKind storageKind) { StorageFolder folder = await GetStorageFolderPackaged(fileName, storageKind); diff 
--git a/Text-Grab/Utilities/ImplementAppOptions.cs b/Text-Grab/Utilities/ImplementAppOptions.cs index 50ec062e..125255b7 100644 --- a/Text-Grab/Utilities/ImplementAppOptions.cs +++ b/Text-Grab/Utilities/ImplementAppOptions.cs @@ -8,7 +8,11 @@ namespace Text_Grab.Utilities; internal class ImplementAppOptions { - private static readonly string[] ImageExtensions = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff", ".tif", ".webp", ".ico"]; + private static readonly string[] SupportedOpenWithExtensions = + [ + .. IoUtilities.ImageExtensions, + .. IoUtilities.PdfExtensions + ]; public static async Task ImplementStartupOption(bool startupOnLogin) { @@ -60,8 +64,8 @@ public static void RegisterAsImageOpenWithApp() iconKey?.SetValue("", $"\"{executablePath}\",0"); } - // Register Text Grab in OpenWithProgids for each image extension - foreach (string ext in ImageExtensions) + // Register Text Grab in OpenWithProgids for each supported visual document extension + foreach (string ext in SupportedOpenWithExtensions) { string extKey = $@"SOFTWARE\Classes\{ext}\OpenWithProgids"; using RegistryKey? key = Registry.CurrentUser.CreateSubKey(extKey); @@ -80,7 +84,7 @@ public static void RegisterAsImageOpenWithApp() using RegistryKey? supportedTypes = key.CreateSubKey("SupportedTypes"); if (supportedTypes is not null) { - foreach (string ext in ImageExtensions) + foreach (string ext in SupportedOpenWithExtensions) supportedTypes.SetValue(ext, ""); } @@ -108,7 +112,7 @@ public static void UnregisterAsImageOpenWithApp() Registry.CurrentUser.DeleteSubKeyTree(@"SOFTWARE\Classes\Text-Grab.Image", false); // Remove OpenWithProgids entries for each extension - foreach (string ext in ImageExtensions) + foreach (string ext in SupportedOpenWithExtensions) { string extKey = $@"SOFTWARE\Classes\{ext}\OpenWithProgids"; using RegistryKey? 
key = Registry.CurrentUser.OpenSubKey(extKey, true); diff --git a/Text-Grab/Utilities/IoUtilities.cs b/Text-Grab/Utilities/IoUtilities.cs index b05e90cc..698bf16b 100644 --- a/Text-Grab/Utilities/IoUtilities.cs +++ b/Text-Grab/Utilities/IoUtilities.cs @@ -11,6 +11,7 @@ namespace Text_Grab.Utilities; public class IoUtilities { public static readonly List ImageExtensions = [".png", ".bmp", ".jpg", ".jpeg", ".tiff", ".gif", ".tif", ".webp", ".ico"]; + public static readonly List PdfExtensions = [".pdf"]; public static readonly List MarkdownExtensions = [".md", ".markdown"]; public static readonly List SpreadsheetExtensions = [".csv", ".tsv", ".tab"]; @@ -30,6 +31,35 @@ public static bool IsImageFileExtension(string extension) return ImageExtensions.Contains(extension.ToLowerInvariant()); } + public static bool IsPdfFile(string path) + { + if (string.IsNullOrWhiteSpace(path) || !File.Exists(path)) + return false; + + return IsPdfFileExtension(Path.GetExtension(path)); + } + + public static bool IsPdfFileExtension(string extension) + { + if (string.IsNullOrWhiteSpace(extension)) + return false; + + return PdfExtensions.Contains(extension.ToLowerInvariant()); + } + + public static bool IsVisualDocumentFile(string path) + { + if (string.IsNullOrWhiteSpace(path) || !File.Exists(path)) + return false; + + return IsVisualDocumentFileExtension(Path.GetExtension(path)); + } + + public static bool IsVisualDocumentFileExtension(string extension) + { + return IsImageFileExtension(extension) || IsPdfFileExtension(extension); + } + public static bool IsMarkdownFileExtension(string extension) { if (string.IsNullOrWhiteSpace(extension)) @@ -59,15 +89,28 @@ public static EtwEditorMode GetEditorModeForPath(string? path) return EtwEditorMode.Text; } + public static OpenContentKind GetOpenContentKindForPath(string? path) + { + string extension = Path.GetExtension(path ?? 
string.Empty); + + if (IsPdfFileExtension(extension)) + return OpenContentKind.PdfDocument; + + if (IsImageFileExtension(extension)) + return OpenContentKind.Image; + + return OpenContentKind.TextFile; + } + public static async Task<(string TextContent, OpenContentKind SourceKindOfContent)> GetContentFromPath(string pathOfFileToOpen, bool isMultipleFiles = false, ILanguage? language = null) { StringBuilder stringBuilder = new(); - OpenContentKind openContentKind = OpenContentKind.Image; + OpenContentKind openContentKind = GetOpenContentKindForPath(pathOfFileToOpen); if (isMultipleFiles) stringBuilder.AppendLine(pathOfFileToOpen); - if (ImageExtensions.Contains(Path.GetExtension(pathOfFileToOpen).ToLower())) + if (openContentKind is OpenContentKind.Image or OpenContentKind.PdfDocument) { try { diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index eed11f63..aa14648d 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -540,8 +540,15 @@ public static string GetStringFromOcrOutputs(List outputs) public static async Task OcrAbsoluteFilePathAsync(string absolutePath, ILanguage? language = null) { - Bitmap bmp = LoadBitmapFromFile(absolutePath); language ??= LanguageUtilities.GetCurrentInputLanguage(); + + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(absolutePath))) + { + PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(absolutePath); + return await pdfDocument.ExtractTextAsync(language); + } + + using Bitmap bmp = LoadBitmapFromFile(absolutePath); return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, language)); } @@ -657,8 +664,16 @@ public static async Task OcrFile(string path, ILanguage? 
selectedLanguag string ocrText; if (options.GrabTemplate is GrabTemplate grabTemplate) { - Bitmap bmp = LoadBitmapFromFile(path); - ocrText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, bmp, selectedLanguage); + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(path))) + { + PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(path); + ocrText = await pdfDocument.ExtractTextAsync(selectedLanguage, grabTemplate); + } + else + { + using Bitmap bmp = LoadBitmapFromFile(path); + ocrText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, bmp, selectedLanguage); + } } else ocrText = await OcrAbsoluteFilePathAsync(path, selectedLanguage); diff --git a/Text-Grab/Utilities/PdfDocumentRenderer.cs b/Text-Grab/Utilities/PdfDocumentRenderer.cs new file mode 100644 index 00000000..c5bbb47a --- /dev/null +++ b/Text-Grab/Utilities/PdfDocumentRenderer.cs @@ -0,0 +1,435 @@ +using System; +using System.Collections.Generic; +using System.Drawing; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using System.Windows.Media.Imaging; +using Text_Grab.Interfaces; +using Text_Grab.Models; +using UglyToad.PdfPig.Content; +using UglyToad.PdfPig.Core; +using UglyToad.PdfPig.DocumentLayoutAnalysis.WordExtractor; +using Windows.Graphics.Imaging; +using Windows.Storage; +using Windows.Storage.Streams; +using OcrEngine = Windows.Media.Ocr.OcrEngine; +using PigPdfDocument = UglyToad.PdfPig.PdfDocument; +using PdfPageRenderOptions = Windows.Data.Pdf.PdfPageRenderOptions; +using WinPdfDocument = Windows.Data.Pdf.PdfDocument; +using WinPdfPage = Windows.Data.Pdf.PdfPage; + +namespace Text_Grab.Utilities; + +internal sealed class PdfPageContent +{ + public PdfPageContent( + int pageIndex, + BitmapSource renderedPage, + IReadOnlyList nativeLines, + IReadOnlyList imageRegions) + { + PageIndex = pageIndex; + RenderedPage = renderedPage; + NativeLines = nativeLines; + ImageRegions = imageRegions; + } 
+ + public bool HasNativeText => NativeLines.Count > 0; + + public IReadOnlyList ImageRegions { get; } + + public IReadOnlyList NativeLines { get; } + + public int PageIndex { get; } + + public BitmapSource RenderedPage { get; } +} + +internal sealed class PdfPageTextLine +{ + public PdfPageTextLine(Windows.Foundation.Rect sourceRect, string text, bool isNativeText) + { + SourceRect = sourceRect; + Text = text; + IsNativeText = isNativeText; + } + + public bool IsNativeText { get; } + + public Windows.Foundation.Rect SourceRect { get; } + + public string Text { get; } +} + +internal sealed class PdfDocumentRenderer : IDisposable +{ + private const double DefaultRenderScale = 2.0; + private readonly WinPdfDocument renderDocument; + private readonly PigPdfDocument textDocument; + private readonly Dictionary pageCache = []; + + private PdfDocumentRenderer(string filePath, WinPdfDocument renderDocument, PigPdfDocument textDocument) + { + FilePath = filePath; + this.renderDocument = renderDocument; + this.textDocument = textDocument; + } + + public string FilePath { get; } + + public int PageCount => (int)renderDocument.PageCount; + + public void Dispose() + { + textDocument.Dispose(); + } + + public async Task ExtractTextAsync(ILanguage? language = null, GrabTemplate? grabTemplate = null) + { + ILanguage resolvedLanguage = language ?? 
LanguageUtilities.GetCurrentInputLanguage(); + StringBuilder extractedText = new(); + + for (int pageIndex = 0; pageIndex < PageCount; pageIndex++) + { + string pageText; + if (grabTemplate is not null) + { + BitmapSource pageImage = await RenderPageAsync(pageIndex); + using Bitmap pageBitmap = ImageMethods.BitmapSourceToBitmap(pageImage); + pageText = await GrabTemplateExecutor.ExecuteTemplateOnBitmapAsync(grabTemplate, pageBitmap, resolvedLanguage); + } + else + { + IReadOnlyList lines = await GetSelectableLinesAsync(pageIndex, resolvedLanguage); + pageText = string.Join(Environment.NewLine, lines.Select(line => line.Text)); + } + + if (string.IsNullOrWhiteSpace(pageText)) + continue; + + if (extractedText.Length > 0) + extractedText.AppendLine().AppendLine(); + + extractedText.Append(pageText.Trim()); + } + + return extractedText.ToString(); + } + + public async Task GetPageContentAsync(int pageIndex) + { + ValidatePageIndex(pageIndex); + + if (pageCache.TryGetValue(pageIndex, out PdfPageContent? cachedPage)) + return cachedPage; + + WinPdfPage renderPage = renderDocument.GetPage((uint)pageIndex); + try + { + BitmapImage renderedPage = await RenderPageBitmapAsync(renderPage); + Page textPage = textDocument.GetPage(pageIndex + 1); + + List nativeLines = ExtractNativeLines(textPage, renderedPage.PixelWidth, renderedPage.PixelHeight); + List imageRegions = ExtractImageRegions(textPage, renderedPage.PixelWidth, renderedPage.PixelHeight); + + PdfPageContent pageContent = new(pageIndex, renderedPage, nativeLines, imageRegions); + pageCache[pageIndex] = pageContent; + return pageContent; + } + finally + { + (renderPage as IDisposable)?.Dispose(); + } + } + + public async Task> GetSelectableLinesAsync(int pageIndex, ILanguage? language = null) + { + PdfPageContent pageContent = await GetPageContentAsync(pageIndex); + ILanguage resolvedLanguage = language ?? 
LanguageUtilities.GetCurrentInputLanguage(); + + if (!pageContent.HasNativeText) + return await GetOcrLinesAsync(pageContent.RenderedPage, resolvedLanguage); + + if (pageContent.ImageRegions.Count == 0) + return pageContent.NativeLines; + + List combinedLines = [.. pageContent.NativeLines]; + IReadOnlyList imageOcrLines = await GetOcrLinesAsync( + pageContent.RenderedPage, + resolvedLanguage, + sourceRect => ShouldIncludeOcrLine(sourceRect, pageContent.ImageRegions)); + + combinedLines.AddRange(imageOcrLines); + return SortLines(combinedLines); + } + + public async Task RenderPageAsync(int pageIndex) + { + PdfPageContent pageContent = await GetPageContentAsync(pageIndex); + return pageContent.RenderedPage; + } + + public static async Task LoadAsync(string filePath) + { + if (!IoUtilities.IsPdfFileExtension(Path.GetExtension(filePath))) + throw new InvalidOperationException("The provided path is not a PDF document."); + + string absolutePath = Path.GetFullPath(filePath); + StorageFile storageFile = await StorageFile.GetFileFromPathAsync(absolutePath); + WinPdfDocument renderDocument = await WinPdfDocument.LoadFromFileAsync(storageFile); + PigPdfDocument textDocument = PigPdfDocument.Open(absolutePath); + + return new PdfDocumentRenderer(absolutePath, renderDocument, textDocument); + } + + internal static Windows.Foundation.Rect ConvertPdfRectToImageRect( + PdfRectangle pdfRect, + double pageWidthPoints, + double pageHeightPoints, + double renderedWidth, + double renderedHeight) + { + if (pageWidthPoints <= 0 || pageHeightPoints <= 0 || renderedWidth <= 0 || renderedHeight <= 0) + return new Windows.Foundation.Rect(0, 0, 0, 0); + + PdfPoint[] points = + [ + pdfRect.TopLeft, + pdfRect.TopRight, + pdfRect.BottomLeft, + pdfRect.BottomRight + ]; + + List xs = []; + List ys = []; + + foreach (PdfPoint point in points) + { + double x = (double)point.X / pageWidthPoints * renderedWidth; + double y = (1 - ((double)point.Y / pageHeightPoints)) * renderedHeight; + xs.Add(x); + 
ys.Add(y); + } + + double left = xs.Min(); + double top = ys.Min(); + double right = xs.Max(); + double bottom = ys.Max(); + + return new Windows.Foundation.Rect(left, top, Math.Max(0, right - left), Math.Max(0, bottom - top)); + } + + internal static IReadOnlyList GroupWordsIntoLines(IEnumerable<(Windows.Foundation.Rect SourceRect, string Text)> words) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> orderedWords = [.. words + .Where(word => !string.IsNullOrWhiteSpace(word.Text) && word.SourceRect.Width > 0 && word.SourceRect.Height > 0) + .OrderBy(word => word.SourceRect.Y) + .ThenBy(word => word.SourceRect.X)]; + + if (orderedWords.Count == 0) + return []; + + List> groups = []; + + foreach ((Windows.Foundation.Rect SourceRect, string Text) word in orderedWords) + { + if (groups.Count == 0) + { + groups.Add([word]); + continue; + } + + List<(Windows.Foundation.Rect SourceRect, string Text)> currentGroup = groups[^1]; + Windows.Foundation.Rect currentBounds = GetBounds(currentGroup.Select(item => item.SourceRect)); + double currentCenterY = currentBounds.Y + (currentBounds.Height / 2); + double wordCenterY = word.SourceRect.Y + (word.SourceRect.Height / 2); + double lineHeight = Math.Max(currentBounds.Height, word.SourceRect.Height); + double maxGap = lineHeight * 6; + double horizontalGap = Math.Max(0, word.SourceRect.X - currentBounds.Right); + bool sameBaseline = Math.Abs(wordCenterY - currentCenterY) <= lineHeight * 0.6; + + if (sameBaseline && horizontalGap <= maxGap) + currentGroup.Add(word); + else + groups.Add([word]); + } + + List lines = []; + foreach (List<(Windows.Foundation.Rect SourceRect, string Text)> group in groups) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> orderedGroup = [.. 
group.OrderBy(item => item.SourceRect.X)]; + Windows.Foundation.Rect lineBounds = GetBounds(orderedGroup.Select(item => item.SourceRect)); + string text = string.Join(" ", orderedGroup.Select(item => item.Text.Trim())); + lines.Add(new PdfPageTextLine(lineBounds, text, isNativeText: true)); + } + + return SortLines(lines); + } + + internal static (uint Width, uint Height) GetRenderDimensions(double pageWidth, double pageHeight, double scaleFactor = DefaultRenderScale) + { + if (!double.IsFinite(pageWidth) || pageWidth <= 0 || !double.IsFinite(pageHeight) || pageHeight <= 0) + return (1, 1); + + double scaledWidth = Math.Max(1, pageWidth * scaleFactor); + double scaledHeight = Math.Max(1, pageHeight * scaleFactor); + double maxDimension = Math.Max(scaledWidth, scaledHeight); + + if (maxDimension > OcrEngine.MaxImageDimension) + { + double scaleDownRatio = OcrEngine.MaxImageDimension / maxDimension; + scaledWidth *= scaleDownRatio; + scaledHeight *= scaleDownRatio; + } + + return ((uint)Math.Max(1, Math.Round(scaledWidth)), (uint)Math.Max(1, Math.Round(scaledHeight))); + } + + internal static bool ShouldIncludeOcrLine(Windows.Foundation.Rect sourceRect, IReadOnlyList imageRegions) + { + if (sourceRect.Width <= 0 || sourceRect.Height <= 0) + return false; + + double sourceArea = sourceRect.Width * sourceRect.Height; + if (sourceArea <= 0) + return false; + + foreach (Windows.Foundation.Rect imageRegion in imageRegions) + { + double intersectionLeft = Math.Max(sourceRect.Left, imageRegion.Left); + double intersectionTop = Math.Max(sourceRect.Top, imageRegion.Top); + double intersectionRight = Math.Min(sourceRect.Right, imageRegion.Right); + double intersectionBottom = Math.Min(sourceRect.Bottom, imageRegion.Bottom); + + double intersectionWidth = Math.Max(0, intersectionRight - intersectionLeft); + double intersectionHeight = Math.Max(0, intersectionBottom - intersectionTop); + double intersectionArea = intersectionWidth * intersectionHeight; + + if (intersectionArea / 
sourceArea >= 0.25) + return true; + } + + return false; + } + + private static PdfPageRenderOptions CreateRenderOptions(WinPdfPage page) + { + (uint width, uint height) = GetRenderDimensions(page.Size.Width, page.Size.Height); + + return new PdfPageRenderOptions + { + BackgroundColor = new Windows.UI.Color { A = byte.MaxValue, R = byte.MaxValue, G = byte.MaxValue, B = byte.MaxValue }, + BitmapEncoderId = Windows.Graphics.Imaging.BitmapEncoder.PngEncoderId, + DestinationWidth = width, + DestinationHeight = height, + IsIgnoringHighContrast = true + }; + } + + private static List ExtractImageRegions(Page textPage, int renderedWidth, int renderedHeight) + { + return [.. textPage.GetImages() + .Select(image => ConvertPdfRectToImageRect(image.BoundingBox, (double)textPage.Width, (double)textPage.Height, renderedWidth, renderedHeight)) + .Where(rect => rect.Width > 0 && rect.Height > 0)]; + } + + private static List ExtractNativeLines(Page textPage, int renderedWidth, int renderedHeight) + { + List<(Windows.Foundation.Rect SourceRect, string Text)> words = [.. textPage + .GetWords(NearestNeighbourWordExtractor.Instance) + .Where(word => !string.IsNullOrWhiteSpace(word.Text)) + .Select(word => ( + SourceRect: ConvertPdfRectToImageRect(word.BoundingBox, (double)textPage.Width, (double)textPage.Height, renderedWidth, renderedHeight), + Text: word.Text.Trim())) + .Where(word => word.SourceRect.Width > 0 && word.SourceRect.Height > 0)]; + + return [.. GroupWordsIntoLines(words)]; + } + + private static Windows.Foundation.Rect GetBounds(IEnumerable rects) + { + List rectList = [.. 
rects.Where(rect => rect.Width > 0 && rect.Height > 0)]; + if (rectList.Count == 0) + return new Windows.Foundation.Rect(0, 0, 0, 0); + + double left = rectList.Min(rect => rect.Left); + double top = rectList.Min(rect => rect.Top); + double right = rectList.Max(rect => rect.Right); + double bottom = rectList.Max(rect => rect.Bottom); + + return new Windows.Foundation.Rect(left, top, Math.Max(0, right - left), Math.Max(0, bottom - top)); + } + + private async Task> GetOcrLinesAsync( + BitmapSource renderedPage, + ILanguage language, + Func? sourceRectPredicate = null) + { + using Bitmap bitmap = ImageMethods.BitmapSourceToBitmap(renderedPage); + (IOcrLinesWords? ocrResult, double scale) = await OcrUtilities.GetOcrResultFromBitmapAsync(bitmap, language); + if (ocrResult is null || ocrResult.Lines.Length == 0) + return []; + + return ConvertOcrLines(ocrResult, scale, language, sourceRectPredicate); + } + + private static IReadOnlyList ConvertOcrLines( + IOcrLinesWords ocrResult, + double scale, + ILanguage language, + Func? sourceRectPredicate) + { + List lines = []; + bool isSpaceJoiningLanguage = language.IsSpaceJoining(); + + foreach (IOcrLine ocrLine in ocrResult.Lines) + { + StringBuilder textBuilder = new(); + ocrLine.GetTextFromOcrLine(isSpaceJoiningLanguage, textBuilder); + textBuilder.RemoveTrailingNewlines(); + + string lineText = textBuilder.ToString(); + if (string.IsNullOrWhiteSpace(lineText)) + continue; + + Windows.Foundation.Rect scaledRect = ocrLine.BoundingBox; + Windows.Foundation.Rect sourceRect = new( + scaledRect.X / scale, + scaledRect.Y / scale, + scaledRect.Width / scale, + scaledRect.Height / scale); + + if (sourceRectPredicate is not null && !sourceRectPredicate(sourceRect)) + continue; + + lines.Add(new PdfPageTextLine(sourceRect, lineText.Trim(), isNativeText: false)); + } + + return SortLines(lines); + } + + private static List SortLines(IEnumerable lines) + { + return [.. 
lines.OrderBy(line => line.SourceRect.Y).ThenBy(line => line.SourceRect.X)]; + } + + private static async Task RenderPageBitmapAsync(WinPdfPage page) + { + using InMemoryRandomAccessStream renderedStream = new(); + PdfPageRenderOptions renderOptions = CreateRenderOptions(page); + + await page.RenderToStreamAsync(renderedStream, renderOptions); + renderedStream.Seek(0); + + using Bitmap renderedBitmap = ImageMethods.GetBitmapFromIRandomAccessStream(renderedStream); + return ImageMethods.BitmapToImageSource(renderedBitmap); + } + + private void ValidatePageIndex(int pageIndex) + { + if (pageIndex < 0 || pageIndex >= PageCount) + throw new ArgumentOutOfRangeException(nameof(pageIndex), pageIndex, "Page index is outside the document bounds."); + } +} From be3b874c040d1f71f81cc8c56c00249bd0f78571 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 25 Apr 2026 18:39:37 -0500 Subject: [PATCH 02/22] Add PDF text selection and navigation to GrabFrame - Introduce PdfTextLineOverlay and PdfTextCanvas for PDF text selection - Add PDF page navigation controls to GrabFrame UI - Support loading, rendering, and extracting text from PDFs - Enable selection, search, and copy of PDF text lines - Update pan/zoom logic for PDF overlays and spacebar-panning - Refactor event handlers and utilities for PDF support - Update dialogs, menus, and help text to include PDFs - Improve XAML formatting and documentation consistency --- Text-Grab/Controls/PdfTextLineOverlay.cs | 90 +++++ Text-Grab/Controls/ZoomBorder.cs | 38 +- Text-Grab/Pages/GeneralSettings.xaml | 8 +- Text-Grab/Styles/ButtonStyles.xaml | 24 +- Text-Grab/Styles/DataGridStyles.xaml | 10 +- Text-Grab/Styles/ListViewScrollFix.xaml | 26 +- Text-Grab/Styles/TextBoxStyles.xaml | 3 +- Text-Grab/Views/EditTextWindow.xaml | 24 +- Text-Grab/Views/EditTextWindow.xaml.cs | 2 +- Text-Grab/Views/FirstRunWindow.xaml | 2 +- Text-Grab/Views/GrabFrame.xaml | 71 +++- Text-Grab/Views/GrabFrame.xaml.cs | 442 +++++++++++++++++++++-- 12 files changed, 
662 insertions(+), 78 deletions(-) create mode 100644 Text-Grab/Controls/PdfTextLineOverlay.cs diff --git a/Text-Grab/Controls/PdfTextLineOverlay.cs b/Text-Grab/Controls/PdfTextLineOverlay.cs new file mode 100644 index 00000000..a08ecaf3 --- /dev/null +++ b/Text-Grab/Controls/PdfTextLineOverlay.cs @@ -0,0 +1,90 @@ +using System; +using System.Windows; +using System.Windows.Controls; +using System.Windows.Media; +using Text_Grab.Utilities; + +namespace Text_Grab.Controls; + +internal sealed class PdfTextLineOverlay : Border +{ + private static readonly Brush DefaultBorderBrush = new SolidColorBrush(Color.FromArgb(0x90, 0x00, 0x78, 0xD7)); + private static readonly Brush DefaultHighlightBrush = new SolidColorBrush(Color.FromArgb(0x50, 0x00, 0x78, 0xD7)); + private static readonly Brush TransparentTextBrush = new SolidColorBrush(Colors.Transparent); + + public PdfTextLineOverlay(string text) + { + Text = text; + Child = new TextBlock + { + Text = text, + Foreground = TransparentTextBrush, + TextWrapping = TextWrapping.NoWrap, + TextTrimming = TextTrimming.CharacterEllipsis, + VerticalAlignment = VerticalAlignment.Center, + Margin = new Thickness(1, 0, 1, 0), + IsHitTestVisible = false + }; + + Background = Brushes.Transparent; + BorderBrush = Brushes.Transparent; + BorderThickness = new Thickness(0); + ClipToBounds = true; + IsHitTestVisible = true; + SnapsToDevicePixels = true; + } + + public bool IsSelected { get; private set; } + + public double Left + { + get => Canvas.GetLeft(this); + private set => Canvas.SetLeft(this, value); + } + + public double Top + { + get => Canvas.GetTop(this); + private set => Canvas.SetTop(this, value); + } + + public string Text { get; } + + public bool WasRegionSelected { get; set; } + + public void ApplyLayout(Rect bounds) + { + Width = Math.Max(1, bounds.Width + 2); + Height = Math.Max(1, bounds.Height + 2); + Left = Math.Max(0, bounds.X - 1); + Top = Math.Max(0, bounds.Y - 1); + + if (Child is TextBlock textBlock) + { + 
textBlock.FontSize = Math.Max(1, bounds.Height * 0.75); + textBlock.LineHeight = Math.Max(1, bounds.Height); + } + } + + public void Deselect() + { + IsSelected = false; + Background = Brushes.Transparent; + BorderBrush = Brushes.Transparent; + BorderThickness = new Thickness(0); + } + + public bool IntersectsWith(Rect rectToCheck) + { + Rect overlayRect = new(Left, Top, Width, Height); + return rectToCheck.IntersectsWith(overlayRect); + } + + public void Select() + { + IsSelected = true; + Background = DefaultHighlightBrush; + BorderBrush = DefaultBorderBrush; + BorderThickness = new Thickness(1); + } +} diff --git a/Text-Grab/Controls/ZoomBorder.cs b/Text-Grab/Controls/ZoomBorder.cs index dc29ee82..198dcb97 100644 --- a/Text-Grab/Controls/ZoomBorder.cs +++ b/Text-Grab/Controls/ZoomBorder.cs @@ -3,6 +3,7 @@ using System.Windows.Controls; using System.Windows.Input; using System.Windows.Media; +using System.Windows.Media.Media3D; // From StackOverFlow: // https://stackoverflow.com/questions/741956/pan-zoom-image @@ -41,6 +42,8 @@ public override UIElement Child public bool CanZoom { get; set; } = true; + public bool RequireSpaceToPan { get; set; } = false; + public void Initialize(UIElement element) { child = element; @@ -87,6 +90,36 @@ public void Reset() CanPan = false; } + private bool IsPanGestureActive() => !RequireSpaceToPan || Keyboard.IsKeyDown(Key.Space); + + private bool BlocksPanFromSource(object? originalSource) + { + DependencyObject? 
current = originalSource switch + { + DependencyObject dependencyObject => dependencyObject, + null => null, + _ => null + }; + + while (current is not null) + { + if (current is TextBox) + return true; + + if (current is PdfTextLineOverlay) + return !IsPanGestureActive(); + + current = current switch + { + Visual visual => VisualTreeHelper.GetParent(visual), + Visual3D visual3D => VisualTreeHelper.GetParent(visual3D), + _ => null + }; + } + + return false; + } + private void Child_MouseWheel(object sender, MouseWheelEventArgs e) { if (child is null || !CanZoom) @@ -117,7 +150,7 @@ private void Child_MouseWheel(object sender, MouseWheelEventArgs e) private void Child_MouseLeftButtonDown(object sender, MouseButtonEventArgs e) { - if (child is null) + if (child is null || !IsPanGestureActive() || BlocksPanFromSource(e.OriginalSource)) return; TranslateTransform tt = GetTranslateTransform(child); @@ -142,7 +175,7 @@ private void Child_PreviewMouseRightButtonDown(object sender, MouseButtonEventAr private void Child_MouseMove(object sender, MouseEventArgs e) { - if (e.OriginalSource is TextBox) + if (BlocksPanFromSource(e.OriginalSource)) return; if (child is null @@ -150,6 +183,7 @@ private void Child_MouseMove(object sender, MouseEventArgs e) || st.ScaleX == 1.0 || Mouse.LeftButton == MouseButtonState.Released || !CanPan + || !IsPanGestureActive() || KeyboardExtensions.IsShiftDown() || KeyboardExtensions.IsCtrlDown()) { diff --git a/Text-Grab/Pages/GeneralSettings.xaml b/Text-Grab/Pages/GeneralSettings.xaml index 80794c77..2e326eb6 100644 --- a/Text-Grab/Pages/GeneralSettings.xaml +++ b/Text-Grab/Pages/GeneralSettings.xaml @@ -236,11 +236,11 @@ Checked="AddToContextMenuCheckBox_Checked" Unchecked="AddToContextMenuCheckBox_Unchecked"> - Add "Grab text with Text Grab" to right-click menu for image files + Add "Grab text with Text Grab" to right-click menu for image and PDF files - Right-click on PNG, JPG, BMP, GIF, or TIFF files to quickly grab text. 
+ Right-click on supported image files or PDFs to quickly grab text. @@ -254,11 +254,11 @@ Checked="RegisterOpenWithCheckBox_Checked" Unchecked="RegisterOpenWithCheckBox_Unchecked"> - Register Text Grab as an "Open with" app for image files + Register Text Grab as an "Open with" app for image and PDF files - Opens images directly in Grab Frame when using "Open with" from File Explorer. + Opens supported images and PDFs directly in Grab Frame when using "Open with" from File Explorer. @@ -237,9 +238,12 @@ VerticalAlignment="Top"> + Width="{Binding ActualWidth, + ElementName=SubMenuBorder}" + Height="{Binding ActualHeight, + ElementName=SubMenuBorder}" + Fill="{Binding Background, + ElementName=SubMenuBorder}" /> @@ -469,9 +474,12 @@ VerticalAlignment="Top"> + Width="{Binding ActualWidth, + ElementName=SubMenuBorder}" + Height="{Binding ActualHeight, + ElementName=SubMenuBorder}" + Fill="{Binding Background, + ElementName=SubMenuBorder}" /> + Visibility="{Binding HeadersVisibility, + ConverterParameter={x:Static DataGridHeadersVisibility.Row}, + Converter={x:Static DataGrid.HeadersVisibilityConverter}, + RelativeSource={RelativeSource AncestorType={x:Type DataGrid}}}" /> diff --git a/Text-Grab/Styles/ListViewScrollFix.xaml b/Text-Grab/Styles/ListViewScrollFix.xaml index 3dad6541..aa605b6f 100644 --- a/Text-Grab/Styles/ListViewScrollFix.xaml +++ b/Text-Grab/Styles/ListViewScrollFix.xaml @@ -26,12 +26,18 @@ VerticalScrollBarVisibility="Hidden"> + Value="{Binding Path=HorizontalOffset, + RelativeSource={RelativeSource TemplatedParent}, + Mode=OneWay}" /> + Value="{Binding Path=VerticalOffset, + RelativeSource={RelativeSource TemplatedParent}, + Mode=OneWay}" /> + Data="{Binding Content, + RelativeSource={RelativeSource TemplatedParent}}"> diff --git a/Text-Grab/Views/EditTextWindow.xaml b/Text-Grab/Views/EditTextWindow.xaml index 73352783..69d60201 100644 --- a/Text-Grab/Views/EditTextWindow.xaml +++ b/Text-Grab/Views/EditTextWindow.xaml @@ -293,9 +293,7 @@ 
x:Name="AddRemoveAtMenuItem" Click="AddRemoveAtMenuItem_Click" Header="_Add, Remove, Limit..." /> - + - + • The Grab Frame is a window which can be moved or resized. It stays on top of other windows and will read all of the text within the border. • Click or drag to select Word Borders then add them to the clipboard by clicking "Grab". - • Drop an image onto the Grab Frame to view the image and copy text. + • Drop an image or PDF onto the Grab Frame to view it and copy text. • Pause the Grab Frame and scroll to zoom in on a piece of text. • Edit each line to correct any errors and fix up the results to be perfect. • Table mode will draw a grid around the lines to be pasted into a table easily. diff --git a/Text-Grab/Views/GrabFrame.xaml b/Text-Grab/Views/GrabFrame.xaml index e6275d84..4bdd3675 100644 --- a/Text-Grab/Views/GrabFrame.xaml +++ b/Text-Grab/Views/GrabFrame.xaml @@ -129,7 +129,9 @@ x:Name="IsTopmostMenuItem" Header="Keep Grab Frame On Top" IsCheckable="True" - IsChecked="{Binding Topmost, ElementName=GrabFrameWindow, Mode=TwoWay}" /> + IsChecked="{Binding Topmost, + ElementName=GrabFrameWindow, + Mode=TwoWay}" /> @@ -289,31 +291,41 @@ Checked="AspectRationMI_Checked" Header="Maintain Aspect Ratio" IsCheckable="True" - IsChecked="{Binding IsChecked, ElementName=AspectRationMI, Mode=TwoWay}" + IsChecked="{Binding IsChecked, + ElementName=AspectRationMI, + Mode=TwoWay}" Unchecked="AspectRationMI_Checked" /> + IsChecked="{Binding IsChecked, + ElementName=FreezeToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=TableToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=EditToggleButton, + Mode=TwoWay}" /> + IsChecked="{Binding IsChecked, + ElementName=EditTextToggleButton, + Mode=TwoWay}" /> + + Visibility="{Binding Visibility, + ElementName=SearchBox, + Mode=OneWay}" /> + + + + + pdfTextLineOverlays = []; private const string TargetLanguageMenuHeader = "Target Language"; #endregion Fields @@ -114,9 
+118,9 @@ public GrabFrame(HistoryInfo historyInfo) } /// - /// Creates a GrabFrame and loads the specified image file. + /// Creates a GrabFrame and loads the specified image or PDF file. /// - /// The path to the image file to load. + /// The path to the file to load. public GrabFrame(string imagePath) { StandardInitialize(); @@ -126,11 +130,11 @@ public GrabFrame(string imagePath) // Validate the path before loading if (string.IsNullOrEmpty(imagePath)) { - Debug.WriteLine("GrabFrame: Empty image path provided"); + Debug.WriteLine("GrabFrame: Empty file path provided"); Loaded += async (s, e) => await new Wpf.Ui.Controls.MessageBox { Title = "Text Grab", - Content = "No image file path was provided.", + Content = "No file path was provided.", CloseButtonText = "OK" }.ShowDialogAsync(); return; @@ -141,17 +145,17 @@ public GrabFrame(string imagePath) if (!File.Exists(absolutePath)) { - Debug.WriteLine($"GrabFrame: Image file not found: {absolutePath}"); + Debug.WriteLine($"GrabFrame: File not found: {absolutePath}"); Loaded += async (s, e) => await new Wpf.Ui.Controls.MessageBox { Title = "Text Grab", - Content = $"Image file not found:\n{absolutePath}", + Content = $"File not found:\n{absolutePath}", CloseButtonText = "OK" }.ShowDialogAsync(); return; } - Loaded += async (s, e) => await TryLoadImageFromPath(absolutePath); + Loaded += async (s, e) => await TryLoadDocumentFromPath(absolutePath); } /// @@ -202,7 +206,7 @@ private async Task LoadTemplateForEditing(GrabTemplate template) if (!string.IsNullOrEmpty(template.SourceImagePath) && File.Exists(template.SourceImagePath)) { isStaticImageSource = true; - await TryLoadImageFromPath(template.SourceImagePath); + await TryLoadDocumentFromPath(template.SourceImagePath); reDrawTimer.Stop(); } else @@ -544,6 +548,73 @@ private void ShowFrameMessage(string message) frameMessageTimer.Start(); } + private void ClearLoadedPdfDocument() + { + _loadedPdfDocument?.Dispose(); + _loadedPdfDocument = null; + 
_currentPdfPageContent = null; + _currentPdfPageIndex = -1; + MainZoomBorder.RequireSpaceToPan = false; + UpdatePdfPageNavigation(); + } + + private async Task ChangePdfPageAsync(int delta) + { + if (_loadedPdfDocument is null) + return; + + int targetPageIndex = _currentPdfPageIndex + delta; + if (targetPageIndex < 0 || targetPageIndex >= _loadedPdfDocument.PageCount) + return; + + await ShowPdfPageAsync(targetPageIndex); + } + + private async Task ShowPdfPageAsync(int pageIndex) + { + if (_loadedPdfDocument is null) + return; + + reDrawTimer.Stop(); + ResetGrabFrame(); + await Task.Delay(300); + + _currentPdfPageContent = await _loadedPdfDocument.GetPageContentAsync(pageIndex); + frameContentImageSource = _currentPdfPageContent.RenderedPage; + hasLoadedImageSource = true; + isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; + _currentImagePath = _loadedPdfDocument.FilePath; + _currentPdfPageIndex = pageIndex; + FreezeToggleButton.IsChecked = true; + FreezeGrabFrame(); + FreezeToggleButton.Visibility = Visibility.Collapsed; + MainZoomBorder.RequireSpaceToPan = true; + UpdatePdfPageNavigation(); + SwitchToOcrFallbackIfUiAutomation(); + + reDrawTimer.Start(); + } + + private void UpdatePdfPageNavigation() + { + bool isPdfLoaded = _loadedPdfDocument is not null; + PdfPagePanel.Visibility = isPdfLoaded ? 
Visibility.Visible : Visibility.Collapsed; + + if (!isPdfLoaded || _currentPdfPageIndex < 0) + { + PdfPageTextBlock.Text = string.Empty; + PreviousPdfPageButton.IsEnabled = false; + NextPdfPageButton.IsEnabled = false; + return; + } + + PdfPageTextBlock.Text = $"Page {_currentPdfPageIndex + 1} / {_loadedPdfDocument!.PageCount}"; + PreviousPdfPageButton.IsEnabled = _currentPdfPageIndex > 0; + NextPdfPageButton.IsEnabled = _currentPdfPageIndex < _loadedPdfDocument.PageCount - 1; + } + /// /// When a static image is loaded and the active language is UI Automation (Direct Text), /// silently switch to the OCR fallback language so no warning is shown. @@ -624,6 +695,7 @@ public TextBox? DestinationTextBox public bool IsEditingAnyWordBorders => wordBorders.Any(x => x.IsEditing); public bool IsFreezeMode { get; set; } = false; public bool IsFromEditWindow => destinationTextBox is not null; + private bool IsPdfDocumentLoaded => _loadedPdfDocument is not null; public bool IsWordEditMode { get; set; } = true; public bool ShouldSaveOnClose { get; set; } = true; @@ -637,6 +709,17 @@ public static bool CheckKey(VirtualKeyCodes code) return (GetKeyState(code) & 0xFF00) == 0xFF00; } + private static FrameworkElement? GetInteractionSurface(object? sender) => sender as FrameworkElement; + + private bool IsPdfTextInteraction(object? sender) => ReferenceEquals(sender, PdfTextCanvas); + + private bool IsPdfPanGestureActive => + IsPdfDocumentLoaded + && MainZoomBorder.CanPan + && !KeyboardExtensions.IsShiftDown() + && !KeyboardExtensions.IsCtrlDown() + && Keyboard.IsKeyDown(Key.Space); + public HistoryInfo AsHistoryItem() { System.Drawing.Bitmap? 
bitmap = ImageMethods.ImageSourceToBitmap(frameContentImageSource); @@ -1249,12 +1332,44 @@ private void CheckSelectBorderIntersections(bool finalCheck = false) wordBorder.WasRegionSelected = false; } + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + if (rectSelect.IntersectsWith(new Rect(pdfTextLine.Left, pdfTextLine.Top, pdfTextLine.Width, pdfTextLine.Height))) + { + clickedEmptySpace = false; + + if (!smallSelection) + { + pdfTextLine.Select(); + pdfTextLine.WasRegionSelected = true; + } + else if (!finalCheck) + { + if (pdfTextLine.IsSelected) + pdfTextLine.Deselect(); + else + pdfTextLine.Select(); + pdfTextLine.WasRegionSelected = false; + } + } + else if (pdfTextLine.WasRegionSelected && !smallSelection) + { + pdfTextLine.Deselect(); + } + + if (finalCheck) + pdfTextLine.WasRegionSelected = false; + } + if (clickedEmptySpace && smallSelection && finalCheck) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); } if (finalCheck) @@ -1324,6 +1439,13 @@ private void ClearRenderedWordBorders() { RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); + } + + private void ClearRenderedPdfTextLines() + { + PdfTextCanvas.Children.Clear(); + pdfTextLineOverlays.Clear(); } private IReadOnlyCollection? 
GetUiAutomationExcludedHandles() @@ -1384,6 +1506,28 @@ private void AddRenderedWordBorder(WordBorder wordBorderBox) }); } + private PdfTextLineOverlay CreatePdfTextLineOverlay(Windows.Foundation.Rect sourceRect, double sourceScale, string text, DpiScale dpi) + { + Rect displayRect = new( + sourceRect.X / (dpi.DpiScaleX * sourceScale), + sourceRect.Y / (dpi.DpiScaleY * sourceScale), + sourceRect.Width / (dpi.DpiScaleX * sourceScale), + sourceRect.Height / (dpi.DpiScaleY * sourceScale)); + + PdfTextLineOverlay overlay = new(text); + overlay.ApplyLayout(displayRect); + return overlay; + } + + private void AddRenderedPdfTextLine(PdfTextLineOverlay overlay) + { + if (!IsOcrValid) + return; + + pdfTextLineOverlays.Add(overlay); + _ = PdfTextCanvas.Children.Add(overlay); + } + private Task DrawRectanglesAroundWords(string searchWord = "") { return CurrentLanguage is UiAutomationLang @@ -1396,6 +1540,12 @@ private async Task DrawOcrRectanglesAsync(string searchWord = "") if (isDrawing || IsDragOver) return; + if (_currentPdfPageContent?.HasNativeText is true) + { + await DrawPdfRectanglesAsync(searchWord); + return; + } + isDrawing = true; IsOcrValid = true; @@ -1524,6 +1674,71 @@ private async Task DrawOcrRectanglesAsync(string searchWord = "") } } + private async Task DrawPdfRectanglesAsync(string searchWord = "") + { + if (isDrawing || IsDragOver || _loadedPdfDocument is null || _currentPdfPageContent is null || _currentPdfPageIndex < 0) + return; + + isDrawing = true; + IsOcrValid = true; + windowFrameImageScale = 1; + ocrResultOfWindow = null; + + if (string.IsNullOrWhiteSpace(searchWord)) + searchWord = SearchBox.Text; + + ClearRenderedWordBorders(); + + if (frameContentImageSource is not BitmapSource) + { + isDrawing = false; + reDrawTimer.Start(); + return; + } + + DpiScale dpi = VisualTreeHelper.GetDpi(this); + SyncRectanglesCanvasSizeToImage(); + isSpaceJoining = CurrentLanguage!.IsSpaceJoining(); + + IReadOnlyList pageLines = await 
_loadedPdfDocument.GetSelectableLinesAsync(_currentPdfPageIndex, CurrentLanguage); + + foreach (PdfPageTextLine pageLine in pageLines) + { + string lineText = pageLine.Text; + if (!pageLine.IsNativeText) + { + if (DefaultSettings.CorrectErrors) + lineText = lineText.TryFixEveryWordLetterNumberErrors(); + + if (DefaultSettings.CorrectToLatin) + lineText = lineText.ReplaceGreekOrCyrillicWithLatin(); + } + + if (CurrentLanguage!.IsRightToLeft() && !pageLine.IsNativeText) + { + StringBuilder sb = new(lineText); + sb.ReverseWordsForRightToLeft(); + sb.RemoveTrailingNewlines(); + lineText = sb.ToString(); + } + + PdfTextLineOverlay overlay = CreatePdfTextLineOverlay(pageLine.SourceRect, 1, lineText, dpi); + AddRenderedPdfTextLine(overlay); + } + + if (DefaultSettings.TryToReadBarcodes) + TryToReadBarcodes(dpi); + + isDrawing = false; + reSearchTimer.Start(); + + if (isTranslationEnabled && WindowsAiUtilities.CanDeviceUseWinAI()) + { + translationTimer.Stop(); + translationTimer.Start(); + } + } + private async Task DrawUiAutomationRectanglesAsync(string searchWord = "") { if (isDrawing || IsDragOver) @@ -1719,6 +1934,8 @@ private void Escape_Keyed(object sender, ExecutedRoutedEventArgs e) SearchBox.Text = ""; else if (RectanglesCanvas.Children.Count > 0) ResetGrabFrame(); + else if (PdfTextCanvas.Children.Count > 0) + ResetGrabFrame(); else Close(); } @@ -1790,6 +2007,7 @@ private void SyncRectanglesCanvasSizeToImage() if (double.IsFinite(sourceWidth) && sourceWidth > 0) { GrabFrameImage.Width = sourceWidth; + PdfTextCanvas.Width = sourceWidth; RectanglesCanvas.Width = sourceWidth; TemplateRegionOverlayCanvas.Width = sourceWidth; } @@ -1797,6 +2015,7 @@ private void SyncRectanglesCanvasSizeToImage() if (double.IsFinite(sourceHeight) && sourceHeight > 0) { GrabFrameImage.Height = sourceHeight; + PdfTextCanvas.Height = sourceHeight; RectanglesCanvas.Height = sourceHeight; TemplateRegionOverlayCanvas.Height = sourceHeight; } @@ -1806,6 +2025,12 @@ private async void 
FreezeMI_Click(object sender, RoutedEventArgs e) { if (IsFreezeMode) { + if (IsPdfDocumentLoaded) + { + FreezeToggleButton.IsChecked = true; + return; + } + FreezeToggleButton.IsChecked = false; UnfreezeGrabFrame(); ResetGrabFrame(); @@ -1827,6 +2052,8 @@ private void FreezeToggleButton_Click(object? sender = null, RoutedEventArgs? e { if (FreezeToggleButton.IsChecked is bool freezeMode && freezeMode) FreezeGrabFrame(); + else if (IsPdfDocumentLoaded) + FreezeToggleButton.IsChecked = true; else UnfreezeGrabFrame(); } @@ -1967,6 +2194,7 @@ private void GrabFrameWindow_Closing(object sender, System.ComponentModel.Cancel FrameText = ""; wordBorders.Clear(); + pdfTextLineOverlays.Clear(); UpdateFrameText(); } @@ -2011,7 +2239,7 @@ private async void GrabFrameWindow_Drop(object sender, DragEventArgs e) frameContentImageSource = null; isStaticImageSource = true; - await TryLoadImageFromPath(fileName); + await TryLoadDocumentFromPath(fileName); IsDragOver = false; @@ -2161,6 +2389,16 @@ private void InvertSelection(object? sender = null, RoutedEventArgs? e = null) else wordBorder.Select(); } + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + if (pdfTextLine.IsSelected) + pdfTextLine.Deselect(); + else + pdfTextLine.Select(); + } + + UpdateFrameText(); } private void LanguagesComboBox_MouseDown(object sender, MouseButtonEventArgs e) @@ -2351,7 +2589,7 @@ private async void OpenImageMenuItem_Click(object? sender = null, RoutedEventArg Microsoft.Win32.OpenFileDialog dlg = new() { // Set filter for file extension and default file extension - Filter = FileUtilities.GetImageFilter() + Filter = FileUtilities.GetVisualDocumentFilter() }; bool? result = dlg.ShowDialog(); @@ -2359,7 +2597,7 @@ private async void OpenImageMenuItem_Click(object? 
sender = null, RoutedEventArg if (result is false || !File.Exists(dlg.FileName)) return; - await TryLoadImageFromPath(dlg.FileName); + await TryLoadDocumentFromPath(dlg.FileName); reDrawTimer.Start(); } @@ -2386,6 +2624,7 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul frameContentImageSource = clipboardImage; } + ClearLoadedPdfDocument(); hasLoadedImageSource = true; isStaticImageSource = true; frozenUiAutomationSnapshot = null; @@ -2405,6 +2644,11 @@ private async void RateAndReview_Click(object sender, RoutedEventArgs e) private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) { + bool isPdfTextInteraction = IsPdfTextInteraction(sender); + FrameworkElement interactionSurface = isPdfTextInteraction + ? (e.OriginalSource as FrameworkElement ?? PdfTextCanvas) + : (GetInteractionSurface(sender) ?? RectanglesCanvas); + reDrawTimer.Stop(); GrabBTN.Focus(); @@ -2422,13 +2666,17 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) return; } - if (!KeyboardExtensions.IsShiftDown() && !KeyboardExtensions.IsCtrlDown()) + bool shouldPanInsteadOfSelect = IsPdfDocumentLoaded + ? 
IsPdfPanGestureActive + : !KeyboardExtensions.IsShiftDown() && !KeyboardExtensions.IsCtrlDown() && !isPdfTextInteraction; + + if (shouldPanInsteadOfSelect) return; } isSelecting = true; clickedPoint = e.GetPosition(RectanglesCanvas); - RectanglesCanvas.CaptureMouse(); + interactionSurface.CaptureMouse(); selectBorder.Height = 1; selectBorder.Width = 1; @@ -2439,8 +2687,11 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) e.Handled = true; isMiddleDown = true; - ResetGrabFrame(); - UnfreezeGrabFrame(); + if (!IsPdfDocumentLoaded) + { + ResetGrabFrame(); + UnfreezeGrabFrame(); + } return; } @@ -2460,12 +2711,17 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) { + FrameworkElement interactionSurface = GetInteractionSurface(sender) ?? RectanglesCanvas; + bool isPdfTextInteraction = IsPdfTextInteraction(sender); + if (IsCtrlDown) - RectanglesCanvas.Cursor = Cursors.Cross; + interactionSurface.Cursor = Cursors.Cross; else if (MainZoomBorder.CanPan) - RectanglesCanvas.Cursor = Cursors.SizeAll; + interactionSurface.Cursor = IsPdfDocumentLoaded + ? (IsPdfPanGestureActive ? Cursors.SizeAll : Cursors.Arrow) + : (isPdfTextInteraction ? Cursors.Arrow : Cursors.SizeAll); else - RectanglesCanvas.Cursor = null; + interactionSurface.Cursor = null; if (!isSelecting && !isMiddleDown && movingWordBordersDictionary.Count == 0) return; @@ -2473,8 +2729,11 @@ private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) isMiddleDown = e.MiddleButton == MouseButtonState.Pressed; if (MainZoomBorder.CanPan - && !KeyboardExtensions.IsShiftDown() - && !KeyboardExtensions.IsCtrlDown()) + && (IsPdfDocumentLoaded + ? 
IsPdfPanGestureActive + : (!KeyboardExtensions.IsShiftDown() + && !KeyboardExtensions.IsCtrlDown() + && !isPdfTextInteraction))) { isSelecting = false; return; @@ -2522,12 +2781,13 @@ private void RectanglesCanvas_MouseUp(object sender, MouseButtonEventArgs e) { isSelecting = false; CursorClipper.UnClipCursor(); - RectanglesCanvas.ReleaseMouseCapture(); + Mouse.Captured?.ReleaseMouseCapture(); if (e.ChangedButton == MouseButton.Middle && scrollBehavior != ScrollBehavior.Zoom) { isMiddleDown = false; - FreezeGrabFrame(); + if (!IsPdfDocumentLoaded) + FreezeGrabFrame(); reDrawTimer.Start(); return; } @@ -2676,6 +2936,9 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); MatchesTXTBLK.Text = $"0 Matches"; UpdateFrameText(); return; @@ -2697,6 +2960,9 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) { foreach (WordBorder wb in wordBorders) wb.Deselect(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Deselect(); UpdateFrameText(); MatchesTXTBLK.Text = $"Search Error"; return; @@ -2716,6 +2982,17 @@ private void ReSearchTimer_Tick(object? sender, EventArgs e) else wb.Deselect(); } + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + { + int numberOfMatchesInLine = regex.Count(pdfTextLine.Text); + numberOfMatches += numberOfMatchesInLine; + + if (numberOfMatchesInLine > 0) + pdfTextLine.Select(); + else + pdfTextLine.Deselect(); + } } UpdateFrameText(); @@ -2796,6 +3073,11 @@ private void SelectAllWordBorders(object? sender = null, RoutedEventArgs? e = nu { foreach (WordBorder wordBorder in wordBorders) wordBorder.Select(); + + foreach (PdfTextLineOverlay pdfTextLine in pdfTextLineOverlays) + pdfTextLine.Select(); + + UpdateFrameText(); } private void SetGrabFrameUserSettings() @@ -3209,11 +3491,23 @@ private void TableToggleButton_Click(object? 
sender = null, RoutedEventArgs? e = UpdateFrameText(); } + private async Task TryLoadDocumentFromPath(string path) + { + if (IoUtilities.IsPdfFileExtension(Path.GetExtension(path))) + { + await TryLoadPdfFromPath(path); + return; + } + + await TryLoadImageFromPath(path); + } + private async Task TryLoadImageFromPath(string path) { Uri fileURI = new(path); try { + ClearLoadedPdfDocument(); ResetGrabFrame(); await Task.Delay(300); BitmapImage droppedImage = new(); @@ -3249,6 +3543,28 @@ private async Task TryLoadImageFromPath(string path) } } + private async Task TryLoadPdfFromPath(string path) + { + try + { + _loadedPdfDocument = await PdfDocumentRenderer.LoadAsync(path); + _currentImagePath = Path.GetFullPath(path); + await ShowPdfPageAsync(0); + } + catch (Exception ex) + { + ClearLoadedPdfDocument(); + hasLoadedImageSource = false; + UnfreezeGrabFrame(); + await new Wpf.Ui.Controls.MessageBox + { + Title = "Text Grab", + Content = $"Failed to open PDF.{Environment.NewLine}{ex.Message}", + CloseButtonText = "OK" + }.ShowDialogAsync(); + } + } + private void TryToAlphaMenuItem_Click(object sender, RoutedEventArgs e) { List wbToEdit = SelectedWordBorders(); @@ -3426,7 +3742,11 @@ private void UndoExecuted(object sender, ExecutedRoutedEventArgs e) private void UnfreezeGrabFrame() { + if (IsPdfDocumentLoaded) + return; + reDrawTimer.Stop(); + ClearLoadedPdfDocument(); hasLoadedImageSource = false; isStaticImageSource = false; frozenUiAutomationSnapshot = null; @@ -3448,16 +3768,63 @@ private void UnfreezeGrabFrame() reDrawTimer.Start(); } - private void UpdateFrameText() + private async void PreviousPdfPageButton_Click(object sender, RoutedEventArgs e) { - string[] selectedWbs = [.. 
wordBorders - .OrderBy(b => b.Top) - .Where(w => w.IsSelected) - .Select(t => t.Word)]; + await ChangePdfPageAsync(-1); + } + + private async void NextPdfPageButton_Click(object sender, RoutedEventArgs e) + { + await ChangePdfPageAsync(1); + } + + private void AppendPositionedTextLines( + StringBuilder stringBuilder, + IEnumerable<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> lines) + { + List<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> orderedLines = + [.. lines + .Where(line => !string.IsNullOrWhiteSpace(line.Text)) + .OrderBy(line => line.Top) + .ThenBy(line => line.Left)]; + + if (orderedLines.Count == 0) + return; + stringBuilder.Append(orderedLines[0].Text); + for (int i = 1; i < orderedLines.Count; i++) + { + (double Top, double Left, double Height, string Text, bool AllowParagraphJoin) previousLine = orderedLines[i - 1]; + (double Top, double Left, double Height, string Text, bool AllowParagraphJoin) currentLine = orderedLines[i]; + + bool shouldJoinParagraph = + DefaultSettings.ParagraphDetection + && isSpaceJoining + && previousLine.AllowParagraphJoin + && currentLine.AllowParagraphJoin + && OcrUtilities.IsWrappedParagraph(previousLine.Top, previousLine.Height, currentLine.Top, currentLine.Height); + + if (shouldJoinParagraph) + stringBuilder.Append(' '); + else + stringBuilder.AppendLine(); + + stringBuilder.Append(currentLine.Text); + } + } + + private void UpdateFrameText() + { StringBuilder stringBuilder = new(); + List<(double Top, double Left, double Height, string Text, bool AllowParagraphJoin)> selectedLines = + [.. 
wordBorders + .Where(w => w.IsSelected) + .Select(w => (w.Top, w.Left, w.Height, w.Word, AllowParagraphJoin: false)) + .Concat(pdfTextLineOverlays + .Where(line => line.IsSelected) + .Select(line => (line.Top, line.Left, line.Height, line.Text, AllowParagraphJoin: true)))]; - if (TableToggleButton.IsChecked is true) + if (TableToggleButton.IsChecked is true && wordBorders.Count > 0) { TryToPlaceTable(); // Build table text via model-only API @@ -3466,8 +3833,14 @@ private void UpdateFrameText() } else { - if (selectedWbs.Length > 0) - stringBuilder.AppendJoin(Environment.NewLine, selectedWbs); + if (selectedLines.Count > 0) + AppendPositionedTextLines(stringBuilder, selectedLines); + else if (pdfTextLineOverlays.Count > 0) + AppendPositionedTextLines( + stringBuilder, + wordBorders + .Select(w => (w.Top, w.Left, w.Height, w.Word, AllowParagraphJoin: false)) + .Concat(pdfTextLineOverlays.Select(line => (line.Top, line.Left, line.Height, line.Text, AllowParagraphJoin: true)))); else AppendWordBordersWithParagraphDetection(stringBuilder); } @@ -3591,9 +3964,12 @@ private void ResetViewMenuItem_Click(object sender, RoutedEventArgs e) private void ShowWordBordersMenuItem_Click(object sender, RoutedEventArgs e) { - RectanglesCanvas.Visibility = ShowWordBordersMenuItem.IsChecked is true + Visibility overlayVisibility = ShowWordBordersMenuItem.IsChecked is true ? 
Visibility.Visible : Visibility.Hidden; + + RectanglesCanvas.Visibility = overlayVisibility; + PdfTextCanvas.Visibility = overlayVisibility; } private void OverlayOpacityMenuItem_Click(object sender, RoutedEventArgs e) @@ -3823,6 +4199,7 @@ private void AutoContrastMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3870,6 +4247,7 @@ private void BrightenMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3917,6 +4295,7 @@ private void DarkenMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); @@ -3964,6 +4343,7 @@ private void GrayscaleMI_Click(object sender, RoutedEventArgs e) reDrawTimer.Stop(); RectanglesCanvas.Children.Clear(); wordBorders.Clear(); + ClearRenderedPdfTextLines(); if (!IsFreezeMode) FreezeGrabFrame(); From 3d49a182f81c4b9e31ec0a1dd7f9bb22005a7c6b Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 25 Apr 2026 18:39:48 -0500 Subject: [PATCH 03/22] Add PDF support to GrabFrame and update file checks Updated GrabFrame logic to accept both image and PDF files by using IoUtilities.IsVisualDocumentFile. Added PdfPig NuGet package for PDF handling. Improved debug message to reflect support for PDFs. 
--- Text-Grab/App.xaml.cs | 4 ++-- Text-Grab/Text-Grab.csproj | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Text-Grab/App.xaml.cs b/Text-Grab/App.xaml.cs index 85738b95..d1a32934 100644 --- a/Text-Grab/App.xaml.cs +++ b/Text-Grab/App.xaml.cs @@ -240,7 +240,7 @@ private static async Task HandleStartupArgs(string[] args) } else { - Debug.WriteLine("--grabframe flag specified but no valid image file path provided"); + Debug.WriteLine("--grabframe flag specified but no valid image or PDF file path provided"); // Fall through to default launch behavior } } @@ -318,7 +318,7 @@ private static async Task TryToOpenFile(string possiblePath, bool isQuiet) false, false); } - else if (IoUtilities.IsImageFile(possiblePath)) + else if (IoUtilities.IsVisualDocumentFile(possiblePath)) { GrabFrame gf = new(possiblePath); gf.Show(); diff --git a/Text-Grab/Text-Grab.csproj b/Text-Grab/Text-Grab.csproj index 16f1fad4..c1512207 100644 --- a/Text-Grab/Text-Grab.csproj +++ b/Text-Grab/Text-Grab.csproj @@ -64,6 +64,7 @@ + From 02fec333a6130bce5c0b7d5129abda1c4edef693 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 25 Apr 2026 18:39:56 -0500 Subject: [PATCH 04/22] Add tests for file type classification and PDF rendering Expanded FilesIoTests to cover file type and filter logic. Added PdfDocumentRendererTests for rendering, coordinate mapping, line grouping, and OCR overlap handling. 
--- Tests/FilesIoTests.cs | 29 ++++++++++ Tests/PdfDocumentRendererTests.cs | 89 +++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 Tests/PdfDocumentRendererTests.cs diff --git a/Tests/FilesIoTests.cs b/Tests/FilesIoTests.cs index 6fbb5403..e8967d06 100644 --- a/Tests/FilesIoTests.cs +++ b/Tests/FilesIoTests.cs @@ -107,4 +107,33 @@ public void GetEditorModeForPath_UsesFileExtension(string path, EtwEditorMode ex { Assert.Equal(expectedMode, IoUtilities.GetEditorModeForPath(path)); } + + [Theory] + [InlineData(@"C:\Temp\scan.png", OpenContentKind.Image)] + [InlineData(@"C:\Temp\scan.PDF", OpenContentKind.PdfDocument)] + [InlineData(@"C:\Temp\notes.txt", OpenContentKind.TextFile)] + public void GetOpenContentKindForPath_ClassifiesVisualDocumentsAndText(string path, OpenContentKind expectedKind) + { + Assert.Equal(expectedKind, IoUtilities.GetOpenContentKindForPath(path)); + } + + [Theory] + [InlineData(".png", true)] + [InlineData(".PDF", true)] + [InlineData(".txt", false)] + [InlineData("", false)] + public void IsVisualDocumentFileExtension_RecognizesImagesAndPdf(string extension, bool expected) + { + Assert.Equal(expected, IoUtilities.IsVisualDocumentFileExtension(extension)); + } + + [Fact] + public void GetVisualDocumentFilter_IncludesPdfSupport() + { + string filter = FileUtilities.GetVisualDocumentFilter(); + + Assert.Contains("Image and PDF files|", filter); + Assert.Contains("PDF files|*.pdf", filter); + Assert.Contains("Image files|", filter); + } } diff --git a/Tests/PdfDocumentRendererTests.cs b/Tests/PdfDocumentRendererTests.cs new file mode 100644 index 00000000..8d00801c --- /dev/null +++ b/Tests/PdfDocumentRendererTests.cs @@ -0,0 +1,89 @@ +using Text_Grab.Utilities; +using UglyToad.PdfPig.Core; +using Windows.Media.Ocr; + +namespace Tests; + +public class PdfDocumentRendererTests +{ + [Fact] + public void GetRenderDimensions_DoublesTypicalPdfPageSize() + { + (uint width, uint height) = 
PdfDocumentRenderer.GetRenderDimensions(612, 792); + + Assert.Equal(1224u, width); + Assert.Equal(1584u, height); + } + + [Fact] + public void GetRenderDimensions_ClampsToOcrEngineLimit() + { + (uint width, uint height) = PdfDocumentRenderer.GetRenderDimensions(5000, 2500); + + Assert.True(Math.Max(width, height) <= OcrEngine.MaxImageDimension); + Assert.True(width > height); + } + + [Fact] + public void GetRenderDimensions_InvalidSize_ReturnsSinglePixel() + { + (uint width, uint height) = PdfDocumentRenderer.GetRenderDimensions(0, -1); + + Assert.Equal(1u, width); + Assert.Equal(1u, height); + } + + [Fact] + public void ConvertPdfRectToImageRect_MapsPdfCoordinatesToRenderedBitmapSpace() + { + PdfRectangle pdfRect = new(10, 20, 60, 80); + + Windows.Foundation.Rect imageRect = PdfDocumentRenderer.ConvertPdfRectToImageRect(pdfRect, 100, 100, 200, 200); + + Assert.Equal(20, imageRect.X); + Assert.Equal(40, imageRect.Y); + Assert.Equal(100, imageRect.Width); + Assert.Equal(120, imageRect.Height); + } + + [Fact] + public void GroupWordsIntoLines_GroupsNearbyWordsIntoSingleLine() + { + IReadOnlyList lines = PdfDocumentRenderer.GroupWordsIntoLines( + [ + (new Windows.Foundation.Rect(10, 10, 20, 12), "Hello"), + (new Windows.Foundation.Rect(35, 11, 25, 12), "world"), + (new Windows.Foundation.Rect(12, 40, 30, 12), "Again") + ]); + + Assert.Collection( + lines, + firstLine => + { + Assert.Equal("Hello world", firstLine.Text); + Assert.True(firstLine.IsNativeText); + Assert.Equal(10, firstLine.SourceRect.X); + Assert.Equal(10, firstLine.SourceRect.Y); + Assert.Equal(50, firstLine.SourceRect.Width); + Assert.Equal(13, firstLine.SourceRect.Height); + }, + secondLine => Assert.Equal("Again", secondLine.Text)); + } + + [Fact] + public void ShouldIncludeOcrLine_OnlyReturnsTrueWhenImageOverlapIsMeaningful() + { + Windows.Foundation.Rect sourceRect = new(0, 0, 10, 10); + + bool shouldIncludeFromLargeOverlap = PdfDocumentRenderer.ShouldIncludeOcrLine( + sourceRect, + [new 
Windows.Foundation.Rect(5, 5, 10, 10)]); + + bool shouldIgnoreFromSmallOverlap = PdfDocumentRenderer.ShouldIncludeOcrLine( + sourceRect, + [new Windows.Foundation.Rect(8, 8, 10, 10)]); + + Assert.True(shouldIncludeFromLargeOverlap); + Assert.False(shouldIgnoreFromSmallOverlap); + } +} From 4a0b99a35a846404cf0c223520bee22b4861d635 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sun, 26 Apr 2026 22:47:26 -0500 Subject: [PATCH 05/22] Add "Open File..." option to NotifyIcon context menu Added a new "Open File..." menu item with a document icon to the NotifyIconWindow context menu. Implemented its click handler to asynchronously open the file picker using App.OpenFileWithPickerAsync(). --- Text-Grab/Controls/NotifyIconWindow.xaml | 8 ++++++++ Text-Grab/Controls/NotifyIconWindow.xaml.cs | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/Text-Grab/Controls/NotifyIconWindow.xaml b/Text-Grab/Controls/NotifyIconWindow.xaml index bd6e13f7..946db309 100644 --- a/Text-Grab/Controls/NotifyIconWindow.xaml +++ b/Text-Grab/Controls/NotifyIconWindow.xaml @@ -37,6 +37,14 @@ + + + + + Date: Sun, 26 Apr 2026 22:48:01 -0500 Subject: [PATCH 06/22] Refactor file filters and improve spacebar pan in GrabFrame Introduce FileUtilities.GetOpenDocumentFilter() for unified open file dialog filters, replacing hardcoded strings and supporting images, PDFs, spreadsheets, markdown, and text files. Refactor GrabFrame to generalize and enhance spacebar-based pan/zoom logic, including new state tracking, improved event handling, and better user experience for both images and PDFs. 
--- Text-Grab/Utilities/FileUtilities.cs | 42 ++++++++++++++-- Text-Grab/Views/EditTextWindow.xaml.cs | 3 +- Text-Grab/Views/GrabFrame.xaml.cs | 70 ++++++++++++++++++++------ 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/Text-Grab/Utilities/FileUtilities.cs b/Text-Grab/Utilities/FileUtilities.cs index cfe0edc7..c84033a9 100644 --- a/Text-Grab/Utilities/FileUtilities.cs +++ b/Text-Grab/Utilities/FileUtilities.cs @@ -38,9 +38,8 @@ public static string GetImageFilter() public static string GetVisualDocumentFilter() { - string imageExtensions = GetImageExtensionsFilterPattern(); - string pdfExtensions = string.Join(";", IoUtilities.PdfExtensions.Select(extension => $"*{extension}")); - string combinedExtensions = string.Join(";", new[] { imageExtensions, pdfExtensions }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); + string pdfExtensions = GetExtensionsFilterPattern(IoUtilities.PdfExtensions); + string combinedExtensions = GetVisualDocumentFilterPattern(); string imageFilter = GetImageFilter(); return string.Join("|", new[] @@ -51,6 +50,29 @@ public static string GetVisualDocumentFilter() }); } + public static string GetOpenDocumentFilter() + { + string spreadsheetExtensions = GetExtensionsFilterPattern(IoUtilities.SpreadsheetExtensions); + string markdownExtensions = GetExtensionsFilterPattern(IoUtilities.MarkdownExtensions); + string supportedExtensions = string.Join(";", new[] + { + GetVisualDocumentFilterPattern(), + spreadsheetExtensions, + markdownExtensions, + "*.txt" + }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); + + return string.Join("|", new[] + { + $"Supported documents|{supportedExtensions}", + GetVisualDocumentFilter(), + $"Spreadsheet documents|{spreadsheetExtensions}", + $"Markdown documents|{markdownExtensions}", + "Text documents (*.txt)|*.txt", + "All files (*.*)|*.*" + }); + } + public static string GetPathToLocalFile(string imageRelativePath) { string? 
executableDirectory = Path.GetDirectoryName(GetExePath()); @@ -116,6 +138,20 @@ private static string GetImageExtensionsFilterPattern() return imageExtensions; } + private static string GetExtensionsFilterPattern(IEnumerable extensions) + { + return string.Join(";", extensions.Select(extension => $"*{extension}")); + } + + private static string GetVisualDocumentFilterPattern() + { + return string.Join(";", new[] + { + GetImageExtensionsFilterPattern(), + GetExtensionsFilterPattern(IoUtilities.PdfExtensions) + }.Where(pattern => !string.IsNullOrWhiteSpace(pattern))); + } + private static async Task GetImageFilePackaged(string fileName, FileStorageKind storageKind) { StorageFolder folder = await GetStorageFolderPackaged(fileName, storageKind); diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index 62c12a5e..15f42842 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -50,7 +50,6 @@ public partial class EditTextWindow : Wpf.Ui.Controls.FluentWindow private const double SpreadsheetDefaultColumnWidth = 120; private const double HorizontalWheelScrollStep = 48; private const int WmMouseHWheel = 0x020E; - private const string OpenDocumentFilter = "Supported documents (*.pdf;*.csv;*.tsv;*.tab;*.md;*.markdown;*.txt)|*.pdf;*.csv;*.tsv;*.tab;*.md;*.markdown;*.txt|PDF documents (*.pdf)|*.pdf|Spreadsheet documents (*.csv;*.tsv;*.tab)|*.csv;*.tsv;*.tab|Markdown documents (*.md;*.markdown)|*.md;*.markdown|Text documents (*.txt)|*.txt|All files (*.*)|*.*"; private const string SaveDocumentFilter = "Spreadsheet documents (*.csv;*.tsv;*.tab)|*.csv;*.tsv;*.tab|Markdown documents (*.md;*.markdown)|*.md;*.markdown|Text documents (*.txt)|*.txt|All files (*.*)|*.*"; #region Fields @@ -3084,7 +3083,7 @@ private void OpenFileMenuItem_Click(object sender, RoutedEventArgs e) { // Set filter for file extension and default file extension DefaultExt = ".txt", - Filter = OpenDocumentFilter, + Filter = 
FileUtilities.GetOpenDocumentFilter(), DefaultDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) }; diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index 34aead7e..59cea908 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -68,6 +68,7 @@ public partial class GrabFrame : Window private bool isSearchSelectionOverridden = false; private bool isSelecting; private bool isSpaceJoining = true; + private bool isSpacePanModifierDown = false; private bool isStaticImageSource = false; private readonly Dictionary movingWordBordersDictionary = []; private IOcrLinesWords? ocrResultOfWindow; @@ -554,7 +555,8 @@ private void ClearLoadedPdfDocument() _loadedPdfDocument = null; _currentPdfPageContent = null; _currentPdfPageIndex = -1; - MainZoomBorder.RequireSpaceToPan = false; + SetSpacePanModifierState(false); + UpdateZoomPanMode(); UpdatePdfPageNavigation(); } @@ -590,7 +592,6 @@ private async Task ShowPdfPageAsync(int pageIndex) FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); FreezeToggleButton.Visibility = Visibility.Collapsed; - MainZoomBorder.RequireSpaceToPan = true; UpdatePdfPageNavigation(); SwitchToOcrFallbackIfUiAutomation(); @@ -713,12 +714,31 @@ public static bool CheckKey(VirtualKeyCodes code) private bool IsPdfTextInteraction(object? 
sender) => ReferenceEquals(sender, PdfTextCanvas); - private bool IsPdfPanGestureActive => - IsPdfDocumentLoaded - && MainZoomBorder.CanPan + private bool IsZoomPanGestureActive => + MainZoomBorder.CanPan && !KeyboardExtensions.IsShiftDown() && !KeyboardExtensions.IsCtrlDown() - && Keyboard.IsKeyDown(Key.Space); + && (!MainZoomBorder.RequireSpaceToPan || isSpacePanModifierDown || Keyboard.IsKeyDown(Key.Space)); + + private bool CanUseSpacePanModifier => + MainZoomBorder.RequireSpaceToPan + && MainZoomBorder.CanPan + && !IsEditingAnyWordBorders + && Keyboard.FocusedElement is not TextBox and not RichTextBox; + + private void SetSpacePanModifierState(bool isDown) + { + isSpacePanModifierDown = isDown; + MainZoomBorder.IsSpacePanModifierPressed = isDown; + } + + private void UpdateZoomPanMode() + { + MainZoomBorder.RequireSpaceToPan = IsFreezeMode; + + if (!MainZoomBorder.RequireSpaceToPan) + SetSpacePanModifierState(false); + } public HistoryInfo AsHistoryItem() { @@ -1985,6 +2005,7 @@ private void FreezeGrabFrame() Background = new SolidColorBrush(Colors.DimGray); RectanglesBorder.Background.Opacity = 0; IsFreezeMode = true; + UpdateZoomPanMode(); if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen) MainZoomBorder.CanZoom = true; @@ -2200,6 +2221,8 @@ private void GrabFrameWindow_Closing(object sender, System.ComponentModel.Cancel private void GrabFrameWindow_Deactivated(object? sender, EventArgs e) { + SetSpacePanModifierState(false); + if (!IsWordEditMode && !IsFreezeMode) { ResetGrabFrame(); @@ -2667,8 +2690,8 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) } bool shouldPanInsteadOfSelect = IsPdfDocumentLoaded - ? IsPdfPanGestureActive - : !KeyboardExtensions.IsShiftDown() && !KeyboardExtensions.IsCtrlDown() && !isPdfTextInteraction; + ? 
IsZoomPanGestureActive + : IsZoomPanGestureActive && !isPdfTextInteraction; if (shouldPanInsteadOfSelect) return; @@ -2717,9 +2740,9 @@ private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) if (IsCtrlDown) interactionSurface.Cursor = Cursors.Cross; else if (MainZoomBorder.CanPan) - interactionSurface.Cursor = IsPdfDocumentLoaded - ? (IsPdfPanGestureActive ? Cursors.SizeAll : Cursors.Arrow) - : (isPdfTextInteraction ? Cursors.Arrow : Cursors.SizeAll); + interactionSurface.Cursor = (IsPdfDocumentLoaded || !isPdfTextInteraction) && IsZoomPanGestureActive + ? Cursors.SizeAll + : Cursors.Arrow; else interactionSurface.Cursor = null; @@ -2730,10 +2753,8 @@ private void RectanglesCanvas_MouseMove(object sender, MouseEventArgs e) if (MainZoomBorder.CanPan && (IsPdfDocumentLoaded - ? IsPdfPanGestureActive - : (!KeyboardExtensions.IsShiftDown() - && !KeyboardExtensions.IsCtrlDown() - && !isPdfTextInteraction))) + ? IsZoomPanGestureActive + : (IsZoomPanGestureActive && !isPdfTextInteraction))) { isSelecting = false; return; @@ -3761,6 +3782,7 @@ private void UnfreezeGrabFrame() FreezeToggleButton.Visibility = Visibility.Visible; Background = new SolidColorBrush(Colors.Transparent); IsFreezeMode = false; + UpdateZoomPanMode(); if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen) MainZoomBorder.CanZoom = false; @@ -3898,6 +3920,13 @@ private void Window_LocationChanged(object? 
sender, EventArgs e) private void Window_PreviewKeyDown(object sender, KeyEventArgs e) { + if (e.Key == Key.Space && CanUseSpacePanModifier) + { + SetSpacePanModifierState(true); + e.Handled = true; + return; + } + if (!wasAltHeld && (e.SystemKey == Key.LeftAlt || e.SystemKey == Key.RightAlt)) { RectanglesCanvas.Opacity = 0.1; @@ -3923,6 +3952,17 @@ private void Window_PreviewKeyDown(object sender, KeyEventArgs e) private void Window_PreviewKeyUp(object sender, KeyEventArgs e) { + if (e.Key == Key.Space) + { + SetSpacePanModifierState(false); + + if (CanUseSpacePanModifier) + { + e.Handled = true; + return; + } + } + if (wasAltHeld && (e.SystemKey == Key.LeftAlt || e.SystemKey == Key.RightAlt)) { RectanglesCanvas.Opacity = 1; From 3ec02aab3cc870678225b75645cfaeca2a6bfec7 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sun, 26 Apr 2026 22:48:25 -0500 Subject: [PATCH 07/22] Refactor ZoomBorder panning and event handling Refactored ZoomBorder to use PreviewMouseDown/Up/Move for panning, improving modifier key support and event robustness. Added isPanning state, IsSpacePanModifierPressed, and RequireSpaceToPan for flexible pan activation. Middle mouse now resets zoom/pan. Improved mouse capture/release and removed obsolete handlers. --- Text-Grab/Controls/ZoomBorder.cs | 71 ++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/Text-Grab/Controls/ZoomBorder.cs b/Text-Grab/Controls/ZoomBorder.cs index 198dcb97..004f9dbd 100644 --- a/Text-Grab/Controls/ZoomBorder.cs +++ b/Text-Grab/Controls/ZoomBorder.cs @@ -16,6 +16,7 @@ namespace Text_Grab.Controls; public class ZoomBorder : Border { private UIElement? 
child = null; + private bool isPanning = false; private Point origin; private Point start; @@ -42,6 +43,8 @@ public override UIElement Child public bool CanZoom { get; set; } = true; + public bool IsSpacePanModifierPressed { get; set; } = false; + public bool RequireSpaceToPan { get; set; } = false; public void Initialize(UIElement element) @@ -58,18 +61,9 @@ public void Initialize(UIElement element) child.RenderTransform = group; child.RenderTransformOrigin = new Point(0.0, 0.0); MouseWheel += Child_MouseWheel; - MouseLeftButtonDown += Child_MouseLeftButtonDown; - MouseLeftButtonUp += Child_MouseLeftButtonUp; - PreviewMouseDown += ZoomBorder_PreviewMouseDown; - MouseMove += Child_MouseMove; - PreviewMouseRightButtonDown += new MouseButtonEventHandler( - Child_PreviewMouseRightButtonDown); - } - - private void ZoomBorder_PreviewMouseDown(object sender, MouseButtonEventArgs e) - { - if (e.MiddleButton == MouseButtonState.Pressed) - Reset(); + AddHandler(Mouse.PreviewMouseDownEvent, new MouseButtonEventHandler(Child_PreviewMouseDown), true); + AddHandler(Mouse.PreviewMouseUpEvent, new MouseButtonEventHandler(Child_PreviewMouseUp), true); + AddHandler(Mouse.PreviewMouseMoveEvent, new MouseEventHandler(Child_MouseMove), true); } public void Reset() @@ -87,10 +81,14 @@ public void Reset() tt.X = 0.0; tt.Y = 0.0; + isPanning = false; + ReleaseMouseCapture(); + Cursor = Cursors.Arrow; CanPan = false; } - private bool IsPanGestureActive() => !RequireSpaceToPan || Keyboard.IsKeyDown(Key.Space); + private bool IsPanGestureActive() => + !RequireSpaceToPan || IsSpacePanModifierPressed || Keyboard.IsKeyDown(Key.Space); private bool BlocksPanFromSource(object? 
originalSource) { @@ -148,46 +146,66 @@ private void Child_MouseWheel(object sender, MouseWheelEventArgs e) CanPan = true; } - private void Child_MouseLeftButtonDown(object sender, MouseButtonEventArgs e) + private void Child_PreviewMouseDown(object sender, MouseButtonEventArgs e) { - if (child is null || !IsPanGestureActive() || BlocksPanFromSource(e.OriginalSource)) + if (e.ChangedButton == MouseButton.Middle) + { + Reset(); + e.Handled = true; + return; + } + + if (e.ChangedButton != MouseButton.Left) + return; + + if (child is null + || GetScaleTransform(child) is not ScaleTransform st + || st.ScaleX == 1.0 + || !CanPan + || !IsPanGestureActive() + || BlocksPanFromSource(e.OriginalSource)) return; TranslateTransform tt = GetTranslateTransform(child); start = e.GetPosition(this); origin = new Point(tt.X, tt.Y); + + if (!CaptureMouse()) + return; + + isPanning = true; Cursor = Cursors.Hand; - // child.CaptureMouse(); + e.Handled = true; } - private void Child_MouseLeftButtonUp(object sender, MouseButtonEventArgs e) + private void Child_PreviewMouseUp(object sender, MouseButtonEventArgs e) { - if (child is null) + if (e.ChangedButton != MouseButton.Left || child is null || !isPanning) return; - child.ReleaseMouseCapture(); + isPanning = false; + ReleaseMouseCapture(); Cursor = Cursors.Arrow; - } - - private void Child_PreviewMouseRightButtonDown(object sender, MouseButtonEventArgs e) - { + e.Handled = true; } private void Child_MouseMove(object sender, MouseEventArgs e) { - if (BlocksPanFromSource(e.OriginalSource)) + if (!isPanning && BlocksPanFromSource(e.OriginalSource)) return; if (child is null || GetScaleTransform(child) is not ScaleTransform st || st.ScaleX == 1.0 - || Mouse.LeftButton == MouseButtonState.Released + || !isPanning || !CanPan || !IsPanGestureActive() || KeyboardExtensions.IsShiftDown() || KeyboardExtensions.IsCtrlDown()) { - child?.ReleaseMouseCapture(); + isPanning = false; + ReleaseMouseCapture(); + Cursor = Cursors.Arrow; return; } @@ 
-195,5 +213,6 @@ private void Child_MouseMove(object sender, MouseEventArgs e) Vector v = start - e.GetPosition(this); tt.X = origin.X - v.X; tt.Y = origin.Y - v.Y; + e.Handled = true; } } From c2407f799bba5d68e20d24fcb02a36084bac4e7f Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sun, 26 Apr 2026 22:48:45 -0500 Subject: [PATCH 08/22] Enhance file open and drag-and-drop support Added static helpers for file picker and drag-and-drop file handling in App.xaml.cs. Renamed TryToOpenFile to TryToOpenFilePathAsync and updated usages. Ensured EditTextWindow is activated after opening a file. --- Text-Grab/App.xaml.cs | 49 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/Text-Grab/App.xaml.cs b/Text-Grab/App.xaml.cs index d1a32934..415ec3fa 100644 --- a/Text-Grab/App.xaml.cs +++ b/Text-Grab/App.xaml.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.Linq; using System.Threading.Tasks; using System.Windows; using System.Windows.Markup; @@ -74,6 +75,49 @@ public static void DefaultLaunch() SetTheme(); } + public static async Task OpenFileWithPickerAsync(bool isQuiet = false) + { + OpenFileDialog openFileDialog = new() + { + Filter = FileUtilities.GetOpenDocumentFilter(), + Title = "Open File", + CheckFileExists = true, + InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments) + }; + + if (openFileDialog.ShowDialog() == true) + await TryToOpenFilePathAsync(openFileDialog.FileName, isQuiet); + } + + public static DragDropEffects GetDroppedFileEffect(IDataObject? dataObject) + { + return GetDroppedFilePaths(dataObject).Any() + ? DragDropEffects.Copy + : DragDropEffects.None; + } + + public static IReadOnlyList GetDroppedFilePaths(IDataObject? 
dataObject) + { + if (dataObject is null || !dataObject.GetDataPresent(DataFormats.FileDrop, true)) + return []; + + + if (dataObject.GetData(DataFormats.FileDrop, true) is not string[] paths || paths.Length == 0) + return []; + + return [.. paths.Where(File.Exists)]; + } + + public static async Task TryToOpenDroppedFilesAsync(IDataObject? dataObject, bool isQuiet = false) + { + bool openedAny = false; + + foreach (string path in GetDroppedFilePaths(dataObject)) + openedAny = await TryToOpenFilePathAsync(path, isQuiet) || openedAny; + + return openedAny; + } + public static void SetTheme(object? sender = null, EventArgs? e = null) { bool gotTheme = Enum.TryParse(_defaultSettings.AppTheme.ToString(), true, out AppTheme currentAppTheme); @@ -265,7 +309,7 @@ private static async Task HandleStartupArgs(string[] args) return true; } - bool openedFile = await TryToOpenFile(currentArgument, isQuiet); + bool openedFile = await TryToOpenFilePathAsync(currentArgument, isQuiet); if (openedFile) return true; @@ -305,7 +349,7 @@ private static void ShowAndSetFirstRun() _defaultSettings.Save(); } - private static async Task TryToOpenFile(string possiblePath, bool isQuiet) + public static async Task TryToOpenFilePathAsync(string possiblePath, bool isQuiet = false) { if (!File.Exists(possiblePath)) return false; @@ -329,6 +373,7 @@ private static async Task TryToOpenFile(string possiblePath, bool isQuiet) EditTextWindow manipulateTextWindow = new(); manipulateTextWindow.OpenPath(possiblePath); manipulateTextWindow.Show(); + manipulateTextWindow.Activate(); } return true; } From b2a2efa677f707cf407fb490e7d1d22df5d90b23 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sun, 26 Apr 2026 22:48:56 -0500 Subject: [PATCH 09/22] Add unit tests for file dialog filters and drag-drop Expanded FilesIoTests to cover GetOpenDocumentFilter and drag-and-drop file handling. Added tests for document type filters, dropped file path extraction, and drag-drop effects. 
Included necessary using directives for System.IO and System.Windows. --- Tests/FilesIoTests.cs | 62 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/Tests/FilesIoTests.cs b/Tests/FilesIoTests.cs index e8967d06..6560b7ca 100644 --- a/Tests/FilesIoTests.cs +++ b/Tests/FilesIoTests.cs @@ -1,4 +1,6 @@ -using System.Drawing; +using System.Drawing; +using System.IO; +using System.Windows; using Text_Grab; using Text_Grab.Models; using Text_Grab.Utilities; @@ -136,4 +138,62 @@ public void GetVisualDocumentFilter_IncludesPdfSupport() Assert.Contains("PDF files|*.pdf", filter); Assert.Contains("Image files|", filter); } + + [Fact] + public void GetOpenDocumentFilter_IncludesVisualAndTextOptions() + { + string filter = FileUtilities.GetOpenDocumentFilter(); + + Assert.Contains("Supported documents|", filter); + Assert.Contains("Image and PDF files|", filter); + Assert.Contains("Spreadsheet documents|*.csv;*.tsv;*.tab", filter); + Assert.Contains("Markdown documents|*.md;*.markdown", filter); + Assert.Contains("Text documents (*.txt)|*.txt", filter); + Assert.Contains("All files (*.*)|*.*", filter); + } + + [WpfFact] + public void GetDroppedFilePaths_ReturnsExistingFilesOnly() + { + string firstPath = Path.GetTempFileName(); + string secondPath = Path.GetTempFileName(); + string missingPath = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid()}.txt"); + DataObject dataObject = new(DataFormats.FileDrop, new[] { firstPath, missingPath, secondPath }); + + try + { + IReadOnlyList paths = App.GetDroppedFilePaths(dataObject); + + Assert.Equal([firstPath, secondPath], paths); + } + finally + { + File.Delete(firstPath); + File.Delete(secondPath); + } + } + + [WpfFact] + public void GetDroppedFileEffect_ReturnsCopyWhenExistingFilesAreDropped() + { + string path = Path.GetTempFileName(); + DataObject dataObject = new(DataFormats.FileDrop, new[] { path }); + + try + { + Assert.Equal(DragDropEffects.Copy, 
App.GetDroppedFileEffect(dataObject)); + } + finally + { + File.Delete(path); + } + } + + [WpfFact] + public void GetDroppedFileEffect_ReturnsNoneWhenNoFilesCanBeOpened() + { + DataObject dataObject = new(DataFormats.Text, "hello"); + + Assert.Equal(DragDropEffects.None, App.GetDroppedFileEffect(dataObject)); + } } From 06b256ea96bb4bf4af1a293df03fcda9463e718a Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Mon, 27 Apr 2026 23:16:46 -0500 Subject: [PATCH 10/22] Improve pan/zoom UX and focus handling in GrabFrame Added a 300ms grace period after releasing Space before disabling pan mode, making panning with Space+mouse smoother. Ensured pan/zoom is always enabled for PDFs and moved focus away from buttons to prevent accidental activation during panning. Refactored event handling and focus logic for more robust and user-friendly pan/zoom interactions. Also improved mouse capture logic and code clarity in ZoomBorder. --- Text-Grab/Controls/ZoomBorder.cs | 10 +++--- Text-Grab/Views/GrabFrame.xaml.cs | 58 +++++++++++++++++++++++++------ 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/Text-Grab/Controls/ZoomBorder.cs b/Text-Grab/Controls/ZoomBorder.cs index 004f9dbd..e03e9e92 100644 --- a/Text-Grab/Controls/ZoomBorder.cs +++ b/Text-Grab/Controls/ZoomBorder.cs @@ -1,11 +1,11 @@ -using System.Linq; +using System.Linq; using System.Windows; using System.Windows.Controls; using System.Windows.Input; using System.Windows.Media; using System.Windows.Media.Media3D; -// From StackOverFlow: +// From StackOverFlow: // https://stackoverflow.com/questions/741956/pan-zoom-image // Answered by https://stackoverflow.com/users/282801/wies%c5%82aw-%c5%a0olt%c3%a9s // Read on 2024-05-02 @@ -164,13 +164,16 @@ private void Child_PreviewMouseDown(object sender, MouseButtonEventArgs e) || !CanPan || !IsPanGestureActive() || BlocksPanFromSource(e.OriginalSource)) + { return; + } TranslateTransform tt = GetTranslateTransform(child); start = e.GetPosition(this); origin = new 
Point(tt.X, tt.Y); - if (!CaptureMouse()) + bool captured = CaptureMouse(); + if (!captured) return; isPanning = true; @@ -199,7 +202,6 @@ private void Child_MouseMove(object sender, MouseEventArgs e) || st.ScaleX == 1.0 || !isPanning || !CanPan - || !IsPanGestureActive() || KeyboardExtensions.IsShiftDown() || KeyboardExtensions.IsCtrlDown()) { diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index 59cea908..df18d2eb 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -69,6 +69,7 @@ public partial class GrabFrame : Window private bool isSelecting; private bool isSpaceJoining = true; private bool isSpacePanModifierDown = false; + private DispatcherTimer? _spacePanGraceTimer; private bool isStaticImageSource = false; private readonly Dictionary movingWordBordersDictionary = []; private IOcrLinesWords? ocrResultOfWindow; @@ -557,6 +558,7 @@ private void ClearLoadedPdfDocument() _currentPdfPageIndex = -1; SetSpacePanModifierState(false); UpdateZoomPanMode(); + SetScrollBehaviorMenuItems(); UpdatePdfPageNavigation(); } @@ -591,6 +593,7 @@ private async Task ShowPdfPageAsync(int pageIndex) _currentPdfPageIndex = pageIndex; FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); + MainZoomBorder.CanZoom = true; FreezeToggleButton.Visibility = Visibility.Collapsed; UpdatePdfPageNavigation(); SwitchToOcrFallbackIfUiAutomation(); @@ -732,12 +735,15 @@ private void SetSpacePanModifierState(bool isDown) MainZoomBorder.IsSpacePanModifierPressed = isDown; } - private void UpdateZoomPanMode() + private void MoveKeyboardFocusFromButtonBase() { - MainZoomBorder.RequireSpaceToPan = IsFreezeMode; + if (MainZoomBorder.CanPan && Keyboard.FocusedElement is ButtonBase) + RectanglesCanvas.Focus(); + } - if (!MainZoomBorder.RequireSpaceToPan) - SetSpacePanModifierState(false); + private void UpdateZoomPanMode() + { + MainZoomBorder.RequireSpaceToPan = true; } public HistoryInfo AsHistoryItem() @@ -2221,6 +2227,8 @@ 
private void GrabFrameWindow_Closing(object sender, System.ComponentModel.Cancel private void GrabFrameWindow_Deactivated(object? sender, EventArgs e) { + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; SetSpacePanModifierState(false); if (!IsWordEditMode && !IsFreezeMode) @@ -2369,6 +2377,14 @@ private void HandlePreviewMouseWheel(object sender, MouseWheelEventArgs e) if (scrollBehavior == ScrollBehavior.ZoomWhenFrozen && IsFreezeMode) return; // ZoomBorder handles scroll when frozen + if (IsPdfDocumentLoaded) + { + // ZoomBorder handles the scroll and sets CanPan=true synchronously after we return. + // Defer a focus check so ButtonBase never holds focus while panning is possible. + Dispatcher.InvokeAsync(MoveKeyboardFocusFromButtonBase, DispatcherPriority.Input); + return; + } + e.Handled = true; double aspectRatio = (Height - 66) / (Width - 4); @@ -2673,7 +2689,8 @@ private void RectanglesCanvas_MouseDown(object sender, MouseButtonEventArgs e) : (GetInteractionSurface(sender) ?? RectanglesCanvas); reDrawTimer.Stop(); - GrabBTN.Focus(); + if (!MainZoomBorder.CanPan) + GrabBTN.Focus(); if (e.RightButton == MouseButtonState.Pressed) { @@ -3920,11 +3937,17 @@ private void Window_LocationChanged(object? 
sender, EventArgs e) private void Window_PreviewKeyDown(object sender, KeyEventArgs e) { - if (e.Key == Key.Space && CanUseSpacePanModifier) + if (e.Key == Key.Space) { - SetSpacePanModifierState(true); - e.Handled = true; - return; + // Cancel any pending grace-period clear when Space is pressed + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; + if (CanUseSpacePanModifier) + { + SetSpacePanModifierState(true); + e.Handled = true; + return; + } } if (!wasAltHeld && (e.SystemKey == Key.LeftAlt || e.SystemKey == Key.RightAlt)) @@ -3954,7 +3977,19 @@ private void Window_PreviewKeyUp(object sender, KeyEventArgs e) { if (e.Key == Key.Space) { - SetSpacePanModifierState(false); + // Keep the pan modifier active for a short grace period after Space is released. + // Users commonly release Space a split-second before clicking to start a pan, + // so clearing immediately makes the gesture feel broken. + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = new DispatcherTimer { Interval = TimeSpan.FromMilliseconds(300) }; + _spacePanGraceTimer.Tick += (_, _) => + { + _spacePanGraceTimer?.Stop(); + _spacePanGraceTimer = null; + if (!Keyboard.IsKeyDown(Key.Space)) + SetSpacePanModifierState(false); + }; + _spacePanGraceTimer.Start(); if (CanUseSpacePanModifier) { @@ -4165,6 +4200,9 @@ private void SetScrollBehaviorMenuItems() default: break; } + + if (IsPdfDocumentLoaded) + MainZoomBorder.CanZoom = true; } private void InvertColorsMI_Click(object sender, RoutedEventArgs e) From 2766e9582eeb6ffa533b3adff809aa7a0bde2781 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Wed, 29 Apr 2026 20:20:17 -0500 Subject: [PATCH 11/22] Add clipboard tests --- Tests/ClipboardUtilitiesTests.cs | 101 +++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 Tests/ClipboardUtilitiesTests.cs diff --git a/Tests/ClipboardUtilitiesTests.cs b/Tests/ClipboardUtilitiesTests.cs new file mode 100644 index 00000000..5e8896a6 --- /dev/null +++ 
b/Tests/ClipboardUtilitiesTests.cs @@ -0,0 +1,101 @@ +using Text_Grab.Utilities; + +namespace Tests; + +public class ClipboardUtilitiesTests +{ + private const string SampleCfHtml = """ + Version:1.0 + StartHTML:00000097 + EndHTML:00002353 + StartFragment:00000153 + EndFragment:00002320 + + + + + + + + + + + + + + + + + + +
MonthIntSeason
January1Winter
February2Winter
+ + + """; + + [Fact] + public void ConvertHtmlToTabSeparated_ParsesBasicTable() + { + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(SampleCfHtml); + + string[] lines = result.Split('\n'); + Assert.Equal(3, lines.Length); + Assert.Equal("Month\tInt\tSeason", lines[0]); + Assert.Equal("January\t1\tWinter", lines[1]); + Assert.Equal("February\t2\tWinter", lines[2]); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesBrTag() + { + string html = """ + + +
4
A
Spring
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + Assert.Equal("4 A\tSpring", result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_ReturnsEmptyWhenNoTable() + { + string html = "

No table here

"; + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + Assert.Empty(result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_DecodesHtmlEntities() + { + string html = """ + + +
A & B<tag>
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + Assert.Equal("A & B\t", result); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesThElements() + { + string html = """ + + + +
NameValue
Foo42
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Name\tValue", lines[0]); + Assert.Equal("Foo\t42", lines[1]); + } +} From da3ee2d671c1da6487df312c0f5d392c4e376205 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Wed, 29 Apr 2026 20:23:53 -0500 Subject: [PATCH 12/22] improve the way the clipboard can handle html table data and more --- Text-Grab/Utilities/ClipboardUtilities.cs | 166 ++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/Text-Grab/Utilities/ClipboardUtilities.cs b/Text-Grab/Utilities/ClipboardUtilities.cs index 833e09ba..804d7231 100644 --- a/Text-Grab/Utilities/ClipboardUtilities.cs +++ b/Text-Grab/Utilities/ClipboardUtilities.cs @@ -1,6 +1,9 @@ using System; +using System.Collections.Generic; using System.IO; +using System.Net; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using System.Windows; using System.Windows.Media; @@ -127,6 +130,169 @@ private static string CleanTeamsBase64Image(string dirtyTeamsString) return sb.ToString(); } + public static bool TryGetHtmlTableAsTabSeparated(out string tabSeparated) + { + tabSeparated = string.Empty; + try + { + if (!System.Windows.Clipboard.ContainsData(System.Windows.DataFormats.Html)) + return false; + + string htmlData = System.Windows.Clipboard.GetData(System.Windows.DataFormats.Html) as string ?? 
string.Empty; + if (string.IsNullOrEmpty(htmlData)) + return false; + + string result = ConvertHtmlToTabSeparated(htmlData); + if (string.IsNullOrEmpty(result)) + return false; + + tabSeparated = result; + return true; + } + catch + { + return false; + } + } + + internal static string ConvertHtmlToTabSeparated(string cfHtml) + { + string fragment = ExtractHtmlFragment(cfHtml); + List> table = ParseHtmlTableToGrid(fragment); + if (table.Count == 0) + return string.Empty; + + StringBuilder sb = new(); + for (int r = 0; r < table.Count; r++) + { + if (r > 0) sb.Append('\n'); + sb.Append(string.Join("\t", table[r])); + } + return sb.ToString(); + } + + private static string ExtractHtmlFragment(string cfHtml) + { + int startPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + if (startPos < 0) + startPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + + int endPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + if (endPos < 0) + endPos = cfHtml.IndexOf("", StringComparison.OrdinalIgnoreCase); + + if (startPos >= 0 && endPos > startPos) + { + int fragmentStart = cfHtml.IndexOf("-->", startPos) + 3; + return cfHtml[fragmentStart..endPos]; + } + + // Fall back to byte-offset headers (StartFragment:/EndFragment:) + const string startKey = "StartFragment:"; + const string endKey = "EndFragment:"; + int sfIdx = cfHtml.IndexOf(startKey, StringComparison.OrdinalIgnoreCase); + int efIdx = cfHtml.IndexOf(endKey, StringComparison.OrdinalIgnoreCase); + + if (sfIdx >= 0 && efIdx >= 0) + { + int sfNumStart = sfIdx + startKey.Length; + int sfLineEnd = cfHtml.IndexOf('\n', sfNumStart); + int efNumStart = efIdx + endKey.Length; + int efLineEnd = cfHtml.IndexOf('\n', efNumStart); + + if (sfLineEnd > sfNumStart && efLineEnd > efNumStart + && int.TryParse(cfHtml[sfNumStart..sfLineEnd].Trim(), out int sfOff) + && int.TryParse(cfHtml[efNumStart..efLineEnd].Trim(), out int efOff) + && sfOff >= 0 && efOff > sfOff && efOff <= cfHtml.Length) + { + return 
cfHtml[sfOff..efOff]; + } + } + + return cfHtml; + } + + private static List> ParseHtmlTableToGrid(string html) + { + List> result = []; + int tableStart = html.IndexOf("", StringComparison.OrdinalIgnoreCase); + tableEnd = tableEnd >= 0 ? tableEnd + 8 : html.Length; + + string tableHtml = html[tableStart..tableEnd]; + int pos = 0; + + while (pos < tableHtml.Length) + { + int rowStart = tableHtml.IndexOf("", rowStart, StringComparison.OrdinalIgnoreCase); + rowEnd = rowEnd >= 0 ? rowEnd + 5 : tableHtml.Length; + + List cells = ParseHtmlRowCells(tableHtml[rowStart..rowEnd]); + if (cells.Count > 0) + result.Add(cells); + + pos = rowEnd; + } + + return result; + } + + private static List ParseHtmlRowCells(string rowHtml) + { + List cells = []; + int pos = 0; + + while (pos < rowHtml.Length) + { + int tdPos = rowHtml.IndexOf("= 0 && (thPos < 0 || tdPos <= thPos)) + { + cellStart = tdPos; + endTag = ""; + } + else + { + cellStart = thPos; + endTag = ""; + } + + int openEnd = rowHtml.IndexOf('>', cellStart); + if (openEnd < 0) break; + + int contentStart = openEnd + 1; + int contentEnd = rowHtml.IndexOf(endTag, contentStart, StringComparison.OrdinalIgnoreCase); + contentEnd = contentEnd >= 0 ? 
contentEnd : rowHtml.Length; + + cells.Add(CleanHtmlCellContent(rowHtml[contentStart..contentEnd])); + pos = contentEnd + endTag.Length; + } + + return cells; + } + + private static string CleanHtmlCellContent(string html) + { + if (string.IsNullOrEmpty(html)) + return string.Empty; + + html = Regex.Replace(html, @"", " ", RegexOptions.IgnoreCase); + html = Regex.Replace(html, @"<[^>]*>", string.Empty); + html = WebUtility.HtmlDecode(html); + + return html.Trim(); + } + private static string base64ImageExtension(ref string base64String) { // Copied this portion of the code from https://github.com/veler/DevToys From a8b61c981b16cd2990eb6d752fd805e98393032f Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Wed, 29 Apr 2026 20:24:34 -0500 Subject: [PATCH 13/22] send table data to etw properly --- Text-Grab/Views/GrabFrame.xaml.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index df18d2eb..a18f4226 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -3637,10 +3637,12 @@ private void TryToNumberMenuItem_Click(object sender, RoutedEventArgs e) UndoRedo.EndTransaction(); } - private void TryToPlaceTable() + private List TryToPlaceTable() { RemoveTableLines(); + List wbInfos = [.. wordBorders.Select(wb => new WordBorderInfo(wb))]; + Point windowPosition = this.GetAbsolutePosition(); DpiScale dpi = VisualTreeHelper.GetDpi(this); System.Drawing.Rectangle rectCanvasSize = new() @@ -3654,8 +3656,6 @@ private void TryToPlaceTable() try { AnalyzedResultTable = new(); - // Convert UI controls to model-only infos - List wbInfos = [.. 
wordBorders.Select(wb => new WordBorderInfo(wb))]; AnalyzedResultTable.AnalyzeAsTable(wbInfos, rectCanvasSize); if (AnalyzedResultTable.TableLines is not null) RectanglesCanvas.Children.Add(AnalyzedResultTable.TableLines); @@ -3664,6 +3664,8 @@ private void TryToPlaceTable() { Debug.WriteLine(ex.Message); } + + return wbInfos; } private void TryToReadBarcodes(DpiScale dpi) @@ -3865,9 +3867,7 @@ [.. wordBorders if (TableToggleButton.IsChecked is true && wordBorders.Count > 0) { - TryToPlaceTable(); - // Build table text via model-only API - List infos = [.. wordBorders.Select(wb => new WordBorderInfo(wb))]; + List infos = TryToPlaceTable(); ResultTable.GetTextFromTabledWordBorders(stringBuilder, infos, isSpaceJoining); } else From 57217937209b3be77dbf6ad653ad3d02345280fb Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Wed, 29 Apr 2026 20:25:01 -0500 Subject: [PATCH 14/22] improve perf and options around markdown --- .../Utilities/MarkdownDocumentUtilities.cs | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs index b1d9405a..72c795e6 100644 --- a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs +++ b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs @@ -1,5 +1,4 @@ using Markdig; -using Markdig.Extensions.Tables; using Markdig.Extensions.TaskLists; using Markdig.Syntax; using Markdig.Syntax.Inlines; @@ -25,21 +24,23 @@ namespace Text_Grab.Utilities; -public static class MarkdownDocumentUtilities +public static partial class MarkdownDocumentUtilities { - private static readonly Regex LiveBlockTriggerRegex = new( - @"^\s{0,3}(#{1,6}|>+|[-+*]|\d+[.)])$", - RegexOptions.Compiled); - private static readonly Regex LiveInlinePromotionRegex = new( - @"(^|\s)\[( |x|X)\](\s|$)|(\*\*|__)(?=\S).+?\4|(?+\s|[-+*]\s|\d+[.)]\s|```|~~~|---\s*$|___\s*$|\*\*\*\s*$)|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)|(^|\n)\|.+\|\s*$", - RegexOptions.Compiled | 
RegexOptions.Multiline); + private static readonly Regex LiveBlockTriggerRegex = LiveBlockTrigger(); + private static readonly Regex LiveInlinePromotionRegex = LiveInlinePromotion(); + private static readonly Regex MarkdownPatternRegex = MarkdownPattern(); + private static readonly MarkdownPipeline MarkdownPipeline = new MarkdownPipelineBuilder() .UseAutoLinks() .UsePipeTables() .UseTaskLists() + .UseCitations() + .UseDiagrams() + .UseAlertBlocks() + .UseEmojiAndSmiley() + .UseEmphasisExtras() + .UseAutoIdentifiers() + .UseGridTables() .Build(); private enum MarkdownBlockRole @@ -492,8 +493,8 @@ private static void WriteTable(StringBuilder builder, WpfTable table) if (firstGroup is null || firstGroup.Rows.Count == 0) return; - List rows = firstGroup.Rows.Cast().ToList(); - List headerCells = rows[0].Cells.Cast().Select(SerializeTableCell).ToList(); + List rows = [.. firstGroup.Rows.Cast()]; + List headerCells = [.. rows[0].Cells.Cast().Select(SerializeTableCell)]; builder.Append(ApplyQuotePrefix($"| {string.Join(" | ", headerCells)} |", quotePrefix)); builder.AppendLine(); @@ -506,7 +507,7 @@ private static void WriteTable(StringBuilder builder, WpfTable table) foreach (WpfTableRow row in dataRows) { builder.AppendLine(); - List rowCells = row.Cells.Cast().Select(SerializeTableCell).ToList(); + List rowCells = [.. 
row.Cells.Cast().Select(SerializeTableCell)]; builder.Append(ApplyQuotePrefix($"| {string.Join(" | ", rowCells)} |", quotePrefix)); } } @@ -834,4 +835,14 @@ private static string GetSourceSlice(string source, MarkdownObject markdownObjec private static string GetCodeFenceInfo(DependencyObject element) => (string)element.GetValue(CodeFenceInfoProperty); private static void SetIsTableHeader(DependencyObject element, bool value) => element.SetValue(IsTableHeaderProperty, value); private static bool GetIsTableHeader(DependencyObject element) => (bool)element.GetValue(IsTableHeaderProperty); + + + [GeneratedRegex(@"^\s{0,3}(#{1,6}|>+|[-+*]|\d+[.)])$", RegexOptions.Compiled)] + private static partial Regex LiveBlockTrigger(); + + [GeneratedRegex(@"(^|\s)\[( |x|X)\](\s|$)|(\*\*|__)(?=\S).+?\4|(?+\s|[-+*]\s|\d+[.)]\s|```|~~~|---\s*$|___\s*$|\*\*\*\s*$)|\[[^\]]+\]\([^)]+\)|!\[[^\]]*\]\([^)]+\)|(^|\n)\|.+\|\s*$", RegexOptions.Multiline | RegexOptions.Compiled)] + private static partial Regex MarkdownPattern(); } From 9e8aeb18fa3d35cb17b00a49cd055e329f0053ff Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Wed, 29 Apr 2026 20:26:30 -0500 Subject: [PATCH 15/22] enhance the find and replace to be compatible with etw spreadsheet mode --- Text-Grab/Controls/FindAndReplaceWindow.xaml | 14 +- .../Controls/FindAndReplaceWindow.xaml.cs | 185 +++++++++++++++--- Text-Grab/Models/FindResult.cs | 15 +- Text-Grab/Views/EditTextWindow.xaml.cs | 154 ++++++++++++++- 4 files changed, 329 insertions(+), 39 deletions(-) diff --git a/Text-Grab/Controls/FindAndReplaceWindow.xaml b/Text-Grab/Controls/FindAndReplaceWindow.xaml index a6677039..8935d419 100644 --- a/Text-Grab/Controls/FindAndReplaceWindow.xaml +++ b/Text-Grab/Controls/FindAndReplaceWindow.xaml @@ -289,8 +289,7 @@ - - + @@ -302,23 +301,18 @@ + Text="{Binding LocationDisplay}" /> - diff --git a/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs b/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs index 851735d3..f04bd253 100644 --- 
a/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs +++ b/Text-Grab/Controls/FindAndReplaceWindow.xaml.cs @@ -59,6 +59,8 @@ public FindAndReplaceWindow() #region Properties + private bool IsSpreadsheetSearch => textEditWindow?.IsSpreadsheetMode is true; + public List FindResults { get; set; } = []; public string StringFromWindow @@ -85,6 +87,8 @@ public EditTextWindow? TextEditWindow public void SearchForText() { + if (IsSpreadsheetSearch) { SearchSpreadsheetCells(); return; } + FindResults.Clear(); ResultsListView.ItemsSource = null; @@ -180,6 +184,61 @@ public void SearchForText() } } + private Regex? BuildCurrentRegex() + { + string rawPattern = FindTextBox.Text; + if (string.IsNullOrEmpty(rawPattern)) return null; + + if (rawPattern.StartsWith('^') && rawPattern.EndsWith('$') && rawPattern.Length > 2) + rawPattern = rawPattern[1..^1]; + + if (UsePatternCheckBox.IsChecked is false && ExactMatchCheckBox.IsChecked is bool matchExactly) + rawPattern = rawPattern.EscapeSpecialRegexChars(matchExactly); + + RegexOptions options = RegexOptions.None; + if (ExactMatchCheckBox.IsChecked is not true) options |= RegexOptions.IgnoreCase; + if (UsePatternCheckBox.IsChecked is true) options |= RegexOptions.IgnorePatternWhitespace; + + try { return new Regex(rawPattern, options, TimeSpan.FromSeconds(5)); } + catch { return null; } + } + + private void SearchSpreadsheetCells() + { + FindResults.Clear(); + ResultsListView.ItemsSource = null; + Matches = null; + + if (textEditWindow is null || string.IsNullOrWhiteSpace(FindTextBox.Text)) + { + MatchesText.Text = "0 Matches"; + return; + } + + Regex? 
regex = BuildCurrentRegex(); + if (regex is null) { MatchesText.Text = "0 Matches"; return; } + + textEditWindow.CommitSpreadsheetAndSync(); + + List results; + try { results = textEditWindow.SearchSpreadsheetCells(regex); } + catch (RegexMatchTimeoutException) { MatchesText.Text = "Regex timeout"; return; } + + FindResults.AddRange(results); + if (FindResults.Count == 0) { MatchesText.Text = "0 Matches"; return; } + + MatchesText.Text = FindResults.Count == 1 ? "1 Match" : $"{FindResults.Count} Matches"; + ResultsListView.IsEnabled = true; + ResultsListView.ItemsSource = FindResults; + + FindResult first = FindResults[0]; + if (this.IsFocused && first.RowIndex.HasValue && first.ColumnIndex.HasValue) + { + textEditWindow.NavigateToSpreadsheetCell(first.RowIndex.Value, first.ColumnIndex.Value); + this.Focus(); + } + } + public void ShouldCloseWithThisETW(EditTextWindow etw) { if (textEditWindow is not null && etw == textEditWindow) @@ -200,6 +259,12 @@ private void PrecisionSlider_Tick(object? sender, EventArgs? 
e) private void CopyMatchesCmd_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(FindTextBox.Text); + return; + } + if (Matches is null || Matches.Count < 1 || string.IsNullOrEmpty(FindTextBox.Text)) e.CanExecute = false; else @@ -208,9 +273,9 @@ private void CopyMatchesCmd_CanExecute(object sender, CanExecuteRoutedEventArgs private void CopyMatchesCmd_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || textEditWindow is null - || Matches.Count < 1) + if (textEditWindow is null) return; + + if (!IsSpreadsheetSearch && (Matches is null || Matches.Count < 1)) return; StringBuilder stringBuilder = new(); @@ -230,6 +295,12 @@ private void CopyMatchesCmd_Executed(object sender, ExecutedRoutedEventArgs e) private void DeleteAll_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(FindTextBox.Text); + return; + } + if (Matches is not null && Matches.Count > 1 && !string.IsNullOrEmpty(FindTextBox.Text)) e.CanExecute = true; else @@ -238,24 +309,41 @@ private void DeleteAll_CanExecute(object sender, CanExecuteRoutedEventArgs e) private async void DeleteAll_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || Matches.Count < 1 - || textEditWindow is null) + if (textEditWindow is null) return; + + if (IsSpreadsheetSearch) + { + if (FindResults.Count == 0) return; + SetWindowToLoading(); + Regex? regex = BuildCurrentRegex(); + if (regex is null) { ResetWindowLoading(); return; } + IList selection = ResultsListView.SelectedItems; + List targets = selection.Count >= 2 + ? [.. selection.Cast()] + : [.. 
ResultsListView.Items.Cast()]; + await Task.Run(() => Dispatcher.Invoke(() => + textEditWindow.ReplaceInSpreadsheetCells(targets, string.Empty, regex))); + SearchForText(); + ResetWindowLoading(); + return; + } + + if (Matches is null || Matches.Count < 1) return; SetWindowToLoading(); - IList selection = ResultsListView.SelectedItems; + IList selection2 = ResultsListView.SelectedItems; StringBuilder stringBuilderOfText = new(textEditWindow.PassedTextControl.Text); await Task.Run(() => { - if (selection.Count < 2) - selection = ResultsListView.Items; + if (selection2.Count < 2) + selection2 = ResultsListView.Items; - for (int j = selection.Count - 1; j >= 0; j--) + for (int j = selection2.Count - 1; j >= 0; j--) { - if (selection[j] is not FindResult selectedResult) + if (selection2[j] is not FindResult selectedResult) continue; stringBuilderOfText.Remove(selectedResult.Index, selectedResult.Length); @@ -270,6 +358,8 @@ await Task.Run(() => private void EditTextBoxChanged(object sender, TextChangedEventArgs e) { + if (IsSpreadsheetSearch) return; + ChangeFindTextTimer.Stop(); if (textEditWindow is not null) StringFromWindow = textEditWindow.PassedTextControl.Text; @@ -279,6 +369,8 @@ private void EditTextBoxChanged(object sender, TextChangedEventArgs e) private void ExtractPattern_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) { e.CanExecute = false; return; } + if (textEditWindow is not null && textEditWindow.PassedTextControl.SelectedText.Length > 0) e.CanExecute = true; @@ -410,6 +502,12 @@ private void OptionsChangedRefresh(object sender, RoutedEventArgs e) private void Replace_CanExecute(object sender, CanExecuteRoutedEventArgs e) { + if (IsSpreadsheetSearch) + { + e.CanExecute = FindResults.Count > 0 && !string.IsNullOrEmpty(ReplaceTextBox.Text); + return; + } + if (string.IsNullOrEmpty(ReplaceTextBox.Text) || Matches is null || Matches.Count < 1) @@ -420,10 +518,21 @@ private void Replace_CanExecute(object sender, 
CanExecuteRoutedEventArgs e) private void Replace_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || textEditWindow is null - || ResultsListView.Items.Count is 0) + if (textEditWindow is null || ResultsListView.Items.Count is 0) + return; + + if (IsSpreadsheetSearch) + { + if (ResultsListView.SelectedIndex == -1) ResultsListView.SelectedIndex = 0; + if (ResultsListView.SelectedItem is not FindResult fr) return; + Regex? regex = BuildCurrentRegex(); + if (regex is null) return; + textEditWindow.ReplaceInSpreadsheetCells([fr], ReplaceTextBox.Text, regex); + SearchForText(); return; + } + + if (Matches is null) return; if (ResultsListView.SelectedIndex == -1) ResultsListView.SelectedIndex = 0; @@ -439,26 +548,44 @@ private void Replace_Executed(object sender, ExecutedRoutedEventArgs e) private async void ReplaceAll_Executed(object sender, ExecutedRoutedEventArgs e) { - if (Matches is null - || Matches.Count < 1 - || textEditWindow is null) + if (textEditWindow is null) return; + + if (IsSpreadsheetSearch) + { + if (FindResults.Count == 0) return; + SetWindowToLoading(); + Regex? regex = BuildCurrentRegex(); + if (regex is null) { ResetWindowLoading(); return; } + IList selection = ResultsListView.SelectedItems; + List targets = selection.Count >= 2 + ? [.. selection.Cast()] + : [.. 
ResultsListView.Items.Cast()]; + string replaceWith = ReplaceTextBox.Text; + await Task.Run(() => Dispatcher.Invoke(() => + textEditWindow.ReplaceInSpreadsheetCells(targets, replaceWith, regex))); + SearchForText(); + ResetWindowLoading(); + return; + } + + if (Matches is null || Matches.Count < 1) return; SetWindowToLoading(); StringBuilder stringBuilder = new(textEditWindow.PassedTextControl.Text); - IList selection = ResultsListView.SelectedItems; + IList selection2 = ResultsListView.SelectedItems; string newText = ReplaceTextBox.Text; await Task.Run(() => { - if (selection.Count < 2) - selection = ResultsListView.Items; + if (selection2.Count < 2) + selection2 = ResultsListView.Items; - for (int j = selection.Count - 1; j >= 0; j--) + for (int j = selection2.Count - 1; j >= 0; j--) { - if (selection[j] is not FindResult selectedResult) + if (selection2[j] is not FindResult selectedResult) continue; stringBuilder.Remove(selectedResult.Index, selectedResult.Length); @@ -486,15 +613,21 @@ private void SetWindowToLoading() private void ResultsListView_SelectionChanged(object sender, SelectionChangedEventArgs e) { - if (ResultsListView.SelectedItem is not FindResult selectedResult) + if (ResultsListView.SelectedItem is not FindResult selectedResult || textEditWindow is null) return; - if (textEditWindow is not null) + if (IsSpreadsheetSearch) { - textEditWindow.PassedTextControl.Focus(); - textEditWindow.PassedTextControl.Select(selectedResult.Index, selectedResult.Length); + if (selectedResult.RowIndex.HasValue && selectedResult.ColumnIndex.HasValue) + textEditWindow.NavigateToSpreadsheetCell( + selectedResult.RowIndex.Value, selectedResult.ColumnIndex.Value); this.Focus(); + return; } + + textEditWindow.PassedTextControl.Focus(); + textEditWindow.PassedTextControl.Select(selectedResult.Index, selectedResult.Length); + this.Focus(); } private void SetExtraOptionsVisibility(Visibility optionsVisibility) diff --git a/Text-Grab/Models/FindResult.cs 
b/Text-Grab/Models/FindResult.cs index 7a083c6e..4b7da09f 100644 --- a/Text-Grab/Models/FindResult.cs +++ b/Text-Grab/Models/FindResult.cs @@ -13,11 +13,22 @@ public class FindResult public string PreviewRight { get; set; } = ""; - public int Length + public int Length => Text.Length; + + public int? RowIndex { get; set; } + + public int? ColumnIndex { get; set; } + + public string CellAddress { get { - return Text.Length; + if (RowIndex is null || ColumnIndex is null) return string.Empty; + string colLabel = EditTextTableDocument.GetSpreadsheetColumnLabel(ColumnIndex.Value); + return $"Cell: {colLabel}{RowIndex.Value + 1}"; } } + + public string LocationDisplay => + CellAddress.Length > 0 ? CellAddress : $"At index: {Index}"; } diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index 15f42842..e6142772 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -1106,6 +1106,15 @@ private void SpreadsheetDataGrid_PreviewKeyDown(object sender, System.Windows.In return; } + if (e.Key == Key.V + && (Keyboard.IsKeyDown(Key.LeftCtrl) || Keyboard.IsKeyDown(Key.RightCtrl)) + && !IsSpreadsheetCellEditorFocused()) + { + e.Handled = true; + PasteIntoSpreadsheet(); + return; + } + if (e.Key != Key.Enter || SpreadsheetDataGrid.CurrentCell.Column is null) return; @@ -1224,6 +1233,66 @@ internal static void ClearSpreadsheetCellValues(DataTable dataTable, IEnumerable } } + private void PasteIntoSpreadsheet() + { + string clipboardText; + try + { + if (!ClipboardUtilities.TryGetHtmlTableAsTabSeparated(out clipboardText)) + clipboardText = System.Windows.Clipboard.GetText(); + } + catch (Exception ex) + { + Debug.WriteLine($"PasteIntoSpreadsheet: clipboard read failed. 
{ex.Message}"); + return; + } + + if (string.IsNullOrEmpty(clipboardText)) + return; + + int startRow = Math.Max(0, SpreadsheetDataGrid.Items.IndexOf(SpreadsheetDataGrid.CurrentItem)); + int startCol = Math.Max(0, SpreadsheetDataGrid.CurrentCell.Column?.DisplayIndex ?? 0); + + // Parse clipboard text into a 2D array of cell values + string[] lines = clipboardText.Split('\n'); + List pastedRows = []; + foreach (string line in lines) + pastedRows.Add(line.TrimEnd('\r').Split('\t')); + + // Remove trailing empty row artifact produced by a final newline in copied table text + while (pastedRows.Count > 1 && pastedRows[^1].Length == 1 && pastedRows[^1][0].Length == 0) + pastedRows.RemoveAt(pastedRows.Count - 1); + + if (pastedRows.Count == 0) + return; + + int maxPastedCols = pastedRows.Max(row => row.Length); + + ApplySpreadsheetDocumentChange(document => + { + // Expand the document to fit the pasted data if necessary + int requiredRows = startRow + pastedRows.Count; + int requiredCols = startCol + maxPastedCols; + document.RowCount = Math.Max(document.RowCount, requiredRows); + document.ColumnCount = Math.Max(document.ColumnCount, requiredCols); + document.MinimumRowCount = Math.Max(document.MinimumRowCount, requiredRows); + document.MinimumColumnCount = Math.Max(document.MinimumColumnCount, requiredCols); + document.EnsureMinimumSize(); + + // Write values into the target cells + for (int r = 0; r < pastedRows.Count; r++) + { + int targetRow = startRow + r; + for (int c = 0; c < pastedRows[r].Length; c++) + { + int targetCol = startCol + c; + if (targetRow < document.Rows.Count && targetCol < document.Rows[targetRow].Count) + document.Rows[targetRow][targetCol] = pastedRows[r][c]; + } + } + }, startRow, startCol); + } + internal static string BuildSpreadsheetSelectionText( DataTable dataTable, IEnumerable<(int RowIndex, int ColumnIndex)> cellCoordinates) @@ -1885,6 +1954,7 @@ internal async void OpenPath(string pathOfFileToOpen, bool isMultipleFiles = fal finally { 
isLoadingOpenedFile = false; + SyncTextFromActiveEditor(); SetOpenedFileState(shouldTrackOpenedFile ? pathOfFileToOpen : null); } } @@ -2359,6 +2429,82 @@ public string GetSelectedTextOrAllText() return textToModify; } + public bool IsSpreadsheetMode => editorMode == EtwEditorMode.Spreadsheet; + + public void CommitSpreadsheetAndSync() + { + CommitSpreadsheetEditsAndCapturePendingHistory(); + SyncSpreadsheetDocumentFromTable(writeText: false); + } + + public void NavigateToSpreadsheetCell(int rowIndex, int columnIndex) + { + Dispatcher.BeginInvoke( + () => FocusSpreadsheetCell(rowIndex, columnIndex, beginEdit: false), + DispatcherPriority.Background); + } + + public List SearchSpreadsheetCells(Regex pattern) + { + if (tableDocument is null) return []; + tableDocument.EnsureMinimumSize(); + List results = []; + int count = 1; + + for (int row = 0; row < tableDocument.RowCount; row++) + { + List rowData = tableDocument.Rows[row]; + for (int col = 0; col < tableDocument.ColumnCount; col++) + { + string cellValue = col < rowData.Count ? rowData[col] ?? string.Empty : string.Empty; + foreach (Match m in pattern.Matches(cellValue)) + { + int previewStart = Math.Max(0, m.Index - 12); + int previewEnd = Math.Min(cellValue.Length, m.Index + m.Length + 12); + results.Add(new FindResult + { + RowIndex = row, + ColumnIndex = col, + Text = m.Value.MakeStringSingleLine(), + PreviewLeft = cellValue[previewStart..m.Index], + PreviewRight = cellValue[(m.Index + m.Length)..previewEnd], + Count = count++ + }); + } + } + } + return results; + } + + public void ReplaceInSpreadsheetCells( + IEnumerable targets, + string replaceWith, + Regex pattern) + { + CommitSpreadsheetEditsAndCapturePendingHistory(); + SyncSpreadsheetDocumentFromTable(writeText: false); + + if (tableDocument is null) return; + + SpreadsheetUndoState? 
beforeState = CreateCurrentSpreadsheetUndoState(syncFromTable: false); + + var updates = targets + .Where(r => r.RowIndex.HasValue && r.ColumnIndex.HasValue) + .GroupBy(r => (r.RowIndex!.Value, r.ColumnIndex!.Value)) + .Select(g => + { + int row = g.Key.Item1, col = g.Key.Item2; + string oldValue = row < tableDocument.Rows.Count && col < tableDocument.Rows[row].Count + ? tableDocument.Rows[row][col] ?? string.Empty : string.Empty; + return (RowIndex: row, ColumnIndex: col, Value: pattern.Replace(oldValue, replaceWith)); + }); + + SetSpreadsheetDocumentCellValues(tableDocument, updates); + RebuildSpreadsheetTable(); + UpdateTextFromSpreadsheetDocument(); + RecordSpreadsheetUndoChange(beforeState, CreateCurrentSpreadsheetUndoState(syncFromTable: false)); + } + private IEnumerable GetSelectedOrAllTextSegmentsForEdit() { if (editorMode == EtwEditorMode.Spreadsheet) @@ -3403,7 +3549,13 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul { try { - string textFromClipboard = await dataPackageView.GetTextAsync(); + string textFromClipboard; + if (editorMode == EtwEditorMode.Text + && ClipboardUtilities.TryGetHtmlTableAsTabSeparated(out string htmlTableText)) + textFromClipboard = htmlTableText; + else + textFromClipboard = await dataPackageView.GetTextAsync(); + System.Windows.Application.Current.Dispatcher.Invoke(new Action(() => { AddCopiedTextToTextBox(textFromClipboard); })); } catch (Exception ex) From 493b304ff065b3d3d2bf97b644865e1ee4536878 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Fri, 1 May 2026 23:13:23 -0500 Subject: [PATCH 16/22] update github actions --- .github/workflows/Release.yml | 10 +++++----- .github/workflows/buildDev.yml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/Release.yml b/.github/workflows/Release.yml index db3fba66..79f9c39c 100644 --- a/.github/workflows/Release.yml +++ b/.github/workflows/Release.yml @@ -36,7 +36,7 @@ jobs: build: runs-on: windows-latest steps: 
- - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup .NET uses: actions/setup-dotnet@v5 @@ -231,25 +231,25 @@ jobs: } - name: Upload build artifact (x64 framework-dependent) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-x64-framework-dependent path: ${{ env.BUILD_X64 }} - name: Upload build artifact (x64 self-contained) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-x64-self-contained path: ${{ steps.compute.outputs.archive_x64_sc }} - name: Upload build artifact (ARM64 framework-dependent) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-arm64-framework-dependent path: ${{ env.BUILD_ARM64 }} - name: Upload build artifact (ARM64 self-contained) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab-win-arm64-self-contained path: ${{ steps.compute.outputs.archive_arm64_sc }} diff --git a/.github/workflows/buildDev.yml b/.github/workflows/buildDev.yml index 71e04cf9..ae14fbb8 100644 --- a/.github/workflows/buildDev.yml +++ b/.github/workflows/buildDev.yml @@ -17,7 +17,7 @@ jobs: build: runs-on: windows-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup .NET uses: actions/setup-dotnet@v5 with: @@ -33,7 +33,7 @@ jobs: run: dotnet publish ${{ env.PROJECT_PATH }} -c Release --self-contained -r win-x64 -p:PublishSingleFile=true -p:EnableMsixTooling=true -o publish - name: Upload artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: Text-Grab path: .\publish From 2091e938123ebc5e887058335fac7939659858a2 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Fri, 1 May 2026 23:14:13 -0500 Subject: [PATCH 17/22] remove unused file --- .../AnalysisReport_20260125_220624_678.md | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 .codetesting/AnalysisReport_20260125_220624_678.md diff 
--git a/.codetesting/AnalysisReport_20260125_220624_678.md b/.codetesting/AnalysisReport_20260125_220624_678.md deleted file mode 100644 index bef7b7d5..00000000 --- a/.codetesting/AnalysisReport_20260125_220624_678.md +++ /dev/null @@ -1,20 +0,0 @@ -# Test Failures due possible code bugs - -## Tests.csproj - Text_Grab.Utilities.UnitTests.WindowsAiUtilitiesTests.CleanRegexResult_OnlyOpeningFence_ReturnsPattern -- **Confidence**: High -- **Test File**: Tests\Utilities\WindowsAiUtilitiesTests.cs -- **Bug Location**: Text-Grab\Utilities\WindowsAiUtilities.cs@588-592 - -### Analysis -The production code has a logical error in the CleanRegexResult method. The Where clause at lines 588-592 filters out lines starting with 'Pattern:' (case-insensitive) BEFORE the Select clause at lines 593-601 can remove the 'pattern:' prefix. When the input is '```\npattern: [a-z]+', after removing the opening fence, we have 'pattern: [a-z]+'. This line gets filtered out by the Where clause because it starts with 'Pattern:' (case-insensitive), so the Select clause never gets a chance to remove the prefix. The method then returns the cleaned text as-is ('pattern: [a-z]+') instead of the extracted pattern ('[a-z]+'). The fix is to remove the filtering of 'Pattern:', 'Regex:', and 'Expression:' from the Where clause (lines 590-592), allowing the Select clause to handle prefix removal. - -### Suggested Fix -In the CleanRegexResult method at D:\source\TheJoeFin\Text-Grab\Text-Grab\Utilities\WindowsAiUtilities.cs, remove lines 590-592 from the Where clause. The Where clause should only filter out comment lines (lines starting with '//' or '#'), not descriptor lines like 'Regex:', 'Pattern:', or 'Expression:', since the subsequent Select clause is designed to handle removing these prefixes. 
The corrected Where clause should be: - -```csharp -.Where(line => !line.StartsWith("//", StringComparison.Ordinal) && - !line.StartsWith('#')) -``` - -This allows lines with 'pattern:', 'regex:', or 'expression:' prefixes to reach the Select clause where the prefixes are properly removed. - From 38b679f252e35d18b94c3d5788dd477193d4bedf Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Fri, 1 May 2026 23:21:40 -0500 Subject: [PATCH 18/22] Simplify Markdig pipeline with UseAdvancedExtensions Replaces individual Markdig extension calls with UseAdvancedExtensions for a cleaner and more maintainable pipeline configuration. This enables a broad set of advanced features in one step. --- Text-Grab/Utilities/MarkdownDocumentUtilities.cs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs index 72c795e6..0097bc59 100644 --- a/Text-Grab/Utilities/MarkdownDocumentUtilities.cs +++ b/Text-Grab/Utilities/MarkdownDocumentUtilities.cs @@ -31,16 +31,7 @@ public static partial class MarkdownDocumentUtilities private static readonly Regex MarkdownPatternRegex = MarkdownPattern(); private static readonly MarkdownPipeline MarkdownPipeline = new MarkdownPipelineBuilder() - .UseAutoLinks() - .UsePipeTables() - .UseTaskLists() - .UseCitations() - .UseDiagrams() - .UseAlertBlocks() - .UseEmojiAndSmiley() - .UseEmphasisExtras() - .UseAutoIdentifiers() - .UseGridTables() + .UseAdvancedExtensions() .Build(); private enum MarkdownBlockRole From 703f09ff85d8ee1e16229140b1a1880948afef0b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 2 May 2026 15:25:14 +0000 Subject: [PATCH 19/22] Fix review comments: disposal, race conditions, cache eviction, find/replace, colspan/rowspan Agent-Logs-Url: https://github.com/TheJoeFin/Text-Grab/sessions/c5acdc4a-84dd-4662-b5d3-335886f0b193 Co-authored-by: TheJoeFin 
<7809853+TheJoeFin@users.noreply.github.com> --- Tests/ClipboardUtilitiesTests.cs | 36 +++++++++ Text-Grab/Utilities/ClipboardUtilities.cs | 86 ++++++++++++++++++++-- Text-Grab/Utilities/OcrUtilities.cs | 4 +- Text-Grab/Utilities/PdfDocumentRenderer.cs | 17 ++++- Text-Grab/Views/EditTextWindow.xaml.cs | 8 +- Text-Grab/Views/GrabFrame.xaml.cs | 58 ++++++++++----- 6 files changed, 180 insertions(+), 29 deletions(-) diff --git a/Tests/ClipboardUtilitiesTests.cs b/Tests/ClipboardUtilitiesTests.cs index 5e8896a6..f88de72c 100644 --- a/Tests/ClipboardUtilitiesTests.cs +++ b/Tests/ClipboardUtilitiesTests.cs @@ -98,4 +98,40 @@ public void ConvertHtmlToTabSeparated_HandlesThElements() Assert.Equal("Name\tValue", lines[0]); Assert.Equal("Foo\t42", lines[1]); } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesColspan() + { + string html = """
+            <table>
+                <tr><td colspan="2">Merged</td><td>Right</td></tr>
+                <tr><td>A</td><td>B</td><td>C</td></tr>
+            </table>
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Merged\tMerged\tRight", lines[0]); + Assert.Equal("A\tB\tC", lines[1]); + } + + [Fact] + public void ConvertHtmlToTabSeparated_HandlesRowspan() + { + string html = """
+            <table>
+                <tr><td rowspan="2">Tall</td><td>Top</td></tr>
+                <tr><td>Bottom</td></tr>
+            </table>
+ """; + + string result = ClipboardUtilities.ConvertHtmlToTabSeparated(html); + + string[] lines = result.Split('\n'); + Assert.Equal(2, lines.Length); + Assert.Equal("Tall\tTop", lines[0]); + Assert.Equal("Tall\tBottom", lines[1]); + } } diff --git a/Text-Grab/Utilities/ClipboardUtilities.cs b/Text-Grab/Utilities/ClipboardUtilities.cs index 804d7231..e9499b7d 100644 --- a/Text-Grab/Utilities/ClipboardUtilities.cs +++ b/Text-Grab/Utilities/ClipboardUtilities.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Net; using System.Text; using System.Text.RegularExpressions; @@ -222,8 +223,11 @@ private static List> ParseHtmlTableToGrid(string html) tableEnd = tableEnd >= 0 ? tableEnd + 8 : html.Length; string tableHtml = html[tableStart..tableEnd]; - int pos = 0; + // Tracks cells that span into future rows: col -> (remaining rows to fill, cell content) + Dictionary rowspanMap = []; + + int pos = 0; while (pos < tableHtml.Length) { int rowStart = tableHtml.IndexOf("> ParseHtmlTableToGrid(string html) int rowEnd = tableHtml.IndexOf("", rowStart, StringComparison.OrdinalIgnoreCase); rowEnd = rowEnd >= 0 ? 
rowEnd + 5 : tableHtml.Length; - List cells = ParseHtmlRowCells(tableHtml[rowStart..rowEnd]); - if (cells.Count > 0) - result.Add(cells); + List<(string Text, int ColSpan, int RowSpan)> parsedCells = + ParseHtmlRowCells(tableHtml[rowStart..rowEnd]); + + if (parsedCells.Count > 0 || rowspanMap.Count > 0) + { + // Build a sparse column map for this row + Dictionary rowData = []; + + // Apply rowspan carry-overs from previous rows first + foreach (int col in rowspanMap.Keys.OrderBy(k => k).ToList()) + { + (int rem, string content) = rowspanMap[col]; + rowData[col] = content; + if (rem > 1) + rowspanMap[col] = (rem - 1, content); + else + rowspanMap.Remove(col); + } + + // Place each parsed cell in the next free column(s) + int nextFreeCol = 0; + foreach ((string text, int colspan, int rowspan) in parsedCells) + { + // Advance past columns already occupied by rowspan carry-overs + while (rowData.ContainsKey(nextFreeCol)) + nextFreeCol++; + + for (int cs = 0; cs < colspan; cs++) + rowData[nextFreeCol + cs] = text; + + if (rowspan > 1) + for (int cs = 0; cs < colspan; cs++) + rowspanMap[nextFreeCol + cs] = (rowspan - 1, text); + + nextFreeCol += colspan; + } + + if (rowData.Count > 0) + { + int colCount = rowData.Keys.Max() + 1; + List row = []; + for (int c = 0; c < colCount; c++) + row.Add(rowData.TryGetValue(c, out string? cell) ? 
cell : string.Empty); + result.Add(row); + } + } pos = rowEnd; } @@ -242,9 +289,9 @@ private static List> ParseHtmlTableToGrid(string html) return result; } - private static List ParseHtmlRowCells(string rowHtml) + private static List<(string Text, int ColSpan, int RowSpan)> ParseHtmlRowCells(string rowHtml) { - List cells = []; + List<(string, int, int)> cells = []; int pos = 0; while (pos < rowHtml.Length) @@ -270,17 +317,42 @@ private static List ParseHtmlRowCells(string rowHtml) int openEnd = rowHtml.IndexOf('>', cellStart); if (openEnd < 0) break; + string tagAttributes = rowHtml[(cellStart + 3)..openEnd]; + int colspan = ParseSpanAttribute(tagAttributes, "colspan"); + int rowspan = ParseSpanAttribute(tagAttributes, "rowspan"); + int contentStart = openEnd + 1; int contentEnd = rowHtml.IndexOf(endTag, contentStart, StringComparison.OrdinalIgnoreCase); contentEnd = contentEnd >= 0 ? contentEnd : rowHtml.Length; - cells.Add(CleanHtmlCellContent(rowHtml[contentStart..contentEnd])); + cells.Add((CleanHtmlCellContent(rowHtml[contentStart..contentEnd]), colspan, rowspan)); pos = contentEnd + endTag.Length; } return cells; } + private static int ParseSpanAttribute(string tagAttributes, string attributeName) + { + int attrPos = tagAttributes.IndexOf(attributeName, StringComparison.OrdinalIgnoreCase); + if (attrPos < 0) return 1; + + int eqPos = tagAttributes.IndexOf('=', attrPos + attributeName.Length); + if (eqPos < 0) return 1; + + int valueStart = eqPos + 1; + while (valueStart < tagAttributes.Length && tagAttributes[valueStart] is ' ' or '"' or '\'') + valueStart++; + + int valueEnd = valueStart; + while (valueEnd < tagAttributes.Length && char.IsDigit(tagAttributes[valueEnd])) + valueEnd++; + + if (valueEnd == valueStart) return 1; + + return int.TryParse(tagAttributes[valueStart..valueEnd], out int span) && span >= 1 ? 
span : 1; + } + private static string CleanHtmlCellContent(string html) { if (string.IsNullOrEmpty(html)) diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index aa14648d..4b88756d 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -544,7 +544,7 @@ public static async Task OcrAbsoluteFilePathAsync(string absolutePath, I if (IoUtilities.IsPdfFileExtension(Path.GetExtension(absolutePath))) { - PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(absolutePath); + using PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(absolutePath); return await pdfDocument.ExtractTextAsync(language); } @@ -666,7 +666,7 @@ public static async Task OcrFile(string path, ILanguage? selectedLanguag { if (IoUtilities.IsPdfFileExtension(Path.GetExtension(path))) { - PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(path); + using PdfDocumentRenderer pdfDocument = await PdfDocumentRenderer.LoadAsync(path); ocrText = await pdfDocument.ExtractTextAsync(selectedLanguage, grabTemplate); } else diff --git a/Text-Grab/Utilities/PdfDocumentRenderer.cs b/Text-Grab/Utilities/PdfDocumentRenderer.cs index c5bbb47a..74127ff7 100644 --- a/Text-Grab/Utilities/PdfDocumentRenderer.cs +++ b/Text-Grab/Utilities/PdfDocumentRenderer.cs @@ -66,9 +66,11 @@ public PdfPageTextLine(Windows.Foundation.Rect sourceRect, string text, bool isN internal sealed class PdfDocumentRenderer : IDisposable { private const double DefaultRenderScale = 2.0; + private const int MaxCachedPages = 10; private readonly WinPdfDocument renderDocument; private readonly PigPdfDocument textDocument; private readonly Dictionary pageCache = []; + private readonly LinkedList cacheOrder = new(); private PdfDocumentRenderer(string filePath, WinPdfDocument renderDocument, PigPdfDocument textDocument) { @@ -123,7 +125,11 @@ public async Task GetPageContentAsync(int pageIndex) ValidatePageIndex(pageIndex); if 
(pageCache.TryGetValue(pageIndex, out PdfPageContent? cachedPage)) + { + cacheOrder.Remove(pageIndex); + cacheOrder.AddLast(pageIndex); return cachedPage; + } WinPdfPage renderPage = renderDocument.GetPage((uint)pageIndex); try @@ -135,7 +141,15 @@ public async Task GetPageContentAsync(int pageIndex) List imageRegions = ExtractImageRegions(textPage, renderedPage.PixelWidth, renderedPage.PixelHeight); PdfPageContent pageContent = new(pageIndex, renderedPage, nativeLines, imageRegions); + + if (pageCache.Count >= MaxCachedPages && cacheOrder.First is LinkedListNode oldest) + { + pageCache.Remove(oldest.Value); + cacheOrder.RemoveFirst(); + } + pageCache[pageIndex] = pageContent; + cacheOrder.AddLast(pageIndex); return pageContent; } finally @@ -159,7 +173,8 @@ public async Task> GetSelectableLinesAsync(int pa IReadOnlyList imageOcrLines = await GetOcrLinesAsync( pageContent.RenderedPage, resolvedLanguage, - sourceRect => ShouldIncludeOcrLine(sourceRect, pageContent.ImageRegions)); + sourceRect => ShouldIncludeOcrLine(sourceRect, pageContent.ImageRegions) + && !ShouldIncludeOcrLine(sourceRect, pageContent.NativeLines.Select(l => l.SourceRect).ToList())); combinedLines.AddRange(imageOcrLines); return SortLines(combinedLines); diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index e6142772..972b12f2 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -2465,6 +2465,7 @@ public List SearchSpreadsheetCells(Regex pattern) { RowIndex = row, ColumnIndex = col, + Index = m.Index, Text = m.Value.MakeStringSingleLine(), PreviewLeft = cellValue[previewStart..m.Index], PreviewRight = cellValue[(m.Index + m.Length)..previewEnd], @@ -2496,7 +2497,12 @@ public void ReplaceInSpreadsheetCells( int row = g.Key.Item1, col = g.Key.Item2; string oldValue = row < tableDocument.Rows.Count && col < tableDocument.Rows[row].Count ? tableDocument.Rows[row][col] ?? 
string.Empty : string.Empty; - return (RowIndex: row, ColumnIndex: col, Value: pattern.Replace(oldValue, replaceWith)); + + HashSet indicesToReplace = [.. g.Select(r => r.Index)]; + string newValue = pattern.Replace(oldValue, m => + indicesToReplace.Contains(m.Index) ? m.Result(replaceWith) : m.Value); + + return (RowIndex: row, ColumnIndex: col, Value: newValue); }); SetSpreadsheetDocumentCellValues(tableDocument, updates); diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index a18f4226..15bd1eae 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -98,6 +98,7 @@ public partial class GrabFrame : Window private int translatedWordsCount = 0; private CancellationTokenSource? translationCancellationTokenSource; private readonly List pdfTextLineOverlays = []; + private CancellationTokenSource? _pdfPageNavCts; private const string TargetLanguageMenuHeader = "Target Language"; #endregion Fields @@ -552,6 +553,9 @@ private void ShowFrameMessage(string message) private void ClearLoadedPdfDocument() { + _pdfPageNavCts?.Cancel(); + _pdfPageNavCts?.Dispose(); + _pdfPageNavCts = null; _loadedPdfDocument?.Dispose(); _loadedPdfDocument = null; _currentPdfPageContent = null; @@ -579,26 +583,42 @@ private async Task ShowPdfPageAsync(int pageIndex) if (_loadedPdfDocument is null) return; - reDrawTimer.Stop(); - ResetGrabFrame(); - await Task.Delay(300); + CancellationTokenSource? 
previousCts = _pdfPageNavCts; + _pdfPageNavCts = new CancellationTokenSource(); + CancellationToken ct = _pdfPageNavCts.Token; + previousCts?.Cancel(); + previousCts?.Dispose(); - _currentPdfPageContent = await _loadedPdfDocument.GetPageContentAsync(pageIndex); - frameContentImageSource = _currentPdfPageContent.RenderedPage; - hasLoadedImageSource = true; - isStaticImageSource = true; - frozenUiAutomationSnapshot = null; - liveUiAutomationSnapshot = null; - _currentImagePath = _loadedPdfDocument.FilePath; - _currentPdfPageIndex = pageIndex; - FreezeToggleButton.IsChecked = true; - FreezeGrabFrame(); - MainZoomBorder.CanZoom = true; - FreezeToggleButton.Visibility = Visibility.Collapsed; - UpdatePdfPageNavigation(); - SwitchToOcrFallbackIfUiAutomation(); + try + { + reDrawTimer.Stop(); + ResetGrabFrame(); + await Task.Delay(300, ct); - reDrawTimer.Start(); + if (_loadedPdfDocument is null || ct.IsCancellationRequested) + return; + + _currentPdfPageContent = await _loadedPdfDocument.GetPageContentAsync(pageIndex); + frameContentImageSource = _currentPdfPageContent.RenderedPage; + hasLoadedImageSource = true; + isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; + _currentImagePath = _loadedPdfDocument.FilePath; + _currentPdfPageIndex = pageIndex; + FreezeToggleButton.IsChecked = true; + FreezeGrabFrame(); + MainZoomBorder.CanZoom = true; + FreezeToggleButton.Visibility = Visibility.Collapsed; + UpdatePdfPageNavigation(); + SwitchToOcrFallbackIfUiAutomation(); + + reDrawTimer.Start(); + } + catch (OperationCanceledException) + { + // Navigation superseded by a newer request — no-op + } } private void UpdatePdfPageNavigation() @@ -2218,6 +2238,7 @@ private void GrabFrameWindow_Closing(object sender, System.ComponentModel.Cancel Singleton.Instance.SaveToHistory(this); historyItem?.ClearTransientImage(); + ClearLoadedPdfDocument(); FrameText = ""; wordBorders.Clear(); @@ -3585,6 +3606,7 @@ private async Task 
TryLoadPdfFromPath(string path) { try { + ClearLoadedPdfDocument(); _loadedPdfDocument = await PdfDocumentRenderer.LoadAsync(path); _currentImagePath = Path.GetFullPath(path); await ShowPdfPageAsync(0); From 5f4772acab45b823464feff44e9d3455def7e13b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 2 May 2026 15:28:17 +0000 Subject: [PATCH 20/22] Optimize: materialize native rects once before OCR predicate Agent-Logs-Url: https://github.com/TheJoeFin/Text-Grab/sessions/c5acdc4a-84dd-4662-b5d3-335886f0b193 Co-authored-by: TheJoeFin <7809853+TheJoeFin@users.noreply.github.com> --- Text-Grab/Utilities/PdfDocumentRenderer.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Text-Grab/Utilities/PdfDocumentRenderer.cs b/Text-Grab/Utilities/PdfDocumentRenderer.cs index 74127ff7..6e27cd11 100644 --- a/Text-Grab/Utilities/PdfDocumentRenderer.cs +++ b/Text-Grab/Utilities/PdfDocumentRenderer.cs @@ -170,11 +170,12 @@ public async Task> GetSelectableLinesAsync(int pa return pageContent.NativeLines; List combinedLines = [.. pageContent.NativeLines]; + IReadOnlyList nativeRects = [.. 
pageContent.NativeLines.Select(l => l.SourceRect)]; IReadOnlyList imageOcrLines = await GetOcrLinesAsync( pageContent.RenderedPage, resolvedLanguage, sourceRect => ShouldIncludeOcrLine(sourceRect, pageContent.ImageRegions) - && !ShouldIncludeOcrLine(sourceRect, pageContent.NativeLines.Select(l => l.SourceRect).ToList())); + && !ShouldIncludeOcrLine(sourceRect, nativeRects)); combinedLines.AddRange(imageOcrLines); return SortLines(combinedLines); From d1cabc51bb3270309bd2d4fa2b20dc2ab89c93c7 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 2 May 2026 10:43:19 -0500 Subject: [PATCH 21/22] Support PDFs in folder OCR, improve menu usability Expanded file filtering to include PDFs for OCR, updated UI messages to reference "files" instead of just "images," clarified variable typing in spreadsheet undo logic, and set StaysOpenOnClick for relevant menu items to enhance user experience. --- Text-Grab/Views/EditTextWindow.xaml.cs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index 972b12f2..eb72d0f8 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -259,11 +259,11 @@ public async Task OcrAllImagesInFolder(string folderPath, OcrDirectoryOptions op if (files is null) return; - List imageFiles = [.. files.Where(x => IoUtilities.ImageExtensions.Contains(Path.GetExtension(x).ToLower()))]; + List imageFiles = [.. 
files.Where(x => IoUtilities.IsVisualDocumentFileExtension(Path.GetExtension(x).ToLower()))]; if (imageFiles.Count == 0) { - PassedTextControl.AppendText($"{folderPath} contains no images"); + PassedTextControl.AppendText($"{folderPath} contains no images or PDFs"); return; } @@ -293,7 +293,7 @@ public async Task OcrAllImagesInFolder(string folderPath, OcrDirectoryOptions op { PassedTextControl.AppendText(folderPath); PassedTextControl.AppendText(Environment.NewLine); - PassedTextControl.AppendText($"{imageFiles.Count} images found"); + PassedTextControl.AppendText($"{imageFiles.Count} files found"); if (!string.IsNullOrEmpty(tesseractLanguageTag)) { @@ -342,14 +342,14 @@ public async Task OcrAllImagesInFolder(string folderPath, OcrDirectoryOptions op if (options.OutputFooter) { PassedTextControl.AppendText(Environment.NewLine); - PassedTextControl.AppendText($"----- COMPLETED OCR OF {imageFiles.Count} images"); + PassedTextControl.AppendText($"----- COMPLETED OCR OF {imageFiles.Count} files"); } } catch (OperationCanceledException) { PassedTextControl.AppendText(Environment.NewLine); int countCompleted = ocrFileResults.Where(r => r.OcrResult is not null).Count(); - PassedTextControl.AppendText($"----- CANCELLED OCR OF {ocrFileResults.Count - countCompleted}, Completed {countCompleted} images"); + PassedTextControl.AppendText($"----- CANCELLED OCR OF {ocrFileResults.Count - countCompleted}, Completed {countCompleted} files"); } finally { @@ -2489,7 +2489,7 @@ public void ReplaceInSpreadsheetCells( SpreadsheetUndoState? 
beforeState = CreateCurrentSpreadsheetUndoState(syncFromTable: false); - var updates = targets + IEnumerable<(int RowIndex, int ColumnIndex, string Value)> updates = targets .Where(r => r.RowIndex.HasValue && r.ColumnIndex.HasValue) .GroupBy(r => (r.RowIndex!.Value, r.ColumnIndex!.Value)) .Select(g => @@ -2883,6 +2883,7 @@ private void LoadGrabTemplateMenuItems(MenuItem grabTemplateMenuItem) Header = "(None)", IsCheckable = true, IsChecked = previouslySelected is null, + StaysOpenOnClick = true, }; noneItem.Click += GrabTemplateMenuItem_Click; grabTemplateMenuItem.Items.Add(noneItem); @@ -2895,6 +2896,7 @@ private void LoadGrabTemplateMenuItems(MenuItem grabTemplateMenuItem) IsCheckable = true, IsChecked = previouslySelected?.Id == template.Id, Tag = template, + StaysOpenOnClick = true, }; templateMenuItem.Click += GrabTemplateMenuItem_Click; grabTemplateMenuItem.Items.Add(templateMenuItem); @@ -2995,6 +2997,7 @@ private async void LoadLanguageMenuItems(MenuItem captureMenuItem) Tag = language, IsCheckable = true, IsChecked = i == selectedIndex, + StaysOpenOnClick = true, }; languageMenuItem.Click += LanguageMenuItem_Click; captureMenuItem.Items.Add(languageMenuItem); From fceb621ff75af1f85c71ff7a42f6ce67f5491d0d Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Mon, 4 May 2026 23:54:10 -0500 Subject: [PATCH 22/22] Add Cut/Copy/Paste support to spreadsheet editor Replaced custom Copy with standard Cut/Copy/Paste commands in the spreadsheet context menu. Added command bindings and keyboard shortcut handling for these actions. Implemented TryCutSpreadsheetCellValues for cut logic with clipboard failure detection. Updated clipboard helper to return success status. Added unit tests for cut scenarios. 
--- Tests/EditTextWindowSpreadsheetTests.cs | 59 +++++++++++++ Text-Grab/Views/EditTextWindow.xaml | 5 +- Text-Grab/Views/EditTextWindow.xaml.cs | 112 +++++++++++++++++++++--- 3 files changed, 165 insertions(+), 11 deletions(-) diff --git a/Tests/EditTextWindowSpreadsheetTests.cs b/Tests/EditTextWindowSpreadsheetTests.cs index 89eadb6b..c8f96203 100644 --- a/Tests/EditTextWindowSpreadsheetTests.cs +++ b/Tests/EditTextWindowSpreadsheetTests.cs @@ -35,6 +35,65 @@ public void ClearSpreadsheetCellValues_ClearsOnlyRequestedCells() Assert.Equal(string.Empty, dataTable.Rows[1][2]); } + [Fact] + public void TryCutSpreadsheetCellValues_CopiesThenClearsRequestedCells() + { + DataTable dataTable = new(); + dataTable.Columns.Add("A", typeof(string)); + dataTable.Columns.Add("B", typeof(string)); + dataTable.Columns.Add("C", typeof(string)); + dataTable.Rows.Add("a1", "b1", "c1"); + dataTable.Rows.Add("a2", "b2", "c2"); + + string clipboardText = string.Empty; + + bool didCut = EditTextWindow.TryCutSpreadsheetCellValues( + dataTable, + [ + (1, 2), + (0, 1), + (1, 0), + (0, 1), + (-1, 0), + (5, 5) + ], + text => + { + clipboardText = text; + return true; + }); + + Assert.True(didCut); + Assert.Equal("b1" + Environment.NewLine + "a2\tc2", clipboardText); + Assert.Equal("a1", dataTable.Rows[0][0]); + Assert.Equal(string.Empty, dataTable.Rows[0][1]); + Assert.Equal("c1", dataTable.Rows[0][2]); + Assert.Equal(string.Empty, dataTable.Rows[1][0]); + Assert.Equal("b2", dataTable.Rows[1][1]); + Assert.Equal(string.Empty, dataTable.Rows[1][2]); + } + + [Fact] + public void TryCutSpreadsheetCellValues_DoesNotClearWhenClipboardCopyFails() + { + DataTable dataTable = new(); + dataTable.Columns.Add("A", typeof(string)); + dataTable.Columns.Add("B", typeof(string)); + dataTable.Rows.Add("a1", "b1"); + + bool didCut = EditTextWindow.TryCutSpreadsheetCellValues( + dataTable, + [ + (0, 0), + (0, 1) + ], + _ => false); + + Assert.False(didCut); + Assert.Equal("a1", dataTable.Rows[0][0]); + 
Assert.Equal("b1", dataTable.Rows[0][1]); + } + [Fact] public void BuildSpreadsheetSelectionText_IncludesOnlySelectedCells() { diff --git a/Text-Grab/Views/EditTextWindow.xaml b/Text-Grab/Views/EditTextWindow.xaml index 69d60201..1172eee6 100644 --- a/Text-Grab/Views/EditTextWindow.xaml +++ b/Text-Grab/Views/EditTextWindow.xaml @@ -83,7 +83,10 @@ - + + + + diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index eb72d0f8..dbcc96d3 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -598,15 +598,7 @@ private void CopySpreadsheetRowsMenuItem_Click(object sender, RoutedEventArgs e) private void CopySpreadsheetSelectionMenuItem_Click(object sender, RoutedEventArgs e) { - List<(int RowIndex, int ColumnIndex)> selectedCellCoordinates = GetSelectedSpreadsheetCellCoordinates(); - if (selectedCellCoordinates.Count == 0) - return; - - string selectionText = BuildSpreadsheetSelectionText(spreadsheetTable, selectedCellCoordinates); - if (string.IsNullOrEmpty(selectionText)) - return; - - TrySetClipboardText(selectionText); + _ = TryCopySpreadsheetSelectionToClipboard(GetSelectedSpreadsheetCellCoordinates()); } private void AddSpreadsheetColumnMenuItem_Click(object sender, RoutedEventArgs e) @@ -1106,6 +1098,15 @@ private void SpreadsheetDataGrid_PreviewKeyDown(object sender, System.Windows.In return; } + if (e.Key == Key.X + && (Keyboard.IsKeyDown(Key.LeftCtrl) || Keyboard.IsKeyDown(Key.RightCtrl)) + && !IsSpreadsheetCellEditorFocused()) + { + e.Handled = true; + _ = TryCutSelectedSpreadsheetCellValues(); + return; + } + if (e.Key == Key.V && (Keyboard.IsKeyDown(Key.LeftCtrl) || Keyboard.IsKeyDown(Key.RightCtrl)) && !IsSpreadsheetCellEditorFocused()) @@ -1233,6 +1234,23 @@ internal static void ClearSpreadsheetCellValues(DataTable dataTable, IEnumerable } } + internal static bool TryCutSpreadsheetCellValues( + DataTable dataTable, + IEnumerable<(int RowIndex, int ColumnIndex)> 
cellCoordinates, + Func trySetClipboardText) + { + ArgumentNullException.ThrowIfNull(dataTable); + ArgumentNullException.ThrowIfNull(cellCoordinates); + ArgumentNullException.ThrowIfNull(trySetClipboardText); + + string selectionText = BuildSpreadsheetSelectionText(dataTable, cellCoordinates); + if (string.IsNullOrEmpty(selectionText) || !trySetClipboardText(selectionText)) + return false; + + ClearSpreadsheetCellValues(dataTable, cellCoordinates); + return true; + } + private void PasteIntoSpreadsheet() { string clipboardText; @@ -1512,6 +1530,24 @@ private void SpreadsheetUndoCanExecute(object sender, CanExecuteRoutedEventArgs e.Handled = true; } + private void SpreadsheetCopyCanExecute(object sender, CanExecuteRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + e.CanExecute = GetSelectedSpreadsheetCellCoordinates().Count > 0; + e.Handled = true; + } + + private void SpreadsheetPasteCanExecute(object sender, CanExecuteRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + e.CanExecute = true; + e.Handled = true; + } + private void SpreadsheetRedoCanExecute(object sender, CanExecuteRoutedEventArgs e) { if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) @@ -1551,6 +1587,33 @@ private void SpreadsheetRedoExecuted(object sender, ExecutedRoutedEventArgs e) e.Handled = true; } + private void SpreadsheetCopyExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + _ = TryCopySpreadsheetSelectionToClipboard(GetSelectedSpreadsheetCellCoordinates()); + e.Handled = true; + } + + private void SpreadsheetCutExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + _ = TryCutSelectedSpreadsheetCellValues(); + e.Handled = true; + } + 
+ private void SpreadsheetPasteExecuted(object sender, ExecutedRoutedEventArgs e) + { + if (editorMode != EtwEditorMode.Spreadsheet || IsSpreadsheetCellEditorFocused()) + return; + + PasteIntoSpreadsheet(); + e.Handled = true; + } + private bool IsSpreadsheetCellEditorFocused() { if (Keyboard.FocusedElement is not DependencyObject focusedElement) @@ -1711,17 +1774,43 @@ private void TrackSpreadsheetColumnWidth(DataGridColumn column) DependencyPropertyDescriptor.FromProperty(DataGridColumn.WidthProperty, typeof(DataGridColumn))?.AddValueChanged(column, SpreadsheetColumnWidthChanged); } - private void TrySetClipboardText(string text) + private bool TrySetClipboardText(string text) { try { System.Windows.Clipboard.SetDataObject(text, true); + return true; } catch { + return false; } } + private bool TryCopySpreadsheetSelectionToClipboard(IEnumerable<(int RowIndex, int ColumnIndex)> cellCoordinates) + { + string selectionText = BuildSpreadsheetSelectionText(spreadsheetTable, cellCoordinates); + return !string.IsNullOrEmpty(selectionText) && TrySetClipboardText(selectionText); + } + + private bool TryCutSelectedSpreadsheetCellValues() + { + List<(int RowIndex, int ColumnIndex)> selectedCellCoordinates = GetSelectedSpreadsheetCellCoordinates(); + if (selectedCellCoordinates.Count == 0) + return false; + + CommitSpreadsheetEditsAndCapturePendingHistory(); + SpreadsheetUndoState? 
beforeChange = CreateCurrentSpreadsheetUndoState(syncFromTable: true); + + if (!TryCutSpreadsheetCellValues(spreadsheetTable, selectedCellCoordinates, TrySetClipboardText)) + return false; + + SyncSpreadsheetDocumentFromTable(); + RecordSpreadsheetUndoChange(beforeChange, CreateCurrentSpreadsheetUndoState(syncFromTable: false)); + UpdateLineAndColumnText(); + return true; + } + private void UpdateSpreadsheetModeUi() { bool isSpreadsheetMode = editorMode == EtwEditorMode.Spreadsheet; @@ -4100,6 +4189,9 @@ private void SetupRoutedCommands() { _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Undo, SpreadsheetUndoExecuted, SpreadsheetUndoCanExecute)); _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Redo, SpreadsheetRedoExecuted, SpreadsheetRedoCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Cut, SpreadsheetCutExecuted, SpreadsheetCopyCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Copy, SpreadsheetCopyExecuted, SpreadsheetCopyCanExecute)); + _ = CommandBindings.Add(new CommandBinding(ApplicationCommands.Paste, SpreadsheetPasteExecuted, SpreadsheetPasteCanExecute)); RoutedCommand newFullscreenGrab = new(); _ = newFullscreenGrab.InputGestures.Add(new KeyGesture(Key.F, ModifierKeys.Control));