diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..c1c50921 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,61 @@ +name: Deploy Sphinx Docs to GitHub Pages + +on: + push: + branches: + - main # Replace 'main' with your default branch if needed + +permissions: + id-token: write # Grant the necessary permissions for the deploy-pages action + contents: write # Ensure content write access for deployment + pages: write # Allow deployment to GitHub Page + +jobs: + build: + runs-on: windows-latest + + steps: + - name: Checkout + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 # v4.2.1 + + # Set up Python + - name: Set up Python + uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0 + with: + python-version: "3.10" + + # Install dependencies and sphinx + - name: Install Dependencies + run: | + pip install sphinx sphinx-rtd-theme + + # Install requirements for documentation building + - name: Install Documentation Requirements + run: | + pip install -r client_requirements_nvidia.txt + + # Build the Sphinx documentation + - name: Build Sphinx Documentation + run: | + cd ./docs + sphinx-build -b html ./ ./_build/html + + - name: Setup Pages + uses: actions/configure-pages@983d7736d9b0ae728b81ab479565c72886d7745b # v5.0 + + # Create a tarball of the built documentation + - name: Zip artifact + run: | + tar -czvf html.tar.gz ./docs/_build/html + + - name: Upload artifact + uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0 + with: + # Upload entire repository + path: "./docs/_build/html" + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e #v4.0.5 + with: + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 31458160..9d4fc7b7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ 
-15,6 +15,12 @@ jobs: - name: Checkout uses: actions/checkout@v1 + - name: Create Version Text File for PyInstaller + run: | + $tag = '${{ github.ref }}' -replace 'refs/tags/', '' + echo $tag > .\scripts\__version__ + shell: pwsh + - name: Install Python uses: actions/setup-python@v1 with: @@ -24,7 +30,14 @@ jobs: # Create CUDA-enabled executable - name: Install CUDA-enabled llama_cpp run: | - pip install --index-url https://abetlen.github.io/llama-cpp-python/whl/cu121 --extra-index-url https://pypi.org/simple llama-cpp-python==v0.2.90 + pip install --index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 --extra-index-url https://pypi.org/simple llama-cpp-python==v0.2.90 + + - name: Instal CUDA drivers for NVIDIA install + run: | + pip install nvidia-cudnn-cu12==9.5.0.50 + pip install nvidia-cuda-runtime-cu12==12.4.127 + pip install nvidia-cuda-nvrtc-cu12==12.4.127 + pip install nvidia-cublas-cu12==12.4.5.8 - name: Install requirements run: | @@ -32,11 +45,15 @@ jobs: - name: Run PyInstaller for NVIDIA run: | - pyinstaller --additional-hooks-dir=.\scripts\hooks --add-data ".\scripts\NVIDIA_INSTALL.txt:install_state" --add-data ".\src\FreeScribe.client\whisper-assets:whisper\assets" --add-data ".\src\FreeScribe.client\markdown:markdown" --add-data ".\src\FreeScribe.client\assets:assets" --name freescribe-client-nvidia --icon=.\src\FreeScribe.client\assets\logo.ico --noconsole .\src\FreeScribe.client\client.py + pyinstaller --additional-hooks-dir=.\scripts\hooks --add-data ".\scripts\NVIDIA_INSTALL.txt:install_state" --add-data ".\src\FreeScribe.client\whisper-assets:whisper\assets" --add-data ".\src\FreeScribe.client\markdown:markdown" --add-data ".\src\FreeScribe.client\assets:assets" --add-data "C:\hostedtoolcache\windows\Python\3.10.11\x64\lib\site-packages\nvidia:nvidia-drivers" --name freescribe-client-nvidia --icon=.\src\FreeScribe.client\assets\logo.ico --noconsole .\src\FreeScribe.client\client.py # Create CPU-only executable - name: Uninstall 
CUDA-enabled llama_cpp (if necessary) and install CPU-only llama_cpp run: | + pip uninstall nvidia-cudnn-cu12==9.5.0.50 + pip uninstall nvidia-cuda-runtime-cu12==12.4.127 + pip uninstall nvidia-cuda-nvrtc-cu12==12.4.127 + pip uninstall nvidia-cublas-cu12==12.4.5.8 pip uninstall -y llama-cpp-python pip install --index-url https://abetlen.github.io/llama-cpp-python/whl/cpu --extra-index-url https://pypi.org/simple llama-cpp-python==v0.2.90 diff --git a/.gitignore b/.gitignore index 9e7a7014..bcb6b407 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +.idea/ +__version__ + aiscribe.txt aiscribe2.txt settings.txt @@ -18,3 +21,5 @@ freescribe-client.spec freescribe-client-cpu.spec freescribe-client-nvidia.spec scripts/FreeScribeInstaller.exe + +_build diff --git a/LICENSE.txt b/LICENSE.txt index f288702d..29ebfa54 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,5 +1,5 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies @@ -7,17 +7,15 @@ Preamble - The GNU General Public License is a free, copyleft license for -software and other kinds of works. + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to +our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free -software for all its users. 
We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. +software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you @@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. 
The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. 
Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. The precise terms and conditions for copying, distribution and modification follow. @@ -72,7 +60,7 @@ modification follow. 0. Definitions. - "This License" refers to version 3 of the GNU General Public License. + "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. @@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. - 13. Use with the GNU Affero General Public License. + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. 
Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single +under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General +Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published +GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's +versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. @@ -635,40 +633,29 @@ the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Affero General Public License for more details. - You should have received a copy of the GNU General Public License + You should have received a copy of the GNU Affero General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". 
+ If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. \ No newline at end of file diff --git a/README.md b/README.md index 9d7ee112..d7174144 100644 --- a/README.md +++ b/README.md @@ -1,80 +1,46 @@ -# AI-Scribe +# FreeScribe ## Introduction -> This is a script that I worked on to help empower physicians to alleviate the burden of documentation by utilizing a medical scribe to create SOAP notes. Expensive solutions could potentially share personal health information with their cloud-based operations. It utilizes `Koboldcpp` and `Whisper` on a local server that is concurrently running the `Server.py` script. The `Client.py` script can then be used by physicians on their device to record patient-physician conversations after a signed consent is obtained and process the result into a SOAP note. -> -> Regards, -> -> Braedon Hendy +This application is a maintained extension of Dr.
Braedon Hendy's AI-Scribe Python script. It is maintained by the ClinicianFOCUS team at the Conestoga College SMART Center. The goal of this project is to provide an easy-to-install medical scribe application. The application can run entirely locally on your machine (so no personal health data is shared), or it can connect to a Large Language Model (LLM) and Whisper (Speech2Text) server on your network or to a remote service such as ChatGPT. To download it, head over to our latest [releases](https://github.com/ClinicianFOCUS/FreeScribe/releases). -## Demo -[Youtube Demo](https://www.youtube.com/watch?v=w8kUB8Y3A30) +Please note that this application is still in an alpha state. Feel free to contribute, connect, or ask questions in our Discord, where the majority of project communication occurs: https://discord.gg/zpQTGVEVbH -## Changelog +### Note from the original creator and active contributor Dr. Braedon Hendy: -- **2024-03-17** - updated `client.py` to allow for `OpenAI` token access when `GPT` button is selected. A prompt will show to allow for scrubbing of any personal health information. -- **2024-03-28** - updated `client.py` to allow for `Whisper` to run locally when set to `True` in the settings. -- **2024-03-29** - added `Scrubadub` to be used to remove personal information prior to `OpenAI` token access. -- **2024-04-26** - added alternative server file to use `Faster-Whisper` -- **2024-05-03** - added alternative server file to use `WhisperX` -- **2024-05-06** - added real-time `Whisper` processing -- **2024-05-13** - added `SSL` and OHIP scrubbing -- **2024-05-14** - `GPT` model selection -- **2024-06-01** - template options and further fine-tuning for local and remote real-time `Whisper` +> This is a script that I worked on to help empower physicians to alleviate the burden of documentation by utilizing a medical scribe to create SOAP notes. Expensive solutions could potentially share personal health information with their cloud-based operations. 
The application can then be used by physicians on their device to record patient-physician conversations after a signed consent is obtained and process the result into a SOAP note. +> +> Regards, +> Braedon Hendy ## Setup on a Local Machine -Example instructions for running on a single machine: - -I will preface that this will run slowly if you are not using a GPU but will demonstrate the capability. - -Install `Python` `3.10.9` [HERE](https://www.python.org/downloads/release/python-3109/). (if the hyperlink doesn't work https://www.python.org/downloads/release/python-3109/). Make sure you click the checkbox to select "`Add Python to Path`". - -Next, you need to install software to convert the audio file to be processed. Press `Windows key` + `R`, you can run the command line by typing `powershell`. Copy/type the following: - -```powershell -Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser -Invoke-RestMethod -Uri https://get.scoop.sh | Invoke-Expression -scoop install ffmpeg -``` - -If this was successful, you need to download the files that I wrote [HERE](https://github.com/1984Doc/AI-Scribe). Unzip the files (if the hyperlink doesn't work https://github.com/1984Doc/AI-Scribe). - -Run the `client.py` (it may prompt for installation of various dependencies via `pip`) - -I would recommend using the `GPT` option using an `API key`. The cost for running each model may determine the overall choice and can be selected in the `Settings` menu of the program. +To run the application on your machine just download the latest [release](https://github.com/ClinicianFOCUS/FreeScribe/releases), run the installer, and begin to use. The application is configured to run completely locally by default. ## Setup on a Server -Example instructions for running on a server with a GPU: - -Install `Python` `3.10.9` [HERE](https://www.python.org/downloads/release/python-3109/). (if the hyperlink doesn't work https://www.python.org/downloads/release/python-3109/). 
Make sure you click the checkbox to select "`Add Python to Path`". - -Press `Windows key` + `R`, you can run the command line by typing `cmd`. Copy/type the following, running each line by pressing `Enter`: +If you would like to run the application on a local higher performance server please refer to our other tools. -```sh -pip install openai-whisper -``` +- Local LLM Container: https://github.com/ClinicianFOCUS/local-llm-container +- Local Whisper Container: https://github.com/ClinicianFOCUS/speech2text-container +- All-in-one installer for the tools: https://github.com/ClinicianFOCUS/clinicianfocus-installer -Now you need to download the AI model (it is large). I recommend the `Mistral 7B v0.2` or `Meta Llama 3` models. These can be found on [HuggingFace.](https://huggingface.co/) +# Further Documentation -You now need to launch the AI model with the following software that you can download [HERE](https://github.com/LostRuins/koboldcpp/releases). It will download automatically and you will need to open it (if hyperlink doesn't work https://github.com/LostRuins/koboldcpp/releases). If you have an **NVidia RTX**-based card, the below instructions can be modified using `Koboldcpp.exe` rather than `koboldcpp_nocuda.exe`. +Further documentation can be found [here](https://clinicianfocus.github.io/FreeScribe) (https://clinicianfocus.github.io/FreeScribe). -Once the `Koboldcpp.exe` is opened, click the `Browse` button and select the model downloaded. Now click the `Launch` button. +## Contributing -You should see a window open and can ask it questions to test! +We welcome contributions to the FreeScribe project! To contribute: -If this was successful, you need to download the files that I wrote [HERE](https://github.com/1984Doc/AI-Scribe). Unzip the files (if the hyperlink doesn't work https://github.com/1984Doc/AI-Scribe). +1. Fork the [repository](https://github.com/ClinicianFOCUS/FreeScribe). +2. Create a new branch (`git checkout -b feature/your-feature`). +3. 
Make your changes and commit them (`git commit -m 'Add some feature'`). +4. Push to the branch (`git push origin feature/your-feature`). +5. Open a pull request. -Run the `server.py` file. This will download the files to help organize the text after converting from audio. +Please ensure your code adheres to our coding standards and includes appropriate tests. -Run the `client.py` file and edit the IP addresses in the `Settings` menu. +# License -# How to run with JanAI -1. Download and install janAI and configure with your LLM of choice. -2. Start the JanAI server. -3. Open the python client applications and set the Model Endpoint to your settings in the JanAI (Typically http://localhost:1337/v1/ by default) -4. Set your model to the one of choice (Gemma 2 2b recommended Model ID: "gemma-2-2b-it") -5. Save the settings -6. Click the KoboldCPP button to enable custom endpoint. +FreeScribe's code is licensed under the AGPL-3.0 License. See [LICENSE](https://github.com/ClinicianFOCUS/FreeScribe/blob/main/LICENSE.txt) for further information. 
diff --git a/client_requirements.txt b/client_requirements.txt index 86b1bf85..5c9de08d 100644 Binary files a/client_requirements.txt and b/client_requirements.txt differ diff --git a/client_requirements_nvidia.txt b/client_requirements_nvidia.txt index 1be57a16..eee154fb 100644 --- a/client_requirements_nvidia.txt +++ b/client_requirements_nvidia.txt @@ -28,8 +28,6 @@ networkx==3.3 nltk==3.9.1 numba==0.60.0 numpy==1.26.4 -openai==1.50.2 -openai-whisper==20240927 packaging==24.1 pefile==2024.8.26 phonenumbers==8.13.46 @@ -56,6 +54,7 @@ textblob==0.15.3 threadpoolctl==3.5.0 tiktoken==0.7.0 torch==2.2.2 +torchaudio==2.2.2 tqdm==4.66.5 typing_extensions==4.12.2 tzdata==2024.2 @@ -65,3 +64,10 @@ docker==7.1.0 markdown==3.7 tkhtmlview==0.3.1 llama-cpp-python==v0.2.90 +faster-whisper==1.1.0 +nvidia-cudnn-cu12==9.5.0.50 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cublas-cu12==12.4.5.8 +psutil==6.1.0 +Pillow==10.2.0 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..d0e8a510 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,27 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'FreeScribe' +copyright = '2024, ClinicianFOCUS' +author = 'ClinicianFOCUS' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ['_static'] diff --git a/docs/how_to_use.rst b/docs/how_to_use.rst new file mode 100644 index 00000000..c6cbdb16 --- /dev/null +++ b/docs/how_to_use.rst @@ -0,0 +1,91 @@ +How To Use FreeScribe +===================== +.. contents:: + :depth: 2 + +FreeScribe Installation Guide +============================= + +Follow these step-by-step instructions to install FreeScribe on your computer: + +Step 1: License Agreement +-------------------------- + +1. Launch the **FreeScribe Setup** installer. +2. 
Read through the license agreement carefully: + + - The software is released under the **AGPL-3.0 license**. + - It includes contributions from the ClinicianFOCUS initiative and team members. + +3. Click **"I Agree"** to accept the terms and proceed. + +Step 2: Select Installation Architecture +---------------------------------------- + +1. Choose your preferred installation architecture: + + - **CPU** (Recommended for most users): Runs on any standard computer processor. + - **NVIDIA**: Select this option if you have an NVIDIA GPU for enhanced performance. + +2. Once selected, click **"Next"**. + +Step 3: Choose Installation Location +------------------------------------ + +1. The installer will suggest a default location to install FreeScribe: + + - Example: ``C:\Program Files (x86)\FreeScribe`` + +2. To change the destination folder, click **"Browse"** and select a new directory. +3. Verify the required disk space and click **"Install"** to begin the installation. + +Step 4: Installation Progress +----------------------------- + +1. The installation will proceed, and you will see a progress bar indicating the status. +2. If prompted with any messages (e.g., configuration file conflicts), resolve them: + + - Click **"Yes"** to remove old configuration files to avoid conflicts with new versions. Recommended to prevent conflicts. + - Click **"Yes"** to remove old installation versions. Recommended to prevent conflicts. + +Step 5: Complete Installation +----------------------------- + +1. Once the installation is complete, select additional options: + + - **Run FreeScribe after installation** (checked by default). + - **Create Desktop Shortcut** for easy access. + - **Add to Start Menu** for quick navigation. + +2. Click **"Close"** to exit the installer. + +Final Notes +----------- + +- After installation, FreeScribe will launch automatically (if selected). +- You can now access the application via the Desktop Shortcut or Start Menu. 
+ +You are all set to use FreeScribe! If you encounter any issues during installation, refer to the community or documentation for troubleshooting. + +Inside The Application +====================== + +Getting Started +--------------- + +To start using FreeScribe: + +1. Launch FreeScribe from the Desktop Shortcut or Start Menu. +2. Follow the on-screen instructions to set up your preferences. + +Workflow Example +---------------- + +1. **Start Transcription**: Click on the "Start recording" button to begin capturing audio. +2. **Generate Notes**: Once the session is complete, click on "Stop Recording" and the note will begin to automatically generate. +3. **Export**: The note is automatically saved to your clipboard so it can be pasted anywhere. It can also be copied by clicking the copy text button. + +Additional Help +--------------- + +For further details or support, visit our Discord: `Join our Discord Community <https://discord.gg/6DnPENSn>`_ (`discord.gg/6DnPENSn <https://discord.gg/6DnPENSn>`_). diff --git a/docs/images/installer_api_key_highlighted.png b/docs/images/installer_api_key_highlighted.png new file mode 100644 index 00000000..d9eeda92 Binary files /dev/null and b/docs/images/installer_api_key_highlighted.png differ diff --git a/docs/images/installer_llm_endpoint.png b/docs/images/installer_llm_endpoint.png new file mode 100644 index 00000000..8926ea2f Binary files /dev/null and b/docs/images/installer_llm_endpoint.png differ diff --git a/docs/images/installer_screen_1.png b/docs/images/installer_screen_1.png new file mode 100644 index 00000000..26e0ca8c Binary files /dev/null and b/docs/images/installer_screen_1.png differ diff --git a/docs/images/jan_ai.png b/docs/images/jan_ai.png new file mode 100644 index 00000000..9da0d009 Binary files /dev/null and b/docs/images/jan_ai.png differ diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..cfe71dda --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,43 @@ +.. 
FreeScribe documentation master file, created by + sphinx-quickstart on Mon Dec 16 14:54:36 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to the FreeScribe Project +================================= + +Welcome to the FreeScribe project! This project aims to provide an intelligent medical scribe application that assists healthcare professionals by transcribing conversations and generating medical notes. + + +Introduction +------------ + +The FreeScribe project leverages advanced machine learning models to transcribe conversations between healthcare providers and patients. It also generates structured medical notes based on the transcriptions, helping to streamline the documentation process in clinical settings. + +Discord Community +----------------- + +Join our Discord community to connect with other users, get support, and collaborate on the FreeScribe project. Our community is a great place to ask questions, share ideas, and stay updated on the latest developments. + +`Join our Discord Community <https://discord.gg/6DnPENSn>`_ (`discord.gg/6DnPENSn <https://discord.gg/6DnPENSn>`_) + +Features +-------- + +- **Real-time Transcription**: Transcribe conversations in real-time using advanced speech recognition models. +- **Medical Note Generation**: Automatically generate structured medical notes from transcriptions. +- **User-Friendly Interface**: Intuitive and easy-to-use interface for healthcare professionals. +- **Customizable Settings**: Customize the application settings to suit your workflow. + +License +------- + +This project is licensed under the AGPL-3.0 License. See the `LICENSE file <https://github.com/ClinicianFOCUS/FreeScribe/blob/main/LICENSE.txt>`_ for more information. + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + + welcome + how_to_use + setting_remote_connection \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..954237b9 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/setting_remote_connection.rst b/docs/setting_remote_connection.rst new file mode 100644 index 00000000..388ee04c --- /dev/null +++ b/docs/setting_remote_connection.rst @@ -0,0 +1,118 @@ +Configuring Remote Connection in FreeScribe +=========================================== + +Follow these step-by-step instructions to configure a remote connection for FreeScribe. + +General Remote AI Settings +-------------------------- + +1. Open the **Settings** window and navigate to the **"AI Settings"** tab. +2. Configure the following fields: + + - **Model Endpoint**: Enter the API URL of your remote server. For example: + ``https://api.openai.com/v1/`` + - **OpenAI API Key**: Paste your OpenAI API key here. + Example: + ``7X1PXospvS7_CtuxvuilmVZA`` + +3. Optional Settings: + - **Local LLM**: Uncheck this if you are using a remote endpoint instead of a local model. 
+ - **AI Server Self-Signed Certificates**: Enable this **only** if your remote server uses self-signed certificates. + +4. Click **"Save"** to apply the changes. + +Configuring Speech-to-Text Remote Connection +-------------------------------------------- + +1. In the **Settings** window, go to the **"Speech-to-Text Settings"** tab. +2. Enable Speech-to-Text for remote connection by configuring these fields: + + - **Speech2Text (Whisper) Endpoint**: Enter the URL for your Speech-to-Text API. For example: + ``https://localhost:2224/whisper`` + - **Speech2Text (Whisper) API Key**: Paste the API key provided for your remote Whisper server. + +3. Optional Settings: + - **Real Time**: Check this option to enable real-time transcription. + - **S2T Server Self-Signed Certificates**: Enable this only if your remote server uses self-signed certificates. + - **Select a Microphone**: Choose the appropriate microphone input device. + - **Whisper Model**: Local Only. + - **Speech2Text (Whisper) Architecture**: Not needed for remote connections. + +4. Click **"Save"** to save the settings. + +Verify the Connection +---------------------- + +1. After saving the configuration: + - Restart FreeScribe to ensure the new settings take effect. + - Verify that the connection to the remote AI model and Speech-to-Text API is working as expected. +2. If issues occur: + - Double-check the **Model Endpoint** and **API Key**. + - Ensure the remote server is running and accessible. + - Confirm the server URL and port are correct. + +Summary of Key Fields +--------------------- + +The following table summarizes the key fields for configuration: + +.. 
table:: Settings Table + :name: tables-grid-example + :widths: 20, 30, 50 + :class: longtable + :align: center + :width: 66% + + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + | **Setting Key** | **Example Value** | **Description** | + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + | Model Endpoint | ``https://localhost:3334/v1/`` | Remote API endpoint for AI models. | + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + | OpenAI API Key | ```` | API key for accessing the remote AI service. | + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + | Speech2Text (Whisper) Endpoint | ``https://localhost:2224/whisper`` | API endpoint for Speech-to-Text services. | + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + | Speech2Text (Whisper) API Key | ```` | API key for the Speech-to-Text server. | + +--------------------------------------+--------------------------------------+--------------------------------------------------+ + + + +By following these steps, you can successfully configure a remote AI model and Speech-to-Text connection in FreeScribe. + +How to connect to ClinicianFOCUS LLM Container +---------------------------------------------- +1. Open the **Settings** window and navigate to the **"AI Settings"** tab. +2. Configure the following fields: + + - **Model Endpoint**: Enter the API URL of your remote server. + Example: + ``https://api.openai.com/v1/`` + + .. image:: images/installer_llm_endpoint.png + :width: 600 + + - **OpenAI API Key**: Paste your OpenAI API key here. + Example: + ``The API Key provided in the installer`` + + .. 
image:: images/installer_api_key_highlighted.png + :width: 600 + + - **Local LLM**: Ensure this is unchecked in the FreeScribe settings. + +3. Repeat for the **Whisper Settings** tab. +4. Click **"Save"** to apply the changes. + +How to connect to JanAI +----------------------- +1. Open the **Settings** window and navigate to the **"AI Settings"** tab. +2. JanAI reference screenshot: + .. image:: images/jan_ai.png + :width: 600 + + Click on Steps 1 and 2 in the screenshot. Then proceed to the next step below. +3. Configure the following fields: + - **Model Endpoint**: Enter the API URL of the JanAI server. Combine the information from Steps 3 and 4 shown in the screenshot. + Example: ``https://localhost:1337/v1`` + + - Note: JanAI does not require an API key, so this field can be left blank. \ No newline at end of file diff --git a/docs/welcome.rst b/docs/welcome.rst new file mode 100644 index 00000000..a21b9631 --- /dev/null +++ b/docs/welcome.rst @@ -0,0 +1,35 @@ +.. FreeScribe documentation master file, created by + sphinx-quickstart on Mon Dec 16 14:54:36 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to the FreeScribe Project +================================= + +Welcome to the FreeScribe project! This project aims to provide an intelligent medical scribe application that assists healthcare professionals by transcribing conversations and generating medical notes. + + +Introduction +------------ + +The FreeScribe project leverages advanced machine learning models to transcribe conversations between healthcare providers and patients. It also generates structured medical notes based on the transcriptions, helping to streamline the documentation process in clinical settings. + +Discord Community +----------------- + +Join our Discord community to connect with other users, get support, and collaborate on the AI Medical Scribe project. 
Our community is a great place to ask questions, share ideas, and stay updated on the latest developments. + +`Join our Discord Community `_ (`discord.gg/6DnPENSn `_) + +Features +-------- + +- **Real-time Transcription**: Transcribe conversations in real-time using advanced speech recognition models. +- **Medical Note Generation**: Automatically generate structured medical notes from transcriptions. +- **User-Friendly Interface**: Intuitive and easy-to-use interface for healthcare professionals. +- **Customizable Settings**: Customize the application settings to suit your workflow. + +License +------- + +This project is licensed under the MIT License. See the `LICENSE file `_ for more information. \ No newline at end of file diff --git a/scripts/install.nsi b/scripts/install.nsi index 927e31a3..9d6febda 100644 --- a/scripts/install.nsi +++ b/scripts/install.nsi @@ -6,6 +6,11 @@ ; Define the name of the installer OutFile "..\dist\FreeScribeInstaller.exe" +; Silent mode flags: +; /S - Silent mode +; /ARCH=[CPU|NVIDIA] - Force architecture selection +; /K - Kill running instance before installation + ; Define the default installation directory to AppData InstallDir "$PROGRAMFILES\FreeScribe" @@ -29,6 +34,90 @@ Var /GLOBAL NVIDIA_RADIO Var /GLOBAL SELECTED_OPTION Var /GLOBAL REMOVE_CONFIG_CHECKBOX Var /GLOBAL REMOVE_CONFIG +Var /GLOBAL Got_Running_Instance + +!macro CheckRunningInstanceMacro + nsExec::ExecToStack 'cmd /c tasklist /FI "IMAGENAME eq freescribe-client.exe" /NH | find /I "freescribe-client.exe" > nul' + Pop $0 ; Return value + ${If} $0 == 0 + StrCpy $Got_Running_Instance "1" + ${Else} + StrCpy $Got_Running_Instance "0" + ${EndIf} +!macroend + +!macro HideNextButtonMacro + GetDlgItem $R0 $HWNDPARENT 1 ; Get the handle of the "Next" button + ShowWindow $R0 ${SW_HIDE} ; Hide the "Next" button +!macroend + +!macro ShowNextButtonMacro + GetDlgItem $R0 $HWNDPARENT 1 ; Get the handle of the "Next" button + ShowWindow $R0 ${SW_SHOW} ; Show the "Next" button 
+!macroend + +!macro GotoNextPageMacro + GetDlgItem $1 $HWNDPARENT 1 ; Get the "Next" button handle + SendMessage $HWNDPARENT ${WM_COMMAND} 1 $1 ; Simulate clicking the "Next" button +!macroend + +!macro HideBackButtonMacro + GetDlgItem $R0 $HWNDPARENT 3 ; Get the handle of the "Back" button + ShowWindow $R0 ${SW_HIDE} ; Hide the "Back" button +!macroend + +Function HideNextButton + !insertmacro HideNextButtonMacro +FunctionEnd + +Function ShowNextButton + !insertmacro ShowNextButtonMacro +FunctionEnd + +Function GotoNextPage + !insertmacro GotoNextPageMacro +FunctionEnd + +Function HideBackButton + !insertmacro HideBackButtonMacro +FunctionEnd + +Function un.HideNextButton + !insertmacro HideNextButtonMacro +FunctionEnd + +Function un.ShowNextButton + !insertmacro ShowNextButtonMacro +FunctionEnd + +Function un.HideBackButton + !insertmacro HideBackButtonMacro +FunctionEnd + +Function un.GotoNextPage + !insertmacro GotoNextPageMacro +FunctionEnd + +!macro KillFreeScribeProcessMacro + nsExec::ExecToStack 'taskkill /F /IM freescribe-client.exe' + Pop $0 ; Return value + + ${If} $0 == 0 + MessageBox MB_OK "FreeScribe process has been terminated." + Return + ${Else} + MessageBox MB_OK|MB_ICONEXCLAMATION "Failed to terminate FreeScribe process. Please close it manually." + Return + ${EndIf} +!macroend + +Function KillFreeScribeProcess + !insertmacro KillFreeScribeProcessMacro +FunctionEnd + +Function un.KillFreeScribeProcess + !insertmacro KillFreeScribeProcessMacro +FunctionEnd Function Check_For_Old_Version_In_App_Data ; Check if the old version exists in AppData @@ -37,7 +126,6 @@ Function Check_For_Old_Version_In_App_Data MessageBox MB_YESNO|MB_ICONQUESTION "An old version of FreeScribe has been detected. Would you like to uninstall it?" 
IDYES UninstallOldVersion IDNO OldVersionDoesNotExist UninstallOldVersion: ; Remove the contents/folders of the old version - RMDir /r "$APPDATA\FreeScribe\presets" RMDir /r "$APPDATA\FreeScribe\_internal" RMDir /r "$APPDATA\FreeScribe\models" @@ -105,7 +193,7 @@ FunctionEnd Function ARCHITECTURE_SELECT_LEAVE ${If} $SELECTED_OPTION == "NVIDIA" - Call CheckNvidiaDrivers + Call CheckNvidiaDrivers ${EndIf} FunctionEnd @@ -128,46 +216,186 @@ Function .onInstSuccess FunctionEnd Function un.onInit - CheckIfFreeScribeIsRunning: + !insertmacro CheckRunningInstanceMacro +FunctionEnd + +; Checks on installer start +Var RunningInstanceDialog +Var ForceStopButton +Var RetryButton + +Var StatusLabel + +Function un.CreateRunningInstancePage + ${If} $Got_Running_Instance == "0" + Abort + ${EndIf} + !insertmacro MUI_HEADER_TEXT "Running Instance Detected" "" + + nsDialogs::Create 1018 + Pop $RunningInstanceDialog + + ${If} $RunningInstanceDialog == error + Abort + ${EndIf} + + ; Create status label + ${NSD_CreateLabel} 0 10u 100% 24u "FreeScribe is currently running.$\n$\nPlease choose how to proceed: Force Stop or close it manually and Retry" + Pop $StatusLabel + + ; Create Force Stop button + ${NSD_CreateButton} 10% 50u 30% 12u "Force Stop" + Pop $ForceStopButton + ${NSD_OnClick} $ForceStopButton un.OnForceStopClick + + ; Create Retry button + ${NSD_CreateButton} 45% 50u 30% 12u "Retry" + Pop $RetryButton + ${NSD_OnClick} $RetryButton un.OnRetryClick + + Call un.HideNextButton + Call un.HideBackButton + + nsDialogs::Show +FunctionEnd + +Function un.OnForceStopClick + Call un.KillFreeScribeProcess nsExec::ExecToStack 'cmd /c tasklist /FI "IMAGENAME eq freescribe-client.exe" /NH | find /I "freescribe-client.exe" > nul' - Pop $0 ; Return value + Pop $0 - ; Check if the process is running ${If} $0 == 0 - MessageBox MB_RETRYCANCEL "FreeScribe is currently running. Please close the application and try again." 
IDRETRY CheckIfFreeScribeIsRunning IDCANCEL abort - abort: - Abort + ${NSD_SetText} $StatusLabel "Unable to terminate FreeScribe.$\nPlease close it manually and click Retry." + ${Else} + StrCpy $Got_Running_Instance "0" + Call un.ShowNextButton + Call un.GotoNextPage + Abort ; Close the dialog and continue uninstallation ${EndIf} FunctionEnd -; Checks on installer start -Function .onInit - CheckIfFreeScribeIsRunning: + +Function un.OnRetryClick nsExec::ExecToStack 'cmd /c tasklist /FI "IMAGENAME eq freescribe-client.exe" /NH | find /I "freescribe-client.exe" > nul' - Pop $0 ; Return value + Pop $0 - ; Check if the process is running ${If} $0 == 0 - MessageBox MB_RETRYCANCEL "FreeScribe is currently running. Please close the application and try again." IDRETRY CheckIfFreeScribeIsRunning IDCANCEL abort - abort: - Abort + ${NSD_SetText} $StatusLabel "FreeScribe is still running.$\n$\nPlease choose how to proceed: Force Stop or close it manually and Retry" + ${Else} + StrCpy $Got_Running_Instance "0" + Call un.ShowNextButton + Call un.GotoNextPage + Abort ; Close the dialog and continue uninstallation ${EndIf} +FunctionEnd + +PageEx custom + PageCallbacks CreateRunningInstancePagePre +PageExEnd + +Function CreateRunningInstancePage + ; Skip this page in silent mode + IfSilent 0 +2 + Abort + + ${If} $Got_Running_Instance == "0" + Abort + ${EndIf} + !insertmacro MUI_HEADER_TEXT "Running Instance Detected" "" + + nsDialogs::Create 1018 + Pop $RunningInstanceDialog + + ${If} $RunningInstanceDialog == error + Abort + ${EndIf} + + ; Create status label + ${NSD_CreateLabel} 0 10u 100% 24u "FreeScribe is currently running.$\n$\nPlease choose how to proceed: Force Stop or close it manually and Retry" + Pop $StatusLabel + + ; Create Force Stop button + ${NSD_CreateButton} 10% 50u 30% 12u "Force Stop" + Pop $ForceStopButton + ${NSD_OnClick} $ForceStopButton OnForceStopClick + + ; Create Retry button + ${NSD_CreateButton} 45% 50u 30% 12u "Retry" + Pop $RetryButton + ${NSD_OnClick} 
$RetryButton OnRetryClick + + ${If} $Got_Running_Instance == "1" + Call HideNextButton + ${Else} + Call ShowNextButton + ${EndIf} + Call HideBackButton + + nsDialogs::Show +FunctionEnd + +Function CreateRunningInstancePagePre + ${If} $Got_Running_Instance == "0" + Abort + ${EndIf} +FunctionEnd + +Function OnForceStopClick + Call KillFreeScribeProcess + nsExec::ExecToStack 'cmd /c tasklist /FI "IMAGENAME eq freescribe-client.exe" /NH | find /I "freescribe-client.exe" > nul' + Pop $0 + + ${If} $0 == 0 + ${NSD_SetText} $StatusLabel "Unable to terminate FreeScribe.$\nPlease close it manually and click Retry." + ${Else} + StrCpy $Got_Running_Instance "0" + Call ShowNextButton + Call GotoNextPage + Abort ; Close the dialog and continue installation + ${EndIf} +FunctionEnd + +Function OnRetryClick + nsExec::ExecToStack 'cmd /c tasklist /FI "IMAGENAME eq freescribe-client.exe" /NH | find /I "freescribe-client.exe" > nul' + Pop $0 + + ${If} $0 == 0 + ${NSD_SetText} $StatusLabel "FreeScribe is still running.$\n$\nPlease choose how to proceed: Force Stop or close it manually and Retry" + ${Else} + StrCpy $Got_Running_Instance "0" + Call ShowNextButton + Call GotoNextPage + Abort ; Close the dialog and continue installation + ${EndIf} +FunctionEnd + +Function .onInit + !insertmacro CheckRunningInstanceMacro IfSilent SILENT_MODE NOT_SILENT_MODE SILENT_MODE: ${GetParameters} $R0 - ; Check for custom parameters ${GetOptions} $R0 "/ARCH=" $R1 ${If} $R1 != "" StrCpy $SELECTED_OPTION $R1 ${EndIf} + ; Check for /K flag to kill running instance + ${GetOptions} $R0 "/K" $R2 + ${IfNot} ${Errors} + Call KillFreeScribeProcess + !insertmacro CheckRunningInstanceMacro ; Re-check after killing + ${EndIf} + + ; Skip running instance page in silent mode + StrCpy $Got_Running_Instance "0" + Return + NOT_SILENT_MODE: FunctionEnd Function CleanUninstall ; Remove the contents/folders of the old version - RMDir /r "$INSTDIR\presets" RMDir /r "$INSTDIR\_internal" ; Remove the old version executable 
@@ -217,14 +445,12 @@ Section "MainSection" SEC01 ; Add files to the installer File /r "..\dist\freescribe-client-nvidia\freescribe-client-nvidia.exe" Rename "$INSTDIR\freescribe-client-nvidia.exe" "$INSTDIR\freescribe-client.exe" - File /r "..\dist\freescribe-client-nvidia\_internal" + File /r "..\dist\freescribe-client-nvidia\_internal" ${EndIf} - - ; add presets - CreateDirectory "$INSTDIR\presets" - SetOutPath "$INSTDIR\presets" - File /r "..\src\FreeScribe.client\presets\*" + ; Install version file to both nvidia and cpu directories for version checking + SetOutPath "$INSTDIR\_internal" + File ".\__version__" SetOutPath "$INSTDIR" @@ -278,7 +504,7 @@ Section "Uninstall" MessageBox MB_RETRYCANCEL "Unable to remove old configuration. Please close any applications using these files and try again." IDRETRY RemoveConfigFiles IDCANCEL ConfigFilesFailed ${EndIf} ${EndIf} - + ; Show message when uninstallation is complete MessageBox MB_OK "FreeScribe has been successfully uninstalled." Goto EndUninstall @@ -298,7 +524,7 @@ Function CustomizeFinishPage nsDialogs::Create 1018 Pop $0 - + ${If} $0 == error Abort ${EndIf} @@ -349,6 +575,16 @@ Function InsfilesPageLeave SetAutoClose true FunctionEnd +Function CheckCudaAvailability + nsExec::ExecToStack 'nvcc --version' + Pop $0 ; Return value + + ${If} $0 != 0 + MessageBox MB_OK "CUDA is not available. Please ensure 'nvcc' is installed and added to the PATH and restart the installer. 
Download it from: https://developer.nvidia.com/cuda-downloads" + Quit + ${EndIf} +FunctionEnd + Function CheckNvidiaDrivers Var /GLOBAL DriverVersion @@ -362,31 +598,36 @@ Function CheckNvidiaDrivers ReadRegStr $DriverVersion HKLM "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\{B2FE1952-0186-46C3-BAEC-A80AA35AC5B8}_Display.Driver" "DisplayVersion" ${EndIf} - ; No nvidia drivers detected - show error message + ; No NVIDIA drivers detected - show error message ${If} $DriverVersion == "" - MessageBox MB_OK "No valid Nvidia device deteced (Drivers Missing). This program relys on a Nvidia GPU to run. Functionality is not guaranteed without a Nvidia GPU." + MessageBox MB_OK "No valid NVIDIA device detected (Drivers Missing). This program relies on an NVIDIA GPU to run. Functionality is not guaranteed without an NVIDIA GPU." Goto driver_check_end ${EndIf} + ; Push the version number to the stack Push $DriverVersion ; Push min driver version Push ${MIN_CUDA_DRIVER_VERSION} - + Call CompareVersions Pop $0 ; Get the return value ${If} $0 == 1 - MessageBox MB_OK "Your NVIDIA driver version ($DriverVersion) is older than the minimum required version (${MIN_CUDA_DRIVER_VERSION}). Please update at https://www.nvidia.com/en-us/drivers/. Then contiune with the installation." + MessageBox MB_OK "Your NVIDIA driver version ($DriverVersion) is older than the minimum required version (${MIN_CUDA_DRIVER_VERSION}). Please update at https://www.nvidia.com/en-us/drivers/. Then continue with the installation." 
Abort ${EndIf} + + ; Check for CUDA availability + Call CheckCudaAvailability + driver_check_end: FunctionEnd ;------------------------------------------------------------------------------ ; Function: CompareVersions ; Purpose: Compares two version numbers in format "X.Y" (e.g., "1.0", "2.3") -; +; ; Parameters: ; Stack 1 (bottom): First version string to compare ; Stack 0 (top): Second version string to compare @@ -410,22 +651,22 @@ Function CompareVersions Push $R3 Push $R4 Push $R5 - + ; Split version strings into major and minor numbers ${WordFind} $R1 "." "+1" $R2 ; Extract major number from first version ${WordFind} $R1 "." "+2" $R3 ; Extract minor number from first version ${WordFind} $R0 "." "+1" $R4 ; Extract major number from second version ${WordFind} $R0 "." "+2" $R5 ; Extract minor number from second version - + ; Convert to comparable numbers: ; Multiply major version by 1000 to handle minor version properly IntOp $R2 $R2 * 1000 ; Convert first version major number IntOp $R4 $R4 * 1000 ; Convert second version major number - + ; Add minor numbers to create complete comparable values IntOp $R2 $R2 + $R3 ; First version complete number IntOp $R4 $R4 + $R5 ; Second version complete number - + ; Compare versions and set return value ${If} $R2 < $R4 ; If first version is less than second StrCpy $R0 1 @@ -434,7 +675,7 @@ Function CompareVersions ${Else} ; If versions are equal StrCpy $R0 0 ${EndIf} - + ; Restore registers from stack Pop $R5 Pop $R4 @@ -446,7 +687,7 @@ FunctionEnd Function un.CreateRemoveConfigFilesPage !insertmacro MUI_HEADER_TEXT "Remove Configuration Files" "Do you want to remove the configuration files (e.g., settings)?" 
- + nsDialogs::Create 1018 Pop $0 @@ -465,7 +706,15 @@ Function un.RemoveConfigFilesPageLeave ${NSD_GetState} $REMOVE_CONFIG_CHECKBOX $REMOVE_CONFIG FunctionEnd +; Define the uninstaller pages first +UninstPage custom un.CreateRunningInstancePage +!insertmacro MUI_UNPAGE_CONFIRM +UninstPage custom un.CreateRemoveConfigFilesPage un.RemoveConfigFilesPageLeave +!insertmacro MUI_UNPAGE_INSTFILES +!insertmacro MUI_UNPAGE_FINISH + ; Define installer pages +Page custom CreateRunningInstancePage !insertmacro MUI_PAGE_LICENSE ".\assets\License.txt" Page Custom ARCHITECTURE_SELECT ARCHITECTURE_SELECT_LEAVE !insertmacro MUI_PAGE_DIRECTORY @@ -473,11 +722,5 @@ Page Custom ARCHITECTURE_SELECT ARCHITECTURE_SELECT_LEAVE !insertmacro MUI_PAGE_INSTFILES Page Custom CustomizeFinishPage RunApp -; Define the uninstaller pages -!insertmacro MUI_UNPAGE_CONFIRM -UninstPage custom un.CreateRemoveConfigFilesPage un.RemoveConfigFilesPageLeave -!insertmacro MUI_UNPAGE_INSTFILES -!insertmacro MUI_UNPAGE_FINISH - ; Define the languages !insertmacro MUI_LANGUAGE English diff --git a/src/FreeScribe.client/UI/DebugWindow.py b/src/FreeScribe.client/UI/DebugWindow.py index 944abf82..43694edb 100644 --- a/src/FreeScribe.client/UI/DebugWindow.py +++ b/src/FreeScribe.client/UI/DebugWindow.py @@ -10,6 +10,7 @@ import sys from datetime import datetime from collections import deque +from utils.utils import bring_to_front class DualOutput: buffer = None @@ -79,7 +80,12 @@ def __init__(self, parent): :param parent: Parent tkinter window :type parent: tk.Tk or tk.Toplevel """ - self.window = tk.Toplevel(parent) + self.parent = parent + if self.parent.debug_window_open: + bring_to_front("Debug Output") + return + self.parent.debug_window_open = True + self.window = tk.Toplevel(parent.root) self.window.title("Debug Output") self.window.geometry("650x450") @@ -109,6 +115,9 @@ def __init__(self, parent): copy_button = tk.Button(self.window, text="Copy to Clipboard", command=self._copy_to_clipboard) 
copy_button.pack(side=tk.LEFT, pady=10, padx=10) + # custom function to close the window + self.window.protocol("WM_DELETE_WINDOW", self.close_window) + self.refresh_output() def _copy_to_clipboard(self): @@ -133,4 +142,8 @@ def refresh_output(self): top_line_index = self.text_widget.index("@0,0") self.text_widget.delete("1.0", tk.END) self.text_widget.insert(tk.END, content) - self.text_widget.see(top_line_index) \ No newline at end of file + self.text_widget.see(top_line_index) + + def close_window(self): + self.parent.debug_window_open = False + self.window.destroy() \ No newline at end of file diff --git a/src/FreeScribe.client/UI/LoadingWindow.py b/src/FreeScribe.client/UI/LoadingWindow.py index d0f9bb21..0dcd95ec 100644 --- a/src/FreeScribe.client/UI/LoadingWindow.py +++ b/src/FreeScribe.client/UI/LoadingWindow.py @@ -59,7 +59,8 @@ def __init__(self, parent=None, title="Processing", initial_text="Loading", on_c self.popup = tk.Toplevel(parent) self.popup.title(title) - self.popup.geometry("200x105") # Increased height for cancel button + # increased width for whisper model type + self.popup.geometry("280x105") # Increased height for cancel button self.popup.iconbitmap(get_file_path('assets','logo.ico')) if parent: diff --git a/src/FreeScribe.client/UI/MainWindowUI.py b/src/FreeScribe.client/UI/MainWindowUI.py index 1e1270ac..5bfacb66 100644 --- a/src/FreeScribe.client/UI/MainWindowUI.py +++ b/src/FreeScribe.client/UI/MainWindowUI.py @@ -34,6 +34,9 @@ def __init__(self, root, settings): self.scribe_template = None self.setting_window = SettingsWindowUI(self.app_settings, self, self.root) # Settings window self.root.iconbitmap(get_file_path('assets','logo.ico')) + self.debug_window_open = False # Flag to indicate if the debug window is open + + self.warning_bar = None # Warning bar self.current_docker_status_check_id = None # ID for the current Docker status check self.current_container_status_check_id = None # ID for the current container status check @@ -45,8 
+48,12 @@ def load_main_window(self): """ self._bring_to_focus() self._create_menu_bar() - if (self.setting_window.settings.editable_settings['Show Welcome Message']): - self._show_welcome_message() + + # Uncomment this once the UI is refactored to this class + # For now we need to force load it after all our widgets are created + # inside client.py. This is a temporary solution. + # if (self.setting_window.settings.editable_settings['Show Welcome Message']): + # self._show_welcome_message() def _bring_to_focus(self): """ @@ -122,6 +129,50 @@ def create_docker_status_bar(self): self._background_availbility_docker_check() self._background_check_container_status(llm_dot, whisper_dot) + def create_warning_bar(self, text): + """ + Create a warning bar at the bottom of the window to notify the user about microphone issues. + + :param text: Placeholder for text input (unused). + :param row: The row in the grid layout where the bar is placed. + :param column: The starting column for the grid layout. + :param columnspan: The number of columns spanned by the warning bar. + :param pady: Padding for the vertical edges. + :param padx: Padding for the horizontal edges. + :param sticky: Defines how the widget expands in the grid cell. 
+ """ + # Create a frame for the warning bar with a sunken border and gold background + self.warning_bar = tk.Frame(self.root, bd=1, relief=tk.SUNKEN, background="gold") + self.warning_bar.grid(row=4, column=0, columnspan=14, sticky='nsew') + + # Add a label to display the warning message in the warning bar + text_label = tk.Label( + self.warning_bar, + text=text, + foreground="black", # Text color + background="gold" # Matches the frame's background + ) + text_label.pack(side=tk.LEFT) + + # Add a button to allow users to close the warning bar + close_button = tk.Button( + self.warning_bar, + text="X", + command=self.destroy_warning_bar, # Call the destroy method when clicked + foreground="black" + ) + + close_button.pack(side=tk.RIGHT) + + def destroy_warning_bar(self): + """ + Destroy the warning bar if it exists to remove it from the UI. + """ + if self.warning_bar is not None: + # Destroy the warning bar frame and set the reference to None + self.warning_bar.destroy() + self.warning_bar = None + def disable_docker_ui(self): """ Disable the Docker status bar UI elements. @@ -225,7 +276,7 @@ def _create_help_menu(self): # Add Help menu help_menu = tk.Menu(self.menu_bar, tearoff=0) self.menu_bar.add_cascade(label="Help", menu=help_menu) - help_menu.add_command(label="Debug Window", command=lambda: DebugPrintWindow(self.root)) + help_menu.add_command(label="Debug Window", command=lambda: DebugPrintWindow(self)) help_menu.add_command(label="About", command=lambda: self._show_md_content(get_file_path('markdown','help','about.md'), 'About')) def _destroy_help_menu(self): @@ -278,7 +329,7 @@ def _on_help_window_close(self, help_window, dont_show_again: tk.BooleanVar): self.setting_window.settings.save_settings_to_file() help_window.destroy() - def _show_welcome_message(self): + def show_welcome_message(self): """ Private method to display a welcome message. Display a welcome message when the application is launched. 
diff --git a/src/FreeScribe.client/UI/MarkdownWindow.py b/src/FreeScribe.client/UI/MarkdownWindow.py index 6ebdd7ee..f069ab6d 100644 --- a/src/FreeScribe.client/UI/MarkdownWindow.py +++ b/src/FreeScribe.client/UI/MarkdownWindow.py @@ -3,50 +3,48 @@ import tkinter as tk from tkhtmlview import HTMLLabel from utils.file_utils import get_file_path +from utils.utils import get_application_version -""" -A class to create a window displaying rendered Markdown content. -Attributes: ------------ -window : Toplevel - The top-level window for displaying the Markdown content. -Methods: --------- -__init__(parent, title, file_path, callback=None): - Initializes the MarkdownWindow with the given parent, title, file path, and optional callback. -_on_close(var, callback): - Handles the window close event, invoking the callback with the state of the checkbox. -""" class MarkdownWindow: """ - Initializes the MarkdownWindow. + A class to display a Markdown file in a pop-up window with optional callback functionality. + Parameters: ----------- parent : widget - The parent widget. + The parent widget. title : str - The title of the window. + The title of the window. file_path : str - The path to the Markdown file to be rendered. + The path to the Markdown file to be rendered. callback : function, optional - A callback function to be called when the window is closed, with the state of the checkbox. + A callback function to be called when the window is closed, with the state of the checkbox. 
""" + def __init__(self, parent, title, file_path, callback=None): + try: + with open(file_path, "r") as file: + content = md.markdown(file.read(), extensions=["extra", "smarty"]) + except FileNotFoundError: + print(f"File not found: {file_path}") + messagebox.showerror("Error", "File not found") + return + + self.parent = parent self.window = Toplevel(parent) self.window.title(title) self.window.transient(parent) self.window.grab_set() - self.window.iconbitmap(get_file_path('assets','logo.ico')) + self.window.iconbitmap(get_file_path('assets', 'logo.ico')) + + # Footer frame to hold checkbox and close button + footer_frame = tk.Frame(self.window,bg="lightgray") + footer_frame.pack(side=tk.BOTTOM, fill="x", padx=10, pady=10) + + # Add a version label to the footer + version = get_application_version() + version_label = tk.Label(footer_frame, text=f"FreeScribe Client {version}",bg="lightgray",fg="black").pack(side="left", expand=True, padx=2, pady=5) - try: - with open(file_path, "r") as file: - content = md.markdown(file.read(), extensions=["extra", "smarty"]) - - except FileNotFoundError: - print(f"File not found: {file_path}") - self.window.destroy() - messagebox.showerror("Error", "File not found") - return # Create a frame to hold the HTMLLabel and scrollbar frame = tk.Frame(self.window) @@ -63,34 +61,73 @@ def __init__(self, parent, title, file_path, callback=None): # Configure the HTMLLabel to use the scrollbar html_label.config(yscrollcommand=scrollbar.set) + # Optional checkbox and callback handling if callback: var = tk.BooleanVar() - tk.Checkbutton(self.window, text="Don't show this message again", - variable=var).pack(side=tk.BOTTOM, pady=10) - self.window.protocol("WM_DELETE_WINDOW", - lambda: self._on_close(var, callback)) - - # Add a close button at the bottom center - close_button = tk.Button(self.window, text="Close", command=lambda: self._on_close(var, callback)) - close_button.pack(side=tk.BOTTOM) # Extra padding for separation from the checkbox + 
tk.Checkbutton( + footer_frame, text="Don't show this message again", variable=var + ).pack(side=tk.BOTTOM, padx=5) + + close_button = tk.Button( + footer_frame, text="Close", command=lambda: self._on_close(var, callback),font=("Arial", 12),width=6,height=1 + ) else: - # Add a close button at the bottom center - close_button = tk.Button(self.window, text="Close", command= self.window.destroy) - close_button.pack(side=tk.BOTTOM , pady=5) # Extra padding for separation from the checkbox + close_button = tk.Button(footer_frame, text="Close", command=self.window.destroy,font=("Arial", 12),width=6,height=1) + + # Add the close button + close_button.pack(side=tk.BOTTOM, padx=5, pady=5) - self.window.geometry("900x900") + # Adjust window size based on content with constraints + self._adjust_window_size(html_label, scrollbar) + self._display_to_center() self.window.lift() + + def _display_to_center(self): + # Get parent window dimensions and position + parent_x = self.parent.winfo_x() + parent_y = self.parent.winfo_y() + parent_width = self.parent.winfo_width() + parent_height = self.parent.winfo_height() + width = self.window.winfo_width() + height = self.window.winfo_height() + + center_x = parent_x + (parent_width - width) // 2 + center_y = parent_y + (parent_height - height) // 2 + + # Apply the calculated position to the settings window + self.window.geometry(f"{width}x{height}+{center_x}+{center_y}") + + def _adjust_window_size(self, html_label, scrollbar): + """ + Dynamically adjusts the window size based on the content, with constraints. + + Parameters: + ----------- + html_label : HTMLLabel + The label containing the rendered Markdown content. + scrollbar : Scrollbar + The scrollbar associated with the HTMLLabel. 
+ """ + self.window.update_idletasks() # Ensure all widgets are rendered + + content_width = html_label.winfo_reqwidth() + scrollbar.winfo_reqwidth() + 20 + content_height = html_label.winfo_reqheight() + 20 # Exclude footer height from adjustment + + width = min(content_width, 900) # Maximum width of 900 + height = min(content_height, 750) + self.window.geometry(f"{width}x{height}") - """ - Handles the window close event. - Parameters: - ----------- - var : BooleanVar - The Tkinter BooleanVar associated with the checkbox. - callback : function - The callback function to be called with the state of the checkbox. - """ def _on_close(self, var, callback): + """ + Handles the window close event. + + Parameters: + ----------- + var : BooleanVar + The Tkinter BooleanVar associated with the checkbox. + callback : function + The callback function to be called with the state of the checkbox. + """ callback(var.get()) - self.window.destroy() \ No newline at end of file + self.window.destroy() diff --git a/src/FreeScribe.client/UI/SettingsWindow.py b/src/FreeScribe.client/UI/SettingsWindow.py index a034863d..9bb05e19 100644 --- a/src/FreeScribe.client/UI/SettingsWindow.py +++ b/src/FreeScribe.client/UI/SettingsWindow.py @@ -29,11 +29,38 @@ from UI.Widgets.MicrophoneSelector import MicrophoneState from utils.ip_utils import is_valid_url from enum import Enum +import multiprocessing class SettingsKeys(Enum): LOCAL_WHISPER = "Built-in Speech2Text" WHISPER_ENDPOINT = "Speech2Text (Whisper) Endpoint" WHISPER_SERVER_API_KEY = "Speech2Text (Whisper) API Key" + WHISPER_ARCHITECTURE = "Speech2Text (Whisper) Architecture" + WHISPER_CPU_COUNT = "Whisper CPU Thread Count (Experimental)" + WHISPER_COMPUTE_TYPE = "Whisper Compute Type (Experimental)" + WHISPER_BEAM_SIZE = "Whisper Beam Size (Experimental)" + WHISPER_VAD_FILTER = "Use Whisper VAD Filter (Experimental)" + AUDIO_PROCESSING_TIMEOUT_LENGTH = "Audio Processing Timeout (seconds)" + SILERO_SPEECH_THRESHOLD = "Silero Speech 
Threshold" + USE_TRANSLATE_TASK = "Use Translate Task" + WHISPER_LANGUAGE_CODE = "Whisper Language Code" + S2T_SELF_SIGNED_CERT = "S2T Server Self-Signed Certificates" + LLM_ARCHITECTURE = "Architecture" + USE_PRESCREEN_AI_INPUT = "Use Pre-Screen AI Input" + + +class Architectures(Enum): + CPU = ("CPU", "cpu") + CUDA = ("CUDA (Nvidia GPU)", "cuda") + + @property + def label(self): + return self._value_[0] + + @property + def architecture_value(self): + return self._value_[1] + class FeatureToggle: @@ -66,7 +93,7 @@ class SettingsWindow(): save_settings_to_file(): Saves the current settings to a JSON file. save_settings(openai_api_key, aiscribe_text, aiscribe2_text, - settings_window, preset): + settings_window): Saves the current settings, including API keys, IP addresses, and user-defined parameters. load_aiscribe_from_file(): Loads the first AI Scribe text from a file. @@ -79,6 +106,9 @@ class SettingsWindow(): CPU_INSTALL_FILE = "CPU_INSTALL.txt" NVIDIA_INSTALL_FILE = "NVIDIA_INSTALL.txt" STATE_FILES_DIR = "install_state" + DEFAULT_WHISPER_ARCHITECTURE = Architectures.CPU.architecture_value + DEFAULT_LLM_ARCHITECTURE = Architectures.CPU.architecture_value + AUTO_DETECT_LANGUAGE_CODES = ["", "auto", "Auto Detect", "None", "None (Auto Detect)"] def __init__(self): """Initializes the ApplicationSettings with default values.""" @@ -100,9 +130,11 @@ def __init__(self): "BlankSpace", # Represents the SettingsKeys.LOCAL_WHISPER.value checkbox that is manually placed "Real Time", "BlankSpace", # Represents the model dropdown that is manually placed + "BlankSpace", # Represents the mic dropdown SettingsKeys.WHISPER_ENDPOINT.value, SettingsKeys.WHISPER_SERVER_API_KEY.value, - "S2T Server Self-Signed Certificates", + "BlankSpace", # Represents the architecture dropdown that is manually placed + SettingsKeys.S2T_SELF_SIGNED_CERT.value, ] self.llm_settings = [ @@ -111,43 +143,58 @@ def __init__(self): ] self.adv_ai_settings = [ - "use_story", - "use_memory", - 
"use_authors_note", - "use_world_info", + ############################################################################################## + # Stuff that is commented is related to KobolodCPP API and not used in the current version # + # Maybe use it in the future? commented out for now, goes hand in hand with API style # + ############################################################################################## + + # "use_story", + # "use_memory", + # "use_authors_note", + # "use_world_info", "Use best_of", "best_of", - "max_context_length", - "max_length", - "rep_pen", - "rep_pen_range", - "rep_pen_slope", + # "max_context_length", + # "max_length", + # "rep_pen", + # "rep_pen_range", + # "rep_pen_slope", "temperature", "tfs", - "top_a", + # "top_a", "top_k", "top_p", - "typical", - "sampler_order", - "singleline", - "frmttriminc", - "frmtrmblln", + # "typical", + # "sampler_order", + # "singleline", + # "frmttriminc", + # "frmtrmblln", ] self.adv_whisper_settings = [ - "Real Time Audio Length", + # "Real Time Audio Length", + # "BlankSpace", # Represents the whisper cuttoff + SettingsKeys.WHISPER_BEAM_SIZE.value, + SettingsKeys.WHISPER_CPU_COUNT.value, + SettingsKeys.WHISPER_VAD_FILTER.value, + SettingsKeys.WHISPER_COMPUTE_TYPE.value, + SettingsKeys.SILERO_SPEECH_THRESHOLD.value, + SettingsKeys.USE_TRANSLATE_TASK.value, + SettingsKeys.WHISPER_LANGUAGE_CODE.value, + SettingsKeys.USE_PRESCREEN_AI_INPUT.value, ] self.adv_general_settings = [ - "Enable Scribe Template", + # "Enable Scribe Template", # Uncomment if you want to implement the feature right now removed as it doesn't have a real structured implementation + SettingsKeys.AUDIO_PROCESSING_TIMEOUT_LENGTH.value, ] self.editable_settings = { - "Model": "gpt-4", - "Model Endpoint": "https://api.openai.com/v1/", + "Model": "gemma2:2b-instruct-q8_0", + "Model Endpoint": "https://localhost:3334/v1", "Use Local LLM": True, - "Architecture": "CPU", + SettingsKeys.LLM_ARCHITECTURE.value: 
SettingsWindow.DEFAULT_LLM_ARCHITECTURE, "use_story": False, "use_memory": False, "use_authors_note": False, @@ -172,10 +219,15 @@ def __init__(self): SettingsKeys.LOCAL_WHISPER.value: True, SettingsKeys.WHISPER_ENDPOINT.value: "https://localhost:2224/whisperaudio", SettingsKeys.WHISPER_SERVER_API_KEY.value: "", - "Whisper Model": "small.en", + SettingsKeys.WHISPER_ARCHITECTURE.value: SettingsWindow.DEFAULT_WHISPER_ARCHITECTURE, + SettingsKeys.WHISPER_BEAM_SIZE.value: 5, + SettingsKeys.WHISPER_CPU_COUNT.value: multiprocessing.cpu_count(), + SettingsKeys.WHISPER_VAD_FILTER.value: False, + SettingsKeys.WHISPER_COMPUTE_TYPE.value: "float16", + "Whisper Model": "medium", "Current Mic": "None", "Real Time": True, - "Real Time Audio Length": 5, + "Real Time Audio Length": 10, "Real Time Silence Length": 1, "Silence cut-off": 0.035, "LLM Container Name": "ollama", @@ -185,16 +237,20 @@ def __init__(self): "Whisper Caddy Container Name": "caddy", "Auto Shutdown Containers on Exit": True, "Use Docker Status Bar": False, - "Preset": "Custom", "Show Welcome Message": True, "Enable Scribe Template": False, "Use Pre-Processing": True, "Use Post-Processing": False, # Disabled for now causes unexcepted behaviour "AI Server Self-Signed Certificates": False, - "S2T Server Self-Signed Certificates": False, + SettingsKeys.S2T_SELF_SIGNED_CERT.value: False, "Pre-Processing": "Please break down the conversation into a list of facts. Take the conversation and transform it to a easy to read list:\n\n", "Post-Processing": "\n\nUsing the provided list of facts, review the SOAP note for accuracy. Verify that all details align with the information provided in the list of facts and ensure consistency throughout. Update or adjust the SOAP note as necessary to reflect the listed facts without offering opinions or subjective commentary. Ensure that the revised note excludes a \"Notes\" section and does not include a header for the SOAP note. 
Provide the revised note after making any necessary corrections.", "Show Scrub PHI": False, + SettingsKeys.AUDIO_PROCESSING_TIMEOUT_LENGTH.value: 180, + SettingsKeys.SILERO_SPEECH_THRESHOLD.value: 0.5, + SettingsKeys.USE_TRANSLATE_TASK.value: False, + SettingsKeys.WHISPER_LANGUAGE_CODE.value: "None (Auto Detect)", + SettingsKeys.USE_PRESCREEN_AI_INPUT.value: True, } self.docker_settings = [ @@ -297,8 +353,9 @@ def save_settings_to_file(self): """ settings = { "openai_api_key": self.OPENAI_API_KEY, - "editable_settings": self.editable_settings + "editable_settings": self.editable_settings, # "api_style": self.API_STYLE # FUTURE FEATURE REVISION + "app_version": self.get_application_version() } with open(get_resource_path('settings.txt'), 'w') as file: json.dump(settings, file) @@ -458,45 +515,6 @@ def update_models_dropdown(self, dropdown, endpoint=None): else: dropdown.set(models[0]) - - def load_settings_preset(self, preset_name, settings_class): - """ - Load a settings preset from a file. - - This method loads a settings preset from a JSON file with the given name. - The settings are then applied to the application settings. - - Parameters: - preset_name (str): The name of the settings preset to load. - - Returns: - None - """ - self.editable_settings["Preset"] = preset_name - - if preset_name != "Custom": - # load the settigns from the json preset file - self.load_settings_from_file("presets/" + preset_name + ".json") - - self.editable_settings["Preset"] = preset_name - #close the settings window - settings_class.close_window() - - # save the settings to the file - self.save_settings_to_file() - - if preset_name != "Local AI": - messagebox.showinfo("Settings Preset", "Settings preset loaded successfully. Closing settings window. Please re-open and set respective API keys.") - - # Unload ai model if switching - # already has safety check in unload to check if model exist. 
- ModelManager.unload_model() - else: # if is local ai - # load the models here - ModelManager.start_model_threaded(self, self.main_window.root) - else: - messagebox.showinfo("Custom Settings", "To use custom settings then please fill in the values and save them.") - def set_main_window(self, window): """ Set the main window instance for the settings. @@ -528,8 +546,20 @@ def load_or_unload_model(self, old_model, new_model, old_use_local_llm, new_use_ ModelManager.start_model_threaded(self, self.main_window.root) def _create_settings_and_aiscribe_if_not_exist(self): + """ + Create the settings and AI Scribe files if they do not exist. + """ if not os.path.exists(get_resource_path('settings.txt')): - print("Settings file not found. Creating default settings file.") + architectures = self.get_available_architectures() + + # If CUDA is available, set it as the default architecture to save in settings + if Architectures.CUDA.label in architectures: + print("Settings file not found. Creating default settings file with CUDA architecture.") + self.editable_settings[SettingsKeys.WHISPER_ARCHITECTURE.value] = Architectures.CUDA.label + self.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value] = Architectures.CUDA.label + else: + print("Settings file not found. Creating default settings file.") + self.save_settings_to_file() if not os.path.exists(get_resource_path('aiscribe.txt')): print("AIScribe file not found. Creating default AIScribe file.") @@ -551,10 +581,28 @@ def get_available_architectures(self): Returns: list: A list of available architectures for the user to choose from. 
""" - architectures = ["CPU"] # CPU is always available as fallback + architectures = [Architectures.CPU.label] # CPU is always available as fallback # Check for NVIDIA support if os.path.isfile(get_file_path(self.STATE_FILES_DIR, self.NVIDIA_INSTALL_FILE)): - architectures.append("CUDA (Nvidia GPU)") + architectures.append(Architectures.CUDA.label) return architectures + + def update_whisper_model(self): + # save the old whisper model to compare with the new model later + old_local_whisper = self.editable_settings[SettingsKeys.LOCAL_WHISPER.value] + old_whisper_architecture = self.editable_settings[SettingsKeys.WHISPER_ARCHITECTURE.value] + old_model = self.editable_settings["Whisper Model"] + old_cpu_count = self.editable_settings[SettingsKeys.WHISPER_CPU_COUNT.value] + old_compute_type = self.editable_settings[SettingsKeys.WHISPER_COMPUTE_TYPE.value] + + # loading the model after the window is closed to prevent the window from freezing + # if Local Whisper is selected, compare the old model with the new model and reload the model if it has changed + if self.editable_settings[SettingsKeys.LOCAL_WHISPER.value] and ( + old_local_whisper != self.editable_settings_entries[SettingsKeys.LOCAL_WHISPER.value].get() or + old_model != self.editable_settings_entries["Whisper Model"].get() or + old_whisper_architecture != self.editable_settings_entries[SettingsKeys.WHISPER_ARCHITECTURE.value].get() or + old_cpu_count != self.editable_settings_entries[SettingsKeys.WHISPER_CPU_COUNT.value].get() or + old_compute_type != self.editable_settings_entries[SettingsKeys.WHISPER_COMPUTE_TYPE.value].get()): + self.main_window.root.event_generate("<>") \ No newline at end of file diff --git a/src/FreeScribe.client/UI/SettingsWindowUI.py b/src/FreeScribe.client/UI/SettingsWindowUI.py index 0c06ea07..a99ceb3c 100644 --- a/src/FreeScribe.client/UI/SettingsWindowUI.py +++ b/src/FreeScribe.client/UI/SettingsWindowUI.py @@ -26,9 +26,10 @@ import threading from Model import Model, ModelManager 
from utils.file_utils import get_file_path +from utils.utils import get_application_version from UI.MarkdownWindow import MarkdownWindow from UI.Widgets.MicrophoneSelector import MicrophoneSelector -from UI.SettingsWindow import SettingsKeys, FeatureToggle +from UI.SettingsWindow import SettingsKeys, FeatureToggle, Architectures, SettingsWindow class SettingsWindowUI: @@ -81,12 +82,14 @@ def open_settings_window(self): """ self.settings_window = tk.Toplevel() self.settings_window.title("Settings") - self.settings_window.geometry("600x450") # Set initial window size - self.settings_window.minsize(600, 500) # Set minimum window size + self.settings_window.geometry("775x400") # Set initial window size + self.settings_window.minsize(775, 400) # Set minimum window size self.settings_window.resizable(True, True) self.settings_window.grab_set() self.settings_window.iconbitmap(get_file_path('assets','logo.ico')) + self._display_center_to_parent() + self.main_frame = tk.Frame(self.settings_window) self.main_frame.pack(expand=True, fill='both') @@ -100,8 +103,8 @@ def open_settings_window(self): self.docker_settings_frame = ttk.Frame(self.notebook) self.notebook.add(self.general_settings_frame, text="General Settings") - self.notebook.add(self.llm_settings_frame, text="AI Settings") self.notebook.add(self.whisper_settings_frame, text="Speech-to-Text Settings") + self.notebook.add(self.llm_settings_frame, text="AI Settings") self.notebook.add(self.advanced_frame, text="Advanced Settings") self.settings_window.protocol("WM_DELETE_WINDOW", self.close_window) @@ -123,6 +126,21 @@ def open_settings_window(self): self.create_buttons() + def _display_center_to_parent(self): + # Get parent window dimensions and position + parent_x = self.root.winfo_x() + parent_y = self.root.winfo_y() + parent_width = self.root.winfo_width() + parent_height = self.root.winfo_height() + + # Calculate the position for the settings window + settings_width = 775 + settings_height = 400 + center_x = 
parent_x + (parent_width - settings_width) // 2 + center_y = parent_y + (parent_height - settings_height) // 2 + + # Apply the calculated position to the settings window + self.settings_window.geometry(f"{settings_width}x{settings_height}+{center_x}+{center_y}") def add_scrollbar_to_frame(self, frame): """ @@ -181,7 +199,7 @@ def create_whisper_settings(self): # create the whisper model dropdown slection tk.Label(left_frame, text="Whisper Model").grid(row=3, column=0, padx=0, pady=5, sticky="w") whisper_models_drop_down_options = ["medium", "small", "tiny", "tiny.en", "base", "base.en", "small.en", "medium.en", "large"] - self.whisper_models_drop_down = ttk.Combobox(left_frame, values=whisper_models_drop_down_options, width=13) + self.whisper_models_drop_down = ttk.Combobox(left_frame, values=whisper_models_drop_down_options, width=20) self.whisper_models_drop_down.grid(row=3, column=1, padx=0, pady=5, sticky="w") try: @@ -194,9 +212,26 @@ def create_whisper_settings(self): self.settings.editable_settings_entries["Whisper Model"] = self.whisper_models_drop_down # create the whisper model dropdown slection - microphone_select = MicrophoneSelector(left_frame, left_row, 0, self.settings) - self.settings.editable_settings_entries["Current Mic"] = microphone_select + # microphone_select = MicrophoneSelector(left_frame, left_row, 0, self.settings) + # self.settings.editable_settings_entries["Current Mic"] = microphone_select + + right_row += 1 + + # Whisper Architecture Dropdown + self.whisper_architecture_label = tk.Label(left_frame, text=SettingsKeys.WHISPER_ARCHITECTURE.value) + self.whisper_architecture_label.grid(row=left_row, column=0, padx=0, pady=5, sticky="w") + whisper_architecture_options = self.settings.get_available_architectures() + self.whisper_architecture_dropdown = ttk.Combobox(left_frame, values=whisper_architecture_options, width=20, state="readonly") + if self.settings.editable_settings[SettingsKeys.WHISPER_ARCHITECTURE.value] in 
whisper_architecture_options: + self.whisper_architecture_dropdown.current(whisper_architecture_options.index(self.settings.editable_settings[SettingsKeys.WHISPER_ARCHITECTURE.value])) + else: + # Default cpu + self.whisper_architecture_dropdown.set(SettingsWindow.DEFAULT_WHISPER_ARCHITECTURE) + + self.whisper_architecture_dropdown.grid(row=left_row, column=1, padx=0, pady=5, sticky="w") + self.settings.editable_settings_entries[SettingsKeys.WHISPER_ARCHITECTURE.value] = self.whisper_architecture_dropdown + left_row += 1 # set the state of the whisper settings based on the SettingsKeys.LOCAL_WHISPER.value checkbox once all widgets are created @@ -215,7 +250,7 @@ def toggle_remote_whisper_settings(self): # set the local option to disabled on switch to remote inverted_state = "disabled" if current_state == 0 else "normal" self.whisper_models_drop_down.config(state=inverted_state) - + self.whisper_architecture_dropdown.config(state=inverted_state) def create_llm_settings(self): @@ -230,6 +265,9 @@ def create_llm_settings(self): right_frame = ttk.Frame(self.llm_settings_frame) right_frame.grid(row=0, column=1, padx=10, pady=5, sticky="nw") + self.llm_settings_frame.columnconfigure(0, weight=1) + self.llm_settings_frame.columnconfigure(1, weight=1) + left_row = 0 right_row = 0 @@ -243,23 +281,30 @@ def create_llm_settings(self): left_row += 1 #6. 
GPU OR CPU SELECTION (Right Column) - tk.Label(left_frame, text="Local Architecture").grid(row=left_row, column=0, padx=0, pady=5, sticky="w") + self.local_architecture_label = tk.Label(left_frame, text="Local Architecture") + self.local_architecture_label.grid(row=left_row, column=0, padx=0, pady=5, sticky="w") architecture_options = self.settings.get_available_architectures() - self.architecture_dropdown = ttk.Combobox(left_frame, values=architecture_options, width=15, state="readonly") - if self.settings.editable_settings["Architecture"] in architecture_options: - self.architecture_dropdown.current(architecture_options.index(self.settings.editable_settings["Architecture"])) + self.architecture_dropdown = ttk.Combobox(left_frame, values=architecture_options, width=20, state="readonly") + if self.settings.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value] in architecture_options: + self.architecture_dropdown.current(architecture_options.index(self.settings.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value])) else: # Default cpu - self.architecture_dropdown.set("CPU") + self.architecture_dropdown.set(Architectures.CPU.label) self.architecture_dropdown.grid(row=left_row, column=1, padx=0, pady=5, sticky="w") + # hide architecture dropdown if architecture only has one option + if len(architecture_options) == 1: + self.local_architecture_label.grid_forget() + self.architecture_dropdown.grid_forget() + + left_row += 1 # 5. 
Models (Left Column) tk.Label(left_frame, text="Models").grid(row=left_row, column=0, padx=0, pady=5, sticky="w") models_drop_down_options = [] - self.models_drop_down = ttk.Combobox(left_frame, values=models_drop_down_options, width=15, state="readonly") + self.models_drop_down = ttk.Combobox(left_frame, values=models_drop_down_options, width=20, state="readonly") self.models_drop_down.grid(row=left_row, column=1, padx=0, pady=5, sticky="w") self.models_drop_down.bind('<>', self.on_model_selection_change) thread = threading.Thread(target=self.settings.update_models_dropdown, args=(self.models_drop_down,)) @@ -441,8 +486,9 @@ def create_processing_section(label_text, setting_key, text_content, row): row = self._create_section_header("⚠️ Advanced Settings (For Advanced Users Only)", 0, text_colour="red") # General Settings - row = self._create_section_header("General Settings", row, text_colour="black") - row = create_settings_columns(self.settings.adv_general_settings, row) + if len(self.settings.adv_general_settings) > 0: + row = self._create_section_header("General Settings", row, text_colour="black") + row = create_settings_columns(self.settings.adv_general_settings, row) # Whisper Settings row = self._create_section_header("Whisper Settings", row, text_colour="black") @@ -453,11 +499,11 @@ def create_processing_section(label_text, setting_key, text_content, row): self.create_editable_settings_col(left_frame, right_frame, 0, 0, self.settings.adv_whisper_settings) - # Audio meter - tk.Label(left_frame, text="Whisper Audio Cutoff").grid(row=1, column=0, padx=0, pady=0, sticky="w") - self.cutoff_slider = AudioMeter(left_frame, width=150, height=50, - threshold=self.settings.editable_settings["Silence cut-off"] * 32768) - self.cutoff_slider.grid(row=1, column=1, padx=0, pady=0, sticky="w") + # # Audio meter + # tk.Label(left_frame, text="Whisper Audio Cutoff").grid(row=1, column=0, padx=0, pady=0, sticky="w") + # self.cutoff_slider = AudioMeter(left_frame, 
width=150, height=50, + # threshold=self.settings.editable_settings["Silence cut-off"] * 32768) + # self.cutoff_slider.grid(row=1, column=1, padx=0, pady=0, sticky="w") row += 1 # AI Settings @@ -515,10 +561,24 @@ def create_buttons(self): This method creates and places buttons for saving settings, resetting to default, and closing the settings window. """ - tk.Button(self.main_frame, text="Save", command=self.save_settings, width=10).pack(side="right", padx=2, pady=5) - tk.Button(self.main_frame, text="Default", width=10, command=self.reset_to_default).pack(side="right", padx=2, pady=5) - tk.Button(self.main_frame, text="Close", width=10, command=self.close_window).pack(side="right", padx=2, pady=5) - tk.Button(self.main_frame, text="Help", width=10, command=self.create_help_window).pack(side="left", padx=2, pady=5) + footer_frame = tk.Frame(self.main_frame,bg="lightgray", height=30) + footer_frame.pack(side="bottom", fill="x") + + # Place the "Help" button on the left + tk.Button(footer_frame, text="Help", width=10, command=self.create_help_window).pack(side="left", padx=2, pady=5) + + # Place the label in the center + version = get_application_version() + tk.Label(footer_frame, text=f"FreeScribe Client {version}",bg="lightgray",fg="black").pack(side="left", expand=True, padx=2, pady=5) + + # Create a frame for the right-side elements + right_frame = tk.Frame(footer_frame) + right_frame.pack(side="right") + + # Pack all other buttons into the right frame + tk.Button(right_frame, text="Close", width=10, command=self.close_window).pack(side="right", padx=2, pady=5) + tk.Button(right_frame, text="Default", width=10, command=self.reset_to_default).pack(side="right", padx=2, pady=5) + tk.Button(right_frame, text="Save", width=10, command=self.save_settings).pack(side="right", padx=2, pady=5) def create_help_window(self): """ @@ -539,22 +599,19 @@ def save_settings(self, close_window=True): self.get_selected_model(), self.settings.editable_settings["Use Local LLM"], 
self.settings.editable_settings_entries["Use Local LLM"].get(), - self.settings.editable_settings["Architecture"], + self.settings.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value], self.architecture_dropdown.get()) if self.get_selected_model() not in ["Loading models...", "Failed to load models"]: self.settings.editable_settings["Model"] = self.get_selected_model() + self.settings.update_whisper_model() self.settings.editable_settings["Pre-Processing"] = self.preprocess_text.get("1.0", "end-1c") # end-1c removes the trailing newline self.settings.editable_settings["Post-Processing"] = self.postprocess_text.get("1.0", "end-1c") # end-1c removes the trailing newline # save architecture - self.settings.editable_settings["Architecture"] = self.architecture_dropdown.get() - - # save the old whisper model to compare with the new model later - old_local_whisper = self.settings.editable_settings[SettingsKeys.LOCAL_WHISPER.value] - old_model = self.settings.editable_settings["Whisper Model"] + self.settings.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value] = self.architecture_dropdown.get() self.settings.save_settings( self.openai_api_key_entry.get(), @@ -562,7 +619,8 @@ def save_settings(self, close_window=True): self.aiscribe2_text.get("1.0", "end-1c"), # end-1c removes the trailing newline self.settings_window, # self.api_dropdown.get(), - self.cutoff_slider.threshold / 32768, + self.settings.editable_settings["Silence cut-off"], # Save the old one for whisper audio cutoff, will be removed in future, left in incase we go back to old cut off + # self.cutoff_slider.threshold / 32768, # old threshold ) if self.settings.editable_settings["Use Docker Status Bar"] and self.main_window.docker_status_bar is None: @@ -578,12 +636,6 @@ def save_settings(self, close_window=True): if close_window: self.close_window() - # loading the model after the window is closed to prevent the window from freezing - # if Local Whisper is selected, compare the old model with the new 
model and reload the model if it has changed - if self.settings.editable_settings[SettingsKeys.LOCAL_WHISPER.value] and ( - old_local_whisper != self.settings.editable_settings[SettingsKeys.LOCAL_WHISPER.value] or old_model != - self.settings.editable_settings["Whisper Model"]): - self.root.event_generate("<>") def reset_to_default(self): """ @@ -602,28 +654,31 @@ def _create_general_settings(self): """ frame, row = self.create_editable_settings(self.general_settings_frame, self.settings.general_settings) - # 1. LLM Preset (Left Column) - tk.Label(frame, text="Settings Presets:").grid(row=row, column=0, padx=0, pady=5, sticky="w") - llm_preset_options = ["Local AI", "ClinicianFocus Toolbox", "Custom"] - self.llm_preset_dropdown = ttk.Combobox(frame, values=llm_preset_options, width=15, state="readonly") - if self.settings.editable_settings["Preset"] in llm_preset_options: - self.llm_preset_dropdown.current(llm_preset_options.index(self.settings.editable_settings["Preset"])) - else: - self.llm_preset_dropdown.set("Custom") - self.llm_preset_dropdown.grid(row=row, column=1, padx=0, pady=5, sticky="w") - - load_preset_btn = ttk.Button(frame, text="Load", width=5, - command=lambda: self.settings.load_settings_preset(self.llm_preset_dropdown.get(), self)) - load_preset_btn.grid(row=row, column=2, padx=0, pady=5, sticky="w") - # Add a note at the bottom of the general settings frame note_text = ( - "Note: 'Show Scrub PHI' will only work for local LLM and private network.\n" - "For internet-facing endpoint, it will be enabled regardless of the 'Show Scrub PHI' value." + "NOTE: To protect personal health information (PHI), we recommend using a local network.\n" + "The 'Show Scrub PHI' feature is only applicable for local LLMs and private networks.\n" + "For internet-facing endpoints, this feature will always be enabled, regardless of the 'Show Scrub PHI' setting." 
+ ) + + # Create a frame to hold the note labels + note_frame = tk.Frame(self.general_settings_frame) + note_frame.grid(padx=10, pady=5, sticky="w") + + # Add the red * label + star_label = tk.Label(note_frame, text="*", fg="red", font=("Arial", 10, "bold")) + star_label.grid(row=0, column=0, sticky="w") + + # Add the rest of the text in black (bold and underlined) + note_label = tk.Label( + note_frame, + text=note_text, + fg="black", # Set text color to black + font=("Arial", 8, "bold underline"), # Set font to bold and underlined + wraplength=400, + justify="left" ) - note_label = tk.Label(self.general_settings_frame, text=note_text, fg="red", wraplength=400, justify="left") - note_label.grid(padx=10, pady=5, sticky="w") - + note_label.grid(row=0, column=1, sticky="w") def _create_checkbox(self, frame, label, setting_name, row_idx, setting_key=None): """ @@ -658,7 +713,7 @@ def _create_entry(self, frame, label, setting_name, row_idx): """ tk.Label(frame, text=label).grid(row=row_idx, column=0, padx=0, pady=5, sticky="w") value = self.settings.editable_settings[setting_name] - entry = tk.Entry(frame) + entry = tk.Entry(frame, width=25) entry.insert(0, str(value)) entry.grid(row=row_idx, column=1, padx=0, pady=5, sticky="w") self.settings.editable_settings_entries[setting_name] = entry @@ -714,6 +769,9 @@ def close_window(self): """ self.settings_window.unbind_all("") # Unbind mouse wheel event causing errors self.settings_window.unbind_all("") # Unbind the configure event causing errors - self.cutoff_slider.destroy() + + if hasattr(self, "cutoff_slider"): + if self.cutoff_slider is not None: + self.cutoff_slider.destroy() self.settings_window.destroy() diff --git a/src/FreeScribe.client/UI/Widgets/AudioMeter.py b/src/FreeScribe.client/UI/Widgets/AudioMeter.py index f80f9062..af82b1d8 100644 --- a/src/FreeScribe.client/UI/Widgets/AudioMeter.py +++ b/src/FreeScribe.client/UI/Widgets/AudioMeter.py @@ -57,6 +57,7 @@ def __init__(self, master=None, width=400, 
height=100, threshold=750): self.running = False self.threshold = threshold self.destroyed = False # Add flag to track widget destruction + self.error_message_box = None # Add error message box attribute self.setup_audio() self.create_widgets() @@ -90,6 +91,10 @@ def cleanup(self, event=None): if hasattr(self, 'monitoring_thread') and self.monitoring_thread: self.monitoring_thread.join(timeout=1.0) + # Cancel error message if scheduled + if self.error_message_box is not None: + self.error_message_box.destroy() + def destroy(self): """ Override the destroy method to ensure cleanup. @@ -206,7 +211,12 @@ def toggle_monitoring(self): frames_per_buffer=self.CHUNK, ) except (OSError, IOError) as e: - tk.messagebox.showerror("Error", f"Please check your microphone settings under the speech2text settings tab. Error opening audio stream: {e}") + # show error message in thread-safe way + error_message = f"Please check your microphone settings under the speech2text settings tab. Error opening audio stream: {e}" + # create a new Tk instance to show the error message + self.error_message_box = tk.Tk() + self.error_message_box.withdraw() + self.master.after(0, lambda: tk.messagebox.showerror("Error", error_message, master=self.error_message_box)) self.monitoring_thread = Thread(target=self.update_meter) self.monitoring_thread.start() diff --git a/src/FreeScribe.client/UI/Widgets/MicrophoneSelector.py b/src/FreeScribe.client/UI/Widgets/MicrophoneSelector.py index 32d0a2af..3be9ca1e 100644 --- a/src/FreeScribe.client/UI/Widgets/MicrophoneSelector.py +++ b/src/FreeScribe.client/UI/Widgets/MicrophoneSelector.py @@ -75,7 +75,7 @@ def __init__(self, root, row, column, app_settings): self.label = tk.Label(root, text="Select a Microphone:") self.label.grid(row=row, column=0, pady=5, sticky="w") - self.dropdown = ttk.Combobox(root, state="readonly", width=15) + self.dropdown = ttk.Combobox(root, state="readonly", width=20) self.dropdown.grid(row=row, pady=5, column=1) # Populate 
class MicrophoneState:
    """Process-wide record of the microphone currently selected for recording."""

    # PyAudio device index of the selected microphone (None until one is chosen)
    SELECTED_MICROPHONE_INDEX = None
    # Device name of the selected microphone (None until one is chosen)
    SELECTED_MICROPHONE_NAME = None


class MicrophoneTestFrame:
    """
    Microphone selector with a live, segmented input-level meter.

    The frame lists the usable input devices reported by PyAudio, keeps a
    persistent input stream open on the selected device and polls it every
    50 ms to drive the meter. The selection is mirrored into
    ``MicrophoneState`` and into ``app_settings.editable_settings["Current Mic"]``.
    """

    # Case-insensitive substrings that mark a device as "not a real microphone"
    EXCLUDED_DEVICE_KEYWORDS = ("Virtual", "Output", "Wave Out", "What U Hear", "Aux", "Port", "Mix")
    # Error numbers reported for "Stream closed" and "Unanticipated host error"
    DISCONNECT_ERRNOS = (-9988, -9999)
    # Parameters of the test stream
    SAMPLE_RATE = 16000        # Hz, common rate for speech
    FRAMES_PER_BUFFER = 1024   # samples read per meter update
    # Number of coloured segments in the volume meter
    SEGMENT_COUNT = 20
    # Delay between two meter updates, in milliseconds
    METER_REFRESH_MS = 50

    def __init__(self, parent, p, app_settings, root):
        """
        Initialize the MicrophoneTestFrame.

        Parameters
        ----------
        parent : tk.Widget
            The parent widget where the frame will be placed.
        p : pyaudio.PyAudio
            The PyAudio instance for audio operations.
        app_settings : SettingsWindow
            Application settings; ``editable_settings`` is read and updated.
        root : tk.Tk
            The application root window, forwarded to the settings UI.
        """
        self.root = root
        self.parent = parent
        self.p = p
        self.app_settings = app_settings
        self.stream = None  # Persistent audio stream
        self.is_stream_active = False  # Track if the stream is active

        # NOTE(review): only used to reach ``settings.save_settings_to_file()``;
        # confirm whether a full settings UI object is really needed here.
        self.setting_window = SettingsWindowUI(self.app_settings, self, self.root)

        # Create a frame for the microphone test
        self.frame = ttk.Frame(self.parent)
        self.frame.grid(row=1, column=0, sticky='nsew')

        # Order matters: the UI needs the device list, the meter loop needs the
        # UI, and opening the stream updates the status label created by the UI.
        self.initialize_microphones()
        self.create_mic_test_ui()
        self.update_volume_meter()
        self.initialize_selected_microphone()

    def initialize_microphones(self):
        """
        Build the list of available microphones.

        Populates ``mic_list`` (``(index, name)`` tuples), ``mic_mapping``
        (name -> index) and ``default_input_index``. Devices without input
        channels, devices whose name contains an excluded keyword and
        duplicate names are skipped. If the saved "Current Mic" setting names
        a known device, ``MicrophoneState`` is updated to point at it.
        """
        self.mic_list = []
        self.mic_mapping = {}  # Maps microphone names to their indices

        try:
            self.default_input_index = self.p.get_default_input_device_info()['index']
        except IOError as e:
            print(f"Failed to initialize microphone ({type(e).__name__}): {e}")
            self.default_input_index = None

        excluded = tuple(keyword.lower() for keyword in self.EXCLUDED_DEVICE_KEYWORDS)
        for i in range(self.p.get_device_count()):
            device_info = self.p.get_device_info_by_index(i)
            if device_info['maxInputChannels'] <= 0:
                continue
            device_name = device_info['name']
            lowered_name = device_name.lower()
            if any(keyword in lowered_name for keyword in excluded):
                continue
            # mic_mapping holds exactly the names already listed, so it doubles
            # as the duplicate check (first device with a given name wins).
            if device_name in self.mic_mapping:
                continue
            self.mic_list.append((i, device_name))
            self.mic_mapping[device_name] = i

        # Load the selected microphone from settings if available
        if self.app_settings and "Current Mic" in self.app_settings.editable_settings:
            selected_name = self.app_settings.editable_settings["Current Mic"]
            if selected_name in self.mic_mapping:
                MicrophoneState.SELECTED_MICROPHONE_NAME = selected_name
                MicrophoneState.SELECTED_MICROPHONE_INDEX = self.mic_mapping[selected_name]

    def create_mic_test_ui(self):
        """
        Create the UI elements for microphone testing.

        Builds the device dropdown, the optional microphone icon, the
        segmented volume meter and the status label.
        """
        # Frame for dropdown
        dropdown_frame = ttk.Frame(self.frame)
        dropdown_frame.grid(row=0, column=0, sticky='nsew', pady=(0, 5))

        # Container frame used to center-align the dropdown
        center_frame = ttk.Frame(dropdown_frame)
        center_frame.grid(row=0, column=0, sticky='nsew')
        center_frame.grid_rowconfigure(0, weight=1)
        center_frame.grid_columnconfigure(0, weight=1)

        # Create styles for all elements
        style = ttk.Style()
        style.configure('Mic.TCombobox', padding=(5, 5, 5, 5))
        style.configure('Green.TFrame', background='#2ecc71')
        style.configure('Yellow.TFrame', background='#f1c40f')
        style.configure('Red.TFrame', background='#e74c3c')
        style.configure('Inactive.TFrame', background='#95a5a6')

        # Dropdown for microphone selection
        self.mic_dropdown = ttk.Combobox(
            center_frame,
            values=[name for _, name in self.mic_list],
            state='readonly',
            width=40,
            style='Mic.TCombobox'
        )
        self.mic_dropdown.grid(row=0, column=0, pady=(0, 5), padx=(10, 0), sticky='nsew')

        # Set the default selection
        if MicrophoneState.SELECTED_MICROPHONE_NAME:
            self.mic_dropdown.set(MicrophoneState.SELECTED_MICROPHONE_NAME)
        elif self.mic_list:
            self.mic_dropdown.current(0)

        # Bind selection change to save immediately. ttk.Combobox reports a
        # new selection through the <<ComboboxSelected>> virtual event.
        self.mic_dropdown.bind('<<ComboboxSelected>>', self.on_mic_change)

        # Volume meter container
        meter_frame = ttk.Frame(self.frame)
        meter_frame.grid(row=1, column=0, sticky='nsew', pady=(0, 0))

        # Try to load mic icon; the meter still works without it
        try:
            mic_icon = Image.open(get_file_path('assets', 'mic_icon.png'))
            mic_icon = mic_icon.resize((24, 24))
            # Keep a reference on self so the image is not garbage collected
            self.mic_photo = ImageTk.PhotoImage(mic_icon)
            mic_icon_label = ttk.Label(meter_frame, image=self.mic_photo)
            mic_icon_label.grid(row=0, column=0, padx=(5, 0), sticky='nsew')
        except Exception as e:
            print(f"Error loading microphone icon: {e}")

        # Create volume meter segments
        self.segments_frame = ttk.Frame(meter_frame)
        self.segments_frame.grid(row=0, column=1, sticky='nsew', pady=(4, 0))

        self.segments = []
        for i in range(self.SEGMENT_COUNT):
            segment = ttk.Frame(self.segments_frame, width=10, height=20)
            segment.grid(row=0, column=i, padx=1)
            segment.grid_propagate(False)
            self.segments.append(segment)

        # Status label for feedback
        self.status_label = ttk.Label(self.frame, text="Microphone: Ready", foreground="green")
        self.status_label.grid(row=2, column=0, pady=(0, 0), padx=(10, 0), sticky='nsew')

    def initialize_selected_microphone(self):
        """
        Initialize the selected microphone and open the audio stream.

        Uses the remembered selection when there is one, otherwise the first
        listed device.
        """
        if MicrophoneState.SELECTED_MICROPHONE_INDEX is not None:
            self.update_selected_microphone(MicrophoneState.SELECTED_MICROPHONE_INDEX)
        elif self.mic_list:
            self.update_selected_microphone(self.mic_list[0][0])

    def on_mic_change(self, event):
        """
        Handle the event when a microphone is selected from the dropdown.

        Parameters
        ----------
        event : tk.Event
            The selection event (unused).
        """
        selected_name = self.mic_dropdown.get()
        if selected_name not in self.mic_mapping:
            return

        self.update_selected_microphone(self.mic_mapping[selected_name])
        # save the settings to the file
        self.setting_window.settings.save_settings_to_file()
        # update_selected_microphone already opened the stream; only retry
        # when that attempt failed, instead of opening the device twice.
        if not self.is_stream_active:
            self.reopen_stream()

    def update_selected_microphone(self, selected_index):
        """
        Update the selected microphone index and name and open its stream.

        Parameters
        ----------
        selected_index : int
            The index of the selected microphone. ``None`` or a negative
            value clears the selection.
        """
        if selected_index is None or selected_index < 0:
            MicrophoneState.SELECTED_MICROPHONE_INDEX = None
            MicrophoneState.SELECTED_MICROPHONE_NAME = None
            self.status_label.config(text="Error: No microphone selected", foreground="red")
            return

        try:
            selected_mic = self.p.get_device_info_by_index(selected_index)
            MicrophoneState.SELECTED_MICROPHONE_INDEX = selected_mic["index"]
            MicrophoneState.SELECTED_MICROPHONE_NAME = selected_mic["name"]
            self.app_settings.editable_settings["Current Mic"] = selected_mic["name"]

            # Replace any existing stream with one on the selected device
            self._close_stream()
            self._open_stream(selected_index)
            self.status_label.config(text="Microphone: Connected", foreground="green")
        except Exception as e:
            self.status_label.config(text="Error: Microphone not found", foreground="red")
            print(f"Failed to open microphone ({type(e).__name__}): {e}")

    def reopen_stream(self):
        """
        Reopen the audio stream with the currently selected microphone.
        """
        self._close_stream()

        if MicrophoneState.SELECTED_MICROPHONE_INDEX is None:
            return

        try:
            self._open_stream(MicrophoneState.SELECTED_MICROPHONE_INDEX)
            self.status_label.config(text="Microphone: Connected", foreground="green")
        except Exception as e:
            self.status_label.config(text="Error: Microphone not found", foreground="red")
            print(f"Failed to open microphone ({type(e).__name__}): {e}")

    def update_volume_meter(self):
        """
        Update the volume meter based on the current microphone input.

        Reschedules itself on the frame every ``METER_REFRESH_MS``
        milliseconds, whether or not a stream is currently active.
        """
        if self.is_stream_active and self.stream is not None:
            try:
                # NOTE(review): this read runs inside a Tk ``after`` callback.
                data = self.stream.read(self.FRAMES_PER_BUFFER, exception_on_overflow=False)
                self._show_volume(self._volume_from_pcm(data))
            except OSError as e:
                if e.errno in self.DISCONNECT_ERRNOS:
                    self.status_label.config(text="Error: Microphone disconnected", foreground="red")
                else:
                    # Handle any other stream errors
                    self.status_label.config(text="Error: Unknown Error. Check debug log for more.", foreground="red")

                print(f"Error in update_volume_meter({type(e).__name__}): {e}")
                # Close the broken stream instead of just dropping the reference
                self._close_stream()
                for segment in self.segments:
                    segment.configure(style='Inactive.TFrame')

        self.frame.after(self.METER_REFRESH_MS, self.update_volume_meter)

    @staticmethod
    def get_selected_microphone_index():
        """
        Get the selected microphone index.

        Returns
        -------
        int or None
            ``MicrophoneState.SELECTED_MICROPHONE_INDEX``.
        """
        return MicrophoneState.SELECTED_MICROPHONE_INDEX

    def __del__(self):
        """
        Clean up resources when the object is destroyed.
        """
        # _close_stream tolerates a partially constructed instance
        self._close_stream()

    def _open_stream(self, device_index):
        """Open a mono 16-bit input stream on *device_index* and mark it active."""
        self.stream = self.p.open(
            format=pyaudio.paInt16,  # 16-bit int PCM
            channels=1,  # mono
            rate=self.SAMPLE_RATE,
            input=True,  # recording stream
            frames_per_buffer=self.FRAMES_PER_BUFFER,
            input_device_index=device_index
        )
        self.is_stream_active = True

    def _close_stream(self):
        """Stop and close the current stream, if any, and mark it inactive."""
        stream = getattr(self, "stream", None)
        self.stream = None
        self.is_stream_active = False
        if stream is None:
            return
        try:
            if stream.is_active():
                stream.stop_stream()
            stream.close()
        except Exception as e:
            print(f"Error closing stream: {e}")

    @staticmethod
    def _volume_from_pcm(data):
        """Convert a buffer of 16-bit PCM samples to a 0-100 volume value."""
        audio_data = np.frombuffer(data, dtype=np.int16)
        rms = np.sqrt(np.mean(np.square(audio_data.astype(np.float64))))
        if np.isnan(rms) or rms <= 0:
            return 0
        scaling_factor = 500
        return min(max(int((rms / 32768) * scaling_factor), 0), 100)

    def _show_volume(self, volume):
        """Colour the meter segments for a 0-100 *volume* value."""
        active_segments = int((volume / 100) * self.SEGMENT_COUNT)
        for i, segment in enumerate(self.segments):
            if i >= active_segments:
                segment.configure(style='Inactive.TFrame')
            elif i < self.SEGMENT_COUNT * 0.6:
                segment.configure(style='Green.TFrame')
            elif i < self.SEGMENT_COUNT * 0.8:
                segment.configure(style='Yellow.TFrame')
            else:
                segment.configure(style='Red.TFrame')
class PopupBox:
    """
    A class to create a popup dialog box with a customizable message and two buttons.

    After the dialog closes, ``response`` is ``"button_1"`` or ``"button_2"``
    depending on which button was pressed.

    :param parent: The parent window for the popup dialog.
    :param title: The title of the popup window (default: "Message").
    :param message: The message displayed in the popup (default: "Message text").
    :param button_text_1: The text for the first button (default: "OK").
    :param button_text_2: The text for the second button (default: "Cancel").
    :param button_1_callback: Callback function for the first button (default: None).
    :param button_2_callback: Callback function for the second button (default: None).
    """

    def __init__(self,
                 parent,
                 title="Message",
                 message="Message text",
                 button_text_1="OK",
                 button_text_2="Cancel",
                 button_1_callback=None,
                 button_2_callback=None):
        """
        Initialize the PopupBox instance and create the dialog window.

        The call blocks until the dialog has been closed.

        :param parent: The parent widget for the dialog.
        :param title: The title of the dialog window.
        :param message: The message to be displayed in the dialog.
        :param button_text_1: The text label for the first button.
        :param button_text_2: The text label for the second button.
        :param button_1_callback: Optional callback function for the first button.
        :param button_2_callback: Optional callback function for the second button.
        """
        self.response = None  # Stores the response indicating which button was clicked
        # Keep the callbacks so the button handlers can invoke them
        self.button_1_callback = button_1_callback
        self.button_2_callback = button_2_callback

        # Create a top-level window for the popup
        self.dialog = Toplevel(parent)
        # Closing the window through the title bar behaves like the first button
        self.dialog.protocol("WM_DELETE_WINDOW", self.on_button_1)
        self.dialog.title(title)
        # Disable window resizing
        self.dialog.resizable(False, False)

        # Fixed size, centered relative to the parent window
        window_width = 300
        window_height = 150
        center_x = parent.winfo_rootx() + (parent.winfo_width() // 2) - (window_width // 2)
        center_y = parent.winfo_rooty() + (parent.winfo_height() // 2) - (window_height // 2)
        self.dialog.geometry(f"{window_width}x{window_height}+{center_x}+{center_y}")

        # Create and pack the message label
        label = tk.Label(self.dialog, text=message, wraplength=250)
        label.pack(pady=20)

        # Create and pack a frame to hold the buttons
        button_frame = tk.Frame(self.dialog)
        button_frame.pack(pady=10)

        button_1 = tk.Button(button_frame, text=button_text_1, command=self.on_button_1)
        button_1.pack(side=tk.LEFT, padx=10)

        button_2 = tk.Button(button_frame, text=button_text_2, command=self.on_button_2)
        button_2.pack(side=tk.RIGHT, padx=10)

        # Configure the dialog as a modal window
        # Make the dialog appear on top of the parent window
        self.dialog.transient(parent)
        # Prevent interaction with other windows until this dialog is closed
        self.dialog.grab_set()
        # Wait until the dialog window is closed
        parent.wait_window(self.dialog)

    def on_button_1(self):
        """
        Handle the event when the first button is clicked.
        Sets the response to 'button_1', closes the dialog and then calls
        ``button_1_callback`` if one was given.
        """
        self._finish("button_1", self.button_1_callback)

    def on_button_2(self):
        """
        Handle the event when the second button is clicked.
        Sets the response to 'button_2', closes the dialog and then calls
        ``button_2_callback`` if one was given.
        """
        self._finish("button_2", self.button_2_callback)

    def _finish(self, response, callback):
        """Record *response*, close the dialog, then run *callback* if set."""
        self.response = response
        # Destroy first so the callback does not run under this dialog's grab
        self.dialog.destroy()
        if callback is not None:
            callback()
python package is named openai-whisper -import scrubadub import re -import speech_recognition as sr # python package is named speechrecognition import time import queue import atexit +import traceback +import torch +import pyaudio +import requests +import pyperclip +import speech_recognition as sr # python package is named speechrecognition +import scrubadub +import numpy as np +import tkinter as tk +from tkinter import scrolledtext, ttk, filedialog +import tkinter.messagebox as messagebox +from faster_whisper import WhisperModel from UI.MainWindowUI import MainWindowUI -from UI.SettingsWindow import SettingsWindow, SettingsKeys +from UI.SettingsWindow import SettingsWindow, SettingsKeys, Architectures from UI.Widgets.CustomTextBox import CustomTextBox from UI.LoadingWindow import LoadingWindow -from UI.Widgets.MicrophoneSelector import MicrophoneState from Model import ModelManager from utils.ip_utils import is_private_ip from utils.file_utils import get_file_path, get_resource_path -import ctypes -import sys +from utils.OneInstance import OneInstance +from utils.utils import get_application_version from UI.DebugWindow import DualOutput -import traceback +from UI.Widgets.MicrophoneTestFrame import MicrophoneTestFrame +from utils.utils import window_has_running_instance, bring_to_front, close_mutex +from WhisperModel import TranscribeError +from UI.Widgets.PopupBox import PopupBox + +if os.environ.get("FREESCRIBE_DEBUG"): + LOG_LEVEL = logging.DEBUG +else: + LOG_LEVEL = logging.INFO + +logging.basicConfig( + level=LOG_LEVEL, + format='%(asctime)s - %(threadName)s - %(name)s - %(levelname)s - %(message)s' +) dual = DualOutput() sys.stdout = dual sys.stderr = dual +APP_NAME = 'AI Medical Scribe' # Application name +APP_TASK_MANAGER_NAME = 'freescribe-client.exe' + +# check if another instance of the application is already running. 
+# if false, create a new instance of the application +# if true, exit the current instance +app_manager = OneInstance(APP_NAME, APP_TASK_MANAGER_NAME) + +if app_manager.run(): + sys.exit(1) +else: + root = tk.Tk() + root.title(APP_NAME) + +def delete_temp_file(filename): + """ + Deletes a temporary file if it exists. + + Args: + filename (str): The name of the file to delete. + """ + file_path = get_resource_path(filename) + if os.path.exists(file_path): + try: + print(f"Deleting temporary file: {filename}") + os.remove(file_path) + except OSError as e: + print(f"Error deleting temporary file {filename}: {e}") +def on_closing(): + delete_temp_file('recording.wav') + delete_temp_file('realtime.wav') + close_mutex() -# GUI Setup -root = tk.Tk() -root.title("AI Medical Scribe") +# Register the close_mutex function to be called on exit +atexit.register(on_closing) # settings logic app_settings = SettingsWindow() @@ -84,7 +135,7 @@ is_gpt_button_active = False p = pyaudio.PyAudio() audio_queue = queue.Queue() -CHUNK = 1024 +CHUNK = 512 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 16000 @@ -103,6 +154,8 @@ # Global instance of whisper model stt_local_model = None +stt_model_loading_thread_lock = threading.Lock() + def get_prompt(formatted_message): @@ -133,9 +186,69 @@ def get_prompt(formatted_message): } def threaded_toggle_recording(): + logging.debug(f"*** Toggle Recording - Recording status: {is_recording}, STT local model: {stt_local_model}") + task_done_var = tk.BooleanVar(value=False) + task_cancel_var = tk.BooleanVar(value=False) + stt_thread = threading.Thread(target=double_check_stt_model_loading, args=(task_done_var, task_cancel_var)) + stt_thread.start() + root.wait_variable(task_done_var) + if task_cancel_var.get(): + logging.debug(f"double checking canceled") + return + thread = threading.Thread(target=toggle_recording) thread.start() + +def double_check_stt_model_loading(task_done_var, task_cancel_var): + stt_loading_window = None + try: + if 
def record_audio():
    """
    Capture microphone audio until ``is_recording`` becomes false.

    Every chunk read is appended to the global ``frames`` list. Chunks that
    ``is_silent`` does not classify as silence are also collected into a
    running segment; once at least "Real Time Silence Length" seconds of
    continuous silence have been seen and "Real Time" is enabled, the segment
    is pushed to ``audio_queue``. When recording stops, any remaining segment
    is queued, the stream is closed, ``None`` is queued as an end marker and
    a displayed silence warning bar is removed.

    If no microphone is selected or the stream cannot be opened, an error
    dialog is shown and the function returns without queuing anything.
    """
    global is_paused, frames, audio_queue

    selected_index = MicrophoneTestFrame.get_selected_microphone_index()
    if selected_index is None:
        # int(None) would raise a TypeError that the handler below does not
        # catch, killing the recording thread without telling the user.
        messagebox.showerror("Audio Error", "Please check your microphone settings. Error opening audio stream: no microphone selected")
        return

    try:
        stream = p.open(
            format=FORMAT,
            channels=1,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            input_device_index=int(selected_index))
    except (OSError, IOError) as e:
        messagebox.showerror("Audio Error", f"Please check your microphone settings. Error opening audio stream: {e}")
        return

    try:
        current_chunk = []
        silent_duration = 0
        silent_warning_duration = 0
        minimum_silent_duration = int(app_settings.editable_settings["Real Time Silence Length"])
        chunk_seconds = CHUNK / RATE  # duration of one read

        while is_recording:
            if is_paused:
                # Sleep briefly instead of spinning at full CPU while paused
                time.sleep(0.05)
                continue

            data = stream.read(CHUNK, exception_on_overflow=False)
            frames.append(data)

            # Scale the 16-bit samples by 1/32768 for the silence check
            audio_buffer = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768

            # convert the setting from str to float
            try:
                speech_prob_threshold = float(app_settings.editable_settings[SettingsKeys.SILERO_SPEECH_THRESHOLD.value])
            except ValueError:
                # default it to 0.5 on invalid error
                speech_prob_threshold = 0.5

            if is_silent(audio_buffer, speech_prob_threshold):
                silent_duration += chunk_seconds
                silent_warning_duration += chunk_seconds
            else:
                current_chunk.append(data)
                silent_duration = 0
                silent_warning_duration = 0

            # Check if we need to warn if silence is longer than warn time
            check_silence_warning(silent_warning_duration)

            # Wait for enough trailing silence so speech is not cut off
            if silent_duration >= minimum_silent_duration:
                if app_settings.editable_settings["Real Time"] and current_chunk:
                    audio_queue.put(b''.join(current_chunk))
                    current_chunk = []
                    silent_duration = 0

        # Send any remaining audio chunk when recording stops
        if current_chunk:
            audio_queue.put(b''.join(current_chunk))
    except Exception as e:
        # Log the error message
        # TODO System logger
        # For now general catch on any problems
        print(f"An error occurred: {e}")
    finally:
        stream.stop_stream()
        stream.close()
        # Queue the end marker even after an error
        audio_queue.put(None)

        # If the warning bar is displayed, remove it
        if window.warning_bar is not None:
            window.destroy_warning_bar()
detected for {SILENCE_WARNING_LENGTH} seconds. Please check your microphone input device in whisper settings and adjust your microphone cutoff level in advanced settings.") + elif silence_duration <= SILENCE_WARNING_LENGTH and window.warning_bar is not None: + # If the warning bar is displayed, remove it + window.destroy_warning_bar() + +silero, _silero = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad') + +def is_silent(data, threshold: float = 0.65): + """Check if audio chunk contains speech using Silero VAD""" + # Convert audio data to tensor and ensure correct format + audio_tensor = torch.FloatTensor(data) + if audio_tensor.dim() == 2: + audio_tensor = audio_tensor.mean(dim=1) + + # Get speech probability + speech_prob = silero(audio_tensor, 16000).item() + return speech_prob < threshold def realtime_text(): - global frames, is_realtimeactive, audio_queue + global is_realtimeactive, audio_queue # Incase the user starts a new recording while this one the older thread is finishing. # This is a local flag to prevent the processing of the current audio chunk # if the global flag is reset on new recording @@ -248,50 +403,65 @@ def realtime_text(): if app_settings.editable_settings["Real Time"] == True: print("Real Time Audio to Text") audio_buffer = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768 - if not is_silent(audio_buffer): - if app_settings.editable_settings[SettingsKeys.LOCAL_WHISPER.value] == True: - print("Local Real Time Whisper") - if stt_local_model is None: - update_gui("Local Whisper model not loaded. 
Please check your settings.") - break - - result = stt_local_model.transcribe(audio_buffer, fp16=False) - if not local_cancel_flag and not is_audio_processing_realtime_canceled.is_set(): - update_gui(result['text']) - else: - print("Remote Real Time Whisper") - if frames: - with wave.open(get_resource_path("realtime.wav"), 'wb') as wf: - wf.setnchannels(CHANNELS) - wf.setsampwidth(p.get_sample_size(FORMAT)) - wf.setframerate(RATE) - wf.writeframes(b''.join(frames)) - frames = [] - file_to_send = get_resource_path("realtime.wav") - with open(file_to_send, 'rb') as f: - files = {'audio': f} - - headers = { - "Authorization": "Bearer "+app_settings.editable_settings[SettingsKeys.WHISPER_SERVER_API_KEY.value] - } - - try: - verify = not app_settings.editable_settings["S2T Server Self-Signed Certificates"] - response = requests.post(app_settings.editable_settings[SettingsKeys.WHISPER_ENDPOINT.value], headers=headers,files=files, verify=verify) - if response.status_code == 200: - text = response.json()['text'] - if not local_cancel_flag and not is_audio_processing_realtime_canceled.is_set(): - update_gui(text) - else: - update_gui(f"Error (HTTP Status {response.status_code}): {response.text}") - except Exception as e: - update_gui(f"Error: {e}") - finally: - #Task done clean up file - if os.path.exists(file_to_send): - f.close() - os.remove(file_to_send) - audio_queue.task_done() + if app_settings.editable_settings[SettingsKeys.LOCAL_WHISPER.value] == True: + print("Local Real Time Whisper") + if stt_local_model is None: + update_gui("Local Whisper model not loaded. 
Please check your settings.") + break + try: + result = faster_whisper_transcribe(audio_buffer) + except Exception as e: + update_gui(f"\nError: {e}\n") + + if not local_cancel_flag and not is_audio_processing_realtime_canceled.is_set(): + update_gui(result) + else: + print("Remote Real Time Whisper") + buffer = io.BytesIO() + with wave.open(buffer, 'wb') as wf: + wf.setnchannels(CHANNELS) + wf.setsampwidth(p.get_sample_size(FORMAT)) + wf.setframerate(RATE) + wf.writeframes(audio_data) + + buffer.seek(0) # Reset buffer position + + files = {'audio': buffer} + + headers = { + "Authorization": "Bearer "+app_settings.editable_settings[SettingsKeys.WHISPER_SERVER_API_KEY.value] + } + + body = { + "use_translate": app_settings.editable_settings[SettingsKeys.USE_TRANSLATE_TASK.value], + } + + if app_settings.editable_settings[SettingsKeys.WHISPER_LANGUAGE_CODE.value] not in SettingsWindow.AUTO_DETECT_LANGUAGE_CODES: + body["language_code"] = app_settings.editable_settings[SettingsKeys.WHISPER_LANGUAGE_CODE.value] + + try: + verify = not app_settings.editable_settings[SettingsKeys.S2T_SELF_SIGNED_CERT.value] + + print("Sending audio to server") + print("File informaton") + print("File Size: ", len(buffer.getbuffer()), "bytes") + + response = requests.post(app_settings.editable_settings[SettingsKeys.WHISPER_ENDPOINT.value], headers=headers,files=files, verify=verify, data=body) + + print("Response from whisper with status code: ", response.status_code) + + if response.status_code == 200: + text = response.json()['text'] + if not local_cancel_flag and not is_audio_processing_realtime_canceled.is_set(): + update_gui(text) + else: + update_gui(f"Error (HTTP Status {response.status_code}): {response.text}") + except Exception as e: + update_gui(f"Error: {e}") + finally: + #close buffer. 
we dont need it anymore + buffer.close() + audio_queue.task_done() else: is_realtimeactive = False @@ -309,13 +479,13 @@ def save_audio(): wf.writeframes(b''.join(frames)) frames = [] # Clear recorded data - if app_settings.editable_settings["Real Time"] == True and is_audio_processing_realtime_canceled.is_set() is False: - send_and_receive() - elif app_settings.editable_settings["Real Time"] == False and is_audio_processing_whole_canceled.is_set() is False: - threaded_send_audio_to_server() + if app_settings.editable_settings["Real Time"] == True and is_audio_processing_realtime_canceled.is_set() is False: + send_and_receive() + elif app_settings.editable_settings["Real Time"] == False and is_audio_processing_whole_canceled.is_set() is False: + threaded_send_audio_to_server() def toggle_recording(): - global is_recording, recording_thread, DEFAULT_BUTTON_COLOUR, audio_queue, current_view, REALTIME_TRANSCRIBE_THREAD_ID + global is_recording, recording_thread, DEFAULT_BUTTON_COLOUR, audio_queue, current_view, REALTIME_TRANSCRIBE_THREAD_ID, frames # Reset the cancel flags going into a fresh recording if not is_recording: @@ -340,6 +510,8 @@ def toggle_recording(): response_display.scrolled_text.configure(state='disabled') is_recording = True + # reset frames before new recording so old data is not used + frames = [] recording_thread = threading.Thread(target=record_audio) recording_thread.start() @@ -380,14 +552,27 @@ def cancel_realtime_processing(thread_id): loading_window = LoadingWindow(root, "Processing Audio", "Processing Audio. 
Please wait.", on_cancel=lambda: (cancel_processing(), cancel_realtime_processing(REALTIME_TRANSCRIBE_THREAD_ID))) + try: + timeout_length = int(app_settings.editable_settings[SettingsKeys.AUDIO_PROCESSING_TIMEOUT_LENGTH.value]) + except ValueError: + # default to 3minutes + timeout_length = 180 - timeout_timer = 0 - while audio_queue.empty() is False and timeout_timer < 180: + timeout_timer = 0.0 + while audio_queue.empty() is False and timeout_timer < timeout_length: # break because cancel was requested if is_audio_processing_realtime_canceled.is_set(): break - + # increment timer timeout_timer += 0.1 + # round to 10 decimal places, account for floating point errors + timeout_timer = round(timeout_timer, 10) + + # check if we should print a message every 5 seconds + if timeout_timer % 5 == 0: + print(f"Waiting for audio processing to finish. Timeout after {timeout_length} seconds. Timer: {timeout_timer}s") + + # Wait for 100ms before checking again, to avoid busy waiting time.sleep(0.1) loading_window.destroy() @@ -409,6 +594,7 @@ def disable_recording_ui_elements(): upload_button.config(state='disabled') response_display.scrolled_text.configure(state='disabled') timestamp_listbox.config(state='disabled') + clear_button.config(state='disabled') def enable_recording_ui_elements(): window.enable_settings_menu() @@ -417,6 +603,7 @@ def enable_recording_ui_elements(): toggle_button.config(state='normal') upload_button.config(state='normal') timestamp_listbox.config(state='normal') + clear_button.config(state='normal') def cancel_processing(): @@ -546,6 +733,7 @@ def cancel_whole_audio_process(thread_id): print(f"An error occurred: {e}") finally: GENERATION_THREAD_ID = None + clear_application_press() loading_window = LoadingWindow(root, "Processing Audio", "Processing Audio. 
Please wait.", on_cancel=lambda: (cancel_processing(), cancel_whole_audio_process(current_thread_id))) @@ -566,8 +754,12 @@ def cancel_whole_audio_process(thread_id): uploaded_file_path = None # Transcribe the audio file using the loaded model - result = stt_local_model.transcribe(file_to_send) - transcribed_text = result["text"] + try: + result = faster_whisper_transcribe(file_to_send) + except Exception as e: + result = f"An error occurred ({type(e).__name__}): {e}" + + transcribed_text = result # done with file clean up if os.path.exists(file_to_send) and delete_file is True: @@ -624,11 +816,25 @@ def cancel_whole_audio_process(thread_id): "Authorization": f"Bearer {app_settings.editable_settings[SettingsKeys.WHISPER_SERVER_API_KEY.value]}" } + body = { + "use_translate": app_settings.editable_settings[SettingsKeys.USE_TRANSLATE_TASK.value], + } + + if app_settings.editable_settings[SettingsKeys.WHISPER_LANGUAGE_CODE.value] not in SettingsWindow.AUTO_DETECT_LANGUAGE_CODES: + body["language_code"] = app_settings.editable_settings[SettingsKeys.WHISPER_LANGUAGE_CODE.value] + try: - verify = not app_settings.editable_settings["S2T Server Self-Signed Certificates"] + verify = not app_settings.editable_settings[SettingsKeys.S2T_SELF_SIGNED_CERT.value] + + print("Sending audio to server") + print("File informaton") + print(f"File: {file_to_send}") + print("File Size: ", os.path.getsize(file_to_send)) # Send the request without verifying the SSL certificate - response = requests.post(app_settings.editable_settings[SettingsKeys.WHISPER_ENDPOINT.value], headers=headers, files=files, verify=verify) + response = requests.post(app_settings.editable_settings[SettingsKeys.WHISPER_ENDPOINT.value], headers=headers, files=files, verify=verify, data=body) + + print("Response from whisper with status code: ", response.status_code) response.raise_for_status() @@ -708,17 +914,17 @@ def update_gui_with_response(response_text): global response_history, user_message, IS_FIRST_LOG if 
IS_FIRST_LOG: - timestamp_listbox.delete(0, tk.END) - timestamp_listbox.config(fg='black') + history_frame.delete(0, tk.END) + history_frame.config(fg='black') IS_FIRST_LOG = False timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") response_history.insert(0, (timestamp, user_message, response_text)) # Update the timestamp listbox - timestamp_listbox.delete(0, tk.END) + history_frame.delete(0, tk.END) for time, _, _ in response_history: - timestamp_listbox.insert(tk.END, time) + history_frame.insert(tk.END, time) display_text(response_text) pyperclip.copy(response_text) @@ -845,9 +1051,81 @@ def send_text_to_localmodel(edited_text): ) +def screen_input_with_llm(conversation): + """ + Send a conversation to a large language model (LLM) for prescreening. + :param conversation: A string containing the conversation to be screened. + :return: A boolean indicating whether the conversation is valid. + """ + prompt = ( + "Go over this conversation and ensure it's a conversation with more than 50 words. " + "Also, if it is a conversation between a doctor and a patient. Please return one word. " + "Either True or False based. Do not give an explanation and do not format the text. " + "Here is the conversation:\n" + ) + + # Send the prompt and conversation to the LLM for evaluation + prescreen = send_text_to_chatgpt(f"{prompt}{conversation}") + + # Check if the response from the LLM is 'true' (case-insensitive) + is_valid_input = prescreen.strip().lower() == "true" + + # Log the AI's response for debugging purposes + print("Generating Input. AI Prescreen: ", prescreen) + + return is_valid_input -def send_text_to_chatgpt(edited_text): + +def display_screening_popup(): + """ + Display a popup window to inform the user of invalid input and offer options. + + :return: A boolean indicating the user's choice: + - False if the user clicks 'Cancel'. + - True if the user clicks 'Process Anyway!'. 
+ """ + # Create and display the popup window + popup_result = PopupBox( + parent=root, + title="Invalid Input", + message=( + "Input has been flagged as invalid. Please ensure the input is a conversation with more than " + "50 words between a doctor and a patient. Unexpected results may occur from the AI." + ), + button_text_1="Cancel", + button_text_2="Process Anyway!" + ) + + # Return based on the button the user clicks + if popup_result.response == "button_1": + return False + elif popup_result.response == "button_2": + return True + + +def screen_input(user_message): + """ + Screen the user's input message based on the application's settings. + + :param user_message: The message to be screened. + :return: A boolean indicating whether the input is valid and accepted for further processing. + """ + # Check if AI prescreening is enabled in the application settings + if app_settings.editable_settings[SettingsKeys.USE_PRESCREEN_AI_INPUT.value]: + # Perform AI-based prescreening + screen_result = screen_input_with_llm(user_message) + + # If the input fails prescreening, display a popup for the user + if not screen_result: + return display_screening_popup() + else: + return True + + #else return true always + return True + +def send_text_to_chatgpt(edited_text): if app_settings.editable_settings["Use Local LLM"]: return send_text_to_localmodel(edited_text) else: @@ -855,7 +1133,6 @@ def send_text_to_chatgpt(edited_text): def generate_note(formatted_message): try: - # If note generation is on if use_aiscribe: # If pre-processing is enabled if app_settings.editable_settings["Use Pre-Processing"]: @@ -934,10 +1211,7 @@ def generate_note_thread(text: str): """ global GENERATION_THREAD_ID - thread = threading.Thread(target=generate_note, args=(text,)) - thread.start() - - GENERATION_THREAD_ID = thread.ident + GENERATION_THREAD_ID = None def cancel_note_generation(thread_id): """Cancels any ongoing note generation. 
@@ -947,7 +1221,8 @@ def cancel_note_generation(thread_id): global GENERATION_THREAD_ID try: - kill_thread(thread_id) + if thread_id: + kill_thread(thread_id) except Exception as e: # Log the error message # TODO implment system logger @@ -957,6 +1232,14 @@ def cancel_note_generation(thread_id): loading_window = LoadingWindow(root, "Generating Note.", "Generating Note. Please wait.", on_cancel=lambda: cancel_note_generation(GENERATION_THREAD_ID)) + # screen input + if screen_input(text) is False: + loading_window.destroy() + return + + thread = threading.Thread(target=generate_note, args=(text,)) + thread.start() + GENERATION_THREAD_ID = thread.ident def check_thread_status(thread, loading_window): if thread.is_alive(): @@ -968,7 +1251,7 @@ def check_thread_status(thread, loading_window): def upload_file(): global uploaded_file_path - file_path = filedialog.askopenfilename(filetypes=(("Audio files", "*.wav *.mp3"),)) + file_path = filedialog.askopenfilename(filetypes=(("Audio files", "*.wav *.mp3 *.m4a"),)) if file_path: uploaded_file_path = file_path threaded_send_audio_to_server() # Add this line to process the file immediately @@ -1045,11 +1328,12 @@ def set_full_view(): toggle_button.grid() upload_button.grid() response_display.grid() - timestamp_listbox.grid() + history_frame.grid() mic_button.grid(row=1, column=1, pady=5, padx=0,sticky='nsew') pause_button.grid(row=1, column=2, pady=5, padx=0,sticky='nsew') switch_view_button.grid(row=1, column=7, pady=5, padx=0,sticky='nsew') blinking_circle_canvas.grid(row=1, column=8, padx=0,pady=5) + footer_frame.grid() window.toggle_menu_bar(enable=True) @@ -1109,9 +1393,9 @@ def set_minimal_view(): toggle_button.grid_remove() upload_button.grid_remove() response_display.grid_remove() - timestamp_listbox.grid_remove() + history_frame.grid_remove() blinking_circle_canvas.grid_remove() - + footer_frame.grid_remove() # Configure minimal view button sizes and placements mic_button.config(width=2, height=1) 
pause_button.config(width=2, height=1) @@ -1162,42 +1446,190 @@ def on_leave(e): # root.attributes('-toolwindow', True) def copy_text(widget): + """ + Copy text content from a tkinter widget to the system clipboard. + + Args: + widget: A tkinter Text widget containing the text to be copied. + """ text = widget.get("1.0", tk.END) pyperclip.copy(text) def add_placeholder(event, text_widget, placeholder_text="Text box"): + """ + Add placeholder text to a tkinter Text widget when it's empty. + + Args: + event: The event that triggered this function. + text_widget: The tkinter Text widget to add placeholder text to. + placeholder_text (str, optional): The placeholder text to display. Defaults to "Text box". + """ if text_widget.get("1.0", "end-1c") == "": text_widget.insert("1.0", placeholder_text) text_widget.config(fg='grey') def remove_placeholder(event, text_widget, placeholder_text="Text box"): + """ + Remove placeholder text from a tkinter Text widget when it gains focus. + + Args: + event: The event that triggered this function. + text_widget: The tkinter Text widget to remove placeholder text from. + placeholder_text (str, optional): The placeholder text to remove. Defaults to "Text box". + """ if text_widget.get("1.0", "end-1c") == placeholder_text: text_widget.delete("1.0", "end") text_widget.config(fg='black') def load_stt_model(event=None): - thread = threading.Thread(target=_load_stt_model_thread, daemon=True) + """ + Initialize speech-to-text model loading in a separate thread. + + Args: + event: Optional event parameter for binding to tkinter events. + """ + thread = threading.Thread(target=_load_stt_model_thread) thread.start() + return thread def _load_stt_model_thread(): + """ + Internal function to load the Whisper speech-to-text model. + + Creates a loading window and handles the initialization of the WhisperModel + with configured settings. Updates the global stt_local_model variable. 
+ + Raises: + Exception: Any error that occurs during model loading is caught, logged, + and displayed to the user via a message box. + """ + with stt_model_loading_thread_lock: + global stt_local_model + model = app_settings.editable_settings["Whisper Model"].strip() + stt_loading_window = LoadingWindow(root, "Voice to Text", f"Loading Voice to Text {model} model. Please wait.") + print(f"Loading STT model: {model}") + try: + unload_stt_model() + device_type = get_selected_whisper_architecture() + set_cuda_paths() + + compute_type = app_settings.editable_settings[SettingsKeys.WHISPER_COMPUTE_TYPE.value] + # Change the compute type automatically if using a gpu one. + if device_type == Architectures.CPU.architecture_value and compute_type == "float16": + compute_type = "int8" + + + stt_local_model = WhisperModel( + model, + device=device_type, + cpu_threads=int(app_settings.editable_settings[SettingsKeys.WHISPER_CPU_COUNT.value]), + compute_type=compute_type + ) + + print("STT model loaded successfully.") + except Exception as e: + print(f"An error occurred while loading STT {type(e).__name__}: {e}") + stt_local_model = None + messagebox.showerror("Error", f"An error occurred while loading Voice to Text {type(e).__name__}: {e}") + finally: + stt_loading_window.destroy() + print("Closing STT loading window.") + +def unload_stt_model(): + """ + Unload the speech-to-text model from memory. + + Cleans up the global stt_local_model instance and performs garbage collection + to free up system resources. + """ global stt_local_model - model = app_settings.editable_settings["Whisper Model"].strip() - # Create a loading window to display the loading message - stt_loading_window = LoadingWindow(root, "Speech to Text", "Loading Speech to Text. 
Please wait.") - print(f"Loading STT model: {model}") + if stt_local_model is not None: + print("Unloading STT model from device.") + # no risk of temporary "stt_local_model in globals() is False" with same gc effect + stt_local_model = None + gc.collect() + print("STT model unloaded successfully.") + else: + print("STT model is already unloaded.") + +def get_selected_whisper_architecture(): + """ + Determine the appropriate device architecture for the Whisper model. + + Returns: + str: The architecture value (CPU or CUDA) based on user settings. + """ + device_type = Architectures.CPU.architecture_value + if app_settings.editable_settings[SettingsKeys.WHISPER_ARCHITECTURE.value] == Architectures.CUDA.label: + device_type = Architectures.CUDA.architecture_value + + return device_type + +def faster_whisper_transcribe(audio): + """ + Transcribe audio using the Faster Whisper model. + + Args: + audio: Audio data to transcribe. + + Returns: + str: Transcribed text or error message if transcription fails. + + Raises: + Exception: Any error during transcription is caught and returned as an error message. + """ try: - # Load the specified Whisper model - stt_local_model = whisper.load_model(model) - print("STT model loaded successfully.") + if stt_local_model is None: + load_stt_model() + raise TranscribeError("Speech2Text model not loaded. Please try again once loaded.") + + # Validate beam_size + try: + beam_size = int(app_settings.editable_settings[SettingsKeys.WHISPER_BEAM_SIZE.value]) + if beam_size <= 0: + raise ValueError(f"{SettingsKeys.WHISPER_BEAM_SIZE.value} must be greater than 0 in advanced settings") + except (ValueError, TypeError) as e: + return f"Invalid {SettingsKeys.WHISPER_BEAM_SIZE.value} parameter. 
Please go into the advanced settings and ensure you have a integer greater than 0: {str(e)}" + + # Validate vad_filter + vad_filter = bool(app_settings.editable_settings[SettingsKeys.WHISPER_VAD_FILTER.value]) + + segments, info = stt_local_model.transcribe( + audio, + beam_size=beam_size, + vad_filter=vad_filter, + ) + + return "".join(f"{segment.text} " for segment in segments) except Exception as e: - # Log the error message - print(f"An error occurred while loading STT: {e}") - stt_local_model = None - messagebox.showerror("Error", f"An error occurred while loading the STT model: {e}") - finally: - stt_loading_window.destroy() - print("Closing STT loading window.") + error_message = f"Transcription failed: {str(e)}" + print(f"Error during transcription: {str(e)}") + raise TranscribeError(error_message) from e +def set_cuda_paths(): + """ + Configure CUDA-related environment variables and paths. + + Sets up the necessary environment variables for CUDA execution when CUDA + architecture is selected. Updates CUDA_PATH, CUDA_PATH_V12_4, and PATH + environment variables with the appropriate NVIDIA driver paths. 
+ """ + if (get_selected_whisper_architecture() != Architectures.CUDA.architecture_value) or (app_settings.editable_settings[SettingsKeys.LLM_ARCHITECTURE.value] != Architectures.CUDA.label): + return + + nvidia_base_path = Path(get_file_path('nvidia-drivers')) + + cuda_path = nvidia_base_path / 'cuda_runtime' / 'bin' + cublas_path = nvidia_base_path / 'cublas' / 'bin' + cudnn_path = nvidia_base_path / 'cudnn' / 'bin' + + paths_to_add = [str(cuda_path), str(cublas_path), str(cudnn_path)] + env_vars = ['CUDA_PATH', 'CUDA_PATH_V12_4', 'PATH'] + + for env_var in env_vars: + current_value = os.environ.get(env_var, '') + new_value = os.pathsep.join(paths_to_add + ([current_value] if current_value else [])) + os.environ[env_var] = new_value # Configure grid weights for scalability root.grid_columnconfigure(0, weight=1, minsize= 10) @@ -1270,12 +1702,34 @@ def _load_stt_model_thread(): if app_settings.editable_settings["Enable Scribe Template"]: window.create_scribe_template() -timestamp_listbox = tk.Listbox(root, height=30) -timestamp_listbox.grid(row=0, column=9, columnspan=2, rowspan=3, padx=5, pady=15, sticky='nsew') +# Create a frame to hold both timestamp listbox and mic test +history_frame = ttk.Frame(root) +history_frame.grid(row=0, column=9, columnspan=2, rowspan=5, padx=5, pady=10, sticky='nsew') + +# Configure the frame's grid +history_frame.grid_columnconfigure(0, weight=1) +history_frame.grid_rowconfigure(0, weight=4) # Timestamp takes more space +history_frame.grid_rowconfigure(1, weight=1) # Mic test takes less space + +# Add the timestamp listbox +timestamp_listbox = tk.Listbox(history_frame, height=30) +timestamp_listbox.grid(row=0, column=0, rowspan=3,sticky='nsew') timestamp_listbox.bind('<>', show_response) timestamp_listbox.insert(tk.END, "Temporary Note History") timestamp_listbox.config(fg='grey') +# Add microphone test frame +mic_test = MicrophoneTestFrame(parent=history_frame, p=p, app_settings=app_settings, root=root) +mic_test.frame.grid(row=4, 
column=0, pady=10, sticky='nsew') # Use grid to place the frame + +# Add a footer frame at the bottom of the window +footer_frame = tk.Frame(root, bg="lightgray", height=30) +footer_frame.grid(row=100, column=0, columnspan=100, sticky="ew") # Use grid instead of pack + +# Add "Version 2" label in the center of the footer +version = get_application_version() +version_label = tk.Label(footer_frame, text=f"FreeScribe Client {version}",bg="lightgray",fg="black").pack(side="left", expand=True, padx=2, pady=5) + window.update_aiscribe_texts(None) # Bind Alt+P to send_and_receive function root.bind('', lambda event: pause_button.invoke()) @@ -1286,7 +1740,8 @@ def _load_stt_model_thread(): #set min size root.minsize(900, 400) - +if (app_settings.editable_settings['Show Welcome Message']): + window.show_welcome_message() #Wait for the UI root to be intialized then load the model. If using local llm. if app_settings.editable_settings["Use Local LLM"]: diff --git a/src/FreeScribe.client/install_state/NVIDIA_INSTALL.txt b/src/FreeScribe.client/install_state/NVIDIA_INSTALL.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/FreeScribe.client/markdown/help/about.md b/src/FreeScribe.client/markdown/help/about.md index 855e2941..6ede0360 100644 --- a/src/FreeScribe.client/markdown/help/about.md +++ b/src/FreeScribe.client/markdown/help/about.md @@ -1,9 +1,14 @@ -# AI-Scribe +# FreeScribe ## Introduction -> This is a script that I worked on to help empower physicians to alleviate the burden of documentation by utilizing a medical scribe to create SOAP notes. Expensive solutions could potentially share personal health information with their cloud-based operations. It utilizes `Koboldcpp` and `Whisper` on a local server that is concurrently running the `Server.py` script. The `Client.py` script can then be used by physicians on their device to record patient-physician conversations after a signed consent is obtained and process the result into a SOAP note. 
+This application is an extension of Dr. Braedon Hendy's AI-Scribe Python script. It is maintained by the ClinicianFOCUS team at the Conestoga College SMART Center. The goal of this project is to provide an easy-to-install Medical Scribe application. This application can run locally on your machine (no potential sharing of personal health data) or can connect to a Large Language Model (LLM) and Whisper (Speech2Text) Server on your network or to a remote one like ChatGPT. To download, head over to our latest [releases](https://github.com/ClinicianFOCUS/FreeScribe/releases). + +Please note this application is still in an alpha state. Feel free to contribute, connect, or inquire in our Discord, where the majority of project communication occurs. Join our [Discord](https://discord.gg/zpQTGVEVbH) ([discord.gg/zpQTGVEVbH](https://discord.gg/zpQTGVEVbH)). + +### Note from the original creator and active contributor Dr. Braedon Hendy: + +> This is a script that I worked on to help empower physicians to alleviate the burden of documentation by utilizing a medical scribe to create SOAP notes. Expensive solutions could potentially share personal health information with their cloud-based operations. The application can then be used by physicians on their device to record patient-physician conversations after a signed consent is obtained and process the result into a SOAP note. 
> > Regards, -> > Braedon Hendy diff --git a/src/FreeScribe.client/markdown/help/settings.md b/src/FreeScribe.client/markdown/help/settings.md index a8b6a202..e0e5d967 100644 --- a/src/FreeScribe.client/markdown/help/settings.md +++ b/src/FreeScribe.client/markdown/help/settings.md @@ -1,170 +1,218 @@ # Settings Documentation ## General Settings -- **Show Welcome Message** - - Description: Display welcome message on startup - - Default: `true` - - Type: boolean -- **Show Scrub PHI** - - Description: Enable/Disable Scrub PHI (Only for local llm and private network RFC 18/19) - - Default: `false` - - Type: boolean +### Show Welcome Message +**Description**: Display welcome message on application startup +**Default**: `true` +**Type**: boolean + +### Show Scrub PHI +**Description**: Enable/Disable Scrub PHI (Only for local LLM and private network RFC 18/19). Scrub PHI is used to remove potentially sensitive data before feeding it to a Large Language Model. Please note it is still your responsibility to ensure that all data being sent contains no sensitive data. +**Default**: `false` +**Type**: boolean + ## Whisper Settings -- **Whisper Endpoint** - - Description: API endpoint for Whisper service - - Default: `https://localhost:2224/whisperaudio` - - Type: string -- **Whisper Server API Key** - - Description: API key for Whisper service authentication - - Default: `None` - - Type: string -- **Whisper Model** - - Description: Whisper model to use for speech recognition - - Default: `small.en` - - Type: string -- **Local Whisper** - - Description: Use local Whisper instance instead of cloud service - - Default: `false` - - Type: boolean -- **Real Time** - - Description: Enable real-time processing - - Default: `false` - - Type: boolean +### Whisper Endpoint +**Description**: API endpoint for Whisper service. This sends a wav file from the client to the endpoint. Default is set to the Local Whisper container provided by ClinicianFOCUS. 
+**Default**: `https://localhost:2224/whisperaudio` +**Type**: string + +### Whisper Server API Key +**Description**: API key for Whisper service authentication. +**Default**: `None` +**Type**: string + +### Whisper Model +**Description**: Whisper model to use for speech recognition. Only applies to the local model. + +Size of the model to use (tiny, tiny.en, base, base.en, +small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1, +large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo), +a path to a converted model directory, or a CTranslate2-converted Whisper model ID from +the HF Hub. When a size or a model ID is configured, the converted model is downloaded +from the Hugging Face Hub. + +**Default**: `small.en` +**Type**: string + +### Local Whisper +**Description**: Use a local Whisper instance instead of a remote Whisper service. +**Default**: `true` +**Type**: boolean + +### Real Time +**Description**: Enable real-time processing. This will send audio chunks to Whisper when silence is detected and 5 seconds of audio has been recorded. This setting is recommended as you will get real-time transcription of your conversation. It is also the most efficient. +**Default**: `true` +**Type**: boolean + ## LLM Settings -- **Model Endpoint** - - Description: API endpoint URL for the model service - - Default: `https://api.openai.com/v1/` - - Type: string -- **Use Local LLM** - - Description: Toggle to use a locally hosted language model instead of cloud service - - Default: `false` - - Type: boolean +### Model Endpoint +**Description**: API endpoint URL for the Large Language Model. It must follow the OpenAI API style. +**Default**: `https://localhost:3334/v1/` +**Type**: string + +### Use Local LLM +**Description**: Toggle to use a locally built-in language model instead of the remote service. 
+**Default**: `true` +**Type**: boolean + ## Advanced Settings -- **use_story** - - Description: Enable story context for generation - - Default: `false` - - Type: boolean -- **use_memory** - - Description: Enable memory context for generation - - Default: `false` - - Type: boolean -- **use_authors_note** - - Description: Enable author's notes in generation - - Default: `false` - - Type: boolean -- **use_world_info** - - Description: Enable world information in context - - Default: `false` - - Type: boolean -- **Enable Scribe Template** - - Description: Enable Scribe template functionality - - Default: `false` - - Type: boolean -- **max_context_length** - - Description: Maximum number of tokens in the context window - - Default: `5000` - - Type: integer -- **max_length** - - Description: Maximum length of generated text - - Default: `400` - - Type: integer -- **rep_pen** - - Description: Repetition penalty factor - - Default: `1.1` - - Type: float -- **rep_pen_range** - - Description: Token range for repetition penalty - - Default: `5000` - - Type: integer -- **rep_pen_slope** - - Description: Slope of repetition penalty curve - - Default: `0.7` - - Type: float -- **temperature** - - Description: Controls randomness in generation (higher = more random) - - Default: `0.1` - - Type: float -- **tfs** - - Description: Tail free sampling parameter - - Default: `0.97` - - Type: float -- **top_a** - - Description: Top-A sampling parameter - - Default: `0.8` - - Type: float -- **top_k** - - Description: Top-K sampling parameter - - Default: `30` - - Type: integer -- **top_p** - - Description: Top-P (nucleus) sampling parameter - - Default: `0.4` - - Type: float -- **typical** - - Description: Typical sampling parameter - - Default: `0.19` - - Type: float -- **sampler_order** - - Description: Order of sampling methods to apply - - Default: `[6, 0, 1, 3, 4, 2, 5]` - - Type: string (JSON array) -- **singleline** - - Description: Output single line responses only - - Default: 
`false` - - Type: boolean -- **frmttriminc** - - Description: Trim incomplete sentences from output - - Default: `false` - - Type: boolean -- **frmtrmblln** - - Description: Remove blank lines from output - - Default: `false` - - Type: boolean -- **Use best_of** - - Description: Enable best-of sampling - - Default: `false` - - Type: boolean -- **best_of** - - Description: Number of completions to generate and select from - - Default: `2` - - Type: integer -- **Real Time Audio Length** - - Description: Length of audio segments for real-time processing (seconds) - - Default: `5` - - Type: integer -- **Use Pre-Processing** - - Description: Enable text pre-processing - - Default: `true` - - Type: boolean -- **Use Post-Processing** - - Description: Enable text post-processing - - Default: `false` - - Type: boolean -## Docker Settings -- **LLM Container Name** - - Description: Docker container name for LLM service - - Default: `ollama` - - Type: string -- **LLM Caddy Container Name** - - Description: Docker container name for Caddy reverse proxy - - Default: `caddy-ollama` - - Type: string -- **LLM Authentication Container Name** - - Description: Docker container name for authentication service - - Default: `authentication-ollama` - - Type: string -- **Whisper Container Name** - - Description: Docker container name for Whisper service - - Default: `speech-container` - - Type: string -- **Whisper Caddy Container Name** - - Description: Docker container name for Whisper Caddy service - - Default: `caddy` - - Type: string -- **Auto Shutdown Containers on Exit** - - Description: Automatically stop Docker containers on application exit - - Default: `true` - - Type: boolean -- **Use Docker Status Bar** - - Description: Show Docker container status in UI - - Default: `false` - - Type: boolean \ No newline at end of file + + + + + + +### temperature +**Description**: Controls randomness in generation (higher = more random). Gives the LLM more freedom and creativity. 
More [here](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature) +**Default**: `0.1` +**Type**: float + + + + + + + +### top_p +**Description**: Top-P (nucleus) sampling parameter. More info [here](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p). +**Default**: `0.4` +**Type**: float + + + + + +### Use best_of +**Description**: Enable best-of sampling +**Default**: `false` +**Type**: boolean + +### best_of +**Description**: Number of completions to generate and select from. More [here](https://platform.openai.com/docs/api-reference/completions/create#completions-create-best_of). +**Default**: `2` +**Type**: integer + +### Real Time Audio Length +**Description**: Length of audio segments for real-time processing (seconds) +**Default**: `5` +**Type**: integer + +### Use Pre-Processing +**Description**: Enable text pre-processing +**Default**: `true` +**Type**: boolean + +### Use Post-Processing +**Description**: Enable text post-processing +**Default**: `false` +**Type**: boolean + + diff --git a/src/FreeScribe.client/markdown/welcome.md b/src/FreeScribe.client/markdown/welcome.md index a5fe7c46..d7611d45 100644 --- a/src/FreeScribe.client/markdown/welcome.md +++ b/src/FreeScribe.client/markdown/welcome.md @@ -11,8 +11,7 @@ The FreeScribe project leverages advanced machine learning models to transcribe - **Real-time Transcription**: Transcribe conversations in real-time using advanced speech recognition models. - **Medical Note Generation**: Automatically generate structured medical notes from transcriptions. - **User-Friendly Interface**: Intuitive and easy-to-use interface for healthcare professionals. -- **Customizable Settings**: Customize the application settings to suit your workflow. -- **Docker Integration**: Easily manage the application using Docker containers. 
+- **Customizable Settings**: Customize the application settings to suit your workflow.+` ## Discord Community @@ -20,18 +19,6 @@ Join our Discord community to connect with other users, get support, and collabo [Join our Discord Community](https://discord.gg/6DnPENSn) -## Contributing - -We welcome contributions to the FreeScribe project! To contribute: - -1. Fork the [repository](https://github.com/ClinicianFOCUS/FreeScribe). -2. Create a new branch (`git checkout -b feature/your-feature`). -3. Make your changes and commit them (`git commit -m 'Add some feature'`). -4. Push to the branch (`git push origin feature/your-feature`). -5. Open a pull request. - -Please ensure your code adheres to our coding standards and includes appropriate tests. - ## License This project is licensed under the MIT License. See the [LICENSE](https://github.com/ClinicianFOCUS/FreeScribe/blob/main/LICENSE.txt) file for more information. diff --git a/src/FreeScribe.client/presets/ChatGPT.json b/src/FreeScribe.client/presets/ChatGPT.json deleted file mode 100644 index 2d4c2e57..00000000 --- a/src/FreeScribe.client/presets/ChatGPT.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "openai_api_key": "None", - "editable_settings": { - "Model": "gpt-4o-mini", - "Model Endpoint": "https://api.openai.com/v1/", - "use_story": 0, - "use_memory": 0, - "use_authors_note": 0, - "use_world_info": 0, - "max_context_length": 5000, - "max_length": 400, - "rep_pen": "1.1", - "rep_pen_range": 5000, - "rep_pen_slope": "0.7", - "temperature": "0.1", - "tfs": "0.97", - "top_a": "0.8", - "top_k": 30, - "top_p": "0.4", - "typical": "0.19", - "sampler_order": "[6, 0, 1, 3, 4, 2, 5]", - "singleline": 0, - "frmttriminc": 0, - "frmtrmblln": 0, - "Local Whisper": 1, - "Whisper Endpoint": "https://localhost:2224/whisperaudio", - "Whisper Server API Key": "None", - "Whisper Model": "small.en", - "Real Time": 1, - "Real Time Audio Length": "5", - "Real Time Silence Length": 1, - "Silence cut-off": 0.035003662109375, - "LLM 
Container Name": "ollama", - "LLM Caddy Container Name": "caddy-ollama", - "Whisper Container Name": "speech-container", - "Whisper Caddy Container Name": "caddy", - "Auto Shutdown Containers on Exit": 1, - "Use Docker Status Bar": 0, - "Preset": "Custom", - "Use Local LLM": 0, - "Architecture": "CPU", - "best_of": "2", - "Use best_of": 0, - "LLM Authentication Container Name": "authentication-ollama", - "Show Welcome Message": 0, - "Enable Scribe Template": 0, - "Use Pre-Processing": 1, - "Use Post-Processing": 0, - "AI Server Self-Signed Certificates": 0, - "S2T Server Self-Signed Certificates": 0, - "Pre-Processing": "Please break down the conversation into a list of facts. Take the conversation and transform it to a easy to read list:\n\n", - "Post-Processing": "\n\nUsing the provided list of facts, review the SOAP note for accuracy. Verify that all details align with the information provided in the list of facts and ensure consistency throughout. Update or adjust the SOAP note as necessary to reflect the listed facts without offering opinions or subjective commentary. Ensure that the revised note excludes a \"Notes\" section and does not include a header for the SOAP note. 
Provide the revised note after making any necessary corrections.", - "Show Scrub PHI": 1 - }, - "api_style": "OpenAI" -} diff --git a/src/FreeScribe.client/presets/ClinicianFOCUS Toolbox.json b/src/FreeScribe.client/presets/ClinicianFOCUS Toolbox.json deleted file mode 100644 index 7c1b00ab..00000000 --- a/src/FreeScribe.client/presets/ClinicianFOCUS Toolbox.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "openai_api_key": "None", - "editable_settings": { - "Model": "gemma-2-2b-it-Q8_0.gguf", - "Model Endpoint": "http://localhost:3334/v1/", - "use_story": 0, - "use_memory": 0, - "use_authors_note": 0, - "use_world_info": 0, - "max_context_length": 5000, - "max_length": 400, - "rep_pen": "1.1", - "rep_pen_range": 5000, - "rep_pen_slope": "0.7", - "temperature": "0.1", - "tfs": "0.97", - "top_a": "0.8", - "top_k": 30, - "top_p": "0.4", - "typical": "0.19", - "sampler_order": "[6, 0, 1, 3, 4, 2, 5]", - "singleline": 0, - "frmttriminc": 0, - "frmtrmblln": 0, - "Local Whisper": 0, - "Whisper Endpoint": "https://localhost:2224/whisperaudio/", - "Whisper Server API Key": "None", - "Whisper Model": "small.en", - "Real Time": 1, - "Real Time Audio Length": "5", - "Real Time Silence Length": 1, - "Silence cut-off": 0.035003662109375, - "LLM Container Name": "ollama", - "LLM Caddy Container Name": "caddy-ollama", - "Whisper Container Name": "speech-container", - "Whisper Caddy Container Name": "caddy", - "Auto Shutdown Containers on Exit": 1, - "Use Docker Status Bar": 0, - "Preset": "Custom", - "Use Local LLM": 0, - "Architecture": "CPU", - "best_of": "2", - "Use best_of": 0, - "LLM Authentication Container Name": "authentication-ollama", - "Show Welcome Message": 0, - "Enable Scribe Template": 0, - "Use Pre-Processing": 1, - "Use Post-Processing": 0, - "AI Server Self-Signed Certificates": 0, - "S2T Server Self-Signed Certificates": 0, - "Pre-Processing": "Please break down the conversation into a list of facts. 
Take the conversation and transform it to a easy to read list:\n\n", - "Post-Processing": "\n\nUsing the provided list of facts, review the SOAP note for accuracy. Verify that all details align with the information provided in the list of facts and ensure consistency throughout. Update or adjust the SOAP note as necessary to reflect the listed facts without offering opinions or subjective commentary. Ensure that the revised note excludes a \"Notes\" section and does not include a header for the SOAP note. Provide the revised note after making any necessary corrections.", - "Show Scrub PHI": 0 - }, - "api_style": "OpenAI" -} diff --git a/src/FreeScribe.client/presets/Local AI.json b/src/FreeScribe.client/presets/Local AI.json deleted file mode 100644 index bf9cc27a..00000000 --- a/src/FreeScribe.client/presets/Local AI.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "openai_api_key": "None", - "editable_settings": { - "Model": "gemma2:2b-instruct-q8_0", - "Model Endpoint": "http://localhost:3334/v1/", - "use_story": 0, - "use_memory": 0, - "use_authors_note": 0, - "use_world_info": 0, - "max_context_length": 5000, - "max_length": 400, - "rep_pen": "1.1", - "rep_pen_range": 5000, - "rep_pen_slope": "0.7", - "temperature": "0.1", - "tfs": "0.97", - "top_a": "0.8", - "top_k": 30, - "top_p": "0.4", - "typical": "0.19", - "sampler_order": "[6, 0, 1, 3, 4, 2, 5]", - "singleline": 0, - "frmttriminc": 0, - "frmtrmblln": 0, - "Local Whisper": 1, - "Whisper Endpoint": "https://localhost:2224/whisperaudio/", - "Whisper Server API Key": "None", - "Whisper Model": "small.en", - "Real Time": 1, - "Real Time Audio Length": "5", - "Real Time Silence Length": 1, - "Silence cut-off": 0.035003662109375, - "LLM Container Name": "ollama", - "LLM Caddy Container Name": "caddy-ollama", - "Whisper Container Name": "speech-container", - "Whisper Caddy Container Name": "caddy", - "Auto Shutdown Containers on Exit": 1, - "Use Docker Status Bar": 0, - "Preset": "Custom", - "Use Local LLM": 1, - 
# ---------------------------------------------------------------------------
# src/FreeScribe.client/utils/OneInstance.py
# ---------------------------------------------------------------------------
# Application lock class to prevent multiple instances of an app from running
import tkinter as tk
from tkinter import messagebox
import psutil  # For process management
import sys
import ctypes
import os


class OneInstance:
    """
    Controls application instances to ensure only one is running.

    Detection is name-based: every process whose name equals
    ``app_task_manager_name`` (other than the current process) counts as an
    already-running instance.

    Args:
        app_name: Window title of the application
        app_task_manager_name: Process name as shown in Task Manager
    """

    def __init__(self, app_name, app_task_manager_name):
        self.app_name = app_name
        self.app_task_manager_name = app_task_manager_name
        self.root = None

    def get_running_instance_pid(self):
        """
        Finds PIDs of any running instances of the application, excluding the current process.

        Returns:
            list: PIDs of running instances, excluding the current process
        """
        current_pid = os.getpid()
        target_name = str(self.app_task_manager_name)
        running_pids = []
        for proc in psutil.process_iter(['pid', 'name']):
            try:
                info = proc.info
                if info['name'] == target_name and info['pid'] != current_pid:
                    running_pids.append(info['pid'])
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # The process vanished or is protected; it cannot be ours.
                continue
        return running_pids

    def kill_instance(self, pid):
        """
        Terminates specified process instance(s).

        Every PID is attempted even if an earlier one fails, so a single
        vanished or protected process does not leave the others running.

        Args:
            pid: Process ID (int) or list of PIDs to terminate

        Returns:
            bool: True if every termination request succeeded (also for an
            empty list), False if any PID could not be terminated or ``pid``
            is neither an int nor a collection of ints
        """
        if isinstance(pid, int):
            targets = [pid]
        elif isinstance(pid, (list, tuple, set)):
            targets = list(pid)
        else:
            return False

        all_terminated = True
        for target_pid in targets:
            try:
                psutil.Process(target_pid).terminate()
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Report the failure to the caller instead of crashing the
                # dialog callback, but keep going with the remaining PIDs.
                all_terminated = False
        return all_terminated

    def bring_to_front(self, app_name: str):
        """
        Bring the window with the given title to the front.

        Args:
            app_name (str): The name of the application window to bring to the front

        Returns:
            bool: True if a matching window was found and asked to come to the
            foreground; False on non-Windows platforms or when no window with
            that title exists
        """
        # TODO - Check platform and handle for different platform
        # For now, only Windows is supported
        if sys.platform != 'win32':
            return False

        user32 = ctypes.WinDLL('user32')
        SW_SHOW = 5
        hwnd = user32.FindWindowW(None, app_name)
        if not hwnd:
            # FindWindowW returns NULL when no top-level window matches.
            return False
        user32.ShowWindow(hwnd, SW_SHOW)
        user32.SetForegroundWindow(hwnd)
        return True

    def _handle_kill(self, dialog, pid):
        """Handles clicking 'Close Existing Instance' button"""
        terminated = self.kill_instance(pid)
        if not terminated:
            messagebox.showerror("Error", "Failed to terminate existing instance")
        # return_status is True when the existing instance keeps running.
        dialog.return_status = not terminated
        dialog.destroy()

    def _handle_cancel(self, dialog):
        """Handles clicking 'Cancel' button"""
        dialog.return_status = True
        dialog.destroy()
        self.bring_to_front(self.app_name)

    def show_instance_dialog(self):
        """
        Shows dialog when another instance is detected.
        Allows user to close existing instance or cancel.

        Returns:
            bool: True if existing instance continues, False if terminated
            (or if no other instance was found)
        """
        pid = self.get_running_instance_pid()

        if not pid:
            return False

        dialog = tk.Tk()
        dialog.title("FreeScribe Instance")
        dialog.geometry("300x150")
        dialog.attributes("-topmost", True)
        dialog.lift()
        dialog.focus_force()

        # Default covers the user closing the window without choosing.
        dialog.return_status = True

        label = tk.Label(dialog, text="Another instance of FreeScribe is already running.\nWhat would you like to do?")
        label.pack(pady=20)

        tk.Button(dialog, text="Close Existing Instance", command=lambda: self._handle_kill(dialog, pid)).pack(padx=5, pady=5)
        tk.Button(dialog, text="Cancel", command=lambda: self._handle_cancel(dialog)).pack(padx=5, pady=2)

        dialog.mainloop()
        return dialog.return_status

    def run(self):
        """
        Main entry point to check for existing instances.

        Returns:
            bool: True if existing instance continues, False if none exists or terminated
        """
        # show_instance_dialog() already returns False when nothing else is
        # running, so the process table only needs to be scanned once.
        return self.show_instance_dialog()


# ---------------------------------------------------------------------------
# src/FreeScribe.client/utils/utils.py
# ---------------------------------------------------------------------------
import ctypes
from utils.file_utils import get_file_path

# Define the mutex name and error code
MUTEX_NAME = 'Global\\FreeScribe_Instance'
ERROR_ALREADY_EXISTS = 183

# Global variable to store the mutex handle
mutex = None


def window_has_running_instance() -> bool:
    """
    Check if another instance of the application is already running.

    Creates (or opens) the named mutex ``MUTEX_NAME`` and stores its handle in
    the module-level ``mutex`` so it can be released with ``close_mutex()``.

    Returns:
        bool: True if another instance is running, False otherwise
    """
    global mutex

    # Drop a handle left over from an earlier call so it is not leaked.
    if mutex:
        close_mutex()

    # use_last_error makes ctypes capture the error code right after the call;
    # asking kernel32.GetLastError() separately can return a stale value.
    kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
    kernel32.CreateMutexW.restype = ctypes.c_void_p
    kernel32.CreateMutexW.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_wchar_p]

    # Create a named mutex
    mutex = kernel32.CreateMutexW(None, False, MUTEX_NAME)
    return ctypes.get_last_error() == ERROR_ALREADY_EXISTS


def bring_to_front(app_name: str):
    """
    Bring the window with the given title to the front.

    Does nothing when no top-level window with that title exists.

    Args:
        app_name (str): The name of the application window to bring to the front
    """
    # TODO - Check platform and handle for different platform
    user32 = ctypes.WinDLL('user32')
    SW_SHOW = 5
    hwnd = user32.FindWindowW(None, app_name)
    if not hwnd:
        return
    user32.ShowWindow(hwnd, SW_SHOW)
    user32.SetForegroundWindow(hwnd)


def close_mutex():
    """
    Close the mutex handle to release the resource.

    Safe to call when no mutex is held; in that case it does nothing.
    """
    global mutex
    if not mutex:
        return

    kernel32 = ctypes.WinDLL('kernel32')
    handle = ctypes.c_void_p(mutex)
    # NOTE(review): the mutex is created without initial ownership, so this
    # release presumably fails harmlessly unless other code waited on it.
    kernel32.ReleaseMutex(handle)
    kernel32.CloseHandle(handle)
    mutex = None


def get_application_version():
    """
    Read the application version string from the ``__version__`` file.

    Returns:
        str: The stripped file contents, or ``"vx.x.x.alpha"`` when the file
        cannot be read or is empty
    """
    default_version = "vx.x.x.alpha"
    try:
        # utf-8-sig also accepts plain UTF-8 and strips a BOM if one is present.
        with open(get_file_path('__version__'), 'r', encoding='utf-8-sig') as file:
            version_str = file.read().strip()
    except Exception as e:
        # Best effort: a missing or unreadable file must not stop the app.
        print(f"Error loading version file ({type(e).__name__}). {e}")
        return default_version
    # Returning here rather than from a finally block lets KeyboardInterrupt
    # and SystemExit propagate.
    return version_str or default_version