Custom-Data-Driven-Chatbot/chatbot.py at main · ARa2of/Custom-Data-Driven-Chatbot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import gradio as gr
import os
from langchain.document_loaders import DirectoryLoader, CSVLoader, UnstructuredExcelLoader
from langchain.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
import webbrowser
import threading
import time

# Get the current working directory
current_directory = os.getcwd()
# === File Paths ===
openai_api_key = "ADD YOUR OPENAI API KEY HERE"
DB_PATH = current_directory+"/Database"
DATA_PATH = current_directory+"/Data"
Memory_Path = current_directory+"/Memory"
Historylog= Memory_Path+"/Historylog.txt"


# Function to open the browser after a delay
def open_browser():
    time.sleep(5)  # Wait for 5 seconds to ensure the server is ready
    webbrowser.open("http://127.0.0.1:7860/")

# Start the browser in a separate thread
threading.Thread(target=open_browser).start()

# === Function to Save History ===
def save_history(message):
    # Open the file in append mode and add the message
    with open(Historylog, "a") as f:
        f.write(f"{message}\n")

# === Load Stored Embeddings ===
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
vectordb = Chroma(persist_directory=DB_PATH, embedding_function=embedding)

# === Create a Retriever ===
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# === Load the Language Model ===
llm = ChatOpenAI(model_name="gpt-4o", temperature=0.5, openai_api_key=openai_api_key)

# === Add Memory for Conversational Context ===
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer"  # Ensures only "answer" is stored in memory
)

# === Set up the Conversational Chain ===
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True  # Enables both database and general AI reasoning
)

# === Load Conversation History ===
conversation_history = []

# === Gradio Chatbot Function ===
def chatbot_response(message, history):
    """Handles user messages and returns chatbot responses."""

    response = qa_chain.invoke({"question": message, "chat_history": conversation_history})

    # Update conversation history
    conversation_history.append((message, response["answer"]))

    if isinstance(response, tuple):
        response = response[0]  # Extract the first element if it's a tuple
    elif isinstance(response, dict) and "answer" in response:
        response = response["answer"]

    save_history(f"User: {message}\n")
    save_history(f"Chatbot: {response}\n")

    return str(response)   # Return updated history list

# === Gradio UI ===
# chatbot_ui.launch(share=True)#to share with others
def Update_Memory():
    docs=[]
    txt_loader = DirectoryLoader(Memory_Path, glob="*.txt", loader_cls=TextLoader)
    docs.extend(txt_loader.load())
    # Check if new documents exist
    if docs:
        print("📂 Updating Memory - Processing...")
        # Split new text
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)
        # ✅ Add new documents to the existing database
        vectordb.add_documents(chunks)
        # ✅ Persist the updated database
        vectordb.persist()
        print("✅ Memory updated successfully!")
    else:
        print("🚀 Memory is already up to date.")

def Clear_History():
    with open(Historylog, "w") as f:
        pass  # Opening in "w" mode and doing nothing clears the file

def create_update_database():
    embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Check if database exists
    if os.path.exists(DB_PATH):
        print("📂 Database found. Updating with new documents...")
        vectordb = Chroma(persist_directory=DB_PATH, embedding_function=embedding)
    else:
        print("🚀 No database found. Creating a new one...")
        os.makedirs(DB_PATH, exist_ok=True)
        vectordb = None  # Initialize as None for now

    # Load documents
    docs = []

    # Load PDF files
    pdf_loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
    docs.extend(pdf_loader.load())

    # Load TXT files
    txt_loader = DirectoryLoader(DATA_PATH, glob="*.txt", loader_cls=TextLoader)
    docs.extend(txt_loader.load())

    # Load CSV files
    csv_loader = DirectoryLoader(DATA_PATH, glob="*.csv", loader_cls=CSVLoader)
    docs.extend(csv_loader.load())

    # Load DOCX files
    docx_loader = DirectoryLoader(DATA_PATH, glob="*.docx", loader_cls=UnstructuredWordDocumentLoader)
    docs.extend(docx_loader.load())

    # Load XLSX files
    xlsx_loader = DirectoryLoader(DATA_PATH, glob="*.xlsx", loader_cls=UnstructuredExcelLoader)
    docs.extend(xlsx_loader.load())

    # Check if new documents exist
    if not docs:
        print("✅ No new documents found. Database is up to date.")
    else:
        print(f"📄 Found {len(docs)} new documents. Processing...")

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = text_splitter.split_documents(docs)

        # Add to the existing database or create a new one
        if vectordb:
            vectordb.add_documents(chunks)
        else:
            vectordb = Chroma.from_documents(chunks, embedding, persist_directory=DB_PATH)
        # Save changes
        vectordb.persist()
        print("✅ Database updated successfully!")

with gr.Blocks() as ChatWeb:
    # Add the ChatInterface
    chatbot_ui = gr.ChatInterface(
        fn=chatbot_response,
        title="Your Custom Title",
        description="Your custom description.",
        theme="soft"
    )

    # Add a "Update Memory" button
    Update_MemoryButton = gr.Button("Update Memory (Save Context of Conversations in Database)")
    Update_MemoryButton.click(
        Update_Memory,  # Function to call
    )

    # Add a "Clear_History" button
    Clear_HistoryButton = gr.Button("Clear History (Clear Conversations History)")
    Clear_HistoryButton.click(
        Clear_History,  # Function to call
    )

    # Create/Update a database based on your data - add buttpm
    CU_DB_Button = gr.Button("Create/Update Database (Using your own files)")
    CU_DB_Button.click(
        create_update_database,  # Function to call
    )


ChatWeb.launch(server_name="0.0.0.0", server_port=7860, share=False)