-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathupload.py
More file actions
129 lines (109 loc) · 4.3 KB
/
upload.py
File metadata and controls
129 lines (109 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase import create_client, Client
from dotenv import load_dotenv
# Load settings from a local .env file into the process environment at import
# time, before the upload functions read SUPABASE_URL / SUPABASE_KEY via
# os.getenv (presumably those keys live in the .env file; confirm per deployment).
load_dotenv()
def uploadUcf() -> None:
    """Embed a user-chosen text file and save its chunks to the UCF table.

    Prompts on stdin for the path of a text file, splits the text into
    overlapping chunks, embeds the chunks with the Ollama model served at
    ``http://localhost:11434`` and writes them to the Supabase table
    ``documents`` (query function ``match_documents``).

    The Supabase connection is configured from the ``SUPABASE_URL`` and
    ``SUPABASE_KEY`` environment variables.

    If the entered path is not an existing file, or the file produces no
    chunks, a message is printed and the function returns without uploading,
    so an interactive caller can simply prompt again.
    """
    # -------------------------------------------------
    # Supabase client, configured from the environment
    # -------------------------------------------------
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_KEY")
    supabase: Client = create_client(supabase_url, supabase_key)

    # -------------------------------------------------
    # Ask for the text file and load it
    # -------------------------------------------------
    # Strip stray whitespace from the typed name and verify the file exists
    # up front: a typo should produce a readable message, not a traceback
    # that terminates the calling menu loop.
    txt_name = input("Enter the name of the txt file: ").strip()
    if not os.path.isfile(txt_name):
        print(f"File not found: {txt_name!r}. Nothing was uploaded.")
        return
    documents = TextLoader(txt_name).load()

    # -------------------------------------------------
    # Split the text into chunks
    # -------------------------------------------------
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    docs = text_splitter.split_documents(documents)
    if not docs:
        # An empty file yields no chunks; there is nothing to embed or store.
        print(f"No text found in {txt_name!r}. Nothing was uploaded.")
        return

    # -------------------------------------------------
    # Embedding model configuration
    # -------------------------------------------------
    embeddings = OllamaEmbeddings(
        base_url="http://localhost:11434",
        model="mxbai-embed-large:latest",
    )

    # -------------------------------------------------
    # Embed the chunks and store them in Supabase
    # -------------------------------------------------
    # The returned vector store object is not needed afterwards, so it is
    # deliberately not bound to a name.
    SupabaseVectorStore.from_documents(
        docs,
        embeddings,
        client=supabase,
        table_name="documents",
        query_name="match_documents",
    )
    print("Done! Your text chunks are now embedded and saved to Supabase.")
def upload_verilog() -> None:
    """Embed a user-chosen text file and save its chunks to the Verilog table.

    Prompts on stdin for the path of a text file, splits the text into
    overlapping chunks, embeds the chunks with the Ollama model served at
    ``http://localhost:11434`` and writes them to the Supabase table
    ``documents_verilog`` (query function ``match_documents_verilog``).

    The Supabase connection is configured from the ``SUPABASE_URL`` and
    ``SUPABASE_KEY`` environment variables.

    If the entered path is not an existing file, or the file produces no
    chunks, a message is printed and the function returns without uploading,
    so an interactive caller can simply prompt again.
    """
    # -------------------------------------------------
    # Supabase client, configured from the environment
    # -------------------------------------------------
    supabase_url = os.getenv("SUPABASE_URL")
    supabase_key = os.getenv("SUPABASE_KEY")
    supabase: Client = create_client(supabase_url, supabase_key)

    # -------------------------------------------------
    # Ask for the text file and load it
    # -------------------------------------------------
    # Strip stray whitespace from the typed name and verify the file exists
    # up front: a typo should produce a readable message, not a traceback
    # that terminates the calling menu loop.
    txt_name = input("Enter the name of the txt file: ").strip()
    if not os.path.isfile(txt_name):
        print(f"File not found: {txt_name!r}. Nothing was uploaded.")
        return
    documents = TextLoader(txt_name).load()

    # -------------------------------------------------
    # Split the text into chunks
    # -------------------------------------------------
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    docs = text_splitter.split_documents(documents)
    if not docs:
        # An empty file yields no chunks; there is nothing to embed or store.
        print(f"No text found in {txt_name!r}. Nothing was uploaded.")
        return

    # -------------------------------------------------
    # Embedding model configuration
    # -------------------------------------------------
    embeddings = OllamaEmbeddings(
        base_url="http://localhost:11434",
        model="mxbai-embed-large:latest",
    )

    # -------------------------------------------------
    # Embed the chunks and store them in Supabase
    # -------------------------------------------------
    # The returned vector store object is not needed afterwards, so it is
    # deliberately not bound to a name.
    SupabaseVectorStore.from_documents(
        docs,
        embeddings,
        client=supabase,
        table_name="documents_verilog",
        query_name="match_documents_verilog",
    )
    print("Done! Your text chunks are now embedded and saved to Supabase.")
def main():
    """Run the interactive upload menu until the user leaves it.

    Each pass prints the menu and reads one choice from stdin: "1" runs
    uploadUcf(), "2" runs upload_verilog(), and both return to the menu
    afterwards. "3" exits, and any other input is reported as invalid and
    also exits.
    """
    separator = "-------------------------------------------------------------------"
    menu_lines = (
        separator,
        "This is the upload script for the UCF and Verilog text files",
        separator,
        "Select one of the following options:",
        "1. Upload UCF text file",
        "2. Upload Verilog text file",
        "3. Exit",
        separator,
    )

    while True:
        for line in menu_lines:
            print(line)

        selection = input("Enter your choice: ")

        # The two upload options loop back to the menu when they finish.
        if selection == "1":
            uploadUcf()
            continue
        if selection == "2":
            upload_verilog()
            continue

        # Everything else ends the session; only the farewell text differs.
        if selection == "3":
            print("Exiting...")
        else:
            print("Invalid choice! Exiting...")
        return
# Start the interactive menu only when this file is executed as a script;
# importing the module just defines the functions.
if __name__ == "__main__":
    main()