-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlib.py
More file actions
88 lines (71 loc) · 2.71 KB
/
lib.py
File metadata and controls
88 lines (71 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import json
import io
import fitz
import minsearch
from deep_translator import GoogleTranslator
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload
# Read a PDF and return its text
def readPDF(file):
    """Extract the plain text of every page of a PDF.

    Args:
        file: Object with a ``read()`` method that returns the raw PDF
            bytes (e.g. an uploaded file or an open binary file).

    Returns:
        str: The text of all pages concatenated in page order; an empty
        string when the document has no pages.
    """
    # Open the document as a context manager so it is closed even if text
    # extraction raises part-way through.
    with fitz.open(stream=file.read(), filetype="pdf") as doc:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text() for page in doc)
# Retrieval-augmented generation: enrich the user input with indexed context
def rag(text, data):
    """Prepend the most relevant documents from ``data`` to ``text``.

    The input is translated to English before it is used as the search
    query; the original (untranslated) ``text`` is what gets appended to
    the retrieved context.

    Args:
        text: The user input.
        data: Documents to search. Each document is indexed on its
            ``"content"`` and ``"input"`` fields and must provide
            ``"content"``, which is what gets returned as context.
            May be empty or ``None``.

    Returns:
        str: The ``"content"`` of up to five matching documents joined by
        blank lines, followed by a blank line and ``text``. When ``data``
        is empty or nothing matches, ``text`` is returned unchanged.
    """
    # Nothing to retrieve from: skip building the index and the (remote)
    # translation call entirely.
    if not data:
        return text

    index = minsearch.Index(text_fields=["content", "input"], keyword_fields=[])
    index.fit(data)

    # Search with the English translation of the input.
    translatedText = GoogleTranslator(source='auto', target='en').translate(text)
    searchResult = index.search(
        query=translatedText,
        # Matches in "content" weigh five times more than matches in "input".
        boost_dict={'content': 5.0, 'input': 1},
        # Never ask for more results than there are documents.
        num_results=min(len(data), 5),
        relevance_threshold=0.1
    )
    if not searchResult:
        return text

    retrievedContext = "\n\n".join(result["content"] for result in searchResult)
    return retrievedContext + "\n\n" + text
# Split text into chunks
def textChunk(text, size=500):
    """Yield ``text`` in chunks of at most ``size`` whitespace-separated words.

    Words are obtained with ``str.split()``, so any run of whitespace
    (including newlines) is collapsed into a single space in the output.

    Args:
        text: The text to split.
        size: Maximum number of words per chunk; must be positive.

    Yields:
        str: Consecutive chunks, each with its words joined by single spaces.
        Nothing is yielded when ``text`` contains no words.

    Raises:
        ValueError: If ``size`` is not positive. Raised when iteration
            starts, because this is a generator.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step fails with an unrelated-looking range() error.
    if size <= 0:
        raise ValueError("size must be a positive integer")

    tokens = text.split()
    for start in range(0, len(tokens), size):
        yield ' '.join(tokens[start:start + size])
# Sliding window over the conversation memory
def slidingWindowContext(messages, window_size=5):
    """Return the last ``window_size`` entries of ``messages``.

    Args:
        messages: Sliceable sequence holding the conversation history,
            oldest first.
        window_size: Maximum number of most recent entries to keep.

    Returns:
        A new sequence of the same type as ``messages`` containing at most
        ``window_size`` trailing entries. A non-positive ``window_size``
        gives an empty sequence.
    """
    # messages[-0:] is messages[0:], i.e. the *whole* history, and a negative
    # size would slice from the front; both mean "keep nothing" here.
    if window_size <= 0:
        return messages[:0]
    return messages[-window_size:]
# Authenticate against Google Drive with a service account
def googleAuth():
    """Build a Google Drive v3 client from service-account credentials.

    The service-account key is read as a JSON string from the
    ``GOOGLE_SERVICE_ACCOUNT_INFO`` environment variable.

    Returns:
        The Drive service object created by ``build('drive', 'v3', ...)``.

    Raises:
        TypeError: If ``GOOGLE_SERVICE_ACCOUNT_INFO`` is not set.
        json.JSONDecodeError: If the variable does not contain valid JSON.
    """
    rawInfo = os.getenv('GOOGLE_SERVICE_ACCOUNT_INFO')
    if rawInfo is None:
        # json.loads(None) used to fail here with an opaque "the JSON object
        # must be str, bytes or bytearray" message. The exception type is
        # kept as TypeError so existing handlers still work.
        raise TypeError(
            "GOOGLE_SERVICE_ACCOUNT_INFO environment variable is not set; "
            "expected the service account key as a JSON string"
        )

    # Parse the service-account key.
    serviceAccInfo = json.loads(rawInfo)
    # Create credentials from the service-account key.
    credentials = Credentials.from_service_account_info(serviceAccInfo)
    # Create the Google Drive service instance.
    return build('drive', 'v3', credentials=credentials)
# Save the bot's memory to Google Drive
def saveToDrive(messages, drive, fileName, folderId = 'None'):
    """Upload ``messages`` to Google Drive as a new JSON file.

    Args:
        messages: JSON-serializable message history.
        drive: Drive service object exposing ``files().create(...)``,
            e.g. the one returned by ``googleAuth()``.
        fileName: Name given to the created Drive file.
        folderId: ID of the parent folder. When omitted, empty, ``None`` or
            the literal string ``'None'``, no parent is set.

    Returns:
        The ``id`` field of the API response for the created file.
    """
    # Serialize straight to UTF-8 bytes; indent=4 keeps the file readable.
    payload = json.dumps(messages, indent=4).encode('utf-8')

    # Build the file metadata.
    fileMetadata = {'name': fileName}
    # The default is the *string* 'None', which is truthy: without the second
    # check every default call would send parents=['None'] to the API.
    if folderId and folderId != 'None':
        fileMetadata['parents'] = [folderId]

    # Upload the file to Google Drive.
    media = MediaIoBaseUpload(io.BytesIO(payload), mimetype='application/json')
    created = drive.files().create(body=fileMetadata, media_body=media, fields='id').execute()
    return created.get('id')