# test.py
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import boto3
import json
from datetime import datetime
# Bedrock Runtime client bound to the AWS Region the request is sent to.
client = boto3.client("bedrock-runtime", region_name="eu-west-3")

# Identifier passed as ``modelId`` when the model is invoked.
MODEL_ID = "eu.amazon.nova-pro-v1:0"

# System prompt entries sent under the "system" key of the request.
system_list = [
    {
        "text": (
            "Act as a creative writing assistant. "
            "When the user provides you with a topic, "
            "write a short story about that topic."
        ),
    },
]

# Conversation turns sent under the "messages" key; here a single user turn.
user_turn = {"role": "user", "content": [{"text": "A camping trip"}]}
message_list = [user_turn]

# Inference parameters sent under the "inferenceConfig" key.
inf_params = {
    "maxTokens": 500,
    "topP": 0.9,
    "topK": 20,
    "temperature": 0.7,
}

# Full request payload; it is JSON-serialized when the model is invoked.
request_body = {
    "schemaVersion": "messages-v1",
    "messages": message_list,
    "system": system_list,
    "inferenceConfig": inf_params,
}
# Timestamp taken just before the request is sent; the time to first token
# is later computed relative to it.
start_time = datetime.now()

# Serialize the payload and invoke the model through the streaming API.
serialized_body = json.dumps(request_body)
response = client.invoke_model_with_response_stream(
    body=serialized_body,
    modelId=MODEL_ID,
)

# Report the request ID found in the response metadata, then announce that
# the script is waiting for streamed output.
response_metadata = response.get("ResponseMetadata")
request_id = response_metadata.get("RequestId")
print(f"Request ID: {request_id}")
print("Awaiting first token...")
# Running totals for the streamed response.
chunk_count = 0
time_to_first_token = None

# Process the response stream. Each event may wrap a "chunk" whose bytes
# decode to a JSON document; only documents carrying a "contentBlockDelta"
# are counted and printed.
stream = response.get("body")
if stream:
    for event in stream:
        chunk = event.get("chunk")
        if not chunk:
            continue

        chunk_json = json.loads(chunk.get("bytes").decode())
        content_block_delta = chunk_json.get("contentBlockDelta")
        if not content_block_delta:
            continue

        # Latency is measured up to the first content delta, not the first
        # event of any kind.
        if time_to_first_token is None:
            time_to_first_token = datetime.now() - start_time
            print(f"Time to first token: {time_to_first_token}")

        chunk_count += 1

        # A delta without a "text" field would otherwise be printed as the
        # literal string "None"; count it but print nothing.
        delta = content_block_delta.get("delta") or {}
        text = delta.get("text")
        if text is not None:
            # Flush on every fragment: the text has no trailing newline, so
            # a buffered stdout could hold it back instead of showing it as
            # it arrives.
            print(text, end="", flush=True)

    # The fragments are printed without a newline; terminate that line so
    # the summary starts on a line of its own.
    if chunk_count:
        print()
    print(f"Total chunks: {chunk_count}")
else:
    print("No response stream received.")