Context-Engine/test_gpu_switch.py at mit-base-salvage · voarsh2/Context-Engine · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
"""
Test script to verify GPU decoder switching functionality.

Usage:
    # Test Docker CPU-only decoder
    python test_gpu_switch.py

    # Test native GPU-accelerated decoder
    USE_GPU_DECODER=1 python test_gpu_switch.py
"""

import os
import sys

def load_env_file():
    """Load environment variables from the ``.env`` file in the current directory.

    Each non-blank, non-comment line of the form ``KEY=VALUE`` is applied to
    ``os.environ``. Variables that are already set in the environment are
    never overwritten, so values supplied on the command line win over the
    file. A missing ``.env`` file is silently ignored.

    Parsing rules:
        * Whitespace around the key and the value is ignored.
        * An optional leading ``export `` on the key is dropped.
        * One pair of matching single or double quotes around the value is
          removed.
        * Lines without ``=`` or with an empty key are skipped.

    Returns:
        None
    """
    env_file = '.env'
    if not os.path.exists(env_file):
        return

    with open(env_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            # Split on the first '=' only so values may themselves contain '='.
            key, value = line.split('=', 1)

            key = key.strip()
            if key.startswith('export '):
                key = key[len('export '):].strip()
            if not key:
                # An empty name cannot be stored in the environment.
                continue

            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]

            # Only set if not already in environment.
            os.environ.setdefault(key, value)

def test_decoder_url_resolution():
    """Check that the resolved decoder URL points at a reachable, healthy server.

    Prints the ``USE_GPU_DECODER`` value from the environment and the
    ``base_url`` of a freshly constructed ``LlamaCppRefragClient``, then sends
    a GET request to ``<base_url>/health`` with a 5 second timeout.

    Returns:
        bool: True when the health endpoint answers with HTTP 200. False for
        any other status and for any failure along the way, including a
        failure to import or construct the client, so the caller can always
        print its summary instead of dying with a traceback.
    """
    # Test current configuration
    use_gpu = os.environ.get("USE_GPU_DECODER", "0")
    print(f"USE_GPU_DECODER = {use_gpu}")

    # Import the client and create it. This is guarded because the module is
    # looked up under ./scripts relative to the current working directory and
    # may be missing when the script is launched from somewhere else.
    try:
        if 'scripts' not in sys.path:
            sys.path.insert(0, 'scripts')
        from refrag_llamacpp import LlamaCppRefragClient

        client = LlamaCppRefragClient()
    except Exception as e:
        print(f"FAIL: Could not create decoder client: {e}")
        return False

    print(f"Resolved decoder URL: {client.base_url}")

    # Test health endpoint
    try:
        import urllib.request

        health_url = client.base_url.rstrip('/') + '/health'

        # For Docker service names, try localhost equivalent when running on host
        if 'llamacpp:8080' in health_url:
            health_url = health_url.replace('llamacpp:8080', 'localhost:8080')
            print(f"Testing health endpoint: {health_url} (Docker service via localhost)")
        else:
            print(f"Testing health endpoint: {health_url}")

        req = urllib.request.Request(health_url, method='GET')
        with urllib.request.urlopen(req, timeout=5) as resp:
            if resp.status == 200:
                print("PASS: Decoder server is healthy and reachable")
                return True
            print(f"FAIL: Decoder server returned status {resp.status}")
            return False

    except Exception as e:
        # Covers connection errors, timeouts and HTTP error statuses raised
        # by urlopen.
        print(f"FAIL: Failed to reach decoder server: {e}")
        return False

def test_simple_completion():
    """Send one small completion request to the decoder and report the outcome.

    The request is made through
    ``LlamaCppRefragClient.generate_with_soft_embeddings`` with a fixed prompt.
    When the client's ``base_url`` contains the Docker service address
    ``llamacpp:8080`` it is rewritten to ``localhost:8080`` so the check can
    run from the host.

    Returns:
        bool: True when the request completes and the first 100 characters of
        the response could be printed. False when the decoder is disabled or
        when anything fails, including a failure to import the client module,
        so the caller can always print its summary.
    """
    try:
        # The module is looked up under ./scripts relative to the current
        # working directory; a missing module is reported as a failure below.
        if 'scripts' not in sys.path:
            sys.path.insert(0, 'scripts')
        from refrag_llamacpp import LlamaCppRefragClient, is_decoder_enabled

        if not is_decoder_enabled():
            print("FAIL: Decoder is disabled. Set REFRAG_DECODER=1 to enable.")
            return False

        client = LlamaCppRefragClient()

        # For Docker service names, use localhost equivalent when running on host
        if 'llamacpp:8080' in client.base_url:
            test_url = client.base_url.replace('llamacpp:8080', 'localhost:8080')
            # Override the client's base_url for testing
            client.base_url = test_url
            print(f"Testing completion with decoder at: {test_url} (Docker service via localhost)")
        else:
            print(f"Testing completion with decoder at: {client.base_url}")

        response = client.generate_with_soft_embeddings(
            prompt="What is 2+2?",
            max_tokens=50,
            temperature=0.1,
        )

        print(f"PASS: Completion successful: {response[:100]}...")
        return True

    except Exception as e:
        print(f"FAIL: Completion failed: {e}")
        return False

if __name__ == "__main__":
    # Script entry point: run both checks, print a summary, and exit with
    # status 0 only when every check passed.
    print("Testing GPU decoder switching functionality\n")

    # Values from .env never override variables already in the environment.
    load_env_file()

    # The completion check needs the decoder enabled; respect an explicit
    # setting if one is already present.
    if "REFRAG_DECODER" not in os.environ:
        os.environ["REFRAG_DECODER"] = "1"

    print("1. Testing URL resolution...")
    url_ok = test_decoder_url_resolution()

    print("\n2. Testing simple completion...")
    completion_ok = test_simple_completion()

    verdicts = ("FAIL", "PASS")
    print("\nResults:")
    print("   URL Resolution: " + verdicts[bool(url_ok)])
    print("   Completion Test: " + verdicts[bool(completion_ok)])

    if not (url_ok and completion_ok):
        print("\nSome tests failed. Check your decoder setup.")
        sys.exit(1)

    print("\nAll tests passed! GPU switching is working correctly.")
    sys.exit(0)