-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: neural_diffusion.py
More file actions
80 lines (68 loc) · 2.41 KB
/
neural_diffusion.py
File metadata and controls
80 lines (68 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""VynFi neural diffusion — learned distributions instead of statistical (DS 3.0+).
DataSynth 3.0 adds three diffusion backends:
- ``statistical`` (default, Free+): classical distribution sampling
- ``neural`` (Scale+): score-matching MLP trained on fingerprinted data
- ``hybrid`` (Scale+): blend of both via ``hybridWeight``
This script demonstrates configuring the backend via the generate config.
No new SDK method — it's purely a config option under ``diffusion``.
"""
import os
import vynfi
client = vynfi.VynFi(api_key=os.environ["VYNFI_API_KEY"])
print("=== Configuring neural diffusion ===")
config = {
"sector": "retail",
"country": "US",
"accountingFramework": "us_gaap",
"rows": 2000,
"companies": 3,
"periods": 1,
"periodLength": "monthly",
"processModels": ["o2c"],
"exportFormat": "json",
# Neural diffusion backend config
"diffusion": {
"backend": "neural",
"nSteps": 100,
"schedule": "cosine",
"neural": {
"hiddenDims": [256, 256, 128],
"timestepEmbedDim": 64,
"learningRate": 0.001,
"trainingEpochs": 100,
"batchSize": 64,
},
},
}
# Estimate spend before submitting — the neural backend carries credit
# multipliers on top of the base cost.
estimate = client.configs.estimate_cost(config=config)
print(f" Base credits: {estimate.base_credits}")
print(f" Total credits: {estimate.total_credits}")
# `or []` covers both an empty list and a None multipliers field.
for multiplier in estimate.multipliers or []:
    print(f" {multiplier.label}: {multiplier.factor}x")
# Hybrid example — blend neural + statistical
print("\n=== Hybrid backend (50/50 blend) ===")
hybrid_cfg = {
**config,
"diffusion": {
"backend": "hybrid",
"nSteps": 100,
"schedule": "cosine",
"neural": {
"hiddenDims": [256, 128],
"hybridWeight": 0.5, # 0.0 = all statistical, 1.0 = all neural
},
},
}
h_est = client.configs.estimate_cost(config=hybrid_cfg)
print(f" Total credits: {h_est.total_credits}")
# Submit the neural-backend job. Training of the score network happens inside
# the generation job itself, so runtimes exceed the statistical backend
# (typically 2-5 minutes for small datasets).
print("\n=== Submitting neural job ===")
submitted = client.jobs.generate_config(config=config)
print(f" Job: {submitted.id}")
print(f" Credits reserved: {submitted.credits_reserved}")
print("\nNote: neural backend trains a small score network during generation.")
print("Expect 2-5 min runtime for small retail configs.")