-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: neural_diffusion.py
More file actions
80 lines (68 loc) · 2.41 KB
/
neural_diffusion.py
File metadata and controls
80 lines (68 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""VynFi neural diffusion — learned distributions instead of statistical (DS 3.0+).
DataSynth 3.0 adds three diffusion backends:
- ``statistical`` (default, Free+): classical distribution sampling
- ``neural`` (Scale+): score-matching MLP trained on fingerprinted data
- ``hybrid`` (Scale+): blend of both via ``hybridWeight``
This script demonstrates configuring the backend via the generate config.
No new SDK method — it's purely a config option under ``diffusion``.
"""
import os
import vynfi
client = vynfi.VynFi(api_key=os.environ["VYNFI_API_KEY"])
print("=== Configuring neural diffusion ===")
config = {
"sector": "retail",
"country": "US",
"accountingFramework": "us_gaap",
"rows": 2000,
"companies": 3,
"periods": 1,
"periodLength": "monthly",
"processModels": ["o2c"],
"exportFormat": "json",
# Neural diffusion backend config
"diffusion": {
"backend": "neural",
"nSteps": 100,
"schedule": "cosine",
"neural": {
"hiddenDims": [256, 256, 128],
"timestepEmbedDim": 64,
"learningRate": 0.001,
"trainingEpochs": 100,
"batchSize": 64,
},
},
}
# Estimate spend before submitting — the neural backend carries credit
# multipliers on top of the base cost.
estimate = client.configs.estimate_cost(config=config)
print(f" Base credits: {estimate.base_credits}")
print(f" Total credits: {estimate.total_credits}")
# `or []` covers both an empty list and a None multipliers field.
for multiplier in estimate.multipliers or []:
    print(f" {multiplier.label}: {multiplier.factor}x")
# Hybrid example — blend neural + statistical
print("\n=== Hybrid backend (50/50 blend) ===")
hybrid_cfg = {
**config,
"diffusion": {
"backend": "hybrid",
"nSteps": 100,
"schedule": "cosine",
"neural": {
"hiddenDims": [256, 128],
"hybridWeight": 0.5, # 0.0 = all statistical, 1.0 = all neural
},
},
}
h_est = client.configs.estimate_cost(config=hybrid_cfg)
print(f" Total credits: {h_est.total_credits}")
# Submit the neural-backend job. Training of the score network happens inside
# the generation job itself, so runtimes exceed the statistical backend
# (typically 2-5 minutes for small datasets).
print("\n=== Submitting neural job ===")
submitted = client.jobs.generate_config(config=config)
print(f" Job: {submitted.id}")
print(f" Credits reserved: {submitted.credits_reserved}")
print("\nNote: neural backend trains a small score network during generation.")
print("Expect 2-5 min runtime for small retail configs.")