Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions examples/500-semantic-kernel-voice-plugin-dotnet/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Deepgram — https://console.deepgram.com/
DEEPGRAM_API_KEY=

# OpenAI — https://platform.openai.com/api-keys
OPENAI_API_KEY=
2 changes: 2 additions & 0 deletions examples/500-semantic-kernel-voice-plugin-dotnet/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
bin/
obj/
56 changes: 56 additions & 0 deletions examples/500-semantic-kernel-voice-plugin-dotnet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Semantic Kernel Voice Plugin with Deepgram (.NET)

A Deepgram plugin for Microsoft Semantic Kernel that exposes speech-to-text (STT) and text-to-speech (TTS) as `[KernelFunction]` attributes. An AI agent can autonomously choose to transcribe audio or synthesize speech during a chat conversation.

## What you'll build

A .NET 8 console app with a Semantic Kernel agent that can transcribe audio from URLs or local files using Deepgram Nova-3, and convert text to speech using Deepgram Aura-2 — all invoked automatically by the AI agent when relevant.

## Prerequisites

- [.NET 8 SDK](https://dotnet.microsoft.com/download/dotnet/8.0)
- Deepgram account — [get a free API key](https://console.deepgram.com/)
- OpenAI account — [get an API key](https://platform.openai.com/api-keys)

## Environment variables

| Variable | Where to find it |
|----------|-----------------|
| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) |
| `OPENAI_API_KEY` | [OpenAI dashboard → API keys](https://platform.openai.com/api-keys) |

## Install and run

```bash
cp .env.example .env
# Fill in your API keys in .env

cd src
dotnet restore
dotnet run
```

Then try prompts like:
- "Transcribe this audio: https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"
- "Say 'Hello world' as audio and save it to greeting.mp3"

## Key parameters

| Parameter | Value | Description |
|-----------|-------|-------------|
| `model` | `nova-3` | Deepgram's latest and most accurate STT model |
| `model` (TTS) | `aura-2-thalia-en` | Deepgram's Aura-2 TTS voice |
| `smart_format` | `true` | Adds punctuation, casing, and paragraph formatting |
| `tag` | `deepgram-examples` | Tags API usage for analytics |

## How it works

1. The app creates a Semantic Kernel `Kernel` with an OpenAI chat completion backend.
2. A `DeepgramPlugin` class registers four `[KernelFunction]` methods: `transcribe_url`, `transcribe_file`, `speak_text`, and `speak_text_stream`.
3. `FunctionChoiceBehavior.Auto()` lets the LLM decide when to call Deepgram functions based on the conversation.
4. When the user asks about audio transcription, the agent calls `TranscribeUrlAsync` or `TranscribeFileAsync`, which use Deepgram's pre-recorded STT API.
5. When the user asks for text-to-speech, the agent calls `SpeakTextAsync` (saves to file) or `SpeakTextStreamAsync` (returns base64 audio).

## Starter templates

[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories)
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using System.ComponentModel;
using Deepgram;
using Deepgram.Clients.Interfaces.v1;
using Deepgram.Models.Listen.v1.REST;
using Deepgram.Models.Speak.v1.REST;
using Microsoft.SemanticKernel;

namespace DeepgramSemanticKernel;

/// <summary>
/// Semantic Kernel plugin exposing Deepgram speech-to-text (Nova-3) and
/// text-to-speech (Aura-2) as <c>[KernelFunction]</c>s an agent can invoke.
/// The host must call <c>Library.Initialize()</c> and set <c>DEEPGRAM_API_KEY</c>
/// before constructing this plugin.
/// </summary>
public class DeepgramPlugin
{
    // STT/TTS model identifiers shared by all functions.
    private const string SttModel = "nova-3";
    private const string TtsModel = "aura-2-thalia-en";

    // tag is REQUIRED so internal test traffic is identifiable in usage analytics.
    private const string UsageTag = "deepgram-examples";

    private readonly IListenRESTClient _listenClient;
    private readonly ISpeakRESTClient _speakClient;

    public DeepgramPlugin()
    {
        // ClientFactory reads DEEPGRAM_API_KEY from the environment.
        _listenClient = ClientFactory.CreateListenRESTClient();
        _speakClient = ClientFactory.CreateSpeakRESTClient();
    }

    // Single source of truth for transcription options (previously duplicated
    // verbatim in TranscribeUrlAsync and TranscribeFileAsync).
    private static PreRecordedSchema CreateTranscriptionSchema() => new()
    {
        Model = SttModel,
        SmartFormat = true,
        Tag = new List<string> { UsageTag }
    };

    [KernelFunction("transcribe_url")]
    [Description("Transcribes audio from a URL using Deepgram speech-to-text. Returns the transcript text.")]
    public async Task<string> TranscribeUrlAsync(
        [Description("The URL of the audio file to transcribe")] string url)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(url);

        var response = await _listenClient.TranscribeUrl(
            new UrlSource(url),
            CreateTranscriptionSchema());

        // Fail with a clear message instead of a NullReferenceException when the
        // response carries no transcript (e.g. unreachable URL, non-audio payload).
        return response.Results?.Channels?[0].Alternatives?[0].Transcript
            ?? throw new InvalidOperationException("Deepgram returned no transcript for the given URL.");
    }

    [KernelFunction("transcribe_file")]
    [Description("Transcribes a local audio file using Deepgram speech-to-text. Returns the transcript text.")]
    public async Task<string> TranscribeFileAsync(
        [Description("The absolute path to the local audio file")] string filePath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(filePath);

        var audioData = await File.ReadAllBytesAsync(filePath);

        var response = await _listenClient.TranscribeFile(
            audioData,
            CreateTranscriptionSchema());

        return response.Results?.Channels?[0].Alternatives?[0].Transcript
            ?? throw new InvalidOperationException("Deepgram returned no transcript for the given file.");
    }

    [KernelFunction("speak_text")]
    [Description("Converts text to speech using Deepgram TTS. Saves the audio to a file and returns the file path.")]
    public async Task<string> SpeakTextAsync(
        [Description("The text to convert to speech")] string text,
        [Description("Output file path for the audio (e.g. output.mp3)")] string outputPath = "output.mp3")
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(text);
        ArgumentException.ThrowIfNullOrWhiteSpace(outputPath);

        await _speakClient.ToFile(
            new TextSource(text),
            outputPath,
            new SpeakSchema
            {
                Model = TtsModel
            });

        return $"Audio saved to {outputPath}";
    }

    // Exposes ToStream for callers that need in-memory audio bytes
    [KernelFunction("speak_text_stream")]
    [Description("Converts text to speech and returns the raw audio bytes as a base64-encoded string.")]
    public async Task<string> SpeakTextStreamAsync(
        [Description("The text to convert to speech")] string text)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(text);

        var response = await _speakClient.ToStream(
            new TextSource(text),
            new SpeakSchema
            {
                Model = TtsModel
            });

        var bytes = response.Stream?.ToArray()
            ?? throw new InvalidOperationException("Deepgram returned no audio stream.");
        return Convert.ToBase64String(bytes);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Console app targeting .NET 8 LTS; nullable reference types enabled project-wide. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Generates packages.lock.json so restores are reproducible across machines/CI. -->
<RestorePackagesWithLockFile>true</RestorePackagesWithLockFile>
</PropertyGroup>

<ItemGroup>
<!-- Deepgram .NET SDK (STT/TTS REST clients) and Semantic Kernel (agent + function calling). -->
<PackageReference Include="Deepgram" Version="6.6.1" />
<PackageReference Include="Microsoft.SemanticKernel" Version="1.74.0" />
</ItemGroup>

</Project>
61 changes: 61 additions & 0 deletions examples/500-semantic-kernel-voice-plugin-dotnet/src/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
using Deepgram;
using DeepgramSemanticKernel;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;

// Both keys are required: Deepgram for the voice plugin, OpenAI for chat completion.
var deepgramApiKey = Environment.GetEnvironmentVariable("DEEPGRAM_API_KEY");
var openAiApiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");

if (string.IsNullOrEmpty(deepgramApiKey) || string.IsNullOrEmpty(openAiApiKey))
{
    Console.Error.WriteLine("Set DEEPGRAM_API_KEY and OPENAI_API_KEY environment variables.");
    Environment.Exit(2);
}

// Deepgram SDK requires explicit initialization
Library.Initialize();

try
{
    var kernelBuilder = Kernel.CreateBuilder();
    kernelBuilder.AddOpenAIChatCompletion("gpt-4o-mini", openAiApiKey);

    var kernel = kernelBuilder.Build();
    kernel.ImportPluginFromType<DeepgramPlugin>("Deepgram");

    var chatService = kernel.GetRequiredService<IChatCompletionService>();

    // System prompt steering the agent toward the Deepgram plugin functions.
    var conversation = new ChatHistory(
        "You are a helpful assistant with access to Deepgram voice tools. " +
        "You can transcribe audio from URLs or local files, and convert text to speech. " +
        "Use the Deepgram functions when the user asks about audio transcription or text-to-speech.");

    // Auto lets the LLM decide when to call the registered kernel functions.
    var executionSettings = new OpenAIPromptExecutionSettings
    {
        FunctionChoiceBehavior = FunctionChoiceBehavior.Auto()
    };

    Console.WriteLine("Deepgram + Semantic Kernel Agent");
    Console.WriteLine("Type a message (or 'quit' to exit):");
    Console.WriteLine();

    // Simple REPL: exit on empty input or "quit" (case-insensitive).
    for (;;)
    {
        Console.Write("You: ");
        var userInput = Console.ReadLine();
        var wantsToQuit = string.IsNullOrWhiteSpace(userInput)
            || userInput.Equals("quit", StringComparison.OrdinalIgnoreCase);
        if (wantsToQuit)
            break;

        conversation.AddUserMessage(userInput!);

        var reply = await chatService.GetChatMessageContentAsync(conversation, executionSettings, kernel);
        Console.WriteLine($"Agent: {reply.Content}");
        Console.WriteLine();

        conversation.AddAssistantMessage(reply.Content ?? "");
    }
}
finally
{
    Library.Terminate();
}
Loading