Replies: 2 comments 2 replies
-
So I figured out how to create a custom Phi3 provider:

```csharp
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Services;

namespace Phi3SemanticKernel
{
    public class Phi3ChatCompletionService : IChatCompletionService
    {
        private static Model? model;
        private static MultiModalProcessor? processor;

        public Phi3ChatCompletionService(string modelPath)
        {
            model = new Model(modelPath);
            processor = new MultiModalProcessor(model);
        }

        private async IAsyncEnumerable<StreamingChatMessageContent> Answer(ChatHistory history, [EnumeratorCancellation] CancellationToken cancellationToken = default)
        {
            if (processor is not null)
            {
                await Task.Delay(1).ConfigureAwait(false);
                using var tokenizerStream = processor.CreateStream();

                // Build the Phi-3 chat prompt from the chat history.
                StringBuilder prompt = new StringBuilder();
                foreach (var item in history)
                    prompt.Append($"<|{item.Role}|>{item.Content}<|end|>");
                prompt.Append("<|assistant|>");
                var fullPrompt = prompt.ToString();

                var inputTensors = processor.ProcessImages(fullPrompt, null);
                using GeneratorParams generatorParams = new GeneratorParams(model);
                generatorParams.SetSearchOption("max_length", 3072);
                generatorParams.SetInputs(inputTensors);

                // Generate the response token by token and stream each decoded piece back.
                using var generator = new Generator(model, generatorParams);
                while (!generator.IsDone())
                {
                    generator.ComputeLogits();
                    generator.GenerateNextToken();
                    var seq = generator.GetSequence(0)[^1];
                    var str = tokenizerStream.Decode(seq);

                    if (cancellationToken.IsCancellationRequested)
                        break;

                    yield return new StreamingChatMessageContent(AuthorRole.Assistant, str);

                    await Task.Yield();
                    if (cancellationToken.IsCancellationRequested)
                        break;
                }
            }
        }

        IReadOnlyDictionary<string, object?> IAIService.Attributes => throw new NotImplementedException();

        Task<IReadOnlyList<ChatMessageContent>> IChatCompletionService.GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            throw new NotImplementedException();
        }

        IAsyncEnumerable<StreamingChatMessageContent> IChatCompletionService.GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
        {
            return Answer(chatHistory, cancellationToken);
        }
    }
}
```

However, I'm not quite sure how to continue from here to implement function calling, which is what I'm really interested in with Semantic Kernel.
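For completeness, here is a minimal sketch of how such a custom service could be registered with a Kernel and used for streaming chat. The model path is a placeholder, and the registration pattern assumes the standard `Kernel.CreateBuilder()` / `IServiceCollection` API; it does not attempt function calling, which would additionally require the service to expose the kernel's plugins to the model and parse tool-call output.

```csharp
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Phi3SemanticKernel;

// Path to the local Phi-3 ONNX model folder (placeholder).
var modelPath = @"C:\models\phi-3-mini-4k-instruct-onnx";

// Register the custom chat completion service with the kernel.
var builder = Kernel.CreateBuilder();
builder.Services.AddSingleton<IChatCompletionService>(new Phi3ChatCompletionService(modelPath));
var kernel = builder.Build();

// Stream a reply from the local model.
var chat = kernel.GetRequiredService<IChatCompletionService>();
var history = new ChatHistory();
history.AddUserMessage("What is an ONNX model?");

await foreach (var chunk in chat.GetStreamingChatMessageContentsAsync(history))
{
    Console.Write(chunk.Content);
}
```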
-
We have created the ONNX connector, but at this time we don't have samples created. We'll have these published shortly. https://github.com/microsoft/semantic-kernel/tree/main/dotnet/src/Connectors/Connectors.Onnx
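Since samples were not yet published at the time, here is a hedged sketch of what using that connector might look like. It assumes the Connectors.Onnx package exposes an `AddOnnxRuntimeGenAIChatCompletion` builder extension (verify the exact name and signature against the source linked above); the model id and path are placeholders.

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

// Assumed extension method from Microsoft.SemanticKernel.Connectors.Onnx;
// check the connector source linked above for the exact signature.
var builder = Kernel.CreateBuilder();
builder.AddOnnxRuntimeGenAIChatCompletion(
    modelId: "phi-3",                                    // placeholder model id
    modelPath: @"C:\models\phi-3-mini-4k-instruct-onnx"  // placeholder path
);
var kernel = builder.Build();

var chat = kernel.GetRequiredService<IChatCompletionService>();
var reply = await chat.GetChatMessageContentAsync("Hello from a local Phi-3 model!");
Console.WriteLine(reply);
```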
-
Every sample I see requires you to fire up llama and run a localhost web service to access your local models. This makes for an awkward developer and user experience, and it also risks port conflicts.
When I use onnxruntime-genai I can load and use my Phi3 model directly, without needing a web service running. Is this possible with Semantic Kernel?
This issue might be intended to address that, but it has no description, so it's hard to tell: #6619 @matthewbolanos
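For reference, this is roughly what the direct, in-process approach looks like with onnxruntime-genai (no localhost server involved). The model path is a placeholder, and the generation loop mirrors the API usage shown in the custom provider above, using `Tokenizer` instead of `MultiModalProcessor` for a text-only prompt; exact method names may vary between onnxruntime-genai versions.

```csharp
using Microsoft.ML.OnnxRuntimeGenAI;

// Load the local Phi-3 ONNX model directly into the process (placeholder path).
using var model = new Model(@"C:\models\phi-3-mini-4k-instruct-onnx");
using var tokenizer = new Tokenizer(model);
using var tokenizerStream = tokenizer.CreateStream();

// Phi-3 chat prompt format.
var prompt = "<|user|>Why is the sky blue?<|end|><|assistant|>";
var inputTokens = tokenizer.Encode(prompt);

using var generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("max_length", 1024);
generatorParams.SetInputSequences(inputTokens);

// Stream tokens as they are generated, entirely in-process.
using var generator = new Generator(model, generatorParams);
while (!generator.IsDone())
{
    generator.ComputeLogits();
    generator.GenerateNextToken();
    Console.Write(tokenizerStream.Decode(generator.GetSequence(0)[^1]));
}
```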