Skip to content

Commit

Permalink
Merge pull request #136 from Encamina/@mramos/update-skVisionImageDoc…
Browse files Browse the repository at this point in the history
…umentConnector

Add resolution limit to `SkVisionImageDocumentConnector`
  • Loading branch information
MarioRamosEs authored Sep 11, 2024
2 parents 291a1f7 + d964ea3 commit b1467ed
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Previous classification is not required if changes are simple or all belong to t

- Added the `IEnmarchaDocumentConnector` interface that extends the existing `IDocumentConnector`. For now, this interface adds a `CompatibleFileFormats` property that returns the file formats supported by the connector. Existing document connectors have been updated to implement this interface.
- Added the `CsvTsvDocumentConnector` document connector, which allows reading CSV and TSV files while keeping the headers in the different chunks.
- Added `SkVisionImageDocumentConnector` which allows to read images and extract text from them. Using Semantic Kernel vision capabilities.
- Added `SkVisionImageDocumentConnector`, which allows reading images and extracting text from them using Semantic Kernel vision capabilities. You can configure the connector with the `SkVisionImageDocumentConnectorOptions` class.
- The `IDocumentConnectorProvider` interface now works with the `IEnmarchaDocumentConnector` interface instead of `IDocumentConnector`.
- The `AddDocumentConnector` function has been modified by removing the `fileExtension` parameter, which will now come in the `CompatibleFileFormats` property of the document connector.
- The `ParagraphPptxDocumentConnector` class is no longer sealed, allowing the creation of derived classes.
Expand Down
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

<PropertyGroup>
<VersionPrefix>8.1.8</VersionPrefix>
<VersionSuffix>preview-05</VersionSuffix>
<VersionSuffix>preview-06</VersionSuffix>
</PropertyGroup>

<!--
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
using CommunityToolkit.Diagnostics;

using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Exceptions;
using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Options;
using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Utils;

using Microsoft.Extensions.Options;

using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

Expand Down Expand Up @@ -49,14 +52,17 @@ 4. Transcribe only in Markdown format.
""";

private readonly IChatCompletionService chatCompletionService;
private readonly SkVisionImageDocumentConnectorOptions options;

/// <summary>
/// Initializes a new instance of the <see cref="SkVisionImageDocumentConnector"/> class.
/// </summary>
/// <param name="kernel">A valid <see cref="Kernel"/> instance; its <see cref="IChatCompletionService"/> is resolved here.</param>
/// <param name="options">Configuration options for this connector.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="kernel"/> or <paramref name="options"/> is <c>null</c>.</exception>
public SkVisionImageDocumentConnector(Kernel kernel, IOptions<SkVisionImageDocumentConnectorOptions> options)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException below.
    Guard.IsNotNull(kernel);
    Guard.IsNotNull(options);

    chatCompletionService = kernel.GetRequiredService<IChatCompletionService>();
    this.options = options.Value;
}

/// <inheritdoc/>
Expand All @@ -67,9 +73,15 @@ public virtual string ReadText(Stream stream)
{
Guard.IsNotNull(stream);

var mimeType = ImageHelper.GetMimeType(stream);
var (mimeType, width, height) = ImageHelper.GetImageInfo(stream);
stream.Position = 0;

// Check image resolution
if (width > options.ResolutionLimit || height > options.ResolutionLimit)
{
throw new DocumentTooLargeException();
}

var history = new ChatHistory(SystemPrompt);

var message = new ChatMessageContentItemCollection()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

<ItemGroup>
<PackageReference Include="ExcelNumberFormat" Version="1.1.0" />
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Options.DataAnnotations" Version="8.0.0" />
<PackageReference Include="Microsoft.SemanticKernel.Plugins.Document" Version="1.15.0-alpha" />
<PackageReference Include="PdfPig" Version="0.1.8" />
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.5" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

using Encamina.Enmarcha.SemanticKernel.Connectors.Document;
using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Connectors;
using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Options;

using Microsoft.Extensions.Configuration;

namespace Microsoft.Extensions.DependencyInjection;

Expand Down Expand Up @@ -196,12 +199,18 @@ public static IServiceCollection AddCsvTsvDocumentConnector(this IServiceCollect
}

/// <summary>
/// Registers the <see cref="SkVisionImageDocumentConnector"/> as a scoped <see cref="IEnmarchaDocumentConnector"/> in the specified
/// <see cref="IServiceCollection"/>, and binds its <see cref="SkVisionImageDocumentConnectorOptions"/> from configuration.
/// </summary>
/// <param name="services">The <see cref="IServiceCollection"/> to add services to.</param>
/// <param name="configuration">The current set of key-value application configuration parameters.</param>
/// <returns>The <see cref="IServiceCollection"/> so that additional calls can be chained.</returns>
public static IServiceCollection AddSkVisionImageDocumentConnector(this IServiceCollection services, IConfiguration configuration)
{
    // The options are read from the configuration section named after the options class.
    var optionsSection = configuration.GetSection(nameof(SkVisionImageDocumentConnectorOptions));

    var optionsBuilder = services.AddOptions<SkVisionImageDocumentConnectorOptions>();
    optionsBuilder.Bind(optionsSection)
                  .ValidateDataAnnotations()
                  .ValidateOnStart();

    return services.AddScoped<IEnmarchaDocumentConnector, SkVisionImageDocumentConnector>();
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Encamina.Enmarcha.SemanticKernel.Connectors.Document.Connectors;

namespace Encamina.Enmarcha.SemanticKernel.Connectors.Document.Options;

/// <summary>
/// Configuration options for <see cref="SkVisionImageDocumentConnector"/>.
/// </summary>
public sealed class SkVisionImageDocumentConnectorOptions
{
    /// <summary>
    /// Gets the maximum resolution (in pixels) accepted for an image. The connector compares
    /// both the image width and the image height against this value. Defaults to <c>8192</c>.
    /// </summary>
    /// <remarks>
    /// Must be a positive number: a limit of zero or less would reject every image, so it is
    /// flagged as invalid when the options are validated through data annotations.
    /// </remarks>
    [System.ComponentModel.DataAnnotations.Range(1, int.MaxValue)]
    public int ResolutionLimit { get; init; } = 8192;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,42 @@ namespace Encamina.Enmarcha.SemanticKernel.Connectors.Document.Utils;
public static class ImageHelper
{
    /// <summary>
    /// Gets both the MIME type and resolution (width and height) of an image from a stream.
    /// </summary>
    /// <remarks>
    /// Only the image header is inspected (<see cref="Image.Identify(Stream)"/>); the pixel data is not decoded,
    /// so very large images can be measured without being fully loaded into memory.
    /// This method does not reset the position of <paramref name="stream"/>; callers that need to read
    /// the stream again must rewind it themselves.
    /// </remarks>
    /// <param name="stream">The stream containing the image.</param>
    /// <returns>
    /// A tuple containing the MIME type, width, and height of the image. The MIME type is
    /// "application/octet-stream" when the decoded format is unknown.
    /// </returns>
    /// <exception cref="ArgumentNullException">Thrown when the stream is null.</exception>
    /// <exception cref="UnknownImageFormatException">Thrown when the stream does not contain a recognized image format.</exception>
    /// <exception cref="InvalidImageContentException">Thrown when the image header contains invalid content.</exception>
    public static (string MimeType, int Width, int Height) GetImageInfo(Stream stream)
    {
        Guard.IsNotNull(stream);

        // Identify reads just enough of the stream to discover format and dimensions.
        var imageInfo = Image.Identify(stream);

        var mimeType = imageInfo.Metadata.DecodedImageFormat?.DefaultMimeType ?? System.Net.Mime.MediaTypeNames.Application.Octet;

        return (mimeType, imageInfo.Width, imageInfo.Height);
    }

    /// <summary>
    /// Gets the MIME type of an image from a stream.
    /// </summary>
    /// <param name="stream">The stream containing the image.</param>
    /// <returns>The MIME type of the image, or "application/octet-stream" if the type is unknown.</returns>
    public static string GetMimeType(Stream stream)
    {
        return GetImageInfo(stream).MimeType;
    }

    /// <summary>
    /// Gets the resolution (width and height) of an image from a stream.
    /// </summary>
    /// <param name="stream">The stream containing the image.</param>
    /// <returns>A tuple containing the width and height of the image.</returns>
    public static (int Width, int Height) GetResolution(Stream stream)
    {
        var info = GetImageInfo(stream);
        return (info.Width, info.Height);
    }
}

0 comments on commit b1467ed

Please sign in to comment.