Skip to content

Feature: auto detect zip encoding; SharpZipLib no longer blocks UI thread; added more encodings to encoding list. #17045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<PackageVersion Include="Microsoft.Graphics.Win2D" Version="1.3.2" />
<PackageVersion Include="TagLibSharp" Version="2.3.0" />
<PackageVersion Include="Tulpep.ActiveDirectoryObjectPicker" Version="3.0.11" />
<PackageVersion Include="UTF.Unknown" Version="2.5.1" />
<PackageVersion Include="WinUIEx" Version="2.5.1" />
<PackageVersion Include="Vanara.Windows.Extensions" Version="4.0.1" />
<PackageVersion Include="Vanara.Windows.Shell" Version="4.0.1" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ public override async Task ExecuteAsync(object? parameter = null)

var isArchiveEncrypted = await FilesystemTasks.Wrap(() => StorageArchiveService.IsEncryptedAsync(archive.Path));
var isArchiveEncodingUndetermined = await FilesystemTasks.Wrap(() => StorageArchiveService.IsEncodingUndeterminedAsync(archive.Path));
Encoding? detectedEncoding = null;
if (isArchiveEncodingUndetermined)
{
detectedEncoding = await FilesystemTasks.Wrap(() => StorageArchiveService.DetectEncodingAsync(archive.Path));
}
var password = string.Empty;
Encoding? encoding = null;

Expand All @@ -51,7 +56,8 @@ public override async Task ExecuteAsync(object? parameter = null)
{
IsArchiveEncrypted = isArchiveEncrypted,
IsArchiveEncodingUndetermined = isArchiveEncodingUndetermined,
ShowPathSelection = true
ShowPathSelection = true,
DetectedEncoding = detectedEncoding,
};
decompressArchiveDialog.ViewModel = decompressArchiveViewModel;

Expand Down
9 changes: 8 additions & 1 deletion src/Files.App/Data/Contracts/IStorageArchiveService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,17 @@ public interface IStorageArchiveService
/// <summary>
/// Gets the value that indicates whether the archive file's encoding is undetermined.
/// </summary>
/// <param name="archiveFilePath">The archive file path to check if the item is encrypted.</param>
/// <param name="archiveFilePath">The archive file path to check if the encoding is undetermined.</param>
/// <returns>True if the archive file's encoding is undetermined; otherwise, false.</returns>
Task<bool> IsEncodingUndeterminedAsync(string archiveFilePath);

/// <summary>
/// Detects the encoding of a zip file whose encoding is undetermined.
/// </summary>
/// <param name="archiveFilePath">The path of the archive file whose encoding should be detected.</param>
/// <returns>Null if the archive file doesn't need encoding detection or its encoding can't be detected; otherwise, the detected encoding.</returns>
Task<Encoding?> DetectEncodingAsync(string archiveFilePath);

/// <summary>
/// Gets the <see cref="SevenZipExtractor"/> instance from the archive file path.
/// </summary>
Expand Down
43 changes: 41 additions & 2 deletions src/Files.App/Data/Items/EncodingItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public sealed class EncodingItem
/// Initializes a new instance of the <see cref="EncodingItem"/> class.
/// </summary>
/// <param name="code">The code of the language.</param>
public EncodingItem(string code)
public EncodingItem(string? code)
{
if (string.IsNullOrEmpty(code))
{
Expand All @@ -36,6 +36,45 @@ public EncodingItem(string code)
}
}

public override string ToString() => Name;
/// <summary>
/// Initializes a new instance of the <see cref="EncodingItem"/> class from an existing encoding and an explicit display name.
/// </summary>
/// <param name="encoding">The encoding this item represents.</param>
/// <param name="name">The name returned by <see cref="ToString"/>.</param>
public EncodingItem(Encoding encoding, string name)
{
	(Encoding, Name) = (encoding, name);
}

/// <summary>
/// The built-in list of encoding choices: the system default first, then UTF-8,
/// then the Windows code pages grouped by region, then "macintosh".
/// </summary>
/// <remarks>
/// Declared <c>readonly</c> so the shared list cannot be swapped out by other code.
/// </remarks>
public static readonly EncodingItem[] Defaults = new string?[] {
	null, // System default
	"UTF-8",

	// All possible Windows system encodings
	// Reference: https://en.wikipedia.org/wiki/Windows_code_page

	// East Asian
	"shift_jis", // Japanese
	"gb2312", // Simplified Chinese
	"big5", // Traditional Chinese
	"ks_c_5601-1987", // Korean

	// Southeast Asian
	"Windows-1258", // Vietnamese
	"Windows-874", // Thai

	// Middle East
	"Windows-1256", // Arabic
	"Windows-1255", // Hebrew
	"Windows-1254", // Turkish

	// European
	"Windows-1252", // Western European
	"Windows-1250", // Central European
	"Windows-1251", // Cyrillic
	"Windows-1253", // Greek
	"Windows-1257", // Baltic

	"macintosh",
}
.Select(x => new EncodingItem(x))
.ToArray();

public override string ToString() => Name;
}
}
1 change: 1 addition & 0 deletions src/Files.App/Files.App.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
<PackageReference Include="Microsoft.Graphics.Win2D" />
<PackageReference Include="TagLibSharp" />
<PackageReference Include="Tulpep.ActiveDirectoryObjectPicker" />
<PackageReference Include="UTF.Unknown" />
<PackageReference Include="WinUIEx" />
<PackageReference Include="Vanara.Windows.Extensions" />
<PackageReference Include="Vanara.Windows.Shell" />
Expand Down
113 changes: 76 additions & 37 deletions src/Files.App/Services/Storage/StorageArchiveService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
using ICSharpCode.SharpZipLib.Core;
using ICSharpCode.SharpZipLib.Zip;
using SevenZip;
using System.Collections;
using System.IO;
using System.Linq;
using System.Text;
using UtfUnknown;
using Windows.Storage;
using Windows.Win32;

Expand Down Expand Up @@ -233,51 +235,52 @@ async Task<bool> DecompressAsyncWithSharpZipLib(string archiveFilePath, string d
{
long processedBytes = 0;
int processedFiles = 0;

foreach (ZipEntry zipEntry in zipFile)
await Task.Run(async () =>
{
if (statusCard.CancellationToken.IsCancellationRequested)
foreach (ZipEntry zipEntry in zipFile)
{
isSuccess = false;
break;
}

if (!zipEntry.IsFile)
{
continue; // Ignore directories
}
if (statusCard.CancellationToken.IsCancellationRequested)
{
isSuccess = false;
break;
}

string entryFileName = zipEntry.Name;
string fullZipToPath = Path.Combine(destinationFolderPath, entryFileName);
string directoryName = Path.GetDirectoryName(fullZipToPath);
if (!zipEntry.IsFile)
{
continue; // Ignore directories
}

if (!Directory.Exists(directoryName))
{
Directory.CreateDirectory(directoryName);
}
string entryFileName = zipEntry.Name;
string fullZipToPath = Path.Combine(destinationFolderPath, entryFileName);
string directoryName = Path.GetDirectoryName(fullZipToPath);

byte[] buffer = new byte[4096]; // 4K is a good default
using (Stream zipStream = zipFile.GetInputStream(zipEntry))
using (FileStream streamWriter = File.Create(fullZipToPath))
{
await ThreadingService.ExecuteOnUiThreadAsync(() =>
if (!Directory.Exists(directoryName))
{
fsProgress.FileName = entryFileName;
fsProgress.Report();
});
Directory.CreateDirectory(directoryName);
}

StreamUtils.Copy(zipStream, streamWriter, buffer);
}
processedBytes += zipEntry.Size;
if (fsProgress.TotalSize > 0)
{
fsProgress.Report(processedBytes / (double)fsProgress.TotalSize * 100);
byte[] buffer = new byte[4096]; // 4K is a good default
using (Stream zipStream = zipFile.GetInputStream(zipEntry))
using (FileStream streamWriter = File.Create(fullZipToPath))
{
await ThreadingService.ExecuteOnUiThreadAsync(() =>
{
fsProgress.FileName = entryFileName;
fsProgress.Report();
});

StreamUtils.Copy(zipStream, streamWriter, buffer);
}
processedBytes += zipEntry.Size;
if (fsProgress.TotalSize > 0)
{
fsProgress.Report(processedBytes / (double)fsProgress.TotalSize * 100);
}
processedFiles++;
fsProgress.AddProcessedItemsCount(1);
fsProgress.Report();
}
processedFiles++;
fsProgress.AddProcessedItemsCount(1);
fsProgress.Report();
}

});
if (!statusCard.CancellationToken.IsCancellationRequested)
{
isSuccess = true;
Expand Down Expand Up @@ -365,6 +368,42 @@ public async Task<bool> IsEncodingUndeterminedAsync(string archiveFilePath)
}
}

/// <inheritdoc/>
public async Task<Encoding?> DetectEncodingAsync(string archiveFilePath)
{
	// Opening the archive and running charset detection are synchronous operations,
	// so they are offloaded with Task.Run rather than executed on the caller's thread.
	return await Task.Run<Encoding?>(() =>
	{
		try
		{
			// Temporarily use cp437 to decode the zip file: SharpZipLib requires an encoding
			// when decoding, and cp437 maps every byte to a character, which means any byte
			// array can be stored as a cp437 string and recovered losslessly.
			// Resolved inside the try block so that an unavailable code page results in
			// a null return instead of an exception escaping to the caller.
			var cp437 = Encoding.GetEncoding(437);

			using var zipFile = new ZipFile(archiveFilePath, StringCodec.FromEncoding(cp437));

			// Only entries not flagged as Unicode text contribute to the detection sample.
			var nonUnicodeNames = zipFile.Cast<ZipEntry>()
				.Where(e => !e.IsUnicodeText)
				.Select(e => e.Name)
				.ToArray();

			// Nothing to analyse: the archive doesn't need encoding detection.
			if (nonUnicodeNames.Length == 0)
			{
				return null;
			}

			// Convert the names back to their raw bytes and let the detector inspect them together.
			var fileNameBytes = cp437.GetBytes(string.Join("\n", nonUnicodeNames));
			var detected = CharsetDetector.DetectFromBytes(fileNameBytes).Detected;

			// Accept the detector's answer only when its confidence is above 0.5.
			if (detected != null && detected.Confidence > 0.5)
			{
				return detected.Encoding;
			}

			return null;
		}
		catch (Exception ex)
		{
			Console.WriteLine($"SharpZipLib error: {ex.Message}");
			return null;
		}
	});
}

/// <inheritdoc/>
public async Task<SevenZipExtractor?> GetSevenZipExtractorAsync(string archiveFilePath, string password = "")
{
Expand Down
3 changes: 3 additions & 0 deletions src/Files.App/Strings/en-US/Resources.resw
Original file line number Diff line number Diff line change
Expand Up @@ -2099,6 +2099,9 @@
<data name="Encoding" xml:space="preserve">
<value>Encoding</value>
</data>
<data name="EncodingDetected" xml:space="preserve">
<value> (detected)</value>
</data>
<data name="ExtractToPath" xml:space="preserve">
<value>Path</value>
</data>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ public bool IsArchiveEncodingUndetermined
set => SetProperty(ref isArchiveEncodingUndetermined, value);
}

private Encoding? detectedEncoding;

/// <summary>
/// Gets or sets the detected encoding. Every assignment also calls
/// <see cref="RefreshEncodingOptions"/> after the backing field is updated.
/// </summary>
public Encoding? DetectedEncoding
{
	get
	{
		return detectedEncoding;
	}
	set
	{
		SetProperty(ref detectedEncoding, value);
		RefreshEncodingOptions();
	}
}

private bool showPathSelection;
public bool ShowPathSelection
{
Expand All @@ -53,19 +63,27 @@ public bool ShowPathSelection

public DisposableArray? Password { get; private set; }

public EncodingItem[] EncodingOptions { get; set; } = new string?[] {
null,//System Default
"UTF-8",
"shift_jis",
"gb2312",
"big5",
"ks_c_5601-1987",
"Windows-1252",
"macintosh",
}
.Select(x=>new EncodingItem(x))
.ToArray();
public EncodingItem[] EncodingOptions { get; set; } = EncodingItem.Defaults;
public EncodingItem SelectedEncoding { get; set; }
/// <summary>
/// Rebuilds <see cref="EncodingOptions"/> from <see cref="EncodingItem.Defaults"/>,
/// placing the detected encoding (when there is one) at the front, and then selects the first option.
/// </summary>
void RefreshEncodingOptions()
{
	// Start from the shared defaults; they are used unchanged when nothing was detected.
	var options = EncodingItem.Defaults;

	if (detectedEncoding != null)
	{
		// The detected entry is labelled with the encoding name followed by the localized "detected" suffix.
		var detectedLabel = detectedEncoding.EncodingName + Strings.EncodingDetected.GetLocalizedResource();
		var detectedItem = new EncodingItem(detectedEncoding, detectedLabel);

		options = options.Prepend(detectedItem).ToArray();
	}

	EncodingOptions = options;
	SelectedEncoding = EncodingOptions.FirstOrDefault();
}



public IRelayCommand PrimaryButtonClickCommand { get; private set; }

Expand Down
Loading