Skip to content

Commit

Permalink
Add GlossaryExchangeService
Browse files Browse the repository at this point in the history
  • Loading branch information
ealbu committed Jan 11, 2023
1 parent 7511087 commit 020ad0d
Show file tree
Hide file tree
Showing 46 changed files with 1,300 additions and 63 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,5 @@ IATETerminologyProvider/IATETerminologyProvider/pluginpackage.manifest_LOCAL_178
/TM Optimizer/Sdl.Community.TMOptimizer/Properties/launchSettings.json

*.json
/InterpretBank/InterpretBankTests/Resources/SavedGlossary.xlsx
/InterpretBank/InterpretBankTests/Resources/exp2.tbx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using System.Collections.Generic;
using InterpretBank.GlossaryExchangeService.Interface;
using InterpretBank.GlossaryExchangeService.Wrappers.Interface;

namespace InterpretBank.GlossaryExchangeService.ExchangeFormats
{
/// <summary>
/// <see cref="IExport"/> implementation that writes glossary rows into a spreadsheet
/// created through an <see cref="ISpreadsheetDocumentWrapper"/>.
/// </summary>
public class ExcelExport : IExport
{
    /// <summary>
    /// Creates an exporter bound to a spreadsheet wrapper and a target file path.
    /// </summary>
    /// <param name="excelDocument">Wrapper used to create the spreadsheet.</param>
    /// <param name="path">Path handed to the wrapper when the spreadsheet is created.</param>
    public ExcelExport(ISpreadsheetDocumentWrapper excelDocument, string path)
    {
        Path = path;
        ExcelDocument = excelDocument;
    }

    private string Path { get; }
    private ISpreadsheetDocumentWrapper ExcelDocument { get; }

    /// <summary>
    /// Writes every row of <paramref name="terms"/> to the spreadsheet, one spreadsheet row per
    /// array and one cell per array element, in order. Row numbers start at 1.
    /// </summary>
    /// <param name="terms">Rows to write; each array holds the cell values of one row.</param>
    /// <param name="glossaryName">Not used by this exporter.</param>
    /// <param name="subGlossaryName">Not used by this exporter.</param>
    public void ExportTerms(IEnumerable<string[]> terms, string glossaryName = null, string subGlossaryName = null)
    {
        using var spreadsheet = ExcelDocument.CreateSpreadsheet(Path);

        // NOTE(review): rows are never closed explicitly here; presumably the wrapper
        // closes the previous row in CreateRow/Dispose - confirm against the wrapper.
        var rowNumber = 1;
        foreach (var row in terms)
        {
            spreadsheet.CreateRow(rowNumber);
            rowNumber++;

            foreach (var cellText in row)
            {
                spreadsheet.CreateCellWithValue(cellText);
            }
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using System.Collections.Generic;
using System.Linq;
using DocumentFormat.OpenXml.Spreadsheet;
using InterpretBank.GlossaryExchangeService.Interface;
using InterpretBank.GlossaryExchangeService.Wrappers.Interface;

namespace InterpretBank.GlossaryExchangeService.ExchangeFormats
{
/// <summary>
/// <see cref="IImport"/> implementation that reads glossary rows from a spreadsheet
/// through an <see cref="ISpreadsheetDocumentWrapper"/>.
/// </summary>
public class ExcelImport : IImport
{
    /// <summary>
    /// Creates an importer bound to a spreadsheet wrapper and a source file path.
    /// </summary>
    /// <param name="excelDocument">Wrapper used to read rows and resolve cell text.</param>
    /// <param name="path">Path handed to the wrapper when the rows are read.</param>
    public ExcelImport(ISpreadsheetDocumentWrapper excelDocument, string path)
    {
        ExcelDocument = excelDocument;
        Path = path;
    }

    private ISpreadsheetDocumentWrapper ExcelDocument { get; }
    private string Path { get; }

    /// <summary>
    /// Lazily yields one string array per spreadsheet row, with one entry per cell found in the row.
    /// </summary>
    /// <remarks>
    /// Because this is an iterator, the file is only read (and any exception only raised)
    /// while the returned sequence is being enumerated.
    /// </remarks>
    /// <returns>
    /// The cell texts of each row. A cell without a value yields whatever
    /// <c>GetCellText(null)</c> returns, falling back to <c>null</c>.
    /// </returns>
    public IEnumerable<string[]> ImportTerms()
    {
        foreach (var row in ExcelDocument.GetRowsSax(Path))
        {
            // A null row used to surface as a NullReferenceException on cells.Count; skip it instead.
            if (row is null)
            {
                continue;
            }

            // OfType (rather than a hard cast) ignores non-cell children of a row,
            // which previously caused an InvalidCastException.
            var cells = row.ChildElements.OfType<Cell>().ToList();

            // NOTE(review): cells are placed by their position in the XML, not by their cell
            // reference, so a row with omitted (empty) cells would produce shifted columns - confirm
            // whether the files handled here can contain such gaps.
            var term = new string[cells.Count];
            for (var i = 0; i < cells.Count; i++)
            {
                var value = cells[i].CellValue?.InnerText;
                value = string.IsNullOrEmpty(value) ? null : value;

                // NOTE(review): every raw value is passed to GetCellText regardless of the cell's
                // data type; presumably it resolves shared-string indices - verify numeric cells.
                term[i] = ExcelDocument.GetCellText(value) ?? value;
            }

            yield return term;
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Xml.Linq;
using InterpretBank.GlossaryExchangeService.Interface;
using InterpretBank.GlossaryExchangeService.Wrappers.Interface;

namespace InterpretBank.GlossaryExchangeService.ExchangeFormats;

/// <summary>
/// <see cref="IExport"/> implementation that writes glossary rows as <c>termEntry</c> elements
/// to a TBX document created through an <see cref="IXmlReaderWriterWrapper"/>.
/// </summary>
/// <remarks>
/// The first row is treated as the table header. The header must contain three columns per
/// language (term, extra1, extra2) followed by one trailing comment column.
/// </remarks>
public class TbxExport : IExport
{
    // Each language occupies three consecutive columns: term, extra1, extra2.
    private const int ColumnsPerLanguage = 3;

    /// <summary>
    /// Creates an exporter bound to a TBX wrapper and a target file path.
    /// </summary>
    /// <param name="tbxDocument">Wrapper used to create the TBX writer.</param>
    /// <param name="path">Path handed to the wrapper when the document is created.</param>
    public TbxExport(IXmlReaderWriterWrapper tbxDocument, string path)
    {
        TbxDocument = tbxDocument;
        Path = path;
    }

    private CultureInfo[] CultureData { get; } = CultureInfo.GetCultures(CultureTypes.NeutralCultures);
    private string Path { get; }
    private IXmlReaderWriterWrapper TbxDocument { get; }

    /// <summary>
    /// Writes every row after the header as a <c>termEntry</c> element.
    /// </summary>
    /// <remarks>
    /// Nothing is written (and no document is created) when <paramref name="terms"/> is empty or
    /// when the header length is not of the form <c>3 * languages + 1</c>. The sequence is
    /// enumerated exactly once, so lazily produced rows are not replayed. Rows that are shorter
    /// than the header are padded with empty values; null rows are skipped.
    /// </remarks>
    /// <param name="terms">Header row followed by the data rows.</param>
    /// <param name="glossaryName">Glossary name forwarded to the wrapper's <c>CreateTbx</c>.</param>
    /// <param name="subGlossaryName">Sub-glossary name forwarded to the wrapper's <c>CreateTbx</c>.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="terms"/> is null, or a header language cannot be resolved to a language code
    /// while a data row is being written.
    /// </exception>
    public void ExportTerms(IEnumerable<string[]> terms, string glossaryName = null, string subGlossaryName = null)
    {
        if (terms is null)
        {
            throw new System.ArgumentNullException(nameof(terms));
        }

        // Walk the sequence with a single enumerator instead of First() + Skip(1),
        // which enumerated (and, for lazy sources, re-read) the input twice.
        using var rows = terms.GetEnumerator();
        if (!rows.MoveNext())
        {
            return;
        }

        var tableHeader = rows.Current;
        if (tableHeader is null || (tableHeader.Length - 1) % ColumnsPerLanguage != 0)
        {
            return;
        }

        var lang = XName.Get("lang", XNamespace.Xml.ToString());

        using var tbxDocument = TbxDocument.CreateTbx(Path, glossaryName, subGlossaryName);

        // Resolved on the first data row and then reused: the header does not change between rows.
        string[] languageCodes = null;
        while (rows.MoveNext())
        {
            var term = rows.Current;
            if (term is null)
            {
                continue;
            }

            languageCodes ??= ResolveLanguageCodes(tableHeader);

            var termEntry = GetTerm(languageCodes, lang, term);
            termEntry.WriteTo(tbxDocument);
        }
    }

    // Builds the langSet element for the language whose columns start at index i.
    private static XElement GetLangSet(XName lang, string[] term, int i, string language)
    {
        return new XElement("langSet",
            new XAttribute(lang, language),
            new XElement("tig",
                new XElement("term", ValueAt(term, i)),
                new XElement("extra1", ValueAt(term, i + 1)),
                new XElement("extra2", ValueAt(term, i + 2))));
    }

    // Builds one termEntry: the trailing comment column followed by one langSet per language.
    private static XElement GetTerm(string[] languageCodes, XName lang, string[] term)
    {
        var langSets = new List<XElement>(languageCodes.Length);
        for (var languageIndex = 0; languageIndex < languageCodes.Length; languageIndex++)
        {
            var firstColumn = languageIndex * ColumnsPerLanguage;
            langSets.Add(GetLangSet(lang, term, firstColumn, languageCodes[languageIndex]));
        }

        // The comment lives in the last header column, i.e. right after the language columns.
        var commentColumn = languageCodes.Length * ColumnsPerLanguage;
        return new XElement("termEntry",
            new XElement("CommentAll", ValueAt(term, commentColumn)), langSets);
    }

    // Maps the first column of every language group in the header to its language code.
    private string[] ResolveLanguageCodes(string[] tableHeader)
    {
        var languageCount = (tableHeader.Length - 1) / ColumnsPerLanguage;
        var codes = new string[languageCount];
        for (var languageIndex = 0; languageIndex < languageCount; languageIndex++)
        {
            codes[languageIndex] = GetTwoLetterLanguageName(tableHeader[languageIndex * ColumnsPerLanguage]);
        }

        return codes;
    }

    // Finds the neutral culture whose name starts with the header text. The localized display
    // name is tried first (original behavior); the English name is the fallback so that headers
    // produced by TbxImport (which uses EnglishName) also resolve on a non-English UI culture.
    private string GetTwoLetterLanguageName(string languageName)
    {
        var culture =
            CultureData.FirstOrDefault(c => c.DisplayName.StartsWith(languageName, System.StringComparison.CurrentCulture))
            ?? CultureData.FirstOrDefault(c => c.EnglishName.StartsWith(languageName, System.StringComparison.OrdinalIgnoreCase));

        return culture?.TwoLetterISOLanguageName.ToLowerInvariant();
    }

    // Returns the cell at the given column, or null when the row is shorter than the header.
    private static string ValueAt(string[] term, int index) =>
        index < term.Length ? term[index] : null;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Xml.Linq;
using InterpretBank.GlossaryExchangeService.Interface;
using InterpretBank.GlossaryExchangeService.Wrappers.Interface;

namespace InterpretBank.GlossaryExchangeService.ExchangeFormats;

/// <summary>
/// <see cref="IImport"/> implementation that reads glossary rows from the <c>termEntry</c>
/// elements supplied by an <see cref="IXmlReaderWriterWrapper"/>.
/// </summary>
public class TbxImport : IImport
{
    // Each langSet contributes three columns: term, extra1, extra2.
    private const int ColumnsPerLanguage = 3;

    /// <summary>
    /// Creates an importer bound to a TBX wrapper and a source file path.
    /// </summary>
    /// <param name="tbxDocument">Wrapper used to read the term elements.</param>
    /// <param name="path">Path handed to the wrapper when the term elements are read.</param>
    public TbxImport(IXmlReaderWriterWrapper tbxDocument, string path)
    {
        TbxDocument = tbxDocument;
        Path = path;
    }

    private string Path { get; }

    private IXmlReaderWriterWrapper TbxDocument { get; }

    /// <summary>
    /// Lazily yields a header row built from the first term entry, followed by one row per term entry.
    /// </summary>
    /// <remarks>
    /// Each row holds three values per <c>langSet</c> (term, extra1, extra2) and the
    /// <c>CommentAll</c> value last. Missing or empty elements yield <c>null</c>. The term
    /// elements are enumerated once; a document without term entries yields no rows at all.
    /// Because this is an iterator, exceptions are raised while the result is enumerated.
    /// </remarks>
    /// <returns>The header row followed by the data rows.</returns>
    public IEnumerable<string[]> ImportTerms()
    {
        var headerEmitted = false;
        foreach (var xmlTermEntry in TbxDocument.GetTermElements(Path))
        {
            // The header is derived from the first entry inside the same pass, instead of a
            // separate First() call that enumerated the source a second time.
            if (!headerEmitted)
            {
                yield return GetTableHeader(xmlTermEntry);
                headerEmitted = true;
            }

            // Materialize once: ElementAt on the lazy query rescanned the children per index.
            var langSets = xmlTermEntry.Elements("langSet").ToList();

            var termEntry = new string[langSets.Count * ColumnsPerLanguage + 1];
            for (var langSetIndex = 0; langSetIndex < langSets.Count; langSetIndex++)
            {
                var termData = langSets[langSetIndex].Element("tig");
                var index = langSetIndex * ColumnsPerLanguage;

                termEntry[index] = ReadChildValue(termData, "term");
                termEntry[index + 1] = ReadChildValue(termData, "extra1");
                termEntry[index + 2] = ReadChildValue(termData, "extra2");
            }

            termEntry[termEntry.Length - 1] = ReadChildValue(xmlTermEntry, "CommentAll");
            yield return termEntry;
        }
    }

    // Empty strings are reported as null.
    private static string GetNormalizedValue(string value) => !string.IsNullOrEmpty(value) ? value : null;

    // Reads the normalized text of a child element; a missing parent or child yields null.
    private static string ReadChildValue(XElement parent, string childName) =>
        GetNormalizedValue(parent?.Element(childName)?.Value);

    // Builds the header row: "<language>", "<language> ExtraA", "<language> ExtraB" per langSet,
    // then "CommentAll". The language is the English name of the culture named by xml:lang,
    // so an unknown culture name makes the CultureInfo constructor throw.
    private static string[] GetTableHeader(XElement termEntry)
    {
        var langSets = termEntry.Elements("langSet").ToList();

        var tableHeader = new string[langSets.Count * ColumnsPerLanguage + 1];
        for (var langSetIndex = 0; langSetIndex < langSets.Count; langSetIndex++)
        {
            var cultureName = langSets[langSetIndex].Attribute(XNamespace.Xml + "lang").Value;
            var language = new CultureInfo(cultureName).EnglishName;

            var index = langSetIndex * ColumnsPerLanguage;

            tableHeader[index] = language;
            tableHeader[index + 1] = $"{language} ExtraA";
            tableHeader[index + 2] = $"{language} ExtraB";
        }

        tableHeader[tableHeader.Length - 1] = "CommentAll";
        return tableHeader;
    }
}
8 changes: 8 additions & 0 deletions InterpretBank/InterpretBank/GlossaryExchangeService/Format.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace InterpretBank.GlossaryExchangeService
{
/// <summary>
/// Exchange formats that <c>GlossaryExchangeService.ExportTerms</c> can be asked to write.
/// </summary>
public enum Format
{
    /// <summary>Export through <c>TbxExport</c>.</summary>
    Tbx = 0,

    /// <summary>Export through <c>ExcelExport</c>.</summary>
    Excel = 1
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using System;
using System.Collections.Generic;
using System.IO;
using InterpretBank.GlossaryExchangeService.ExchangeFormats;
using InterpretBank.GlossaryExchangeService.Interface;
using InterpretBank.GlossaryExchangeService.Wrappers;

namespace InterpretBank.GlossaryExchangeService
{
/// <summary>
/// Entry point that picks the exporter or importer matching a format or file extension.
/// </summary>
public class GlossaryExchangeService
{
    /// <summary>
    /// Exports <paramref name="terms"/> to <paramref name="path"/> in the requested format.
    /// </summary>
    /// <param name="export">Target format.</param>
    /// <param name="path">File to write.</param>
    /// <param name="terms">Rows to export.</param>
    /// <param name="glossaryName">Glossary name; only forwarded to the TBX exporter.</param>
    /// <param name="subGlossaryName">Sub-glossary name; only forwarded to the TBX exporter.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="export"/> is not a known <see cref="Format"/> value.
    /// </exception>
    public void ExportTerms(Format export, string path, IEnumerable<string[]> terms, string glossaryName = null,
        string subGlossaryName = null)
    {
        switch (export)
        {
            case Format.Tbx:
                new TbxExport(new TbxDocumentWrapper(), path).ExportTerms(terms, glossaryName, subGlossaryName);
                break;

            case Format.Excel:
                new ExcelExport(new ExcelDocumentWrapper(), path).ExportTerms(terms);
                break;

            default:
                // An unknown format used to fall through silently, looking like a successful export.
                throw new ArgumentOutOfRangeException(nameof(export), export, "Unsupported export format.");
        }
    }

    /// <summary>
    /// Imports the rows of the file at <paramref name="path"/>, choosing the importer by file
    /// extension (<c>xlsx</c> or <c>tbx</c>, matched case-insensitively).
    /// </summary>
    /// <remarks>
    /// Both importers are iterators, so the file is read - and read errors are raised - only
    /// while the returned sequence is enumerated.
    /// </remarks>
    /// <param name="path">File to read.</param>
    /// <returns>The rows produced by the selected importer.</returns>
    /// <exception cref="ArgumentException">The file extension is missing or not supported.</exception>
    public IEnumerable<string[]> ImportTerms(string path)
    {
        //TODO: Try to handle/catch all exceptions
        // GetExtension returns null for a null path; treat that as "no extension"
        // so it is reported as an unsupported file type instead of a NullReferenceException.
        var extension = Path.GetExtension(path)?.TrimStart('.') ?? string.Empty;

        IImport import = extension.ToLowerInvariant() switch
        {
            "xlsx" => new ExcelImport(new ExcelDocumentWrapper(), path),
            "tbx" => new TbxImport(new TbxDocumentWrapper(), path),
            // The second ArgumentException argument is the parameter name, not the offending value.
            _ => throw new ArgumentException(
                string.Format(
                    PluginResources
                        .GlossaryExchangeServiceManager_CreateFileReader_The_file_type__0__is_not_supported,
                    extension), nameof(path))
        };

        return import.ImportTerms();
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Collections.Generic;

namespace InterpretBank.GlossaryExchangeService.Interface
{
/// <summary>
/// Contract for writing glossary rows to an exchange file.
/// </summary>
public interface IExport
{
/// <summary>
/// Writes the given rows to the export target.
/// </summary>
/// <param name="terms">Rows to export; each row is an array of cell values.</param>
/// <param name="glossaryName">Optional glossary name; defaults to <c>null</c>.</param>
/// <param name="subGlossaryName">Optional sub-glossary name; defaults to <c>null</c>.</param>
void ExportTerms(IEnumerable<string[]> terms, string glossaryName = null, string subGlossaryName = null);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Collections.Generic;

namespace InterpretBank.GlossaryExchangeService.Interface
{
/// <summary>
/// Contract for reading glossary rows from an exchange file.
/// </summary>
public interface IImport
{
/// <summary>
/// Reads the rows of the import source.
/// </summary>
/// <returns>The imported rows; each row is an array of cell values.</returns>
IEnumerable<string[]> ImportTerms();
}
}
Loading

0 comments on commit 020ad0d

Please sign in to comment.