From c1f3a4e5a4ff3604ed73819f46b7d26d64c5abfe Mon Sep 17 00:00:00 2001 From: xulihang Date: Sat, 30 Oct 2021 22:08:51 +0800 Subject: [PATCH] add an option to use quick tmx importing --- BasicCAT/BasicCAT.b4j | 25 ++-- BasicCAT/BasicCAT.b4j.meta | 7 +- BasicCAT/Files/version.txt | 2 +- BasicCAT/Project.bas | 11 +- BasicCAT/TM.bas | 6 +- BasicCAT/TMManager.bas | 9 +- BasicCAT/TMXExporter.bas | 156 ++++++++++++++++++++++++ BasicCAT/TMXImporter.bas | 244 +++++++++++++++++++++++++++++++++++++ BasicCAT/importDialog.bas | 4 +- 9 files changed, 442 insertions(+), 22 deletions(-) create mode 100644 BasicCAT/TMXExporter.bas create mode 100644 BasicCAT/TMXImporter.bas diff --git a/BasicCAT/BasicCAT.b4j b/BasicCAT/BasicCAT.b4j index 7b20f06..8bae71d 100644 --- a/BasicCAT/BasicCAT.b4j +++ b/BasicCAT/BasicCAT.b4j @@ -271,22 +271,23 @@ Module68=TMDB Module69=TMEditor Module7=editDistance Module70=TMManager -Module71=TMX -Module72=txtFilter -Module73=Utils -Module74=viewSegment -Module75=xliffFilter -Module76=Xml2Map -Module77=XMLBuilder2 -Module78=XmlNode -Module79=XmlParser +Module71=TMXExporter +Module72=TMXImporter +Module73=txtFilter +Module74=Utils +Module75=viewSegment +Module76=xliffFilter +Module77=Xml2Map +Module78=XMLBuilder2 +Module79=XmlNode Module8=EnumClass -Module80=XMLUtils -Module81=zip4j +Module80=XmlParser +Module81=XMLUtils +Module82=zip4j Module9=ErrorReporter NumberOfFiles=88 NumberOfLibraries=26 -NumberOfModules=81 +NumberOfModules=82 Version=8.9 @EndOfDesignText@ #Region Project Attributes diff --git a/BasicCAT/BasicCAT.b4j.meta b/BasicCAT/BasicCAT.b4j.meta index 7fa1449..abec091 100644 --- a/BasicCAT/BasicCAT.b4j.meta +++ b/BasicCAT/BasicCAT.b4j.meta @@ -79,6 +79,7 @@ ModuleBookmarks79= ModuleBookmarks8= ModuleBookmarks80= ModuleBookmarks81= +ModuleBookmarks82= ModuleBookmarks9= ModuleBreakpoints0= ModuleBreakpoints1= @@ -161,6 +162,7 @@ ModuleBreakpoints79= ModuleBreakpoints8= ModuleBreakpoints80= ModuleBreakpoints81= +ModuleBreakpoints82= ModuleBreakpoints9= ModuleClosedNodes0= ModuleClosedNodes1= @@ -243,7 +245,8 @@ ModuleClosedNodes79= ModuleClosedNodes8= ModuleClosedNodes80= ModuleClosedNodes81= +ModuleClosedNodes82= ModuleClosedNodes9= -NavigationStack=opennlp,Initialize,36,6,Term,termsInASentenceUsingIteration,323,0,Term,termsInASentenceUsingHashMap,287,4,xliffFilter,createWorkFile,66,0,xliffFilter,generateFile,296,0,xliffFilter,updateTransUnit,391,0,xliffFilter,updateNode,381,0,xliffFilter,insertTranslation,375,0,xliffFilter,buildMrk,439,0,Project,updateWithWorkfileMI_Action,893,0,Project,updateSegmentsWithWorkfile,931,6 +NavigationStack=Main,Process_Globals,71,0,TM,importExternalTranslationMemory,196,6,TMXImporter,importedList,77,6,TMXImporter,importedListQuick,92,0,TMXImporter,Parser_StartElement,35,0,TMManager,exportToFile,211,0,importDialog,loadTerm,115,0,ProjectSettings,Class_Globals,0,0,Main,MenuBar1_Action,398,0,Project,saveSettings,182,6 SelectedBuild=0 -VisibleModules=10,45,51,72,60,25,46,42,41,47,75 +VisibleModules=10,45,51,73,60,25,46,42,41,47,72,71,70,20,81,67 diff --git a/BasicCAT/Files/version.txt b/BasicCAT/Files/version.txt index 62321af..5a68790 100644 --- a/BasicCAT/Files/version.txt +++ b/BasicCAT/Files/version.txt @@ -1 +1 @@ -1.10.3 \ No newline at end of file +1.10.4 \ No newline at end of file diff --git a/BasicCAT/Project.bas b/BasicCAT/Project.bas index 850978f..6b026bd 100644 --- a/BasicCAT/Project.bas +++ b/BasicCAT/Project.bas @@ -183,11 +183,20 @@ End Sub Public Sub saveSettings(newsettings As Map) projectFile.Put("settings",newsettings) + settings = newsettings Log(newsettings) save If newsettings.Get("tmListChanged")="yes" Then projectTM.deleteExternalTranslationMemory - wait for (projectTM.importExternalTranslationMemory(settings.Get("tmList"),projectFile)) complete (result As Boolean) + Dim tmList As List = settings.Get("tmList") + If tmList.Size>0 Then + Dim response As Int = fx.Msgbox2(Main.MainForm,"Please select a TMX import method:","","Quick (for pure text)","","Accurate (for tagged text)",fx.MSGBOX_CONFIRMATION) + Dim quickMode As Boolean = True + If response = fx.DialogResponse.NEGATIVE Then + quickMode = False + End If + wait for (projectTM.importExternalTranslationMemory(settings.Get("tmList"),projectFile,quickMode)) complete (result As Boolean) + End If End If If newsettings.Get("termListChanged")="yes" Then projectTerm.deleteExternalTerminology diff --git a/BasicCAT/TM.bas b/BasicCAT/TM.bas index 884d558..92fa72e 100644 --- a/BasicCAT/TM.bas +++ b/BasicCAT/TM.bas @@ -188,7 +188,7 @@ Public Sub deleteExternalTranslationMemory externalTranslationMemory.DeleteAll End Sub -Public Sub importExternalTranslationMemory(tmList As List,projectFile As Map) As ResumableSub +Public Sub importExternalTranslationMemory(tmList As List,projectFile As Map,quickMode As Boolean) As ResumableSub progressDialog.Show("Loading external memory","loadtm") Dim segments As List segments.Initialize @@ -198,7 +198,9 @@ Public Sub importExternalTranslationMemory(tmList As List,projectFile As Map) As If tmfileLowercase.EndsWith(".txt") Then segments.AddAll(importedTxt(tmfile)) Else if tmfileLowercase.EndsWith(".tmx") Then - segments.AddAll(TMX.importedList(File.Combine(Main.currentProject.path,"TM"),tmfile,projectFile.Get("source"),projectFile.Get("target"))) + Dim importer As TMXImporter + importer.Initialize + segments.AddAll(importer.importedList(File.Combine(Main.currentProject.path,"TM"),tmfile,projectFile.Get("source"),projectFile.Get("target"),quickMode)) else if tmfileLowercase.EndsWith(".xlsx") Then segments.AddAll(importedXlsx(tmfile)) End If diff --git a/BasicCAT/TMManager.bas b/BasicCAT/TMManager.bas index b13bff9..1af5bcd 100644 --- a/BasicCAT/TMManager.bas +++ b/BasicCAT/TMManager.bas @@ -195,6 +195,9 @@ Sub exportToFile segments.Add(bitext) Next + + Dim exporter As TMXExporter + exporter.Initialize Dim result As Int result=fx.Msgbox2(frm,"Include tags?","","Yes","Cancel","No",fx.MSGBOX_CONFIRMATION) If result=fx.DialogResponse.CANCEL Then @@ -203,15 +206,15 @@ Sub exportToFile If path.EndsWith(".tmx") Then Select result Case fx.DialogResponse.NEGATIVE - TMX.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,False,False) + exporter.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,False,False) Case fx.DialogResponse.POSITIVE Dim result2 As Int result2=fx.Msgbox2(frm,"How to handle tags?","","Keep the original format","Cancel","Conform to TMX Specification",fx.MSGBOX_CONFIRMATION) Select result2 Case fx.DialogResponse.NEGATIVE - TMX.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,True,True) + exporter.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,True,True) Case fx.DialogResponse.POSITIVE - TMX.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,True,False) + exporter.export(segments,Main.currentProject.projectFile.Get("source"),Main.currentProject.projectFile.Get("target"),path,True,False) Case fx.DialogResponse.CANCEL Return End Select diff --git a/BasicCAT/TMXExporter.bas b/BasicCAT/TMXExporter.bas new file mode 100644 index 0000000..8918c73 --- /dev/null +++ b/BasicCAT/TMXExporter.bas @@ -0,0 +1,156 @@ +B4J=true +Group=Default Group +ModulesStructureVersion=1 +Type=Class +Version=8.9 +@EndOfDesignText@ +Sub Class_Globals + Private fx As JFX +End Sub + +'Initializes the object. You can add parameters to this method if needed. +Public Sub Initialize + +End Sub + + +public Sub export(segments As List,sourceLang As String,targetLang As String,path As String,includeTag As Boolean,isTMXTags As Boolean) + Dim tmxNode As XmlNode + tmxNode=CreateNode("tmx") + tmxNode.Attributes.Put("version","1.4") + Dim header As XmlNode + header=CreateNode("header") + header.Attributes.Put("creationtool","BasicCAT") + header.Attributes.Put("creationtoolversion","1.0.0") + header.Attributes.put("adminlang",sourceLang) + header.Attributes.put("srclang",sourceLang) + header.Attributes.put("segtype","sentence") + header.Attributes.put("o-tmf","BasicCAT") + Dim body As XmlNode + body=CreateNode("body") + Dim tuList As List + tuList.Initialize + For Each segment As List In segments + Dim tu As XmlNode + tu=CreateNode("tu") + Dim tuvList As List + tuvList.Initialize + Dim targetMap As Map + targetMap=segment.Get(2) + For i=0 To 1 + Dim seg As String=segment.Get(i) + If includeTag=False Then + seg=XMLUtils.TagsRemoved(seg,False) + End If + If i = 1 Then + Dim targetTuv As XmlNode + targetTuv=CreateNode("tuv") + targetTuv.Attributes.Put("xml:lang",targetLang) + If targetMap.ContainsKey("creator") Then + targetTuv.attributes.Put("creationid",targetMap.Get("creator")) + End If + If targetMap.ContainsKey("createdTime") Then + Dim creationDate As String + DateTime.DateFormat="yyyyMMdd" + DateTime.TimeFormat="HHmmss" + creationDate=DateTime.Date(targetMap.Get("createdTime"))&"T"&DateTime.Time(targetMap.Get("createdTime"))&"Z" + targetTuv.attributes.Put("creationdate",creationDate) + End If + Dim segNode As XmlNode + segNode=CreateNode("seg") + setNodeText(segNode,seg,isTMXTags) + targetTuv.Children.Add(segNode) + tuvList.Add(targetTuv) + Else if i = 0 Then + Dim sourceTuv As XmlNode + sourceTuv=CreateNode("tuv") + sourceTuv.Attributes.Put("xml:lang",sourceLang) + Dim segNode As XmlNode + segNode=CreateNode("seg") + setNodeText(segNode,seg,isTMXTags) + sourceTuv.Children.Add(segNode) + tuvList.Add(sourceTuv) + End If + Next + If targetMap.ContainsKey("note") Then + If targetMap.Get("note")<>"" Then + Dim note As XmlNode + note=CreateNode("note") + Dim textNode As XmlNode + textNode=CreateNode("text") + textNode.Text=targetMap.Get("note") + note.Children.Add(textNode) + tu.Children.InsertAt(0,note) + End If + End If + tu.Children.AddAll(tuvList) + tuList.Add(tu) + Next + body.Children=tuList + tmxNode.Children.Add(header) + tmxNode.Children.Add(body) + File.WriteString(path,"",XMLUtils.asString(tmxNode)) +End Sub + +private Sub setNodeText(node As XmlNode,text As String,isTMXTags As Boolean) + If isTMXTags=True Then + Try + text=XMLUtils.HandleXMLEntities(text,True) + text=Regex.Replace2("`(<.*?>)`",32,text,"$1") + node.innerXML=convertToTMXTags(text) + Return + Catch + Log(LastException) + End Try + End If + node.Children.Clear + Dim textNode As XmlNode + textNode.Initialize + textNode.Name="text" + textNode.Text=text + node.Children.Add(textNode) +End Sub + +private Sub convertToTMXTags(xml As String) As String + Dim sb As StringBuilder + sb.Initialize + Dim matcher As Matcher + matcher=Regex.Matcher("",xml) + Dim previousEndIndex As Int=0 + Do While matcher.Find + sb.Append(xml.SubString2(previousEndIndex,matcher.GetStart(0))) + previousEndIndex=matcher.GetEnd(0) + If matcher.Group(1).StartsWith("g") Then + Dim id As Int + id=matcher.Group(2) + If matcher.match.Contains("/") Then + sb.Append($""$) + sb.Append(XMLUtils.EscapeXml(matcher.match)) + sb.Append("") + Else + sb.Append($""$) + sb.Append(XMLUtils.EscapeXml(matcher.match)) + sb.Append("") + End If + Else If matcher.Group(1).StartsWith("x") Then + sb.Append("") + sb.Append(XMLUtils.EscapeXml(matcher.Match)) + sb.Append("") + Else + sb.Append(matcher.Match) + End If + Loop + If previousEndIndex<>xml.Length-1 Then + sb.Append(xml.SubString2(previousEndIndex,xml.Length)) + End If + Return sb.ToString +End Sub + +private Sub CreateNode(name As String) As XmlNode + Dim node As XmlNode + node.Initialize + node.Name=name + node.Attributes.Initialize + node.Children.Initialize + Return node +End Sub diff --git a/BasicCAT/TMXImporter.bas b/BasicCAT/TMXImporter.bas new file mode 100644 index 0000000..566430e --- /dev/null +++ b/BasicCAT/TMXImporter.bas @@ -0,0 +1,244 @@ +B4J=true +Group=Default Group +ModulesStructureVersion=1 +Type=Class +Version=8.9 +@EndOfDesignText@ +Sub Class_Globals + Private sax As SaxParser + Private tuvs As List + Private tus As List + Private aSourceLang,aTargetLang As String + Private numbers As Int +End Sub + +'Initializes the object. You can add parameters to this method if needed. +Public Sub Initialize + +End Sub + +private Sub parse(dir As String,filename As String) + tus.Initialize + tuvs.Initialize + sax.Initialize + Dim in As InputStream + in = File.OpenInput(dir, filename) 'This file was added with the file manager. + sax.Parse(in, "Parser") '"Parser" is the events subs prefix. + in.Close + 'Log("tus:"&tus) +End Sub + +private Sub Parser_StartElement (Uri As String, Name As String, Attributes As Attributes) + If Name="tuv" Or Name="tu" Then + Dim map1 As Map + map1.Initialize + Dim attr As Map + attr.Initialize + For i=0 To Attributes.Size-1 + attr.Put(Attributes.GetName(i),Attributes.GetValue(i)) + Next + map1.Put("Attributes",attr) + Select Name + Case "tuv" + tuvs.Add(map1) + Case "tu" + tus.Add(map1) + End Select + + End If +End Sub + +private Sub Parser_EndElement (Uri As String, Name As String, Text As StringBuilder) + If Name="seg" Then + numbers=numbers+1 + Dim map1 As Map = tuvs.Get(tuvs.Size-1) + Dim Attributes As Map =map1.Get("Attributes") + 'Log(Attributes.GetValue2("","xml:lang")) + Dim lang As String + If Attributes.ContainsKey("xml:lang") Then + lang=Attributes.Get("xml:lang") + else if Attributes.ContainsKey("lang") Then + lang=Attributes.Get("lang") + End If + 'Log("lang: "&lang) + If lang.StartsWith(aSourceLang) Or lang.StartsWith(aTargetLang) Then + map1.Put("Text",Text.ToString) + Else + tuvs.RemoveAt(tuvs.Size-1) + End If + 'Log(map1) + 'Log(numbers) + else if Name="note" Then + Dim map1 As Map = tus.Get(tus.Size-1) + map1.Put("note",Text.ToString) + Else if Name = "tu" Then + Dim newList As List + newList.Initialize + newList.AddAll(tuvs) + Dim map1 As Map = tus.Get(tus.Size-1) + map1.Put("tuv",newList) + tuvs.Clear + End If +End Sub + +public Sub importedList(dir As String,filename As String, sourceLang As String,targetLang As String,quickMode As Boolean) As List + If quickMode Then + Return importedListQuick(dir,filename,sourceLang,targetLang) + Else + Return importedListAccurate(dir,filename,sourceLang,targetLang) + End If +End Sub + +public Sub importedListQuick(dir As String,filename As String, sourceLang As String,targetLang As String) As List + Dim segments As List + segments.Initialize + parse(dir,filename) + For Each tu As Map In tus + Dim tuvList As List= tu.Get("tuv") + Dim newtu As Map + newtu.Initialize + Dim targetMap As Map + targetMap.Initialize + For Each tuv As Map In tuvList + Dim Attributes As Map =tuv.Get("Attributes") + 'Log(Attributes.GetValue2("","xml:lang")) + Dim lang As String + If Attributes.ContainsKey("xml:lang") Then + lang=Attributes.Get("xml:lang") + else if Attributes.ContainsKey("lang") Then + lang=Attributes.Get("lang") + End If + newtu.Put(lang,tuv.Get("Text")) + Next + + If tu.ContainsKey("note") Then + newtu.Put("note",tu.Get("note")) + End If + Dim segment As List + segment.Initialize + segment.Add(newtu.Get(sourceLang)) + segment.Add(newtu.Get(targetLang)) + segment.Add(filename) + segment.Add(targetMap) + segments.Add(segment) + Next + Return segments +End Sub + + +public Sub getTransUnits(xml As String) As List + Dim parser As XmlParser + parser.Initialize + Dim root As XmlNode=XMLUtils.Parse(xml) + Dim body As XmlNode=root.Get("body").Get(0) + Dim tus As List=body.Get("tu") + Return tus +End Sub + +public Sub importedListAccurate(dir As String, filename As String, sourceLang As String,targetLang As String) As List + Dim xml As String=File.ReadString(dir,filename) + Return importedAccurateList2(xml,filename,sourceLang,targetLang) +End Sub + +public Sub importedAccurateList2(xml As String,filename As String,sourceLang As String,targetLang As String) As List + Dim segments As List + segments.Initialize + sourceLang=sourceLang.ToLowerCase + targetLang=targetLang.ToLowerCase + Dim tus As List=getTransUnits(xml) + For Each tu As XmlNode In tus + Dim tuvList As List= tu.Get("tuv") + Dim segment As List + segment.Initialize + Dim targetMap As Map + targetMap.Initialize + segment.Add("source") + segment.Add("target") + Dim addedTimes As Int=0 + For Each tuv As XmlNode In tuvList + Dim lang As String + Dim seg As XmlNode=tuv.Get("seg").Get(0) + If tuv.Attributes.ContainsKey("xml:lang") Then + lang=tuv.Attributes.Get("xml:lang") + else if tuv.Attributes.ContainsKey("lang") Then + lang=tuv.Attributes.Get("lang") + End If + lang=lang.ToLowerCase + If lang.StartsWith(sourceLang) Then + segment.Set(0,getSegText(seg)) + addedTimes=addedTimes+1 + else if lang.StartsWith(targetLang) Then + segment.Set(1,getSegText(seg)) + addedTimes=addedTimes+1 + Else + Continue + End If + If tuv.Attributes.ContainsKey("creationid") And tuv.Attributes.ContainsKey("creationdate") Then + Try + Dim creationdate As String + creationdate=tuv.Attributes.Get("creationdate") + DateTime.DateFormat="yyyyMMdd" + DateTime.TimeFormat="HHmmss" + Dim date As String + Dim time As String + date=creationdate.SubString2(0,creationdate.IndexOf("T")) + time=creationdate.SubString2(creationdate.IndexOf("T")+1,creationdate.IndexOf("Z")) + targetMap.Put("createdTime",DateTime.DateTimeParse(date,time)) + targetMap.Put("creator",tuv.Attributes.Get("creationid")) + Catch + Log(LastException) + End Try + End If + Next + If addedTimes<>2 Then + Continue + End If + If tu.Contains("note") Then + Dim node As XmlNode=tu.Get("note").Get(0) + targetMap.Put("note",node.innerText) + End If + segment.Add(filename) + segment.Add(targetMap) + segments.Add(segment) + Next + Return segments +End Sub + +private Sub getSegText(seg As XmlNode) As String + If XMLUtils.XmlNodeContainsOnlyText(seg) Then + Dim text As String=XMLUtils.XmlNodeText(seg) + Return text + End If + Return XMLUtils.XMLToText(removeTMXTags(seg.innerXML)) +End Sub + +private Sub removeTMXTags(s As String) As String + '<g1> + Dim sb As StringBuilder + sb.Initialize + Dim parts As List + parts.Initialize + Dim tags As String + tags="(bpt|ept|ph)" + Dim previousEndIndex As Int=0 + Dim matcher As Matcher + matcher=Regex.Matcher($"<${tags}.*?>(.*?)"$,s) + Do While matcher.Find + Dim textBefore As String + textBefore=s.SubString2(previousEndIndex,matcher.GetStart(0)) + If textBefore<>"" Then + parts.Add(textBefore) + End If + parts.add(XMLUtils.UnescapeXml(matcher.Group(2))) + previousEndIndex=matcher.GetEnd(0) + Loop + Dim textAfter As String + textAfter=s.SubString2(previousEndIndex,s.Length) + If textAfter<>"" Then + parts.Add(textAfter) + End If + For Each part As String In parts + sb.Append(part) + Next + Return Regex.Replace($"<${tags}.*?>"$,sb.ToString,"") +End Sub diff --git a/BasicCAT/importDialog.bas b/BasicCAT/importDialog.bas index 945e373..6f0f485 100644 --- a/BasicCAT/importDialog.bas +++ b/BasicCAT/importDialog.bas @@ -95,11 +95,13 @@ Sub importTMXlsx(path As String) End Sub Sub importTMX(path As String,sourceLang As String,targetLang As String) + Dim importer As TMXImporter + importer.Initialize Try Dim tmxString As String tmxString=File.ReadString(path,"") tmxString=XMLUtils.pickSmallerXML(tmxString,"tu","body") - Dim segments As List=TMX.importedList2(tmxString,File.GetName(path),sourceLang,targetLang) + Dim segments As List=importer.importedAccurateList2(tmxString,File.GetName(path),sourceLang,targetLang) Dim i As Int For Each segment As List In segments i=i+1