diff --git a/build.gradle b/build.gradle index ebf364a..f2c3d0e 100644 --- a/build.gradle +++ b/build.gradle @@ -1,4 +1,5 @@ plugins { + id 'base' id 'groovy' id 'maven-publish' } @@ -6,14 +7,17 @@ plugins { group 'de.dfki.mary' version '0.2-SNAPSHOT' -targetCompatibility = 1.7 +sourceCompatibility = 1.8 +targetCompatibility = 1.8 repositories { jcenter() + mavenLocal() } dependencies { - compile group: 'org.codehaus.groovy', name: 'groovy-all', version: '2.4.4' + compile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.13.1' + compile group: 'org.codehaus.groovy', name: 'groovy-all', version: '2.5.10' compile group: 'de.dfki.mary', name: 'marytts-runtime', version: maryttsVersion testCompile group: 'org.testng', name: 'testng', version: '6.9.4' testCompile group: 'xmlunit', name: 'xmlunit', version: '1.6' @@ -25,7 +29,7 @@ dependencies { configurations.all { resolutionStrategy { dependencySubstitution { - force 'org.codehaus.groovy:groovy-all:2.4.4' + force 'org.codehaus.groovy:groovy-all:2.5.10' substitute module('org.slf4j:slf4j-log4j12:1.6.1') with module('org.slf4j:log4j-over-slf4j:1.6.1') } } diff --git a/gradle.properties b/gradle.properties index 71e6d8b..c8084bb 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -maryttsVersion=5.2 +maryttsVersion=6.0-SNAPSHOT diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 209f36a..490fda8 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ea8cdbd..b7c8c5d 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,5 @@ -#Mon Jul 17 15:25:46 CEST 2017 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.2-bin.zip zipStoreBase=GRADLE_USER_HOME 
zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-4.0.1-bin.zip diff --git a/gradlew b/gradlew index cccdd3d..2fe81a7 100755 --- a/gradlew +++ b/gradlew @@ -1,5 +1,21 @@ #!/usr/bin/env sh +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + ############################################################################## ## ## Gradle start up script for UN*X @@ -28,7 +44,7 @@ APP_NAME="Gradle" APP_BASE_NAME=`basename "$0"` # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. 
MAX_FD="maximum" @@ -109,8 +125,8 @@ if $darwin; then GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" fi -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then APP_HOME=`cygpath --path --mixed "$APP_HOME"` CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` JAVACMD=`cygpath --unix "$JAVACMD"` @@ -138,19 +154,19 @@ if $cygwin ; then else eval `echo args$i`="\"$arg\"" fi - i=$((i+1)) + i=`expr $i + 1` done case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; esac fi @@ -159,14 +175,9 @@ save () { for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done echo " " } -APP_ARGS=$(save "$@") +APP_ARGS=`save "$@"` # Collect all arguments for the java command, following the shell quoting and 
substitution rules eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" -# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong -if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then - cd "$(dirname "$0")" -fi - exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat index f955316..62bd9b9 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -1,3 +1,19 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + @if "%DEBUG%" == "" @echo off @rem ########################################################################## @rem @@ -13,8 +29,11 @@ if "%DIRNAME%" == "" set DIRNAME=. set APP_BASE_NAME=%~n0 set APP_HOME=%DIRNAME% +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
-set DEFAULT_JVM_OPTS= +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" @rem Find java.exe if defined JAVA_HOME goto findJavaFromJavaHome diff --git a/src/main/groovy/marytts/BatchProcessor.groovy b/src/main/groovy/marytts/BatchProcessor.groovy index f6f4f24..07c22b8 100644 --- a/src/main/groovy/marytts/BatchProcessor.groovy +++ b/src/main/groovy/marytts/BatchProcessor.groovy @@ -1,14 +1,14 @@ package marytts import groovy.json.JsonSlurper -import groovy.util.logging.Log4j +import groovy.util.logging.Log4j2 import groovy.xml.XmlUtil import marytts.util.MaryUtils import marytts.util.data.audio.MaryAudioUtils import marytts.util.dom.DomUtils -@Log4j +@Log4j2 class BatchProcessor { static void main(String[] args) { def mary = new LocalMaryInterface() diff --git a/src/main/groovy/marytts/FeatureLister.groovy b/src/main/groovy/marytts/FeatureLister.groovy index cc2eea9..32cf2ad 100644 --- a/src/main/groovy/marytts/FeatureLister.groovy +++ b/src/main/groovy/marytts/FeatureLister.groovy @@ -1,10 +1,10 @@ package marytts -import groovy.util.logging.Log4j +import groovy.util.logging.Log4j2 import marytts.features.FeatureProcessorManager -@Log4j +@Log4j2 class FeatureLister { def fpm diff --git a/src/main/java/marytts/tools/analysis/MaryTranscriptionAligner.java b/src/main/java/marytts/tools/analysis/MaryTranscriptionAligner.java new file mode 100644 index 0000000..fd40918 --- /dev/null +++ b/src/main/java/marytts/tools/analysis/MaryTranscriptionAligner.java @@ -0,0 +1,325 @@ +/** + * + */ +package marytts.tools.analysis; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; + +import marytts.datatypes.MaryXML; +import marytts.exceptions.InvalidDataException; +import marytts.modules.phonemiser.AllophoneSet; +import marytts.util.dom.MaryDomUtils; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.traversal.NodeIterator; +import 
org.w3c.dom.traversal.TreeWalker;
+
+/**
+ * Transcription aligner that works on MaryXML documents: it rewrites the phone and boundary elements of a document so
+ * that they follow a corrected label sequence.
+ *
+ * @author marc
+ */
+public class MaryTranscriptionAligner extends TranscriptionAligner {
+
+    /** When true, every element inserted during alignment gets a dummy duration attribute with value "1". */
+    private boolean insertDummyDurations = false;
+
+    /**
+     * Creates an aligner without an allophone set and without dummy durations.
+     */
+    public MaryTranscriptionAligner() {
+        this(null, false);
+    }
+
+    /**
+     * Creates an aligner for the given allophone set, without dummy durations.
+     *
+     * @param allophoneSet
+     *            the allophone set handed to the superclass; may be null
+     */
+    public MaryTranscriptionAligner(AllophoneSet allophoneSet) {
+        this(allophoneSet, false);
+    }
+
+    /**
+     * Creates an aligner for the given allophone set.
+     *
+     * @param allophoneSet
+     *            the allophone set handed to the superclass; may be null
+     * @param insertDummyDurations
+     *            if true, in any inserted items, a duration of 1 millisecond will be set.
+     */
+    public MaryTranscriptionAligner(AllophoneSet allophoneSet, boolean insertDummyDurations) {
+        super(allophoneSet);
+        this.insertDummyDurations = insertDummyDurations;
+    }
+
+    /**
+     * 
+     * This changes the transcription of a MARYXML document in ALLOPHONES format to match the label sequence given as the "labels"
+     * parameter. The symbols of the original transcription are aligned to corrected ones, with which they are replaced in turn.
+     * 
+     * @param allophones
+     *            the MARYXML document, in ALLOPHONES format
+     * @param labels
+     *            the sequence of label symbols to use, separated by the entry separator as provided by getEntrySeparator().
+     * @throws InvalidDataException
+     *             if a manual label is encountered that is not in the AllophoneSet
+     */
+    public void alignXmlTranscriptions(Document allophones, String labels) throws InvalidDataException {
+        // get all t and boundary elements
+        // NOTE(review): this list is filled but never read in this method -- confirm it can be dropped.
+        NodeIterator tokenIt = MaryDomUtils.createNodeIterator(allophones, MaryXML.TOKEN, MaryXML.BOUNDARY);
+        List<Element> tokens = new ArrayList<>();
+        Element e;
+        while ((e = (Element) tokenIt.nextNode()) != null) {
+            tokens.add(e);
+        }
+
+        String orig = this.collectTranscription(allophones);
+
+        System.err.println("Orig : " + orig);
+        System.err.println("Correct: " + labels);
+
+        // now we align the transcriptions and split it at the delimiters
+        // (the trailing blank keeps a final empty entry from being dropped by split())
+        String al = this.distanceAlign(orig.trim(), labels.trim()) + " ";
+
+        System.err.println("Alignments: " + al);
+        String[] alignments = al.split("#");
+
+        // change the transcription in xml according to the aligned one
+        changeTranscriptions(allophones, alignments);
+
+        if (allophoneSet == null) { // cannot verify
+            return;
+        }
+        // assert that all alignments should be in the AllophoneSet for this locale:
+        // the element type must be declared, otherwise the typed for-each below does not compile
+        HashSet<String> manualLabelSet = new HashSet<>(Arrays.asList(al.trim().split("[#\\s]+")));
+        try {
+            for (String label : manualLabelSet) {
+                // called only for the IllegalArgumentException it raises on an unknown label
+                allophoneSet.getAllophone(label);
+            }
+        } catch (IllegalArgumentException iae) {
+            throw new InvalidDataException(iae.getMessage());
+        }
+    }
+
+    /**
+     * 
+     * This computes a string of phonetic symbols out of an allophones xml: - standard phones are taken from "ph" elements in the
+     * document - after each token-element (except those followed by a "boundary"-element), a "bnd" symbol is inserted (standing
+     * for a possible pause). Entries are separated by the entrySeparator character.
+     * 
+     * @param doc
+     *            the document to analyse
+     * @return orig, converted into string
+     */
+    private String collectTranscription(Document doc) {
+        // String storing the original transcription begins with a pause
+        StringBuilder orig = new StringBuilder();
+
+        NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY);
+        Element e;
+        Element prevToken = null;
+        boolean prevWasBoundary = false;
+        while ((e = (Element) ni.nextNode()) != null) {
+            if (e.getTagName().equals(MaryXML.PHONE)) {
+                Element token = (Element) MaryDomUtils.getAncestor(e, MaryXML.TOKEN);
+                // first phone of a new token that is not already preceded by a boundary:
+                // emit a "possible boundary" entry between the two tokens
+                if (token != prevToken && !prevWasBoundary) {
+                    if (orig.length() > 0)
+                        orig.append(entrySeparator);
+                    orig.append(possibleBnd);
+                }
+                if (orig.length() > 0)
+                    orig.append(entrySeparator);
+                orig.append(e.getAttribute("p"));
+                prevToken = token;
+                prevWasBoundary = false;
+            } else { // boundary
+                if (orig.length() > 0)
+                    orig.append(entrySeparator);
+                orig.append(possibleBnd);
+                prevWasBoundary = true;
+            }
+        }
+
+        return orig.toString();
+    }
+
+    /**
+     * 
+     * This changes the transcription according to a given sequence of phonetic symbols (including boundaries and pauses).
+     * Boundaries in doc are added or deleted as necessary to match the pause symbols in alignments.
+     * 
+     * @param doc
+     *            the document in which to change the transcriptions
+     * @param alignments
+     *            the aligned symbols to use in the update.
+     */
+    private void changeTranscriptions(Document doc, String[] alignments) {
+        // Algorithm:
+        // * Go through "ph" and "boundary" elements in doc on the one hand,
+        // and through alignments on the other hand.
+        // - Special steps for the first "ph" in a token:
+        // -> if the "ph" is the first "ph" in the current token,
+        // and alignment is a pause symbol,
+        // insert a new boundary before the token, and skip the alignment entry;
+        // -> if the "ph" is the first "ph" in the current token,
+        // and the alignment entry is empty, skip the alignment entry.
+        // - for "ph" elements:
+        // -> if the alignment entry is empty, delete the "ph" and,
+        // if it was the only "ph" in the current "syllable", also
+        // delete the syllable;
+        // -> else, use the current alignment entry, adding any "ph"
+        // elements as necessary.
+        // - for "boundary" elements:
+        // -> if symbol is pause, keep boundary;
+        // -> if symbol is word separator, delete boundary.
+        //
+        // NOTE(review): alignments[iAlign] is read without a bounds check; this presumably relies on
+        // the alignment having one entry per element plus one per token gap -- confirm against distanceAlign.
+
+        NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY);
+        // the element type must be declared, otherwise the typed for-each below does not compile
+        List<Element> origPhonesAndBoundaries = new ArrayList<>();
+        // We make a copy of the list of original entries, because when
+        // we add/remove entries later, that get the node iterator confused.
+        Element elt;
+        while ((elt = (Element) ni.nextNode()) != null) {
+            origPhonesAndBoundaries.add(elt);
+        }
+        int iAlign = 0;
+        Element prevToken = null;
+        boolean prevWasBoundary = false;
+        for (Element e : origPhonesAndBoundaries) {
+            if (e.getTagName().equals(MaryXML.PHONE)) {
+                boolean betweenTokens = false;
+                Element token = (Element) MaryDomUtils.getAncestor(e, MaryXML.TOKEN);
+                if (token != prevToken && !prevWasBoundary) {
+                    betweenTokens = true;
+                }
+                if (betweenTokens) {
+                    assert !prevWasBoundary;
+                    if (alignments[iAlign].trim().equals(possibleBnd)) {
+                        // Need to insert a boundary before token
+                        System.err.println(" inserted boundary in xml");
+                        Element b = MaryXML.createElement(doc, MaryXML.BOUNDARY);
+                        b.setAttribute("breakindex", "3");
+                        if (insertDummyDurations) {
+                            b.setAttribute("duration", "1");
+                        }
+                        token.getParentNode().insertBefore(b, token);
+                    } else if (!alignments[iAlign].trim().equals("")) {
+                        // one or more phones were inserted into the transcription
+                        // -- treat them as word-final, i.e. insert them into the last syllable in prevToken
+                        Element syllable = null;
+                        Element ref = null; // insert before null = insert at the end
+                        NodeList prevSyllables = null;
+                        // if there is an insertion at the beginning, we don't have a prevToken!
+                        if (prevToken != null) {
+                            prevSyllables = prevToken.getElementsByTagNameNS(MaryXML.getNamespace(), MaryXML.SYLLABLE);
+                        }
+                        if (prevSyllables != null && prevSyllables.getLength() > 0) { // insert at end of previous token
+                            syllable = (Element) prevSyllables.item(prevSyllables.getLength() - 1);
+                            ref = null;
+                        } else { // insert at beginning of current token
+                            syllable = (Element) e.getParentNode();
+                            ref = e; // insert before current phone
+                        }
+                        String[] newPh = alignments[iAlign].trim().split("\\s+");
+                        for (int i = 0; i < newPh.length; i++) {
+                            Element newPhElement = MaryXML.createElement(doc, MaryXML.PHONE);
+                            newPhElement.setAttribute("p", newPh[i]);
+                            syllable.insertBefore(newPhElement, ref);
+                            System.err.println(" inserted phone from transcription: " + newPh[i]);
+                            if (insertDummyDurations) {
+                                newPhElement.setAttribute("d", "1");
+                            }
+                        }
+                    } // else it is an empty word boundary marker
+                    iAlign++; // move beyond the marker between tokens
+                }
+                prevToken = token;
+                prevWasBoundary = false;
+                System.err.println("Ph = " + e.getAttribute("p") + ", align = " + alignments[iAlign]);
+                if (alignments[iAlign].trim().equals("")) {
+                    // Need to delete the current "ph" element
+                    Element syllable = (Element) e.getParentNode();
+                    assert syllable != null;
+                    assert syllable.getTagName().equals(MaryXML.SYLLABLE);
+                    syllable.removeChild(e);
+                    if (MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE) == null) {
+                        // Syllable is now empty, need to delete it as well
+                        syllable.getParentNode().removeChild(syllable);
+                    }
+                } else {
+                    // Replace "ph", add "ph" siblings if necessary
+                    String[] newPh = alignments[iAlign].trim().split("\\s+");
+                    e.setAttribute("p", newPh[0]);
+                    if (newPh.length > 1) {
+                        // any ph to be added
+                        Element syllable = (Element) e.getParentNode();
+                        assert syllable != null;
+                        assert syllable.getTagName().equals(MaryXML.SYLLABLE);
+                        Node rightNeighbor = e.getNextSibling(); // can be null
+                        for (int i = 1; i < newPh.length; i++) {
+                            Element newPhElement = MaryXML.createElement(doc, MaryXML.PHONE);
+                            newPhElement.setAttribute("p", newPh[i]);
+                            syllable.insertBefore(newPhElement, rightNeighbor);
+                        }
+                    }
+                }
+            } else { // boundary
+                System.err.println("Boundary, align = " + alignments[iAlign]);
+                if (alignments[iAlign].trim().equals(possibleBnd)) {
+                    // keep boundary
+                } else {
+                    // delete boundary
+                    System.err.println(" deleted boundary from xml");
+                    e.getParentNode().removeChild(e);
+                }
+                prevWasBoundary = true;
+            }
+            iAlign++;
+        }
+        updatePhAttributesFromPhElements(doc);
+    }
+
+    /**
+     * Recomputes the "ph" attributes of every token element in the document from its phone elements.
+     *
+     * @param doc
+     *            the document whose token elements are updated in place
+     */
+    private void updatePhAttributesFromPhElements(Document doc) {
+        NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.TOKEN);
+        Element t;
+        while ((t = (Element) ni.nextNode()) != null) {
+            updatePhAttributesFromPhElements(t);
+        }
+    }
+
+    /**
+     * Rebuilds the "ph" attribute of each syllable below the given token from the "p" attributes of its phones, and
+     * the "ph" attribute of the token itself from its syllables. A syllable string is prefixed with "'" for stress
+     * "1" and "," for stress "2"; syllables are joined with " - " and a syllable's "tone" attribute, if present, is
+     * appended after it. The token attribute is left untouched when the resulting string is empty.
+     *
+     * @param token
+     *            the token element to update
+     * @throws NullPointerException
+     *             if token is null
+     * @throws IllegalArgumentException
+     *             if the element is not a token element
+     */
+    private void updatePhAttributesFromPhElements(Element token) {
+        if (token == null)
+            throw new NullPointerException("Got null token");
+        if (!token.getTagName().equals(MaryXML.TOKEN)) {
+            throw new IllegalArgumentException("Argument should be a <" + MaryXML.TOKEN + ">, not a <" + token.getTagName() + ">");
+        }
+        StringBuilder tPh = new StringBuilder();
+        TreeWalker sylWalker = MaryDomUtils.createTreeWalker(token, MaryXML.SYLLABLE);
+        Element syl;
+        while ((syl = (Element) sylWalker.nextNode()) != null) {
+            StringBuilder sylPh = new StringBuilder();
+            String stress = syl.getAttribute("stress");
+            if (stress.equals("1"))
+                sylPh.append("'");
+            else if (stress.equals("2"))
+                sylPh.append(",");
+            TreeWalker phWalker = MaryDomUtils.createTreeWalker(syl, MaryXML.PHONE);
+            Element ph;
+            while ((ph = (Element) phWalker.nextNode()) != null) {
+                if (sylPh.length() > 0)
+                    sylPh.append(" ");
+                sylPh.append(ph.getAttribute("p"));
+            }
+            String sylPhString = sylPh.toString();
+            syl.setAttribute("ph", sylPhString);
+            if (tPh.length() > 0)
+                tPh.append(" - ");
+            tPh.append(sylPhString);
+            if (syl.hasAttribute("tone")) {
+                tPh.append(" " + syl.getAttribute("tone"));
+            }
+        }
+        if (tPh.toString().length() > 0) {
+            token.setAttribute("ph", tPh.toString());
+        }
+    }
+
+}
diff --git a/src/main/java/marytts/tools/voiceimport/TimelineWriter.java b/src/main/java/marytts/tools/voiceimport/TimelineWriter.java
new file mode 100644
index 0000000..60ab448
--- /dev/null
+++ b/src/main/java/marytts/tools/voiceimport/TimelineWriter.java
@@ -0,0 +1,372 @@
+/**
+ * Portions Copyright 2006 DFKI GmbH.
+ * Portions Copyright 2001 Sun Microsystems, Inc.
+ * Portions Copyright 1999-2001 Language Technologies Institute,
+ * Carnegie Mellon University.
+ * All Rights Reserved. Use is subject to license terms.
+ *
+ * Permission is hereby granted, free of charge, to use and distribute
+ * this software and its documentation without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of this work, and to
+ * permit persons to whom this work is furnished to do so, subject to
+ * the following conditions:
+ *
+ * 1. The code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ * 2. Any modifications must be clearly marked as such.
+ * 3. Original authors' names are not deleted.
+ * 4. The authors' names are not used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
+ * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+package marytts.tools.voiceimport;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Vector;
+
+import marytts.unitselection.data.TimelineReader;
+import marytts.util.data.Datagram;
+import marytts.util.data.MaryHeader;
+
+/**
+ * The TimelineWriter class provides an interface to create or update a Timeline data file in Mary format, and to feed new
+ * datagrams to the timeline file.
+ * 
+ * @author sacha, marc
+ * 
+ */
+public class TimelineWriter {
+
+    protected RandomAccessFile raf = null; // The file to write to
+    protected MaryHeader maryHdr = null; // The standard Mary header
+    protected TimelineReader.ProcHeader procHdr = null; // The processing info header
+
+    protected TimelineReader.Index idx = null; // A global time index for the variable-sized datagrams
+
+    /* Some specific header fields: */
+    protected int sampleRate = 0;
+    protected long numDatagrams = 0;
+
+    protected long datagramsBytePos = 0;
+    protected long timeIdxBytePos = 0;
+
+    /* Pointers to navigate the file: */
+    protected long timePtr = 0; // A time pointer to keep track of the time position in the file
+    // Note: a file pointer, keeping track of the byte position in the file, is implicitly
+    // maintained by the underlying RandomAccessFile.
+
+    /****************/
+    /* DATA FIELDS */
+    /****************/
+    private int idxInterval; // distance between two index entries, in samples
+    private long datagramZoneBytePos;
+    private Vector<TimelineReader.IdxField> indexData; // index entries collected while datagrams are fed
+    private long prevBytePos; // byte position of the most recently fed datagram
+    private long prevTimePos; // time position of the most recently fed datagram
+
+    /****************/
+    /* CONSTRUCTORS */
+    /****************/
+
+    /**
+     * Constructor to create a timeline.
+     * 
+     * @param fileName
+     *            The file to write the timeline to; an existing file of that name is deleted first.
+     * @param procHdrString
+     *            the string to use as a processing header.
+     * @param reqSampleRate
+     *            the sample rate requested to measure time in this timeline.
+     * @param setIdxIntervalInSeconds
+     *            the interval between two index entries, in seconds
+     * @throws RuntimeException
+     *             if the sample rate or the index interval is not positive, if the index interval does not amount to
+     *             between 1 and Integer.MAX_VALUE samples, or if writing the header fails
+     * @throws Error
+     *             if the file cannot be opened for writing
+     */
+    public TimelineWriter(String fileName, String procHdrString, int reqSampleRate, double setIdxIntervalInSeconds) {
+
+        /* Check the arguments */
+        if (reqSampleRate <= 0) {
+            throw new RuntimeException("The sample rate [" + reqSampleRate
+                    + "] can't be negative or null when creating a timeline.");
+        }
+        if (setIdxIntervalInSeconds <= 0.0) {
+            throw new RuntimeException("The index interval [" + setIdxIntervalInSeconds
+                    + "] can't be negative or null when creating a timeline.");
+        }
+        /*
+         * The interval is stored as a whole number of samples. If it rounds to zero, feedIndex() would never advance
+         * and would loop forever, so reject it before touching the file. This also catches NaN and values too large
+         * for an int.
+         */
+        long idxIntervalInSamples = Math.round(setIdxIntervalInSeconds * (double) reqSampleRate);
+        if (idxIntervalInSamples < 1 || idxIntervalInSamples > Integer.MAX_VALUE) {
+            throw new RuntimeException("The index interval [" + setIdxIntervalInSeconds + "] amounts to ["
+                    + idxIntervalInSamples + "] samples at sample rate [" + reqSampleRate
+                    + "]; it must be between 1 and " + Integer.MAX_VALUE + " samples.");
+        }
+
+        /* Open the file */
+        try {
+            File fid = new File(fileName);
+            /* Check if the file exists and should be deleted first. */
+            if (fid.exists())
+                fid.delete();
+            /* open */
+            raf = new RandomAccessFile(fid, "rw");
+        } catch (FileNotFoundException e) {
+            throw new Error("Timeline file [" + fileName + "] was not found.", e);
+        } catch (SecurityException e) {
+            throw new Error("You do not have read access to the file [" + fileName + "].", e);
+        }
+
+        /* Make a new header */
+        try {
+            /* If the old file could not be deleted, make sure none of its content survives behind the new data. */
+            raf.setLength(0L);
+
+            /* Make a new Mary header and write it */
+            maryHdr = new MaryHeader(MaryHeader.TIMELINE);
+            maryHdr.writeTo(raf);
+
+            /* Make a new processing header and write it */
+            procHdr = new TimelineReader.ProcHeader(procHdrString);
+            procHdr.dump(raf);
+
+            /* Make/write the data header */
+            sampleRate = reqSampleRate;
+            raf.writeInt(sampleRate);
+
+            numDatagrams = 0;
+            raf.writeLong(numDatagrams);
+
+            /* Write the positions, with a placeholder for the index position, which is only known in close() */
+            datagramsBytePos = getBytePointer() + 16; // +16: account for the 2 upcoming long fields datagramsBytePos and
+                                                      // timeIdxBytePos
+            raf.writeLong(datagramsBytePos);
+            timeIdxBytePos = 0;
+            raf.writeLong(0L);
+
+            // Remember important facts for index creation
+            idxInterval = (int) idxIntervalInSamples;
+            datagramZoneBytePos = datagramsBytePos;
+            indexData = new Vector<>();
+            prevBytePos = datagramsBytePos;
+            prevTimePos = 0;
+
+            /* Now we can output the datagrams. */
+
+        } catch (IOException e) {
+            /* Do not leak the file handle when the header cannot be written. */
+            try {
+                raf.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw new RuntimeException("IOException caught when constructing a timeline writer on file [" + fileName + "]: ", e);
+        }
+    }
+
+    /*******************/
+    /* MISC. METHODS */
+    /*******************/
+
+    /**
+     * Get the current byte position in the file
+     * 
+     * @throws IOException
+     *             if the file pointer cannot be read
+     * @return the file pointer of the underlying random access file
+     */
+    public synchronized long getBytePointer() throws IOException {
+        return (raf.getFilePointer());
+    }
+
+    /**
+     * Get the current time position in the file
+     * 
+     * @return timePtr
+     */
+    public synchronized long getTimePointer() {
+        return (timePtr);
+    }
+
+    /**
+     * Set the current byte position in the file
+     * 
+     * @param bytePos
+     *            the byte offset to seek to
+     * @throws IOException
+     *             if seeking fails
+     */
+    protected void setBytePointer(long bytePos) throws IOException {
+        raf.seek(bytePos);
+    }
+
+    /**
+     * Set the current time position in the file
+     * 
+     * @param timePosition
+     *            the new value of the time pointer
+     */
+    protected void setTimePointer(long timePosition) {
+        timePtr = timePosition;
+    }
+
+    /**
+     * Scales a discrete time to the timeline's sample rate.
+     * 
+     * @param reqSampleRate
+     *            the externally given sample rate.
+     * @param targetTimeInSamples
+     *            a discrete time, with respect to the externally given sample rate.
+     * 
+     * @return a discrete time, in samples with respect to the timeline's sample rate.
+     */
+    protected long scaleTime(int reqSampleRate, long targetTimeInSamples) {
+        if (reqSampleRate == sampleRate)
+            return (targetTimeInSamples);
+        // NOTE(review): this multiplies by reqSampleRate / sampleRate. Converting a time counted at reqSampleRate
+        // into timeline samples would normally use sampleRate / reqSampleRate -- confirm against TimelineReader
+        // before changing, since files already written depend on the current behaviour.
+        /* else */return ((long) Math.round((double) (reqSampleRate) * (double) (targetTimeInSamples) / (double) (sampleRate)));
+    }
+
+    /**
+     * Unscales a discrete time from the timeline's sample rate.
+     * 
+     * @param reqSampleRate
+     *            the externally given sample rate.
+     * @param timelineTimeInSamples
+     *            a discrete time, with respect to the timeline sample rate.
+     * 
+     * @return a discrete time, in samples with respect to the externally given sample rate.
+     */
+    protected long unScaleTime(int reqSampleRate, long timelineTimeInSamples) {
+        if (reqSampleRate == sampleRate)
+            return (timelineTimeInSamples);
+        /* else */return ((long) Math.round((double) (sampleRate) * (double) (timelineTimeInSamples) / (double) (reqSampleRate)));
+    }
+
+    /**
+     * Returns the time index. In this class the index is only built by {@link #close()}.
+     *
+     * @return idx
+     */
+    public TimelineReader.Index getIndex() {
+        return idx;
+    }
+
+    /**
+     * Returns the position of the datagram zone
+     * 
+     * @return datagramsBytePos
+     */
+    public long getDatagramsBytePos() {
+        return datagramsBytePos;
+    }
+
+    /**
+     * Returns the current number of datagrams in the timeline.
+     * 
+     * @return numDatagrams
+     */
+    public long getNumDatagrams() {
+        return numDatagrams;
+    }
+
+    /**
+     * Returns the sample rate of the timeline.
+     * 
+     * @return sampleRate
+     */
+    public int getSampleRate() {
+        return sampleRate;
+    }
+
+    /**
+     * Output the internally maintained indexes and close the file. The file is closed even when one of the writes
+     * fails.
+     * 
+     * @throws IOException
+     *             if patching the header, writing the index or closing the file fails
+     */
+    public void close() throws IOException {
+        /* The resource variable only exists to guarantee that the file gets closed on every path. */
+        try (RandomAccessFile closedOnExit = raf) {
+
+            /* Correct the number of datagrams: it sits three long fields (24 bytes) before the datagram zone */
+            setBytePointer(datagramsBytePos - 24L);
+            raf.writeLong(numDatagrams);
+
+            /* Go to the end of the file and output the time index */
+            timeIdxBytePos = raf.length();
+            setBytePointer(timeIdxBytePos);
+            idx = new TimelineReader.Index(idxInterval, indexData);
+            idx.dump(raf);
+
+            /* Register the index position in the last long field (8 bytes) before the datagram zone */
+            setBytePointer(datagramsBytePos - 8L);
+            raf.writeLong(timeIdxBytePos);
+        }
+    }
+
+    /**
+     * Feeds a file position (in bytes) and a time position (in samples) from a timeline, and determines if a new index field is
+     * to be added.
+     * 
+     * @param bytePosition
+     *            bytePosition
+     * @param timePosition
+     *            timePosition
+     */
+    private void feedIndex(long bytePosition, long timePosition) {
+        /*
+         * Starting from the time of the next index field still to be written, add one field for every index time
+         * lying before the current time position. Each of them points at the PREVIOUS datagram position.
+         */
+        for (long pendingIdxTime = indexData.size() * (long) idxInterval; pendingIdxTime < timePosition; pendingIdxTime += idxInterval) {
+            indexData.add(new TimelineReader.IdxField(prevBytePos, prevTimePos));
+        }
+
+        /* The position just observed becomes the "previous" one for the next call */
+        prevTimePos = timePosition;
+        prevBytePos = bytePosition;
+    }
+
+    /**
+     * Write one datagram to the timeline.
+     * 
+     * @param d
+     *            the datagram to write.
+     * @param reqSampleRate
+     *            the sample rate at which the datagram duration is expressed.
+     * @throws IOException
+     *             IOException
+     */
+    public void feed(Datagram d, int reqSampleRate) throws IOException {
+        /* Let the index see where this datagram starts, so that it can add index fields if needed */
+        long startBytePos = getBytePointer();
+        long startTimePos = getTimePointer();
+        feedIndex(startBytePos, startTimePos);
+
+        /* Express the duration in the timeline's own rate; note that this modifies the caller's datagram */
+        if (sampleRate != reqSampleRate) {
+            long scaledDuration = scaleTime(reqSampleRate, d.getDuration());
+            d.setDuration(scaledDuration);
+        }
+
+        /* Writing moves the file pointer forward by itself */
+        d.write(raf);
+
+        /* The time pointer and the datagram count have to be advanced by hand */
+        long endTimePos = getTimePointer() + d.getDuration();
+        setTimePointer(endTimePos);
+        numDatagrams += 1;
+    }
+
+    /**
+     * Write a series of datagrams to the timeline, in array order.
+     * 
+     * @param dArray
+     *            an array of datagrams.
+     * @param reqSampleTime
+     *            the sample rate at which the datagram durations are expressed.
+     * @throws IOException
+     *             IOException
+     */
+    public void feed(Datagram[] dArray, int reqSampleTime) throws IOException {
+        for (Datagram datagram : dArray) {
+            feed(datagram, reqSampleTime);
+        }
+    }
+
+}
diff --git a/src/main/java/marytts/tools/voiceimport/WavReader.java b/src/main/java/marytts/tools/voiceimport/WavReader.java
new file mode 100644
index 0000000..950cbd9
--- /dev/null
+++ b/src/main/java/marytts/tools/voiceimport/WavReader.java
@@ -0,0 +1,230 @@
+/**
+ * Portions Copyright 2006 DFKI GmbH.
+ * Portions Copyright 2001 Sun Microsystems, Inc.
+ * Portions Copyright 1999-2001 Language Technologies Institute,
+ * Carnegie Mellon University.
+ * All Rights Reserved. Use is subject to license terms.
+ * + * Permission is hereby granted, free of charge, to use and distribute + * this software and its documentation without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of this work, and to + * permit persons to whom this work is furnished to do so, subject to + * the following conditions: + * + * 1. The code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * 2. Any modifications must be clearly marked as such. + * 3. Original authors' names are not deleted. + * 4. The authors' names are not used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE + * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF + * THIS SOFTWARE. + */ +package marytts.tools.voiceimport; + +import java.io.DataInputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; + +import marytts.util.io.General; + +/** + * File reader for a wave (RIFF) waveform + */ +public class WavReader { + + private int numSamples; + private int sampleRate; + private short[] samples; + + // Only really used in loading of data. 
+ private int headerSize; + private int numBytes; + private int numChannels = 1; // Only support mono + + static final short RIFF_FORMAT_PCM = 0x0001; + + /****************/ + /* CONSTRUCTORS */ + /****************/ + + /** + * Constructor from an already open DataInputStream + * + * @param dis + * DataInputStream to read the wav data from + * + */ + public WavReader(DataInputStream dis) { + loadHeaderAndData(dis); + } + + /** + * Constructor from a file name + * + * @param fileName + * the name of the file to read the wav data from + * + */ + public WavReader(String fileName) { + try { + /* Open the file */ + FileInputStream fis = new FileInputStream(fileName); + /* Stick the file to a DataInputStream to allow easy reading of primitive classes (numbers) */ + DataInputStream dis = new DataInputStream(fis); + /* Parse the header and load the data */ + loadHeaderAndData(dis); + /* Close the file */ + fis.close(); + } catch (FileNotFoundException e) { + throw new Error("WAV file [" + fileName + "] was not found."); + } catch (SecurityException e) { + throw new Error("You do not have read access to the file [" + fileName + "]."); + } catch (IOException e) { + throw new Error("IO Exception caught when closing file [" + fileName + "]: " + e.getMessage()); + } + } + + /*****************/ + /* OTHER METHODS */ + /*****************/ + + /** + * Read in a wave from a riff format + * + * @param dis + * DataInputStream to read data from + */ + private void loadHeaderAndData(DataInputStream dis) { + + try { + loadHeader(dis); + if (dis.skipBytes(headerSize - 16) != (headerSize - 16)) { + throw new Error("Unexpected error parsing wave file."); + } + + // Bunch of potential random headers + while (true) { + String s = new String(General.readChars(dis, 4)); + + if (s.equals("data")) { + numSamples = General.readInt(dis, false) / 2; + break; + } else if (s.equals("fact")) { + int i = General.readInt(dis, false); + if (dis.skipBytes(i) != i) { + throw new Error("Unexpected error 
parsing wave file."); + } + } else { + throw new Error("Unsupported wave header chunk type " + s); + } + } + + int dataLength = numSamples * numChannels; + samples = new short[numSamples]; + + for (int i = 0; i < dataLength; i++) { + samples[i] = General.readShort(dis, false); + } + + } catch (IOException ioe) { + throw new Error("IO error while parsing wave" + ioe.getMessage()); + } + + } + + /** + * load a RIFF header + * + * @param dis + * DataInputStream to read from + * + * @throws IOException + * on ill-formatted input + */ + private void loadHeader(DataInputStream dis) throws IOException { + + if (!checkChars(dis, "RIFF")) { + throw new Error("Invalid wave file format."); + } + numBytes = General.readInt(dis, false); + if (!checkChars(dis, "WAVEfmt ")) { + throw new Error("Invalid wave file format."); + } + + headerSize = General.readInt(dis, false); + + if (General.readShort(dis, false) != RIFF_FORMAT_PCM) { + throw new Error("Invalid wave file format."); + } + + if (General.readShort(dis, false) != 1) { + throw new Error("Only mono wave files supported."); + } + + sampleRate = General.readInt(dis, false); + General.readInt(dis, false); + General.readShort(dis, false); + General.readShort(dis, false); + + } + + /** + * Make sure that a string of characters appear next in the file + * + * @param dis + * DataInputStream to read in + * @param chars + * a String containing the ascii characters you want the dis to contain. 
+ * + * @return true if chars appears next in dis, else false + * @throws IOException + * ill-formatted input (end of file, for example) + */ + private boolean checkChars(DataInputStream dis, String chars) throws IOException { + char[] carray = chars.toCharArray(); + for (int i = 0; i < carray.length; i++) { + if ((char) dis.readByte() != carray[i]) { + return false; + } + } + return true; + } + + /** + * Get the sample rate for this wave + * + * @return sample rate + */ + public int getSampleRate() { + return sampleRate; + } + + /** + * Get the number of samples for this wave + * + * @return number of samples + */ + public int getNumSamples() { + return numSamples; + } + + /** + * Get the sample data of this wave + * + * @return samples + */ + public short[] getSamples() { + return samples; + } +} diff --git a/src/test/groovy/marytts/BatchProcessorTest.groovy b/src/test/groovy/marytts/BatchProcessorTest.groovy index 8217b31..745aa42 100644 --- a/src/test/groovy/marytts/BatchProcessorTest.groovy +++ b/src/test/groovy/marytts/BatchProcessorTest.groovy @@ -1,13 +1,13 @@ package marytts import groovy.json.JsonBuilder -import groovy.util.logging.Log4j +import groovy.util.logging.Log4j2 import org.custommonkey.xmlunit.* import org.testng.annotations.* -@Log4j +@Log4j2 class BatchProcessorTest { def tmpDir @@ -42,6 +42,7 @@ class BatchProcessorTest { outputFile: outputPath ] } + def json = new JsonBuilder(batch).toPrettyString() log.info "batch = $json" def batchFile = File.createTempFile('batch', '.json') diff --git a/src/test/groovy/marytts/FeatureListerTest.groovy b/src/test/groovy/marytts/FeatureListerTest.groovy index c8825e6..2c7e160 100644 --- a/src/test/groovy/marytts/FeatureListerTest.groovy +++ b/src/test/groovy/marytts/FeatureListerTest.groovy @@ -1,11 +1,11 @@ package marytts -import groovy.util.logging.Log4j +import groovy.util.logging.Log4j2 import org.testng.Assert import org.testng.annotations.* -@Log4j +@Log4j2 class FeatureListerTest { def tmpDir diff 
--git a/src/test/resources/marytts/example1.acoustparams b/src/test/resources/marytts/example1.acoustparams index b326ac6..ac1c51e 100644 --- a/src/test/resources/marytts/example1.acoustparams +++ b/src/test/resources/marytts/example1.acoustparams @@ -1,82 +1,88 @@ - - -

- - - - Welcome - - - - - - - - - - - - - to - - - - - - - the - - - - - - - world - - - - - - - - - of - - - - - - - speech - - - - - - - - - - - synthesis - - - - - - - - - - - - - - - - ! - - - -

+ +

+ + + + Welcome + + + + + + + + + + + + + + to + + + + + + + + the + + + + + + + + world + + + + + + + + + + of + + + + + + + + speech + + + + + + + + + + synthesis + + + + + + + + + + + + + + + + + +! + + + + +

diff --git a/src/test/resources/marytts/example1.phonemes b/src/test/resources/marytts/example1.phonemes index 365a25e..9044fd8 100644 --- a/src/test/resources/marytts/example1.phonemes +++ b/src/test/resources/marytts/example1.phonemes @@ -4,10 +4,10 @@ Welcome to - the + the world of - speech + speech synthesis ! diff --git a/src/test/resources/marytts/example1.targetfeatures b/src/test/resources/marytts/example1.targetfeatures index ec7fc37..9604da1 100644 --- a/src/test/resources/marytts/example1.targetfeatures +++ b/src/test/resources/marytts/example1.targetfeatures @@ -13,15 +13,15 @@ k 0 0 m 0 0 t 1 0 u 1 0 -D 0 0 -@ 0 0 +D 1 0 +@ 1 0 w 1 1 r= 1 1 l 1 1 d 1 1 V 1 0 v 1 0 -s 0 0 +s 1 1 p 1 1 i 1 1 tS 1 1 @@ -43,15 +43,15 @@ _ 0 0 29 0 0 35 1 0 37 1 0 -5 0 0 -1 0 0 +5 1 0 +1 1 0 39 1 1 33 1 1 28 1 1 20 1 1 15 1 0 38 1 0 -34 0 0 +34 1 1 31 1 1 25 1 1 36 1 1