Skip to content

Commit

Permalink
Logging clean up + IT TN fix (#118)
Browse files: browse the repository at this point in the history
* fix utils and IT TN

Signed-off-by: Evelina <[email protected]>

* clean up

Signed-off-by: Evelina <[email protected]>

* fix logging

Signed-off-by: Evelina <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix format

Signed-off-by: Evelina <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix format

Signed-off-by: Evelina <[email protected]>

* fix format

Signed-off-by: Evelina <[email protected]>

* add IT TN to CI

Signed-off-by: Evelina <[email protected]>

* update patch

Signed-off-by: Evelina <[email protected]>

---------

Signed-off-by: Evelina <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ekmb and pre-commit-ci[bot] authored Oct 24, 2023
1 parent 0ded21a commit 4424db5
Show file tree
Hide file tree
Showing 65 changed files with 239 additions and 247 deletions.
37 changes: 10 additions & 27 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ pipeline {
VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-27-23-0'
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'

}
stages {

Expand Down Expand Up @@ -188,7 +188,7 @@ pipeline {
failFast true
parallel {
stage('L0: FR TN grammars') {
steps {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
}
}
Expand All @@ -200,7 +200,7 @@ pipeline {

}
}
stage('L0: Create HU TN/ITN Grammars') {
stage('L0: Create VI ITN & HU TN & IT TN') {
when {
anyOf {
branch 'main'
Expand All @@ -209,38 +209,21 @@ pipeline {
}
failFast true
parallel {
stage('L0: VI ITN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
}
}
stage('L0: HU TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
}
}
// stage('L0: HU ITN grammars') {
// steps {
// sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hu --text="száz " --cache_dir ${HU_TN_CACHE}'
// }
// }
}
}
stage('L0: Create VI TN/ITN Grammars') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
// stage('L0: VI TN grammars') {
// steps {
// sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="2" --cache_dir ${VI_TN_CACHE}'
// }
// }
stage('L0: VI ITN grammars') {
stage('L0: IT TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
}
}

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import pynini
from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import pynini
from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -31,6 +30,7 @@
)
from nemo_text_processing.text_normalization.ar.taggers.tokenize_and_classify import ClassifyFst as TNClassifyFst
from nemo_text_processing.text_normalization.en.graph_utils import INPUT_LOWER_CASED
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -62,9 +62,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"ar_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")
tn_classify = TNClassifyFst(
input_case='cased', deterministic=True, cache_dir=cache_dir, overwrite_cache=True
)
Expand Down Expand Up @@ -109,4 +109,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import pynini
from nemo_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize import VerbalizeFst
from nemo_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
from nemo_text_processing.utils.logging import logger
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand Down Expand Up @@ -46,6 +45,7 @@
delete_space,
generator_main,
)
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -78,9 +78,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"de_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")
tn_cardinal_tagger = TNCardinalTagger(deterministic=False)
tn_date_tagger = TNDateTagger(cardinal=tn_cardinal_tagger, deterministic=False)
tn_decimal_tagger = TNDecimalTagger(cardinal=tn_cardinal_tagger, deterministic=False)
Expand Down Expand Up @@ -147,4 +147,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -36,6 +35,7 @@
delete_space,
generator_main,
)
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -67,9 +67,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"en_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")
cardinal = CardinalFst(input_case=input_case)
cardinal_graph = cardinal.fst

Expand Down Expand Up @@ -116,4 +116,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -36,6 +35,7 @@
delete_space,
generator_main,
)
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -67,9 +67,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"es_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")

cardinal = CardinalFst()
cardinal_graph = cardinal.fst
Expand Down Expand Up @@ -121,4 +121,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand Down Expand Up @@ -49,6 +48,7 @@
delete_space,
generator_main,
)
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -85,9 +85,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"es_en_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")

cardinal = CardinalFst()
cardinal_graph = cardinal.fst
Expand Down Expand Up @@ -174,4 +174,4 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
logger.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import string
from pathlib import Path
from typing import Dict

import pynini
from nemo_text_processing.inverse_text_normalization.fr.utils import get_abs_path
from nemo_text_processing.utils.logging import logger
from pynini import Far
from pynini.examples import plurals
from pynini.export import export
Expand Down Expand Up @@ -80,7 +80,7 @@ def generator_main(file_name: str, graphs: Dict[str, pynini.FstLike]):
for rule, graph in graphs.items():
exporter[rule] = graph.optimize()
exporter.close()
logging.info(f'Created {file_name}')
logger.info(f'Created {file_name}')


def get_plurals(fst):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -36,6 +35,7 @@
from nemo_text_processing.inverse_text_normalization.fr.taggers.whitelist import WhiteListFst
from nemo_text_processing.inverse_text_normalization.fr.taggers.word import WordFst
from nemo_text_processing.text_normalization.en.graph_utils import INPUT_LOWER_CASED
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -67,9 +67,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"fr_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")

cardinal = CardinalFst()
cardinal_graph = cardinal.fst
Expand Down Expand Up @@ -121,4 +121,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -35,6 +34,7 @@
delete_space,
generator_main,
)
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -66,9 +66,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"pt_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars.")
logger.info(f"Creating ClassifyFst grammars.")

cardinal = CardinalFst(use_strict_e=True)
cardinal_graph = cardinal.fst
Expand Down Expand Up @@ -115,4 +115,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os

import pynini
Expand All @@ -36,6 +35,7 @@
generator_main,
)
from nemo_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst as TNClassifyFst
from nemo_text_processing.utils.logging import logger
from pynini.lib import pynutil


Expand Down Expand Up @@ -67,9 +67,9 @@ def __init__(
far_file = os.path.join(cache_dir, f"ru_itn_{input_case}.far")
if not overwrite_cache and far_file and os.path.exists(far_file):
self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
logging.info(f"ClassifyFst.fst was restored from {far_file}.")
logger.info(f"ClassifyFst.fst was restored from {far_file}.")
else:
logging.info(f"Creating ClassifyFst grammars. This might take some time...")
logger.info(f"Creating ClassifyFst grammars. This might take some time...")
tn_classify = TNClassifyFst(
input_case='cased', deterministic=False, cache_dir=cache_dir, overwrite_cache=True
)
Expand Down Expand Up @@ -119,4 +119,3 @@ def __init__(

if far_file:
generator_main(far_file, {"tokenize_and_classify": self.fst})
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
Loading

0 comments on commit 4424db5

Please sign in to comment.