Skip to content

Commit 4424f7d

Browse files
committed
Lint datasets/
1 parent f9ef8d3 commit 4424f7d

File tree

2 files changed

+7
-8
lines changed

2 files changed

+7
-8
lines changed

datasets/librispeech_preprocess.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,15 @@
  4   4
  5   5    import multiprocessing.dummy
  6   6    import os
  7      - from os.path import exists
  8   7    import sys
  9   8    import threading
 10   9    import time
 11  10
 12      - from absl import logging
 13  11    import numpy as np
 14  12    import pandas as pd
 15      - from pydub import AudioSegment
 16  13    import tensorflow as tf
     14  + from absl import logging
     15  + from pydub import AudioSegment
 17  16
 18  17    from datasets import librispeech_tokenizer
 19  18
@@ -84,8 +83,8 @@ def process(index):
 84  83    return utterance_ids
 85  84
 86  85    with open(trans_file, 'r', encoding='UTF-8') as f:
 87      - for l in f:
 88      - utt, trans = l.strip().split(' ', maxsplit=1)
     86  + for line in f:
     87  + utt, trans = line.strip().split(' ', maxsplit=1)
 89  88    audio_path = (
 90  89    f'{data_folder}/{speaker_folder}/{chapter_folder}/{utt}.flac'
 91  90    )

datasets/librispeech_tokenizer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@
  8   8    import tempfile
  9   9    from typing import Dict
 10  10
 11      - from absl import logging
 12  11    import sentencepiece as spm
 13  12    import tensorflow as tf
 14  13    import tensorflow_text as tftxt
     14  + from absl import logging
 15  15
 16  16    gfile = tf.io.gfile
 17  17    copy = tf.io.gfile.copy
@@ -41,8 +41,8 @@ def dump_chars_for_training(data_folder, splits, maxchars: int = int(1e7)):
 41  41    logging.info('path does not exist -> %s', trans_file)
 42  42    continue
 43  43    with open(trans_file, 'r', encoding='UTF-8') as f:
 44      - for l in f:
 45      - _, line = l.strip().split(' ', maxsplit=1)
     44  + for lines in f:
     45  + _, line = lines.strip().split(' ', maxsplit=1)
 46  46    line = line + '\n'
 47  47    char_count += len(line)
 48  48    if char_count > maxchars:

0 commit comments

Comments
 (0)