Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Continuous integration: build the wheel, install it, and run the test suite
# on every push and pull request, once per Python version in the matrix.
name: CI

on: [push, pull_request]

# The job only checks out and builds the code, so a read-only token suffices.
permissions:
  contents: read

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every matrix cell finish so one failing version does not hide others.
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # Quoted so that e.g. "3.10" would not be parsed as the float 3.1.
        python-version: ["3.11", "3.12", "3.13"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system dependencies (Ubuntu)
        if: runner.os == 'Linux'
        run: |
          bash install_dependencies.sh

      # Placeholder: only runs if a Windows runner is added to matrix.os.
      - name: Install system dependencies (Windows)
        if: runner.os == 'Windows'
        run: |
          # Install chocolatey packages if needed
          # choco install espeak --yes
          # For now, we'll use conda-forge for cross-platform audio packages
          echo "Windows system dependencies would be installed here"

      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          # actions/cache expands "~" but not "%VAR%" references, and pip's
          # Windows cache lives under Local (not Roaming) AppData.
          path: |
            ~/.cache/pip
            ~\AppData\Local\pip\Cache
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml', '**/setup.py') }}
          # Fall back to the newest cache for the same Python, then the same OS.
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build setuptools wheel

      - name: Build package
        run: |
          python -m build

      - name: Install package
        # bash is forced so the dist/*.whl glob expands on every runner OS.
        shell: bash
        run: |
          # Install the built package
          pip install dist/*.whl

      # NOTE(review): pytest is started from the checkout root, so
      # "aeneas/tests" may import the source tree rather than the installed
      # wheel - confirm this is intended.
      - name: Run tests with pytest
        run: |
          pip install pytest
          pip install -r requirements_dev.txt
          pytest aeneas/tests

3 changes: 1 addition & 2 deletions aeneas/cdtw/cdtw_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from __future__ import absolute_import
from __future__ import print_function
from numpy import get_include
from numpy.distutils import misc_util
from setuptools import Extension
from setuptools import setup
import sys
Expand All @@ -52,7 +51,7 @@
version="1.7.4",
description="Python C Extension for computing the DTW as fast as your bare metal allows.",
ext_modules=[CMODULE],
include_dirs=misc_util.get_numpy_include_dirs()
include_dirs=[get_include()]
)

print("\n[INFO] Module cdtw successfully compiled\n")
Expand Down
3 changes: 1 addition & 2 deletions aeneas/cmfcc/cmfcc_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from __future__ import absolute_import
from __future__ import print_function
from numpy import get_include
from numpy.distutils import misc_util
from setuptools import Extension
from setuptools import setup
import sys
Expand All @@ -53,7 +52,7 @@
version="1.7.4",
description="Python C Extension for computing the MFCCs as fast as your bare metal allows.",
ext_modules=[CMODULE],
include_dirs=misc_util.get_numpy_include_dirs()
include_dirs=[get_include()]
)

print("\n[INFO] Module cmfcc successfully compiled\n")
Expand Down
3 changes: 1 addition & 2 deletions aeneas/cwave/cwave_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from __future__ import absolute_import
from __future__ import print_function
from numpy import get_include
from numpy.distutils import misc_util
from setuptools import Extension
from setuptools import setup
import sys
Expand All @@ -52,7 +51,7 @@
version="1.7.4",
description="Python C Extension for for reading WAVE files.",
ext_modules=[CMODULE],
include_dirs=misc_util.get_numpy_include_dirs()
include_dirs=[get_include()]
)

print("\n[INFO] Module cwave successfully compiled\n")
Expand Down
6 changes: 3 additions & 3 deletions aeneas/tests/test_textfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,13 +506,13 @@ def test_filter_ignore_regex_strip(self):
self.filter_ignore_regex("word", [u"word abc word"], [u"abc"])

def test_filter_ignore_regex_parenthesis(self):
self.filter_ignore_regex("\(.*?\)", [u"(CHAR) bla bla bla"], [u"bla bla bla"])
self.filter_ignore_regex(r"\(.*?\)", [u"(CHAR) bla bla bla"], [u"bla bla bla"])

def test_filter_ignore_regex_brackets(self):
self.filter_ignore_regex("\[.*?\]", [u"[CHAR] bla bla bla"], [u"bla bla bla"])
self.filter_ignore_regex(r"\[.*?\]", [u"[CHAR] bla bla bla"], [u"bla bla bla"])

def test_filter_ignore_regex_braces(self):
self.filter_ignore_regex("\{.*?\}", [u"{CHAR} bla bla bla"], [u"bla bla bla"])
self.filter_ignore_regex(r"\{.*?\}", [u"{CHAR} bla bla bla"], [u"bla bla bla"])

def test_filter_ignore_regex_entire_match(self):
self.filter_ignore_regex("word", [u"word"], [u""])
Expand Down
4 changes: 2 additions & 2 deletions aeneas/textfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ def nodes_at_level(root, level):
indent = u" " * 2 * (level - 1)
self.log([u"%sRegex for %s: '%s'", indent, attribute_name, regex_string])
regex = re.compile(r".*\b" + regex_string + r"\b.*")
return root.findAll(attrs={attribute_name: regex})
return root.find_all(attrs={attribute_name: regex})
#
# TODO better and/or parametric parsing,
# for example, removing tags but keeping text, etc.
Expand Down Expand Up @@ -953,7 +953,7 @@ def filter_attributes():
ids = []
filter_attributes = filter_attributes()
self.log([u"Finding elements matching attributes '%s'", filter_attributes])
nodes = soup.findAll(attrs=filter_attributes)
nodes = soup.find_all(attrs=filter_attributes)
for node in nodes:
try:
f_id = gf.safe_unicode(node["id"])
Expand Down
Empty file added aeneas/tools/bin/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion aeneas/wavfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _read_data_chunk(fid, comp, noc, bits, mmap=False):
else:
dtype += 'f%d' % bytes
if not mmap:
data = numpy.fromstring(fid.read(size), dtype=dtype)
data = numpy.frombuffer(fid.read(size), dtype=dtype)
else:
start = fid.tell()
data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
Expand Down
160 changes: 160 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Packaging metadata for aeneas (PEP 517/518/621).

[build-system]
# setuptools >= 77.0.3 is needed for the SPDX `license` string used below
# (PEP 639); numpy is listed here so it is available while building.
requires = ["setuptools>=77.0.3", "wheel", "numpy"]
build-backend = "setuptools.build_meta"

[project]
name = "aeneas"
version = "1.7.4.0"
description = "aeneas is a Python/C library and a set of tools to automagically synchronize audio and text (aka forced alignment)"
readme = "README.rst"
# The build backend required above only runs on Python >= 3.9, so older
# interpreters cannot build this project; say so explicitly.
# NOTE(review): CI only exercises 3.11-3.13 - raise this floor if older
# versions are not meant to be supported.
requires-python = ">=3.9"
authors = [
    { name = "Alberto Pettarin", email = "alberto@albertopettarin.it" },
]
license = "AGPL-3.0-only"
keywords = [
    "AUD",
    "AWS Polly TTS API",
    "CSV",
    "DTW",
    "EAF",
    "ELAN",
    "EPUB 3 Media Overlay",
    "EPUB 3",
    "EPUB",
    "Festival",
    "JSON",
    "MFCC",
    "Mel-frequency cepstral coefficients",
    "Nuance TTS API",
    "ReadBeyond Sync",
    "ReadBeyond",
    "SBV",
    "SMIL",
    "SRT",
    "SSV",
    "SUB",
    "TGT",
    "TSV",
    "TTML",
    "TTS",
    "TextGrid",
    "VTT",
    "XML",
    "aeneas",
    "audio/text alignment",
    "dynamic time warping",
    "eSpeak",
    "eSpeak-ng",
    "espeak",
    "espeak-ng",
    "festival",
    "ffmpeg",
    "ffprobe",
    "forced alignment",
    "media overlay",
    "rb_smil_emulator",
    "speech to text",
    "subtitles",
    "sync",
    "synchronization",
    "text to speech",
    "text2wave",
    "tts",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: End Users/Desktop",
    "Intended Audience :: Science/Research",
    "Natural Language :: English",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: POSIX :: Linux",
    "Programming Language :: C",
    "Programming Language :: Python",
    # Versions listed here mirror the CI test matrix.
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Education",
    "Topic :: Multimedia",
    "Topic :: Multimedia :: Sound/Audio",
    "Topic :: Multimedia :: Sound/Audio :: Analysis",
    "Topic :: Multimedia :: Sound/Audio :: Speech",
    "Topic :: Printing",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing",
    "Topic :: Text Processing :: Linguistic",
    "Topic :: Text Processing :: Markup",
    "Topic :: Text Processing :: Markup :: HTML",
    "Topic :: Text Processing :: Markup :: XML",
    "Topic :: Utilities",
]
dependencies = [
    "BeautifulSoup4>=4.5.1",
    "lxml>=3.6.0",
    "numpy>=1.9",
]

[project.optional-dependencies]
# "full" pulls in every optional integration; "nopillow" is the same set
# without Pillow. The remaining extras select a single integration each.
full = [
    "boto3>=1.4.2",
    "Pillow>=3.1.1",
    "requests>=2.9.1",
    "tgt>=1.4.2",
    "youtube-dl>=2016.9.27",
]
nopillow = [
    "boto3>=1.4.2",
    "requests>=2.9.1",
    "tgt>=1.4.2",
    "youtube-dl>=2016.9.27",
]
boto3 = ["boto3>=1.4.2"]
pillow = ["Pillow>=3.1.1"]
requests = ["requests>=2.9.1"]
tgt = ["tgt>=1.4.2"]

[project.urls]
Homepage = "https://github.com/readbeyond/aeneas"

# Console entry points; each maps a command name to a `main` callable.
[project.scripts]
aeneas_check_setup = "aeneas.tools.bin.aeneas_check_setup:main"
aeneas_convert_syncmap = "aeneas.tools.bin.aeneas_convert_syncmap:main"
aeneas_download = "aeneas.tools.bin.aeneas_download:main"
aeneas_execute_job = "aeneas.tools.bin.aeneas_execute_job:main"
aeneas_execute_task = "aeneas.tools.bin.aeneas_execute_task:main"
aeneas_plot_waveform = "aeneas.tools.bin.aeneas_plot_waveform:main"
aeneas_synthesize_text = "aeneas.tools.bin.aeneas_synthesize_text:main"
aeneas_validate = "aeneas.tools.bin.aeneas_validate:main"

[tool.setuptools]
packages = [
    "aeneas",
    "aeneas.cdtw",
    "aeneas.cew",
    "aeneas.cfw",
    "aeneas.cmfcc",
    "aeneas.cwave",
    "aeneas.extra",
    "aeneas.syncmap",
    "aeneas.tools",
    "aeneas.ttswrappers",
    "aeneas.tools.bin",
]

# NOTE(review): "aeneas.cfw" is packaged above but has no package-data entry,
# while "*.cc" is listed under "aeneas.cew" - confirm which package owns the
# C++ sources.
[tool.setuptools.package-data]
"aeneas" = ["res/*", "*.md"]
"aeneas.cdtw" = ["*.c", "*.h", "*.md"]
"aeneas.cew" = ["*.c", "*.h", "*.md", "*.dll", "*.cc"]
"aeneas.cmfcc" = ["*.c", "*.h", "*.md"]
"aeneas.cwave" = ["*.c", "*.h", "*.md"]
"aeneas.extra" = ["*.md"]
"aeneas.syncmap" = ["*.md"]
"aeneas.tools" = ["res/*", "*.md"]
"aeneas.ttswrappers" = ["*.md"]
Loading