
Commit a3f6b5f

Draft
1 parent fdf794e commit a3f6b5f

179 files changed, +5892 −9347 lines changed


.github/build_windows_packages.ps1

Lines changed: 6 additions & 1 deletion
@@ -115,12 +115,17 @@ Remove-Item $ffDir.FullName -Recurse -Force
 Write-Host "[INFO] Installing PyTorch..."
 & ".\runtime\python.exe" -m ensurepip
 & ".\runtime\python.exe" -m pip install --upgrade pip --no-warn-script-location
+
 switch ($cuda) {
     "cu124" {
-        & ".\runtime\python.exe" -m pip install torch==2.6 torchaudio --index-url https://download.pytorch.org/whl/cu124 --no-warn-script-location
+        & ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
+        & ".\runtime\python.exe" -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu124 --no-warn-script-location
+        & ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
     }
     "cu128" {
+        & ".\runtime\python.exe" -m pip install psutil ninja packaging wheel "setuptools>=42" --no-warn-script-location
         & ".\runtime\python.exe" -m pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128 --no-warn-script-location
+        & ".\runtime\python.exe" -m pip install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
     }
     default {
         Write-Error "Unsupported CUDA version: $cuda"
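As a quick sanity check after the step above, one could run a snippet like the following through the packaged interpreter (a minimal sketch, not part of the commit; it assumes the .\runtime\python.exe layout used by the script):

# Minimal post-install check; run via the packaged interpreter, e.g. .\runtime\python.exe
import torch

print("torch", torch.__version__, "cuda", torch.version.cuda, "available", torch.cuda.is_available())
try:
    import flash_attn  # wheel installed from the custom index with --no-build-isolation
    print("flash-attn", getattr(flash_attn, "__version__", "installed"))
except ImportError as exc:
    print("flash-attn missing:", exc)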

.github/workflows/build_windows_packages.yaml

Lines changed: 9 additions & 0 deletions
@@ -31,6 +31,15 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4

+      - name: Install Windows CUDA 12.9
+        if: ${{ runner.os == 'Windows' && matrix.torch_cuda == '12.8' }}
+        uses: Jimver/cuda-toolkit
+        id: cuda-toolkit-win-129
+        with:
+          cuda: 12.9.1
+          method: "network"
+          sub-packages: '["nvcc", "cudart", "visual_studio_integration"]'
+
       - name: Run Build and Upload Script
         shell: pwsh
         run: |
.gitignore

Lines changed: 2 additions & 2 deletions
@@ -16,8 +16,8 @@ ffprobe*
 cfg.json
 speakers.json
 ref_audios
-tools/AP_BWE_main/24kto48k/*
-!tools/AP_BWE_main/24kto48k/readme.txt
+tools/AP_BWE/24kto48k/*
+!tools/AP_BWE/24kto48k/readme.txt

 # Byte-compiled / optimized / DLL files
 __pycache__/

Docker/miniconda_install.sh

Lines changed: 20 additions & 2 deletions
@@ -23,8 +23,10 @@ fi

 if [ "$TARGETPLATFORM" = "linux/amd64" ]; then
     "${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-x86_64.sh
+    SYSROOT_PKG="sysroot_linux-64>=2.28"
 elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then
     "${WGET_CMD[@]}" -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_25.3.1-1-Linux-aarch64.sh
+    SYSROOT_PKG="sysroot_linux-aarch64>=2.28"
 else
     exit 1
 fi
@@ -45,20 +47,36 @@ rm miniconda.sh

 source "$HOME/miniconda3/etc/profile.d/conda.sh"

+"$HOME/miniconda3/bin/conda" init bash
+
+source "$HOME/.bashrc"
+
 "$HOME/miniconda3/bin/conda" config --add channels conda-forge

 "$HOME/miniconda3/bin/conda" update -q --all -y 1>/dev/null

 "$HOME/miniconda3/bin/conda" install python=3.11 -q -y

-"$HOME/miniconda3/bin/conda" install gcc=14 gxx ffmpeg cmake make unzip -q -y
+"$HOME/miniconda3/bin/conda" install gcc=11 gxx ffmpeg cmake make unzip $SYSROOT_PKG "libstdcxx-ng>=11" -q -y

 if [ "$CUDA_VERSION" = "12.8" ]; then
     "$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu128
+    "$HOME/miniconda3/bin/conda" install cuda-nvcc=12.8 -c nvidia
 elif [ "$CUDA_VERSION" = "12.6" ]; then
-    "$HOME/miniconda3/bin/pip" install torch==2.6 torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
+    "$HOME/miniconda3/bin/pip" install torch torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/cu126
+    "$HOME/miniconda3/bin/conda" install cuda-nvcc=12.6 -c nvidia
 fi

+CUDA_PATH=$(echo "$HOME/miniconda3/targets/"*-linux | awk '{print $1}')
+
+export CUDA_HOME=$CUDA_PATH
+export PATH="$HOME/miniconda3/bin:$PATH"
+export PATH="$CUDA_HOME/bin:$PATH"
+export PATH="$CUDA_HOME/nvvm/bin:$PATH"
+
+"$HOME/miniconda3/bin/pip" install psutil ninja packaging wheel "setuptools>=42"
+"$HOME/miniconda3/bin/pip" install flash-attn -i https://xxxxrt666.github.io/PIP-Index/ --no-build-isolation
+
 "$HOME/miniconda3/bin/pip" cache purge

 rm $LOG_PATH
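The exported CUDA_HOME and PATH above are what a flash-attn source build (run with --no-build-isolation) would pick up. A minimal sketch, assuming torch is already installed in the conda environment, to confirm the toolchain that build would see:

# Confirm the CUDA toolchain visible to extension builds such as flash-attn.
import os
import shutil

from torch.utils.cpp_extension import CUDA_HOME  # resolved from CUDA_HOME / nvcc on PATH

print("CUDA_HOME env:", os.environ.get("CUDA_HOME"))
print("torch sees   :", CUDA_HOME)
print("nvcc on PATH :", shutil.which("nvcc"))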

GPT_SoVITS/AR/data/data_module.py

Lines changed: 2 additions & 2 deletions
@@ -3,8 +3,8 @@
 from pytorch_lightning import LightningDataModule
 from torch.utils.data import DataLoader

-from AR.data.bucket_sampler import DistributedBucketSampler
-from AR.data.dataset import Text2SemanticDataset
+from GPT_SoVITS.AR.data.bucket_sampler import DistributedBucketSampler
+from GPT_SoVITS.AR.data.dataset import Text2SemanticDataset


 class Text2SemanticDataModule(LightningDataModule):
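The switch from "from AR. ..." to "from GPT_SoVITS.AR. ..." (repeated in the files below) assumes the repository root, i.e. the parent of GPT_SoVITS/, is the import root rather than GPT_SoVITS/ itself. A minimal sketch of what the new form expects; the checkout path is hypothetical:

import sys

# Hypothetical checkout location; with the repository root on sys.path the
# package-qualified import below resolves. The old "from AR. ..." form instead
# required GPT_SoVITS/ itself to be on sys.path.
sys.path.insert(0, "/path/to/GPT-SoVITS")

from GPT_SoVITS.AR.data.data_module import Text2SemanticDataModule  # noqa: E402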

GPT_SoVITS/AR/data/dataset.py

Lines changed: 1 addition & 1 deletion
@@ -220,7 +220,7 @@ def __getitem__(self, idx: int) -> Dict:

         flag = 0
         path_bert = "%s/%s.pt" % (self.path3, item_name)
-        if os.path.exists(path_bert) == True:
+        if os.path.exists(path_bert) is True:
             bert_feature = torch.load(path_bert, map_location="cpu")
         else:
             flag = 1
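Since os.path.exists returns a plain Python bool, the "== True", "is True", and bare truthiness spellings all behave the same here; a minimal illustration with a hypothetical path:

import os.path

path_bert = "bert_features/example.pt"  # hypothetical path, for illustration only
if os.path.exists(path_bert):  # bare truthiness; equivalent to "== True" / "is True" for a bool
    print("found", path_bert)
else:
    print("missing", path_bert)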

GPT_SoVITS/AR/models/t2s_lightning_module.py

Lines changed: 4 additions & 4 deletions
@@ -10,9 +10,9 @@
 import torch
 from pytorch_lightning import LightningModule

-from AR.models.t2s_model import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from GPT_SoVITS.AR.models.t2s_model import Text2SemanticDecoder
+from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
+from GPT_SoVITS.AR.modules.optim import ScaledAdam


 class Text2SemanticLightningModule(LightningModule):
@@ -42,7 +42,7 @@ def __init__(self, config, output_dir, is_train=True):
     def training_step(self, batch: Dict, batch_idx: int):
         opt = self.optimizers()
         scheduler = self.lr_schedulers()
-        forward = self.model.forward if self.config["train"].get("if_dpo", False) == True else self.model.forward_old
+        forward = self.model.forward if self.config["train"].get("if_dpo", False) is True else self.model.forward_old
         loss, acc = forward(
             batch["phoneme_ids"],
             batch["phoneme_ids_len"],

GPT_SoVITS/AR/models/t2s_lightning_module_onnx.py

Lines changed: 3 additions & 3 deletions
@@ -10,9 +10,9 @@
 import torch
 from pytorch_lightning import LightningModule

-from AR.models.t2s_model_onnx import Text2SemanticDecoder
-from AR.modules.lr_schedulers import WarmupCosineLRSchedule
-from AR.modules.optim import ScaledAdam
+from GPT_SoVITS.AR.models.t2s_model_onnx import Text2SemanticDecoder
+from GPT_SoVITS.AR.modules.lr_schedulers import WarmupCosineLRSchedule
+from GPT_SoVITS.AR.modules.optim import ScaledAdam


 class Text2SemanticLightningModule(LightningModule):

GPT_SoVITS/AR/models/t2s_model.py

Lines changed: 8 additions & 8 deletions
@@ -9,7 +9,7 @@
 from torchmetrics.classification import MulticlassAccuracy
 from tqdm import tqdm

-from AR.models.utils import (
+from GPT_SoVITS.AR.models.utils import (
     dpo_loss,
     get_batch_logps,
     make_pad_mask,
@@ -18,8 +18,8 @@
     sample,
     topk_sampling,
 )
-from AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
-from AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding import SinePositionalEmbedding, TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer import LayerNorm, TransformerEncoder, TransformerEncoderLayer

 default_config = {
     "embedding_dim": 512,
@@ -420,7 +420,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
             mask=xy_attn_mask,
         )
         x_len = x_lens.max()
-        logits = self.ar_predict_layer(xy_dec[:, x_len-1:])
+        logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :])

         ###### DPO #############
         reject_xy_pos, reject_xy_attn_mask, reject_targets = self.make_input_data(
@@ -432,7 +432,7 @@ def forward(self, x, x_lens, y, y_lens, bert_feature):
             mask=reject_xy_attn_mask,
         )
         x_len = x_lens.max()
-        reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len-1:])
+        reject_logits = self.ar_predict_layer(reject_xy_dec[:, x_len - 1 :])

         # loss
         # from feiteng: the longer the duration, the larger the gradient update should be, so use sum
@@ -502,7 +502,7 @@ def forward_old(self, x, x_lens, y, y_lens, bert_feature):
             (xy_pos, None),
             mask=xy_attn_mask,
         )
-        logits = self.ar_predict_layer(xy_dec[:, x_len-1:]).permute(0, 2, 1)
+        logits = self.ar_predict_layer(xy_dec[:, x_len - 1 :]).permute(0, 2, 1)
         # loss
         # from feiteng: the longer the duration, the larger the gradient update should be, so use sum
         loss = F.cross_entropy(logits, targets, reduction="sum")
@@ -724,8 +724,8 @@ def infer_panel_batch_infer(
            l1 = samples[:, 0] == self.EOS
            l2 = tokens == self.EOS
            l = l1.logical_or(l2)
-           removed_idx_of_batch_for_y = torch.where(l == True)[0].tolist()
-           reserved_idx_of_batch_for_y = torch.where(l == False)[0]
+           removed_idx_of_batch_for_y = torch.where(l is True)[0].tolist()
+           reserved_idx_of_batch_for_y = torch.where(l is False)[0]
            # batch_indexs = torch.tensor(batch_idx_map, device=y.device)[removed_idx_of_batch_for_y]
            for i in removed_idx_of_batch_for_y:
                batch_index = batch_idx_map[i]
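One note on the masking lines in the last hunk, as a minimal standalone sketch: l is a boolean tensor, so identity comparison with True/False produces a plain Python bool rather than an elementwise mask; elementwise index selection passes the mask itself (or its negation) to torch.where:

import torch

l = torch.tensor([True, False, True])   # EOS mask, as in the hunk above

removed = torch.where(l)[0].tolist()    # [0, 2] - indices where the mask is set
reserved = torch.where(~l)[0]           # tensor([1]) - indices where it is not

# "l is True" is an identity check on the tensor object and is always False,
# and torch.where() expects a tensor condition, not a plain Python bool.
print(l is True)                        # False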

GPT_SoVITS/AR/models/t2s_model_onnx.py

Lines changed: 2 additions & 2 deletions
@@ -5,8 +5,8 @@
 from torch.nn import functional as F
 from torchmetrics.classification import MulticlassAccuracy

-from AR.modules.embedding_onnx import SinePositionalEmbedding, TokenEmbedding
-from AR.modules.transformer_onnx import LayerNorm, TransformerEncoder, TransformerEncoderLayer
+from GPT_SoVITS.AR.modules.embedding_onnx import SinePositionalEmbedding, TokenEmbedding
+from GPT_SoVITS.AR.modules.transformer_onnx import LayerNorm, TransformerEncoder, TransformerEncoderLayer

 default_config = {
     "embedding_dim": 512,
