Skip to content

Commit

Permalink
Update workflows for DeepSpeech v3 for PyTorch 2.x
Browse files Browse the repository at this point in the history
Signed-off-by: Beat Buesser <[email protected]>
  • Loading branch information
beat-buesser committed Dec 7, 2023
1 parent 34fe469 commit cf85969
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 25 deletions.
26 changes: 8 additions & 18 deletions .github/actions/deepspeech-v3/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Get base from a pytorch image
FROM pytorch/pytorch:1.6.0-cuda10.1-cudnn7-runtime
# Get base from a pytorch image
FROM pytorch/pytorch:2.1.1-cuda12.1-cudnn8-runtime

# Set to install things in non-interactive mode
ENV DEBIAN_FRONTEND noninteractive
Expand All @@ -17,26 +17,16 @@ RUN apt-get update \
curl \
libsndfile-dev \
libsndfile1 \
vim \
&& apt-get clean all \
&& rm -r /var/lib/apt/lists/*

RUN /opt/conda/bin/conda install --yes \
astropy \
matplotlib \
pandas \
scikit-learn \
scikit-image

# Install necessary libraries for deepspeech v3
RUN pip install torch
RUN pip install tensorflow
RUN pip install torchaudio==0.6.0
RUN pip install --no-build-isolation fairscale
RUN pip install --ignore-installed PyYAML torch==2.1.1 tensorflow==2.14.1 torchaudio==2.1.1 pytorch-lightning==2.1.2 scikit-learn==1.3.2
RUN pip install --no-build-isolation fairscale==0.4.13

RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
RUN cd deepspeech.pytorch && pip install -r requirements.txt
RUN cd deepspeech.pytorch && pip install -e .
RUN cd deepspeech.pytorch && sed -i '/^sklearn/d' requirements.txt && pip install -r requirements.txt && pip install -e .

RUN pip install numba==0.50.0
RUN pip install pytest-cov
RUN pip install pydub==0.25.1
RUN pip install numba==0.56.4 pytest-cov==4.1.0 pydub==0.25.1
RUN pip list
4 changes: 2 additions & 2 deletions .github/workflows/ci-deepspeech-v3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ on:

jobs:
test_deepspeech_v3_torch_1_10:
name: PyTorchDeepSpeech v3 / PyTorch 1.10
name: PyTorchDeepSpeech v3 / PyTorch 2.1.1
runs-on: ubuntu-latest
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v3_torch_1_10
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v3_torch_2_1_1
steps:
- name: Checkout Repo
uses: actions/checkout@v3
Expand Down
10 changes: 5 additions & 5 deletions art/estimators/speech_recognition/pytorch_deep_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def __init__(
# Check DeepSpeech version
if str(DeepSpeech.__base__) == "<class 'torch.nn.modules.module.Module'>":
self._version = 2
elif str(DeepSpeech.__base__) == "<class 'pytorch_lightning.core.lightning.LightningModule'>":
elif str(DeepSpeech.__base__) in ["<class 'pytorch_lightning.core.lightning.LightningModule'>", "<class 'pytorch_lightning.core.module.LightningModule'>"]:
self._version = 3
else:
raise NotImplementedError("Only DeepSpeech version 2 and DeepSpeech version 3 are currently supported.")
Expand Down Expand Up @@ -381,7 +381,7 @@ def predict(

# Call to DeepSpeech model for prediction
with torch.no_grad():
outputs, output_sizes = self._model(
outputs, output_sizes, _ = self._model(
inputs[begin:end].to(self._device), input_sizes[begin:end].to(self._device)
)

Expand Down Expand Up @@ -455,7 +455,7 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
input_sizes = input_rates.mul_(inputs.size()[-1]).int()

# Call to DeepSpeech model for prediction
outputs, output_sizes = self._model(inputs.to(self._device), input_sizes.to(self._device))
outputs, output_sizes, _ = self._model(inputs.to(self._device), input_sizes.to(self._device))
outputs = outputs.transpose(0, 1)

if self._version == 2:
Expand Down Expand Up @@ -566,7 +566,7 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
self.optimizer.zero_grad()

# Call to DeepSpeech model for prediction
outputs, output_sizes = self._model(inputs.to(self._device), input_sizes.to(self._device))
outputs, output_sizes, _ = self._model(inputs.to(self._device), input_sizes.to(self._device))
outputs = outputs.transpose(0, 1)

if self._version == 2:
Expand Down Expand Up @@ -625,7 +625,7 @@ def compute_loss_and_decoded_output(
input_sizes = input_rates.mul_(inputs.size()[-1]).int()

# Call to DeepSpeech model for prediction
outputs, output_sizes = self.model(inputs.to(self.device), input_sizes.to(self.device))
outputs, output_sizes, _ = self.model(inputs.to(self.device), input_sizes.to(self.device))
outputs_ = outputs.transpose(0, 1)

if self._version == 2:
Expand Down

0 comments on commit cf85969

Please sign in to comment.