Skip to content

Commit

Permalink
debug WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
pschroedl committed Aug 2, 2024
1 parent a07620e commit 036678d
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 45 deletions.
24 changes: 8 additions & 16 deletions runner/app/pipelines/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@
class TextToSpeechPipeline(Pipeline):
def __init__(self, model_id: str):
    """Initialize the TTS pipeline, or skip model loading in mock mode.

    Args:
        model_id: Requested model identifier. Recorded on the instance;
            note the pipeline currently always loads the espnet
            FastSpeech2 Conformer weights regardless of this value.
    """
    self.model_id = model_id
    # MOCK_PIPELINE=true short-circuits all model loading so the service
    # can run (e.g. in CI or on machines without weights/GPU); __call__
    # mirrors this check and emits silent audio instead.
    if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
        logger.info("Mocking TextToSpeechPipeline for %s", model_id)
        return

    self.device = get_torch_device()
    # Preload FastSpeech2 Conformer (text -> spectrogram) and its HiFi-GAN
    # vocoder (spectrogram -> waveform) once at startup so per-request
    # latency excludes model load time.
    self.TTS_tokenizer = FastSpeech2ConformerTokenizer.from_pretrained(
        "espnet/fastspeech2_conformer", cache_dir=get_model_dir()
    )
    self.TTS_model = FastSpeech2ConformerModel.from_pretrained(
        "espnet/fastspeech2_conformer", cache_dir=get_model_dir()
    ).to(self.device)
    self.TTS_hifigan = FastSpeech2ConformerHifiGan.from_pretrained(
        "espnet/fastspeech2_conformer_hifigan", cache_dir=get_model_dir()
    ).to(self.device)


def __call__(self, text):
# generate unique filename
if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)
sf.write(audio_path, [0] * 22050, samplerate=22050)
return audio_path
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)

Expand All @@ -35,19 +35,11 @@ def __call__(self, text):
return audio_path

def generate_audio(self, text, output_file_name):
    """Synthesize speech for `text` and write it to `output_file_name`.

    Runs FastSpeech2 Conformer to produce a mel spectrogram, converts it
    to a waveform with the HiFi-GAN vocoder, and writes the result to
    disk as 22.05 kHz audio.

    Args:
        text: Input text to synthesize.
        output_file_name: Destination path for the generated audio file.

    Returns:
        The `output_file_name` path that was written.
    """
    # Tokenize and move the whole batch to the pipeline's device in one
    # step; input_ids stays a Long tensor as the model requires.
    inputs = self.TTS_tokenizer(text, return_tensors="pt").to(self.device)
    input_ids = inputs["input_ids"]  # already on self.device — no second .to() needed

    # Text -> mel spectrogram.
    output_dict = self.TTS_model(input_ids, return_dict=True)
    spectrogram = output_dict["spectrogram"]

    # Mel spectrogram -> waveform via the HiFi-GAN vocoder.
    waveform = self.TTS_hifigan(spectrogram)

    # 22050 Hz is the native output rate of this FastSpeech2/HiFi-GAN pair.
    sf.write(output_file_name, waveform.squeeze().detach().cpu().numpy(), samplerate=22050)
    return output_file_name

Expand Down
4 changes: 1 addition & 3 deletions runner/app/routes/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ class HTTPError(BaseModel):
}

class TextToSpeechParams(BaseModel):
# TODO: Make model_id and other None properties optional once Go codegen tool
# supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373
text_input: Annotated[str, Form()] = ""
model_id: str = ""

Expand Down Expand Up @@ -61,7 +59,7 @@ async def text_to_speech(
)

if os.path.exists(result):
return FileResponse(path=result, media_type='audio/mp4', filename="generated_audio.mp4")
return FileResponse(path=result, media_type='audio/mp4', filename="generated_audio.mp4")
else:
return JSONResponse(
status_code=400,
Expand Down
4 changes: 2 additions & 2 deletions runner/dev/Dockerfile.debug
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FROM livepeer/ai-runner:base
RUN pip install debugpy

# Expose the debugpy port and start the app as usual.
CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
# CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

# If you want to wait for the debugger to attach before starting the app, use the --wait-for-client option.
# CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
75 changes: 51 additions & 24 deletions runner/dev/patches/debug.patch
Original file line number Diff line number Diff line change
@@ -1,25 +1,52 @@
diff --git a/worker/docker.go b/worker/docker.go
index e7dcca1..7ad026a 100644
--- a/worker/docker.go
+++ b/worker/docker.go
@@ -148,6 +148,7 @@ func (m *DockerManager) createContainer(ctx context.Context, pipeline string, mo
},
ExposedPorts: nat.PortSet{
containerPort: struct{}{},
+ "5678/tcp": struct{}{},
},
Labels: map[string]string{
containerCreatorLabel: containerCreator,
@@ -176,6 +177,12 @@ func (m *DockerManager) createContainer(ctx context.Context, pipeline string, mo
HostPort: containerHostPort,
},
},
+ "5678/tcp": []nat.PortBinding{
+ {
+ HostIP: "0.0.0.0",
+ HostPort: "5678",
+ },
+ },
},
}
--- app/pipelines/text_to_speech.py 2024-08-02 20:39:18.658448901 +0000
+++ app/pipelines/text_to_speech_updated.py 2024-08-02 20:39:02.304028206 +0000
@@ -12,21 +12,21 @@
class TextToSpeechPipeline(Pipeline):
def __init__(self, model_id: str):
self.model_id = model_id
- # kwargs = {"cache_dir": get_model_dir()}
+ if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
+ logger.info("Mocking TextToSpeechPipeline for %s", model_id)
+ return

- # folder_name = file_download.repo_folder_name(
- # repo_id=model_id, repo_type="model"
- # )
- # folder_path = os.path.join(get_model_dir(), folder_name)
self.device = get_torch_device()
- # preload FastSpeech 2 & hifigan
self.TTS_tokenizer = FastSpeech2ConformerTokenizer.from_pretrained("espnet/fastspeech2_conformer", cache_dir=get_model_dir())
self.TTS_model = FastSpeech2ConformerModel.from_pretrained("espnet/fastspeech2_conformer", cache_dir=get_model_dir()).to(self.device)
self.TTS_hifigan = FastSpeech2ConformerHifiGan.from_pretrained("espnet/fastspeech2_conformer_hifigan", cache_dir=get_model_dir()).to(self.device)

-
def __call__(self, text):
- # generate unique filename
+ if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
+ unique_audio_filename = f"{uuid.uuid4()}.wav"
+ audio_path = os.path.join("/tmp/", unique_audio_filename)
+ sf.write(audio_path, [0] * 22050, samplerate=22050)
+ return audio_path
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)

@@ -35,19 +35,11 @@
return audio_path

def generate_audio(self, text, output_file_name):
- # Tokenize input text
inputs = self.TTS_tokenizer(text, return_tensors="pt").to(self.device)
-
- # Ensure input IDs remain in Long tensor type
input_ids = inputs["input_ids"].to(self.device)
-
- # Generate spectrogram
output_dict = self.TTS_model(input_ids, return_dict=True)
spectrogram = output_dict["spectrogram"]
-
- # Convert spectrogram to waveform
waveform = self.TTS_hifigan(spectrogram)
-
sf.write(output_file_name, waveform.squeeze().detach().cpu().numpy(), samplerate=22050)
return output_file_name

0 comments on commit 036678d

Please sign in to comment.