Skip to content

Commit

Permalink
debug WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
pschroedl committed Aug 2, 2024
1 parent a07620e commit 036678d
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 45 deletions.
24 changes: 8 additions & 16 deletions runner/app/pipelines/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@
class TextToSpeechPipeline(Pipeline):
def __init__(self, model_id: str):
    """Initialize the TTS pipeline, or skip model loading in mock mode.

    Args:
        model_id: Requested model identifier. Recorded on the instance;
            note the pipeline currently always loads the espnet
            FastSpeech2 Conformer weights regardless of this value.
    """
    self.model_id = model_id
    # MOCK_PIPELINE=true short-circuits all model loading so the service
    # can run (e.g. in CI or on machines without weights/GPU); __call__
    # mirrors this check and emits silent audio instead.
    if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
        logger.info("Mocking TextToSpeechPipeline for %s", model_id)
        return

    self.device = get_torch_device()
    # Preload FastSpeech2 Conformer (text -> spectrogram) and its HiFi-GAN
    # vocoder (spectrogram -> waveform) once at startup so per-request
    # latency excludes model load time.
    self.TTS_tokenizer = FastSpeech2ConformerTokenizer.from_pretrained(
        "espnet/fastspeech2_conformer", cache_dir=get_model_dir()
    )
    self.TTS_model = FastSpeech2ConformerModel.from_pretrained(
        "espnet/fastspeech2_conformer", cache_dir=get_model_dir()
    ).to(self.device)
    self.TTS_hifigan = FastSpeech2ConformerHifiGan.from_pretrained(
        "espnet/fastspeech2_conformer_hifigan", cache_dir=get_model_dir()
    ).to(self.device)


def __call__(self, text):
# generate unique filename
if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)
sf.write(audio_path, [0] * 22050, samplerate=22050)
return audio_path
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)

Expand All @@ -35,19 +35,11 @@ def __call__(self, text):
return audio_path

def generate_audio(self, text, output_file_name):
    """Synthesize speech for `text` and write it to `output_file_name`.

    Runs FastSpeech2 Conformer to produce a mel spectrogram, converts it
    to a waveform with the HiFi-GAN vocoder, and writes the result to
    disk as 22.05 kHz audio.

    Args:
        text: Input text to synthesize.
        output_file_name: Destination path for the generated audio file.

    Returns:
        The `output_file_name` path that was written.
    """
    # Tokenize and move the whole batch to the pipeline's device in one
    # step; input_ids stays a Long tensor as the model requires.
    inputs = self.TTS_tokenizer(text, return_tensors="pt").to(self.device)
    input_ids = inputs["input_ids"]  # already on self.device — no second .to() needed

    # Text -> mel spectrogram.
    output_dict = self.TTS_model(input_ids, return_dict=True)
    spectrogram = output_dict["spectrogram"]

    # Mel spectrogram -> waveform via the HiFi-GAN vocoder.
    waveform = self.TTS_hifigan(spectrogram)

    # 22050 Hz is the native output rate of this FastSpeech2/HiFi-GAN pair.
    sf.write(output_file_name, waveform.squeeze().detach().cpu().numpy(), samplerate=22050)
    return output_file_name

Expand Down
4 changes: 1 addition & 3 deletions runner/app/routes/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ class HTTPError(BaseModel):
}

class TextToSpeechParams(BaseModel):
# TODO: Make model_id and other None properties optional once Go codegen tool
# supports OAPI 3.1 https://github.com/deepmap/oapi-codegen/issues/373
text_input: Annotated[str, Form()] = ""
model_id: str = ""

Expand Down Expand Up @@ -61,7 +59,7 @@ async def text_to_speech(
)

if os.path.exists(result):
return FileResponse(path=result, media_type='audio/mp4', filename="generated_audio.mp4")
return FileResponse(path=result, media_type='audio/mp4', filename="generated_audio.mp4")
else:
return JSONResponse(
status_code=400,
Expand Down
4 changes: 2 additions & 2 deletions runner/dev/Dockerfile.debug
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FROM livepeer/ai-runner:base
RUN pip install debugpy

# Expose the debugpy port and start the app as usual.
CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
# CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

# If you want to wait for the debugger to attach before starting the app, use the --wait-for-client option.
# CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["python", "-m", "debugpy", "--listen", "0.0.0.0:5678", "--wait-for-client", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
75 changes: 51 additions & 24 deletions runner/dev/patches/debug.patch
Original file line number Diff line number Diff line change
@@ -1,25 +1,52 @@
diff --git a/worker/docker.go b/worker/docker.go
index e7dcca1..7ad026a 100644
--- a/worker/docker.go
+++ b/worker/docker.go
@@ -148,6 +148,7 @@ func (m *DockerManager) createContainer(ctx context.Context, pipeline string, mo
},
ExposedPorts: nat.PortSet{
containerPort: struct{}{},
+ "5678/tcp": struct{}{},
},
Labels: map[string]string{
containerCreatorLabel: containerCreator,
@@ -176,6 +177,12 @@ func (m *DockerManager) createContainer(ctx context.Context, pipeline string, mo
HostPort: containerHostPort,
},
},
+ "5678/tcp": []nat.PortBinding{
+ {
+ HostIP: "0.0.0.0",
+ HostPort: "5678",
+ },
+ },
},
}
--- app/pipelines/text_to_speech.py 2024-08-02 20:39:18.658448901 +0000
+++ app/pipelines/text_to_speech_updated.py 2024-08-02 20:39:02.304028206 +0000
@@ -12,21 +12,21 @@
class TextToSpeechPipeline(Pipeline):
def __init__(self, model_id: str):
self.model_id = model_id
- # kwargs = {"cache_dir": get_model_dir()}
+ if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
+ logger.info("Mocking TextToSpeechPipeline for %s", model_id)
+ return

- # folder_name = file_download.repo_folder_name(
- # repo_id=model_id, repo_type="model"
- # )
- # folder_path = os.path.join(get_model_dir(), folder_name)
self.device = get_torch_device()
- # preload FastSpeech 2 & hifigan
self.TTS_tokenizer = FastSpeech2ConformerTokenizer.from_pretrained("espnet/fastspeech2_conformer", cache_dir=get_model_dir())
self.TTS_model = FastSpeech2ConformerModel.from_pretrained("espnet/fastspeech2_conformer", cache_dir=get_model_dir()).to(self.device)
self.TTS_hifigan = FastSpeech2ConformerHifiGan.from_pretrained("espnet/fastspeech2_conformer_hifigan", cache_dir=get_model_dir()).to(self.device)

-
def __call__(self, text):
- # generate unique filename
+ if os.getenv("MOCK_PIPELINE", "").strip().lower() == "true":
+ unique_audio_filename = f"{uuid.uuid4()}.wav"
+ audio_path = os.path.join("/tmp/", unique_audio_filename)
+ sf.write(audio_path, [0] * 22050, samplerate=22050)
+ return audio_path
unique_audio_filename = f"{uuid.uuid4()}.wav"
audio_path = os.path.join("/tmp/", unique_audio_filename)

@@ -35,19 +35,11 @@
return audio_path

def generate_audio(self, text, output_file_name):
- # Tokenize input text
inputs = self.TTS_tokenizer(text, return_tensors="pt").to(self.device)
-
- # Ensure input IDs remain in Long tensor type
input_ids = inputs["input_ids"].to(self.device)
-
- # Generate spectrogram
output_dict = self.TTS_model(input_ids, return_dict=True)
spectrogram = output_dict["spectrogram"]
-
- # Convert spectrogram to waveform
waveform = self.TTS_hifigan(spectrogram)
-
sf.write(output_file_name, waveform.squeeze().detach().cpu().numpy(), samplerate=22050)
return output_file_name

0 comments on commit 036678d

Please sign in to comment.