Skip to content

Commit 5867fa1

Browse files
fix: whisper transcription test use github url + update test (#8455)
* adding audio file * changing URL * updating tests * temporary removing failing test * updating tests * removing failing test * typo * linting * fixing URL * updating tests
1 parent a50593e commit 5867fa1

File tree

2 files changed

+43
-3
lines changed

2 files changed

+43
-3
lines changed

test/components/audio/test_whisper_local.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,3 +204,22 @@ def test_whisper_local_transcriber(self, test_files_path):
204204
assert docs[2].content.strip().lower() == "answer."
205205
# meta.audio_file should contain the temp path where we dumped the audio bytes
206206
assert docs[2].meta["audio_file"]
207+
208+
@pytest.mark.integration
@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="ffmpeg not installed on Windows CI")
def test_whisper_local_transcriber_pipeline_and_url_source(self):
    """
    Run a fetcher -> transcriber pipeline against an audio file served from a URL.

    The audio is downloaded by a `LinkContentFetcher` and handed to a
    `LocalWhisperTranscriber` using the "tiny" model; the first returned
    document must contain the phrase "masses of people".
    """
    # Audio sample hosted in the repository itself.
    audio_url = "https://github.com/deepset-ai/haystack/raw/refs/heads/main/test/test_files/audio/MLK_Something_happening.mp3"  # noqa: E501

    pipeline = Pipeline()
    pipeline.add_component("fetcher", LinkContentFetcher())
    pipeline.add_component("transcriber", LocalWhisperTranscriber(model="tiny"))
    pipeline.connect("fetcher", "transcriber")

    output = pipeline.run(data={"fetcher": {"urls": [audio_url]}})

    documents = output["transcriber"]["documents"]
    assert "masses of people" in documents[0].content

test/components/audio/test_whisper_remote.py

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
import pytest
66

77
from haystack import Pipeline
8-
from haystack.components.audio import LocalWhisperTranscriber
98
from haystack.components.audio.whisper_remote import RemoteWhisperTranscriber
109
from haystack.components.fetchers import LinkContentFetcher
1110
from haystack.dataclasses import ByteStream
@@ -100,7 +99,7 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch):
10099
},
101100
}
102101

103-
def test_from_dict_with_defualt_parameters(self, monkeypatch):
102+
def test_from_dict_with_default_parameters(self, monkeypatch):
104103
monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
105104

106105
data = {
@@ -147,7 +146,7 @@ def test_from_dict_with_custom_init_parameters(self, monkeypatch):
147146
"temperature": "0.5",
148147
}
149148

150-
def test_from_dict_with_defualt_parameters_no_env_var(self, monkeypatch):
149+
def test_from_dict_with_default_parameters_no_env_var(self, monkeypatch):
151150
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
152151

153152
data = {
@@ -189,3 +188,25 @@ def test_whisper_remote_transcriber(self, test_files_path):
189188
assert str(test_files_path / "audio" / "the context for this answer is here.wav") == docs[1].meta["file_path"]
190189

191190
assert docs[2].content.strip().lower() == "answer."
191+
192+
@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY", None),
    reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
@pytest.mark.integration
def test_whisper_remote_transcriber_pipeline_and_url_source(self):
    """
    Run a fetcher -> transcriber pipeline against an audio file served from a URL.

    The audio is downloaded by a `LinkContentFetcher` and handed to a
    `RemoteWhisperTranscriber`; the first returned document must contain the
    phrase "masses of people". Skipped when OPENAI_API_KEY is not set.
    """
    pipe = Pipeline()
    pipe.add_component("fetcher", LinkContentFetcher())
    pipe.add_component("transcriber", RemoteWhisperTranscriber())

    pipe.connect("fetcher", "transcriber")
    result = pipe.run(
        data={
            "fetcher": {
                "urls": [
                    # The suppression must sit on the over-long line itself to take effect.
                    "https://github.com/deepset-ai/haystack/raw/refs/heads/main/test/test_files/audio/MLK_Something_happening.mp3"  # noqa: E501
                ]
            }
        }
    )
    assert "masses of people" in result["transcriber"]["documents"][0].content

0 commit comments

Comments
 (0)