Skip to content

Commit

Permalink
Add compression_ratio_hallucination_threshold
Browse files Browse the repository at this point in the history
Add compression_ratio_hallucination_threshold to Discard High Compression Ratio Segments in transcribe()

#2420
  • Loading branch information
Ko4ka authored Nov 1, 2024
1 parent 5979f03 commit bb8c475
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions whisper/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ def transcribe(
verbose: Optional[bool] = None,
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
compression_ratio_threshold: Optional[float] = 2.4,
compression_ratio_halucination_threshold: Optional[float] = 3,
logprob_threshold: Optional[float] = -1.0,
no_speech_threshold: Optional[float] = 0.6,
condition_on_previous_text: bool = True,
initial_prompt: Optional[str] = None,
carry_initial_prompt: bool = False,
word_timestamps: bool = False,
prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,,!!??::”)]}、",
Expand Down Expand Up @@ -76,6 +76,9 @@ def transcribe(
compression_ratio_threshold: float
If the gzip compression ratio is above this value, treat as failed
compression_ratio_halucination_threshold: float
If the gzip compression ratio is still above this value after all decoding attempts, treat the segment as a hallucination and skip it
logprob_threshold: float
If the average log probability over sampled tokens is below this value, treat as failed
Expand Down Expand Up @@ -205,7 +208,7 @@ def decode_with_fallback(segment: torch.Tensor) -> DecodingResult:
compression_ratio_threshold is not None
and decode_result.compression_ratio > compression_ratio_threshold
):
needs_fallback = True # too repetitive
needs_fallback = True # too repetitive <-- We can improve it...
if (
logprob_threshold is not None
and decode_result.avg_logprob < logprob_threshold
Expand All @@ -216,6 +219,13 @@ def decode_with_fallback(segment: torch.Tensor) -> DecodingResult:
and decode_result.no_speech_prob > no_speech_threshold
):
needs_fallback = False # silence
if (
compression_ratio_halucination_threshold is not None
and decode_result.compression_ratio > compression_ratio_halucination_threshold
and t == temperatures[-1]
):
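# Discard the segment
# NOTE(review): `continue` only advances the enclosing loop to its next iteration; since this
# branch is guarded by `t == temperatures[-1]`, confirm the segment is actually dropped here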
continue # Skip to the next segment
if not needs_fallback:
break

Expand Down

0 comments on commit bb8c475

Please sign in to comment.