You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# skip silence before any possible hallucination that is surrounded
423
+
# by silence or more hallucinations
424
+
hal_last_end=last_speech_timestamp
425
+
forsiinrange(len(current_segments)):
426
+
segment=current_segments[si]
427
+
ifnotsegment["words"]:
428
+
continue
429
+
ifis_segment_anomaly(segment):
430
+
next_segment=next_words_segment(
431
+
current_segments[si+1 :]
432
+
)
433
+
ifnext_segmentisnotNone:
434
+
hal_next_start=next_segment["words"][0]["start"]
435
+
else:
436
+
hal_next_start=time_offset+segment_duration
437
+
silence_before= (
438
+
segment["start"] -hal_last_end>threshold
439
+
orsegment["start"] <threshold
440
+
orsegment["start"] -time_offset<2.0
441
+
)
442
+
silence_after= (
443
+
hal_next_start-segment["end"] >threshold
444
+
oris_segment_anomaly(next_segment)
445
+
orwindow_end_time-segment["end"] <2.0
446
+
)
447
+
ifsilence_beforeandsilence_after:
448
+
seek=round(
449
+
max(time_offset+1, segment["start"])
450
+
*FRAMES_PER_SECOND
451
+
)
452
+
ifcontent_duration-segment["end"] <threshold:
453
+
seek=content_frames
454
+
current_segments[si:] = []
455
+
break
456
+
hal_last_end=segment["end"]
457
+
458
+
last_word_end=get_end(current_segments)
459
+
iflast_word_endisnotNone:
460
+
last_speech_timestamp=last_word_end
344
461
345
462
ifverbose:
346
463
forsegmentincurrent_segments:
@@ -427,6 +544,8 @@ def valid_model_name(name):
427
544
parser.add_argument("--max_line_count", type=optional_int, default=None, help="(requires --word_timestamps True) the maximum number of lines in a segment")
428
545
parser.add_argument("--max_words_per_line", type=optional_int, default=None, help="(requires --word_timestamps True, no effect with --max_line_width) the maximum number of words in a segment")
429
546
parser.add_argument("--threads", type=optional_int, default=0, help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
547
+
parser.add_argument("--clip_timestamps", type=str, default="0", help="comma-separated list start,end,start,end,... timestamps (in seconds) of clips to process, where the last end timestamp defaults to the end of the file")
548
+
parser.add_argument("--hallucination_silence_threshold", type=optional_float, help="(requires --word_timestamps True) skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected")
0 commit comments