Skip to content

Commit c01b4cc

Browse files
committed
InputStreamにタイムスタンプ管理を追加し、STT結果出力にタイムスタンプ情報を含めるよう実装
1 parent facb568 commit c01b4cc

File tree

1 file changed

+36
-5
lines changed

1 file changed

+36
-5
lines changed

src/plugin.rs

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,15 @@ impl<'text, 'raw> TryFrom<nojson::RawJsonValue<'text, 'raw>> for SpeechToTextPlu
4444
struct InputStream {
4545
_stream_id: MediaStreamId, // TODO
4646
chunk: Vec<i16>, // (left, right)...
47+
timestamp: Duration,
4748
}
4849

4950
impl InputStream {
5051
fn new(stream_id: MediaStreamId) -> Self {
5152
Self {
5253
_stream_id: stream_id,
5354
chunk: Vec::new(),
55+
timestamp: Duration::MAX, // sentinel value
5456
}
5557
}
5658

@@ -104,6 +106,8 @@ impl SpeechToTextPlugin {
104106
fn execute_stt(
105107
&mut self,
106108
input_stream_id: MediaStreamId,
109+
timestamp: Duration,
110+
duration: Duration,
107111
chunk: Vec<i16>,
108112
) -> orfail::Result<()> {
109113
let request_id: usize = 0;
@@ -141,7 +145,18 @@ impl SpeechToTextPlugin {
141145
.or_fail()?;
142146
let obj = JsonObject::new(result).or_fail()?;
143147
let text: String = obj.get_required("text").or_fail()?;
144-
todo!()
148+
149+
//
150+
let output = nojson::object(|f| {
151+
f.member("stream_id", input_stream_id.get())?;
152+
f.member("timestamp", timestamp.as_secs_f32())?;
153+
f.member("duration", duration.as_secs_f32())?;
154+
f.member("text", &text)?;
155+
Ok(())
156+
});
157+
writeln!(self.output_file, "{output}").or_fail()?;
158+
159+
Ok(())
145160
}
146161
}
147162

@@ -157,18 +172,34 @@ impl MediaProcessor for SpeechToTextPlugin {
157172
fn process_input(&mut self, input: MediaProcessorInput) -> orfail::Result<()> {
158173
if let Some(sample) = input.sample {
159174
let input_stream = self.input_streams.get_mut(&input.stream_id).or_fail()?;
175+
160176
let data = sample.expect_audio_data().or_fail()?;
177+
if input_stream.timestamp == Duration::MAX {
178+
input_stream.timestamp = data.timestamp;
179+
}
180+
161181
input_stream
162182
.chunk
163183
.extend(data.interleaved_stereo_samples().or_fail()?);
164-
if input_stream.duration() >= self.chunk_duration {
184+
let chunk_timestamp = input_stream.timestamp;
185+
let chunk_duration = input_stream.duration();
186+
input_stream.timestamp += chunk_duration;
187+
188+
if chunk_duration >= self.chunk_duration {
165189
let chunk = std::mem::take(&mut input_stream.chunk);
166-
self.execute_stt(input.stream_id, chunk).or_fail()?;
190+
self.execute_stt(input.stream_id, chunk_timestamp, chunk_duration, chunk)
191+
.or_fail()?;
167192
}
168193
} else {
169194
let input_stream = self.input_streams.remove(&input.stream_id).or_fail()?;
170-
self.execute_stt(input.stream_id, input_stream.chunk)
171-
.or_fail()?;
195+
let chunk_duration = input_stream.duration();
196+
self.execute_stt(
197+
input.stream_id,
198+
input_stream.timestamp,
199+
chunk_duration,
200+
input_stream.chunk,
201+
)
202+
.or_fail()?;
172203
}
173204
Ok(())
174205
}

0 commit comments

Comments
 (0)