Skip to content

Commit 7338ff8

Browse files
committed
use Effect hashmap for current active segment tracking
1 parent 8d3a392 commit 7338ff8

File tree

2 files changed

+47
-32
lines changed

2 files changed

+47
-32
lines changed

apps/desktop/src/utils/segment.test.ts

Lines changed: 25 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { describe, expect, test } from "vitest";
2-
import { buildSegments } from "./segment";
2+
import { buildSegments, SegmentKey } from "./segment";
33

44
describe("buildSegments", () => {
55
const testCases = [
@@ -21,14 +21,14 @@ describe("buildSegments", () => {
2121
],
2222
expected: [
2323
expect.objectContaining({
24-
key: { channel: 0 },
24+
key: SegmentKey.Channel({ channel: 0 }),
2525
words: [
2626
expect.objectContaining({ text: "1", isFinal: true }),
2727
expect.objectContaining({ text: "2", isFinal: false }),
2828
],
2929
}),
3030
expect.objectContaining({
31-
key: { channel: 1 },
31+
key: SegmentKey.Channel({ channel: 1 }),
3232
words: [
3333
expect.objectContaining({ text: "3", isFinal: false }),
3434
expect.objectContaining({ text: "4", isFinal: false }),
@@ -47,14 +47,14 @@ describe("buildSegments", () => {
4747
],
4848
expected: [
4949
expect.objectContaining({
50-
key: { channel: 0 },
50+
key: SegmentKey.Channel({ channel: 0 }),
5151
words: [
5252
expect.objectContaining({ text: "1" }),
5353
expect.objectContaining({ text: "2" }),
5454
],
5555
}),
5656
expect.objectContaining({
57-
key: { channel: 1 },
57+
key: SegmentKey.Channel({ channel: 1 }),
5858
words: [
5959
expect.objectContaining({ text: "3" }),
6060
],
@@ -72,7 +72,7 @@ describe("buildSegments", () => {
7272
],
7373
expected: [
7474
expect.objectContaining({
75-
key: { channel: 0 },
75+
key: SegmentKey.Channel({ channel: 0 }),
7676
words: [
7777
expect.objectContaining({ text: "1" }),
7878
expect.objectContaining({ text: "2" }),
@@ -91,15 +91,15 @@ describe("buildSegments", () => {
9191
partialWords: [],
9292
expected: [
9393
expect.objectContaining({
94-
key: { channel: 0 },
94+
key: SegmentKey.Channel({ channel: 0 }),
9595
words: [expect.objectContaining({ text: "first" })],
9696
}),
9797
expect.objectContaining({
98-
key: { channel: 1 },
98+
key: SegmentKey.Channel({ channel: 1 }),
9999
words: [expect.objectContaining({ text: "other" })],
100100
}),
101101
expect.objectContaining({
102-
key: { channel: 0 },
102+
key: SegmentKey.Channel({ channel: 0 }),
103103
words: [expect.objectContaining({ text: "return" })],
104104
}),
105105
],
@@ -113,7 +113,7 @@ describe("buildSegments", () => {
113113
partialWords: [],
114114
expected: [
115115
expect.objectContaining({
116-
key: { channel: 0 },
116+
key: SegmentKey.Channel({ channel: 0 }),
117117
words: [
118118
expect.objectContaining({ text: "first" }),
119119
expect.objectContaining({ text: "second" }),
@@ -130,9 +130,18 @@ describe("buildSegments", () => {
130130
],
131131
partialWords: [],
132132
expected: [
133-
expect.objectContaining({ key: { channel: 0 }, words: [expect.objectContaining({ text: "a" })] }),
134-
expect.objectContaining({ key: { channel: 1 }, words: [expect.objectContaining({ text: "b" })] }),
135-
expect.objectContaining({ key: { channel: 2 }, words: [expect.objectContaining({ text: "c" })] }),
133+
expect.objectContaining({
134+
key: SegmentKey.Channel({ channel: 0 }),
135+
words: [expect.objectContaining({ text: "a" })],
136+
}),
137+
expect.objectContaining({
138+
key: SegmentKey.Channel({ channel: 1 }),
139+
words: [expect.objectContaining({ text: "b" })],
140+
}),
141+
expect.objectContaining({
142+
key: SegmentKey.Channel({ channel: 2 }),
143+
words: [expect.objectContaining({ text: "c" })],
144+
}),
136145
],
137146
},
138147
{
@@ -143,7 +152,7 @@ describe("buildSegments", () => {
143152
partialWords: [],
144153
expected: [
145154
expect.objectContaining({
146-
key: { channel: 0 },
155+
key: SegmentKey.Channel({ channel: 0 }),
147156
words: [expect.objectContaining({ text: "only", isFinal: true })],
148157
}),
149158
],
@@ -160,15 +169,15 @@ describe("buildSegments", () => {
160169
partialWords: [],
161170
expected: [
162171
expect.objectContaining({
163-
key: { channel: 0 },
172+
key: SegmentKey.Channel({ channel: 0 }),
164173
words: [
165174
expect.objectContaining({ text: "a1" }),
166175
expect.objectContaining({ text: "a2" }),
167176
expect.objectContaining({ text: "a3" }),
168177
],
169178
}),
170179
expect.objectContaining({
171-
key: { channel: 1 },
180+
key: SegmentKey.Channel({ channel: 1 }),
172181
words: [
173182
expect.objectContaining({ text: "b1" }),
174183
expect.objectContaining({ text: "b2" }),

apps/desktop/src/utils/segment.ts

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import { Data, HashMap, Option } from "effect";
2+
13
export type WordLike = {
24
text: string;
35
start_ms: number;
@@ -14,7 +16,11 @@ export type Segment<TWord extends SegmentWord = SegmentWord> = {
1416
words: TWord[];
1517
};
1618

17-
type SegmentKey = { channel: number } | { speaker_index: number; channel: number };
19+
export type SegmentKey = Data.TaggedEnum<{
20+
Channel: { channel: number };
21+
}>;
22+
23+
export const SegmentKey = Data.taggedEnum<SegmentKey>();
1824

1925
export function buildSegments<
2026
TFinal extends WordLike,
@@ -38,41 +44,41 @@ export function buildSegments<
3844
channel: word.channel,
3945
isFinal: false,
4046
})),
41-
];
47+
].sort((a, b) => a.start_ms - b.start_ms);
4248

4349
return createSpeakerTurns(allWords);
4450
}
4551

46-
function createSpeakerTurns<TWord extends SegmentWord>(
47-
words: TWord[],
48-
maxGapMs = 2000,
49-
): Segment<TWord>[] {
52+
function createSpeakerTurns<TWord extends SegmentWord>(words: TWord[]): Segment<TWord>[] {
53+
const MAX_GAP_MS = 2000;
54+
5055
if (words.length === 0) {
5156
return [];
5257
}
5358

54-
const sortedWords = [...words].sort((a, b) => a.start_ms - b.start_ms);
5559
const segments: Segment<TWord>[] = [];
56-
const currentByChannel = new Map<number, Segment<TWord>>();
60+
let currentActiveSegment = HashMap.empty<SegmentKey, Segment<TWord>>();
5761

58-
for (const word of sortedWords) {
59-
const current = currentByChannel.get(word.channel);
62+
for (const word of words) {
63+
const key = SegmentKey.Channel({ channel: word.channel });
64+
const currentOption = HashMap.get(currentActiveSegment, key);
6065

61-
if (!current) {
62-
const newSegment = { key: { channel: word.channel }, words: [word] };
63-
currentByChannel.set(word.channel, newSegment);
66+
if (Option.isNone(currentOption)) {
67+
const newSegment = { key, words: [word] };
68+
currentActiveSegment = HashMap.set(currentActiveSegment, key, newSegment);
6469
segments.push(newSegment);
6570
continue;
6671
}
6772

73+
const current = currentOption.value;
6874
const lastWord = current.words[current.words.length - 1];
6975
const gap = word.start_ms - lastWord.end_ms;
7076

71-
if (gap <= maxGapMs) {
77+
if (gap <= MAX_GAP_MS) {
7278
current.words.push(word);
7379
} else {
74-
const newSegment = { key: { channel: word.channel }, words: [word] };
75-
currentByChannel.set(word.channel, newSegment);
80+
const newSegment = { key, words: [word] };
81+
currentActiveSegment = HashMap.set(currentActiveSegment, key, newSegment);
7682
segments.push(newSegment);
7783
}
7884
}

0 commit comments

Comments
 (0)