Skip to content

Commit d9ef940

Browse files
authored
Port all Speech-to-Text code to TypeScript (#225)
* Clean up * Port all STT to TypeScript * Add entry * Fix ESLint * Fix ESLint
1 parent ccf35da commit d9ef940

File tree

9 files changed

+184
-110
lines changed

9 files changed

+184
-110
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1515

1616
### Changed
1717

18-
- 💥 Modernized some code with TypeScript, more type-aligned to W3C Speech API, and moved to official Event Target API, in PR [#220](https://github.com/compulim/web-speech-cognitive-services/pull/220) and [#224](https://github.com/compulim/web-speech-cognitive-services/pull/224)
18+
- 💥 Modernized some code with TypeScript, more type-aligned to W3C Speech API, and moved to official Event Target API, in PR [#220](https://github.com/compulim/web-speech-cognitive-services/pull/220), [#224](https://github.com/compulim/web-speech-cognitive-services/pull/224) and [#225](https://github.com/compulim/web-speech-cognitive-services/pull/225)
1919
- `SpeechRecognitionResult` and `SpeechRecognitionResultList` are now array-like objects; use `Array.from()` to convert them into an array
2020
- Updated build tools and added named exports via CJS/ESM
2121
- Bumped dependencies, in PR [#216](https://github.com/compulim/web-speech-cognitive-services/pull/216) and [#218](https://github.com/compulim/web-speech-cognitive-services/issues/218)
Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
/* eslint no-empty-function: "off" */
55
/* eslint no-magic-numbers: ["error", { "ignore": [0, 100, 150] }] */
66

7-
import patchOptions from '../patchOptions';
7+
import patchOptions, { type PatchOptionsInit } from '../patchOptions';
88
import SpeechSDK from '../SpeechSDK';
99
import createSpeechRecognitionPonyfillFromRecognizer from './createSpeechRecognitionPonyfillFromRecognizer';
1010

1111
const { AudioConfig, OutputFormat, SpeechConfig, SpeechRecognizer } = SpeechSDK;
1212

13-
export default function createSpeechRecognitionPonyfill(options) {
13+
export default function createSpeechRecognitionPonyfill(options: PatchOptionsInit) {
1414
const {
1515
audioConfig = AudioConfig.fromDefaultMicrophoneInput(),
1616

@@ -33,23 +33,26 @@ export default function createSpeechRecognitionPonyfill(options) {
3333
return {};
3434
}
3535

36-
const createRecognizer = async lang => {
37-
const { authorizationToken, region, speechRecognitionHostname, subscriptionKey } = await fetchCredentials();
36+
const createRecognizer = async (lang: string) => {
37+
const credentials = await fetchCredentials();
3838
let speechConfig;
3939

40-
if (speechRecognitionHostname) {
41-
const host = { hostname: speechRecognitionHostname, port: 443, protocol: 'wss:' };
40+
if (typeof credentials.speechRecognitionHostname !== 'undefined') {
41+
const host = new URL('wss://hostname:443');
4242

43-
if (authorizationToken) {
43+
host.hostname = credentials.speechRecognitionHostname;
44+
45+
if (credentials.authorizationToken) {
4446
speechConfig = SpeechConfig.fromHost(host);
45-
speechConfig.authorizationToken = authorizationToken;
47+
speechConfig.authorizationToken = credentials.authorizationToken;
4648
} else {
47-
speechConfig = SpeechConfig.fromHost(host, subscriptionKey);
49+
speechConfig = SpeechConfig.fromHost(host, credentials.subscriptionKey);
4850
}
4951
} else {
50-
speechConfig = authorizationToken
51-
? SpeechConfig.fromAuthorizationToken(authorizationToken, region)
52-
: SpeechConfig.fromSubscription(subscriptionKey, region);
52+
speechConfig =
53+
typeof credentials.authorizationToken !== 'undefined'
54+
? SpeechConfig.fromAuthorizationToken(credentials.authorizationToken, credentials.region)
55+
: SpeechConfig.fromSubscription(credentials.subscriptionKey, credentials.region);
5356
}
5457

5558
if (speechRecognitionEndpointId) {
@@ -66,7 +69,6 @@ export default function createSpeechRecognitionPonyfill(options) {
6669
};
6770

6871
return createSpeechRecognitionPonyfillFromRecognizer({
69-
audioConfig,
7072
createRecognizer,
7173
enableTelemetry,
7274
looseEvents,

packages/web-speech-cognitive-services/src/SpeechServices/SpeechToText/createSpeechRecognitionPonyfillFromRecognizer.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ import createPromiseQueue from '../../Util/createPromiseQueue';
1616
import SpeechSDK from '../SpeechSDK';
1717
import cognitiveServiceEventResultToWebSpeechRecognitionResult from './cognitiveServiceEventResultToWebSpeechRecognitionResult';
1818
import cognitiveServicesAsyncToPromise from './cognitiveServicesAsyncToPromise';
19+
import EventListenerMap from './private/EventListenerMap';
20+
import prepareAudioConfig from './private/prepareAudioConfig';
21+
import serializeRecognitionResult from './private/serializeRecognitionResult';
1922
import SpeechGrammarList from './SpeechGrammarList';
2023
import SpeechRecognitionErrorEvent from './SpeechRecognitionErrorEvent';
2124
import SpeechRecognitionEvent from './SpeechRecognitionEvent';
2225
import { type SpeechRecognitionEventListenerMap } from './SpeechRecognitionEventListenerMap';
2326
import type SpeechRecognitionResult from './SpeechRecognitionResult';
2427
import SpeechRecognitionResultList from './SpeechRecognitionResultList';
25-
import EventListenerMap from './private/EventListenerMap';
26-
import prepareAudioConfig from './private/prepareAudioConfig';
27-
import serializeRecognitionResult from './private/serializeRecognitionResult';
2828

2929
// https://docs.microsoft.com/en-us/javascript/api/microsoft-cognitiveservices-speech-sdk/speechconfig?view=azure-node-latest#outputformat
3030
// {
@@ -54,7 +54,7 @@ type CreateSpeechRecognitionPonyfillFromRecognizerInit = {
5454
createRecognizer: (lang: string) => Promise<SpeechRecognizerType>;
5555
enableTelemetry: boolean;
5656
looseEvents: boolean;
57-
referenceGrammars: [];
57+
referenceGrammars?: readonly string[] | undefined;
5858
textNormalization: 'display' | 'itn' | 'lexical' | 'maskeditn';
5959
};
6060

@@ -355,7 +355,7 @@ export default function createSpeechRecognitionPonyfillFromRecognizer({
355355
// HACK: We are using the internal of SpeechRecognizer because they did not expose it
356356
const { dynamicGrammar } = recognizer['privReco'];
357357

358-
referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar(referenceGrammars);
358+
referenceGrammars && referenceGrammars.length && dynamicGrammar.addReferenceGrammar([...referenceGrammars]);
359359
phrases && phrases.length && dynamicGrammar.addPhrase([...phrases]);
360360

361361
await cognitiveServicesAsyncToPromise<void>(recognizer.startContinuousRecognitionAsync, recognizer)();

packages/web-speech-cognitive-services/src/SpeechServices/SpeechToText/private/prepareAudioConfig.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import {
66
import averageAmplitude from './averageAmplitude';
77

88
export default function prepareAudioConfig(audioConfig: AudioConfig) {
9-
const audioConfigImpl = audioConfig as AudioConfigImpl; // HACK: Need internals of AudioConfig.
9+
// Speech SDK also force cast AudioConfig to AudioConfigImpl and pass it to ServiceRecognizerBase to use attach() and other methods.
10+
// https://github.com/microsoft/cognitive-services-speech-sdk-js/blob/a6e9d2a202534565ccc97650861a6b296de48ecf/src/sdk/SpeechRecognizer.ts#L291C27-L291C43
11+
const audioConfigImpl = audioConfig as AudioConfigImpl;
1012
const originalAttach = audioConfigImpl.attach;
1113
const boundOriginalAttach = audioConfigImpl.attach.bind(audioConfigImpl);
1214
let firstChunk = false;

packages/web-speech-cognitive-services/src/SpeechServices/fetchAuthorizationToken.js renamed to packages/web-speech-cognitive-services/src/SpeechServices/fetchAuthorizationToken.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
const TOKEN_URL_TEMPLATE = 'https://{region}.api.cognitive.microsoft.com/sts/v1.0/issueToken';
22

3-
export default async function ({ region, subscriptionKey }) {
3+
type FetchAuthorizationTokenInit = {
4+
region: string;
5+
subscriptionKey: string;
6+
};
7+
8+
export default async function fetchAuthorizationToken({
9+
region,
10+
subscriptionKey
11+
}: FetchAuthorizationTokenInit): Promise<string> {
412
const res = await fetch(TOKEN_URL_TEMPLATE.replace(/\{region\}/u, region), {
513
headers: {
614
'Ocp-Apim-Subscription-Key': subscriptionKey

packages/web-speech-cognitive-services/src/SpeechServices/patchOptions.js

Lines changed: 0 additions & 89 deletions
This file was deleted.
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
import { type AudioConfig } from 'microsoft-cognitiveservices-speech-sdk';
2+
import resolveFunctionOrReturnValue from './resolveFunctionOrReturnValue';
3+
4+
let shouldWarnOnSubscriptionKey = true;
5+
6+
export type Credentials = Readonly<
7+
(
8+
| { authorizationToken: string; subscriptionKey?: undefined }
9+
| { authorizationToken?: undefined; subscriptionKey: string }
10+
) &
11+
(
12+
| {
13+
customVoiceHostname?: undefined;
14+
region: string;
15+
speechRecognitionHostname?: undefined;
16+
speechSynthesisHostname?: undefined;
17+
}
18+
| {
19+
customVoiceHostname: string;
20+
region?: undefined;
21+
speechRecognitionHostname: string;
22+
speechSynthesisHostname: string;
23+
}
24+
)
25+
>;
26+
27+
export type PatchOptionsInit = {
28+
audioConfig: AudioConfig;
29+
credentials?: (() => Credentials | Promise<Credentials>) | Credentials | Promise<Credentials>;
30+
enableTelemetry: boolean;
31+
looseEvent?: boolean | undefined;
32+
looseEvents?: boolean | undefined;
33+
referenceGrammars?: readonly string[] | undefined;
34+
region?: string | undefined;
35+
speechRecognitionEndpointId: string;
36+
textNormalization: 'display' | 'itn' | 'lexical' | 'maskeditn';
37+
} & (
38+
| {
39+
authorizationToken: string;
40+
subscriptionKey?: undefined;
41+
}
42+
| {
43+
authorizationToken?: undefined;
44+
subscriptionKey: string;
45+
}
46+
);
47+
48+
type PatchedOptions = Readonly<{
49+
audioConfig: AudioConfig;
50+
enableTelemetry: boolean;
51+
fetchCredentials: () => Promise<Credentials>;
52+
looseEvents: boolean;
53+
referenceGrammars: readonly string[] | undefined;
54+
speechRecognitionEndpointId: string | undefined;
55+
textNormalization: 'display' | 'itn' | 'lexical' | 'maskeditn';
56+
}>;
57+
58+
export default function patchOptions(init: PatchOptionsInit): PatchedOptions {
59+
const {
60+
audioConfig,
61+
authorizationToken,
62+
enableTelemetry,
63+
looseEvent,
64+
referenceGrammars,
65+
region = 'westus',
66+
speechRecognitionEndpointId,
67+
subscriptionKey,
68+
textNormalization
69+
} = init;
70+
71+
let { credentials, looseEvents } = init;
72+
73+
if (typeof looseEvent !== 'undefined') {
74+
console.warn('web-speech-cognitive-services: The option "looseEvent" should be named as "looseEvents".');
75+
76+
looseEvents = looseEvent;
77+
}
78+
79+
if (!credentials) {
80+
if (!authorizationToken && !subscriptionKey) {
81+
throw new Error('web-speech-cognitive-services: Credentials must be specified.');
82+
} else {
83+
console.warn(
84+
'web-speech-cognitive-services: We are deprecating authorizationToken, region, and subscriptionKey. Please use credentials instead. The deprecated option will be removed on or after 2020-11-14.'
85+
);
86+
87+
credentials = async () =>
88+
typeof init.authorizationToken !== 'undefined'
89+
? { authorizationToken: await resolveFunctionOrReturnValue<string>(init.authorizationToken), region }
90+
: { region, subscriptionKey: await resolveFunctionOrReturnValue<string>(init.subscriptionKey) };
91+
}
92+
}
93+
94+
return Object.freeze({
95+
audioConfig,
96+
enableTelemetry,
97+
fetchCredentials: async () => {
98+
const {
99+
authorizationToken,
100+
customVoiceHostname,
101+
region,
102+
speechRecognitionHostname,
103+
speechSynthesisHostname,
104+
subscriptionKey
105+
} = await resolveFunctionOrReturnValue<Credentials>(credentials);
106+
107+
if ((!authorizationToken && !subscriptionKey) || (authorizationToken && subscriptionKey)) {
108+
throw new Error(
109+
'web-speech-cognitive-services: Either "authorizationToken" or "subscriptionKey" must be provided.'
110+
);
111+
} else if (!region && !(speechRecognitionHostname && speechSynthesisHostname)) {
112+
throw new Error(
113+
'web-speech-cognitive-services: Either "region" or "speechRecognitionHostname" and "speechSynthesisHostname" must be set.'
114+
);
115+
} else if (region && (customVoiceHostname || speechRecognitionHostname || speechSynthesisHostname)) {
116+
throw new Error(
117+
'web-speech-cognitive-services: Only either "region" or "customVoiceHostname", "speechRecognitionHostname" and "speechSynthesisHostname" can be set.'
118+
);
119+
} else if (authorizationToken) {
120+
if (typeof authorizationToken !== 'string') {
121+
throw new Error('web-speech-cognitive-services: "authorizationToken" must be a string.');
122+
}
123+
} else if (typeof subscriptionKey !== 'string') {
124+
throw new Error('web-speech-cognitive-services: "subscriptionKey" must be a string.');
125+
}
126+
127+
if (shouldWarnOnSubscriptionKey && subscriptionKey) {
128+
console.warn(
129+
'web-speech-cognitive-services: In production environment, subscription key should not be used, authorization token should be used instead.'
130+
);
131+
132+
shouldWarnOnSubscriptionKey = false;
133+
}
134+
135+
return {
136+
...(typeof authorizationToken !== 'undefined' ? { authorizationToken } : { subscriptionKey }),
137+
...(typeof region !== 'undefined'
138+
? { region }
139+
: {
140+
customVoiceHostname,
141+
speechRecognitionHostname,
142+
speechSynthesisHostname
143+
})
144+
} satisfies Credentials;
145+
},
146+
looseEvents: !!looseEvents,
147+
referenceGrammars: referenceGrammars && Object.freeze([...referenceGrammars]),
148+
speechRecognitionEndpointId,
149+
textNormalization
150+
});
151+
}

0 commit comments

Comments
 (0)