Skip to content

avoid using stt-align-node! #101

@Niceblueman

Description

@Niceblueman

Hi! 👋

Firstly, thanks for your work on this project! 🙂

Today I used patch-package to patch slate-transcript-editor (the "@version" suffix was lost to email obfuscation in this copy) for the project I'm working on.

Here is the diff that solved my problem:

diff --git a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
index be190f0..25a9686 100644
--- a/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
+++ b/node_modules/slate-transcript-editor/util/export-adapters/slate-to-dpe/update-timestamps/plain-text-align-to-slate.js
@@ -1,8 +1,81 @@
-import { alignSTT } from 'stt-align-node';
+// import { alignSTT } from 'stt-align-node';
 import { shortTimecode } from '../../../timecode-converter';
 import countWords from '../../../count-words';
 import generatePreviousTimingsUpToCurrent from '../../../dpe-to-slate/generate-previous-timings-up-to-current';
+function alignSTT(sttWords, transcriptText, start, end) {
+  const sttWordsList = sttWords.words;
+  const opCodes = calculateDiff(sttWordsList, transcriptText);
+  const transcriptWords = convertRefTextToList(transcriptText);
+  const alignedResults = alignRefTextWithSTT(
+    opCodes,
+    sttWordsList,
+    transcriptWords,
+    start,
+    end
+  );
+  return alignedResults;
+}
+
+// Function to calculate the difference between two arrays of words
+function calculateDiff(array1, array2) {
+  const opCodes = [];
+
+  // Iterate over the arrays and find the differences
+  let i = 0;
+  let j = 0;
+
+  while (i < array1.length && j < array2.length) {
+    if (array1[i] === array2[j]) {
+      opCodes.push(['equal', i, i + 1, j, j + 1]);
+      i++;
+      j++;
+    } else {
+      opCodes.push(['delete', i, i + 1, j, j]);
+      i++;
+    }
+  }
 
+  // Handle remaining elements in array1
+  while (i < array1.length) {
+    opCodes.push(['delete', i, i + 1, j, j]);
+    i++;
+  }
+
+  // Handle remaining elements in array2
+  while (j < array2.length) {
+    opCodes.push(['insert', i, i, j, j + 1]);
+    j++;
+  }
+
+  return opCodes;
+}
+
+// Function to convert a text string to a list of words
+function convertRefTextToList(text) {
+  return text.split(/\s+/);
+}
+
+// Function to align reference text with STT output based on calculated diff
+function alignRefTextWithSTT(opCodes, sttWords, refWords, start, end) {
+  let alignedResults = [];
+
+  for (const op of opCodes) {
+    const [tag, i1, i2, j1, j2] = op;
+
+    if (tag === 'equal') {
+      alignedResults.push(...sttWords.slice(i1, i2));
+    } else if (tag === 'delete') {
+      alignedResults.push(...Array(i2 - i1).fill(''));
+    } else if (tag === 'insert') {
+      alignedResults.push(...refWords.slice(j1, j2));
+    }
+  }
+
+  // Trim the result based on the specified start and end indices
+  alignedResults = alignedResults.slice(start, end);
+
+  return alignedResults;
+}
 const createSlateContentFromSlateJsParagraphs = (currentContent, newEntities) => {
   // Update entites to block structure.
   const updatedBlockArray = [];

This issue body was partially generated by patch-package.

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions