Skip to content

Commit f347fad

Browse files
jameswex (LIT team)
authored and committed
Add thread lock to glue model tokenizer call and update demos version.
Fixes race condition when using multiple interpreters in separate threads of LIT server with same model. PiperOrigin-RevId: 417452193
1 parent 833f11d commit f347fad

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

lit_nlp/examples/models/glue_models.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import os
55
import re
6+
import threading
67
from typing import Optional, Dict, List, Iterable
78

89
import attr
@@ -58,6 +59,7 @@ def __init__(self,
5859
**config_kw):
5960
self.config = GlueModelConfig(**config_kw)
6061
self._load_model(model_name_or_path)
62+
self._lock = threading.Lock()
6163

6264
def _load_model(self, model_name_or_path):
6365
"""Load model. Can be overridden for testing."""
@@ -76,8 +78,9 @@ def _load_model(self, model_name_or_path):
7678
config=model_config)
7779

7880
def _get_tokens(self, ex: JsonDict, field_name: str) -> List[str]:
79-
return (ex.get("tokens_" + field_name) or
80-
self.tokenizer.tokenize(ex[field_name]))
81+
with self._lock:
82+
return (ex.get("tokens_" + field_name) or
83+
self.tokenizer.tokenize(ex[field_name]))
8184

8285
def _preprocess(self, inputs: Iterable[JsonDict]) -> Dict[str, tf.Tensor]:
8386
# Use pretokenized input if available.

0 commit comments

Comments (0)