Skip to content

Commit 8523ba9

Browse files
committed
Add url resolving for ML backends using HOSTNAME passed from /setup.
Better support for NeMo ASR.
1 parent 020ccb0 commit 8523ba9

File tree

7 files changed

+56
-20
lines changed

7 files changed

+56
-20
lines changed

label_studio/examples/audio_transcribe/config.xml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22
<View>
33
<Header value="Listen to the audio and write the transcription" />
44
<AudioPlus name="audio" value="$audio" />
5-
<TextArea name="transcription" toName="audio"
5+
<TextArea name="transcription" toName="audio" editable="true"
66
rows="4" transcription="true" maxSubmissions="1" />
77

8-
98
<Style>
109
[dataneedsupdate]>div:first-child{flex-grow:1;order:2}
1110
[dataneedsupdate]>div:last-child{margin-top:0 !important;margin-right:1em}

label_studio/ml/api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ def _setup():
4343
project = data.get('project')
4444
schema = data.get('schema')
4545
force_reload = data.get('force_reload', False)
46-
model = _manager.fetch(project, schema, force_reload)
46+
hostname = data.get('hostname', '') # host name for uploaded files and building urls
47+
model = _manager.fetch(project, schema, force_reload, hostname=hostname)
4748
logger.debug('Fetch model version: {}'.format(model.model_version))
4849
return jsonify({'model_version': model.model_version})
4950

label_studio/ml/examples/nemo/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,21 @@ With ASR models, you can do audio pre-annotations drawn within a text area, aka
2020
After this app starts on the default 9090 port, configure the template for ASR:
2121
1. In Label Studio, open the project settings page.
2222
2. From the templates list, select `Speech Transcription`. You can also create your own with `<TextArea>` and `<Audio>` tags.
23+
24+
Alternatively, copy this labeling config into Label Studio:
25+
```
26+
<View>
27+
<Header value="Listen to the audio and write the transcription" />
28+
<AudioPlus name="audio" value="$audio" />
29+
<TextArea name="transcription" toName="audio" editable="true"
30+
rows="4" transcription="true" maxSubmissions="1" />
31+
32+
33+
<Style>
34+
[dataneedsupdate]>div:first-child{flex-grow:1;order:2}
35+
[dataneedsupdate]>div:last-child{margin-top:0 !important;margin-right:1em}
36+
</Style>
37+
</View>
38+
```
39+
40+
> Note: The NeMo engine downloads models automatically. This can take some time and could cause the Label Studio UI to hang on the Model page while the models download.

label_studio/ml/model.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from label_studio.utils.misc import parse_config
1818

19-
2019
logger = logging.getLogger(__name__)
2120

2221

@@ -34,6 +33,7 @@ def __init__(self, label_config=None, train_output=None, **kwargs):
3433
self.label_config = label_config
3534
self.parsed_label_config = parse_config(self.label_config)
3635
self.train_output = train_output or {}
36+
self.hostname = kwargs.get('hostname', '')
3737

3838
@abstractmethod
3939
def predict(self, tasks, **kwargs):
@@ -42,6 +42,10 @@ def predict(self, tasks, **kwargs):
4242
def fit(self, completions, workdir=None, **kwargs):
4343
return {}
4444

45+
def get_local_path(self, url, project_dir=None):
46+
from label_studio.ml.utils import get_local_path
47+
return get_local_path(url, project_dir=project_dir, hostname=self.hostname)
48+
4549

4650
class LabelStudioMLManager(object):
4751

label_studio/ml/server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,7 @@ def main():
120120
create_dir(args)
121121
elif args.command == 'start':
122122
start_server(args, subargs)
123+
124+
125+
if __name__ == '__main__':
126+
main()

label_studio/ml/utils.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ def get_choice(completion):
7676

7777

7878
def get_image_local_path(url, image_cache_dir=None, project_dir=None):
79+
return get_local_path(url, image_cache_dir, project_dir)
80+
81+
82+
def get_local_path(url, cache_dir=None, project_dir=None, hostname=None):
7983
is_local_file = url.startswith('/data/') and '?d=' in url
8084
is_uploaded_file = url.startswith('/data/upload')
8185

@@ -86,27 +90,32 @@ def get_image_local_path(url, image_cache_dir=None, project_dir=None):
8690
filepath = os.path.join(dir_path, filename)
8791
if not os.path.exists(filepath):
8892
raise FileNotFoundError(filepath)
93+
return filepath
8994

9095
# File uploaded via import UI
91-
elif is_uploaded_file:
92-
if not project_dir or not os.path.exists(project_dir):
96+
elif is_uploaded_file and project_dir is not None:
97+
if not os.path.exists(project_dir):
9398
raise FileNotFoundError(
9499
"Can't find uploaded file by URL {url}: you need to pass a valid project_dir".format(url=url))
95100
filepath = os.path.join(project_dir, 'upload', os.path.basename(url))
101+
return filepath
102+
103+
elif is_uploaded_file and hostname:
104+
url = hostname + url
105+
logger.info('Resolving url using hostname [' + hostname + '] from LSB: ' + url)
96106

97107
# File specified by remote URL - download and cache it
98-
else:
99-
image_cache_dir = image_cache_dir or get_cache_dir()
100-
parsed_url = urlparse(url)
101-
url_filename = os.path.basename(parsed_url.path)
102-
url_hash = hashlib.md5(url.encode()).hexdigest()[:6]
103-
filepath = os.path.join(image_cache_dir, url_hash + '__' + url_filename)
104-
if not os.path.exists(filepath):
105-
logger.info('Download {url} to {filepath}'.format(url=url, filepath=filepath))
106-
r = requests.get(url, stream=True)
107-
r.raise_for_status()
108-
with io.open(filepath, mode='wb') as fout:
109-
fout.write(r.content)
108+
cache_dir = cache_dir or get_cache_dir()
109+
parsed_url = urlparse(url)
110+
url_filename = os.path.basename(parsed_url.path)
111+
url_hash = hashlib.md5(url.encode()).hexdigest()[:6]
112+
filepath = os.path.join(cache_dir, url_hash + '__' + url_filename)
113+
if not os.path.exists(filepath):
114+
logger.info('Download {url} to {filepath}'.format(url=url, filepath=filepath))
115+
r = requests.get(url, stream=True)
116+
r.raise_for_status()
117+
with io.open(filepath, mode='wb') as fout:
118+
fout.write(r.content)
110119
return filepath
111120

112121

label_studio/utils/models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from requests.adapters import HTTPAdapter
1818
from .io import get_data_dir
1919
from .exceptions import ValidationError
20-
from .functions import _LABEL_CONFIG_SCHEMA_DATA
20+
from .functions import _LABEL_CONFIG_SCHEMA_DATA, get_external_hostname
2121

2222
DEFAULT_PROJECT_ID = 1
2323
logger = logging.getLogger(__name__)
@@ -376,7 +376,8 @@ def setup(self, project):
376376
"""
377377
return self._post('setup', request={
378378
'project': self._create_project_uid(project),
379-
'schema': project.label_config_line
379+
'schema': project.label_config_line,
380+
'hostname': get_external_hostname()
380381
})
381382

382383
def delete(self, project):

0 commit comments

Comments
 (0)