Skip to content

Commit 6448564

Browse files
committed
Default name updates
1 parent 6653546 commit 6448564

File tree

10 files changed

+35
-34
lines changed

10 files changed

+35
-34
lines changed

python/sparknlp/annotator/embeddings/nomic_embeddings.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class NomicEmbeddings(AnnotatorModel, HasEmbeddingsProperties, HasCaseSensitiveP
3131
... .setOutputCol("nomic_embeddings")
3232
3333
34-
The default model is ``"nomic_small"``, if no name is provided.
34+
The default model is ``"nomic_embed_v1"``, if no name is provided.
3535
3636
For available pretrained models please see the
3737
`Models Hub <https://sparknlp.org/models?q=Nomic>`__.
@@ -159,13 +159,13 @@ def loadSavedModel(folder, spark_session, use_openvino=False):
159159
return NomicEmbeddings(java_model=jModel)
160160

161161
@staticmethod
162-
def pretrained(name="nomic_small", lang="en", remote_loc=None):
162+
def pretrained(name="nomic_embed_v1", lang="en", remote_loc=None):
163163
"""Downloads and loads a pretrained model.
164164
165165
Parameters
166166
----------
167167
name : str, optional
168-
Name of the pretrained model, by default "nomic_small"
168+
Name of the pretrained model, by default "nomic_embed_v1"
169169
lang : str, optional
170170
Language of the pretrained model, by default "en"
171171
remote_loc : str, optional

python/sparknlp/annotator/seq2seq/cpm_transformer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class CPMTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
4444
... .setOutputCol("generation")
4545
4646
47-
The default model is ``"llam2-7b"``, if no name is provided. For available
47+
The default model is ``"mini_cpm_2b_8bit"``, if no name is provided. For available
4848
pretrained models please see the `Models Hub
4949
<https://sparknlp.org/models?q=cpm>`__.
5050
@@ -104,7 +104,7 @@ class CPMTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
104104
>>> documentAssembler = DocumentAssembler() \\
105105
... .setInputCol("text") \\
106106
... .setOutputCol("documents")
107-
>>> cpm = CPMTransformer.pretrained("llama_2_7b_chat_hf_int4") \\
107+
>>> cpm = CPMTransformer.pretrained("mini_cpm_2b_8bit", "xx") \\
108108
... .setInputCols(["documents"]) \\
109109
... .setMaxOutputLength(50) \\
110110
... .setOutputCol("generation")
@@ -299,15 +299,15 @@ def loadSavedModel(folder, spark_session, use_openvino = False):
299299
return CPMTransformer(java_model=jModel)
300300

301301
@staticmethod
302-
def pretrained(name="llama_2_7b_chat_hf_int4", lang="en", remote_loc=None):
302+
def pretrained(name="mini_cpm_2b_8bit", lang="xx", remote_loc=None):
303303
"""Downloads and loads a pretrained model.
304304
305305
Parameters
306306
----------
307307
name : str, optional
308-
Name of the pretrained model, by default "llama_2_7b_chat_hf_int4"
308+
Name of the pretrained model, by default "mini_cpm_2b_8bit"
309309
lang : str, optional
310-
Language of the pretrained model, by default "en"
310+
Language of the pretrained model, by default "xx"
311311
remote_loc : str, optional
312312
Optional remote address of the resource, by default None. Will use
313313
Spark NLPs repositories otherwise.

python/sparknlp/annotator/seq2seq/nllb_transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class NLLBTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
3232
... .setOutputCol("generation")
3333
3434
35-
The default model is ``"nllb_418M"``, if no name is provided. For available
35+
The default model is ``"nllb_distilled_600M_8int"``, if no name is provided. For available
3636
pretrained models please see the `Models Hub
3737
<https://sparknlp.org/models?q=nllb>`__.
3838
@@ -164,7 +164,7 @@ class NLLBTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
164164
>>> documentAssembler = DocumentAssembler() \\
165165
... .setInputCol("text") \\
166166
... .setOutputCol("documents")
167-
>>> nllb = NLLBTransformer.pretrained("nllb_418M") \\
167+
>>> nllb = NLLBTransformer.pretrained("nllb_distilled_600M_8int") \\
168168
... .setInputCols(["documents"]) \\
169169
... .setMaxOutputLength(50) \\
170170
... .setOutputCol("generation") \\
@@ -398,13 +398,13 @@ def loadSavedModel(folder, spark_session, use_openvino=False):
398398
return NLLBTransformer(java_model=jModel)
399399

400400
@staticmethod
401-
def pretrained(name="nllb_418M", lang="xx", remote_loc=None):
401+
def pretrained(name="nllb_distilled_600M_8int", lang="xx", remote_loc=None):
402402
"""Downloads and loads a pretrained model.
403403
404404
Parameters
405405
----------
406406
name : str, optional
407-
Name of the pretrained model, by default "nllb_418M"
407+
Name of the pretrained model, by default "nllb_distilled_600M_8int"
408408
lang : str, optional
409409
Language of the pretrained model, by default "xx"
410410
remote_loc : str, optional

python/sparknlp/annotator/seq2seq/phi3_transformer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class Phi3Transformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
3737
... .setOutputCol("generation")
3838
3939
40-
The default model is ``"phi3"``, if no name is provided. For available
40+
The default model is ``"phi_3_mini_128k_instruct"``, if no name is provided. For available
4141
pretrained models please see the `Models Hub
4242
<https://sparknlp.org/models?q=phi3>`__.
4343
@@ -112,7 +112,7 @@ class Phi3Transformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
112112
>>> documentAssembler = DocumentAssembler() \\
113113
... .setInputCol("text") \\
114114
... .setOutputCol("documents")
115-
>>> phi3 = Phi3Transformer.pretrained("phi3") \\
115+
>>> phi3 = Phi3Transformer.pretrained("phi_3_mini_128k_instruct") \\
116116
... .setInputCols(["documents"]) \\
117117
... .setMaxOutputLength(50) \\
118118
... .setOutputCol("generation")
@@ -308,13 +308,13 @@ def loadSavedModel(folder, spark_session, use_openvino=False):
308308
return Phi3Transformer(java_model=jModel)
309309

310310
@staticmethod
311-
def pretrained(name="phi3", lang="en", remote_loc=None):
311+
def pretrained(name="phi_3_mini_128k_instruct", lang="en", remote_loc=None):
312312
"""Downloads and loads a pretrained model.
313313
314314
Parameters
315315
----------
316316
name : str, optional
317-
Name of the pretrained model, by default "phi3"
317+
Name of the pretrained model, by default "phi_3_mini_128k_instruct"
318318
lang : str, optional
319319
Language of the pretrained model, by default "en"
320320
remote_loc : str, optional

python/sparknlp/annotator/seq2seq/qwen_transformer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ class QwenTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
121121
>>> documentAssembler = DocumentAssembler() \\
122122
... .setInputCol("text") \\
123123
... .setOutputCol("documents")
124-
>>> qwen = QwenTransformer.pretrained("qwen-7b") \\
124+
>>> qwen = QwenTransformer.pretrained("qwen_7.5b_chat") \\
125125
... .setInputCols(["documents"]) \\
126126
... .setMaxOutputLength(50) \\
127127
... .setOutputCol("generation")
@@ -317,13 +317,13 @@ def loadSavedModel(folder, spark_session, use_openvino=False):
317317
return QwenTransformer(java_model=jModel)
318318

319319
@staticmethod
320-
def pretrained(name="qwen-7b", lang="en", remote_loc=None):
320+
def pretrained(name="qwen_7.5b_chat", lang="en", remote_loc=None):
321321
"""Downloads and loads a pretrained model.
322322
323323
Parameters
324324
----------
325325
name : str, optional
326-
Name of the pretrained model, by default "qwen-7b"
326+
Name of the pretrained model, by default "qwen_7.5b_chat"
327327
lang : str, optional
328328
Language of the pretrained model, by default "en"
329329
remote_loc : str, optional

src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/CPMTransformer.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ import org.json4s.jackson.JsonMethods._
6868
* .setInputCols("document")
6969
* .setOutputCol("generation")
7070
* }}}
71-
* The default model is `"llama_2_7b_chat_hf_int4"`, if no name is provided. For available
71+
* The default model is `"mini_cpm_2b_8bit"`, if no name is provided. For available
7272
* pretrained models please see the [[https://sparknlp.org/models?q=cpm Models Hub]].
7373
*
7474
* For extended examples of usage, see
@@ -94,7 +94,7 @@ import org.json4s.jackson.JsonMethods._
9494
* .setInputCol("text")
9595
* .setOutputCol("documents")
9696
*
97-
* val cpm = CPMTransformer.pretrained("llama_2_7b_chat_hf_int4")
97+
* val cpm = CPMTransformer.pretrained("mini_cpm_2b_8bit")
9898
* .setInputCols(Array("documents"))
9999
* .setMinOutputLength(10)
100100
* .setMaxOutputLength(50)
@@ -311,7 +311,8 @@ class CPMTransformer(override val uid: String)
311311
trait ReadablePretrainedCPMTransformerModel
312312
extends ParamsAndFeaturesReadable[CPMTransformer]
313313
with HasPretrained[CPMTransformer] {
314-
override val defaultModelName: Some[String] = Some("llama_2_7b_chat_hf_int4")
314+
override val defaultModelName: Some[String] = Some("mini_cpm_2b_8bit")
315+
override val defaultLang: String = "xx"
315316

316317
/** Java compliant-overrides */
317318
override def pretrained(): CPMTransformer = super.pretrained()

src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/NLLBTransformer.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ import org.json4s.jackson.JsonMethods._
5959
* .setInputCols("document")
6060
* .setOutputCol("generation")
6161
* }}}
62-
* The default model is `"nllb_418M"`, if no name is provided. For available pretrained models
62+
* The default model is `"nllb_distilled_600M_8int"`, if no name is provided. For available pretrained models
6363
* please see the [[https://sparknlp.org/models?q=nllb Models Hub]].
6464
*
6565
* For extended examples of usage, see
@@ -156,7 +156,7 @@ import org.json4s.jackson.JsonMethods._
156156
* .setInputCol("text")
157157
* .setOutputCol("documents")
158158
*
159-
* val nllb = NLLBTransformer.pretrained("nllb_418M")
159+
* val nllb = NLLBTransformer.pretrained("nllb_distilled_600M_8int")
160160
* .setInputCols(Array("documents"))
161161
* .setSrcLang("zho_Hans")
162162
* .setTgtLang("eng_Latn")
@@ -635,7 +635,7 @@ class NLLBTransformer(override val uid: String)
635635
trait ReadablePretrainedNLLBTransformerModel
636636
extends ParamsAndFeaturesReadable[NLLBTransformer]
637637
with HasPretrained[NLLBTransformer] {
638-
override val defaultModelName: Some[String] = Some("nllb_418M")
638+
override val defaultModelName: Some[String] = Some("nllb_distilled_600M_8int")
639639
override val defaultLang: String = "xx"
640640

641641
/** Java compliant-overrides */

src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/Phi3Transformer.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ import org.json4s.jackson.JsonMethods._
6565
* .setInputCols("document")
6666
* .setOutputCol("generation")
6767
* }}}
68-
* The default model is `"phi_3_mini_128k_instruct_int8"`, if no name is provided. For available
68+
* The default model is `"phi_3_mini_128k_instruct"`, if no name is provided. For available
6969
* pretrained models please see the [[https://sparknlp.org/models?q=phi3 Models Hub]].
7070
*
7171
* For extended examples of usage, see
@@ -106,7 +106,7 @@ import org.json4s.jackson.JsonMethods._
106106
* .setInputCol("text")
107107
* .setOutputCol("documents")
108108
*
109-
* val phi3 = Phi3Transformer.pretrained("phi_3_mini_128k_instruct_int8")
109+
* val phi3 = Phi3Transformer.pretrained("phi_3_mini_128k_instruct")
110110
* .setInputCols(Array("documents"))
111111
* .setMinOutputLength(10)
112112
* .setMaxOutputLength(50)
@@ -323,7 +323,7 @@ class Phi3Transformer(override val uid: String)
323323
trait ReadablePretrainedPhi3TransformerModel
324324
extends ParamsAndFeaturesReadable[Phi3Transformer]
325325
with HasPretrained[Phi3Transformer] {
326-
override val defaultModelName: Some[String] = Some("phi_3_mini_128k_instruct_int8")
326+
override val defaultModelName: Some[String] = Some("phi_3_mini_128k_instruct")
327327

328328
/** Java compliant-overrides */
329329
override def pretrained(): Phi3Transformer = super.pretrained()

src/main/scala/com/johnsnowlabs/nlp/annotators/seq2seq/QwenTransformer.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ import org.json4s.jackson.JsonMethods._
6868
* .setInputCols("document")
6969
* .setOutputCol("generation")
7070
* }}}
71-
* The default model is `"Qwen-13b"`, if no name is provided. For available pretrained models
71+
* The default model is `"qwen_7.5b_chat"`, if no name is provided. For available pretrained models
7272
* please see the [[https://sparknlp.org/models?q=Qwen Models Hub]].
7373
*
7474
* For extended examples of usage, see
@@ -113,7 +113,7 @@ import org.json4s.jackson.JsonMethods._
113113
* .setInputCol("text")
114114
* .setOutputCol("documents")
115115
*
116-
* val Qwen = QwenTransformer.pretrained("Qwen-7b")
116+
* val Qwen = QwenTransformer.pretrained("qwen_7.5b_chat")
117117
* .setInputCols(Array("documents"))
118118
* .setMinOutputLength(10)
119119
* .setMaxOutputLength(50)
@@ -334,7 +334,7 @@ class QwenTransformer(override val uid: String)
334334
trait ReadablePretrainedQwenTransformerModel
335335
extends ParamsAndFeaturesReadable[QwenTransformer]
336336
with HasPretrained[QwenTransformer] {
337-
override val defaultModelName: Some[String] = Some("Qwen-7b")
337+
override val defaultModelName: Some[String] = Some("qwen_7.5b_chat")
338338

339339
/** Java compliant-overrides */
340340
override def pretrained(): QwenTransformer = super.pretrained()

src/main/scala/com/johnsnowlabs/nlp/embeddings/NomicEmbeddings.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ import com.johnsnowlabs.ml.openvino.{OpenvinoWrapper, ReadOpenvinoModel, WriteOp
4949
* .setInputCols("document")
5050
* .setOutputCol("nomic_embeddings")
5151
* }}}
52-
* The default model is `"nomic_small"`, if no name is provided.
52+
* The default model is `"nomic_embed_v1"`, if no name is provided.
5353
*
5454
* For available pretrained models please see the
5555
* [[https://sparknlp.org/models?q=NomicEmbeddings Models Hub]].
@@ -86,7 +86,7 @@ import com.johnsnowlabs.ml.openvino.{OpenvinoWrapper, ReadOpenvinoModel, WriteOp
8686
* .setInputCol("text")
8787
* .setOutputCol("document")
8888
*
89-
* val embeddings = NomicEmbeddings.pretrained("nomic_small", "en")
89+
* val embeddings = NomicEmbeddings.pretrained("nomic_embed_v1", "en")
9090
* .setInputCols("document")
9191
* .setOutputCol("nomic_embeddings")
9292
*
@@ -357,7 +357,7 @@ class NomicEmbeddings(override val uid: String)
357357
trait ReadablePretrainedNomicEmbeddingsModel
358358
extends ParamsAndFeaturesReadable[NomicEmbeddings]
359359
with HasPretrained[NomicEmbeddings] {
360-
override val defaultModelName: Some[String] = Some("nomic_small")
360+
override val defaultModelName: Some[String] = Some("nomic_embed_v1")
361361

362362
/** Java compliant-overrides */
363363
override def pretrained(): NomicEmbeddings = super.pretrained()

0 commit comments

Comments
 (0)