Commit 3be5630

Merge pull request #529 from JohnSnowLabs/206-release-candidate
Release candidate 2.0.6
2 parents e0a1645 + 8914afa commit 3be5630

12 files changed: +60 −46 lines changed

CHANGELOG

Lines changed: 13 additions & 0 deletions
@@ -1,3 +1,16 @@
+========
+2.0.6
+========
+---------------
+Overview
+---------------
+Following 2.0.5 (read the notes below), this release fixes a bug when disabling the contrib param in NerDLApproach on non-Windows operating systems
+
+---------------
+Bugfixes
+---------------
+* Fixed NerDLApproach failing when training with setUseContrib(false)
+
 ========
 2.0.5
 ========
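For context, here is a minimal, hedged Scala sketch of the code path this fix unblocks: training a NerDLApproach with contrib ops disabled. The input column names and training settings are illustrative and not taken from this commit.

```scala
import com.johnsnowlabs.nlp.annotators.ner.dl.NerDLApproach

// Prior to 2.0.6, setUseContrib(false) caused training to fail on
// Linux/macOS; the flag was only meant to force contrib ops off on
// Windows, where the TensorFlow contrib ops are unavailable.
val ner = new NerDLApproach()
  .setInputCols("sentence", "token", "embeddings") // illustrative column names
  .setOutputCol("ner")
  .setLabelColumn("label")   // CoNLL-style label column, assumed
  .setUseContrib(false)      // the flag fixed by this release
  .setMaxEpochs(1)
```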

README.md

Lines changed: 14 additions & 14 deletions
@@ -40,7 +40,7 @@ Take a look at our official Spark NLP page: [http://nlp.johnsnowlabs.com/](http:
 
 ## Apache Spark Support
 
-Spark NLP *2.0.5* has been built on top of Apache Spark 2.4.3
+Spark NLP *2.0.6* has been built on top of Apache Spark 2.4.3
 
 Note that Spark is not backward compatible with Spark 2.3.x, so models and environments might not work.
 
@@ -65,18 +65,18 @@ This library has been uploaded to the [spark-packages repository](https://spark-
 
 The benefit of spark-packages is that it makes the library available for both Scala/Java and Python
 
-To use the most recent version just add the `--packages JohnSnowLabs:spark-nlp:2.0.5` option to your spark command
+To use the most recent version just add the `--packages JohnSnowLabs:spark-nlp:2.0.6` option to your spark command
 
 ```sh
-spark-shell --packages JohnSnowLabs:spark-nlp:2.0.5
+spark-shell --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 ```sh
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.5
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 ```sh
-spark-submit --packages JohnSnowLabs:spark-nlp:2.0.5
+spark-submit --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 This can also be used to create a SparkSession manually by using the `spark.jars.packages` option in both Python and Scala
@@ -144,7 +144,7 @@ Our package is deployed to maven central. In order to add this package as a depe
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp_2.11</artifactId>
-    <version>2.0.5</version>
+    <version>2.0.6</version>
 </dependency>
 ```
 
@@ -155,22 +155,22 @@ and
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp-ocr_2.11</artifactId>
-    <version>2.0.5</version>
+    <version>2.0.6</version>
 </dependency>
 ```
 
 ### SBT
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.0.5"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.0.6"
 ```
 
 and
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-ocr
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-ocr" % "2.0.5"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-ocr" % "2.0.6"
 ```
 
 Maven Central: [https://mvnrepository.com/artifact/com.johnsnowlabs.nlp](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp)
@@ -184,7 +184,7 @@ Maven Central: [https://mvnrepository.com/artifact/com.johnsnowlabs.nlp](https:/
 If you installed pyspark through pip, you can install `spark-nlp` through pip as well.
 
 ```bash
-pip install spark-nlp==2.0.5
+pip install spark-nlp==2.0.6
 ```
 
 PyPI [spark-nlp package](https://pypi.org/project/spark-nlp/)
@@ -207,7 +207,7 @@ spark = SparkSession.builder \
     .master("local[4]")\
     .config("spark.driver.memory","4G")\
     .config("spark.driver.maxResultSize", "2G") \
-    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5")\
+    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6")\
     .config("spark.kryoserializer.buffer.max", "500m")\
     .getOrCreate()
 ```
@@ -221,7 +221,7 @@ Use either one of the following options
 * Add the following Maven Coordinates to the interpreter's library list
 
 ```bash
-com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.5
+com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.6
 ```
 
 * Add a path to the pre-built jar from [here](#pre-compiled-spark-nlp-and-spark-nlp-ocr) to the interpreter's library list, making sure the jar is available on the driver path
@@ -231,7 +231,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.5
 In addition to the previous step, install the python module through pip
 
 ```bash
-pip install spark-nlp==2.0.5
+pip install spark-nlp==2.0.6
 ```
 
 Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -256,7 +256,7 @@ export PYSPARK_PYTHON=python3
 export PYSPARK_DRIVER_PYTHON=jupyter
 export PYSPARK_DRIVER_PYTHON_OPTS=notebook
 
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.5
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
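The README's manual-SparkSession example above is Python; here is a minimal Scala sketch of the same `spark.jars.packages` approach. The settings mirror the Python snippet, and it assumes the application is launched in a way that lets Spark resolve package coordinates (as the repo's own SparkNLP.scala does below).

```scala
import org.apache.spark.sql.SparkSession

// Resolves Spark NLP 2.0.6 from spark-packages at session startup,
// mirroring `--packages` on the command line.
val spark = SparkSession.builder()
  .appName("Spark NLP")
  .master("local[4]")
  .config("spark.driver.memory", "4G")
  .config("spark.driver.maxResultSize", "2G")
  .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6")
  .config("spark.kryoserializer.buffer.max", "500m")
  .getOrCreate()
```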

build.sbt

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ if(is_gpu.equals("false")){
 
 organization:= "com.johnsnowlabs.nlp"
 
-version := "2.0.5"
+version := "2.0.6"
 
 scalaVersion in ThisBuild := scalaVer
 
@@ -178,7 +178,7 @@ assemblyMergeStrategy in assembly := {
 lazy val ocr = (project in file("ocr"))
   .settings(
     name := "spark-nlp-ocr",
-    version := "2.0.5",
+    version := "2.0.6",
 
     test in assembly := {},
 
docs/Gemfile.lock

Lines changed: 1 addition & 1 deletion
@@ -211,7 +211,7 @@ GEM
       sawyer (~> 0.8.0, >= 0.5.3)
     pathutil (0.16.2)
       forwardable-extended (~> 2.6)
-    public_suffix (2.0.5)
+    public_suffix (2.0.6)
     rb-fsevent (0.10.3)
     rb-inotify (0.10.0)
       ffi (~> 1.0)

docs/_layouts/landing.html

Lines changed: 5 additions & 5 deletions
@@ -49,22 +49,22 @@ <h1>{{ _section.title }}</h1>
 <div class="cell cell--12 cell--lg-12" style="text-align: left; background-color: #2d2d2d; padding: 10px">
 {% highlight bash %}
 # Install Spark NLP from PyPI
-$ pip install spark-nlp==2.0.5
+$ pip install spark-nlp==2.0.6
 
 # Install Spark NLP from Anaconda/Conda
 $ conda install -c johnsnowlabs spark-nlp
 
 # Load Spark NLP with Spark Shell
-$ spark-shell --packages JohnSnowLabs:spark-nlp:2.0.5
+$ spark-shell --packages JohnSnowLabs:spark-nlp:2.0.6
 
 # Load Spark NLP with PySpark
-$ pyspark --packages JohnSnowLabs:spark-nlp:2.0.5
+$ pyspark --packages JohnSnowLabs:spark-nlp:2.0.6
 
 # Load Spark NLP with Spark Submit
-$ spark-submit --packages JohnSnowLabs:spark-nlp:2.0.5
+$ spark-submit --packages JohnSnowLabs:spark-nlp:2.0.6
 
 # Load Spark NLP as an external JAR after compiling and building Spark NLP with `sbt assembly`
-$ spark-shell --jars spark-nlp-assembly-2.0.5
+$ spark-shell --jars spark-nlp-assembly-2.0.6
 {% endhighlight %}
 </div>
 </div>

docs/en/install.md

Lines changed: 7 additions & 7 deletions
@@ -13,7 +13,7 @@ modify_date: "2019-05-16"
 If you installed pyspark through pip, you can install `spark-nlp` through pip as well.
 
 ```bash
-pip install spark-nlp==2.0.5
+pip install spark-nlp==2.0.6
 ```
 
 PyPI [spark-nlp package](https://pypi.org/project/spark-nlp/)
@@ -36,7 +36,7 @@ spark = SparkSession.builder \
     .master("local[*]")\
     .config("spark.driver.memory","8G")\
     .config("spark.driver.maxResultSize", "2G") \
-    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5")\
+    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6")\
     .config("spark.kryoserializer.buffer.max", "500m")\
     .getOrCreate()
 ```
@@ -97,7 +97,7 @@ Our package is deployed to maven central. In order to add this package as a depe
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp_2.11</artifactId>
-    <version>2.0.5</version>
+    <version>2.0.6</version>
 </dependency>
 ```
 
@@ -108,22 +108,22 @@ and
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp-ocr_2.11</artifactId>
-    <version>2.0.5</version>
+    <version>2.0.6</version>
 </dependency>
 ```
 
 ### SBT
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.0.5"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "2.0.6"
 ```
 
 and
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-ocr
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-ocr" % "2.0.5"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-ocr" % "2.0.6"
 ```
 
 Maven Central: [https://mvnrepository.com/artifact/com.johnsnowlabs.nlp](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp)
@@ -151,7 +151,7 @@ Note: You can import these notebooks by using their URLs.
 4- From the Source drop-down menu, select **Maven Coordinate:**
 ![Databricks](https://databricks.com/wp-content/uploads/2015/07/select-maven-1024x711.png)
 
-5- Now, all available **Spark Packages** are at your fingertips! Just search for **JohnSnowLabs:spark-nlp:version** where **version** stands for the library version such as: `1.8.4` or `2.0.5`
+5- Now, all available **Spark Packages** are at your fingertips! Just search for **JohnSnowLabs:spark-nlp:version** where **version** stands for the library version such as: `1.8.4` or `2.0.6`
 ![Databricks](https://databricks.com/wp-content/uploads/2015/07/browser-1024x548.png)
 
 6- Select the **spark-nlp** package and we are good to go!

docs/en/quickstart.md

Lines changed: 9 additions & 9 deletions
@@ -29,17 +29,17 @@ Spark NLP is built on top of **Apache Spark 2.4.0** and such is the **only** sup
 To start using the library, execute any of the following lines depending on your desired use case:
 
 ```bash
-spark-shell --packages JohnSnowLabs:spark-nlp:2.0.5
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.5
-spark-submit --packages JohnSnowLabs:spark-nlp:2.0.5
+spark-shell --packages JohnSnowLabs:spark-nlp:2.0.6
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.6
+spark-submit --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 ### **Straightforward Python on a jupyter notebook**
 
 Use pip to install (after you have pip-installed numpy and pyspark)
 
 ```bash
-pip install spark-nlp==2.0.5
+pip install spark-nlp==2.0.6
 jupyter notebook
 ```
 
@@ -60,7 +60,7 @@ spark = SparkSession.builder \
     .appName('OCR Eval') \
     .config("spark.driver.memory", "6g") \
     .config("spark.executor.memory", "6g") \
-    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5") \
+    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6") \
     .getOrCreate()
 ```
 
@@ -69,13 +69,13 @@
 Add the following maven coordinates in the dependency configuration page:
 
 ```bash
-com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.5
+com.johnsnowlabs.nlp:spark-nlp_2.11:2.0.6
 ```
 
 For Python in **Apache Zeppelin** you may need to set up _**SPARK_SUBMIT_OPTIONS**_ using the --packages instruction shown above, like this:
 
 ```bash
-export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:2.0.5"
+export SPARK_SUBMIT_OPTIONS="--packages JohnSnowLabs:spark-nlp:2.0.6"
 ```
 
 ### **Python Jupyter Notebook with PySpark**
@@ -85,7 +85,7 @@ export SPARK_HOME=/path/to/your/spark/folder
 export PYSPARK_DRIVER_PYTHON=jupyter
 export PYSPARK_DRIVER_PYTHON_OPTS=notebook
 
-pyspark --packages JohnSnowLabs:spark-nlp:2.0.5
+pyspark --packages JohnSnowLabs:spark-nlp:2.0.6
 ```
 
 ### S3 based standalone cluster (No Hadoop)
@@ -297,7 +297,7 @@ lightPipeline.annotate("Hello world, please annotate my text")
 The Spark NLP OCR Module is not included within Spark NLP. It is neither an annotator nor an extension to Spark ML. You can include it with the following coordinates for Maven:
 
 ```bash
-com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.5
+com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.6
 ```
 
 ### Creating Spark datasets from PDF (To be used with Spark NLP)
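The final hunk's context line references `lightPipeline.annotate(...)`. For orientation, here is a hedged sketch of how such a LightPipeline is typically constructed; the `pipelineModel` is assumed to be a Spark ML `PipelineModel` already fitted with Spark NLP annotators.

```scala
import com.johnsnowlabs.nlp.LightPipeline
import org.apache.spark.ml.PipelineModel

// Wrap a fitted pipeline for fast, driver-local annotation of small inputs.
val pipelineModel: PipelineModel = ??? // assumed: fitted elsewhere on a "text" DataFrame
val lightPipeline = new LightPipeline(pipelineModel)

// Returns a Map from output column name to its annotation results.
val annotations = lightPipeline.annotate("Hello world, please annotate my text")
```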

python/setup.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@
     # For a discussion on single-sourcing the version across setup.py and the
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
-    version='2.0.5',  # Required
+    version='2.0.6',  # Required
 
     # This is a one-line description or tagline of what your project does. This
     # corresponds to the "Summary" metadata field:

python/sparknlp/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -40,14 +40,14 @@ def start(include_ocr=False):
 
     if include_ocr:
         builder \
-            .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.5,javax.media.jai:com.springsource.javax.media.jai.core:1.1.3") \
+            .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.6,javax.media.jai:com.springsource.javax.media.jai.core:1.1.3") \
             .config("spark.jars.repositories", "http://repo.spring.io/plugins-release")
 
     else:
-        builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5")
+        builder.config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6")
 
     return builder.getOrCreate()
 
 
 def version():
-    print('2.0.5')
+    print('2.0.6')

src/main/scala/com/johnsnowlabs/nlp/SparkNLP.scala

Lines changed: 3 additions & 3 deletions
@@ -13,18 +13,18 @@ object SparkNLP {
 
     if (includeOcr) {
       build
-        .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.5,javax.media.jai:com.springsource.javax.media.jai.core:1.1.3")
+        .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6,com.johnsnowlabs.nlp:spark-nlp-ocr_2.11:2.0.6,javax.media.jai:com.springsource.javax.media.jai.core:1.1.3")
         .config("spark.jars.repositories", "http://repo.spring.io/plugins-release")
     } else {
       build
-        .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.5")
+        .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.0.6")
     }
 
     build.getOrCreate()
   }
 
   def version(): Unit = {
-    println("2.0.5")
+    println("2.0.6")
   }
 
 }
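A short usage sketch of the entry points above. The `includeOcr` parameter name comes from the snippet itself; its default value is assumed to mirror the Python `start(include_ocr=False)` shown earlier.

```scala
import com.johnsnowlabs.nlp.SparkNLP

// Builds (or returns) a SparkSession with the 2.0.6 package coordinates
// configured, as in the object above.
val spark = SparkNLP.start(includeOcr = false) // true would also pull the OCR module

SparkNLP.version() // prints: 2.0.6
```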
