Review notes (text before the first "diff --git" header is ignored by patch tools):
- Renames the TfidfVectorizer keywords df_max_freq/df_min_freq to max_df/min_df
  (assumes TfidfVectorizer is scikit-learn's class - confirm against the file's imports).
- Also makes the trailing token_pattern fragment a raw string. In a normal string
  literal "\b" is a backspace character (U+0008), not the regex word-boundary
  assertion, so the compiled pattern would require a literal backspace after
  every token.
- Line breaks and indentation below were reconstructed from a collapsed copy of
  this patch, and the post-image blob hash on the index line predates the
  token_pattern change. Verify against the target file before applying.

diff --git a/octis/preprocessing/preprocessing.py b/octis/preprocessing/preprocessing.py
index 64ede3ee..294a8351 100644
--- a/octis/preprocessing/preprocessing.py
+++ b/octis/preprocessing/preprocessing.py
@@ -264,6 +264,6 @@ def filter_words(self, docs):
             self.preprocessing_steps.append('filter words with document frequency lower than ' + str(self.min_df) +
                                             ' and higher than ' + str(self.max_df))
             self.preprocessing_steps.append('filter words with less than ' + str(self.min_chars) + " character")
-            vectorizer = TfidfVectorizer(df_max_freq=self.max_df, df_min_freq=self.min_df, vocabulary=self.vocabulary,
-                                         token_pattern=r"(?u)\b\w{" + str(self.min_chars) + ",}\b",
+            vectorizer = TfidfVectorizer(max_df=self.max_df, min_df=self.min_df, vocabulary=self.vocabulary,
+                                         token_pattern=r"(?u)\b\w{" + str(self.min_chars) + r",}\b",
                                          lowercase=self.lowercase, stop_words=self.stopwords)