Add Telugu support (lite VGG recognizer); register Kannada in config

rkcosmos · rkcosmos · commit c7fb3029d987 · 2020-11-15T17:13:34.000+07:00
diff --git a/easyocr/config.py b/easyocr/config.py
@@ -21,7 +21,7 @@
                       'ava','dar','inh','che','lbe','lez','tab']
 devanagari_lang_list = ['hi','mr','ne','bh','mai','ang','bho','mah','sck','new',\
                         'gom','sa','bgc']
-other_lang_list = ['th','ch_sim','ch_tra','ja','ko','ta']
+other_lang_list = ['th','ch_sim','ch_tra','ja','ko','ta','te','kn']  # NOTE(review): 'kn' is registered here, but this patch only shows a model-selection branch for 'te' — confirm 'kn' is handled elsewhere
 
 all_lang_list = latin_lang_list + arabic_lang_list+ cyrillic_lang_list +\
                 devanagari_lang_list + bengali_lang_list + other_lang_list
@@ -48,6 +48,7 @@
     'bn_char' : '।ঁংঃঅআইঈউঊঋঌএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ািীুূৃেৈোৌ্ৎড়ঢ়য়০১২৩৪৫৬৭৮৯',
     'th_char' : 'กขคฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮฤ' +'เแโใไะา'+ special_c +  'ํฺ'+'ฯๆ',
     'th_number' : '0123456789๑๒๓๔๕๖๗๘๙',
+    'te_char': 'ఁంఃఅఆఇఈఉఊఋఌఎఏఐఒఓఔకఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళవశషసహాిీుూృౄెేైొోౌ్ౠౡౢౣ',
 }
 
 # first element is url path, second is file size
@@ -64,4 +65,6 @@
     'arabic.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/arabic.zip', '993074555550e4e06a6077d55ff0449a'),
     'tamil.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.7/tamil.zip', '4b93972fdacdcdabe6d57097025d4dc2'),
     'bengali.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.8/bengali.zip', 'cea9e897e2c0576b62cbb1554997ce1c'),
+    'telugu.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.11/telugu.zip', 'f7576012a3abe593950c47bfa1bd8ddc'),
+    'kannada.pth': ('https://github.com/JaidedAI/EasyOCR/releases/download/v1.1.11/kannada.zip', ''),
 }
diff --git a/easyocr/easyocr.py b/easyocr/easyocr.py
@@ -125,6 +125,8 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
                 self.setModelLanguage('korean', lang_list, ['ko','en'], '["ko","en"]')
             elif 'ta' in lang_list:
                 self.setModelLanguage('tamil', lang_list, ['ta','en'], '["ta","en"]')
+            elif 'te' in lang_list:
+                self.setModelLanguage('telugu', lang_list, ['te','en'], '["te","en"]')
             elif set(lang_list) & set(bengali_lang_list):
                 self.setModelLanguage('bengali', lang_list, bengali_lang_list+['en'], '["bn","as","en"]')
             elif set(lang_list) & set(arabic_lang_list):
@@ -171,6 +173,10 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
                 ta_char = self.getChar("ta_char.txt")
                 self.character = number + symbol + characters['en_char'] + ta_char
                 model_file = 'tamil.pth'
+            elif  self.model_lang == 'telugu':
+                self.character = number + symbol + characters['en_char'] + characters['te_char']
+                model_file = 'telugu.pth'
+                recog_network = 'lite'
             elif self.model_lang == 'thai':
                 separator_list = {
                     'th': ['\xa2', '\xa3'],
@@ -227,6 +233,12 @@ def __init__(self, lang_list, gpu=True, model_storage_directory=None,
                     'output_channel': 512,
                     'hidden_size': 512
                     }
+            elif recog_network == 'lite':
+                network_params = {
+                    'input_channel': 1,
+                    'output_channel': 256,
+                    'hidden_size': 256
+                    }
             else:
                 network_params = recog_config['network_params']
             self.recognizer, self.converter = get_recognizer(recog_network, network_params,\
diff --git a/easyocr/model/modules.py b/easyocr/model/modules.py
@@ -89,6 +89,30 @@ def forward(self, input):
         output = self.linear(recurrent)  # batch_size x T x output_size
         return output
 
+class VGG_FeatureExtractor(nn.Module):
+    """VGG-style convolutional feature extractor: seven Conv2d layers with ReLU, four max-pools and two BatchNorm2d."""
+    def __init__(self, input_channel, output_channel=256):  # input_channel feeds the first Conv2d; output_channel is the width of the last stage
+        super(VGG_FeatureExtractor, self).__init__()
+        self.output_channel = [int(output_channel / 8), int(output_channel / 4),  # per-stage widths: 1/8, 1/4,
+                               int(output_channel / 2), output_channel]  # 1/2 and the full output_channel (int() truncates)
+        self.ConvNet = nn.Sequential(  # NOTE(review): layer order fixes the Sequential indices used in state_dict keys — presumably must match the released checkpoint; confirm before reordering
+            nn.Conv2d(input_channel, self.output_channel[0], 3, 1, 1), nn.ReLU(True),  # 3x3 conv, stride 1, padding 1; in-place ReLU
+            nn.MaxPool2d(2, 2),  # 2x2 pool, stride 2: halves height and width
+            nn.Conv2d(self.output_channel[0], self.output_channel[1], 3, 1, 1), nn.ReLU(True),
+            nn.MaxPool2d(2, 2),  # second 2x2 pool: height and width halved again
+            nn.Conv2d(self.output_channel[1], self.output_channel[2], 3, 1, 1), nn.ReLU(True),
+            nn.Conv2d(self.output_channel[2], self.output_channel[2], 3, 1, 1), nn.ReLU(True),
+            nn.MaxPool2d((2, 1), (2, 1)),  # kernel and stride (2, 1): pools height only, width is kept
+            nn.Conv2d(self.output_channel[2], self.output_channel[3], 3, 1, 1, bias=False),  # no bias: BatchNorm2d follows
+            nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
+            nn.Conv2d(self.output_channel[3], self.output_channel[3], 3, 1, 1, bias=False),  # no bias: BatchNorm2d follows
+            nn.BatchNorm2d(self.output_channel[3]), nn.ReLU(True),
+            nn.MaxPool2d((2, 1), (2, 1)),  # pools height only again
+            nn.Conv2d(self.output_channel[3], self.output_channel[3], 2, 1, 0), nn.ReLU(True))  # 2x2 conv, stride 1, no padding: each spatial dim shrinks by 1
+
+    def forward(self, input):  # returns the ConvNet output for `input` unchanged
+        return self.ConvNet(input)
+
 class ResNet_FeatureExtractor(nn.Module):
     """ FeatureExtractor of FAN (http://openaccess.thecvf.com/content_ICCV_2017/papers/Cheng_Focusing_Attention_Towards_ICCV_2017_paper.pdf) """
 
diff --git a/easyocr/model/vgg_model.py b/easyocr/model/vgg_model.py
@@ -0,0 +1,35 @@
+import torch.nn as nn
+from .modules import VGG_FeatureExtractor, BidirectionalLSTM
+
+class Model(nn.Module):
+    """Recognition network: VGG_FeatureExtractor, then two stacked BidirectionalLSTM layers, then a Linear layer producing num_class outputs."""
+    def __init__(self, input_channel, output_channel, hidden_size, num_class):  # num_class is the out_features of the final Linear
+        super(Model, self).__init__()
+        """ FeatureExtraction """
+        self.FeatureExtraction = VGG_FeatureExtractor(input_channel, output_channel)
+        self.FeatureExtraction_output = output_channel  # used as the input size of the first BidirectionalLSTM
+        self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))  # None keeps the second-to-last axis size; the last axis is averaged down to 1
+
+        """ Sequence modeling"""
+        self.SequenceModeling = nn.Sequential(
+            BidirectionalLSTM(self.FeatureExtraction_output, hidden_size, hidden_size),
+            BidirectionalLSTM(hidden_size, hidden_size, hidden_size))
+        self.SequenceModeling_output = hidden_size  # used as in_features of the prediction layer
+
+        """ Prediction """
+        self.Prediction = nn.Linear(self.SequenceModeling_output, num_class)
+
+
+    def forward(self, input, text):  # `text` is accepted but never read in this method
+        """ Feature extraction stage """
+        visual_feature = self.FeatureExtraction(input)
+        visual_feature = self.AdaptiveAvgPool(visual_feature.permute(0, 3, 1, 2))  # for an (N, C, H, W) feature map: permute -> (N, W, C, H), pool -> (N, W, C, 1)
+        visual_feature = visual_feature.squeeze(3)  # drop the size-1 last axis -> (N, W, C)
+
+        """ Sequence modeling stage """
+        contextual_feature = self.SequenceModeling(visual_feature)  # NOTE(review): BidirectionalLSTM lives in .modules; presumably maps (N, T, C) -> (N, T, hidden_size) — confirm
+
+        """ Prediction stage """
+        prediction = self.Prediction(contextual_feature.contiguous())  # Linear acts on the last axis: num_class outputs per position
+
+        return prediction
diff --git a/easyocr/recognition.py b/easyocr/recognition.py
@@ -147,6 +147,8 @@ def get_recognizer(recog_network, network_params, character,\
 
     if recog_network == 'standard':
         model_pkg = importlib.import_module("easyocr.model.model")
+    elif recog_network == 'lite':
+        model_pkg = importlib.import_module("easyocr.model.vgg_model")
     else:
         model_pkg = importlib.import_module(recog_network)
     model = model_pkg.Model(num_class=num_class, **network_params)