ProblemFactory
diff --git a/.gitignore b/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/ArtScanner/Tools/datagen.py b/ArtScanner/Tools/datagen.py
Lines changed: 3 additions & 1 deletion
diff --git a/ArtScanner/Tools/train.ipynb b/ArtScanner/Tools/train.ipynb
Lines changed: 695 additions & 0 deletions
diff --git a/ArtScanner/build.cmd b/ArtScanner/build.cmd
Lines changed: 1 addition & 1 deletion
diff --git a/ArtScanner/mn_model_weight_artnames.h5 b/ArtScanner/generic_model.h5
Renamed: ArtScanner/mn_model_weight_artnames.h5 to ArtScanner/generic_model.h5 (13 MB)
diff --git a/ArtScanner/main.py b/ArtScanner/main.py
Lines changed: 2 additions & 2 deletions
diff --git a/ArtScanner/mn_model_weight.h5 b/ArtScanner/name_model.h5
Renamed: ArtScanner/mn_model_weight.h5 to ArtScanner/name_model.h5 (10.8 MB)
diff --git a/ArtScanner/ocr.py b/ArtScanner/ocr.py
Lines changed: 100 additions & 113 deletions
@@ -8,3 +8,5 @@ ArtScanner/artifacts/**
 ArtScanner/artifacts-all/**
 
 .vscode/**
+**/.ipynb_checkpoints/**
+ArtScanner/data/**
@@ -1,7 +1,9 @@
 import json
 import numpy as np   
 from PIL import ImageFont, Image, ImageDraw
-from .. import ArtsInfo
+import sys
+sys.path.append("..")
+import ArtsInfo
 
 MainAttrDatabase = json.load(open('ReliquaryLevelExcelConfigData.json'))
 SubAttrDatabase = json.load(open('ReliquaryAffixExcelConfigData.json'))
 
@@ -1 +1 @@
-pyinstaller --onefile --add-data "mn_model_weight.h5;." --add-data "mn_model_weight_artnames.h5;." --add-data "Tools/ReliquaryLevelExcelConfigData.json;./Tools" --add-data "Tools/ReliquaryAffixExcelConfigData.json;./Tools" --hidden-import=h5py --hidden-import=h5py.defs --hidden-import=h5py.utils --hidden-import=h5py.h5ac --hidden-import=h5py._proxy --uac-admin -n ArtScanner main.py
+pyinstaller --onefile --add-data "generic_model.h5;." --add-data "name_model.h5;." --add-data "Tools/ReliquaryLevelExcelConfigData.json;./Tools" --add-data "Tools/ReliquaryAffixExcelConfigData.json;./Tools" --hidden-import=h5py --hidden-import=h5py.defs --hidden-import=h5py.utils --hidden-import=h5py.h5ac --hidden-import=h5py._proxy --uac-admin -n ArtScanner main.py
@@ -90,8 +90,8 @@ def is_admin():
 # margin near level number, color=233,229,220
 
 # initialization
-ocr_model = ocr.OCR(scale_ratio=game_info.scale_ratio, model_weight=os.path.join(bundle_dir, 'mn_model_weight.h5'), 
-                    ocr_model_artnames=ocr.OCR_artnames(model_weight=os.path.join(bundle_dir, 'mn_model_weight_artnames.h5')))
+ocr_model = ocr.OCR(scale_ratio=game_info.scale_ratio, generic_model_weight=os.path.join(bundle_dir, 'generic_model.h5'), 
+                    name_model_weight=os.path.join(bundle_dir, 'name_model.h5'))
 art_id = 0
 saved = 0
 skipped = 0
 
@@ -32,22 +32,10 @@ class Config:
     subattr_3_coords = [67, 584, 560, 624]
     subattr_4_coords = [67, 636, 560, 676]
 
-class OCR:
-    def __init__(self, model_weight='mn_model_weight.h5', scale_ratio=1, ocr_model_artnames=None):
-        self.scale_ratio = scale_ratio
-        self.characters = sorted(
-                                [
-                                    *set(
-                                        "".join(
-                                            sum(ArtsInfo.ArtNames[:-2], [])
-                                            + ArtsInfo.TypeNames
-                                            + list(ArtsInfo.MainAttrNames.values())
-                                            + list(ArtsInfo.SubAttrNames.values())
-                                            + list(".,+%0123456789")
-                                        )
-                                    )
-                                ]
-                            )
+class OCRModel:
+    def __init__(self, characters, model_weight, width, height, max_length):
+        
+        self.characters = characters
         # Mapping characters to integers
         self.char_to_num = StringLookup(
             vocabulary=list(self.characters), num_oov_indices=0, mask_token=""
@@ -57,21 +45,94 @@ def __init__(self, model_weight='mn_model_weight.h5', scale_ratio=1, ocr_model_a
         self.num_to_char = StringLookup(
             vocabulary=self.char_to_num.get_vocabulary(), oov_token="", mask_token="", invert=True
         )
+        
+        self.width = width
+        self.height = height
+        self.max_length = max_length
+        
+        self.model = OCRModel.build_model(characters=self.characters, input_shape=(self.width, self.height))
+        if model_weight:
+            self.model.load_weights(model_weight)
+    
+    def predict(self, x):
+        """Run the wrapped model on ``x`` and return the decoded strings.
+
+        ``x`` is passed unchanged to ``self.model.predict``; the raw
+        prediction is then converted to a list of strings by ``self.decode``.
+        """
+        return self.decode(self.model.predict(x))
+        
+    def decode(self, pred):
+        """Turn a batch of model outputs into a list of text strings.
+
+        Args:
+            pred: array with at least two dimensions. ``pred.shape[0]`` is
+                used as the number of samples and ``pred.shape[1]`` as the
+                input length reported to ``ctc_decode`` for every sample.
+
+        Returns:
+            list: one ``str`` per row of the greedy CTC result, each built
+            from at most ``self.max_length`` decoded positions.
+        """
+        # Every sample is given the same input length, pred.shape[1].
+        input_len = np.ones(pred.shape[0]) * pred.shape[1]
+        # Use greedy search. For complex tasks, you can use beam search
+        results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
+            :, :self.max_length
+        ]
+        # Iterate over the results and get back the text
+        output_text = []
+        for res in results:
+            # Map the decoded indices to characters, join them into one
+            # string tensor, then convert that tensor to a Python str.
+            res = self.num_to_char(res)
+            res = reduce_join(res)
+            res = res.numpy().decode("utf-8")
+            output_text.append(res)
+        return output_text
+    
+    @staticmethod
+    def build_model(characters, input_shape):
+        """Assemble the recognition network and return it without weights.
+
+        Layer order: image input -> ``MobileNetV3_Small`` backbone ->
+        ``Reshape`` -> ``Dense(64)`` + ``Dropout`` -> two bidirectional LSTMs
+        -> softmax ``Dense`` output.
+
+        Args:
+            characters: collection of symbols; only its length is used here,
+                to size the output layer.
+            input_shape: pair whose two entries become the first two
+                dimensions of the single-channel input.
+
+        Returns:
+            A ``Model`` named ``"ocr_model_v1"`` mapping the ``"image"``
+            input to the softmax output.
+        """
+        input_img = Input(
+            shape=(input_shape[0], input_shape[1], 1), name="image", dtype="float32"
+        )
+        mobilenet = MobileNetV3_Small(
+            (input_shape[0], input_shape[1], 1), 0, alpha=1.0, include_top=False
+        ).build()
+        x = mobilenet(input_img)
+        # NOTE(review): the // 8 factors and the 576 multiplier presumably
+        # mirror the backbone's downsampling and output channel count -
+        # TODO confirm against MobileNetV3_Small.
+        new_shape = ((input_shape[0] // 8), (input_shape[1] // 8) * 576)
+        x = Reshape(target_shape=new_shape, name="reshape")(x)
+        x = Dense(64, activation="relu", name="dense1")(x)
+        x = Dropout(0.2)(x)
+
+        # RNNs
+        x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25))(x)
+        x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.25))(x)
+
+        # Output layer
+        # NOTE(review): the two extra classes are presumably reserved for the
+        # mask token and the CTC blank - confirm against the training code.
+        output = Dense(len(characters) + 2, activation="softmax", name="dense2")(x)
 
+        # Define the model
+        return Model(inputs=[input_img], outputs=output, name="ocr_model_v1")
+    
+class OCR:
+    def __init__(self, generic_model_weight='generic_model.h5', name_model_weight='name_model.h5', scale_ratio=1):
+        """Create the two recognisers used by this class.
+
+        Args:
+            generic_model_weight: weight file passed to the ``OCRModel``
+                stored as ``self.generic_model``.
+            name_model_weight: weight file passed to the ``OCRModel`` stored
+                as ``self.name_model``.
+            scale_ratio: stored as ``self.scale_ratio``.
+        """
+        # Both models share the same input width/height and decode length.
         self.width = 240
         self.height = 16
         self.max_length = 15
-        self.build_model(input_shape=(self.width, self.height))
-        self.model.load_weights(model_weight)
-        self.ocr_model_artnames = ocr_model_artnames
+        self.scale_ratio = scale_ratio
+        # Sorted, de-duplicated characters from the type names, the main and
+        # sub attribute names, and the literal digits/punctuation below.
+        self.generic_characters = sorted(
+                                [
+                                    *set(
+                                        "".join(
+                                            ArtsInfo.TypeNames
+                                            + list(ArtsInfo.MainAttrNames.values())
+                                            + list(ArtsInfo.SubAttrNames.values())
+                                            + list(".,+%0123456789")
+                                        )
+                                    )
+                                ]
+                            )
+        
+        # Sorted, de-duplicated characters of every entry in ArtsInfo.ArtNames
+        # (presumably a list of lists of strings, flattened by sum - confirm).
+        self.name_characters = sorted([*set("".join(sum(ArtsInfo.ArtNames, [])))])
+
+        self.name_model = OCRModel(characters=self.name_characters, 
+                                   model_weight=name_model_weight, 
+                                   width=self.width, height=self.height, 
+                                   max_length=self.max_length)
+        self.generic_model = OCRModel(characters=self.generic_characters, 
+                                      model_weight=generic_model_weight, 
+                                      width=self.width, height=self.height, 
+                                      max_length=self.max_length)
 
     def detect_info(self, art_img):
+        """Recognise the text regions of ``art_img`` and return them as a dict.
+
+        The regions come from ``self.extract_art_info`` (presumably a mapping
+        from field name to cropped image - confirm). Every key except
+        ``'name'`` is preprocessed, batched and read by ``self.generic_model``;
+        the ``'name'`` region is read on its own by ``self.name_model``.
+
+        Returns:
+            dict: one recognised string per non-name key, plus ``'star'``
+            (the result of ``self.detect_star``) and ``'name'``.
+        """
         info = self.extract_art_info(art_img)
-        x = np.concatenate([self.preprocess(info[key]).T[None, :, :, None] for key in sorted(info.keys())], axis=0)
-        y = self.model.predict(x)
-        y = self.decode(y)
-        y[3] = self.ocr_model_artnames.reg(x[3][None])
-        return {**{key:v for key, v in zip(sorted(info.keys()), y)}, **{'star':self.detect_star(art_img)}}
+        # Sorted so that the batch order matches the zip() in the return.
+        generic_keys = [key for key in sorted(info.keys()) if key!='name']
+        # Each preprocessed image is transposed and given a leading and a
+        # trailing axis of length 1 before being stacked along axis 0.
+        x = np.concatenate([self.preprocess(info[key]).T[None, :, :, None] for key in generic_keys], axis=0)
+        y_generic = self.generic_model.predict(x)
+        y_name = self.name_model.predict(self.preprocess(info['name']).T[None,:,:,None])
+        return {**{key:v for key, v in zip(generic_keys, y_generic)}, **{'star':self.detect_star(art_img)}, **{'name':y_name[0]}}
 
     def extract_art_info(self, art_img):
         name = art_img.crop([i*self.scale_ratio for i in Config.name_coords])
@@ -112,12 +173,12 @@ def to_gray(self, text_img):
             text_img = (text_img[..., :3] @ [[[0.299], [0.587], [0.114]]])[:, :, 0]
         return np.array(text_img, np.float32)
 
-    def normalize(self, img, auto_inverse=True):
-        img -= img.min()
+    def normalize(self, img, auto_inverse=True, min_jitter=0):
+        """Rescale ``img`` by its minimum and maximum and return float32 data.
+
+        Args:
+            img: image values supporting ``min``/``max``, in-place arithmetic
+                and 2-D indexing (presumably a float NumPy array - confirm).
+            auto_inverse: when true, the result is replaced by ``1 - img`` if
+                the last element of the last row exceeds 0.5 after scaling.
+            min_jitter: scales a random extra offset, drawn with
+                ``np.random.random()``, that is subtracted together with the
+                minimum; with 0 the extra offset is zero.
+
+        Returns:
+            A new ``np.float32`` array built from the processed values.
+        """
+        # NOTE(review): ``-=`` and ``/=`` operate in place, so a NumPy array
+        # passed by the caller is modified as well - confirm this is intended.
+        img -= img.min() + np.random.random() * min_jitter * img.max()
+        # NOTE(review): nothing guards against ``img.max()`` being 0 here
+        # (e.g. a constant image) - TODO confirm inputs never hit this case.
         img /= img.max()
         if auto_inverse and img[-1, -1] > 0.5:
             img = 1 - img
-        return img
+        return np.array(img, np.float32)
 
 
     def crop(self, img, tol=0.7):
@@ -154,7 +215,18 @@ def pad_to_width(self, img):
         )
 
 
-    def preprocess(self, text_img):
+    def preprocess(self, text_img, inference=True):
+        """Convert a text image into the array form produced by ``pad_to_width``.
+
+        Steps: ``to_gray`` -> ``normalize`` (with auto-inversion) -> ``crop`` ->
+        ``normalize`` (no inversion) -> ``resize_to_height`` -> ``pad_to_width``.
+
+        Args:
+            text_img: image handed to ``to_gray``.
+            inference: when true, no jitter is applied and ``crop`` uses its
+                default tolerance. When false, ``normalize`` gets
+                ``min_jitter=0.2`` and ``crop`` a random tolerance of
+                ``0.6 + 0.25 * np.random.random()`` (presumably meant as
+                training-time augmentation - confirm).
+
+        Returns:
+            The value returned by ``pad_to_width``.
+        """
+        result = self.to_gray(text_img)
+        if inference:
+            result = self.normalize(result, True, 0)
+            result = self.crop(result)
+        else:
+            result = self.normalize(result, True, 0.2)
+            result = self.crop(result, np.random.random() * 0.25 + 0.6)
+        result = self.normalize(result, False, 0)
+        result = self.resize_to_height(result)
+        result = self.pad_to_width(result)
+        # NOTE(review): in this diff the statements after this return are
+        # unchanged context lines, i.e. the previous body still follows the
+        # return in the file and can no longer be reached.
+        return result
         result = self.to_gray(text_img)
         result = self.normalize(result, True)
         result = self.crop(result)
@@ -163,88 +235,3 @@ def preprocess(self, text_img):
         result = self.pad_to_width(result)
         return result
 
-    
-    def decode(self, pred):
-        input_len = np.ones(pred.shape[0]) * pred.shape[1]
-        # Use greedy search. For complex tasks, you can use beam search
-        results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
-            :, :self.max_length
-        ]
-        # Iterate over the results and get back the text
-        output_text = []
-        for res in results:
-            res = self.num_to_char(res)
-            res = reduce_join(res)
-            res = res.numpy().decode("utf-8")
-            output_text.append(res)
-        return output_text
-    
-    def build_model(self, input_shape):
-        input_img = Input(
-            shape=(input_shape[0], input_shape[1], 1), name="image", dtype="float32"
-        )
-        mobilenet = MobileNetV3_Small(
-            (input_shape[0], input_shape[1], 1), 0, alpha=1.0, include_top=False
-        ).build()
-        x = mobilenet(input_img)
-        new_shape = ((input_shape[0] // 8), (input_shape[1] // 8) * 576)
-        x = Reshape(target_shape=new_shape, name="reshape")(x)
-        x = Dense(64, activation="relu", name="dense1")(x)
-        x = Dropout(0.2)(x)
-
-        # RNNs
-        x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25))(x)
-        x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.25))(x)
-
-        # Output layer
-        output = Dense(len(self.characters) + 2, activation="softmax", name="dense2")(x)
-
-        # Define the model
-        self.model = Model(inputs=[input_img], outputs=output, name="ocr_model_v1")
-        
-class OCR_artnames:
-    def __init__(self, model_weight='mn_model_weight_artnames.h5'):
-        self.artnames = sorted(set(sum(ArtsInfo.ArtNames, [])))
-
-        self.model = self.build_model(input_shape=(240, 16))
-        self.model.load_weights(model_weight)
-        
-    def build_model(self, input_shape):
-        input_img = Input(
-            shape=(input_shape[0], input_shape[1], 1), name="image", dtype="float32"
-        )
-        mobilenet = MobileNetV3_Small(
-            (input_shape[0], input_shape[1], 1), 0, alpha=1.0, include_top=False
-        ).build()
-        x = mobilenet(input_img)
-        new_shape = ((input_shape[0] // 8), (input_shape[1] // 8) * 576)
-        x = Reshape(target_shape=new_shape, name="reshape")(x)
-        x = Dense(64, activation="relu", name="dense1")(x)
-        x = Dropout(0.2)(x)
-
-        # RNNs
-        x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25))(x)
-        x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.25))(x)
-
-        # Output layer
-        x = Flatten(name="flatten")(x)
-        x = Dense(
-            len(self.artnames), activation="softmax", name="dense2"
-        )(x)
-
-        output = x
-
-        # Define the model
-        model = Model(inputs=[input_img], outputs=output, name="ocr_model_artnames")
-        
-        return model
-    
-    def decode_single(self, pred):
-        i = pred[0].argmax()
-        if pred[0][i] > 0.75:
-            return self.artnames[i]
-        else:
-            return 'Unknown'
-   
-    def reg(self, x):
-        return self.decode_single(self.model.predict(x))
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-pyinstaller --onefile --add-data "mn_model_weight.h5;." --add-data "mn_model_weight_artnames.h5;." --add-data "Tools/ReliquaryLevelExcelConfigData.json;./Tools" --add-data "Tools/ReliquaryAffixExcelConfigData.json;./Tools" --hidden-import=h5py --hidden-import=h5py.defs --hidden-import=h5py.utils --hidden-import=h5py.h5ac --hidden-import=h5py._proxy --uac-admin -n ArtScanner main.py`
	`1`	`+pyinstaller --onefile --add-data "generic_model.h5;." --add-data "name_model.h5;." --add-data "Tools/ReliquaryLevelExcelConfigData.json;./Tools" --add-data "Tools/ReliquaryAffixExcelConfigData.json;./Tools" --hidden-import=h5py --hidden-import=h5py.defs --hidden-import=h5py.utils --hidden-import=h5py.h5ac --hidden-import=h5py._proxy --uac-admin -n ArtScanner main.py`