11# -*- coding: utf-8 -*-
22
3- from .recognition import get_recognizer , get_text
3+ from .recognition import get_recognizer , get_text , get_text_prob
44from .utils import group_text_box , get_image_list , calculate_md5 , get_paragraph ,\
55 download_and_unzip , printProgressBar , diff , reformat_input ,\
66 make_rotated_img_list , set_result_with_confidence ,\
@@ -350,6 +350,93 @@ def detect(self, img, min_size = 20, text_threshold = 0.7, low_text = 0.4,\
350350
351351 return horizontal_list_agg , free_list_agg
352352
def recognize_prob(self, img_cv_grey, horizontal_list=None, free_list=None,
                   decoder='greedy', beamWidth=5, batch_size=1,
                   workers=0, allowlist=None, blocklist=None, detail=1,
                   rotation_info=None, paragraph=False,
                   contrast_ths=0.1, adjust_contrast=0.5, filter_ths=0.003,
                   y_ths=0.5, x_ths=1.0, reformat=True, output_format='standard'):
    """Run ``get_text_prob`` over the given boxes of a grey-scale image.

    Parameters:
    img_cv_grey: the image to read. When ``reformat`` is true it is first
        passed through ``reformat_input`` and the second returned value is
        used as the grey image.
    horizontal_list, free_list: boxes to read. When both are None a single
        horizontal box ``[0, x_max, 0, y_max]`` covering the whole image is
        used. When only one of them is None it is treated as an empty list.
    decoder, beamWidth, batch_size, workers, contrast_ths, adjust_contrast,
    filter_ths: forwarded unchanged to ``get_text_prob`` (``decoder`` is
        forced to 'greedy' for the 'chinese_tra' / 'chinese_sim' models).
    allowlist, blocklist: control which characters are ignored; with
        neither given, every character in ``self.character`` that is not in
        ``self.lang_char`` is ignored.
    detail: 0 returns only ``item[1]`` of every result item.
    rotation_info: when truthy, rotated copies of the crops are added via
        ``make_rotated_img_list`` and results are merged with
        ``set_result_with_confidence``.
    paragraph, x_ths, y_ths: when ``paragraph`` is true the results are
        merged with ``get_paragraph``.
    output_format: 'standard' (raw result list), 'dict', 'json' or
        'free_merge'.

    Returns:
    A list whose shape depends on ``detail`` and ``output_format``; for the
    'dict' and 'json' formats each item is exposed as 'boxes' (item[0]),
    'text' (item[1]) and, unless ``paragraph`` is set, 'confident' (item[2]).
    NOTE(review): the exact content of each item is whatever
    ``get_text_prob`` produces -- confirm against its implementation.
    """
    if reformat:
        _, img_cv_grey = reformat_input(img_cv_grey)

    if allowlist:
        ignore_char = ''.join(set(self.character) - set(allowlist))
    elif blocklist:
        ignore_char = ''.join(set(blocklist))
    else:
        ignore_char = ''.join(set(self.character) - set(self.lang_char))

    if self.model_lang in ['chinese_tra', 'chinese_sim']:
        decoder = 'greedy'

    if horizontal_list is None and free_list is None:
        # No boxes supplied at all: read the whole image as one horizontal box.
        y_max, x_max = img_cv_grey.shape
        horizontal_list = [[0, x_max, 0, y_max]]
        free_list = []
    else:
        # Only one kind of box supplied: the missing kind is simply empty,
        # otherwise the loops / concatenation below would fail on None.
        if horizontal_list is None:
            horizontal_list = []
        if free_list is None:
            free_list = []

    def run_recognizer(image_list, max_width):
        # Single place holding the (long) shared argument list.
        return get_text_prob(self.character, imgH, int(max_width), self.recognizer,
                             self.converter, image_list, ignore_char, decoder,
                             beamWidth, batch_size, contrast_ths, adjust_contrast,
                             filter_ths, workers, self.device)

    # without gpu/parallelization, it is faster to process image one by one
    if ((batch_size == 1) or (self.device == 'cpu')) and not rotation_info:
        result = []
        single_boxes = [([bbox], []) for bbox in horizontal_list]
        single_boxes += [([], [bbox]) for bbox in free_list]
        for h_list, f_list in single_boxes:
            image_list, max_width = get_image_list(h_list, f_list, img_cv_grey,
                                                   model_height=imgH)
            result += run_recognizer(image_list, max_width)
    # default mode will try to process multiple boxes at the same time
    else:
        image_list, max_width = get_image_list(horizontal_list, free_list, img_cv_grey,
                                               model_height=imgH)
        image_len = len(image_list)
        if rotation_info and image_list:
            image_list = make_rotated_img_list(rotation_info, image_list)
            max_width = max(max_width, imgH)

        result = run_recognizer(image_list, max_width)

        if rotation_info and (horizontal_list + free_list):
            # Reshape result to be a list of lists, each row being for
            # one of the rotations (first row being no rotation)
            result = set_result_with_confidence(
                [result[image_len * i:image_len * (i + 1)]
                 for i in range(len(rotation_info) + 1)])

    if self.model_lang == 'arabic':
        direction_mode = 'rtl'
        result = [list(item) for item in result]
        for item in result:
            item[1] = get_display(item[1])
    else:
        direction_mode = 'ltr'

    if paragraph:
        result = get_paragraph(result, x_ths=x_ths, y_ths=y_ths, mode=direction_mode)

    if detail == 0:
        return [item[1] for item in result]
    elif output_format == 'dict':
        if paragraph:
            return [{'boxes': item[0], 'text': item[1]} for item in result]
        return [{'boxes': item[0], 'text': item[1], 'confident': item[2]} for item in result]
    elif output_format == 'json':
        if paragraph:
            return [json.dumps({'boxes': [list(map(int, lst)) for lst in item[0]],
                                'text': item[1]}, ensure_ascii=False)
                    for item in result]
        return [json.dumps({'boxes': [list(map(int, lst)) for lst in item[0]],
                            'text': item[1], 'confident': item[2]}, ensure_ascii=False)
                for item in result]
    elif output_format == 'free_merge':
        return merge_to_free(result, free_list)
    else:
        return result
439+
353440 def recognize (self , img_cv_grey , horizontal_list = None , free_list = None ,\
354441 decoder = 'greedy' , beamWidth = 5 , batch_size = 1 ,\
355442 workers = 0 , allowlist = None , blocklist = None , detail = 1 ,\
@@ -472,6 +559,42 @@ def readtext(self, image, decoder = 'greedy', beamWidth= 5, batch_size = 1,\
472559 filter_ths , y_ths , x_ths , False , output_format )
473560
474561 return result
562+
def readtext_prob(self, image, decoder='greedy', beamWidth=5, batch_size=1,
                  workers=0, allowlist=None, blocklist=None, detail=1,
                  rotation_info=None, paragraph=False, min_size=20,
                  contrast_ths=0.1, adjust_contrast=0.5, filter_ths=0.003,
                  text_threshold=0.7, low_text=0.4, link_threshold=0.4,
                  canvas_size=2560, mag_ratio=1.,
                  slope_ths=0.1, ycenter_ths=0.5, height_ths=0.5,
                  width_ths=0.5, y_ths=0.5, x_ths=1.0, add_margin=0.1,
                  threshold=0.2, bbox_min_score=0.2, bbox_min_size=3, max_candidates=0,
                  output_format='standard'):
    '''
    Detect text boxes in ``image`` with ``self.detect`` and read them with
    ``self.recognize_prob``.

    Parameters:
    image: file path or numpy-array or a byte stream object

    The detection options (min_size, text_threshold, low_text,
    link_threshold, canvas_size, mag_ratio, slope_ths, ycenter_ths,
    height_ths, width_ths, add_margin, threshold, bbox_min_score,
    bbox_min_size, max_candidates) are forwarded to ``self.detect``; all
    remaining options are forwarded to ``self.recognize_prob``, whose return
    value is returned unchanged.
    '''
    img, img_cv_grey = reformat_input(image)

    # The image is already reformatted, so detect must not do it again.
    detect_options = dict(
        min_size=min_size, text_threshold=text_threshold,
        low_text=low_text, link_threshold=link_threshold,
        canvas_size=canvas_size, mag_ratio=mag_ratio,
        slope_ths=slope_ths, ycenter_ths=ycenter_ths,
        height_ths=height_ths, width_ths=width_ths,
        add_margin=add_margin, reformat=False,
        threshold=threshold, bbox_min_score=bbox_min_score,
        bbox_min_size=bbox_min_size, max_candidates=max_candidates,
    )
    horizontal_batches, free_batches = self.detect(img, **detect_options)

    # get the 1st result from hor & free list as self.detect returns a list of depth 3
    first_horizontal = horizontal_batches[0]
    first_free = free_batches[0]

    # reformat=False: img_cv_grey was produced by reformat_input above.
    return self.recognize_prob(
        img_cv_grey, first_horizontal, first_free,
        decoder=decoder, beamWidth=beamWidth, batch_size=batch_size,
        workers=workers, allowlist=allowlist, blocklist=blocklist,
        detail=detail, rotation_info=rotation_info, paragraph=paragraph,
        contrast_ths=contrast_ths, adjust_contrast=adjust_contrast,
        filter_ths=filter_ths, y_ths=y_ths, x_ths=x_ths,
        reformat=False, output_format=output_format,
    )
475598
476599 def readtextlang (self , image , decoder = 'greedy' , beamWidth = 5 , batch_size = 1 ,\
477600 workers = 0 , allowlist = None , blocklist = None , detail = 1 ,\
@@ -577,3 +700,18 @@ def readtext_batched(self, image, n_width=None, n_height=None,\
577700 filter_ths , y_ths , x_ths , False , output_format ))
578701
579702 return result_agg
703+
704+
def convert_prob_to_word(prob, converter):
    """
    Greedy-decode one 2-D score matrix into a string.

    For use with the readtext_prob outputs.

    - prob should be 2d (array-like); the argmax is taken along axis 1, so
      every row contributes one index.
      NOTE(review): presumably rows are time steps and columns are character
      classes -- confirm against get_text_prob.
    - converter = reader.converter; it must provide
      ``decode_greedy(indices, lengths)``.

    Returns the first element of what ``converter.decode_greedy`` returns.
    Raises ValueError when ``prob`` is not 2-dimensional.
    """
    prob = np.asarray(prob)
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if prob.ndim != 2:
        raise ValueError(
            "prob must be 2-dimensional, got %d dimension(s)" % prob.ndim)
    # argmax over axis 1 of a 2-D array is already 1-D: one index per row.
    preds_index = np.argmax(prob, axis=1)
    preds_size = np.array([prob.shape[0]])
    return converter.decode_greedy(preds_index, preds_size)[0]
0 commit comments