Merge pull request #238 from IINemo/official_em

rvashurin · web-flow · commit 8de12fc60472 · 2024-10-17T15:47:30.000+03:00
Official em
diff --git a/examples/configs/base_processing_coqa.yaml b/examples/configs/base_processing_coqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: instruct/output_processing_scripts/coqa.py
+  fn_name: normalize_em_coqa
+process_target_fn:
+  path: instruct/output_processing_scripts/coqa.py
+  fn_name: normalize_em_coqa
diff --git a/examples/configs/base_processing_triviaqa.yaml b/examples/configs/base_processing_triviaqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: instruct/output_processing_scripts/triviaqa.py
+  fn_name: normalize_em_triviaqa
+process_target_fn:
+  path: instruct/output_processing_scripts/triviaqa.py
+  fn_name: normalize_em_triviaqa
diff --git a/examples/configs/instruct/cot_processing.yaml b/examples/configs/instruct/cot_processing.yaml
@@ -3,4 +3,4 @@ process_output_fn:
   fn_name: process_output_cot
 process_target_fn:
   path: output_processing_scripts/default.py
-  fn_name: process_target
+  fn_name: normalize_text
diff --git a/examples/configs/instruct/cot_processing_coqa.yaml b/examples/configs/instruct/cot_processing_coqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: process_output_cot_coqa
+process_target_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: normalize_em_coqa
diff --git a/examples/configs/instruct/cot_processing_triviaqa.yaml b/examples/configs/instruct/cot_processing_triviaqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: process_output_cot_triviaqa
+process_target_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: normalize_em_triviaqa
diff --git a/examples/configs/instruct/output_processing_scripts/coqa.py b/examples/configs/instruct/output_processing_scripts/coqa.py
@@ -0,0 +1,44 @@
+import re
+import string
+
+from default import (
+    TOP1_OUTPUT_IGNORE_REGEX,
+    TOPK_OUTPUT_IGNORE_REGEX,
+    CoT_OUTPUT_IGNORE_REGEX,
+)
+
+
+def normalize_em_coqa(s: str) -> str:
+    def remove_articles(text):
+        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
+        return re.sub(regex, " ", text)
+
+    def white_space_fix(text):
+        return " ".join(text.split())
+
+    def remove_punc(text):
+        exclude = set(string.punctuation)
+        return "".join(ch for ch in text if ch not in exclude)
+
+    def lower(text):
+        return text.lower()
+
+    return white_space_fix(remove_articles(remove_punc(lower(s))))
+
+
+def process_output_top1_coqa(output: str) -> str:
+    output = TOP1_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_coqa(output)
+    return output
+
+
+def process_output_topk_coqa(output: str) -> str:
+    output = TOPK_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_coqa(output)
+    return output
+
+
+def process_output_cot_coqa(output: str) -> str:
+    output = CoT_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_coqa(output)
+    return output
diff --git a/examples/configs/instruct/output_processing_scripts/default.py b/examples/configs/instruct/output_processing_scripts/default.py
@@ -12,11 +12,6 @@ def normalize_text(text: str) -> str:
     return text
 
 
-def process_target(target: str) -> str:
-    target = normalize_text(target)
-    return target
-
-
 def process_output_top1(output: str) -> str:
     output = TOP1_OUTPUT_IGNORE_REGEX.sub("", output)
     output = normalize_text(output)
diff --git a/examples/configs/instruct/output_processing_scripts/triviaqa.py b/examples/configs/instruct/output_processing_scripts/triviaqa.py
@@ -0,0 +1,48 @@
+import re
+import string
+
+from default import (
+    TOP1_OUTPUT_IGNORE_REGEX,
+    TOPK_OUTPUT_IGNORE_REGEX,
+    CoT_OUTPUT_IGNORE_REGEX,
+)
+
+
+def normalize_em_triviaqa(s: str) -> str:
+    def remove_articles(text):
+        return re.sub(r"\b(a|an|the)\b", " ", text)
+
+    def white_space_fix(text):
+        return " ".join(text.split())
+
+    def handle_punc(text):
+        exclude = set(string.punctuation + "".join(["‘", "’", "´", "`"]))
+        return "".join(ch if ch not in exclude else " " for ch in text)
+
+    def lower(text):
+        return text.lower()
+
+    def replace_underscore(text):
+        return text.replace("_", " ")
+
+    return white_space_fix(
+        remove_articles(handle_punc(lower(replace_underscore(s))))
+    ).strip()
+
+
+def process_output_top1_triviaqa(output: str) -> str:
+    output = TOP1_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_triviaqa(output)
+    return output
+
+
+def process_output_topk_triviaqa(output: str) -> str:
+    output = TOPK_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_triviaqa(output)
+    return output
+
+
+def process_output_cot_triviaqa(output: str) -> str:
+    output = CoT_OUTPUT_IGNORE_REGEX.sub("", output)
+    output = normalize_em_triviaqa(output)
+    return output
diff --git a/examples/configs/instruct/polygraph_eval_coqa_empirical_baselines.yaml b/examples/configs/instruct/polygraph_eval_coqa_empirical_baselines.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - top1_processing
+  - top1_processing_coqa
   - _self_
 
 experiment_name: coqa_empirical_baselines
diff --git a/examples/configs/instruct/polygraph_eval_coqa_ling_1s.yaml b/examples/configs/instruct/polygraph_eval_coqa_ling_1s.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - top1_processing
+  - top1_processing_coqa
   - _self_
 
 experiment_name: coqa_ling_1s
diff --git a/examples/configs/instruct/polygraph_eval_coqa_verb_1s_top1.yaml b/examples/configs/instruct/polygraph_eval_coqa_verb_1s_top1.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - top1_processing
+  - top1_processing_coqa
   - _self_
 
 experiment_name: coqa_verb_1s_top1
diff --git a/examples/configs/instruct/polygraph_eval_coqa_verb_1s_topk.yaml b/examples/configs/instruct/polygraph_eval_coqa_verb_1s_topk.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - topk_processing
+  - topk_processing_coqa
   - _self_
 
 experiment_name: coqa_verb_1s_topk
diff --git a/examples/configs/instruct/polygraph_eval_coqa_verb_2s_cot.yaml b/examples/configs/instruct/polygraph_eval_coqa_verb_2s_cot.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - cot_processing
+  - cot_processing_coqa
   - _self_
 
 experiment_name: coqa_verb_2s_cot
diff --git a/examples/configs/instruct/polygraph_eval_coqa_verb_2s_top1.yaml b/examples/configs/instruct/polygraph_eval_coqa_verb_2s_top1.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - top1_processing
+  - top1_processing_coqa
   - _self_
 
 experiment_name: coqa_verb_2s_top1
diff --git a/examples/configs/instruct/polygraph_eval_coqa_verb_2s_topk.yaml b/examples/configs/instruct/polygraph_eval_coqa_verb_2s_topk.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_coqa_default_instruct
-  - topk_processing
+  - topk_processing_coqa
   - _self_
 
 experiment_name: coqa_verb_2s_topk
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_empirical_baselines.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_empirical_baselines.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - top1_processing
+  - top1_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_empirical_baselines
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_ling_1s.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_ling_1s.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - top1_processing
+  - top1_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_ling_1s
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_verb_1s_top1.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_verb_1s_top1.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - top1_processing
+  - top1_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_verb_1s_top1
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_verb_1s_topk.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_verb_1s_topk.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - topk_processing
+  - topk_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_verb_1s_topk
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_cot.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_cot.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - cot_processing
+  - cot_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_verb_2s_cot
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_top1.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_top1.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - top1_processing
+  - top1_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_verb_2s_top1
diff --git a/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_topk.yaml b/examples/configs/instruct/polygraph_eval_triviaqa_verb_2s_topk.yaml
@@ -1,6 +1,6 @@
 defaults:
   - polygraph_eval_triviaqa_default_instruct
-  - topk_processing
+  - topk_processing_triviaqa
   - _self_
 
 experiment_name: triviaqa_verb_2s_topk
diff --git a/examples/configs/instruct/top1_processing.yaml b/examples/configs/instruct/top1_processing.yaml
@@ -3,4 +3,4 @@ process_output_fn:
   fn_name: process_output_top1
 process_target_fn:
   path: output_processing_scripts/default.py
-  fn_name: process_target
+  fn_name: normalize_text
diff --git a/examples/configs/instruct/top1_processing_coqa.yaml b/examples/configs/instruct/top1_processing_coqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: process_output_top1_coqa
+process_target_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: normalize_em_coqa
diff --git a/examples/configs/instruct/top1_processing_triviaqa.yaml b/examples/configs/instruct/top1_processing_triviaqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: process_output_top1_triviaqa
+process_target_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: normalize_em_triviaqa
diff --git a/examples/configs/instruct/topk_processing.yaml b/examples/configs/instruct/topk_processing.yaml
@@ -3,4 +3,4 @@ process_output_fn:
   fn_name: process_output_topk
 process_target_fn:
   path: output_processing_scripts/default.py
-  fn_name: process_target
+  fn_name: normalize_text
diff --git a/examples/configs/instruct/topk_processing_coqa.yaml b/examples/configs/instruct/topk_processing_coqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: process_output_topk_coqa
+process_target_fn:
+  path: output_processing_scripts/coqa.py
+  fn_name: normalize_em_coqa
diff --git a/examples/configs/instruct/topk_processing_triviaqa.yaml b/examples/configs/instruct/topk_processing_triviaqa.yaml
@@ -0,0 +1,6 @@
+process_output_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: process_output_topk_triviaqa
+process_target_fn:
+  path: output_processing_scripts/triviaqa.py
+  fn_name: normalize_em_triviaqa
diff --git a/examples/configs/polygraph_eval_coqa.yaml b/examples/configs/polygraph_eval_coqa.yaml
@@ -4,6 +4,7 @@ hydra:
 
 defaults:
   - model: bloomz-560m
+  - base_processing_coqa
   - _self_
 
 cache_path: ./workdir/output
@@ -20,7 +21,6 @@ train_split: train
 eval_split: validation
 max_new_tokens: 20
 load_from_disk: false
-normalize: true
 generation_params:
   generate_until:
     - "\n"
diff --git a/examples/configs/polygraph_eval_triviaqa.yaml b/examples/configs/polygraph_eval_triviaqa.yaml
@@ -4,6 +4,7 @@ hydra:
 
 defaults:
   - model: bloomz-560m
+  - base_processing_triviaqa
   - _self_
 
 cache_path: ./workdir/output
@@ -22,7 +23,6 @@ max_new_tokens: 20
 load_from_disk: false
 n_shot: 5
 multiref: true
-normalize: true
 generation_params:
   generate_until:
     - "\n"