
The 7B model cannot achieve the pass@1 reported in the paper #8

@kadjasiodbi

Description

Thank you so much for open-sourcing the model!

I modified the Goedel-Prover inference script to test your 7B model, but the non-CoT pass@1 only reaches about 47.5%. I suspect there is an issue with my code or with the instructions in the data.
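For clarity, by pass@1 I mean the standard unbiased pass@k estimator from the Codex paper, averaged over problems; a minimal sketch (pass_at_k is just an illustrative helper, not part of my script):

from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    # Unbiased pass@k estimator: n samples per problem, c of them correct
    if n - c < k:
        return 1.0
    return 1.0 - comb(n - c, k) / comb(n, k)

# e.g. with 64 samples per problem:
# pass_at_1 = sum(pass_at_k(64, c, 1) for c in num_correct) / len(num_correct)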

Here is my code for inference:

import argparse
import json
import os
import re

import torch
from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument('--input_path', type=str)   # e.g. datasets/minif2f.jsonl
parser.add_argument('--model_path', type=str)   # e.g. Goedel-LM/Goedel-Prover-SFT
parser.add_argument('--output_dir', type=str)   # e.g. results/test
parser.add_argument('--split', default="none", type=str)
parser.add_argument('--n', default=64, type=int)
parser.add_argument('--gpu', default=1, type=int)

torch.manual_seed(30)

args = parser.parse_args()

model_name = args.model_path


data_path = args.input_path
# Initialize an empty list to hold the dictionaries
data_list = []

# Open the file and read each line, keeping only the requested split
with open(data_path, 'r') as file:
    for line in file:
        data = json.loads(line)
        if args.split == "none":
            data_list.append(data)
        else:
            try:
                int_split = int(args.split)
            except ValueError:
                int_split = None
            if int_split is not None:
                if int(data["split"]) == int_split:
                    data_list.append(data)
            elif data["split"] == args.split:
                data_list.append(data)

LEAN4_DEFAULT_HEADER = "import Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\n\nopen BigOperators Real Nat Topology Rat\n\n"

model_inputs = []
for data in data_list:
    # Prompt template following deepseek-prover-v2
    prompt = "Complete the following Lean 4 code:\n\n```lean4\n{header}{informal_prefix}{formal_statement}sorry```".format(
        header=data.get('header', LEAN4_DEFAULT_HEADER),
        informal_prefix=data.get('informal_prefix', str()),
        formal_statement=data['formal_statement'],
    )

    # Wrap the prompt in the DeepSeek chat markers
    prompt = f"<｜begin▁of▁sentence｜><｜User｜>{prompt}<｜Assistant｜>"
    model_inputs.append(prompt)

model = LLM(
    model=model_name,
    seed=1,
    enforce_eager=True,
    trust_remote_code=True,
    swap_space=8,
    gpu_memory_utilization=0.92,
    tensor_parallel_size=args.gpu,
    max_model_len=8192,
    max_num_batched_tokens=8192,
    max_num_seqs=32,
)


sampling_params = SamplingParams(
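    # temperature=0.0 selects greedy decoding, so all n samples will be identical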
    temperature=0.0,
    max_tokens=8192,
    top_p=0.95,
    n=args.n,
)

model_outputs = model.generate(
    model_inputs,
    sampling_params,
    use_tqdm=True,
)

assert len(model_outputs) == len(model_inputs)

def extract_code(inputs):
    # Return the body of the last ```lean4 ... ``` block, or "None" if no block is found
    try:
        matches = re.finditer(r'```lean4\n(.*?)\n```', inputs, re.DOTALL)
        last_match = None
        for match in matches:
            last_match = match
        return last_match.group(1) if last_match else "None"
    except Exception:
        return "None"
    

to_inference_codes = []
for i in range(len(data_list)):
    data_list[i]["model_input"] = model_inputs[i]
    data_list[i]["model_outputs"] = [output.text for output in model_outputs[i].outputs]

    # Extract the Lean block from each sample, drop failed extractions, then
    # prepend the header (the filter must run before the header is added,
    # otherwise no entry can ever equal "None")
    extracted = [extract_code(output.text) for output in model_outputs[i].outputs]
    header = data_list[i].get('header', LEAN4_DEFAULT_HEADER)
    data_list[i]["full_code"] = [header + code for code in extracted if code != "None"]

    name_key = "problem_id" if "problem_id" in data_list[i] else "name"
    to_inference_codes += [{"name": data_list[i][name_key], "code": code} for code in data_list[i]["full_code"]]

os.makedirs(args.output_dir, exist_ok=True)

output_file_path = f'{args.output_dir}/full_records.json'
print(f"Outputting to {output_file_path}")
# Dump the full records (inputs, raw outputs, extracted code) with indents
with open(output_file_path, 'w') as json_file:
    json.dump(data_list, json_file, indent=4)

toinfer_file_path = f'{args.output_dir}/to_inference_codes.json'
print(f"Outputting to {toinfer_file_path}")
# Dump the name/code pairs to a JSON file with indents
with open(toinfer_file_path, 'w') as json_file:
    json.dump(to_inference_codes, json_file, indent=4)
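I invoke the script like this (the file name infer.py is just what I call it locally; the paths match the comments in the argparse section):

python infer.py \
    --input_path datasets/minif2f.jsonl \
    --model_path Goedel-LM/Goedel-Prover-SFT \
    --output_dir results/test \
    --n 64 \
    --gpu 1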

Here is my example instruction:

<｜begin▁of▁sentence｜><｜User｜>Complete the following Lean 4 code:

```lean4
import Mathlib
import Aesop

set_option maxHeartbeats 0

open BigOperators Real Nat Topology Rat

/-- The volume of a cone is given by the formula $V = \frac{1}{3}Bh$, where $B$ is the area of the base and $h$ is the height. The area of the base of a cone is 30 square units, and its height is 6.5 units. What is the number of cubic units in its volume? Show that it is 65.-/
theorem mathd_algebra_478 (b h v : ℝ) (h₀ : 0 < b ∧ 0 < h ∧ 0 < v) (h₁ : v = 1 / 3 * (b * h))
    (h₂ : b = 30) (h₃ : h = 13 / 2) : v = 65 := by
sorry```<｜Assistant｜>

Here is my example model output:


```lean4
theorem mathd_algebra_478 (b h v : ℝ) (h₀ : 0 < b ∧ 0 < h ∧ 0 < v) (h₁ : v = 1 / 3 * (b * h))
    (h₂ : b = 30) (h₃ : h = 13 / 2) : v = 65 := by
  rw [h₂, h₃] at h₁
  norm_num at h₁
  linarith
```
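Since the model output contains no header, extract_code returns only the theorem block and the script prepends the header (LEAN4_DEFAULT_HEADER by default) before verification; a minimal check, reusing the definitions from the script above:

sample = data_list[0]["model_outputs"][0]   # e.g. the output shown above
code = extract_code(sample)                 # body of the last ```lean4 block
full = LEAN4_DEFAULT_HEADER + code          # what gets written to to_inference_codes
assert full.startswith("import Mathlib")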

Thank you very much!
