
The 7B model cannot achieve the pass@1 reported in the paper #8

@kadjasiodbi

Description

Thank you so much for open-sourcing the model!

I modified the Goedel-Prover inference script to test your 7B model, but the non-CoT pass@1 only reaches about 47.5%. I suspect there is an issue with my code or with the instructions in the data.
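For clarity, by pass@1 I mean the standard unbiased pass@k estimator from the Codex paper, averaged over problems; a minimal sketch (pass_at_k is just an illustrative helper, not part of my script):

from math import comb

def pass_at_k(n: int, c: int, k: int) -> float:
    # Unbiased pass@k estimator: n samples per problem, c of them correct
    if n - c < k:
        return 1.0
    return 1.0 - comb(n - c, k) / comb(n, k)

# e.g. with 64 samples per problem:
# pass_at_1 = sum(pass_at_k(64, c, 1) for c in num_correct) / len(num_correct)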

Here is my code for inference:

import argparse
import json
import os
import re

import torch
from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument('--input_path', type=str)   # e.g. datasets/minif2f.jsonl
parser.add_argument('--model_path', type=str)   # e.g. Goedel-LM/Goedel-Prover-SFT
parser.add_argument('--output_dir', type=str)   # e.g. results/test
parser.add_argument('--split', default="none", type=str)
parser.add_argument('--n', default=64, type=int)
parser.add_argument('--gpu', default=1, type=int)

torch.manual_seed(30)

args = parser.parse_args()

model_name = args.model_path


data_path = args.input_path
# Initialize an empty list to hold the dictionaries
data_list = []

# Open the file and read each line, keeping only the requested split
with open(data_path, 'r') as file:
    for line in file:
        data = json.loads(line)
        if args.split == "none":
            data_list.append(data)
        else:
            try:
                int_split = int(args.split)
            except ValueError:
                int_split = None
            if int_split is not None:
                if int(data["split"]) == int_split:
                    data_list.append(data)
            elif data["split"] == args.split:
                data_list.append(data)

LEAN4_DEFAULT_HEADER = "import Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\n\nopen BigOperators Real Nat Topology Rat\n\n"

model_inputs = []
for data in data_list:
    # Prompt template following deepseek-prover-v2
    prompt = "Complete the following Lean 4 code:\n\n```lean4\n{header}{informal_prefix}{formal_statement}sorry```".format(
        header=data.get('header', LEAN4_DEFAULT_HEADER),
        informal_prefix=data.get('informal_prefix', str()),
        formal_statement=data['formal_statement'],
    )

    # Wrap the prompt in the DeepSeek chat markers
    prompt = f"<｜begin▁of▁sentence｜><｜User｜>{prompt}<｜Assistant｜>"
    model_inputs.append(prompt)

model = LLM(
    model=model_name,
    seed=1,
    enforce_eager=True,
    trust_remote_code=True,
    swap_space=8,
    gpu_memory_utilization=0.92,
    tensor_parallel_size=args.gpu,
    max_model_len=8192,
    max_num_batched_tokens=8192,
    max_num_seqs=32,
)


sampling_params = SamplingParams(
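    # temperature=0.0 selects greedy decoding, so all n samples will be identical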
    temperature=0.0,
    max_tokens=8192,
    top_p=0.95,
    n=args.n,
)

model_outputs = model.generate(
    model_inputs,
    sampling_params,
    use_tqdm=True,
)

assert len(model_outputs) == len(model_inputs)

def extract_code(inputs):
    # Return the body of the last ```lean4 ... ``` block, or "None" if no block is found
    try:
        matches = re.finditer(r'```lean4\n(.*?)\n```', inputs, re.DOTALL)
        last_match = None
        for match in matches:
            last_match = match
        return last_match.group(1) if last_match else "None"
    except Exception:
        return "None"
    

to_inference_codes = []
for i in range(len(data_list)):
    data_list[i]["model_input"] = model_inputs[i]
    data_list[i]["model_outputs"] = [output.text for output in model_outputs[i].outputs]

    # Extract the Lean block from each sample, drop failed extractions, then
    # prepend the header (the filter must run before the header is added,
    # otherwise no entry can ever equal "None")
    extracted = [extract_code(output.text) for output in model_outputs[i].outputs]
    header = data_list[i].get('header', LEAN4_DEFAULT_HEADER)
    data_list[i]["full_code"] = [header + code for code in extracted if code != "None"]

    name_key = "problem_id" if "problem_id" in data_list[i] else "name"
    to_inference_codes += [{"name": data_list[i][name_key], "code": code} for code in data_list[i]["full_code"]]

os.makedirs(args.output_dir, exist_ok=True)

output_file_path = f'{args.output_dir}/full_records.json'
print(f"Outputting to {output_file_path}")
# Dump the full records (inputs, raw outputs, extracted code) with indents
with open(output_file_path, 'w') as json_file:
    json.dump(data_list, json_file, indent=4)

toinfer_file_path = f'{args.output_dir}/to_inference_codes.json'
print(f"Outputting to {toinfer_file_path}")
# Dump the name/code pairs to a JSON file with indents
with open(toinfer_file_path, 'w') as json_file:
    json.dump(to_inference_codes, json_file, indent=4)
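I invoke the script like this (the file name infer.py is just what I call it locally; the paths match the comments in the argparse section):

python infer.py \
    --input_path datasets/minif2f.jsonl \
    --model_path Goedel-LM/Goedel-Prover-SFT \
    --output_dir results/test \
    --n 64 \
    --gpu 1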

Here is my example instruction:

<｜begin▁of▁sentence｜><｜User｜>Complete the following Lean 4 code:

```lean4
import Mathlib
import Aesop

set_option maxHeartbeats 0

open BigOperators Real Nat Topology Rat

/-- The volume of a cone is given by the formula $V = \frac{1}{3}Bh$, where $B$ is the area of the base and $h$ is the height. The area of the base of a cone is 30 square units, and its height is 6.5 units. What is the number of cubic units in its volume? Show that it is 65.-/
theorem mathd_algebra_478 (b h v : ℝ) (h₀ : 0 < b ∧ 0 < h ∧ 0 < v) (h₁ : v = 1 / 3 * (b * h))
    (h₂ : b = 30) (h₃ : h = 13 / 2) : v = 65 := by
sorry```<｜Assistant｜>

Here is my example model output:


```lean4
theorem mathd_algebra_478 (b h v : ℝ) (h₀ : 0 < b ∧ 0 < h ∧ 0 < v) (h₁ : v = 1 / 3 * (b * h))
    (h₂ : b = 30) (h₃ : h = 13 / 2) : v = 65 := by
  rw [h₂, h₃] at h₁
  norm_num at h₁
  linarith
```
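Since the model output contains no header, extract_code returns only the theorem block and the script prepends the header (LEAN4_DEFAULT_HEADER by default) before verification; a minimal check, reusing the definitions from the script above:

sample = data_list[0]["model_outputs"][0]   # e.g. the output shown above
code = extract_code(sample)                 # body of the last ```lean4 block
full = LEAN4_DEFAULT_HEADER + code          # what gets written to to_inference_codes
assert full.startswith("import Mathlib")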

Thank you very much!
