Skip to content

Commit de45d16

Browse files
authored
v1.5.0
1 parent 50c1532 commit de45d16

27 files changed

+651
-44
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
Simple binder design pipeline using AlphaFold2 backpropagation, MPNN, and PyRosetta. Select your target and let the script do the rest of the work and finish once you have enough designs to order!
55

6-
[Preprint link for BindCraft](https://www.biorxiv.org/content/10.1101/2024.09.30.615802v1)
6+
[Take the user experience poll!](https://forms.gle/XsGHDCyHtczVbamPA)
7+
8+
[Preprint link for BindCraft](https://www.biorxiv.org/content/10.1101/2024.09.30.615802)
79

810
## Installation
911
First you need to clone this repository. Replace **[install_folder]** with the path where you want to install it.
@@ -72,6 +74,8 @@ rm_template_seq_design -> remove target template sequence for design (i
7274
rm_template_seq_predict -> remove target template sequence for reprediction (increases target flexibility)
7375
rm_template_sc_design -> remove sidechains from target template for design
7476
rm_template_sc_predict -> remove sidechains from target template for reprediction
77+
predict_initial_guess -> Introduce bias by providing binder atom positions as a starting point for prediction. Recommended if designs fail after MPNN optimization.
78+
predict_bigbang -> Introduce atom position bias into the structure module for atom initialisation. Recommended if target and design are large (more than 600 amino acids).
7579
7680
# Design iterations
7781
soft_iterations -> number of soft iterations (all amino acids considered at all positions)
@@ -100,6 +104,7 @@ use_rg_loss -> use radius of gyration loss?
100104
weights_rg -> Design weight - radius of gyration weight for binder
101105
use_termini_distance_loss -> Try to minimise distance between N- and C-terminus of binder? Helpful for grafting
102106
weights_termini_loss -> Design weight - N- and C-terminus distance minimisation weight of binder
107+
cyclize_peptide -> Make the binder/peptide design cyclic
103108
104109
# MPNN settings
105110
mpnn_fix_interface -> whether to fix the interface designed in the starting trajectory

bindcraft.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@
5858
####################################
5959
### initialise PyRosetta
6060
pr.init(f'-ignore_unrecognized_res -ignore_zero_occupancy -mute all -holes:dalphaball {advanced_settings["dalphaball_path"]} -corrections::beta_nov16 true -relax:default_repeats 1')
61+
print(f"Running binder design for target {settings_file}")
62+
print(f"Design settings used: {advanced_file}")
63+
print(f"Filtering designs based on {filters_file}")
6164

6265
####################################
6366
# initialise counters
@@ -119,7 +122,7 @@
119122
print("")
120123

121124
# Proceed if there is no trajectory termination signal
122-
if trajectory.aux["log"]['terminate'] == "":
125+
if trajectory.aux["log"]["terminate"] == "":
123126
# Relax binder to calculate statistics
124127
trajectory_relaxed = os.path.join(design_paths["Trajectory/Relaxed"], design_name + ".pdb")
125128
pr_relax(trajectory_pdb, trajectory_relaxed)
@@ -194,9 +197,13 @@
194197
clear_mem()
195198
# compile complex prediction model
196199
complex_prediction_model = mk_afdesign_model(protocol="binder", num_recycles=advanced_settings["num_recycles_validation"], data_dir=advanced_settings["af_params_dir"],
197-
use_multimer=multimer_validation)
198-
complex_prediction_model.prep_inputs(pdb_filename=target_settings["starting_pdb"], chain=target_settings["chains"], binder_len=length, rm_target_seq=advanced_settings["rm_template_seq_predict"],
199-
rm_target_sc=advanced_settings["rm_template_sc_predict"])
200+
use_multimer=multimer_validation, use_initial_guess=advanced_settings["predict_initial_guess"], use_initial_atom_pos=advanced_settings["predict_bigbang"])
201+
if advanced_settings["predict_initial_guess"] or advanced_settings["predict_bigbang"]:
202+
complex_prediction_model.prep_inputs(pdb_filename=trajectory_pdb, chain='A', binder_chain='B', binder_len=length, use_binder_template=True, rm_target_seq=advanced_settings["rm_template_seq_predict"],
203+
rm_target_sc=advanced_settings["rm_template_sc_predict"], rm_template_ic=True)
204+
else:
205+
complex_prediction_model.prep_inputs(pdb_filename=target_settings["starting_pdb"], chain=target_settings["chains"], binder_len=length, rm_target_seq=advanced_settings["rm_template_seq_predict"],
206+
rm_target_sc=advanced_settings["rm_template_sc_predict"])
200207

201208
# compile binder monomer prediction model
202209
binder_prediction_model = mk_afdesign_model(protocol="hallucination", use_templates=False, initial_guess=False,
@@ -221,7 +228,7 @@
221228
save_fasta(mpnn_design_name, mpnn_sequence['seq'], design_paths)
222229

223230
### Predict mpnn redesigned binder complex using masked templates
224-
mpnn_complex_statistics, pass_af2_filters = masked_binder_predict(complex_prediction_model,
231+
mpnn_complex_statistics, pass_af2_filters = predict_binder_complex(complex_prediction_model,
225232
mpnn_sequence['seq'], mpnn_design_name,
226233
target_settings["starting_pdb"], target_settings["chains"],
227234
length, trajectory_pdb, prediction_models, advanced_settings,
@@ -452,4 +459,4 @@
452459
### Script finished
453460
elapsed_time = time.time() - script_start_time
454461
elapsed_text = f"{'%d hours, %d minutes, %d seconds' % (int(elapsed_time // 3600), int((elapsed_time % 3600) // 60), int(elapsed_time % 60))}"
455-
print("Finished all designs. Script execution for "+str(trajectory_n)+" trajectories took: "+elapsed_text)
462+
print("Finished all designs. Script execution for "+str(trajectory_n)+" trajectories took: "+elapsed_text)

bindcraft.slurm

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@ while true ; do
3737
esac
3838
done
3939

40+
# Ensure that SETTINGS is not empty
41+
if [ -z "$SETTINGS" ]; then
42+
echo "Error: The -s or --settings option is required."
43+
exit 1
44+
fi
45+
4046
echo "Running the BindCraft pipeline"
41-
echo "Running binder design for target ${SETTINGS}"
42-
echo "Design settings used: ${ADVANCED}"
43-
echo "Filtering designs based on ${FILTERS}"
4447
python -u "${SCRIPT_DIR}/bindcraft.py" --settings "${SETTINGS}" --filters "${FILTERS}" --advanced "${ADVANCED}"

functions/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
#os.environ["SLURM_STEP_NODELIST"] = os.environ["SLURM_NODELIST"]
1818
warnings.simplefilter(action='ignore', category=FutureWarning)
1919
warnings.simplefilter(action='ignore', category=DeprecationWarning)
20-
warnings.simplefilter(action='ignore', category=BiopythonWarning)
20+
warnings.simplefilter(action='ignore', category=BiopythonWarning)

functions/colabdesign_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def binder_hallucination(design_name, starting_pdb, chain, target_hotspot_residu
236236
return af_model
237237

238238
# run prediction for binder with masked template target
239-
def masked_binder_predict(prediction_model, binder_sequence, mpnn_design_name, target_pdb, chain, length, trajectory_pdb, prediction_models, advanced_settings, filters, design_paths, failure_csv, seed=None):
239+
def predict_binder_complex(prediction_model, binder_sequence, mpnn_design_name, target_pdb, chain, length, trajectory_pdb, prediction_models, advanced_settings, filters, design_paths, failure_csv, seed=None):
240240
prediction_stats = {}
241241

242242
# clean sequence
@@ -246,6 +246,10 @@ def masked_binder_predict(prediction_model, binder_sequence, mpnn_design_name, t
246246
pass_af2_filters = True
247247
filter_failures = {}
248248

249+
if advanced_settings["cyclize_peptide"]:
250+
# make macrocycle peptide
251+
add_cyclic_offset(prediction_model)
252+
249253
# start prediction per AF2 model, 2 are used by default due to masked templates
250254
for model_num in prediction_models:
251255
# check to make sure prediction does not exist already
@@ -313,6 +317,10 @@ def predict_binder_alone(prediction_model, binder_sequence, mpnn_design_name, le
313317
binder_sequence = re.sub("[^A-Z]", "", binder_sequence.upper())
314318
prediction_model.set_seq(binder_sequence)
315319

320+
if advanced_settings["cyclize_peptide"]:
321+
# make macrocycle peptide
322+
add_cyclic_offset(prediction_model)
323+
316324
# predict each model separately
317325
for model_num in prediction_models:
318326
# check to make sure prediction does not exist already

functions/generic_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def save_fasta(design_name, sequence, design_paths):
303303
def clean_pdb(pdb_file):
304304
# Read the pdb file and filter relevant lines
305305
with open(pdb_file, 'r') as f_in:
306-
relevant_lines = [line for line in f_in if line.startswith(('ATOM', 'HETATM', 'MODEL', 'TER', 'END'))]
306+
relevant_lines = [line for line in f_in if line.startswith(('ATOM', 'HETATM', 'MODEL', 'TER', 'END', 'LINK'))]
307307

308308
# Write the cleaned lines back to the original pdb file
309309
with open(pdb_file, 'w') as f_out:

notebooks/BindCraft.ipynb

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,11 @@
165165
"# @markdown ---\n",
166166
"# @markdown Which binder design protocol to run? Default is recommended. \"Beta-sheet\" promotes the design of more beta sheeted proteins, but requires more sampling. \"Peptide\" is optimised for helical peptide binders.\n",
167167
"design_protocol = \"Default\" # @param [\"Default\",\"Beta-sheet\",\"Peptide\"]\n",
168-
"# @markdown What interface design method to use?. \"AlphaFold2\" is the default, interface is generated by AlphaFold2. \"MPNN\" uses soluble MPNN to optimise the interface, but majority of residues still originate from AlphaFold2.\n",
168+
"# @markdown What prediction protocol to use? \"Default\" performs single sequence prediction of the binder. \"HardTarget\" uses initial guess to improve complex prediction for difficult targets, but might introduce some bias.\n",
169+
"prediction_protocol = \"Default\" # @param [\"Default\",\"HardTarget\"]\n",
170+
"# @markdown What interface design method to use? \"AlphaFold2\" is the default, interface is generated by AlphaFold2. \"MPNN\" uses soluble MPNN to optimise the interface.\n",
169171
"interface_protocol = \"AlphaFold2\" # @param [\"AlphaFold2\",\"MPNN\"]\n",
170-
"# @markdown What target template protocol to use? \"Default\" allows for limited amount flexibility. \"Masked\" allows for greater target flexibility on both sidechain and backbone level, but might result in reduced experimental success rates.\n",
172+
"# @markdown What target template protocol to use? \"Default\" allows for a limited amount of flexibility. \"Masked\" allows for greater target flexibility on both sidechain and backbone level.\n",
171173
"template_protocol = \"Default\" # @param [\"Default\",\"Masked\"]\n",
172174
"# @markdown ---\n",
173175
"\n",
@@ -194,6 +196,16 @@
194196
"else:\n",
195197
" raise ValueError(f\"Unsupported template protocol\")\n",
196198
"\n",
199+
"if design_protocol in [\"Peptide\"]:\n",
200+
" prediction_protocol_tag = \"\"\n",
201+
"else:\n",
202+
" if prediction_protocol == \"Default\":\n",
203+
" prediction_protocol_tag = \"\"\n",
204+
" elif prediction_protocol == \"HardTarget\":\n",
205+
" prediction_protocol_tag = \"_hardtarget\"\n",
206+
" else:\n",
207+
" raise ValueError(f\"Unsupported prediction protocol\")\n",
208+
"\n",
197209
"advanced_settings_path = \"/content/bindcraft/settings_advanced/\" + design_protocol_tag + interface_protocol_tag + template_protocol_tag + \".json\"\n",
198210
"\n",
199211
"currenttime = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
@@ -212,7 +224,7 @@
212224
"#@title Filters\n",
213225
"# @markdown ---\n",
214226
"# @markdown Which filters for designs to use? \"Default\" are recommended, \"Peptide\" are for the design of peptide binders, \"Relaxed\" are more permissive but may result in fewer experimental successes, \"Peptide_Relaxed\" are more permissive filters for non-helical peptides, \"None\" is for benchmarking.\n",
215-
"filter_option = \"Peptide\" # @param [\"Default\", \"Peptide\", \"Relaxed\", \"Peptide_Relaxed\", \"None\"]\n",
227+
"filter_option = \"Default\" # @param [\"Default\", \"Peptide\", \"Relaxed\", \"Peptide_Relaxed\", \"None\"]\n",
216228
"# @markdown ---\n",
217229
"\n",
218230
"if filter_option == \"Default\":\n",

settings_advanced/betasheet_4stage_multimer.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
"rm_template_seq_predict": false,
99
"rm_template_sc_design": false,
1010
"rm_template_sc_predict": false,
11+
"predict_initial_guess": false,
12+
"predict_bigbang": false,
1113
"soft_iterations": 75,
1214
"temporary_iterations": 45,
1315
"hard_iterations": 5,
@@ -23,7 +25,7 @@
2325
"intra_contact_distance": 14.0,
2426
"inter_contact_distance": 20.0,
2527
"intra_contact_number": 2,
26-
"inter_contact_number": 1,
28+
"inter_contact_number": 2,
2729
"weights_helicity": -2.0,
2830
"random_helicity": false,
2931
"use_i_ptm_loss": true,
@@ -32,6 +34,7 @@
3234
"weights_rg": 0.3,
3335
"use_termini_distance_loss": false,
3436
"weights_termini_loss": 0.1,
37+
"cyclize_peptide": false,
3538
"enable_mpnn": true,
3639
"mpnn_fix_interface": true,
3740
"num_seqs": 20,
@@ -57,7 +60,7 @@
5760
"max_trajectories": false,
5861
"enable_rejection_check": true,
5962
"acceptance_rate": 0.01,
60-
"start_monitoring": 200,
63+
"start_monitoring": 600,
6164
"af_params_dir": "",
6265
"dssp_path": "",
6366
"dalphaball_path": ""

settings_advanced/betasheet_4stage_multimer_flexible.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
"rm_template_seq_predict": true,
99
"rm_template_sc_design": false,
1010
"rm_template_sc_predict": false,
11+
"predict_initial_guess": false,
12+
"predict_bigbang": false,
1113
"soft_iterations": 75,
1214
"temporary_iterations": 45,
1315
"hard_iterations": 5,
@@ -23,7 +25,7 @@
2325
"intra_contact_distance": 14.0,
2426
"inter_contact_distance": 20.0,
2527
"intra_contact_number": 2,
26-
"inter_contact_number": 1,
28+
"inter_contact_number": 2,
2729
"weights_helicity": -2.0,
2830
"random_helicity": false,
2931
"use_i_ptm_loss": true,
@@ -32,6 +34,7 @@
3234
"weights_rg": 0.3,
3335
"use_termini_distance_loss": false,
3436
"weights_termini_loss": 0.1,
37+
"cyclize_peptide": false,
3538
"enable_mpnn": true,
3639
"mpnn_fix_interface": true,
3740
"num_seqs": 20,
@@ -57,7 +60,7 @@
5760
"max_trajectories": false,
5861
"enable_rejection_check": true,
5962
"acceptance_rate": 0.01,
60-
"start_monitoring": 200,
63+
"start_monitoring": 600,
6164
"af_params_dir": "",
6265
"dssp_path": "",
6366
"dalphaball_path": ""
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"omit_AAs": "C",
3+
"force_reject_AA": false,
4+
"use_multimer_design": true,
5+
"design_algorithm": "4stage",
6+
"sample_models": true,
7+
"rm_template_seq_design": true,
8+
"rm_template_seq_predict": true,
9+
"rm_template_sc_design": false,
10+
"rm_template_sc_predict": false,
11+
"predict_initial_guess": true,
12+
"predict_bigbang": false,
13+
"soft_iterations": 75,
14+
"temporary_iterations": 45,
15+
"hard_iterations": 5,
16+
"greedy_iterations": 15,
17+
"greedy_percentage": 5,
18+
"save_design_animations": true,
19+
"save_design_trajectory_plots": true,
20+
"weights_plddt": 0.15,
21+
"weights_pae_intra": 0.4,
22+
"weights_pae_inter": 0.1,
23+
"weights_con_intra": 0.4,
24+
"weights_con_inter": 0.5,
25+
"intra_contact_distance": 14.0,
26+
"inter_contact_distance": 20.0,
27+
"intra_contact_number": 2,
28+
"inter_contact_number": 2,
29+
"weights_helicity": -2.0,
30+
"random_helicity": false,
31+
"use_i_ptm_loss": true,
32+
"weights_iptm": 0.05,
33+
"use_rg_loss": true,
34+
"weights_rg": 0.3,
35+
"use_termini_distance_loss": false,
36+
"weights_termini_loss": 0.1,
37+
"cyclize_peptide": false,
38+
"enable_mpnn": true,
39+
"mpnn_fix_interface": true,
40+
"num_seqs": 20,
41+
"max_mpnn_sequences": 2,
42+
"sampling_temp": 0.1,
43+
"backbone_noise": 0.00,
44+
"model_path": "v_48_020",
45+
"mpnn_weights": "soluble",
46+
"save_mpnn_fasta": false,
47+
"num_recycles_design": 1,
48+
"num_recycles_validation": 3,
49+
"optimise_beta": true,
50+
"optimise_beta_extra_soft": 0,
51+
"optimise_beta_extra_temp": 0,
52+
"optimise_beta_recycles_design": 3,
53+
"optimise_beta_recycles_valid": 3,
54+
"remove_unrelaxed_trajectory": true,
55+
"remove_unrelaxed_complex": true,
56+
"remove_binder_monomer": true,
57+
"zip_animations": true,
58+
"zip_plots": true,
59+
"save_trajectory_pickle": false,
60+
"max_trajectories": false,
61+
"enable_rejection_check": true,
62+
"acceptance_rate": 0.01,
63+
"start_monitoring": 600,
64+
"af_params_dir": "",
65+
"dssp_path": "",
66+
"dalphaball_path": ""
67+
}

0 commit comments

Comments
 (0)