diff --git a/README.md b/README.md
index 458b012..801288b 100644
--- a/README.md
+++ b/README.md
@@ -205,6 +205,15 @@ The downloaded dataset can be placed in the `/data` folder. The overall director
+### :floppy_disk: Custom Data Preparation
+
+If you want to fine-tune on your own dataset (non-RLDS), you can modify `vla-scripts/finetune.py`.
+We provide a commented block in `finetune.py` (around line 932) that demonstrates how to swap the RLDS dataset for a standard PyTorch Dataset.
+You will need to implement a dataset class that returns the appropriate dictionary format (input_ids, pixel_values, labels, etc.).
+
+
+
+
## ⚓ VLM backbone
We use the `Prismatic-VLMs` architecture. Since the file is large, please download it from [here](https://huggingface.co/Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b). Then put it in the `/pretrained_models` folder. The file structure is:
diff --git a/experiments/robot/libero/run_libero_eval.py b/experiments/robot/libero/run_libero_eval.py
index cb84517..cdcef96 100644
--- a/experiments/robot/libero/run_libero_eval.py
+++ b/experiments/robot/libero/run_libero_eval.py
@@ -17,7 +17,13 @@
import draccus
import numpy as np
import tqdm
-from libero.libero import benchmark
+try:
+ from libero.libero import benchmark
+except ImportError:
+ print("ERROR: Failed to import 'libero'. Please ensure you have installed the LIBERO benchmark dependencies.")
+ print("See README.md for installation instructions: https://github.com/Lifelong-Robot-Learning/LIBERO")
+ print("Run: pip install -e LIBERO and pip install -r experiments/robot/libero/libero_requirements.txt")
+ raise SystemExit(1)
import wandb
diff --git a/prismatic/vla/datasets/rlds/dataset.py b/prismatic/vla/datasets/rlds/dataset.py
index f07215a..ebd951f 100644
--- a/prismatic/vla/datasets/rlds/dataset.py
+++ b/prismatic/vla/datasets/rlds/dataset.py
@@ -563,6 +563,7 @@ def make_interleaved_dataset(
# Validation =>> fix a single shuffle buffer of data and cache it in RAM; prevents gradual memory increase!
if not train:
dataset = dataset.take(shuffle_buffer_size).cache()
+ dataset_len = min(dataset_len, shuffle_buffer_size)
# Shuffle the Dataset
# =>> IMPORTANT :: Shuffle AFTER .cache(), or else memory will still leak!
diff --git a/pyproject.toml b/pyproject.toml
index 245d9b3..7941e87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,9 +44,9 @@ dependencies = [
"sentencepiece==0.1.99",
"timm==0.9.10",
"tokenizers==0.19.1",
- "torch==2.2.0",
- "torchvision==0.17.0",
- "torchaudio==2.2.0",
+ "torch>=2.2.0",
+ "torchvision>=0.17.0",
+ "torchaudio>=2.2.0",
"transformers @ git+https://github.com/moojink/transformers-openvla-oft.git", # IMPORTANT: Use this fork for bidirectional attn (for parallel decoding)
"wandb",
"tensorflow==2.15.0",
diff --git a/vla-scripts/finetune.py b/vla-scripts/finetune.py
index 03263c1..7fd1d4d 100644
--- a/vla-scripts/finetune.py
+++ b/vla-scripts/finetune.py
@@ -88,7 +88,7 @@ class FinetuneConfig:
# Training configuration
batch_size: int = 8 # Batch size per device (total batch size = batch_size * num GPUs)
- learning_rate: float = 5e-4 # Learning rate
+ learning_rate: float = 2e-4 # Learning rate
lr_warmup_steps: int = 0.1 # Number of steps to warm up learning rate (from 10% to 100%)
num_steps_before_decay: int = 100000 # Number of steps before LR decays by 10x
grad_accumulation_steps: int = 1 # Number of gradient accumulation steps
@@ -991,6 +991,7 @@ def rename_state_dict_keys(state_dict, replace_map):
sampler=None,
collate_fn=collator,
num_workers=0, # Important: Set to 0 if using RLDS, which uses its own parallelism
+ pin_memory=True,
)
print('Len of dataloader: ', len(dataloader))
if cfg.use_val_set:
@@ -1001,6 +1002,7 @@ def rename_state_dict_keys(state_dict, replace_map):
sampler=None,
collate_fn=collator,
num_workers=0, # Important: Set to 0 if using RLDS, which uses its own parallelism
+ pin_memory=True,
)
# Deque to store recent train metrics (used for computing smoothened metrics for gradient accumulation)