diff --git a/README.md b/README.md index 458b012..801288b 100644 --- a/README.md +++ b/README.md @@ -205,6 +205,15 @@ The downloaded dataset can be placed in the `/data` folder. The overall director

+### :floppy_disk: Custom Data Preparation + +If you want to fine-tune on your own dataset (non-RLDS), you can modify `vla-scripts/finetune.py`. +We provide a commented example block in `finetune.py` (around line 932; search for the comment if the line number has drifted) that demonstrates how to swap the RLDS dataset for a standard PyTorch `Dataset`. +You will need to implement a dataset class that returns the appropriate dictionary format (`input_ids`, `pixel_values`, `labels`, etc.). + +
+
+ ## ⚓ VLM backbone We use the `Prismatic-VLMs` architecture. Since the file is large, please download it from [here](https://huggingface.co/Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b). Then put it in the `/pretrained_models` folder. The file structure is: diff --git a/experiments/robot/libero/run_libero_eval.py b/experiments/robot/libero/run_libero_eval.py index cb84517..cdcef96 100644 --- a/experiments/robot/libero/run_libero_eval.py +++ b/experiments/robot/libero/run_libero_eval.py @@ -17,7 +17,13 @@ import draccus import numpy as np import tqdm -from libero.libero import benchmark +try: + from libero.libero import benchmark +except ImportError: + print("ERROR: Failed to import 'libero'. Please ensure you have installed the LIBERO benchmark dependencies.") + print("See README.md for installation instructions: https://github.com/Lifelong-Robot-Learning/LIBERO") + print("Run: pip install -e LIBERO and pip install -r experiments/robot/libero/libero_requirements.txt") + raise SystemExit(1) import wandb diff --git a/prismatic/vla/datasets/rlds/dataset.py b/prismatic/vla/datasets/rlds/dataset.py index f07215a..ebd951f 100644 --- a/prismatic/vla/datasets/rlds/dataset.py +++ b/prismatic/vla/datasets/rlds/dataset.py @@ -563,6 +563,7 @@ def make_interleaved_dataset( # Validation =>> fix a single shuffle buffer of data and cache it in RAM; prevents gradual memory increase! if not train: dataset = dataset.take(shuffle_buffer_size).cache() + dataset_len = min(dataset_len, shuffle_buffer_size) # Shuffle the Dataset # =>> IMPORTANT :: Shuffle AFTER .cache(), or else memory will still leak!
diff --git a/pyproject.toml b/pyproject.toml index 245d9b3..7941e87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,9 +44,9 @@ dependencies = [ "sentencepiece==0.1.99", "timm==0.9.10", "tokenizers==0.19.1", - "torch==2.2.0", - "torchvision==0.17.0", - "torchaudio==2.2.0", + "torch>=2.2.0", + "torchvision>=0.17.0", + "torchaudio>=2.2.0", "transformers @ git+https://github.com/moojink/transformers-openvla-oft.git", # IMPORTANT: Use this fork for bidirectional attn (for parallel decoding) "wandb", "tensorflow==2.15.0", diff --git a/vla-scripts/finetune.py b/vla-scripts/finetune.py index 03263c1..7fd1d4d 100644 --- a/vla-scripts/finetune.py +++ b/vla-scripts/finetune.py @@ -88,7 +88,7 @@ class FinetuneConfig: # Training configuration batch_size: int = 8 # Batch size per device (total batch size = batch_size * num GPUs) - learning_rate: float = 5e-4 # Learning rate + learning_rate: float = 2e-4 # Learning rate lr_warmup_steps: int = 0.1 # Number of steps to warm up learning rate (from 10% to 100%) num_steps_before_decay: int = 100000 # Number of steps before LR decays by 10x grad_accumulation_steps: int = 1 # Number of gradient accumulation steps @@ -991,6 +991,7 @@ def rename_state_dict_keys(state_dict, replace_map): sampler=None, collate_fn=collator, num_workers=0, # Important: Set to 0 if using RLDS, which uses its own parallelism + pin_memory=True, ) print('Len of dataloader: ', len(dataloader)) if cfg.use_val_set: @@ -1001,6 +1002,7 @@ def rename_state_dict_keys(state_dict, replace_map): sampler=None, collate_fn=collator, num_workers=0, # Important: Set to 0 if using RLDS, which uses its own parallelism + pin_memory=True, ) # Deque to store recent train metrics (used for computing smoothened metrics for gradient accumulation)