Merged (changes from 4 commits)
21 changes: 21 additions & 0 deletions .devcontainer.json
@@ -0,0 +1,21 @@
{
"build": {
"dockerfile": "Dockerfile"
},
"workspaceFolder": "/workspace",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter"
],
"settings": {
"terminal.integrated.defaultProfile.linux": "zsh",
"terminal.integrated.profiles.linux": { "zsh": { "path": "/bin/zsh" } }
}
}
},
"mounts": [
"source=${localWorkspaceFolder},target=/workspace,type=bind"
]
}
2 changes: 2 additions & 0 deletions .dockerignore
@@ -0,0 +1,2 @@
*
!requirements*.txt
34 changes: 0 additions & 34 deletions .github/workflows/ci-ipu.yaml

This file was deleted.

16 changes: 9 additions & 7 deletions .github/workflows/ci.yaml
@@ -15,17 +15,19 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- name: Install dependencies
- name: Checkout code
uses: actions/checkout@v3

- name: Build Docker Image
run: |
sudo apt-get update
sudo apt-get install -y git
pip install -r requirements-dev.txt
docker build -t unit-scaling-dev:latest .

- name: Run CI
run: ./dev ci
run: docker run --rm -v $(pwd):/workspace unit-scaling-dev:latest ./dev ci

- name: Publish documentation
if: ${{github.ref == 'refs/heads/main'}}
uses: Cecilapp/GitHub-Pages-deploy@v3
env: { GITHUB_TOKEN: "${{ github.token }}" }
with:
build_dir: docs/_build/html
build_dir: docker run --rm -v $(pwd):/workspace unit-scaling-dev:latest docs/_build/html
Collaborator: Is this right - I thought it looked like a dir not a command?

Contributor (author): It's completely mad! No idea what's going on there. Removed

39 changes: 39 additions & 0 deletions Dockerfile
@@ -0,0 +1,39 @@
# Use PyTorch base image
FROM pytorch/pytorch:latest

# Install additional dependencies
RUN apt-get update && apt-get install -y \
git \
vim \
sudo \
make \
g++ \
zsh \
&& chsh -s /bin/zsh \
&& apt-get clean && rm -rf /var/lib/apt/lists/* # cleanup (smaller image)

# Set working directory
WORKDIR /workspace

# Install Python dependencies
COPY requirements-dev.txt .
RUN pip install -r requirements-dev.txt

# Configure a non-root user with sudo privileges
# Change this to your preferred username (a trailing comment on the ARG
# line itself would become part of the value)
ARG USERNAME=developer
ARG USER_UID=1001
ARG USER_GID=$USER_UID
RUN groupadd --gid $USER_GID $USERNAME \
&& useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \
&& echo "$USERNAME ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME

# Put pip-installed executables on $PATH & set the correct locale
# (declared after ARG USERNAME so the variable is defined here)
ENV PATH="$PATH:/home/$USERNAME/.local/bin" \
LC_ALL=C.UTF-8

USER $USERNAME

# Creates basic .zshrc
RUN sudo cp /etc/zsh/newuser.zshrc.recommended /home/$USERNAME/.zshrc

CMD ["/bin/zsh"]
Collaborator: Why zsh, out of interest? Just personal preference? (No objection, just curious.)

32 changes: 1 addition & 31 deletions analysis/almost_scaled_dot_product_attention/demo_transformer.py
@@ -10,13 +10,6 @@
from torch import nn, Tensor
import tqdm

try:
import poptorch

poptorch_available = True
except ModuleNotFoundError:
poptorch_available = False


class Config(dict):
def __init__(self, *args: Any, **kwargs: Any):
@@ -132,7 +125,7 @@ def forward(self, indices: Tensor) -> Tensor:
)


def train_cpu() -> Tensor:
def train() -> Tensor:
model = Model()
opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
losses = []
@@ -143,26 +136,3 @@ def train_cpu() -> Tensor:
opt.step()
losses.append(float(loss))
return torch.tensor(losses)


def train_ipu() -> Tensor:
model = Model()
options = poptorch.Options()
options.showCompilationProgressBar(False)
opt = torch.optim.Adam(model.parameters(), lr=CONFIG.lr)
session = poptorch.trainingModel(model, options, opt)
try:
return torch.tensor(
[
float(session(batch.int()))
for batch in tqdm.tqdm(
islice(batches(), CONFIG.steps), total=CONFIG.steps
)
]
)
finally:
session.destroy()


def train() -> Tensor:
return train_ipu() if poptorch_available else train_cpu()
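
With the IPU path removed, the demo's only entry point is the CPU loop above. A minimal usage sketch follows (it assumes the script is importable as a module; `train` is the function in this diff):

```python
# Hedged sketch: run the simplified demo loop and inspect the losses.
# Assumes demo_transformer.py is on the import path.
from demo_transformer import train

losses = train()          # 1-D tensor of per-step training losses
print(float(losses[-1]))  # final loss
```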
2 changes: 1 addition & 1 deletion docs/development.md
@@ -8,7 +8,7 @@ For users who wish to develop using this codebase, the following setup is requir
python3 -m venv .venv
echo "export PYTHONPATH=\${PYTHONPATH}:\$(dirname \${VIRTUAL_ENV})" >> .venv/bin/activate
source .venv/bin/activate
pip install -r requirements-dev.txt # Or requirements-dev-ipu.txt for the ipu
pip install -r requirements-dev.txt
```

**Subsequent setup**:
12 changes: 0 additions & 12 deletions requirements-dev-ipu.txt

This file was deleted.

42 changes: 28 additions & 14 deletions requirements-dev.txt
@@ -1,14 +1,28 @@
-r requirements.txt
black==24.3.0
flake8==6.0.0
isort==5.12.0
mypy==1.2.0
myst-parser==1.0.0
pandas-stubs==2.0.2.230605
pytest==7.3.1
pytest-cov==4.0.0
sphinx==6.2.1
sphinx-rtd-theme==1.2.0
transformers==4.38.0
types-Pygments==2.15.0.0
types-tabulate==0.9.0.2
# Look in pytorch-cpu first, then pypi second
--index-url https://download.pytorch.org/whl/cpu
--extra-index-url=https://pypi.org/simple

# Same as requirements.txt, but with versions locked-in
datasets==3.1.0
docstring-parser==0.16
einops==0.8.0
numpy==2.1.3
seaborn==0.13.2
tabulate==0.9.0
torch==2.5.1+cpu

# Additional dev requirements
black==24.10.0
flake8==7.1.1
isort==5.13.2
mypy==1.13.0
myst-parser==4.0.0
pandas-stubs==2.2.3.241009
pytest==8.3.3
pytest-cov==6.0.0
sphinx==8.1.3
sphinx-rtd-theme==3.0.1
transformers==4.46.1
triton==3.1.0
types-Pygments==2.18.0.20240506
types-tabulate==0.9.0.20240106
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,7 +1,7 @@
datasets
docstring-parser
einops
numpy<2.0
numpy
seaborn
tabulate
torch>=2.2
6 changes: 0 additions & 6 deletions setup.cfg
@@ -8,12 +8,6 @@ show_error_codes = true
strict = true
check_untyped_defs = true

[mypy-poptorch.*]
ignore_missing_imports = True

[mypy-poptorch_experimental_addons.*]
ignore_missing_imports = True

# As torch.fx doesn't explicitly export many of its useful modules.
[mypy-torch.fx]
implicit_reexport = True
9 changes: 0 additions & 9 deletions setup.py
@@ -5,15 +5,6 @@
import setuptools

requirements = Path("requirements.txt").read_text().rstrip("\n").split("\n")
try:
import poptorch

# This should match requirements-dev-ipu.txt
requirements.append(
"poptorch-experimental-addons @ git+https://github.com/graphcore-research/poptorch-experimental-addons@beb12678d1e7ea2c033bd061d32167be262dfa58"
)
except ImportError:
pass

version = re.search("__version__ = \"(.+)\"", Path("unit_scaling/_version.py").read_text()).group(1)

4 changes: 2 additions & 2 deletions unit_scaling/analysis.py
@@ -12,8 +12,8 @@
import matplotlib.colors
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns # type: ignore[import]
from datasets import load_dataset # type: ignore[import]
import seaborn as sns # type: ignore[import-untyped]
from datasets import load_dataset # type: ignore[import-untyped]
from torch import Tensor, nn
from torch.fx.graph import Graph
from torch.fx.node import Node
30 changes: 0 additions & 30 deletions unit_scaling/formats.py
@@ -10,14 +10,6 @@

from ._internal_utils import generate__all__

try: # pragma: no cover
import poptorch
import poptorch_experimental_addons as pea

_poptorch_available = True
except ImportError: # pragma: no cover
_poptorch_available = False

Shape = Tuple[int, ...]


@@ -68,14 +60,6 @@ def min_absolute_subnormal(self) -> float:

def quantise(self, x: Tensor) -> Tensor:
"""Non-differentiably quantise the given tensor in this format."""
if _poptorch_available and poptorch.isRunningOnIpu():
return pea.quantise_fpx( # type: ignore[no-any-return]
x,
exponent_bits=self.exponent_bits,
mantissa_bits=self.mantissa_bits,
rounding=self.rounding,
) # pragma: no cover

absmax = self.max_absolute_value
downscale = 2.0 ** (127 - 2 ** (self.exponent_bits - 1))
mask = torch.tensor(2 ** (23 - self.mantissa_bits) - 1, device=x.device)
@@ -108,13 +92,6 @@ def quantise(self, x: Tensor) -> Tensor:

def quantise_fwd(self, x: Tensor) -> Tensor:
"""Quantise the given tensor in the forward pass only."""
if _poptorch_available and poptorch.isRunningOnIpu():
return pea.quantise_fpx_ste( # type: ignore[no-any-return]
x,
exponent_bits=self.exponent_bits,
mantissa_bits=self.mantissa_bits,
rounding=self.rounding,
) # pragma: no cover

class QuantiseForward(torch.autograd.Function):
@staticmethod
@@ -131,13 +108,6 @@ def backward( # type:ignore[override]

def quantise_bwd(self, x: Tensor) -> Tensor:
"""Quantise the given tensor in the backward pass only."""
if _poptorch_available and poptorch.isRunningOnIpu():
return pea.quantise_fpx_grad( # type: ignore[no-any-return]
x,
exponent_bits=self.exponent_bits,
mantissa_bits=self.mantissa_bits,
rounding=self.rounding,
) # pragma: no cover

class QuantiseBackward(torch.autograd.Function):
@staticmethod
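
With the poptorch branches gone, `quantise`, `quantise_fwd` and `quantise_bwd` always take the pure-PyTorch path above. A hedged usage sketch follows; `FPFormat` and its constructor arguments are assumptions pieced together from the `exponent_bits`/`mantissa_bits` attributes in this diff, so check `unit_scaling/formats.py` for the real names:

```python
# Hedged sketch: quantise a tensor with an FP8-like format.
# FPFormat and its constructor are assumed -- this diff only shows the
# exponent_bits / mantissa_bits attributes and the quantise* methods.
import torch
from unit_scaling.formats import FPFormat

fmt = FPFormat(exponent_bits=4, mantissa_bits=3)  # E4M3-style format
x = torch.randn(16)
y = fmt.quantise(x)          # non-differentiable quantisation
y_fwd = fmt.quantise_fwd(x)  # quantise in the forward pass only
y_bwd = fmt.quantise_bwd(x)  # quantise in the backward pass only
```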
10 changes: 5 additions & 5 deletions unit_scaling/optim.py
@@ -123,8 +123,8 @@ def scaled_parameters(
result = []
for entry in params:
group = dict(params=[entry]) if isinstance(entry, Tensor) else entry.copy()
group.setdefault("lr", lr)
group.setdefault("weight_decay", weight_decay)
group.setdefault("lr", lr) # type: ignore[arg-type]
group.setdefault("weight_decay", weight_decay) # type: ignore[arg-type]
if group["lr"] is None:
raise ValueError(
"scaled_params() requires lr to be provided,"
@@ -133,10 +133,10 @@
for param in group["params"]:
# Careful not to overwrite `lr` or `weight_decay`
param_lr = group["lr"]
if has_parameter_data(param):
if has_parameter_data(param): # type: ignore[arg-type]
if isinstance(param_lr, Tensor):
param_lr = param_lr.clone()
param_lr *= lr_scale_func(param)
param_lr *= lr_scale_func(param) # type: ignore[operator]
elif not allow_non_unit_scaling_params:
raise ValueError(
"Non-unit-scaling parameter (no mup_type),"
@@ -145,7 +145,7 @@
param_weight_decay = group["weight_decay"]
if independent_weight_decay:
# Note: only independent of peak LR, not of schedule
param_weight_decay /= float(param_lr)
param_weight_decay /= float(param_lr) # type: ignore

result.append(
dict(
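
For context, here is a hedged sketch of how `scaled_parameters` feeds a stock optimizer. The keyword names come from this diff (`lr_scale_func`, `independent_weight_decay`, `allow_non_unit_scaling_params`), but the full signature is an assumption; see `unit_scaling/optim.py`:

```python
# Hedged sketch: build scaled parameter groups for a standard optimizer.
# The keyword names below appear in this diff, but the exact signature
# of scaled_parameters is an assumption.
import torch
from unit_scaling.optim import scaled_parameters

model = torch.nn.Linear(16, 16)
params = scaled_parameters(
    model.parameters(),
    lr_scale_func=lambda p: 1.0,  # per-parameter LR scaling, as called above
    lr=1e-3,                      # must be provided, else ValueError (see above)
    weight_decay=1e-2,
    # Plain torch parameters carry no mup_type, which would otherwise raise:
    allow_non_unit_scaling_params=True,
)
opt = torch.optim.AdamW(params)
```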