
Commit b9e6223

Authored by jeremyfowers, amd-pworfolk, and danielholanda

Release v5.0.3: Lemonade installer and examples, repo reorg, and lots more (#275)
- Docs, test, and examples have been moved into `turnkey` (CNNs and Transformers) vs. `lemonade` (LLMs) directories (@jeremyfowers).
  - For example: `docs/lemonade/getting_started.md` instead of `docs/lemonade_getting_started.md`.
- Track the memory utilization of any `lemonade` or `turnkey` command and plot it on a graph by setting the `--memory` option (@amd-pworfolk).
- Add examples and demo applications for the high-level LEAP APIs in `examples/lemonade` (@jeremyfowers).
- Add LEAP support for all OGA backends (@jeremyfowers).
- Extend the `llm-prompt` tool to make it more useful for model and framework validation (@amd-pworfolk).
- Updates and fixes to lemonade test code in `llm_api.py` (@jeremyfowers).
- Fix `not_enough_tokens` bug on `oga-bench` (@danielholanda).

Co-authored-by: amd-pworfolk <[email protected]>
Co-authored-by: Daniel Holanda <[email protected]>
1 parent 30eb2eb commit b9e6223


78 files changed: +1555 additions, −561 deletions

.github/workflows/test_lemonade.yml

Lines changed: 10 additions & 2 deletions
```diff
@@ -45,6 +45,7 @@ jobs:
         shell: bash -el {0}
         run: |
           pylint src/lemonade --rcfile .pylintrc --disable E0401
+          pylint examples --rcfile .pylintrc --disable E0401,E0611 --jobs=1
       - name: Test HF+CPU server
         if: runner.os == 'Windows'
         timeout-minutes: 10
@@ -55,7 +56,14 @@ jobs:
       - name: Run lemonade tests
         shell: bash -el {0}
         run: |
-          lemonade -i facebook/opt-125m huggingface-load llm-prompt -p "hi" --max-new-tokens 10
-          python test/llm_api.py
+          # Test CLI
+          lemonade -m -i facebook/opt-125m huggingface-load llm-prompt -p "hi" --max-new-tokens 10
 
+          # Test low-level APIs
+          python test/lemonade/llm_api.py
+
+          # Test high-level LEAP APIs
+          python examples/lemonade/leap_basic.py
+          python examples/lemonade/leap_streaming.py
```

.github/workflows/test_lemonade_oga_cpu.yml

Lines changed: 8 additions & 1 deletion
```diff
@@ -55,6 +55,13 @@ jobs:
         env:
           HF_TOKEN: "${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" # Required by OGA model_builder in OGA 0.4.0 but not future versions
         run: |
+          # Test CLI
           lemonade -i TinyPixel/small-llama2 oga-load --device cpu --dtype int4 llm-prompt -p "tell me a story" --max-new-tokens 5
-          python test/oga_cpu_api.py
+
+          # Test low-level APIs
+          python test/lemonade/oga_cpu_api.py
+
+          # Test high-level LEAP APIs
+          python examples/lemonade/leap_oga_cpu.py
+          python examples/lemonade/leap_oga_cpu_streaming.py
```

.github/workflows/test_turnkey.yml

Lines changed: 16 additions & 47 deletions
```diff
@@ -8,6 +8,11 @@ on:
     branches: ["main", "canary", "refresh"]
   pull_request:
     branches: ["main", "canary", "refresh"]
+    paths:
+      - src/turnkeyml/**
+      - test/turnkey/**
+      - examples/turnkey/**
+      - .github/workflows/test_turnkey.yml
 
 permissions:
   contents: read
@@ -50,68 +55,32 @@ jobs:
         shell: bash -el {0}
         run: |
           # Unit tests
-          python test/unit.py
+          python test/turnkey/unit.py
 
           # turnkey examples
           # Note: we clear the default cache location prior to each block of example runs
           rm -rf ~/.cache/turnkey
-          python examples/api/onnx_opset.py --onnx-opset 15
-          python examples/api/loading_a_build.py
+          python examples/turnkey/api/onnx_opset.py --onnx-opset 15
+          python examples/turnkey/api/loading_a_build.py
 
           rm -rf ~/.cache/turnkey
-          turnkey -i examples/cli/scripts/hello_world.py discover export-pytorch benchmark
+          turnkey -i examples/turnkey/cli/scripts/hello_world.py discover export-pytorch benchmark
           rm -rf ~/.cache/turnkey
-          turnkey -i examples/cli/scripts/multiple_invocations.py discover export-pytorch benchmark
+          turnkey -i examples/turnkey/cli/scripts/multiple_invocations.py discover export-pytorch benchmark
           rm -rf ~/.cache/turnkey
-          turnkey -i examples/cli/scripts/max_depth.py discover --max-depth 1 export-pytorch benchmark
+          turnkey -i examples/turnkey/cli/scripts/max_depth.py discover --max-depth 1 export-pytorch benchmark
           rm -rf ~/.cache/turnkey
-          turnkey -i examples/cli/scripts/two_models.py discover export-pytorch benchmark
+          turnkey -i examples/turnkey/cli/scripts/two_models.py discover export-pytorch benchmark
           rm -rf ~/.cache/turnkey
-          turnkey -i examples/cli/onnx/hello_world.onnx load-onnx benchmark
+          turnkey -i examples/turnkey/cli/onnx/hello_world.onnx load-onnx benchmark
 
           # E2E tests
-          cd test/
+          cd test/turnkey
           python cli.py
           python analysis.py
       - name: Test example plugins
         shell: bash -el {0}
         run: |
           rm -rf ~/.cache/turnkey
-          pip install -e examples/cli/plugins/example_tool
-          turnkey -i examples/cli/scripts/hello_world.py discover export-pytorch example-plugin-tool benchmark
-      # - name: Install and Start Slurm
-      #   if: runner.os != 'Windows'
-      #   shell: bash -el {0}
-      #   run: |
-      #     sudo apt update -y
-      #     sudo apt install slurm-wlm -y
-      #     cp test/helpers/slurm.conf test/helpers/slurm_modified.conf
-      #     sed -i "s/YOUR_HOSTNAME_HERE/$HOSTNAME/" test/helpers/slurm_modified.conf
-      #     sudo mv test/helpers/slurm_modified.conf /etc/slurm/slurm.conf
-      #     sudo service slurmd start
-      #     sudo service slurmctld start
-      #     sudo service munge start
-      # - name: Test turnkey on Slurm
-      #   if: runner.os != 'Windows'
-      #   shell: bash -el {0}
-      #   run: |
-      #     # Create conda environment for Slurm using srun (sbatch + wait)
-      #     export SKIP_REQUIREMENTS_INSTALL="True"
-      #     export TORCH_CPU="True"
-      #     srun src/turnkeyml/cli/setup_venv.sh
-
-      #     # Run tests on Slurm
-      #     export TURNKEY_SLURM_USE_DEFAULT_MEMORY="True"
-      #     turnkey -i models/selftest/linear.py --use-slurm --cache-dir local_cache discover export-pytorch
-      #     bash test/helpers/check_slurm_output.sh slurm-2.out
-
-      # Below tests are commented out as the GitHub runner runs out of space installing the requirements
-      # - name: Check installation of requirements.txt and their compatibility with turnkey
-      #   shell: bash -el {0}
-      #   run: |
-      #     conda create --name test-requirements python=3.8
-      #     conda activate test-requirements
-      #     pip install -r models/requirements.txt
-      #     python -m pip check
-      #     python -c "import torch_geometric"
-      #     conda deactivate
+          pip install -e examples/turnkey/cli/plugins/example_tool
+          turnkey -i examples/turnkey/cli/scripts/hello_world.py discover export-pytorch example-plugin-tool benchmark
```
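The `paths` filter added to the `pull_request` trigger above means this workflow only runs when turnkey-related files change. As a rough sketch of how such a trigger decision behaves, the hypothetical helper below checks a list of changed files against the watched patterns. Note the simplification: GitHub's matcher uses gitignore-style `**` semantics, while Python's `fnmatch` lets `*` match across `/`, which happens to give the same answer for these patterns.

```python
from fnmatch import fnmatch

# Patterns mirroring the workflow's `paths` filter (with * standing in for **).
WATCHED_PATTERNS = [
    "src/turnkeyml/*",
    "test/turnkey/*",
    "examples/turnkey/*",
    ".github/workflows/test_turnkey.yml",
]

def should_run_ci(changed_files):
    """Return True if any changed file matches a watched pattern."""
    return any(
        fnmatch(path, pattern)
        for path in changed_files
        for pattern in WATCHED_PATTERNS
    )

# A change under src/turnkeyml/ triggers the workflow; a docs-only change does not.
print(should_run_ci(["src/turnkeyml/state.py"]))
print(should_run_ci(["docs/lemonade/getting_started.md"]))
```

This mirrors the intent of the filter: turnkey CI is skipped for lemonade-only or docs-only pull requests, which keeps the two tool suites' test loads independent.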

README.md

Lines changed: 3 additions & 3 deletions
```diff
@@ -7,10 +7,10 @@
 
 We are on a mission to make it easy to use the most important tools in the ONNX ecosystem. TurnkeyML accomplishes this by providing no-code CLIs and low-code APIs for both general ONNX workflows with `turnkey` as well as LLMs with `lemonade`.
 
-| [**Lemonade**](https://github.com/onnx/turnkeyml/tree/main/src/turnkeyml/llm) | [**Turnkey**](https://github.com/onnx/turnkeyml/blob/main/docs/classic_getting_started.md) |
+| [**Lemonade**](https://github.com/onnx/turnkeyml/tree/main/src/turnkeyml/llm) | [**Turnkey**](https://github.com/onnx/turnkeyml/blob/main/docs/turnkey/getting_started.md) |
 |:----------------------------------------------: |:-----------------------------------------------------------------: |
-| Serve and benchmark LLMs on CPU, GPU, and NPU. <br/> [Click here to get started with `lemonade`.](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade_getting_started.md) | Export and optimize ONNX models for CNNs and Transformers. <br/> [Click here to get started with `turnkey`.](https://github.com/onnx/turnkeyml/blob/main/docs/classic_getting_started.md) |
-| <img src="img/llm_demo.png"/> | <img src="img/classic_demo.png"/> |
+| Serve and benchmark LLMs on CPU, GPU, and NPU. <br/> [Click here to get started with `lemonade`.](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/getting_started.md) | Export and optimize ONNX models for CNNs and Transformers. <br/> [Click here to get started with `turnkey`.](https://github.com/onnx/turnkeyml/blob/main/docs/turnkey/getting_started.md) |
+| <img src="https://github.com/onnx/turnkeyml/blob/main/img/llm_demo.png?raw=true"/> | <img src="https://github.com/onnx/turnkeyml/blob/main/img/classic_demo.png?raw=true"/> |
 
 
 ## How It Works
```

docs/code.md

Lines changed: 5 additions & 5 deletions
```diff
@@ -5,9 +5,9 @@
 The TurnkeyML source code has a few major top-level directories:
 - `docs`: documentation for the entire project.
 - `examples`: example scripts for use with the TurnkeyML tools.
-  - `examples/cli`: tutorial series starting in `examples/cli/readme.md` to help learn the `turnkey` CLI.
-  - `examples/cli/scripts`: example scripts that can be fed as input into the `turnkey` CLI. These scripts each have a docstring that recommends one or more `turnkey` CLI commands to try out.
-  - `examples/api`: examples scripts that invoke `Tools` via APIs.
+  - `examples/turnkey/cli`: tutorial series starting in `examples/turnkey/cli/readme.md` to help learn the `turnkey` CLI.
+  - `examples/turnkey/cli/scripts`: example scripts that can be fed as input into the `turnkey` CLI. These scripts each have a docstring that recommends one or more `turnkey` CLI commands to try out.
+  - `examples/turnkey/api`: examples scripts that invoke `Tools` via APIs.
 - `models`: the corpora of models that makes up the TurnkeyML models (see [the models readme](https://github.com/onnx/turnkeyml/blob/main/models/readme.md)).
   - Each subdirectory under `models` represents a corpus of models pulled from somewhere on the internet. For example, `models/torch_hub` is a corpus of models from [Torch Hub](https://github.com/pytorch/hub).
 - `src/turnkeyml`: source code for the TurnkeyML package.
@@ -20,8 +20,8 @@ The TurnkeyML source code has a few major top-level directories:
   - `src/turnkeyml/state.py`: implements the `State` class.
   - `src/turnkeyml/files_api.py`: implements the `evaluate_files()` API, which is the top-level API called by the CLI.
 - `test`: tests for the TurnkeyML tools.
-  - `test/analysis.py`: tests focusing on the `discover` `Tool`.
-  - `test/cli.py`: tests focusing on top-level CLI features.
+  - `test/turnkey/analysis.py`: tests focusing on the `discover` `Tool`.
+  - `test/turnkey/cli.py`: tests focusing on top-level CLI features.
 
 ## Tool Classes
```

docs/contribute.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -88,7 +88,7 @@ We require the following naming scheme:
 
 ### Example
 
-See the [example_tool](https://github.com/onnx/turnkeyml/tree/main/examples/cli/plugins/example_tool) plugin for an example.
+See the [example_tool](https://github.com/onnx/turnkeyml/tree/main/examples/turnkey/cli/plugins/example_tool) plugin for an example.
 
 The `__init__.py` file with its `implements` dictionary looks like:
```

docs/lemonade_getting_started.md renamed to docs/lemonade/getting_started.md

Lines changed: 13 additions & 3 deletions
```diff
@@ -64,6 +64,16 @@ That command will run a few warmup iterations, then a few generation iterations
 
 The prompt size, number of output tokens, and number iterations are all parameters. Learn more by running `lemonade huggingface-bench -h`.
 
+## Memory Usage
+
+The peak memory used by the lemonade build is captured in the build output. To capture more granular
+memory usage information, use the `--memory` flag. For example:
+
+`lemonade -i facebook/opt-125m --memory huggingface-load huggingface-bench`
+
+In this case a `memory_usage.png` file will be generated and stored in the build folder. This file
+contains a figure plotting the memory usage over the build time. Learn more by running `lemonade -h`.
+
 ## Serving
 
 You can launch a WebSocket server for your LLM with:
@@ -111,9 +121,9 @@ You can also try Phi-3-Mini-128k-Instruct with the following commands:
 
 `lemonade -i microsoft/Phi-3-mini-4k-instruct oga-load --device igpu --dtype int4 serve`
 
-You can learn more about the CPU and iGPU support in our [OGA documentation](https://github.com/onnx/turnkeyml/blob/main/docs/ort_genai_igpu.md).
+You can learn more about the CPU and iGPU support in our [OGA documentation](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/ort_genai_igpu.md).
 
-> Note: early access to AMD's RyzenAI NPU is also available. See the [RyzenAI NPU OGA documentation](https://github.com/onnx/turnkeyml/blob/main/docs/ort_genai_npu.md) for more information.
+> Note: early access to AMD's RyzenAI NPU is also available. See the [RyzenAI NPU OGA documentation](https://github.com/onnx/turnkeyml/blob/main/docs/lemonade/ort_genai_npu.md) for more information.
 
 ## Install RyzenAI NPU for PyTorch
 
@@ -131,7 +141,7 @@ If you decide to contribute, please:
 
 - do so via a pull request.
 - write your code in keeping with the same style as the rest of this repo's code.
-- add a test under `test/llm_api.py` that provides coverage of your new feature.
+- add a test under `test/lemonade/llm_api.py` that provides coverage of your new feature.
 
 The best way to contribute is to add new tools to cover more devices and usage scenarios.
```
3 files renamed without changes.
