diff --git a/tutorials/Makefile b/tutorials/Makefile index f52fca3..823023b 100644 --- a/tutorials/Makefile +++ b/tutorials/Makefile @@ -12,8 +12,9 @@ help: @echo " merge-notebooks Merge notebook files into single files for Colab" @echo " build Build the jupyter book" @echo " push-pages Push built book to GitHub Pages" + @echo " preview Preview the book locally" -.PHONY: help Makefile clean merge-notebooks build push-pages +.PHONY: help Makefile clean merge-notebooks build push-pages preview clean: rm -rf $(BUILDDIR)/ diff --git a/tutorials/notebooks/shortclips/05_fit_wordnet_model.ipynb b/tutorials/notebooks/shortclips/05_fit_wordnet_model.ipynb index 92b451b..2a8c034 100644 --- a/tutorials/notebooks/shortclips/05_fit_wordnet_model.ipynb +++ b/tutorials/notebooks/shortclips/05_fit_wordnet_model.ipynb @@ -122,14 +122,14 @@ "outputs": [], "source": [ "from scipy.stats import zscore\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "# indice of first sample of each run\n", "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "print(run_onsets)\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -156,7 +156,6 @@ "source": [ "Y_test = Y_test.mean(0)\n", "# We need to zscore the test data again, because we took the mean across repetitions.\n", - "# This averaging step makes the standard deviation approximately equal to 1/sqrt(n_repeats)\n", "Y_test = zscore(Y_test, axis=0)\n", "\n", "print(\"(n_samples_test, n_voxels) =\", Y_test.shape)" @@ -799,7 +798,7 @@ "Similarly to {cite:t}`huth2012`, we correct the coefficients of features linked by a\n", "semantic relationship. When building the wordnet features, if a frame was\n", "labeled with `wolf`, the authors automatically added the semantically linked\n", - "categories `canine`, `carnivore`, `placental mammal`, `mamma`, `vertebrate`,\n", + "categories `canine`, `carnivore`, `placental mammal`, `mammal`, `vertebrate`,\n", "`chordate`, `organism`, and `whole`. The authors thus argue that the same\n", "correction needs to be done on the coefficients.\n", "\n" diff --git a/tutorials/notebooks/shortclips/06_visualize_hemodynamic_response.ipynb b/tutorials/notebooks/shortclips/06_visualize_hemodynamic_response.ipynb index 8bd4efe..49190cd 100644 --- a/tutorials/notebooks/shortclips/06_visualize_hemodynamic_response.ipynb +++ b/tutorials/notebooks/shortclips/06_visualize_hemodynamic_response.ipynb @@ -84,6 +84,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -96,8 +97,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -287,14 +287,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Intermission: understanding delays\n", + "## Understanding delays\n", "\n", "To have an intuitive understanding of what we accomplish by delaying the\n", "features before model fitting, we will simulate one voxel and a single\n", "feature. We will then create a ``Delayer`` object (which was used in the\n", - "previous pipeline) and visualize its effect on our single feature. Let's\n", - "start by simulating the data.\n", - "\n" + "previous pipeline) and visualize its effect on our single feature. \n", + "\n", + "Let's start by simulating the data. We assume a simple scenario in which an event in\n", + "our experiment occurred at $t = 20$ seconds and lasted for 10 seconds. For each timepoint, our simulated feature\n", + "is a simple variable that indicates whether the event occurred or not." ] }, { @@ -305,71 +307,83 @@ }, "outputs": [], "source": [ - "# number of total trs\n", - "n_trs = 50\n", - "# repetition time for the simulated data\n", - "TR = 2.0\n", - "rng = np.random.RandomState(42)\n", - "y = rng.randn(n_trs)\n", - "x = np.zeros(n_trs)\n", - "# add some arbitrary value to our feature\n", - "x[15:20] = 0.5\n", - "x += rng.randn(n_trs) * 0.1 # add some noise\n", + "from voxelwise_tutorials.delays_toy import create_voxel_data\n", "\n", - "# create a delayer object and delay the features\n", - "delayer = Delayer(delays=[0, 1, 2, 3, 4])\n", - "x_delayed = delayer.fit_transform(x[:, None])" + "# simulate an activation pulse at 20 s for 10 s of duration\n", + "simulated_X, simulated_Y, times = create_voxel_data(onset=20, duration=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We next plot the simulated data. In this toy example, we assumed a \"canonical\" \n", + "hemodynamic response function (HRF) (a double gamma function). This is an idealized\n", + "HRF that is often used in the literature to model the BOLD response. In practice, \n", + "however, the HRF can vary significantly across brain areas.\n", + "\n", + "Because of the HRF, notice that even though the event occurred at $t = 20$ seconds, \n", + "the BOLD response is delayed in time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from voxelwise_tutorials.delays_toy import plot_delays_toy\n", + "\n", + "plot_delays_toy(simulated_X, simulated_Y, times)\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the next cell we are plotting six lines. The subplot at the top shows the\n", - "simulated BOLD response, while the other subplots show the simulated feature\n", - "at different delays. The effect of the delayer is clear: it creates multiple\n", + "We next create a `Delayer` object and use it to delay the simulated feature. \n", + "The effect of the delayer is clear: it creates multiple\n", "copies of the original feature shifted forward in time by how many samples we\n", "requested (in this case, from 0 to 4 samples, which correspond to 0, 2, 4, 6,\n", "and 8 s in time with a 2 s TR).\n", "\n", "When these delayed features are used to fit a voxelwise encoding model, the\n", "brain response $y$ at time $t$ is simultaneously modeled by the\n", - "feature $x$ at times $t-0, t-2, t-4, t-6, t-8$. In the remaining\n", - "of this example we will see that this method improves model prediction\n", - "accuracy and it allows to account for the underlying shape of the hemodynamic\n", - "response function.\n", - "\n" + "feature $x$ at times $t-0, t-2, t-4, t-6, t-8$. For example, the time sample highlighted\n", + "in the plot below ($t = 30$ seconds) is modeled by the features at \n", + "$t = 30, 28, 26, 24, 22$ seconds." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", + "# create a delayer object and delay the features\n", + "delayer = Delayer(delays=[0, 1, 2, 3, 4])\n", + "simulated_X_delayed = delayer.fit_transform(simulated_X[:, None])\n", "\n", - "fig, axs = plt.subplots(6, 1, figsize=(6, 6), constrained_layout=True, sharex=True)\n", - "times = np.arange(n_trs) * TR\n", - "\n", - "axs[0].plot(times, y, color=\"r\")\n", - "axs[0].set_title(\"BOLD response\")\n", - "for i, (ax, xx) in enumerate(zip(axs.flat[1:], x_delayed.T)):\n", - " ax.plot(times, xx, color=\"k\")\n", - " ax.set_title(\n", - " \"$x(t - {0:.0f})$ (feature delayed by {1} sample{2})\".format(\n", - " i * TR, i, \"\" if i == 1 else \"s\"\n", - " )\n", - " )\n", - "for ax in axs.flat:\n", - " ax.axvline(40, color=\"gray\")\n", - " ax.set_yticks([])\n", - "_ = axs[-1].set_xlabel(\"Time [s]\")\n", + "# plot the simulated data and highlight t = 30\n", + "plot_delays_toy(simulated_X_delayed, simulated_Y, times, highlight=30)\n", "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This simple example shows how the delayed features take into account of the HRF. \n", + "This approach is often referred to as a \"finite impulse response\" (FIR) model.\n", + "By delaying the features, the regression model learns the weights for each voxel \n", + "separately. Therefore, the FIR approach is able to adapt to the shape of the HRF in each \n", + "voxel, without assuming a fixed canonical HRF shape. \n", + "As we will see in the remaining of this notebook, this approach improves model \n", + "prediction accuracy significantly." + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tutorials/notebooks/shortclips/08_fit_motion_energy_model.ipynb b/tutorials/notebooks/shortclips/08_fit_motion_energy_model.ipynb index f0d07f3..763db12 100644 --- a/tutorials/notebooks/shortclips/08_fit_motion_energy_model.ipynb +++ b/tutorials/notebooks/shortclips/08_fit_motion_energy_model.ipynb @@ -91,6 +91,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -103,8 +104,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -486,7 +486,7 @@ "semantic information.\n", "\n", "To better disentangle the two feature spaces, we developed a joint model\n", - "called `banded ridge regression` {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", + "called **banded ridge regression** {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", "simultaneously with optimal regularization for each feature space. This model\n", "is described in the next example.\n", "\n" diff --git a/tutorials/notebooks/shortclips/09_fit_banded_ridge_model.ipynb b/tutorials/notebooks/shortclips/09_fit_banded_ridge_model.ipynb index 1c7ddf6..0fe1e3a 100644 --- a/tutorials/notebooks/shortclips/09_fit_banded_ridge_model.ipynb +++ b/tutorials/notebooks/shortclips/09_fit_banded_ridge_model.ipynb @@ -77,6 +77,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -89,8 +90,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] diff --git a/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab.ipynb b/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab.ipynb index ed5a38c..a47672d 100644 --- a/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab.ipynb +++ b/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab.ipynb @@ -1440,14 +1440,14 @@ "outputs": [], "source": [ "from scipy.stats import zscore\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "# indice of first sample of each run\n", "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "print(run_onsets)\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -1474,7 +1474,6 @@ "source": [ "Y_test = Y_test.mean(0)\n", "# We need to zscore the test data again, because we took the mean across repetitions.\n", - "# This averaging step makes the standard deviation approximately equal to 1/sqrt(n_repeats)\n", "Y_test = zscore(Y_test, axis=0)\n", "\n", "print(\"(n_samples_test, n_voxels) =\", Y_test.shape)" @@ -2117,7 +2116,7 @@ "Similarly to {cite:t}`huth2012`, we correct the coefficients of features linked by a\n", "semantic relationship. When building the wordnet features, if a frame was\n", "labeled with `wolf`, the authors automatically added the semantically linked\n", - "categories `canine`, `carnivore`, `placental mammal`, `mamma`, `vertebrate`,\n", + "categories `canine`, `carnivore`, `placental mammal`, `mammal`, `vertebrate`,\n", "`chordate`, `organism`, and `whole`. The authors thus argue that the same\n", "correction needs to be done on the coefficients.\n", "\n" @@ -2413,6 +2412,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -2425,8 +2425,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -2616,14 +2615,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Intermission: understanding delays\n", + "## Understanding delays\n", "\n", "To have an intuitive understanding of what we accomplish by delaying the\n", "features before model fitting, we will simulate one voxel and a single\n", "feature. We will then create a ``Delayer`` object (which was used in the\n", - "previous pipeline) and visualize its effect on our single feature. Let's\n", - "start by simulating the data.\n", - "\n" + "previous pipeline) and visualize its effect on our single feature. \n", + "\n", + "Let's start by simulating the data. We assume a simple scenario in which an event in\n", + "our experiment occurred at $t = 20$ seconds and lasted for 10 seconds. For each timepoint, our simulated feature\n", + "is a simple variable that indicates whether the event occurred or not." ] }, { @@ -2634,71 +2635,83 @@ }, "outputs": [], "source": [ - "# number of total trs\n", - "n_trs = 50\n", - "# repetition time for the simulated data\n", - "TR = 2.0\n", - "rng = np.random.RandomState(42)\n", - "y = rng.randn(n_trs)\n", - "x = np.zeros(n_trs)\n", - "# add some arbitrary value to our feature\n", - "x[15:20] = 0.5\n", - "x += rng.randn(n_trs) * 0.1 # add some noise\n", + "from voxelwise_tutorials.delays_toy import create_voxel_data\n", "\n", - "# create a delayer object and delay the features\n", - "delayer = Delayer(delays=[0, 1, 2, 3, 4])\n", - "x_delayed = delayer.fit_transform(x[:, None])" + "# simulate an activation pulse at 20 s for 10 s of duration\n", + "simulated_X, simulated_Y, times = create_voxel_data(onset=20, duration=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We next plot the simulated data. In this toy example, we assumed a \"canonical\" \n", + "hemodynamic response function (HRF) (a double gamma function). This is an idealized\n", + "HRF that is often used in the literature to model the BOLD response. In practice, \n", + "however, the HRF can vary significantly across brain areas.\n", + "\n", + "Because of the HRF, notice that even though the event occurred at $t = 20$ seconds, \n", + "the BOLD response is delayed in time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from voxelwise_tutorials.delays_toy import plot_delays_toy\n", + "\n", + "plot_delays_toy(simulated_X, simulated_Y, times)\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the next cell we are plotting six lines. The subplot at the top shows the\n", - "simulated BOLD response, while the other subplots show the simulated feature\n", - "at different delays. The effect of the delayer is clear: it creates multiple\n", + "We next create a `Delayer` object and use it to delay the simulated feature. \n", + "The effect of the delayer is clear: it creates multiple\n", "copies of the original feature shifted forward in time by how many samples we\n", "requested (in this case, from 0 to 4 samples, which correspond to 0, 2, 4, 6,\n", "and 8 s in time with a 2 s TR).\n", "\n", "When these delayed features are used to fit a voxelwise encoding model, the\n", "brain response $y$ at time $t$ is simultaneously modeled by the\n", - "feature $x$ at times $t-0, t-2, t-4, t-6, t-8$. In the remaining\n", - "of this example we will see that this method improves model prediction\n", - "accuracy and it allows to account for the underlying shape of the hemodynamic\n", - "response function.\n", - "\n" + "feature $x$ at times $t-0, t-2, t-4, t-6, t-8$. For example, the time sample highlighted\n", + "in the plot below ($t = 30$ seconds) is modeled by the features at \n", + "$t = 30, 28, 26, 24, 22$ seconds." ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [ - "import matplotlib.pyplot as plt\n", + "# create a delayer object and delay the features\n", + "delayer = Delayer(delays=[0, 1, 2, 3, 4])\n", + "simulated_X_delayed = delayer.fit_transform(simulated_X[:, None])\n", "\n", - "fig, axs = plt.subplots(6, 1, figsize=(6, 6), constrained_layout=True, sharex=True)\n", - "times = np.arange(n_trs) * TR\n", - "\n", - "axs[0].plot(times, y, color=\"r\")\n", - "axs[0].set_title(\"BOLD response\")\n", - "for i, (ax, xx) in enumerate(zip(axs.flat[1:], x_delayed.T)):\n", - " ax.plot(times, xx, color=\"k\")\n", - " ax.set_title(\n", - " \"$x(t - {0:.0f})$ (feature delayed by {1} sample{2})\".format(\n", - " i * TR, i, \"\" if i == 1 else \"s\"\n", - " )\n", - " )\n", - "for ax in axs.flat:\n", - " ax.axvline(40, color=\"gray\")\n", - " ax.set_yticks([])\n", - "_ = axs[-1].set_xlabel(\"Time [s]\")\n", + "# plot the simulated data and highlight t = 30\n", + "plot_delays_toy(simulated_X_delayed, simulated_Y, times, highlight=30)\n", "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This simple example shows how the delayed features take into account of the HRF. \n", + "This approach is often referred to as a \"finite impulse response\" (FIR) model.\n", + "By delaying the features, the regression model learns the weights for each voxel \n", + "separately. Therefore, the FIR approach is able to adapt to the shape of the HRF in each \n", + "voxel, without assuming a fixed canonical HRF shape. \n", + "As we will see in the remaining of this notebook, this approach improves model \n", + "prediction accuracy significantly." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -2988,6 +3001,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -3000,8 +3014,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -3383,7 +3396,7 @@ "semantic information.\n", "\n", "To better disentangle the two feature spaces, we developed a joint model\n", - "called `banded ridge regression` {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", + "called **banded ridge regression** {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", "simultaneously with optimal regularization for each feature space. This model\n", "is described in the next example.\n", "\n" @@ -3488,6 +3501,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -3500,8 +3514,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] diff --git a/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab_model_fitting.ipynb b/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab_model_fitting.ipynb index 1f98331..85656f2 100644 --- a/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab_model_fitting.ipynb +++ b/tutorials/notebooks/shortclips/vem_tutorials_merged_for_colab_model_fitting.ipynb @@ -843,14 +843,14 @@ "outputs": [], "source": [ "from scipy.stats import zscore\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "# indice of first sample of each run\n", "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "print(run_onsets)\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -877,7 +877,6 @@ "source": [ "Y_test = Y_test.mean(0)\n", "# We need to zscore the test data again, because we took the mean across repetitions.\n", - "# This averaging step makes the standard deviation approximately equal to 1/sqrt(n_repeats)\n", "Y_test = zscore(Y_test, axis=0)\n", "\n", "print(\"(n_samples_test, n_voxels) =\", Y_test.shape)" @@ -1520,7 +1519,7 @@ "Similarly to {cite:t}`huth2012`, we correct the coefficients of features linked by a\n", "semantic relationship. When building the wordnet features, if a frame was\n", "labeled with `wolf`, the authors automatically added the semantically linked\n", - "categories `canine`, `carnivore`, `placental mammal`, `mamma`, `vertebrate`,\n", + "categories `canine`, `carnivore`, `placental mammal`, `mammal`, `vertebrate`,\n", "`chordate`, `organism`, and `whole`. The authors thus argue that the same\n", "correction needs to be done on the coefficients.\n", "\n" @@ -1823,6 +1822,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -1835,8 +1835,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] @@ -2218,7 +2217,7 @@ "semantic information.\n", "\n", "To better disentangle the two feature spaces, we developed a joint model\n", - "called `banded ridge regression` {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", + "called **banded ridge regression** {cite}`nunez2019,dupre2022`, which fits multiple feature spaces\n", "simultaneously with optimal regularization for each feature space. This model\n", "is described in the next example.\n", "\n" @@ -2323,6 +2322,7 @@ "import numpy as np\n", "from scipy.stats import zscore\n", "from voxelwise_tutorials.io import load_hdf5_array\n", + "from voxelwise_tutorials.utils import zscore_runs\n", "\n", "file_name = os.path.join(directory, \"responses\", f\"{subject}_responses.hdf\")\n", "Y_train = load_hdf5_array(file_name, key=\"Y_train\")\n", @@ -2335,8 +2335,7 @@ "run_onsets = load_hdf5_array(file_name, key=\"run_onsets\")\n", "\n", "# zscore each training run separately\n", - "Y_train = np.split(Y_train, run_onsets[1:])\n", - "Y_train = np.concatenate([zscore(run, axis=0) for run in Y_train], axis=0)\n", + "Y_train = zscore_runs(Y_train, run_onsets)\n", "# zscore each test run separately\n", "Y_test = zscore(Y_test, axis=1)" ] diff --git a/voxelwise_tutorials/delays_toy.py b/voxelwise_tutorials/delays_toy.py new file mode 100644 index 0000000..a89b987 --- /dev/null +++ b/voxelwise_tutorials/delays_toy.py @@ -0,0 +1,154 @@ +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import gamma + + +def simulate_bold(stimulus_array, TR=2.0, duration=32.0): + """Simulate BOLD signal by convolving a stimulus time series with a canonical HRF. + + Parameters: + ----------- + stimulus_array : array-like + Binary array representing stimulus onset (1) and absence (0) + TR : float, optional + Repetition time of the scanner in seconds (default: 2.0s) + duration : float, optional + Total duration to model the HRF in seconds (default: 32.0s) + + Returns: + -------- + bold_signal : ndarray + The simulated BOLD signal after convolution + """ + # Create time array based on TR + t = np.arange(0, duration, TR) + + # Define canonical double-gamma HRF + # Parameters for the canonical HRF (based on SPM defaults) + peak_delay = 6.0 # delay of peak in seconds + undershoot_delay = 16.0 # delay of undershoot + peak_disp = 1.0 # dispersion of peak + undershoot_disp = 1.0 # dispersion of undershoot + peak_amp = 1.0 # amplitude of peak + undershoot_amp = 0.1 # amplitude of undershoot + + # Compute positive and negative gamma functions + pos_gamma = peak_amp * gamma.pdf(t, peak_delay / peak_disp, scale=peak_disp) + neg_gamma = undershoot_amp * gamma.pdf( + t, undershoot_delay / undershoot_disp, scale=undershoot_disp + ) + + # Canonical HRF = positive gamma - negative gamma + hrf = pos_gamma - neg_gamma + + # Normalize HRF to have sum = 1 + hrf = hrf / np.sum(hrf) + + # Perform convolution + bold_signal = np.convolve(stimulus_array, hrf, mode="full") + + # Return only the part of the signal that matches the length of the input + return bold_signal[: len(stimulus_array)] + + +def create_voxel_data(n_trs=50, TR=2.0, onset=30, duration=10, random_seed=42): + """Create a toy dataset with a single voxel and a single feature. + + Parameters + ---------- + n_trs : int + Number of time points (TRs). + TR : float + Repetition time in seconds. + activation_t : float + Time point of activation onset in seconds. + activation_duration : float + Duration of activation in seconds. + random_seed : int + Seed for random number generation. + + Returns + ------- + X : array of shape (n_trs,) + The generated feature. + Y : array of shape (n_trs,) + The generated voxel data. + times : array of shape (n_trs,) + The time points corresponding to the voxel data. + """ + if onset > n_trs * TR: + raise ValueError("onset must be less than n_trs * TR") + if duration > n_trs * TR: + raise ValueError("duration must be less than n_trs * TR") + if duration < 0: + raise ValueError("duration must be greater than 0") + if onset < 0: + raise ValueError("onset must be greater than 0") + rng = np.random.RandomState(random_seed) + # figure out slices of activation + activation = slice(int(onset / TR), int((onset + duration) / TR)) + X = np.zeros(n_trs) + # add some arbitrary value to our feature + X[activation] = 1 + Y = simulate_bold(X, TR=TR) + # add some noise + Y += rng.randn(n_trs) * 0.1 + times = np.arange(n_trs) * TR + return X, Y, times + + +def plot_delays_toy(X_delayed, Y, times, highlight=None): + """Creates a figure showing a BOLD response and delayed versions of a stimulus. + + Parameters + ---------- + X_delayed : ndarray, shape (n_timepoints, n_delays) + The delayed stimulus, where each column corresponds to a different delay. + Y : ndarray, shape (n_timepoints,) + The BOLD response time series. + times : ndarray, shape (n_timepoints,) + Time points in seconds. + highlight : float or None, optional + Time point to highlight in the plot (default: 30 seconds). + + Returns + ------- + axs : ndarray + Array of matplotlib axes objects containing the plots. + """ + if X_delayed.ndim == 1: + X_delayed = X_delayed[:, np.newaxis] + n_delays = X_delayed.shape[1] + n_rows = n_delays + 1 + TR = times[1] - times[0] + + fig, axs = plt.subplots( + n_rows, + 1, + figsize=(6, n_rows), + constrained_layout=True, + sharex=True, + sharey=True, + ) + axs[0].plot(times, Y, color="r") + axs[0].set_title("BOLD response") + if len(axs) == 2: + axs[1].set_title("Feature") + axs[1].plot(times, X_delayed, color="k") + else: + for i, (ax, xx) in enumerate(zip(axs.flat[1:], X_delayed.T)): + ax.plot(times, xx, color="k") + ax.set_title( + "$x(t - {0:.0f})$ (feature delayed by {1} sample{2})".format( + i * TR, i, "" if i == 1 else "s" + ) + ) + if highlight is not None: + for ax in axs.flat: + ax.axvline(highlight, color="gray") + ax.set_yticks([]) + _ = axs[-1].set_xlabel("Time [s]") + # add more margin at the top of the y axis + ylim = axs[0].get_ylim() + axs[0].set_ylim(ylim[0], ylim[1] * 1.2) + return axs diff --git a/voxelwise_tutorials/tests/test_delays_toy.py b/voxelwise_tutorials/tests/test_delays_toy.py new file mode 100644 index 0000000..0b645cd --- /dev/null +++ b/voxelwise_tutorials/tests/test_delays_toy.py @@ -0,0 +1,78 @@ +import matplotlib.pyplot as plt +import numpy as np +import pytest + +from voxelwise_tutorials import delays_toy + + +def test_simulate_bold(): + """Test that simulate_bold function works correctly""" + # Create a simple stimulus array with an onset + stimulus = np.zeros(50) + stimulus[10:15] = 1 # activation for 5 time points + + # Test with default parameters + bold = delays_toy.simulate_bold(stimulus) + + # Check that output has the same length as input + assert len(bold) == len(stimulus) + + # Check that there is activation (values > 0) after stimulus onset + assert np.any(bold[10:] > 0) + + # Test with different TR + bold_tr1 = delays_toy.simulate_bold(stimulus, TR=1.0) + assert len(bold_tr1) == len(stimulus) + + +def test_create_voxel_data(): + """Test that create_voxel_data function works correctly""" + # Test with default parameters + X, Y, times = delays_toy.create_voxel_data() + + # Check shapes + assert X.shape == (50,) + assert Y.shape == (50,) + assert times.shape == (50,) + + # Check that activation happens at expected time + onset_idx = int(30 / 2.0) # 30 seconds at TR=2.0 + assert np.all( + X[onset_idx : onset_idx + 5] == 1 + ) # Should be active for 5 TRs (10s duration) + + # Test with different parameters + X2, Y2, times2 = delays_toy.create_voxel_data( + n_trs=100, TR=1.0, onset=20, duration=5 + ) + assert X2.shape == (100,) + assert Y2.shape == (100,) + assert times2.shape == (100,) + + # Test error cases + with pytest.raises(ValueError): + delays_toy.create_voxel_data(n_trs=50, onset=120) # onset > n_trs * TR + + with pytest.raises(ValueError): + delays_toy.create_voxel_data(duration=-5) # negative duration + + +def test_plot_delays_toy(): + """Test that plot_delays_toy function works correctly""" + # Create sample data + X, Y, times = delays_toy.create_voxel_data() + + # Test with 1D array (single delay) + axs = delays_toy.plot_delays_toy(X, Y, times) + assert len(axs) == 2 # Should have 2 subplots + plt.close() + + # Test with 2D array (multiple delays) + X_delayed = np.column_stack([X, np.roll(X, 1), np.roll(X, 2)]) + axs = delays_toy.plot_delays_toy(X_delayed, Y, times) + assert len(axs) == 4 # Should have 4 subplots (1 for Y, 3 for X) + plt.close() + + # Test with highlight + axs = delays_toy.plot_delays_toy(X, Y, times, highlight=30) + plt.close() diff --git a/voxelwise_tutorials/tests/test_utils.py b/voxelwise_tutorials/tests/test_utils.py index 8d0084b..5e6a926 100644 --- a/voxelwise_tutorials/tests/test_utils.py +++ b/voxelwise_tutorials/tests/test_utils.py @@ -1,6 +1,7 @@ import numpy as np import pytest -from voxelwise_tutorials.utils import generate_leave_one_run_out + +from voxelwise_tutorials.utils import generate_leave_one_run_out, zscore_runs def test_generate_leave_one_run_out_disjoint(): @@ -20,3 +21,45 @@ def test_generate_leave_one_run_out_empty_runs(run_onsets): n_samples = 40 with pytest.raises(ValueError): list(generate_leave_one_run_out(n_samples, run_onsets)) + + +def test_zscore_runs(): + # Create sample data + data = np.array([ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6], + [10, 11, 12], + [11, 12, 13], + [12, 13, 14], + [13, 14, 15] + ], dtype=np.float64) + + run_onsets = [0, 4] # Two runs: first 4 samples and last 4 samples + + # Apply zscore_runs + zscored_data = zscore_runs(data, run_onsets) + + # Check shape preserved + assert zscored_data.shape == data.shape + + # Check that each run has mean 0 and std 1 + run1 = zscored_data[:4] + run2 = zscored_data[4:] + + # For each run and each feature column, mean should be close to 0 and std close to 1 + for run in [run1, run2]: + assert np.allclose(run.mean(axis=0), 0, atol=1e-10) + assert np.allclose(run.std(axis=0), 1, atol=1e-10) + + # Test with integer data to ensure dtype handling works correctly + int_data = np.array([ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + [4, 5, 6] + ], dtype=np.int32) + + int_result = zscore_runs(int_data, [0]) + assert int_result.dtype == np.int32 \ No newline at end of file diff --git a/voxelwise_tutorials/utils.py b/voxelwise_tutorials/utils.py index 6fcb4da..f8f1a1f 100644 --- a/voxelwise_tutorials/utils.py +++ b/voxelwise_tutorials/utils.py @@ -79,3 +79,26 @@ def explainable_variance(data, bias_correction=True, do_zscore=True): n_repeats = data.shape[0] ev = ev - (1 - ev) / (n_repeats - 1) return ev + + +def zscore_runs(data, run_onsets): + """Perform z-scoring of fMRI data within each run. + + Parameters + ---------- + data : array of shape (n_samples, n_features) + fMRI responses in the training set. + run_onsets : array of int of shape (n_runs, ) + Indices of the run onsets. + The first run is assumed to start at index 0. + + Returns + ------- + zscored_data : array of shape (n_samples, n_features) + fMRI responses z-scored within each run. + """ + # zscore each training run separately + orig_dtype = data.dtype + data = np.split(data.astype(np.float64), run_onsets[1:]) + data = np.concatenate([scipy.stats.zscore(run, axis=0) for run in data], axis=0) + return data.astype(orig_dtype)