
Commit 63d1782

Merge pull request #20 from Michael-Howes/main
Power analysis
2 parents 0bf1ccf + 5b801f0 · commit 63d1782

File tree

8 files changed: +2359 −30 lines changed

README.md

Lines changed: 3 additions & 0 deletions

````diff
@@ -111,6 +111,7 @@ There is also a file, ```./ppi_py/baselines.py```, which implements several base
 Finally, the file ```./ppi_py/datasets/datasets.py``` handles the loading of the sample datasets.
 
 The folder ```./examples``` contains notebooks for implementing prediction-powered inference on several datasets and estimands. These are listed [above](https://github.com/aangelopoulos/ppi_py/tree/main#examples). There is also an additional subfolder, ```./examples/baselines```, which contains comparisons to certain baseline algorithms, as in the appendix of the original PPI paper.
+There is an additional notebook, [```./examples/power_analysis.ipynb```](https://github.com/aangelopoulos/ppi_py/blob/main/examples/power_analysis.ipynb), which shows how to choose the optimal labeled and unlabeled dataset sizes subject to a constraint on the budget.
 
 The folder ```./tests``` contains unit tests for each function implemented in the ```ppi_py``` package. The tests are organized by estimand, and can be run by executing ```pytest``` in the root directory. Some of the tests are stochastic and therefore have some failure probability, even if the functions are all implemented correctly. If a test fails, it may be worth running it again. Debugging the tests can be done by adding the ```-s``` flag and using print statements or ```pdb```. Note that in order to be recognized by ```pytest```, all tests must be preceded by ```test_```.
 
@@ -141,3 +142,5 @@ The repository currently implements the methods developed in the following paper
 [Cross-Prediction-Powered Inference](https://arxiv.org/abs/2309.16598)
 
 [Prediction-Powered Bootstrap](https://arxiv.org/abs/2405.18379)
+
+[The Mixed Subjects Design: Treating Large Language Models as (Potentially) Informative Observations](https://osf.io/preprints/socarxiv/j3bnt)
````
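For context on the power-analysis feature added in this README: for the mean estimand, the untuned PPI point estimate has variance roughly Var(Y − Ŷ)/n + Var(Ŷ)/N, so minimizing variance subject to a linear cost c_n·n + c_N·N ≤ B has a closed-form square-root allocation. The sketch below is illustrative only and is not the notebook's code; the function name, cost parameters, and variance inputs are assumptions.

```python
import numpy as np


def optimal_allocation(var_resid, var_pred, cost_label, cost_unlabeled, budget):
    """Minimize var_resid / n + var_pred / N subject to
    cost_label * n + cost_unlabeled * N <= budget.

    Lagrangian stationarity gives n ~ sqrt(var_resid / cost_label) and
    N ~ sqrt(var_pred / cost_unlabeled), scaled to spend the full budget.
    """
    scale = (
        np.sqrt(var_resid * cost_label) + np.sqrt(var_pred * cost_unlabeled)
    ) / budget
    n = np.sqrt(var_resid / cost_label) / scale
    N = np.sqrt(var_pred / cost_unlabeled) / scale
    return int(n), int(N)


# Example: labels cost 100x as much as model predictions.
# var_resid = Var(Y - Yhat) from a pilot sample; var_pred = Var(Yhat).
n, N = optimal_allocation(
    var_resid=0.5, var_pred=2.0, cost_label=1.0, cost_unlabeled=0.01, budget=1000
)
print(n, N)  # variance-optimal split that spends the full budget
```

With these inputs the rule puts most of the budget into labels (n ≈ 833, N ≈ 16,666) because the residual variance dominates relative to the per-label cost.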
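The ```./tests``` paragraph in the diff above also describes the repo's test conventions (the `test_` prefix, stochastic failures). A hypothetical test in that style, assuming `ppi_mean_ci` is importable from `ppi_py` as in the examples; this is not an actual test from ```./tests```:

```python
import numpy as np
from ppi_py import ppi_mean_ci


def test_ppi_mean_ci_covers_truth():
    # Stochastic: even a correct implementation fails ~alpha of the time,
    # which is why a failing test may be worth rerunning.
    rng = np.random.default_rng()
    theta, n, N, alpha = 0.5, 1000, 10000, 0.05
    Y = rng.binomial(1, theta, n).astype(float)
    Yhat = np.clip(Y + rng.normal(0, 0.2, n), 0, 1)
    Yhat_unlabeled = np.clip(
        rng.binomial(1, theta, N) + rng.normal(0, 0.2, N), 0, 1
    )
    lower, upper = ppi_mean_ci(Y, Yhat, Yhat_unlabeled, alpha=alpha)
    assert np.all(lower <= theta) and np.all(theta <= upper)
```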

examples/README.md

Lines changed: 2 additions & 0 deletions

````diff
@@ -16,3 +16,5 @@ Each notebook runs a simulation that forms a dataframe containing confidence int
 - the average interval width for PPI and the classical method, together with a scatterplot of the widths from the five random draws.
 
 Each notebook also compares PPI and classical inference in terms of the number of labeled examples needed to reject a natural null hypothesis in the analyzed problem.
+
+Finally, there is a notebook that shows how to compute the optimal `n` and `N` given a cost constraint ([```power_analysis.ipynb```](https://github.com/aangelopoulos/ppi_py/blob/main/examples/power_analysis.ipynb)).
````
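For readers skimming the diff, here is a minimal sketch of the kind of power comparison the examples README describes, written for the mean estimand with synthetic data. `ppi_mean_ci` is a real `ppi_py` function, but the population, noise model, and loop structure below are illustrative rather than copied from any notebook:

```python
import numpy as np
from ppi_py import ppi_mean_ci

rng = np.random.default_rng(0)
theta_null, num_trials, alpha = 0.5, 100, 0.1

# Synthetic population whose true mean (0.55) exceeds the null (0.5)
Y_pop = rng.binomial(1, 0.55, 100_000).astype(float)
Yhat_pop = np.clip(Y_pop + rng.normal(0, 0.2, Y_pop.size), 0, 1)

for n in [100, 200, 400, 800]:
    N = 10 * n  # unlabeled set ten times larger than the labeled set
    rejections = 0
    for _ in range(num_trials):
        idx = rng.permutation(Y_pop.size)
        lab, unlab = idx[:n], idx[n : n + N]
        lower, upper = ppi_mean_ci(
            Y_pop[lab], Yhat_pop[lab], Yhat_pop[unlab], alpha=alpha
        )
        rejections += np.all(lower > theta_null)  # CI excludes the null
    print(f"n={n}: rejection rate {rejections / num_trials:.2f}")
```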

examples/census_education.ipynb

Lines changed: 17 additions & 3 deletions

```diff
@@ -63,7 +63,9 @@
 "data = load_dataset(dataset_folder, \"census_education\")\n",
 "Y_total = data[\"Y\"]\n",
 "Yhat_total = data[\"Yhat\"]\n",
-"X_total = data[\"X\"]/10000 # scale X to avoid numerical issues; interpretation is \"per 10,000 dollars\""
+"X_total = (\n",
+"    data[\"X\"] / 10000\n",
+") # scale X to avoid numerical issues; interpretation is \"per 10,000 dollars\""
 ]
 },
 {
@@ -152,7 +154,13 @@
 "    )\n",
 "\n",
 "    ppi_ci = ppi_poisson_ci(\n",
-"        _X, _Y, _Yhat, _X_unlabeled, _Yhat_unlabeled, alpha=alpha, optimizer_options=optimizer_options\n",
+"        _X,\n",
+"        _Y,\n",
+"        _Yhat,\n",
+"        _X_unlabeled,\n",
+"        _Yhat_unlabeled,\n",
+"        alpha=alpha,\n",
+"        optimizer_options=optimizer_options,\n",
 "    )\n",
 "\n",
 "    # Classical interval\n",
@@ -289,7 +297,13 @@
 "    )\n",
 "\n",
 "    ppi_ci = ppi_poisson_ci(\n",
-"        _X, _Y, _Yhat, _X_unlabeled, _Yhat_unlabeled, alpha=alpha, optimizer_options=optimizer_options\n",
+"        _X,\n",
+"        _Y,\n",
+"        _Yhat,\n",
+"        _X_unlabeled,\n",
+"        _Yhat_unlabeled,\n",
+"        alpha=alpha,\n",
+"        optimizer_options=optimizer_options,\n",
 "    )\n",
 "\n",
 "    if ppi_ci[0][coordinate] > null_to_reject:\n",
```

examples/census_healthcare_ppboot.ipynb

Lines changed: 13 additions & 18 deletions

```diff
@@ -66,7 +66,9 @@
 "data = load_dataset(dataset_folder, \"census_healthcare\")\n",
 "Y_total = data[\"Y\"]\n",
 "Yhat_total = data[\"Yhat\"]\n",
-"X_total = data[\"X\"][:,0] # first coordinate is income; second is constant term"
+"X_total = data[\"X\"][\n",
+"    :, 0\n",
+"] # first coordinate is income; second is constant term"
 ]
 },
 {
@@ -94,10 +96,13 @@
 "    int\n",
 ") # Test for different numbers of labeled ballots\n",
 "num_trials = 100\n",
+"\n",
+"\n",
 "# define Pearson correlation coefficient\n",
 "def pearson(X, Y):\n",
-"    return np.corrcoef(X, Y)[0,1]\n",
-"    \n",
+"    return np.corrcoef(X, Y)[0, 1]\n",
+"\n",
+"\n",
 "# Compute ground truth\n",
 "true_theta = pearson(X_total, Y_total)"
 ]
@@ -151,13 +156,7 @@
 "    )\n",
 "\n",
 "    ppi_ci = ppboot(\n",
-"        pearson,\n",
-"        _Y,\n",
-"        _Yhat,\n",
-"        _Yhat_unlabeled,\n",
-"        _X,\n",
-"        _X_unlabeled,\n",
-"        alpha=alpha\n",
+"        pearson, _Y, _Yhat, _Yhat_unlabeled, _X, _X_unlabeled, alpha=alpha\n",
 "    )\n",
 "\n",
 "    # Classical interval\n",
@@ -192,7 +191,9 @@
 "    ]\n",
 "\n",
 "# Imputed CI\n",
-"imputed_ci = classical_bootstrap_ci(pearson, X_total, (Yhat_total > 0.5).astype(int), alpha=alpha)\n",
+"imputed_ci = classical_bootstrap_ci(\n",
+"    pearson, X_total, (Yhat_total > 0.5).astype(int), alpha=alpha\n",
+")\n",
 "results += [\n",
 "    pd.DataFrame(\n",
 "        [\n",
@@ -290,13 +291,7 @@
 "    )\n",
 "\n",
 "    ppi_ci = ppboot(\n",
-"        pearson,\n",
-"        _Y,\n",
-"        _Yhat,\n",
-"        _Yhat_unlabeled,\n",
-"        _X,\n",
-"        _X_unlabeled,\n",
-"        alpha=alpha\n",
+"        pearson, _Y, _Yhat, _Yhat_unlabeled, _X, _X_unlabeled, alpha=alpha\n",
 "    )\n",
 "    if ppi_ci[0] > 0.15:\n",
 "        nulls_rejected += 1\n",
```

examples/census_income_covshift.ipynb

Lines changed: 15 additions & 9 deletions

```diff
@@ -203,9 +203,11 @@
 "    _Yhat = Yhat_inD[rand_idx[:n]]\n",
 "    importance_weights = np.array(\n",
 "        [\n",
-"            weights[0] / inD_weights[0]\n",
-"            if z == 0\n",
-"            else weights[1] / inD_weights[1]\n",
+"            (\n",
+"                weights[0] / inD_weights[0]\n",
+"                if z == 0\n",
+"                else weights[1] / inD_weights[1]\n",
+"            )\n",
 "            for z in _Z\n",
 "        ]\n",
 "    )\n",
@@ -341,9 +343,11 @@
 "    _Yhat = Yhat_inD[rand_idx[:n]]\n",
 "    importance_weights = np.array(\n",
 "        [\n",
-"            weights[0] / inD_weights[0]\n",
-"            if z == 0\n",
-"            else weights[1] / inD_weights[1]\n",
+"            (\n",
+"                weights[0] / inD_weights[0]\n",
+"                if z == 0\n",
+"                else weights[1] / inD_weights[1]\n",
+"            )\n",
 "            for z in _Z\n",
 "        ]\n",
 "    )\n",
@@ -369,9 +373,11 @@
 "    _Yhat = Yhat_inD[rand_idx[:n]]\n",
 "    importance_weights = np.array(\n",
 "        [\n",
-"            weights[0] / inD_weights[0]\n",
-"            if z == 0\n",
-"            else weights[1] / inD_weights[1]\n",
+"            (\n",
+"                weights[0] / inD_weights[0]\n",
+"                if z == 0\n",
+"                else weights[1] / inD_weights[1]\n",
+"            )\n",
 "            for z in _Z\n",
 "        ]\n",
 "    )\n",
```

examples/power_analysis.ipynb

Lines changed: 971 additions & 0 deletions
Large diffs are not rendered by default.
