Merge pull request #20 from Michael-Howes/main
Power analysis
aangelopoulos authored Dec 22, 2024
2 parents 0bf1ccf + 5b801f0 commit 63d1782
Showing 8 changed files with 2,359 additions and 30 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -111,6 +111,7 @@ There is also a file, ```./ppi_py/baselines.py```, which implements several base
Finally, the file ```./ppi_py/datasets/datasets.py``` handles the loading of the sample datasets.

The folder ```./examples``` contains notebooks for implementing prediction-powered inference on several datasets and estimands. These are listed [above](https://github.com/aangelopoulos/ppi_py/tree/main#examples). There is also an additional subfolder, ```./examples/baselines```, which contains comparisons to certain baseline algorithms, as in the appendix of the original PPI paper.
There is an additional notebook, [```./examples/power_analysis.ipynb```](https://github.com/aangelopoulos/ppi_py/blob/main/examples/power_analysis.ipynb), which shows how to choose the optimal labeled and unlabeled dataset sizes subject to a constraint on the budget.

The folder ```./tests``` contains unit tests for each function implemented in the ```ppi_py``` package. The tests are organized by estimand, and can be run by executing ```pytest``` in the root directory. Some of the tests are stochastic, and therefore, have some failure probability, even if the functions are all implemented correctly. If a test fails, it may be worth running it again. Debugging the tests can be done by adding the ```-s``` flag and using print statements or ```pdb```. Note that in order to be recognized by ```pytest```, all tests must be preceded by ```test_```.
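For illustration, a minimal test in this style might look like the sketch below. It is hypothetical (not taken from ```./tests```) and assumes the public ```ppi_mean_ci``` API; like the real tests, it is stochastic and can fail with small probability even if the implementation is correct.

```python
import numpy as np
from ppi_py import ppi_mean_ci


def test_ppi_mean_ci_covers_truth():
    # Synthetic data with true mean 0 and accurate predictions.
    rng = np.random.default_rng(0)
    Y = rng.normal(0, 1, size=500)
    Yhat = Y + rng.normal(0, 0.1, size=500)
    Yhat_unlabeled = rng.normal(0, 1, size=5000) + rng.normal(0, 0.1, size=5000)
    # The 90% PPI confidence interval should (usually) cover the true mean.
    ci = ppi_mean_ci(Y, Yhat, Yhat_unlabeled, alpha=0.1)
    assert ci[0] <= 0.0 <= ci[1]
```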

@@ -141,3 +142,5 @@ The repository currently implements the methods developed in the following paper
[Cross-Prediction-Powered Inference](https://arxiv.org/abs/2309.16598)

[Prediction-Powered Bootstrap](https://arxiv.org/abs/2405.18379)

[The Mixed Subjects Design: Treating Large Language Models as (Potentially) Informative Observations](https://osf.io/preprints/socarxiv/j3bnt)
2 changes: 2 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -16,3 +16,5 @@ Each notebook runs a simulation that forms a dataframe containing confidence int
- the average interval width for PPI and the classical method, together with a scatterplot of the widths from the five random draws.

Each notebook also compares PPI and classical inference in terms of the number of labeled examples needed to reject a natural null hypothesis in the analyzed problem.

Finally, there is a notebook that shows how to compute the optimal number of labeled examples `n` and unlabeled examples `N` given a cost constraint ([```power_analysis.ipynb```](https://github.com/aangelopoulos/ppi_py/blob/main/examples/power_analysis.ipynb)).
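The idea behind the budget allocation can be sketched in a few lines. The sketch below is illustrative, not the notebook's code: it assumes the PPI mean estimator, whose variance is approximately `Var(Y - Yhat)/n + Var(Yhat)/N`, and the helper name ```optimal_allocation``` is hypothetical.

```python
import numpy as np


def optimal_allocation(var_rect, var_pred, cost_label, cost_pred, budget):
    """Split a budget between n labeled and N unlabeled examples to
    minimize the approximate PPI mean variance
        var_rect / n + var_pred / N
    subject to cost_label * n + cost_pred * N <= budget."""
    # Lagrangian optimality gives the square-root allocation rule:
    # spend budget on each data source in proportion to
    # sqrt(variance * unit cost).
    s_label = np.sqrt(var_rect * cost_label)
    s_pred = np.sqrt(var_pred * cost_pred)
    n = budget * s_label / ((s_label + s_pred) * cost_label)
    N = budget * s_pred / ((s_label + s_pred) * cost_pred)
    return int(n), int(N)


# Example: cheap predictions and a small rectifier variance (accurate
# predictions) push most of the budget toward the unlabeled set.
n, N = optimal_allocation(
    var_rect=0.2, var_pred=1.0, cost_label=1.0, cost_pred=0.01, budget=1000
)
```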
20 changes: 17 additions & 3 deletions examples/census_education.ipynb
Original file line number Diff line number Diff line change
@@ -63,7 +63,9 @@
"data = load_dataset(dataset_folder, \"census_education\")\n",
"Y_total = data[\"Y\"]\n",
"Yhat_total = data[\"Yhat\"]\n",
"X_total = data[\"X\"]/10000 # scale X to avoid numerical issues; interpretation is \"per 10,000 dollars\""
"X_total = (\n",
" data[\"X\"] / 10000\n",
") # scale X to avoid numerical issues; interpretation is \"per 10,000 dollars\""
]
},
{
@@ -152,7 +154,13 @@
" )\n",
"\n",
" ppi_ci = ppi_poisson_ci(\n",
" _X, _Y, _Yhat, _X_unlabeled, _Yhat_unlabeled, alpha=alpha, optimizer_options=optimizer_options\n",
" _X,\n",
" _Y,\n",
" _Yhat,\n",
" _X_unlabeled,\n",
" _Yhat_unlabeled,\n",
" alpha=alpha,\n",
" optimizer_options=optimizer_options,\n",
" )\n",
"\n",
" # Classical interval\n",
@@ -289,7 +297,13 @@
" )\n",
"\n",
" ppi_ci = ppi_poisson_ci(\n",
" _X, _Y, _Yhat, _X_unlabeled, _Yhat_unlabeled, alpha=alpha, optimizer_options=optimizer_options\n",
" _X,\n",
" _Y,\n",
" _Yhat,\n",
" _X_unlabeled,\n",
" _Yhat_unlabeled,\n",
" alpha=alpha,\n",
" optimizer_options=optimizer_options,\n",
" )\n",
"\n",
" if ppi_ci[0][coordinate] > null_to_reject:\n",
31 changes: 13 additions & 18 deletions examples/census_healthcare_ppboot.ipynb
Original file line number Diff line number Diff line change
@@ -66,7 +66,9 @@
"data = load_dataset(dataset_folder, \"census_healthcare\")\n",
"Y_total = data[\"Y\"]\n",
"Yhat_total = data[\"Yhat\"]\n",
"X_total = data[\"X\"][:,0] # first coordinate is income; second is constant term"
"X_total = data[\"X\"][\n",
" :, 0\n",
"] # first coordinate is income; second is constant term"
]
},
{
@@ -94,10 +96,13 @@
" int\n",
") # Test for different numbers of labeled ballots\n",
"num_trials = 100\n",
"\n",
"\n",
"# define Pearson correlation coefficient\n",
"def pearson(X, Y):\n",
" return np.corrcoef(X, Y)[0,1]\n",
" \n",
" return np.corrcoef(X, Y)[0, 1]\n",
"\n",
"\n",
"# Compute ground truth\n",
"true_theta = pearson(X_total, Y_total)"
]
@@ -151,13 +156,7 @@
" )\n",
"\n",
" ppi_ci = ppboot(\n",
" pearson,\n",
" _Y,\n",
" _Yhat,\n",
" _Yhat_unlabeled,\n",
" _X,\n",
" _X_unlabeled,\n",
" alpha=alpha\n",
" pearson, _Y, _Yhat, _Yhat_unlabeled, _X, _X_unlabeled, alpha=alpha\n",
" )\n",
"\n",
" # Classical interval\n",
@@ -192,7 +191,9 @@
" ]\n",
"\n",
"# Imputed CI\n",
"imputed_ci = classical_bootstrap_ci(pearson, X_total, (Yhat_total > 0.5).astype(int), alpha=alpha)\n",
"imputed_ci = classical_bootstrap_ci(\n",
" pearson, X_total, (Yhat_total > 0.5).astype(int), alpha=alpha\n",
")\n",
"results += [\n",
" pd.DataFrame(\n",
" [\n",
@@ -290,13 +291,7 @@
" )\n",
"\n",
" ppi_ci = ppboot(\n",
" pearson,\n",
" _Y,\n",
" _Yhat,\n",
" _Yhat_unlabeled,\n",
" _X,\n",
" _X_unlabeled,\n",
" alpha=alpha\n",
" pearson, _Y, _Yhat, _Yhat_unlabeled, _X, _X_unlabeled, alpha=alpha\n",
" )\n",
" if ppi_ci[0] > 0.15:\n",
" nulls_rejected += 1\n",
24 changes: 15 additions & 9 deletions examples/census_income_covshift.ipynb
Original file line number Diff line number Diff line change
@@ -203,9 +203,11 @@
" _Yhat = Yhat_inD[rand_idx[:n]]\n",
" importance_weights = np.array(\n",
" [\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" (\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" )\n",
" for z in _Z\n",
" ]\n",
" )\n",
@@ -341,9 +343,11 @@
" _Yhat = Yhat_inD[rand_idx[:n]]\n",
" importance_weights = np.array(\n",
" [\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" (\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" )\n",
" for z in _Z\n",
" ]\n",
" )\n",
@@ -369,9 +373,11 @@
" _Yhat = Yhat_inD[rand_idx[:n]]\n",
" importance_weights = np.array(\n",
" [\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" (\n",
" weights[0] / inD_weights[0]\n",
" if z == 0\n",
" else weights[1] / inD_weights[1]\n",
" )\n",
" for z in _Z\n",
" ]\n",
" )\n",
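The hunks above reformat the importance-weight computation for covariate shift: each labeled point in group ```z``` appears to be reweighted by ```weights[z] / inD_weights[z]```, the ratio of its group's frequency in the target distribution to its frequency in the labeled data. A hypothetical vectorized equivalent of the list comprehension, assuming ```_Z``` is a binary array and ```weights```/```inD_weights``` hold the two group proportions, would be:

```python
import numpy as np

# Weight each point by the target-vs-source frequency ratio of its group.
importance_weights = np.where(
    _Z == 0,
    weights[0] / inD_weights[0],
    weights[1] / inD_weights[1],
)
```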
971 changes: 971 additions & 0 deletions examples/power_analysis.ipynb

Large diffs are not rendered by default.

