Skip to content

Commit

Permalink
Merge pull request #11 from aangelopoulos/poisson-regression
Browse files (browse the repository at this point in the history)
[add poisson regression]
  • Loading branch information
aangelopoulos authored Jul 6, 2024
2 parents f1ae5ae + 459fb85 commit 1ec2932
Show file tree
Hide file tree
Showing 8 changed files with 851 additions and 34 deletions.
364 changes: 364 additions & 0 deletions examples/census_education.ipynb

Large diffs are not rendered by default.

18 changes: 6 additions & 12 deletions examples/census_healthcare.ipynb

Large diffs are not rendered by default.

Binary file added examples/plots/census_education.pdf
Binary file not shown.
36 changes: 17 additions & 19 deletions ppi_py/baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklearn.isotonic import IsotonicRegression
from .utils import dataframe_decorator, bootstrap
from .ppi import _ols, _wls
import pdb
from sklearn.linear_model import PoissonRegressor

"""
MEAN ESTIMATION
Expand Down Expand Up @@ -258,37 +258,36 @@ def postprediction_ols_ci(


"""
LOGISTIC REGRESSION
POISSON REGRESSION
"""


def logistic(X, Y):
"""Compute the logistic regression coefficients.
def poisson(X, Y):
"""Compute the Poisson regression coefficients.
Args:
X (ndarray): Labeled features.
Y (ndarray): Labeled responses.
Y (ndarray): Labeled responses (count data).
Returns:
ndarray: Logistic regression coefficients.
ndarray: Poisson regression coefficients.
"""
regression = LogisticRegression(
penalty=None,
solver="lbfgs",
regression = PoissonRegressor(
alpha=0,
fit_intercept=False,
max_iter=10000,
tol=1e-15,
fit_intercept=False,
).fit(X, Y)
return regression.coef_.squeeze()
return regression.coef_


def classical_logistic_ci(X, Y, alpha=0.1, alternative="two-sided"):
"""Confidence interval for the logistic regression coefficients using the classical method.
def classical_poisson_ci(X, Y, alpha=0.1, alternative="two-sided"):
"""Confidence interval for the Poisson regression coefficients using the classical method.
Args:
X (ndarray): Labeled
Y (ndarray): Labeled responses.
X (ndarray): Labeled features.
Y (ndarray): Labeled responses (count data).
alpha (float, optional): Error level. Confidence interval will target a coverage of 1 - alpha. Defaults to 0.1. Must be in (0, 1).
alternative (str, optional): One of "two-sided", "less", or "greater". Defaults to "two-sided".
Expand All @@ -297,20 +296,19 @@ def classical_logistic_ci(X, Y, alpha=0.1, alternative="two-sided"):
"""
n = Y.shape[0]
d = X.shape[1]
pointest = logistic(X, Y)
mu = expit(X @ pointest)
pointest = poisson(X, Y)
mu = np.exp(X @ pointest) # Expected value for Poisson regression
V = np.zeros((d, d))
grads = np.zeros((n, d))
for i in range(n):
V += 1 / n * mu[i] * (1 - mu[i]) * X[i : i + 1, :].T @ X[i : i + 1, :]
V += 1 / n * mu[i] * X[i : i + 1, :].T @ X[i : i + 1, :]
grads[i] += (mu[i] - Y[i]) * X[i]
V_inv = np.linalg.inv(V)
cov_mat = V_inv @ np.cov(grads.T) @ V_inv
return _zconfint_generic(
pointest, np.sqrt(np.diag(cov_mat) / n), alpha, alternative
)


"""
BOOTSTRAP CI
Expand Down
1 change: 1 addition & 0 deletions ppi_py/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def load_dataset(dataset_folder, dataset_name, download=True):
dataset_google_drive_ids = {
"alphafold": "1lOhdSJEcFbZmcIoqmlLxo3LgLG1KqPho",
"ballots": "1DJvTWvPM6zQD0V4yGH1O7DL3kfnTE06u",
"census_education": "15iq7nLjwogb46v3stknMmx7kMuK9cnje",
"census_income": "15dZeWw-RTw17-MieG4y1ILTZlreJOmBS",
"census_healthcare": "1RjWsnq-gMngRFRj22DvezcdCVl2MxAIX",
"forest": "1Vqi1wSmVnWh_2lLQuDwrhkGcipvoWBc0",
Expand Down
Loading

0 comments on commit 1ec2932

Please sign in to comment.