This repository has been archived by the owner on Apr 2, 2020. It is now read-only.

[WIP] T/F contrast cleaning #210

Open · wants to merge 2 commits into master
16 changes: 7 additions & 9 deletions nistats/contrasts.py
@@ -8,8 +8,9 @@
 from warnings import warn
 import numpy as np
 import scipy.stats as sps
+from scipy.linalg import sqrtm
 
-from .utils import multiple_mahalanobis, z_score
+from .utils import z_score
 
 
 DEF_TINY = 1e-50
@@ -54,18 +55,15 @@ def compute_contrast(labels, regression_result, con_val, contrast_type=None):
             '"{0}" is not a known contrast type. Allowed types are {1}'.
             format(contrast_type, acceptable_contrast_types))
 
+    var_ = np.zeros((1, 1, labels.size))
+    effect_ = np.zeros((dim, labels.size))
     if contrast_type == 't':
-        effect_ = np.zeros((1, labels.size))
-        var_ = np.zeros(labels.size)
         for label_ in regression_result:
             label_mask = labels == label_
-            resl = regression_result[label_].Tcontrast(con_val)
-            effect_[:, label_mask] = resl.effect.T
-            var_[label_mask] = (resl.sd ** 2).T
+            reg = regression_result[label_]
+            effect_[:, label_mask] = con_val.dot(reg.theta)
+            var_[:, :, label_mask] = reg.vcov(matrix=np.atleast_2d(con_val))
     elif contrast_type == 'F':
-        from scipy.linalg import sqrtm
-        effect_ = np.zeros((dim, labels.size))
-        var_ = np.zeros(labels.size)
         for label_ in regression_result:
             label_mask = labels == label_
             reg = regression_result[label_]
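For context on the 't' branch above: the refactor drops the per-label `Tcontrast` call and computes the contrast effect and its variance directly, as `con_val.dot(reg.theta)` and `reg.vcov(matrix=np.atleast_2d(con_val))`. A minimal plain-numpy sketch of the same quantities for a single OLS fit — standalone, with hypothetical names (`X`, `y`, `c`), not the nistats API:

```python
import numpy as np

rng = np.random.RandomState(0)
n, p = 50, 3
X = rng.standard_normal((n, p))                # design matrix
y = X.dot([1.0, 0.5, 0.0]) + rng.standard_normal(n)
c = np.atleast_2d([1.0, -1.0, 0.0])            # t-contrast as a row vector

pinv_X = np.linalg.pinv(X)
theta = pinv_X.dot(y)                          # parameter estimates (reg.theta)
resid = y - X.dot(theta)
dof = n - np.linalg.matrix_rank(X)
dispersion = resid.dot(resid) / dof            # residual variance estimate

effect = c.dot(theta)                          # c . theta, as in con_val.dot(reg.theta)
cov_beta = pinv_X.dot(pinv_X.T)                # (X'X)^-1 for an OLS fit
variance = c.dot(cov_beta).dot(c.T) * dispersion   # c (X'X)^-1 c' * sigma^2 (reg.vcov)
t_stat = effect / np.sqrt(variance.squeeze())
```

The old branch obtained the same numbers from `resl.effect` and `resl.sd ** 2`; allocating `effect_` and `var_` once before the branch appears intended to let the t and F cases share the same containers.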
221 changes: 3 additions & 218 deletions nistats/model.py
@@ -17,6 +17,7 @@
# Inverse t cumulative distribution
inv_t_cdf = t_distribution.ppf


class LikelihoodModelResults(object):
''' Class to contain results from likelihood models '''

@@ -45,7 +46,7 @@ def __init__(self, theta, Y, model, cov=None, dispersion=1., nuisance=None,
multiplicative factor in front of `cov`

nuisance : None of ndarray
-parameter estimates needed to compute logL
+parameter estimates needed to compute log-lokelohood
[Review comment from a collaborator]: Typos in "lokelohood"

rank : None or scalar
rank of the model. If rank is not None, it is used for df_model
@@ -75,14 +76,8 @@ def __init__(self, theta, Y, model, cov=None, dispersion=1., nuisance=None,
self.df_model = model.df_model
# put this as a parameter of LikelihoodModel
self.df_resid = self.df_total - self.df_model

@setattr_on_read
def logL(self):
"""
The maximized log-likelihood
"""
return self.model.logL(self.theta, self.Y, nuisance=self.nuisance)


def t(self, column=None):
"""
Return the (Wald) t-statistic for a given parameter estimate.
@@ -156,213 +151,3 @@ def vcov(self, matrix=None, column=None, dispersion=None, other=None):
return tmp[:, :, np.newaxis] * dispersion
if matrix is None and column is None:
return self.cov * dispersion

def Tcontrast(self, matrix, store=('t', 'effect', 'sd'), dispersion=None):
""" Compute a Tcontrast for a row vector `matrix`

To get the t-statistic for a single column, use the 't' method.

Parameters
----------
matrix : 1D array-like
contrast matrix

store : sequence, optional
components of t to store in results output object. Defaults to all
components ('t', 'effect', 'sd').

dispersion : None or float, optional

Returns
-------
res : ``TContrastResults`` object
"""
matrix = np.asarray(matrix)
# 1D vectors assumed to be row vector
if matrix.ndim == 1:
matrix = matrix[None]
if matrix.shape[0] != 1:
raise ValueError("t contrasts should have only one row")
if matrix.shape[1] != self.theta.shape[0]:
raise ValueError("t contrasts should be length P=%d, "
"but this is length %d" % (self.theta.shape[0],
matrix.shape[1]))
store = set(store)
if not store.issubset(('t', 'effect', 'sd')):
raise ValueError('Unexpected store request in %s' % store)
st_t = st_effect = st_sd = effect = sd = None
if 't' in store or 'effect' in store:
effect = np.dot(matrix, self.theta)
if 'effect' in store:
st_effect = np.squeeze(effect)
if 't' in store or 'sd' in store:
sd = np.sqrt(self.vcov(matrix=matrix, dispersion=dispersion))
if 'sd' in store:
st_sd = np.squeeze(sd)
if 't' in store:
st_t = np.squeeze(effect * positive_reciprocal(sd))
return TContrastResults(effect=st_effect, t=st_t, sd=st_sd,
df_den=self.df_resid)

def Fcontrast(self, matrix, dispersion=None, invcov=None):
""" Compute an Fcontrast for a contrast matrix `matrix`.

Here, `matrix` M is assumed to be non-singular. More precisely

.. math::

M pX pX' M'

is assumed invertible. Here, :math:`pX` is the generalized inverse of
the design matrix of the model. There can be problems in non-OLS models
where the rank of the covariance of the noise is not full.

See the contrast module to see how to specify contrasts. In particular,
the matrices from these contrasts will always be non-singular in the
sense above.

Parameters
----------
matrix : 1D array-like
contrast matrix

dispersion : None or float, optional
If None, use ``self.dispersion``

invcov : None or array, optional
Known inverse of variance covariance matrix.
If None, calculate this matrix.

Returns
-------
f_res : ``FContrastResults`` instance
with attributes F, df_den, df_num

Notes
-----
For F contrasts, we now specify an effect and covariance
"""
matrix = np.asarray(matrix)
# 1D vectors assumed to be row vector
if matrix.ndim == 1:
matrix = matrix[None]
if matrix.shape[1] != self.theta.shape[0]:
raise ValueError("F contrasts should have shape[1] P=%d, "
"but this has shape[1] %d" % (self.theta.shape[0],
matrix.shape[1]))
ctheta = np.dot(matrix, self.theta)
if matrix.ndim == 1:
matrix = matrix.reshape((1, matrix.shape[0]))
if dispersion is None:
dispersion = self.dispersion
q = matrix.shape[0]
if invcov is None:
invcov = inv(self.vcov(matrix=matrix, dispersion=1.0))
F = np.add.reduce(np.dot(invcov, ctheta) * ctheta, 0) * \
positive_reciprocal((q * dispersion))
F = np.squeeze(F)
return FContrastResults(
effect=ctheta, covariance=self.vcov(
matrix=matrix, dispersion=dispersion[np.newaxis]),
F=F, df_den=self.df_resid, df_num=invcov.shape[0])

def conf_int(self, alpha=.05, cols=None, dispersion=None):
''' The confidence interval of the specified theta estimates.

Parameters
----------
alpha : float, optional
The `alpha` level for the confidence interval.
ie., `alpha` = .05 returns a 95% confidence interval.

cols : tuple, optional
`cols` specifies which confidence intervals to return

dispersion : None or scalar
scale factor for the variance / covariance (see class docstring and
``vcov`` method docstring)

Returns
-------
cis : ndarray
`cis` is shape ``(len(cols), 2)`` where each row contains [lower,
upper] for the given entry in `cols`

Examples
--------
>>> from numpy.random import standard_normal as stan
>>> from nistats.regression import OLSModel
>>> x = np.hstack((stan((30,1)),stan((30,1)),stan((30,1))))
>>> beta=np.array([3.25, 1.5, 7.0])
>>> y = np.dot(x,beta) + stan((30))
>>> model = OLSModel(x).fit(y)
>>> confidence_intervals = model.conf_int(cols=(1,2))

Notes
-----
Confidence intervals are two-tailed.
TODO:
tails : string, optional
`tails` can be "two", "upper", or "lower"
'''
if cols is None:
lower = self.theta - inv_t_cdf(1 - alpha / 2, self.df_resid) *\
np.sqrt(np.diag(self.vcov(dispersion=dispersion)))
upper = self.theta + inv_t_cdf(1 - alpha / 2, self.df_resid) *\
np.sqrt(np.diag(self.vcov(dispersion=dispersion)))
else:
lower, upper = [], []
for i in cols:
lower.append(
self.theta[i] - inv_t_cdf(1 - alpha / 2, self.df_resid) *
np.sqrt(self.vcov(column=i, dispersion=dispersion)))
upper.append(
self.theta[i] + inv_t_cdf(1 - alpha / 2, self.df_resid) *
np.sqrt(self.vcov(column=i, dispersion=dispersion)))
return np.asarray(list(zip(lower, upper)))


class TContrastResults(object):
""" Results from a t contrast of coefficients in a parametric model.

The class does nothing, it is a container for the results from T contrasts,
and returns the T-statistics when np.asarray is called.
"""

def __init__(self, t, sd, effect, df_den=None):
if df_den is None:
df_den = np.inf
self.t = t
self.sd = sd
self.effect = effect
self.df_den = df_den

def __array__(self):
return np.asarray(self.t)

def __str__(self):
return ('<T contrast: effect=%s, sd=%s, t=%s, df_den=%d>' %
(self.effect, self.sd, self.t, self.df_den))


class FContrastResults(object):
""" Results from an F contrast of coefficients in a parametric model.

The class does nothing, it is a container for the results from F contrasts,
and returns the F-statistics when np.asarray is called.
"""

def __init__(self, effect, covariance, F, df_num, df_den=None):
if df_den is None:
df_den = np.inf
self.effect = effect
self.covariance = covariance
self.F = F
self.df_den = df_den
self.df_num = df_num
def __array__(self):
return np.asarray(self.F)

def __str__(self):
return '<F contrast: F=%s, df_den=%d, df_num=%d>' % \
(repr(self.F), self.df_den, self.df_num)
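
For reference, the Tcontrast and Fcontrast methods deleted above compute standard Wald statistics from the fitted parameters and their covariance. A self-contained numpy sketch of the same formulas, with made-up numbers and standalone names (`theta`, `cov`, `dispersion`) rather than the class attributes:

```python
import numpy as np

theta = np.array([3.0, 1.5, -0.5])             # parameter estimates
cov = np.array([[0.10, 0.01, 0.00],            # (X'X)^-1 for an OLS fit
                [0.01, 0.08, 0.02],
                [0.00, 0.02, 0.12]])
dispersion = 1.3                               # sigma^2 estimate
df_resid = 27

# t contrast: one row, t = c.theta / sqrt(c cov c' * sigma^2)
c = np.array([[1.0, -1.0, 0.0]])
effect = c.dot(theta)
sd = np.sqrt(c.dot(cov).dot(c.T) * dispersion)
t_stat = (effect / sd).item()

# F contrast: q rows, F = (M theta)' [M cov M']^-1 (M theta) / (q sigma^2)
M = np.array([[1.0, 0.0, 0.0],
              [0.0, 1.0, 0.0]])
q = M.shape[0]
ctheta = M.dot(theta)
F_stat = ctheta.dot(np.linalg.inv(M.dot(cov).dot(M.T))).dot(ctheta) / (q * dispersion)
```

Under the model assumptions, `t_stat` is referred to a Student t distribution with `df_resid` degrees of freedom and `F_stat` to an F distribution with (`q`, `df_resid`) degrees of freedom, matching the `df_num`/`df_den` fields of the removed result classes.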
71 changes: 0 additions & 71 deletions nistats/regression.py
@@ -46,7 +46,6 @@ class OLSModel(object):
Methods
-------
model.__init___(design)
model.logL(b=self.beta, Y)

Attributes
----------
@@ -98,70 +97,6 @@ def initialize(self, design):
self.df_model = matrix_rank(self.design, eps)
self.df_resid = self.df_total - self.df_model

def logL(self, beta, Y, nuisance=None):
r''' Returns the value of the loglikelihood function at beta.

Given the whitened design matrix, the loglikelihood is evaluated
at the parameter vector, beta, for the dependent variable, Y
and the nuisance parameter, sigma.

Parameters
----------
beta : ndarray
The parameter estimates. Must be of length df_model.

Y : ndarray
The dependent variable

nuisance : dict, optional
A dict with key 'sigma', which is an optional estimate of sigma. If
None, defaults to its maximum likelihood estimate (with beta fixed)
as ``sum((Y - X*beta)**2) / n``, where n=Y.shape[0], X=self.design.

Returns
-------
loglf : float
The value of the loglikelihood function.

Notes
-----
The log-Likelihood Function is defined as

.. math::

\ell(\beta,\sigma,Y)=
-\frac{n}{2}\log(2\pi\sigma^2) - \|Y-X\beta\|^2/(2\sigma^2)

The parameter :math:`\sigma` above is what is sometimes referred to as a
nuisance parameter. That is, the likelihood is considered as a function
of :math:`\beta`, but to evaluate it, a value of :math:`\sigma` is
needed.

If :math:`\sigma` is not provided, then its maximum likelihood estimate:

.. math::

\hat{\sigma}(\beta) = \frac{\text{SSE}(\beta)}{n}

is plugged in. This likelihood is now a function of only :math:`\beta`
and is technically referred to as a profile-likelihood.

References
----------
.. [1] W. Green. "Econometric Analysis," 5th ed., Pearson, 2003.
'''
# This is overwriting an abstract method of LikelihoodModel
X = self.wdesign
wY = self.whiten(Y)
r = wY - np.dot(X, beta)
n = self.df_total
SSE = (r ** 2).sum(0)
if nuisance is None:
sigmasq = SSE / n
else:
sigmasq = nuisance['sigma']
loglf = - n / 2. * np.log(2 * np.pi * sigmasq) - SSE / (2 * sigmasq)
return loglf

def whiten(self, X):
""" Whiten design matrix
@@ -353,12 +288,6 @@ def __init__(self, results):
# put this as a parameter of LikelihoodModel
self.df_resid = self.df_total - self.df_model

def logL(self, Y):
"""
The maximized log-likelihood
"""
raise ValueError('can not use this method for simple results')

def resid(self, Y):
"""
Residuals from the fit.
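The logL code removed above evaluates the Gaussian profile log-likelihood spelled out in its docstring. A minimal standalone numpy sketch of the same formula, with hypothetical names and without the whitening step:

```python
import numpy as np

rng = np.random.RandomState(0)
n, p = 40, 2
X = rng.standard_normal((n, p))        # design matrix
beta = np.array([2.0, -1.0])           # candidate parameter vector
Y = X.dot(beta) + rng.standard_normal(n)

resid = Y - X.dot(beta)
SSE = (resid ** 2).sum()
sigmasq = SSE / n                      # MLE of sigma^2 with beta fixed
loglf = -n / 2.0 * np.log(2 * np.pi * sigmasq) - SSE / (2 * sigmasq)
```

When a `nuisance` dict is supplied, the removed method uses `nuisance['sigma']` in place of the plug-in estimate `SSE / n`.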