#!/usr/bin/env python3
"``naginterfaces.library.correg.glm_binomial`` Python Example."
# NAG Copyright 2017-2019.
# pylint: disable=invalid-name,too-many-locals,too-many-statements
import numpy as np
from naginterfaces.base import utils
from naginterfaces.library import blgm, correg, rand

def main():
"""
Example for :func:`naginterfaces.library.correg.glm_binomial`.
Use k-fold cross validation to estimate the true positive and
negative rates of a prediction from a logistic regression model.
The data used in this example was simulated.
>>> main()
naginterfaces.library.correg.glm_binomial Python Example Results.
Use k-fold cross validation to estimate the true positive and
negative rates of a prediction from a logistic regression model.
Observed
--------------------------
Predicted | Negative Positive Total
--------------------------------------
Negative | 19 6 25
Positive | 3 12 15
Total | 22 18 40
True Positive Rate (Sensitivity): 0.67
True Negative Rate (Specificity): 0.86
"""
    print('naginterfaces.library.correg.glm_binomial Python Example Results.')
    print('Use k-fold cross validation to estimate the true positive and')
    print('negative rates of a prediction from a logistic regression model.')

    # Input the independent variables:
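    # Each row of dat is one observation; the columns hold, in order, the
    # variables V1 to V4 described below (V3 takes the levels 1, 2 and 3).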
    dat = np.asarray([
        [0.0, -0.1, 1.0, 1.0],
        [0.4, -1.1, 2.0, 1.0],
        [-0.5, 0.2, 3.0, 0.0],
        [0.6, 1.1, 2.0, 0.0],
        [-0.3, -1.0, 3.0, 1.0],
        [2.8, -1.8, 1.0, 1.0],
        [0.4, -0.7, 1.0, 1.0],
        [-0.4, -0.3, 2.0, 0.0],
        [0.5, -2.6, 1.0, 0.0],
        [-1.6, -0.3, 1.0, 1.0],
        [0.4, 0.6, 1.0, 0.0],
        [-1.6, 0.0, 2.0, 1.0],
        [0.0, 0.4, 2.0, 1.0],
        [-0.1, 0.7, 2.0, 1.0],
        [-0.2, 1.8, 3.0, 1.0],
        [-0.9, 0.7, 3.0, 1.0],
        [-1.1, -0.5, 1.0, 1.0],
        [-0.1, -2.2, 3.0, 1.0],
        [-1.8, -0.5, 2.0, 1.0],
        [-0.8, -0.9, 1.0, 1.0],
        [1.9, -0.1, 2.0, 1.0],
        [0.3, 1.4, 2.0, 1.0],
        [0.4, -1.2, 3.0, 0.0],
        [2.2, 1.8, 2.0, 0.0],
        [1.4, -0.4, 1.0, 1.0],
        [0.4, 2.4, 2.0, 1.0],
        [-0.6, 1.1, 3.0, 1.0],
        [1.4, -0.6, 3.0, 1.0],
        [-0.1, -0.1, 1.0, 0.0],
        [-0.6, -0.4, 1.0, 0.0],
        [0.6, -0.2, 2.0, 1.0],
        [-1.8, -0.3, 2.0, 1.0],
        [-0.3, 1.6, 3.0, 1.0],
        [-0.6, 0.8, 1.0, 1.0],
        [0.3, -0.5, 1.0, 0.0],
        [1.6, 1.4, 2.0, 1.0],
        [-1.1, 0.6, 2.0, 1.0],
        [-0.3, 0.6, 2.0, 1.0],
        [-0.6, 0.1, 3.0, 1.0],
        [1.0, 0.6, 3.0, 1.0]
    ])

    # Input the dependent variables (y, t), with y out of t successes:
    y = np.asarray([0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
                    0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0,
                    1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
                    1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0])
    t = np.ones(len(y))
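    # With every element of t equal to one, each response is a single
    # Bernoulli (binary) trial.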

    # Describe the data:
    hddesc = utils.Handle()
    nobs = len(dat)
    levels = [1, 1, 3, 1]
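    # A value of 1 in levels denotes a continuous variable, so V3 is treated
    # as a categorical variable with three levels.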
    vnames = ['V1', 'V2', 'V3', 'V4']
    blgm.lm_describe_data(hddesc, nobs, levels, vnames=vnames)

    # Specify the model:
    hform = utils.Handle()
    blgm.lm_formula(hform, 'V1 + V2 + V3 + V4 + V1.V4')
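    # The '.' operator requests the interaction between V1 and V4 in
    # addition to the four main effects.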

    # We are using a logistic model, so the errors are assumed to be
    # Binomial (the logistic link is the default link):
    errfn = 'Binomial'

    # Generate the design matrix, x:
    hxdesc = utils.Handle()
    x = blgm.lm_design_matrix(hform, hddesc, dat, hxdesc)
    m = x.shape[1]

    # Convert the model into a form that can be passed to
    # correg.glm_binomial:
    mean, _, isx, _, _ = blgm.lm_submodel('S', hform, hxdesc, m, 0, 0)
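    # isx indicates which columns of the design matrix enter the model and
    # mean whether an intercept term is included, in the form expected by
    # correg.glm_binomial.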

    # Formula and data description handles are no longer required:
    for handle in (hform, hddesc):
        blgm.handle_free(handle)

    # Number of folds:
    k = 5

    # Not going to be using predicted standard errors:
    vfobs = False

    # Initialise the random number generator:
    seed = [42321]
    statecomm = rand.init_repeat(6, seed)
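    # Using a repeatable generator keeps the fold assignment, and hence the
    # results quoted in the docstring, reproducible.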

    cnt = np.zeros(shape=(3, 3), dtype='int')
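    # cnt accumulates a 2x2 confusion matrix over all folds: the row index
    # is the predicted class, the column index is the observed class, and
    # the final row and column are reserved for the marginal totals.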

    # Loop over each fold:
    for fold in range(1, k+1):
        # Split the data into training and validation datasets: the first nt
        # rows of X and elements of Y and T contain the training data, the
        # remaining entries the validation data.
        nt = rand.kfold_xyw(k, fold, x, statecomm, y=y, w=t)
        nv = nobs - nt

        # Fit the logistic model to the training dataset:
        lres = correg.glm_binomial(
            x[:nt, :], isx, y[:nt], t[:nt], mean=mean
        )
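        # lres.b holds the fitted coefficients and lres.cov their associated
        # covariance information, both of which glm_predict requires below.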

        # Predict the response for observations in the validation dataset:
        _, _, pred, _ = correg.glm_predict(
            errfn, x[nt:, :], isx, lres.b, lres.cov, vfobs, t=t[nt:],
            mean=mean,
        )
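        # pred holds the predicted mean response for each validation
        # observation; with t = 1 this is the probability of a positive
        # outcome, so a value of at least 0.5 is classified as positive.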

        # Count the true/false positives/negatives:
        for i in range(nv):
            obs_val = int(y[nt+i])
            pred_val = 1 if pred[i] >= 0.5 else 0
            cnt[pred_val, obs_val] += 1

    # Calculate the marginals:
    cnt[-1, :] = np.sum(cnt, 0)
    cnt[:, -1] = np.sum(cnt, 1)

    # Display the results:
    print('                    Observed')
    print('            --------------------------')
    print('Predicted | Negative  Positive  Total')
    print('--------------------------------------')
    print('Negative  | {:8d}  {:8d}  {:5d}'.format(*cnt[0, :]))
    print('Positive  | {:8d}  {:8d}  {:5d}'.format(*cnt[1, :]))
    print('Total     | {:8d}  {:8d}  {:5d}'.format(*cnt[2, :]))
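    # Sensitivity is the fraction of observed positives that were predicted
    # positive, cnt[1, 1] / cnt[2, 1]; specificity is the fraction of
    # observed negatives that were predicted negative, cnt[0, 0] / cnt[2, 0].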
    if cnt[2, 1] > 0:
        print('True Positive Rate (Sensitivity): {:5.2f}'.format(
            cnt[1, 1] / float(cnt[2, 1])))
    else:
        print('True Positive Rate (Sensitivity): No positives in data')
    if cnt[2, 0] > 0:
        print('True Negative Rate (Specificity): {:5.2f}'.format(
            cnt[0, 0] / float(cnt[2, 0])))
    else:
        print('True Negative Rate (Specificity): No negatives in data')

    # Free the rest of the blgm handles:
    blgm.handle_free(hxdesc)
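
# Running this file as a script executes the doctest runner below, which
# calls main() and compares its printed output with the docstring above.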
if __name__ == '__main__':
    import doctest
    import sys
    sys.exit(
        doctest.testmod(
            None, verbose=True, report=False,
            optionflags=doctest.REPORT_NDIFF,
        ).failed
    )