Source code for naginterfaces.library.examples.correg.glm_binomial_ex

#!/usr/bin/env python3
"``naginterfaces.library.correg.glm_binomial`` Python Example."

# NAG Copyright 2017-2019.

# pylint: disable=invalid-name,too-many-locals,too-many-statements

import numpy as np

from naginterfaces.base import utils
from naginterfaces.library import blgm, correg, rand


def main():
    """
    Example for :func:`naginterfaces.library.correg.glm_binomial`.

    Use k-fold cross validation to estimate the true positive and
    negative rates of a prediction from a logistic regression model.

    The data used in this example was simulated.

    >>> main()
    naginterfaces.library.correg.glm_binomial Python Example Results.
    Use k-fold cross validation to estimate the true positive and
    negative rates of a prediction from a logistic regression model.
                             Observed
                --------------------------
    Predicted | Negative  Positive   Total
    --------------------------------------
    Negative  |       19         6      25
    Positive  |        3        12      15
    Total     |       22        18      40
    True Positive Rate (Sensitivity):  0.67
    True Negative Rate (Specificity):  0.86
    """
    print('naginterfaces.library.correg.glm_binomial Python Example Results.')
    print('Use k-fold cross validation to estimate the true positive and')
    print('negative rates of a prediction from a logistic regression model.')

    # Input the independent variables:
    dat = np.asarray([
        [0.0, -0.1, 1.0, 1.0],
        [0.4, -1.1, 2.0, 1.0],
        [-0.5, 0.2, 3.0, 0.0],
        [0.6, 1.1, 2.0, 0.0],
        [-0.3, -1.0, 3.0, 1.0],
        [2.8, -1.8, 1.0, 1.0],
        [0.4, -0.7, 1.0, 1.0],
        [-0.4, -0.3, 2.0, 0.0],
        [0.5, -2.6, 1.0, 0.0],
        [-1.6, -0.3, 1.0, 1.0],
        [0.4, 0.6, 1.0, 0.0],
        [-1.6, 0.0, 2.0, 1.0],
        [0.0, 0.4, 2.0, 1.0],
        [-0.1, 0.7, 2.0, 1.0],
        [-0.2, 1.8, 3.0, 1.0],
        [-0.9, 0.7, 3.0, 1.0],
        [-1.1, -0.5, 1.0, 1.0],
        [-0.1, -2.2, 3.0, 1.0],
        [-1.8, -0.5, 2.0, 1.0],
        [-0.8, -0.9, 1.0, 1.0],
        [1.9, -0.1, 2.0, 1.0],
        [0.3, 1.4, 2.0, 1.0],
        [0.4, -1.2, 3.0, 0.0],
        [2.2, 1.8, 2.0, 0.0],
        [1.4, -0.4, 1.0, 1.0],
        [0.4, 2.4, 2.0, 1.0],
        [-0.6, 1.1, 3.0, 1.0],
        [1.4, -0.6, 3.0, 1.0],
        [-0.1, -0.1, 1.0, 0.0],
        [-0.6, -0.4, 1.0, 0.0],
        [0.6, -0.2, 2.0, 1.0],
        [-1.8, -0.3, 2.0, 1.0],
        [-0.3, 1.6, 3.0, 1.0],
        [-0.6, 0.8, 1.0, 1.0],
        [0.3, -0.5, 1.0, 0.0],
        [1.6, 1.4, 2.0, 1.0],
        [-1.1, 0.6, 2.0, 1.0],
        [-0.3, 0.6, 2.0, 1.0],
        [-0.6, 0.1, 3.0, 1.0],
        [1.0, 0.6, 3.0, 1.0],
    ])

    # Input the dependent variables (y, t), with y out of t successes:
    y = np.asarray([
        0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0,
        0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0,
        1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
        1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0,
    ])
    t = np.ones(len(y))

    # Describe the data:
    hddesc = utils.Handle()
    nobs = len(dat)
    levels = [1, 1, 3, 1]
    vnames = ['V1', 'V2', 'V3', 'V4']
    blgm.lm_describe_data(hddesc, nobs, levels, vnames=vnames)

    # Specify the model:
    hform = utils.Handle()
    blgm.lm_formula(hform, 'V1 + V2 + V3 + V4 + V1.V4')

    # We are using a logistic model, so the errors are assumed to be
    # Binomial (the logistic link is the default link):
    errfn = 'Binomial'

    # Generate the design matrix, x:
    hxdesc = utils.Handle()
    x = blgm.lm_design_matrix(hform, hddesc, dat, hxdesc)
    m = x.shape[1]

    # Convert the model into a form that can be passed to
    # correg.glm_binomial:
    mean, _, isx, _, _ = blgm.lm_submodel('S', hform, hxdesc, m, 0, 0)

    # The formula and data description handles are no longer required:
    for handle in [hform, hddesc]:
        blgm.handle_free(handle)

    # Number of folds:
    k = 5

    # We are not going to be using predicted standard errors:
    vfobs = False

    # Initialise the random number generator:
    seed = [42321]
    statecomm = rand.init_repeat(6, seed)

    # Counts of predicted vs observed outcomes, with an extra row and
    # column for the marginal totals:
    cnt = np.zeros(shape=(3, 3), dtype='int')

    # Loop over each fold:
    for fold in range(1, k+1):
        # Split the data into training and validation datasets: the first
        # nt rows of x and elements of y and t contain the training data,
        # the remaining entries the validation data.
        nt = rand.kfold_xyw(k, fold, x, statecomm, y=y, w=t)
        nv = nobs - nt

        # Fit the logistic model to the training dataset:
        lres = correg.glm_binomial(
            x[:nt, :], isx, y[:nt], t[:nt], mean=mean,
        )

        # Predict the response for the observations in the validation
        # dataset:
        _, _, pred, _ = correg.glm_predict(
            errfn, x[nt:, :], isx, lres.b, lres.cov, vfobs,
            t=t[nt:], mean=mean,
        )

        # Count the true/false positives/negatives:
        for i in range(nv):
            obs_val = int(y[nt+i])
            pred_val = 1 if pred[i] >= 0.5 else 0
            cnt[pred_val, obs_val] += 1

    # Calculate the marginals:
    cnt[-1, :] = np.sum(cnt, 0)
    cnt[:, -1] = np.sum(cnt, 1)

    # Display the results:
    print('                         Observed')
    print('            --------------------------')
    print('Predicted | Negative  Positive   Total')
    print('--------------------------------------')
    print('Negative  |    {:5d}     {:5d}   {:5d}'.format(*cnt[0, :]))
    print('Positive  |    {:5d}     {:5d}   {:5d}'.format(*cnt[1, :]))
    print('Total     |    {:5d}     {:5d}   {:5d}'.format(*cnt[2, :]))
    if cnt[2, 1] > 0:
        print('True Positive Rate (Sensitivity): {:5.2f}'.format(
            cnt[1, 1] / float(cnt[2, 1])))
    else:
        print('True Positive Rate (Sensitivity): No positives in data')
    if cnt[2, 0] > 0:
        print('True Negative Rate (Specificity): {:5.2f}'.format(
            cnt[0, 0] / float(cnt[2, 0])))
    else:
        print('True Negative Rate (Specificity): No negatives in data')

    # Free the remaining blgm handle:
    blgm.handle_free(hxdesc)
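
# A minimal usage sketch (an illustrative assumption, not part of the example
# itself): besides running this file directly, which executes the docstring
# above as a doctest, main() can be imported from the installed examples
# package and called programmatically, e.g.
#
#     from naginterfaces.library.examples.correg import glm_binomial_ex
#     glm_binomial_ex.main()
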
if __name__ == '__main__':
    import doctest
    import sys
    sys.exit(
        doctest.testmod(
            None, verbose=True, report=False,
            optionflags=doctest.REPORT_NDIFF,
        ).failed
    )