/* nag_rand_kfold_xyw (g05pvc) Example Program.
*
* Copyright 2024 Numerical Algorithms Group.
*
* Mark 30.1, 2024.
*/
/* Pre-processor includes */
#include <nag.h>
#include <stdio.h>
int main(void) {
/* Integer scalar and array declarations */
Integer fn, fold, fp, i, ip, k, pdx, lstate, m, max_nv, n, nn, np, nt, nv,
obs_val, pred_val, subid, tn, tp, j, pdv, rank, max_iter, print_iter;
Integer exit_status = 0, lseed = 1;
Integer *isx = 0, *state = 0;
Integer seed[1];
/* NAG structures and types */
NagError fail;
Nag_Link link;
Nag_IncludeMean mean;
Nag_BaseRNG genid;
Nag_Distributions errfn;
Nag_Boolean vfobs;
Nag_DataByObsOrVar sordx;
/* Double scalar and array declarations */
double ex_power, dev, eps, tol, df, scale;
double *b = 0, *cov = 0, *eta = 0, *pred = 0, *se = 0, *seeta = 0,
*sepred = 0, *v = 0, *offset = 0, *wt = 0, *x = 0, *y = 0, *t = 0;
/* Character scalar and array declarations */
char clink[40], cmean[40], cgenid[40];
/* Initialize the error structure */
INIT_FAIL(fail);
printf("nag_rand_kfold_xyw (g05pvc) Example Program Results\n\n");
/* Skip heading in data file */
scanf("%*[^\n] ");
/* Set variables required by the regression (g02gbc) ... */
/* Read in the type of link function, whether a mean is required */
/* and the problem size */
scanf("%39s%39s%" NAG_IFMT "%" NAG_IFMT "%*[^\n] ", clink, cmean, &n, &m);
link = (Nag_Link)nag_enum_name_to_value(clink);
mean = (Nag_IncludeMean)nag_enum_name_to_value(cmean);
/* Set storage order for g05pvc */
/* (pick the one required by g02gbc and g02gpc) */
sordx = Nag_DataByObs;
pdx = m;
if (!(x = NAG_ALLOC(pdx * n, double)) || !(y = NAG_ALLOC(n, double)) ||
!(t = NAG_ALLOC(n, double)) || !(isx = NAG_ALLOC(m, Integer))) {
printf("Allocation failure\n");
exit_status = -1;
goto END;
}
/* This example is not using an offset or weights */
offset = 0;
wt = 0;
/* Read in data */
for (i = 0; i < n; i++) {
for (j = 0; j < m; j++) {
scanf("%lf", &x[i * pdx + j]);
}
scanf("%lf%lf%*[^\n] ", &y[i], &t[i]);
}
/* Read in variable inclusion flags */
for (j = 0; j < m; j++) {
scanf("%" NAG_IFMT "", &isx[j]);
}
scanf("%*[^\n] ");
/* Read in control parameters for the regression */
scanf("%" NAG_IFMT "%lf%lf%" NAG_IFMT "%*[^\n] ", &print_iter, &eps, &tol,
&max_iter);
/* Calculate IP */
for (ip = 0, i = 0; i < m; i++)
ip += (isx[i] > 0);
if (mean == Nag_MeanInclude)
ip++;
/* ... End of setting variables required by the regression */
/* Set variables required by data sampling routine (g05pvc) ... */
/* Read in the base generator information and seed */
scanf("%39s%" NAG_IFMT "%" NAG_IFMT "%*[^\n] ", cgenid, &subid, &seed[0]);
genid = (Nag_BaseRNG)nag_enum_name_to_value(cgenid);
/* Initial call to g05kfc to get size of STATE array */
lstate = 0;
nag_rand_init_repeat(genid, subid, seed, lseed, state, &lstate,
NAGERR_DEFAULT);
/* Allocate state array */
if (!(state = NAG_ALLOC(lstate, Integer))) {
printf("Allocation failure\n");
exit_status = -1;
goto END;
}
/* Initialize the generator to a repeatable sequence using g05kfc */
nag_rand_init_repeat(genid, subid, seed, lseed, state, &lstate,
NAGERR_DEFAULT);
/* Read in the number of folds */
scanf("%" NAG_IFMT "%*[^\n] ", &k);
/* ... End of setting variables required by data sampling routine */
/* Set variables required by prediction routine (g02gpc) ... */
/* Regression is performed using g02gbc so error structure is binomial */
errfn = Nag_Binomial;
/* This example does not use the predicted standard errors, so */
/* it doesn't matter what VFOBS is set to */
vfobs = Nag_FALSE;
/* The error and link being used in the linear model don't use scale */
/* and ex_power so they can be set to anything */
ex_power = 0.0;
scale = 0.0;
/* ... End of setting variables required by prediction routine */
/* This is the maximum size for a validation dataset */
max_nv = (Integer)(((double)n / (double)k) + 0.5);
/* Allocate arrays */
pdv = n;
if (!(b = NAG_ALLOC(ip, double)) || !(se = NAG_ALLOC(ip, double)) ||
!(cov = NAG_ALLOC(ip * (ip + 1) / 2, double)) ||
!(v = NAG_ALLOC(n * pdv, double)) || !(eta = NAG_ALLOC(max_nv, double)) ||
!(seeta = NAG_ALLOC(max_nv, double)) ||
!(pred = NAG_ALLOC(max_nv, double)) ||
!(sepred = NAG_ALLOC(max_nv, double)))
{
printf("Allocation failure\n");
exit_status = -1;
goto END;
}
/* Initialize counts */
tp = tn = fp = fn = 0;
/* Loop over each fold */
for (fold = 1; fold <= k; fold++) {
/* Use g05pvc to split the data into training and validation datasets */
nag_rand_kfold_xyw(k, fold, n, m, sordx, x, pdx, y, t, &nt, state, &fail);
if (fail.code != NE_NOERROR) {
printf("Error from nag_rand_kfold_xyw (g05pvc).\n%s\n", fail.message);
exit_status = 1;
if (fail.code != NW_POTENTIAL_PROBLEM)
goto END;
}
/* Calculate the size of the validation dataset */
nv = n - nt;
/* Call g02gbc to fit generalized linear model, with Binomial */
/* errors to training data */
nag_correg_glm_binomial(link, mean, nt, x, pdx, m, isx, ip, y, t, wt,
offset, &dev, &df, b, &rank, se, cov, v, pdv, tol,
max_iter, print_iter, "", eps, &fail);
if (fail.code != NE_NOERROR) {
printf("Error from nag_correg_glm_binomial (g02gbc).\n%s\n",
fail.message);
exit_status = 1;
goto END;
}
/* Call g02gpc to predict the response for the observations in the */
/* validation dataset */
/* We want to start passing X and T at the (NT+1)th observation, */
/* These start at (i,j)=(nt+1,1), hence the (nt*pdx+0)th element */
/* of X and the nt'th element of T */
nag_correg_glm_predict(errfn, link, mean, nv, &x[nt * pdx], pdx, m, isx, ip,
&t[nt], offset, wt, scale, ex_power, b, cov, vfobs,
eta, seeta, pred, sepred, &fail);
if (fail.code != NE_NOERROR) {
printf("Error from nag_correg_glm_predict (g02gpc).\n%s\n", fail.message);
exit_status = 1;
goto END;
}
/* Count the true/false positives/negatives */
for (i = 0; i < nv; i++) {
obs_val = (Integer)y[nt + i];
pred_val = (pred[i] >= 0.5 ? 1 : 0);
if (obs_val) {
/* Positive */
if (pred_val) {
/* True positive */
tp++;
} else {
/* False Negative */
fn++;
}
} else {
/* Negative */
if (pred_val) {
/* False positive */
fp++;
} else {
/* True negative */
tn++;
}
}
}
}
/* Display results */
np = tp + fn;
nn = fp + tn;
printf(" Observed\n");
printf(" --------------------------\n");
printf(" Predicted | Negative Positive Total\n");
printf(" --------------------------------------\n");
printf(" Negative | %5" NAG_IFMT " %5" NAG_IFMT " %5" NAG_IFMT "\n",
tn, fn, tn + fn);
printf(" Positive | %5" NAG_IFMT " %5" NAG_IFMT " %5" NAG_IFMT "\n",
fp, tp, fp + tp);
printf(" Total | %5" NAG_IFMT " %5" NAG_IFMT " %5" NAG_IFMT "\n",
nn, np, nn + np);
printf("\n");
if (np != 0) {
printf(" True Positive Rate (Sensitivity): %4.2f\n",
(double)tp / (double)np);
} else {
printf(" True Positive Rate (Sensitivity): No positives in data\n");
}
if (nn != 0) {
printf(" True Negative Rate (Specificity): %4.2f\n",
(double)tn / (double)nn);
} else {
printf(" True Negative Rate (Specificity): No negatives in data\n");
}
END:
NAG_FREE(isx);
NAG_FREE(state);
NAG_FREE(b);
NAG_FREE(cov);
NAG_FREE(eta);
NAG_FREE(pred);
NAG_FREE(se);
NAG_FREE(seeta);
NAG_FREE(sepred);
NAG_FREE(v);
NAG_FREE(offset);
NAG_FREE(wt);
NAG_FREE(x);
NAG_FREE(y);
NAG_FREE(t);
return (exit_status);
}