NAG Library Manual, Mark 30.1
Interfaces:  FL   CL   CPP   AD 

NAG FL Interface Introduction
Example description
    Program g05pvfe

!     G05PVF Example Program Text

!     Mark 30.1 Release. NAG Copyright 2024.

!     .. Use Statements ..
      Use nag_library, Only: g02gbf, g02gpf, g05kff, g05pvf, nag_wp
!     .. Implicit None Statement ..
      Implicit None
!     .. Parameters ..
      Integer, Parameter               :: lseed = 1, nin = 5, nout = 6
!     .. Local Scalars ..
      Real (Kind=nag_wp)               :: a, dev, eps, s, tol
      Integer                          :: fn, fold, fp, genid, i, idf, ifail,  &
                                          ip, iprint, irank, k, ldv, ldx,      &
                                          lstate, lwk, m, maxit, max_nv, n,    &
                                          nn, np, nt, nv, obs_val, pred_val,   &
                                          sordx, subid, tn, tp, uset, usey
      Logical                          :: vfobs
      Character (1)                    :: errfn, link, mean, offset, weight
!     .. Local Arrays ..
      Real (Kind=nag_wp), Allocatable  :: b(:), cov(:), eta(:), pred(:),       &
                                          se(:), seeta(:), sepred(:), t(:),    &
                                          v(:,:), wk(:), x(:,:), y(:)
      Real (Kind=nag_wp)               :: off(1), wt(1)
      Integer, Allocatable             :: isx(:), state(:)
      Integer                          :: seed(lseed)
!     .. Intrinsic Procedures ..
      Intrinsic                        :: ceiling, count, int, real
!     .. Executable Statements ..
      Write (nout,*) 'G05PVF Example Program Results'
      Write (nout,*)

!     Skip heading in data file
      Read (nin,*)

!     Set variables required by the regression (G02GBF) ...

!     Read in the type of link function, whether a mean is required
!     and the problem size
      Read (nin,*) link, mean, n, m

!     Set storage order for G05PVF (pick the one required by G02GBF and
!     G02GPF)
      sordx = 1

      ldx = n
      Allocate (x(ldx,m),y(n),t(n),isx(m))

!     This example is not using an offset or weights
      offset = 'N'
      weight = 'U'

!     Read in data
      Read (nin,*)(x(i,1:m),y(i),t(i),i=1,n)

!     Read in variable inclusion flags
      Read (nin,*) isx(1:m)

!     Read in control parameters for the regression
      Read (nin,*) iprint, eps, tol, maxit

!     Calculate IP
      ip = count(isx(1:m)>0)
      If (mean=='M' .Or. mean=='m') Then
        ip = ip + 1
      End If
!     ... End of setting variables required by the regression

!     Set variables required by data sampling routine (G05PVF) ...

!     Read in the base generator information and seed
      Read (nin,*) genid, subid, seed(1:lseed)

!     Will always have a Y and T variable
      usey = 1
      uset = 1

!     Query the required size of the STATE array
      lstate = 0
      Allocate (state(lstate))
      ifail = 0
      Call g05kff(genid,subid,seed,lseed,state,lstate,ifail)

!     Reallocate STATE
      Deallocate (state)
      Allocate (state(lstate))

!     Initialize the generator to a repeatable sequence
      ifail = 0
      Call g05kff(genid,subid,seed,lseed,state,lstate,ifail)

!     Read in the number of folds
      Read (nin,*) k

!     ... End of setting variables required by data sampling routine

!     Set variables required by prediction routine (G02GPF) ...

!     Regression is performed using G02GBF so error structure is binomial
      errfn = 'B'

!     This example does not use the predicted standard errors
      vfobs = .False.
!     ... End of setting variables required by prediction routine

!     This is the maximum size for a validation dataset
      max_nv = ceiling(real(n,kind=nag_wp)/real(k,kind=nag_wp))

!     Allocate arrays
      ldv = n
      lwk = (ip*ip+3*ip+22)/2
      Allocate (b(ip),se(ip),cov(ip*(ip+1)/2),v(ldv,ip+7),wk(lwk))
      Allocate (eta(max_nv),seeta(max_nv),pred(max_nv),sepred(max_nv))

!     Initialize counts
      tp = 0
      tn = 0
      fp = 0
      fn = 0

!     Loop over each fold
      Do fold = 1, k
!       Split the data into training and validation datasets
        ifail = -1
        Call g05pvf(k,fold,n,m,sordx,x,ldx,usey,y,uset,t,nt,state,ifail)
        If (ifail/=0 .And. ifail/=61) Then
          Go To 100
        End If

!       Calculate the size of the validation dataset
        nv = n - nt

!       Call routine to fit generalized linear model, with Binomial errors
!       to training data
        ifail = -1
        Call g02gbf(link,mean,offset,weight,nt,x,ldx,m,isx,ip,y,t,wt,dev,idf,  &
          b,irank,se,cov,v,ldv,tol,maxit,iprint,eps,wk,ifail)
        If (ifail/=0) Then
          If (ifail<6) Then
            Go To 100
          End If
        End If

!       Predict the response for the observations in the validation dataset
        ifail = 0
        Call g02gpf(errfn,link,mean,offset,weight,nv,x(nt+1,1),ldx,m,isx,ip,   &
          t(nt+1),off,wt,s,a,b,cov,vfobs,eta,seeta,pred,sepred,ifail)

!       Count the true/false positives/negatives
        Do i = 1, nv
          obs_val = int(y(nt+i))

          If (pred(i)>=0.5_nag_wp) Then
            pred_val = 1
          Else
            pred_val = 0
          End If

          Select Case (obs_val)
          Case (0)
!           Negative
            Select Case (pred_val)
            Case (0)
!             True negative
              tn = tn + 1
            Case (1)
!             False positive
              fp = fp + 1
            End Select
          Case (1)
!           Positive
            Select Case (pred_val)
            Case (0)
!             False negative
              fn = fn + 1
            Case (1)
!             True positive
              tp = tp + 1
            End Select
          End Select
        End Do
      End Do

!     Display results
      np = tp + fn
      nn = fp + tn

      Write (*,99998) '                      Observed'
      Write (*,99998) '            --------------------------'
      Write (*,99998) 'Predicted | Negative  Positive   Total'
      Write (*,99998) '--------------------------------------'
      Write (*,99997) 'Negative  |', tn, fn, tn + fn
      Write (*,99997) 'Positive  |', fp, tp, fp + tp
      Write (*,99997) 'Total     |', nn, np, nn + np
      Write (*,*)

      If (np/=0) Then
        Write (nout,99999) 'True Positive Rate (Sensitivity):',                &
          real(tp,kind=nag_wp)/real(np,kind=nag_wp)
      Else
        Write (nout,99998)                                                     &
          'True Positive Rate (Sensitivity): No positives in data'
      End If
      If (nn/=0) Then
        Write (nout,99999) 'True Negative Rate (Specificity):',                &
          real(tn,kind=nag_wp)/real(nn,kind=nag_wp)
      Else
        Write (nout,99998)                                                     &
          'True Negative Rate (Specificity): No negatives in data'
      End If

100   Continue
99999 Format (1X,A,F5.2)
99998 Format (1X,A)
99997 Format (1X,A,1X,I5,5X,I5,5X,I5)
    End Program g05pvfe