! H05AAF Example Program Text
! Mark 30.2 Release. NAG Copyright 2024.
Module h05aafe_mod
! .. Use Statements ..
Use nag_library, Only: nag_wp
! .. Implicit None Statement ..
Implicit None
! .. Accessibility Statements ..
Private
Public :: calc_subset_score, free_subset_score
! .. Parameters ..
Integer, Parameter, Public :: nin = 5, nout = 6
! .. Derived Type Definitions ..
Type, Public :: calc_subset_data
Integer :: n, ldq, ldx
Real (Kind=nag_wp) :: tol
Real (Kind=nag_wp), Allocatable :: x(:,:), y(:), b(:), cov(:), h(:), &
p(:), q(:,:), res(:), se(:), wk(:), &
wt(:)
Integer, Allocatable :: isx(:)
Character (1) :: mean, weight
End Type calc_subset_data
Contains
Subroutine calc_subset_score(m,drop,lz,z,la,a,bscore,cs)
! Calculate the score associated with a particular set of feature
! subsets.
! M,DROP,LZ,Z,LA,A and BSCORE are all as described in the documentation
! of H05AAF.
! CS - Variable of type CALC_SUBSET_DATA that holds any additional data
! required by this routine
! This particular example finds the set, of a given size, of explanatory
! variables that best fit a response variable when a linear regression
! model is used. Therefore the feature set is the set of all the
! explanatory variables and the best set of features is defined as set
! of explanatory variables that gives the smallest residual sums of
! squares.
! See the documentation for G02DAF for details on linear regression
! models.
! .. Use Statements ..
Use nag_library, Only: g02daf
! .. Scalar Arguments ..
Type (calc_subset_data), Intent (Inout) :: cs
Integer, Intent (In) :: drop, la, lz, m
! .. Array Arguments ..
Real (Kind=nag_wp), Intent (Out) :: bscore(:)
Integer, Intent (In) :: a(:), z(:)
! .. Local Scalars ..
Real (Kind=nag_wp) :: rss
Integer :: i, idf, ifail, inv_drop, ip, irank
Logical :: svd
Character (200) :: line
! .. Intrinsic Procedures ..
Intrinsic :: abs, allocated, count, max
! .. Executable Statements ..
! Allocate various arrays and read in the data if this is the first time
! this routine has been called
If (.Not. allocated(cs%x)) Then
! Read in the number of observations for the data used in the linear
! regression skipping any headings or blank lines
Do
Read (nin,*,Iostat=ifail) line
If (ifail/=0) Then
Exit
End If
Read (line,*,Iostat=ifail) cs%n
If (ifail==0) Then
Exit
End If
End Do
! Read in the control parameters for the linear regression
Read (nin,*) cs%tol
! Read in the data
cs%ldx = cs%n
Allocate (cs%x(cs%ldx,m),cs%y(cs%n))
Read (nin,*)(cs%x(i,1:m),cs%y(i),i=1,cs%n)
! No intercept term and no weights
cs%mean = 'Z'
cs%weight = 'U'
! Allocate memory required by the regression routine
cs%ldq = cs%n
Allocate (cs%b(m),cs%cov((m*m+m)/2),cs%h(cs%n),cs%p(m*(m+ &
2)),cs%q(cs%ldq,m+1),cs%res(cs%n),cs%se(m),cs%wk(m*m+5*(m- &
1)),cs%wt(1),cs%isx(m))
End If
! Set up the initial feature set.
! If DROP = 0, this is the Null set (i.e. no features).
! If DROP = 1 then this is the full set (i.e. all features)
cs%isx(1:m) = drop
! Add (if DROP = 0) or remove (if DROP = 1) the all the features
! specified in Z
inv_drop = abs(drop-1)
Do i = 1, lz
cs%isx(z(i)) = inv_drop
End Do
Do i = 1, max(la,1)
If (la>0) Then
If (i>1) Then
! Reset the feature altered at the last iteration
cs%isx(a(i-1)) = drop
End If
! Add or drop the I'th feature in A
cs%isx(a(i)) = inv_drop
End If
ip = count(cs%isx(1:m)==1)
! Fit the regression model
ifail = 0
Call g02daf(cs%mean,cs%weight,cs%n,cs%x,cs%ldx,m,cs%isx,ip,cs%y, &
cs%wt,rss,idf,cs%b,cs%se,cs%cov,cs%res,cs%h,cs%q,cs%ldq,svd,irank, &
cs%p,cs%tol,cs%wk,ifail)
! Return the score (the residual sums of squares)
bscore(i) = rss
End Do
End Subroutine calc_subset_score
Subroutine free_subset_score(cs)
! .. Scalar Arguments ..
Type (calc_subset_data), Intent (Inout) :: cs
! .. Intrinsic Procedures ..
Intrinsic :: allocated
! .. Executable Statements ..
If (allocated(cs%x)) Then
Deallocate (cs%x)
End If
If (allocated(cs%y)) Then
Deallocate (cs%y)
End If
If (allocated(cs%b)) Then
Deallocate (cs%b)
End If
If (allocated(cs%cov)) Then
Deallocate (cs%cov)
End If
If (allocated(cs%h)) Then
Deallocate (cs%h)
End If
If (allocated(cs%p)) Then
Deallocate (cs%p)
End If
If (allocated(cs%q)) Then
Deallocate (cs%q)
End If
If (allocated(cs%res)) Then
Deallocate (cs%res)
End If
If (allocated(cs%se)) Then
Deallocate (cs%se)
End If
If (allocated(cs%wk)) Then
Deallocate (cs%wk)
End If
If (allocated(cs%wt)) Then
Deallocate (cs%wt)
End If
If (allocated(cs%isx)) Then
Deallocate (cs%isx)
End If
End Subroutine free_subset_score
End Module h05aafe_mod
Program h05aafe
! .. Use Statements ..
Use h05aafe_mod, Only: calc_subset_data, calc_subset_score, &
free_subset_score, nin, nout
Use nag_library, Only: h05aaf, nag_wp
! .. Implicit None Statement ..
Implicit None
! .. Local Scalars ..
Type (calc_subset_data) :: cs
Real (Kind=nag_wp) :: gamma
Integer :: cnt, drop, i, ifail, ip, irevcm, j, &
la, licomm, lrcomm, lz, m, mincnt, &
mincr, mip, nbest
! .. Local Arrays ..
Real (Kind=nag_wp) :: acc(2)
Real (Kind=nag_wp), Allocatable :: bscore(:), rcomm(:)
Integer, Allocatable :: a(:), bz(:,:), ibz(:), icomm(:), &
z(:)
Logical, Allocatable :: mask(:)
! .. Intrinsic Procedures ..
Intrinsic :: max, pack
! .. Executable Statements ..
Write (nout,*) 'H05AAF Example Program Results'
Write (nout,*)
! Skip headings in data file
Read (nin,*)
Read (nin,*)
! Read in the problem size
Read (nin,*) m, ip, nbest
! Read in the control parameters for the subset selection
Read (nin,*) mincr, mincnt, gamma, acc(1:2)
! Allocate memory required by the subset selection routine
mip = m - ip
Allocate (z(mip),a(max(nbest,m)),bz(mip,nbest),bscore(max(nbest,m)))
! Allocate dummy communication arrays, as we will query the required size
licomm = 2
lrcomm = 0
Allocate (icomm(licomm),rcomm(lrcomm))
! Query the required length for the communication arrays
irevcm = 0
ifail = 1
Call h05aaf(irevcm,mincr,m,ip,nbest,drop,lz,z,la,a,bscore,bz,mincnt, &
gamma,acc,icomm,licomm,rcomm,lrcomm,ifail)
! Extract the required sizes from the communication array
licomm = icomm(1)
lrcomm = icomm(2)
! Reallocate communication arrays to the correct size
Deallocate (icomm,rcomm)
Allocate (icomm(licomm),rcomm(lrcomm))
! Initialize reverse communication control flag
irevcm = 0
! Call the reverse communication best subset routine in a loop.
! The loop should terminate when IRECVM = 0 on exit
cnt = 0
rev_lp: Do
ifail = -1
Call h05aaf(irevcm,mincr,m,ip,nbest,drop,lz,z,la,a,bscore,bz,mincnt, &
gamma,acc,icomm,licomm,rcomm,lrcomm,ifail)
If (irevcm==0) Then
If (ifail==0 .Or. ifail==53) Then
! No error, or a warning was issued
Exit rev_lp
Else
! An error occurred
Go To 100
End If
End If
! Calculate and return the score for the required models and keep track
! of the number of subsets evaluated
cnt = cnt + max(1,la)
Call calc_subset_score(m,drop,lz,z,la,a,bscore,cs)
End Do rev_lp
Call free_subset_score(cs)
! Titles
Write (nout,99999) ' Score Feature Subset'
Write (nout,99999) ' ----- --------------'
! Display the best subsets and corresponding scores. H05AAF returns a list
! of features excluded from the best subsets, so this is inverted to give
! the set of features included in each subset
Allocate (ibz(m),mask(m))
ibz(1:m) = (/(i,i=1,m)/)
Do i = 1, la
mask(1:m) = .True.
Do j = 1, mip
mask(bz(j,i)) = .False.
End Do
Write (nout,99998) bscore(i), pack(ibz,mask)
End Do
Write (nout,*)
If (ifail==53) Then
Write (nout,99997) nbest, &
' subsets of the requested size do not exist, only ', la, &
' are displayed.'
End If
Write (nout,99996) cnt, ' subsets evaluated in total'
100 Continue
99999 Format (1X,A)
99998 Format (1X,E12.5,100(1X,I5))
99997 Format (1X,I0,A,I0,A)
99996 Format (1X,I0,A)
End Program h05aafe