# SPLUS PROGRAM FOR MAXIMUM LIKELIHOOD (ML) LINEAR REGRESSION IN # THE PRESENCE OF TYPE II LEFT AND INTERVAL CENSORING OF THE # OUTCOME VARIABLE. IT IS ASSUMED THAT SOME OBSERVATIONS ARE # "NON-DETECTS", I.E. THEY FALL BELOW THE DETECTION LEVEL OF THE # INSTRUMENT AND ARE LEFT CENSORED, OTHERS ARE "TRACE" # MEASUREMENTS, I.E. THEY LIE ABOVE THE LIMIT OF DETECTION, BUT # ARE BELOW SOME LEVEL WHERE RELIABLE QUANTIFICATION IS # POSSIBLE. THESE OBSERVATIONS ARE INTERVAL CENSORED. # ALTHOUGH THIS PROBLEM IS FRAMED IN TERMS OF LEFT CENSORING, # THE METHODOLOGY APPLIES EQUALLY TO THE RIGHT CENSORING # SETTING, SIMPLY BY LOOKING AT THE MIRROR IMAGE. # 29 November, 1999 # THIS IS A MAXIMUM-LIKELIHOOD TECHNIQUE THAT WILL ESTIMATE THE # COEFFICIENTS AND VARIANCE USING AN EM ALGORITHM # TO USE THIS FUNCTION, THE FOLLOWING ARE REQUIRED: # - VECTOR OF OUTCOMES, y; (n x 1) # - IT IS ASSUMED THAT y IS ALREADY TRANSFORMED TO BE # APPROXIMATELY NORMALLY DISTRIBUTED. # - CENSORED VALUES FOR y ARE REPLACED BY SENSIBLE FIRST # ESTIMATES # FOR EXAMPLE, THE MIDPOINT OF THE RELEVANT CENSORING INTERVAL # - MATRIX OF DEPENDENT VARIABLES, x, NOT INCLUDING # INTERCEPT;(n x p) # - VECTOR OF "FLAGS" (flag) DESCRIBING FOR EACH OBSERVATION # WHETHER IT IS AT THE NON-DETECT ("ND") # OR WHETHER IT IS AT TRACE ("0) mew.uncens[j]<-mew[j] if (uncens[j]==0) mew.uncens[j]<-0 } # THE FINAL CALCULATION OF SIGMA-SQUARED sigmasq <-sum((uncens-mew.uncens)^2)/D sqrtsigmasq <-sqrt(sigmasq) mlvariance.keep <-sigmasq } # END OF LOOP mlbetas.keep <-lmmodel1$coefficients mlvariance.keep <-sigmasq # CALCULATING THE INFORMATION MATRIX sigma <-sqrtsigmasq c1 <-as.vector(ND.limit - mew) c2 <-as.vector(QL.limit - mew) phi1 <-dnorm(c1/sigma) cphi1 <-pnorm(c1/sigma) phi2 <-dnorm(c2/sigma) cphi2 <-pnorm(c2/sigma) numxvars<-dim(x.variables)[2] inf.matrix<-matrix(0,ncol=(numxvars+1), nrow=(numxvars+1)) for(i in 1:n) { if (flag[i]=="ND") { # CODE FOR THE ELEMENTS OF THE INFORMATION MATRIX TOP LEFT # POSITIONS for (j in 1:numxvars) { for (k in j:numxvars) { inf.matrix[j,k]<-inf.matrix[j,k]+(c1[i]*phi1[i]*cphi1[i]/sigmasq+phi1[i]* phi1[i]/sigma)*x.variables[i,k]*x.variables[i,j]/cphi1[i]/cphi1[i] } } # CODE FOR THE BOTTOM ELEMENTS OF THE INFORMATION MATRIX for (j in 1:numxvars) { inf.matrix[(numxvars+1),j]<- inf.matrix[(numxvars+1),j]+phi1[i]*(1- c1[i]^2 /sigmasq-phi1[i]*c1[i]/sigma/cphi1[i])*x.variables[i,j]/cphi1[i] } # CODE FOR THE SINGLE RHS DIAGONAL BOTTOM ELEMENT inf.matrix[numxvars+1,numxvars+1]<- inf.matrix[numxvars+1,numxvars+1] + phi1[i]*c1[i]*(3-(c1[i])^2/sigmasq-phi1[i]*c1[i]/sigma/cphi1[i])/4/cphi1[i] } # END OF CODE FOR "ND" OBSERVATIONS if (flag[i]=="=1) {cat(paste("* Note: Information matrix had negative variance (due to censoring) which has been replaced by a conservative estimate","\n"))} else if (sum(marker==0)) {cat(paste("\n"))} # A LIST OF POSSIBLE ITEMS THAT USERS MAY WISH TO ACCESS FROM # THE FUNCTION INCLUDING THE INITIAL AND FINAL BETA-COEFFICIENT # ESTIMATES, INITIAL AND FINAL VARIANCE ESTIMATES, R-SQUARED, # THE FITTED Y'S AND THE FINAL ESTIMATES OF THE Y-VALUES. list( y.new = yi.new, y.fitted = lmmodel1$fitted.values, mlbetas.initial = mlbetas.initial, mlbetas.final = mlbetas.keep, R.squared = lmmodel1.sum$r.squared, mlvariance.initial = mlvariance.initial, mlvariance.final = mlvariance.keep) } # END OF MAXIMUM LIKELIHOOD FUNCTION