# # This is a collection of functions to implement mclust-em clustering. # They were originally written by A. Dasgupta for the paper # Dasgupta and Raftery (1995), then altered and presented here by # Simon Byers, Dept of Statistics, University of Washington. # November 13, 1996 # # Copyright 1996 by Simon Byers, Abhijit Dasgupta and Adrian Raftery # # There is a good latex help document that tells how to use them. # # source("functions.S") # This file contains the functions: # em.a # emsimp.a # emcomp.a # mixloglik # find.alpha # find.alpha.NR # normal.density # em # emsimp # emcomp # rm.mclust.em em.a <- function(dat,ind,niter,alpha,A) # # This set of functions performs the EM algorithm including # estimation of alpha. # # Inputs: # dat, n by 2 matrix of data point. # ind, initial classification of points. # niter, number of iterations to be done. # alpha, an initial value for alpha. # A, area of region. If A is missing the rectangle enclosing # the data is used,. For instance you could input the area of # the convex hull of the data if you really wanted. # { if( missing(A)){A <- diff(range(dat[,1]))* diff(range(dat[,2]))} a <- diag(c(1,alpha)) u <- sort(unique(ind)) if(length(u)==2){ emsimp.a(dat,ind,u,a,niter,A) } else{ emcomp.a(dat,ind,u,a,niter,A) } } emsimp.a <- function(dat,ind,u,a,niter,A) # only 2 classes # # This function is called by em.a for the case of two groups only. # Inputs as in em.a along with u, which is the vector of the group # designations used, derived in em.a # { mu <- apply(dat[ind==u[2],],2,mean) ev <- eigen(var(dat[ind==u[2],]));v <- max(ev\$val)*ev\$vec%*%a%*%t(ev\$vec) p <- length(ind[ind==u[2]])/length(ind) lik<-sum(log((1-p)/A+p*normal.density(dat,mu,v))) cat(list(lik=lik,alpha=a[2,2],p=p,mu=mu,v=v),labels=c('lik','alpha','p','mu','v'),file='runv.dat',fill=4,append=T) #initial for(i in 1:niter) { # E-step z <- p*normal.density(dat,mu,v)/(p*normal.density(dat,mu,v)+(1-p)/A) # Modified M-step mu <- apply(dat*z,2,sum)/sum(z) wt <- z/sum(z) tmp <- sqrt(wt)*scale(dat,mu,scale=F) v <- t(tmp)%*%tmp ev <- eigen(v) alpha <- min(ev\$val)/max(ev\$val) v <- max(ev\$values)*ev\$vectors %*% diag(c(1,alpha)) %*% t(ev\$vectors) p <- sum(z)/length(ind) lik <- sum(log((1-p)/A+p*normal.density(dat,mu,v))) cat(list(lik=lik,alpha=alpha,p=p,mu=mu,v=v),labels=c('lik','alpha','p','mu','v'),file="runv.dat",fill=4,append=T) } list(z=z,p=p,alpha=alpha,mu=mu,v=v,lik=lik) } emcomp.a<-function(dat,ind,u,a,niter, A) # # This function is called by em.a for the case of more than two groups. # Inputs as in em.a along with u, which is the vector of the group # designations used, derived in em.a. # { mu <- matrix(rep(0, ncol(dat) * (length(u) - 1)), ncol = ncol(dat)) v <- vector("list", length(u) - 1) d <- matrix(rep(0, 2*(length(u)-1)),nrow=2) ev <- vector("list", length(u) - 1) for(i in 1:(length(u) - 1)) { mu[i, ] <- apply(dat[ind == u[i + 1], ], 2, mean) ev [[i]]<- eigen(var(dat[ind == u[i + 1], ])) v[[i]] <- ev[[i]]\$val[1] * ev[[i]]\$vec %*% a %*% t(ev[[i]]\$vec) } p<-as.vector(table(ind))/length(ind) z<-matrix(rep(0,length(u)*length(ind)),ncol=length(u)) lik<- mixloglik(dat,length(u),p,mu,v,A) cat(list(lik=lik,alpha=a[2,2],p=p,mu=mu,v=v),labels=c('lik','alpha','p','mu','v'),file='run.dat',fill=4,append=T) #initial for(i in 1:niter) # Start main iteration loop { # E-step z[, 1] <- p[1]/A for(j in 2:length(u)) z[, j] <- p[j] * normal.density(dat, mu[j - 1, ], v[[j - 1]]) # v incorporates a zsum <- apply(z, 1, sum) z <- t(scale(t(z), center = F, scale = zsum)) # Modified M-step for(j in 2:length(u)) { mu[j - 1, ] <- apply(dat * z[, j], 2, sum)/sum(z[, j]) wt <- z[, j]/sum(z[, j]) tmp <- sqrt(wt) * scale(dat, center = mu[j-1, ], scale= F) ev[[j-1]] <- eigen(t(tmp) %*% tmp) d[,j-1]<-ev[[j-1]]\$val } n<-apply(z[,-1],2,sum) alpha<-min(find.alpha.NR(length(u)-1,n,d)) print(alpha) a<-diag(c(1,alpha)) for(j in 1:(length(u)-1)) { v[[j]]<-max(ev[[j]]\$val)*ev[[j]]\$vec %*% a %*% t(ev[[j]]\$vec) } p<-apply(z,2,sum)/length(ind) lik<- mixloglik(dat,length(u),p,mu,v,A) cat(list(lik=lik,alpha=alpha,p=p,mu=mu,v=v),labels=c('lik','alpha','p','mu','v'),file='run.dat',fill=4,append=T) } list(lik=lik,z=z,alpha=alpha,p=p,mu=mu,v=v) } mixloglik<-function(dat,nmix,pmix,mu,v,A) # # mixture log likelihood, for use in em functions # { like<-matrix(rep(0,nmix*nrow(dat)),ncol=nmix) like[,1]<-rep(1/A,nrow(dat)) for(i in 2:nmix)like[,i]<-normal.density(dat,mu[i-1,],v[[i-1]]) lik<-sum(log(apply(t(like)*pmix,2,sum))) lik } find.alpha <- function(nmix,n,d) # # Function to do grid search on [0,1] for alpha # n is sum(z_kj) # d is matrix of eigenvalues from components # { alpha.values _ seq(0,1,length=101) results _ rep(0, 101) for(i in 1:101) { for(j in 1:nmix) { results[i] <- results[i] + (n[j] * d[2,j])/(alpha.values[i]*d[1, j] + d[2, j]) } } results <- results/sum(n) -1 + 1/2 alpha <- alpha.values[abs(results) == min(abs(results))] return(alpha) } find.alpha.NR <- function(nmix,n,d) # # Function to do NR on [0,1] for alpha, if it fails call grid search # nmix number of components in mixture # n is sum(z_kj) # d is matrix of eigenvalues from components # { alpha _ 0.01; new.alpha <-1; old.alpha <- 10 iteration <- 0 while( (abs((new.alpha-old.alpha)/old.alpha)>0.0001) && (iteration < 100) ){ f <-0; fprime<-0 for(j in 1:nmix) { f <- f + (n[j] * d[2,j])/(alpha*d[1, j] + d[2, j]) fprime <- fprime - (n[j] * d[2,j] * d[1,j])/((alpha*d[1,j] + d[2,j])^2) } f <- f-sum(n)/2 new.alpha <- alpha - f/fprime old.alpha <- alpha alpha <- new.alpha iteration <- iteration + 1 } if( (alpha < 0) || (alpha > 1) ){ alpha <- find.alpha(nmix,n,d) } return(alpha) } normal.density<-function(mat, mn = apply(mat, 2, mean), v = var(mat)) # # Computes the bivariate normal density for each sample point # { sq <- eigen(solve(v))\$vectors %*% diag(sqrt(eigen(solve(v))\$values)) ll <- exp(-0.5 * apply((scale(mat, center = mn, scale = F) %*% sq)^2, 1, sum))/(2 * pi * sqrt(det(v))) ll } ################################################################# # Stuff without alpha here em <- function(dat,ind,niter,alpha,A) # # This set of functions performs the EM algorithm without for fixed # alpha # # Inputs: # dat, n by 2 matrix of data point. # ind, initial classification of points. # alpha, value of alpha to be used. # niter, number of iterations to be done. # A, area of region. If A is missing the rectangle enclosing # the data is used,. For instance you could input the area of # the convex hull of the data if you really wanted. # { if( missing(A)){A <- diff(range(dat[,1]))* diff(range(dat[,2]))} u <- sort(unique(ind)) a <- diag(c(1,alpha)) if(length(u)==2){ emsimp(dat,ind,u,niter,a,A) } else{ emcomp(dat,ind,u,niter,a,A) } } emsimp <- function(dat,ind,u,niter,a,A) # # This function is called by em for the case of two groups only. # Inputs as in em along with u, which is the vector of the group # designations used, derived in em1. # { mu <- apply(dat[ind==u[2],],2,mean) ev <- eigen(var(dat[ind==u[2],]));v <- ev\$values[1]*ev\$vector%*%a%*% t(ev\$vector) p <- length(ind[ind==u[1]])/length(ind) for(i in 1:niter) { # E-step z <- p*normal.density(dat,mu,v)/(p*normal.density(dat,mu,v)+(1-p)/A) # Modified M-step mu <- apply(dat*z,2,sum)/sum(z) wt <- z/sum(z) tmp <- sqrt(wt)*scale(dat,mu,scale=F) ev <- eigen(t(tmp)%*%tmp) v <- ev\$values[1]*ev\$vectors%*%a%*%t(ev\$vectors) p <- sum(z)/length(ind) print(p) } list(z=z,p=p,mu=mu,v=v) } emcomp<-function(dat,ind,u,niter,a,A) # # This function is called by em for the case of more than two groups. # Inputs as in em along with u, which is the vector of the group # designations used, derived in em. # { mu <- matrix(rep(0,ncol(dat)*(length(u)-1)),ncol=ncol(dat)) v <- vector("list",length(u)-1) ev<-vector("list", length(u) - 1) for(i in 1:(length(u)-1)) { mu[i,] <- apply(dat[ind==u[i+1],],2,mean) ev [[i]]<- eigen(var(dat[ind == u[i + 1], ])) v[[i]] <- ev[[i]]\$val[1]*ev[[i]]\$vec %*% a %*% t(ev[[i]]\$vec) } p <- as.vector(table(ind))/length(ind) z <- matrix(rep(0,length(u)*length(ind)),ncol=length(u)) #Start algorithm for(i in 1:niter) { # E-step z[,1]<-p[1]/A for(j in 2:length(u)) z[,j]<- p[j] * normal.density(dat,mu[j-1,],v[[j-1]]) zsum <- apply(z,1,sum) z<- t(scale(t(z), center=F, scale=zsum)) # Modified M-step for(j in 2:length(u)) { mu[j-1,] <- apply(dat* z[,j], 2, sum)/sum(z[,j]) wt <- z[,j]/sum(z[,j]) tmp <- sqrt(wt) * scale(dat, center=mu[j-1, ], scale=F) ev[[j-1]] <- eigen(t(tmp) %*% tmp) v[[j-1]] <- max(ev[[j-1]]\$val)*ev[[j-1]]\$vect %*% a %*% t(ev[[j-1]]\$vect) p <- apply(z,2,sum)/length(ind) } } list(z=z,p=p,mu=mu,v=v) } rm.mclust.em <- function() # # Execute this function to remove all the mclust-em functions. # { rm(em.a,emsimp.a,emcomp.a,em,emsimp,emcomp,find.alpha,find.alpha.NR,normal.density,mixloglik,rm.mclust.em) }