R Correspondence Analysis (対応分析)

「Rで学ぶデータサイエンス カテゴリカルデータ解析」より抜粋

library(vcd)

tabledata <- xtabs(Freq~Status+Urban, data=DanishWelfare)

tabledata

 

result <- chisq.test(tabledata)

 

round(prop.table(tabledata,margin=2), digits=3)

#margin = 1:horizontally, 2:vertically

#digits = rounding digit number

 

result <- corresp(tabledata, nf=1)

result

 

plot(-3:2, -3:2, type ="n")

for (i in 1:3) {

  for(j in 1:5) {

    points(result$rscore[i], result$cscore[j],

           cex = log(tabledata[i,j]), xlim=c(0,4))

  }

}

 

plot(result)

 

result2 <- corresp(tabledata, nf=2)

plot(result2)

 

##############Answer pattern Analysis##############

gakuryoku <- rbind(c(3,4,1),c(3,4,1),c(3,4,1),c(4,4,1),c(2,2,1),c(3,4,4),c(1,1,1),c(2,3,1),c(1,2,1),c(3,4,1),

                    c(2,3,1),c(2,1,1),c(3,2,1),c(2,2,1),c(3,4,4),c(2,3,1),c(4,4,1),c(4,3,2),c(1,4,1),c(4,4,1),

                    c(1,1,1),c(1,4,1),c(2,3,1),c(3,4,1),c(3,4,1),c(2,4,3),c(1,1,1),c(2,3,1),c(2,3,1),c(1,1,4))

colnames(gakuryoku) <- list("A","B","C")

id <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,

        21,22,23,24,25,26,27,28,29,30)

gakuryoku <- cbind(id,gakuryoku)

gakuryoku <- gakuryoku[,2:4]

 

gaku <- matrix(rep(0,120),nrow=30)

 

for (j in 1:3) {

  for (i in 1:30) {

    if (gakuryoku[i,j] == 1) {

      gaku[i,] <- c(1,0,0,0)

    }

    if (gakuryoku[i,j] == 2) {

      gaku[i,] <- c(0,1,0,0)

    }

    if (gakuryoku[i,j] == 3) {

      gaku[i,] <- c(0,0,1,0)

    }

    if (gakuryoku[i,j] == 4) {

      gaku[i,] <- c(0,0,0,1)

    }

  }

  assign(paste("gaku",j,sep=""),gaku)

}

 

gakuryoku <- cbind(gaku1, gaku2, gaku3)

colnames(gakuryoku) <- c("A1","A2","A3","A4",

                         "B1","B2","B3","B4",

                         "C1","C2","C3","C4")

 

library(MASS)

result <- corresp(gakuryoku, nf=2)

summary(result)

 

plot(result)

 

##############Multiple Answer pattern Analysis##############

gakuryoku <- rbind(c(3,4,1),c(3,4,1),c(3,4,1),c(4,4,1),c(2,2,1),c(3,4,4),c(1,1,1),c(2,3,1),c(1,2,1),c(3,4,1),

                   c(2,3,1),c(2,1,1),c(3,2,1),c(2,2,1),c(3,4,4),c(2,3,1),c(4,4,1),c(4,3,2),c(1,4,1),c(4,4,1),

                   c(1,1,1),c(1,4,1),c(2,3,1),c(3,4,1),c(3,4,1),c(2,4,3),c(1,1,1),c(2,3,1),c(2,3,1),c(1,1,4))

colnames(gakuryoku) <- list("A","B","C")

 

#Rは文字列として入力されている場合には自動的にカテゴリカル変数

#として取り扱うが、文字列として入力されている場合は

#量的変数として判断する

 

A <- factor(gakuryoku[,1], level=c(1:4), labels=c("A1","A2","A3","A4"))

B <- factor(gakuryoku[,2], level=c(1:4), labels=c("B1","B2","B3","B4"))

C <- factor(gakuryoku[,3], level=c(1:4), labels=c("C1","C2","C3","C4"))

gakuryoku <- data.frame(A, B, C)

summary(gakuryoku)

 

#変数がfactor型の変数となった

 

library(MASS)

result <- mca(gakuryoku, nf=3)

result

plot(result)

 

##############Practice##############

class(caith)

result <- corresp(caith,nf=2)

plot(result)

 

class(farms)

farms

result <- mca(farms, nf=2)

plot(farms)