library(ggplot2) library(corrplot) library(Hmisc) d=read.csv("states_m.csv") d2=d[,c(3,5,10,12,25,26, 1,13,16,57, 15,17,27,29,38, 30,37, 50, 8,32,48, 55,64,53)] d2=d2[complete.cases(d2),] cor=rcorr(as.matrix(d2[,-c(13,24)])) corrplot(cor$r, type="upper", order="hclust", tl.col="black", tl.srt=45,tl.cex=.75) pdf("Figure.pdf") corrplot(cor$r, type="upper", order="hclust", tl.col="black", tl.srt=45,tl.cex=.65) dev.off() pr.out=prcomp(d2[,-c(13,24)], scale=T) pcax = as.data.frame(pr.out$x) pr.out$rotation sum((pr.out$rotation[,"PC1"])^2) [1] 1 sum((pr.out$rotation[,"PC2"])^2) [1] 1 Stdev=apply(pcax, 2, sd) PC1 PC2 PC3 PC4 PC5 PC6 PC7 2.74108822 2.28739029 1.51877530 1.25722699 1.02957328 1.01703909 0.85764683 PC8 PC9 PC10 PC11 PC12 PC13 PC14 0.74389634 0.69564518 0.63101477 0.53464406 0.45984263 0.39408799 0.38171109 PC15 PC16 PC17 PC18 PC19 PC20 PC21 0.30924919 0.27016760 0.22657225 0.19366681 0.16716120 0.10315234 0.07730557 PC22 GiniIndex 0.03487876 0.01896804 plot(cumsum(Stdev)) cor = rcorr(as.matrix(pcax)) corrplot(cor$r, type="upper", order="hclust", tl.col="black", tl.srt=45) rownames(pcax)=d2[,24] ggplot(pcax, aes(x=PC1,y=PC2,label=rownames(pcax))) + geom_point() + geom_text(size=3,vjust=1.5)+theme_bw() pcax$GiniIndex=d2$GiniIndex lm1=lm(GiniIndex~PC1+PC2,data=pcax) summary(lm1) lm2=lm(GiniIndex~.,data=pcax) summary(lm2) lm2b=lm(GiniIndex~PC2+PC5+PC6+PC7+PC8+PC9+PC10+PC15+PC16+PC18+PC22,data=pcax) summary(lm2b) lm3=lm(GiniIndex~PC2+PC5+PC10,data=pcax) summary(lm3) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.462416 0.001603 288.427 < 2e-16 *** PC2 0.002889 0.000708 4.081 0.000177 *** PC5 -0.012764 0.001573 -8.115 2e-10 *** PC10 -0.007559 0.002567 -2.945 0.005048 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.01134 on 46 degrees of freedom Multiple R-squared: 0.6647, Adjusted R-squared: 0.6428 F-statistic: 30.39 on 3 and 46 DF, p-value: 5.523e-11 plot(lm3) plot(pcax$GiniIndex,lm3$fitted.values) lm3a=lm3$model rownames(lm3a)=d2[,24] lm3a$fitted.values=lm3$fitted.values ggplot(lm3a, aes(x=GiniIndex,y=fitted.values,label=rownames(lm3a) )) + geom_point() + geom_text(size=3,vjust=1.5)+theme_bw() lm4=lm(GiniIndex~PC2+PC5,data=pcax) summary(lm4) Estimate Std. Error t value Pr(>|t|) (Intercept) 0.4624160 0.0017292 267.419 < 2e-16 *** PC2 0.0028892 0.0007636 3.783 0.000437 *** PC5 -0.0127645 0.0016966 -7.524 1.32e-09 *** Residual standard error: 0.01223 on 47 degrees of freedom Multiple R-squared: 0.6014, Adjusted R-squared: 0.5845 F-statistic: 35.46 on 2 and 47 DF, p-value: 4.092e-10 b=-coef(lm4)["PC2"]/coef(lm4)["PC5"] a=-(coef(lm4)["(Intercept)"]-mean(d2$GiniIndex))/coef(lm4)["PC5"] ggplot(pcax, aes(x=PC2,y=PC5,label=rownames(pcax))) + geom_point() + geom_text(size=3,vjust=1.5)+theme_bw()+ geom_abline(intercept = a, slope = b, color="red") lm5=lm(GiniIndex~PC2+PC5+PC10,data=pcax) summary(lm5) library(Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.462416 0.001603 288.427 < 2e-16 *** PC2 0.002889 0.000708 4.081 0.000177 *** PC5 -0.012764 0.001573 -8.115 2e-10 *** PC10 -0.007559 0.002567 -2.945 0.005048 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.01134 on 46 degrees of freedom Multiple R-squared: 0.6647, Adjusted R-squared: 0.6428 F-statistic: 30.39 on 3 and 46 DF, p-value: 5.523e-11 boot) gl1=glm(GiniIndex~PC2+PC5+PC10,data=pcax) > mean(gl1$residuals^2) [1] 0.0001182368 library(boot) cv.glm(pcax,gl1)$delta sqrt(cv.glm(pcax,gl1)$delta) pr.out$rotation j= 0.002889*pr.out$rotation[,"PC2"]-0.012764*pr.out$rotation[,"PC5"]-0.007559*pr.out$rotation[,"PC10"] sort(abs(j/sqrt(sum(j^2)) )) lm7=lm(GiniIndex~GrossStateProduct+MedianHouseholdIncome+BurglaryRate,data=d) summary(lm7) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 4.663e-01 2.607e-02 17.887 < 2e-16 *** GrossStateProduct 1.858e-14 5.360e-15 3.467 0.00114 ** MedianHouseholdIncome -2.445e-07 3.571e-07 -0.685 0.49689 BurglaryRate 8.647e-01 2.007e+00 0.431 0.66856 Residual standard error: 0.01947 on 47 degrees of freedom Multiple R-squared: 0.2107, Adjusted R-squared: 0.1603 F-statistic: 4.182 on 3 and 47 DF, p-value: 0.01053 trump=read.csv("trump.csv") trump=trump[trump$StateAbbreviation != "DC",] blue=c("blue","red") ggplot(pcax, aes(x=PC1,y=PC2,label=rownames(pcax))) + geom_point(color=blue[1+trump$Trump]) + geom_text(size=3,vjust=1.5,color=blue[1+trump$Trump])+theme_bw() ggplot(pcax, aes(x=PC1,y=PC3,label=rownames(pcax))) + geom_point(color=blue[1+trump$Trump]) + geom_text(size=3,vjust=1.5,color=blue[1+trump$Trump])+theme_bw() pcax$trump=trump$Trump glm1=glm(trump~GiniIndex+PC1+PC2+PC3+PC4+PC5,family=binomial,data=pcax) glm2=glm(trump~PC1+PC4,family=binomial,data=pcax)