匯入會使用到的套件（library）；其中 e1071 用於 SVM

library(e1071)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lattice)
library(ggplot2)
library(caret)

這次使用 R 語言內建的 CO2 資料集，探討兩種耐寒植物在二氧化碳攝取上的差異

# Convert the built-in CO2 dataset to a plain data frame and draw the default
# plot of the whole data frame as a first overview.
data <- as.data.frame(CO2)
plot(data)

###首先繪製以種類為底的散布圖

# Scatter plot of uptake against conc, with points colored by Type.
ggplot(CO2, aes(x = conc, y = uptake, color = Type)) +
  geom_point()

###再來是盒鬚圖

# Box plot of uptake against conc, colored by Type.
# qplot() is deprecated as of ggplot2 3.4.0, so the plot is built with
# ggplot() + geom_boxplot() using the same aesthetics.
# NOTE(review): conc is numeric, so the boxes are presumably grouped by Type
# only; map x = factor(conc) if one box per concentration is wanted - confirm.
ggplot(CO2, aes(x = conc, y = uptake, colour = Type)) +
  geom_boxplot()

###anova分析 測定

# One-way ANOVA of conc by Type: fit a linear model with Type as the only
# predictor, then print its analysis-of-variance table.
model1 <- lm(formula = conc ~ Type, data = CO2)
anova(object = model1)
## Analysis of Variance Table
## 
## Response: conc
##           Df  Sum Sq Mean Sq F value Pr(>F)
## Type       1       0       0       0      1
## Residuals 82 7268400   88639
# One-way ANOVA of uptake by Type: fit a linear model with Type as the only
# predictor, then print its analysis-of-variance table.
model2 <- lm(formula = uptake ~ Type, data = CO2)
anova(object = model2)
## Analysis of Variance Table
## 
## Response: uptake
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Type       1 3365.5  3365.5  43.519 3.835e-09 ***
## Residuals 82 6341.4    77.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TRAINING 我們的DATA去預測TYPE

# CO2 has 84 rows: hold out 52 of them for testing and train an SVM on the
# remaining 32 rows to predict Type from all other columns.
set.seed(123) # fix the RNG so the split and the reported metrics are reproducible
test <- sample(nrow(CO2), 52, replace = FALSE)
# Test-set predictors (every column except the label) and the true labels.
x <- subset(CO2[test, ], select = -Type)
y <- CO2$Type[test]
training <- CO2[-test, ]
# Fit on the training rows only.
# NOTE(review): `Type ~ .` also uses Plant, whose IDs appear to encode the
# plant's origin - this looks like label leakage; confirm before trusting the
# accuracy below.
svm_model1 <- svm(Type ~ ., data = training)
# The original bound the same model to `svmfit` as well; kept as an alias in
# case code outside this section refers to it.
svmfit <- svm_model1
pred <- predict(svm_model1, x)
# Compare predictions with the held-out labels.
confusionMatrix(pred, y)
## Confusion Matrix and Statistics
## 
##              Reference
## Prediction    Quebec Mississippi
##   Quebec          21           1
##   Mississippi      4          26
##                                          
##                Accuracy : 0.9038         
##                  95% CI : (0.7897, 0.968)
##     No Information Rate : 0.5192         
##     P-Value [Acc > NIR] : 3.139e-09      
##                                          
##                   Kappa : 0.8065         
##  Mcnemar's Test P-Value : 0.3711         
##                                          
##             Sensitivity : 0.8400         
##             Specificity : 0.9630         
##          Pos Pred Value : 0.9545         
##          Neg Pred Value : 0.8667         
##              Prevalence : 0.4808         
##          Detection Rate : 0.4038         
##    Detection Prevalence : 0.4231         
##       Balanced Accuracy : 0.9015         
##                                          
##        'Positive' Class : Quebec         
## 

準確率約為 90%（上方混淆矩陣的 Accuracy 為 0.9038；若未固定隨機種子，每次執行的結果會略有不同）。因為有取用一項變異數為1的項，提升了很多準確度。