library(e1071)#SVM
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lattice)
library(ggplot2)
library(caret)#cofusionmatrix
# Print per-column summary statistics for the built-in iris data set.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Default plot of the iris data frame: a scatter-plot matrix of every pair of
# columns, used to eyeball which variables are related.
plot(iris)
由圖可以發現,花瓣長度(Petal.Length)與花瓣寬度(Petal.Width)之間的線性關係最為明顯。
# Scatter plot of petal length (x) against petal width (y); each species is
# drawn in its own colour.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Petal.Width, colour = Species)
  )
# Box plot of petal width with petal length on the x axis, grouped and coloured
# by species (one box per species).
# qplot() is deprecated in current ggplot2, so the plot is built with the
# equivalent ggplot() + geom_boxplot() call using the same aesthetics.
ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, colour = Species)
) +
  geom_boxplot()
# One-way ANOVA of petal width on species: fit the linear model, then print
# its analysis-of-variance table.
modelPW <- lm(formula = Petal.Width ~ Species, data = iris)
anova(object = modelPW)
## Analysis of Variance Table
##
## Response: Petal.Width
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 80.413 40.207 960.01 < 2.2e-16 ***
## Residuals 147 6.157 0.042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA of petal length on species: fit the linear model, then print
# its analysis-of-variance table.
modelPL <- lm(formula = Petal.Length ~ Species, data = iris)
anova(object = modelPL)
## Analysis of Variance Table
##
## Response: Petal.Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 437.10 218.551 1180.2 < 2.2e-16 ***
## Residuals 147 27.22 0.185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
兩個檢定的結果 p-value 皆 < 2.2e-16,遠小於 0.05,因此強烈拒絕虛無假設 H0(各品種的平均值相同)。
# Rows 1-50, 51-100 and 101-150 of iris hold the three species in order, so the
# rows are drawn at random rather than split by position.
# The seed is fixed so the split, and every metric computed from it, is the
# same on each run.
set.seed(123)
# Row indices of the 100 observations used for evaluation (drawn without
# replacement); the remaining 50 rows are left for fitting the model.
testId <- sample(nrow(iris), 100, replace = FALSE)
testId
## [1] 64 149 121 48 83 129 1 111 27 13 140 116 17 26 119 139 80
## [18] 135 126 35 33 36 39 69 74 63 133 89 11 55 98 4 62 91
## [35] 58 78 10 68 25 43 51 16 73 109 136 143 42 118 72 18 147
## [52] 7 137 37 128 144 104 112 65 85 107 40 19 106 59 87 66 47
## [69] 21 28 114 130 14 92 15 75 131 49 45 60 61 12 97 105 138
## [86] 96 150 100 9 94 103 123 95 77 115 117 127 50 52 108
# Evaluation data for the rows listed in testId:
#   x - the predictor columns (everything except Species)
#   y - the true species labels for the same rows
x <- iris[testId, names(iris) != "Species"]
y <- iris[testId, "Species"]
# Training: fit an SVM classifier of Species on all other columns, using only
# the rows that were NOT drawn into testId.
# NOTE(review): despite its name, trainingId holds the training data frame, not
# row indices; the name is kept in case other code refers to it.
trainingId <- iris[-testId, ]
svm_model1 <- svm(Species ~ ., data = trainingId)
# svmfit was previously assigned in the same chained statement; it is kept as
# an explicit alias of the same fitted model.
svmfit <- svm_model1
# Predict the species of the rows in x with the fitted SVM, then compare the
# predictions with the true labels y in a confusion matrix.
pred <- predict(svm_model1, newdata = x)
confusionMatrix(data = pred, reference = y)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 32 0 0
## versicolor 0 32 5
## virginica 0 1 30
##
## Overall Statistics
##
## Accuracy : 0.94
## 95% CI : (0.874, 0.9777)
## No Information Rate : 0.35
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.91
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.00 0.9697 0.8571
## Specificity 1.00 0.9254 0.9846
## Pos Pred Value 1.00 0.8649 0.9677
## Neg Pred Value 1.00 0.9841 0.9275
## Prevalence 0.32 0.3300 0.3500
## Detection Rate 0.32 0.3200 0.3000
## Detection Prevalence 0.32 0.3700 0.3100
## Balanced Accuracy 1.00 0.9475 0.9209