
Commit bbaf1c2

committed
Use Social_Network_Ads.csv File.
1 parent 8e63fbf commit bbaf1c2

File tree

1 file changed: +101 -0 lines changed


Random Forest Classification.R

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
# Steps:
# Step 1: Pick at random K data points from the Training set.
# Step 2: Build the Decision Tree associated to these K data points.
# Step 3: Choose the number Ntree of trees you want to build and repeat Steps 1 and 2.
# Step 4: For a new data point, make each one of your Ntree trees predict the category to which the data point belongs,
#         and assign the new data point to the category that wins the majority vote.
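
# Illustration only (hypothetical votes, not part of this script): with ntree = 10,
# the forest assigns a new point to whichever class collects the most tree votes.
# votes = factor(c(1, 1, 0, 1, 0, 1, 1, 1, 0, 1), levels = c(0, 1))
# names(which.max(table(votes)))   # majority vote -> "1"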

# ---------------------------------------------------- Importing Data ------------------------------------------ #

dataset = read.csv('Social_Network_Ads.csv')

# Select the relevant columns: Age, EstimatedSalary and Purchased
dataset = dataset[3:5]

# ---------------------------------------- Encoding the target feature as factor ------------------------------- #

# randomForest treats the task as classification when the response is a factor
dataset$Purchased = factor(dataset$Purchased, levels = c(0, 1))

# ---------------------------------- Splitting the dataset into Training and Test Set ------------------------- #

# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
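
# Optional sanity check (assuming the usual 400-row Social_Network_Ads data, this should
# show roughly 300 observations going to the training set and 100 to the test set):
# table(split)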

# --------------------------------------------------- Feature Scaling ------------------------------------------ #

training_set[-3] = scale(training_set[-3])
test_set[-3] = scale(test_set[-3])
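
# Optional check: after scaling, each predictor column should have mean ~0 and sd ~1.
# round(colMeans(training_set[-3]), 3)
# round(apply(training_set[-3], 2, sd), 3)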

# ------------------------------------- Fitting Classifier to the Training Set --------------------------------- #

library(randomForest)
classifier = randomForest(x = training_set[-3], y = training_set$Purchased, ntree = 10)
summary(classifier)
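
# Optional diagnostics (suggestions, not in the original script):
# print(classifier)        # OOB error estimate and per-class confusion matrix
# importance(classifier)   # mean decrease in Gini impurity for Age and EstimatedSalary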

# -------------------------------------------- Predicting the Test Set result ---------------------------------- #

y_pred = predict(classifier, newdata = test_set[-3])
y_pred

# ------------------------------------------------- Confusion Matrix ------------------------------------------- #

# Rows are the actual classes, columns the predicted classes
cm = table(test_set[, 3], y_pred)
cm
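
# Optional: overall accuracy derived from the confusion matrix (added suggestion).
# accuracy = sum(diag(cm)) / sum(cm)
# accuracy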

# --------------------------------------- Visualising the Training Set results --------------------------------- #

# Note: ElemStatLearn has been archived from CRAN, so it may need to be installed from the CRAN archive.
library(ElemStatLearn)

set = training_set
train1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
train2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)

grid_set = expand.grid(train1, train2)

colnames(grid_set) = c('Age', 'EstimatedSalary')

y_grid = predict(classifier, newdata = grid_set, type = 'class')

# Plotting
plot(set[, -3],
     main = 'Random Forest Classification (Training set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(train1), ylim = range(train2))

# Decision boundary (the contour separates the two predicted regions)
contour(train1, train2, matrix(as.numeric(y_grid), length(train1), length(train2)), add = TRUE)

# Colouring the prediction grid and the actual observations
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
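
# Note (added suggestion): the 0.01 grid step creates a very large grid, so prediction and
# plotting can be slow; a coarser step trades resolution for speed, e.g.:
# train1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.05)
# train2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.05)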

# ------------------------------------------- Visualising the Test Set results --------------------------------- #

library(ElemStatLearn)

set = test_set

test1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
test2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)

grid_set = expand.grid(test1, test2)

colnames(grid_set) = c('Age', 'EstimatedSalary')

y_grid = predict(classifier, newdata = grid_set, type = 'class')

# Plotting
plot(set[, -3], main = 'Random Forest Classification (Test set)',
     xlab = 'Age', ylab = 'Estimated Salary',
     xlim = range(test1), ylim = range(test2))

# Decision boundary (the contour separates the two predicted regions)
contour(test1, test2, matrix(as.numeric(y_grid), length(test1), length(test2)), add = TRUE)

# Colouring the prediction grid and the actual observations
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
