R¶
Install¶
Ubuntu¶
https://cran.r-project.org/bin/linux/ubuntu/
And then run in terminal simply by entering R
Mac¶
Install from the cran page
You can also install with brew
but it doesn't come with xquartz so you can't use View()
Run in terminal¶
macos¶
Just enter R, if nothing then try
or
Usage¶
help(thing)
package¶
install package¶
Load package¶
List all objects in a package¶
https://stackoverflow.com/a/2615147/15493213
Show package path¶
Array¶
c(1, 2, "bruh") -> [1, 2, "bruh"]
Range¶
seq(0, 10, 2) -> range(0, 10+2, 2)
Levels¶
Levels = possible values
Remove unused levels¶
To remove the unused values
https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/droplevels
Tables¶
Summary
Contingency Table
Without unused levels
With sum
Graph¶
ggplot2 remove everything¶
theme(
    axis.line=element_blank(),
    axis.text.x=element_blank(),
    axis.text.y=element_blank(),
    axis.ticks=element_blank(),
    axis.title.x=element_blank(),
    axis.title.y=element_blank(),
    legend.position="none",
    panel.background=element_blank(),
    panel.border=element_blank(),
    panel.grid.major=element_blank(),
    panel.grid.minor=element_blank(),
    plot.background=element_blank()
)
https://stackoverflow.com/questions/6528180/
Multiple graphs side by side¶
https://stackoverflow.com/a/3935554/15493213
Decision Tree & Random Forest¶
Sample code
library(randomForest)
library(caret)
library(rpart)
library(rpart.plot)
# parse & format data
train_x <- read.csv("trainX.csv")
train_y <- read.csv("trainY.csv")
test_x <- read.csv("testX.csv")
test_y <- read.csv("testY.csv")
x_title <- list()
for (i in 1:ncol(train_x)) {
    x_title <- c(x_title, paste('A', toString(i), sep=''))
colnames(test_x) <- x_title
colnames(test_y) <- "Class"
test_y$Class <- as.factor(test_y$Class)
}
colnames(train_x) <- x_title
colnames(train_y) <- "Class"
train_y$Class <- as.factor(train_y$Class)
dataset <- cbind(train_x, train_y)
# decision tree
print("------------- Decision Tree -------------")
tree <- rpart(Class ~., data = dataset)
rpart.plot(tree)
pred_tree <- predict(tree, test_x, type = "class")
confusionMatrix(pred_tree, test_y$Class)
# random forest
print("------------- Random Forest -------------")
rf <- randomForest(
    Class ~ ., 
    data=dataset, 
    # ntree=100, 
    importance=TRUE,
    proximity=TRUE,
)
print(rf)
pred_rf <- predict(rf, test_x)
confusionMatrix(pred_rf, test_y$Class)