Random Forest
Awesome links
Installation
# Install every package used in this guide with a single call.
# NOTE(review): rgdal was retired from CRAN in 2023 -- confirm it is still
# installable in your environment.
install.packages(c("randomForest", "raster", "rgdal", "sp"))
Basic Usage
Loading the Packages
Loading and Preprocessing Data
Load Raster Data
Load Vector Data
Extracting Values for Training
# Reproject the vector layer so it shares the raster's CRS
vector_data <- spTransform(vector_data, crs(raster_data))
# Extract raster values under each feature. With df = TRUE the first column,
# `ID`, is the index of the feature each row was sampled from; polygon or line
# features can contribute several rows (one per covered cell).
training_data <- raster::extract(raster_data, vector_data, df = TRUE)
# Look the labels up by feature index so they stay aligned with the extracted
# rows (assumes the shapefile has a column named `target_column`)
training_data$target <- vector_data$target_column[training_data$ID]
# Drop the bookkeeping column: it is not a raster layer, so a model trained on
# it could not be applied to the raster afterwards
training_data$ID <- NULL
# Remove samples with missing values; randomForest() stops on NA by default
training_data <- na.omit(training_data)
Training a Random Forest Model
Training the Model
# randomForest() fits a classifier only when the response is a factor
training_data$target <- as.factor(training_data$target)
# Use only raster-derived columns as predictors. The `ID` column produced by
# extract(..., df = TRUE) is not a raster layer and must not enter the model,
# otherwise predicting on the raster fails for lack of that variable.
predictor_names <- setdiff(names(training_data), c("ID", "target"))
# Fix the RNG seed so the fitted forest is reproducible
set.seed(123)
# Train the Random Forest model; mtry is capped at the number of predictors so
# rasters with fewer than 3 bands do not trigger an "invalid mtry" warning
rf_model <- randomForest(
  target ~ .,
  data = training_data[c(predictor_names, "target")],
  ntree = 500,
  mtry = min(3, length(predictor_names))
)
Viewing the Model Summary
Model Evaluation
Prediction on Raster Data
# Apply the fitted model to every cell of the raster stack
predicted_raster <- predict(
  object = raster_data,
  model = rf_model,
  type = "response"
)
Save the Predicted Raster
# Write the prediction to disk as a GeoTIFF, replacing any existing file
writeRaster(
  x = predicted_raster,
  filename = "predicted_raster.tif",
  format = "GTiff",
  overwrite = TRUE
)
Variable Importance
Example Workflow
Complete Example
# End-to-end example: train a Random Forest classifier on raster values sampled
# under labelled vector features, then classify the whole raster.

# Load necessary libraries
library(randomForest)
library(raster)
library(rgdal)
library(sp)

# Load the raster dataset (one layer per band)
raster_data <- stack("path_to_raster_file.tif")

# Load the shapefile
vector_data <- readOGR("path_to_shapefile.shp")

# Ensure CRS match
vector_data <- spTransform(vector_data, crs(raster_data))

# Extract raster values using shapefile. With df = TRUE the first column, `ID`,
# is the index of the feature each row was sampled from; polygon or line
# features can contribute several rows (one per covered cell).
training_data <- raster::extract(raster_data, vector_data, df = TRUE)

# Look the labels up by feature index so they stay aligned with the extracted
# rows (assumes the shapefile has a column named `target_column`)
training_data$target <- vector_data$target_column[training_data$ID]

# Drop the bookkeeping column: it is not a raster layer, so a model trained on
# it could not be applied to the raster afterwards
training_data$ID <- NULL

# Remove samples with missing values; randomForest() stops on NA by default
training_data <- na.omit(training_data)

# Ensure the target variable is a factor (required for classification)
training_data$target <- as.factor(training_data$target)

# Train the Random Forest model; mtry is capped at the number of predictors so
# rasters with fewer than 3 bands do not trigger an "invalid mtry" warning
predictor_names <- setdiff(names(training_data), "target")
set.seed(123)
rf_model <- randomForest(
  target ~ .,
  data = training_data,
  ntree = 500,
  mtry = min(3, length(predictor_names))
)

# Print the model summary
print(rf_model)

# Predict on the entire raster dataset
predicted_raster <- predict(raster_data, rf_model, type = "response")

# Save the predicted raster
writeRaster(
  predicted_raster,
  "predicted_raster.tif",
  format = "GTiff",
  overwrite = TRUE
)

# Variable importance
importance(rf_model)
varImpPlot(rf_model)
For more details, refer to the randomForest package documentation and the raster package documentation.