# Code
# K-means clustering of the iris measurements, displayed on the first two
# principal components.
#
# Objects created at top level: DATEN, res.km, res.pca, ind.coord,
# eigenvalue, variance.percent. The last expression builds the scatter plot.

# Keep every column except the Species label.
# `select` is namespace-qualified so that a function of the same name
# exported by another attached package cannot mask the dplyr verb.
DATEN <- iris |>
  dplyr::select(-Species)

# Compute k-means with k = 3 on the standardised variables.
# The seed is fixed so that the random starts are reproducible.
set.seed(123)
res.km <- kmeans(scale(DATEN), centers = 3, nstart = 25)

# Dimension reduction using PCA.
# The argument is spelled `scale.` in full; `scale = TRUE` only worked
# through partial argument matching.
res.pca <- prcomp(DATEN, scale. = TRUE)

# Coordinates of individuals
ind.coord <- as.data.frame(factoextra::get_pca_ind(res.pca)$coord)
# Add clusters obtained using the k-means algorithm
ind.coord$cluster <- factor(res.km$cluster)
# Add Species groups from the original data set
# (assumes the coordinate rows are still in iris row order - TODO confirm)
ind.coord$Species <- iris$Species
# Data inspection
# head(ind.coord)

# Percentage of variance explained by dimensions, rounded to one decimal
eigenvalue <- round(factoextra::get_eigenvalue(res.pca), 1)
variance.percent <- eigenvalue$variance.percent
# head(eigenvalue)

# Scatter plot of the first two dimensions: colour = k-means cluster,
# point shape = Species, convex hull drawn around each cluster.
# The axis labels carry the variance percentages computed above.
ggpubr::ggscatter(
  ind.coord,
  x = "Dim.1",
  y = "Dim.2",
  color = "cluster",
  palette = "npg",
  ellipse = TRUE,
  ellipse.type = "convex",
  shape = "Species",
  size = 1.5,
  legend = "right",
  ggtheme = ggplot2::theme_bw(),
  xlab = paste0("Dim 1 (", variance.percent[1], "% )" ),
  ylab = paste0("Dim 2 (", variance.percent[2], "% )" )
) +
  # Overlay the mean point of each cluster
  ggpubr::stat_mean(ggplot2::aes(color = cluster), size = 4)