Ryan Miller
Does the top representation (length) or bottom representation (area) better encode the underlying numeric differences in these five categories?
Notice how each chart communicates the same information, but draws your attention to different aspects.
## group var1 var2 var3 var4 var5
## 1 A 0.0000000 0.1894722 0.9777146 1.0000000 1.000000
## 2 B 1.0000000 1.0000000 0.0000000 0.1419618 0.802495
## 3 C 0.3637406 0.0000000 1.0000000 0.0000000 0.000000
# devtools::install_github("ricardo-bion/ggradar") # This package is not on CRAN; install it from GitHub using the "devtools" package
library(ggradar)
library(scales)
# Simulate five variables (var1..var5) for three groups. Each variable is three
# random normal draws mapped onto a [0, 1] scale by rescale() from the "scales"
# package.
data <- data.frame(
  group = c("A", "B", "C"),
  vapply(
    paste0("var", 1:5),
    function(var_name) rescale(rnorm(3)),
    numeric(3)
  )
)

# Radar chart of the simulated data, with one colour per group.
ggradar(
  plot.data = data,
  group.colours = c("red", "green", "blue")
)
group | subgroup | value |
---|---|---|
group-1 | subgroup-1 | 13 |
group-1 | subgroup-2 | 5 |
group-1 | subgroup-3 | 22 |
group-1 | subgroup-4 | 12 |
group-2 | subgroup-1 | 11 |
group-2 | subgroup-2 | 7 |
group-3 | subgroup-1 | 3 |
group-3 | subgroup-2 | 1 |
group-3 | subgroup-3 | 23 |
library(treemap)
# Nested example data: three groups holding 4, 2 and 3 subgroups respectively,
# with one numeric value per subgroup.
data <- data.frame(
  group = rep(c("group-1", "group-2", "group-3"), times = c(4, 2, 3)),
  subgroup = paste0("subgroup-", c(1:4, 1:2, 1:3)),
  value = c(13, 5, 22, 12, 11, 7, 3, 1, 23)
)
# Draw a two-level treemap: rectangles are nested by group, then subgroup, and
# sized by the `value` column. In the vector-valued label arguments the first
# element applies to the group level and the second to the subgroup level.
treemap(
  data,
  index = c("group", "subgroup"),
  vSize = "value",
  type = "index",
  fontsize.labels = c(15, 12),             # Label sizes: group, subgroup
  fontcolor.labels = c("white", "orange"), # Label colours: group, subgroup
  fontface.labels = c(2, 1),               # Label fonts: 1 = normal, 2 = bold
  bg.labels = c("transparent"),            # Background colour of labels
  align.labels = list(
    c("center", "center"),                 # Group labels: centred
    c("right", "bottom")                   # Subgroup labels: bottom-right
  ),
  overlap.labels = 0.5,
  inflate.labels = FALSE                   # If TRUE, labels grow with their rectangle
)
time | group | value |
---|---|---|
1 | A | 25.42546 |
1 | B | 30.34203 |
1 | C | 70.11054 |
1 | D | 64.36381 |
1 | E | 93.72924 |
1 | F | 76.74978 |
1 | G | 24.16071 |
2 | A | 87.26334 |
2 | B | 54.00489 |
2 | C | 74.97030 |
2 | D | 77.39340 |
2 | E | 37.74548 |
2 | F | 11.25851 |
2 | G | 66.29368 |
3 | A | 39.36816 |
3 | B | 91.39390 |
3 | C | 17.90009 |
3 | D | 79.10068 |
3 | E | 96.47492 |
3 | F | 79.74737 |
3 | G | 16.36403 |
4 | A | 54.15328 |
4 | B | 96.27023 |
4 | C | 75.18214 |
4 | D | 60.44227 |
4 | E | 73.71466 |
4 | F | 41.62576 |
4 | G | 43.18518 |
5 | A | 30.75828 |
5 | B | 38.46621 |
5 | C | 61.88028 |
5 | D | 85.97456 |
5 | E | 38.05831 |
5 | F | 21.54447 |
5 | G | 91.36906 |
6 | A | 84.82081 |
6 | B | 63.67155 |
6 | C | 83.36836 |
6 | D | 35.71742 |
6 | E | 35.95104 |
6 | F | 32.95370 |
6 | G | 18.79741 |
7 | A | 43.18970 |
7 | B | 40.49496 |
7 | C | 29.05453 |
7 | D | 84.30594 |
7 | E | 33.81314 |
7 | F | 54.75820 |
7 | G | 22.12186 |
# Simulated long-format data: 7 time points x 7 groups (A-G), with one
# uniform random value between 10 and 100 per time/group combination.
data <- data.frame(
  time = rep(as.numeric(1:7), each = 7),
  group = rep(LETTERS[1:7], times = 7),
  value = runif(n = 49, min = 10, max = 100)
)
# Compute each group's share of the total value at every time point.
# Step 1 sums `value` within each time/group pair. `.groups = "drop_last"`
# then states explicitly that only the `group` level is dropped, so the data
# stay grouped by `time` and sum(n) in step 2 is the per-time total (not the
# grand total, and not the single row's own value).
# The result is ungrouped at the end so later operations on `data_per` are not
# silently performed per time point.
data_per <- data %>%
  group_by(time, group) %>%
  summarise(n = sum(value), .groups = "drop_last") %>%
  mutate(percentage = n / sum(n)) %>%
  ungroup()
# Plot #1 - stacked areas of the raw values per group over time
p1 <- ggplot(data = data, mapping = aes(x = time, y = value, fill = group)) +
  geom_area(alpha = 0.6, size = 1, color = "black") +
  labs(title = "Frequencies")

# Plot #2 - stacked areas of each group's share within every time point
p2 <- ggplot(
  data = data_per,
  mapping = aes(x = time, y = percentage, fill = group)
) +
  geom_area(alpha = 0.6, size = 1, color = "black") +
  labs(title = "Conditional Proportions")

# Show both charts in a single figure
grid.arrange(p1, p2)
I recommend you browse these repositories when considering what graphics to use in your midterm project: