Load the ggplot2 library, both for later plotting and to access the diamonds dataset:
library(ggplot2)
# Print a compact summary of the diamonds data: dimensions, column types, first values.
str(diamonds) # diamonds ships with ggplot2, so it is available once the package is attached
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
base R
plot(x = diamonds$carat, y = diamonds$price)
# Same scatterplot of price against carat, now customising how points are drawn.
# x and y stay written as `diamonds$...` because no axis labels are supplied and
# the default labels are taken from these expressions.
plot(
  x = diamonds$carat,
  y = diamonds$price,
  cex = 3,           # size
  col = "chocolate", # color
  pch = 21           # shape
)
## Set the range of the x and y axes, add axis and plot titles and a legend.
# Scatterplot of price against carat with explicit axis ranges, axis labels and
# a title. with() lets the columns be referenced by bare name; because xlab,
# ylab and main are all given, the rendered labels do not depend on how x and y
# are spelled.
with(
  diamonds,
  plot(
    x = carat,
    y = price,
    pch = 18,                                         # shape
    cex = 3,                                          # size
    col = ifelse(cut == "Fair", "chocolate", "blue"), # "Fair" cut vs. all others
    xlim = c(min(carat), 1),                          # x-axis range
    ylim = c(min(price), 10000),                      # y-axis range
    xlab = "Carat",
    ylab = "Price",
    main = "This Is The Plot Title!"                  # set the plot title
  )
)
# Legend for the two point colors. With legend()'s default justification,
# (x, y) is where the top-left corner of the legend box is placed.
legend(
  x = 0.2,                      # x-coordinate, in the plot's data units
  y = 9000,                     # y-coordinate, in the plot's data units
  pch = 18,                     # all same shape
  col = c("chocolate", "blue"), # colors, in the same order as the labels
  legend = c("Fair", "Better")  # the labels for the legend
)
## Adding a regression line
To add a regression line, we first fit a model using lm, and then supply that model (after generating the scatterplot) to the function abline.
# Fit a simple linear regression of price on carat. Referring to the columns by
# bare name with `data = diamonds` (instead of `diamonds$price ~ diamonds$carat`)
# gives the same fit, but with clean coefficient names and a model that
# predict() can apply to new data.
mod <- lm(price ~ carat, data = diamonds)
# Redraw the scatterplot so the regression line can be layered on top of it.
# Axis labels and title are explicit, so the way the columns are extracted does
# not affect what is drawn.
plot(
  x = diamonds[["carat"]],
  y = diamonds[["price"]],
  main = "This Is The Plot Title!", # set the plot title
  xlab = "Carat",
  ylab = "Price",
  xlim = c(min(diamonds[["carat"]]), 1),     # x-axis range
  ylim = c(min(diamonds[["price"]]), 10000), # y-axis range
  pch = 18, # shape
  cex = 3,  # size
  col = ifelse(diamonds[["cut"]] == "Fair", "chocolate", "blue")
)
# Legend for the two point colors. With legend()'s default justification,
# (x, y) is where the top-left corner of the legend box is placed.
legend(
  x = 0.2,                      # x-coordinate, in the plot's data units
  y = 9000,                     # y-coordinate, in the plot's data units
  pch = 18,                     # all same shape
  col = c("chocolate", "blue"), # colors, in the same order as the labels
  legend = c("Fair", "Better")  # the labels for the legend
)
# Draw the line described by the fitted model `mod` over the current plot.
abline(reg = mod)
The regression line can be customized using parameters that govern the aesthetics of lines. These include (but are not limited to):
- lty: the type of the line, e.g., solid, dotted, dashed
- lwd: width of line
- col is the same as for modifying points.
# Redraw the scatterplot; the customised regression line is added afterwards.
plot(
  x = diamonds$carat,
  y = diamonds$price,
  pch = 18, # shape
  col = ifelse(diamonds$cut == "Fair", "chocolate", "blue"),
  cex = 3, # size
  xlim = c(min(diamonds$carat), 1), # x-axis range
  ylim = c(min(diamonds$price), 10000), # y-axis range
  xlab = "Carat",
  ylab = "Price",
  main = "This Is The Plot Title!" # set the plot title
)
# Legend for the two point colors. With legend()'s default justification,
# (x, y) is where the top-left corner of the legend box is placed.
legend(
  x = 0.2,                      # x-coordinate, in the plot's data units
  y = 9000,                     # y-coordinate, in the plot's data units
  pch = 18,                     # all same shape
  col = c("chocolate", "blue"), # colors, in the same order as the labels
  legend = c("Fair", "Better")  # the labels for the legend
)
# Draw the fitted line from `mod`, this time with a custom look.
abline(
  reg = mod,
  col = "grey", # line color
  lty = 3,      # line type
  lwd = 4       # line width
)
For a complete overview of all the graphical parameters (such as col and lty) and what they control on your graph, run:
# Open the help topic on base graphics parameters (the same page as ?par).
?`graphical parameters`
ggplot2
ggplot2 or “ggplot” operates by building a graph a layer at a time, beginning with the plain white or grey square on which your data will be projected, up to the actual data points, and even additional visualizations such as regression lines that are computed directly from the data during plotting.
Pro-tip: layers in ggplot are separated by the "+" operator
# Called with no data and no mapping: only an empty plotting region is drawn.
ggplot()
We render just a plotting region, but the data isn’t mapped onto it anywhere or in any way.
# Supply the data and map carat/price to the x/y axes; no geom layer is added,
# so nothing is drawn for the observations themselves.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
)
Here we have mapped variables onto the axes, and as such, they have adopted (automatically) the scale/range of those variables that ggplot has computed for us internally, but we still have not mapped the data to a geometric shape.
Here we map the data on the x and y axes to be represented geometrically as a point:
# Add a point layer. geom_point() is given no mapping of its own, so it
# inherits the x/y mapping passed to ggplot().
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price)
) +
  geom_point()
As you may have started to pick up, ggplot works by mapping data to different elements of each layer that you add. The basic breakdown is as follows:
- mapping argument equal to a call to aes, in which the variable is assigned
- geom_ functions determine how the data will be represented geometrically, and these shape different parameters need to be set inside aes
ggplot2
# Scatterplot of price against carat, colored by whether the cut is "Fair",
# with a fitted straight line and a theme tuned to resemble a base R plot.
# NOTE(review): recent ggplot2 releases deprecate `size` for lines/borders (in
# favour of `linewidth`) and a numeric `legend.position` — confirm the target
# ggplot2 version before migrating those arguments.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  # Color depends on the data, so it is mapped inside aes(); shape and size are
  # constants and are set outside it.
  geom_point(
    aes(color = as.factor(cut == "Fair")),
    shape = 18, size = 3
  ) +
  xlab("Carat") +
  ylab("Price") +
  # Values and labels follow the factor's level order: FALSE first, then TRUE.
  scale_color_manual(
    values = c("blue", "chocolate"),
    labels = c("Better", "Fair")
  ) +
  guides(color = guide_legend(title = "", reverse = TRUE)) +
  coord_cartesian(xlim = c(min(diamonds$carat), 1), ylim = c(0, 10000)) +
  theme(legend.position = c(.2, .7)) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  # Some theme options to make it look more like a base R plot:
  theme(plot.title = element_text(hjust = .5), text = element_text(size = 12)) +
  theme(panel.border = element_rect(color = "black", fill = NA, size = .9)) +
  theme(panel.background = element_rect(color = "black", fill = "white")) +
  theme(panel.grid = element_line(color = "lightgrey", size = .2)) +
  theme(plot.background = element_rect(fill = "white")) +
  theme(axis.text = element_text(size = 10, color = "black")) +
  # We can add a title too
  ggtitle(label = "This Is the Plot Title!")
A huge advantage of ggplot’s use of the mapping and layering technique is that you can easily map continuous values to sets of points.
Here we color code by price itself, a continuous variable
# Scatterplot of price against carat where point color is mapped to price, a
# continuous variable, so the legend is a color bar rather than discrete keys.
# NOTE(review): recent ggplot2 releases deprecate `size` for lines/borders (in
# favour of `linewidth`) and a numeric `legend.position` — confirm the target
# ggplot2 version before migrating those arguments.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(
    aes(color = price),
    shape = 18, size = 3
  ) +
  xlab("Carat") +
  ylab("Price") +
  # Left disabled: a manual two-color scale does not apply to a continuous mapping.
  # scale_color_manual(values = c('blue', 'chocolate'), labels = c('Better', 'Fair')) +
  guides(color = guide_colorbar(title = "", reverse = TRUE)) +
  # Left disabled: the full data range is shown in this version.
  # coord_cartesian(xlim = c(min(diamonds$carat), 1), ylim = c(0, 10000)) +
  theme(legend.position = c(.1, .7)) +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  # Some theme options to make it look more like a base R plot:
  theme(plot.title = element_text(hjust = .5), text = element_text(size = 12)) +
  theme(panel.border = element_rect(color = "black", fill = NA, size = .9)) +
  theme(panel.background = element_rect(color = "black", fill = "white")) +
  theme(panel.grid = element_line(color = "lightgrey", size = .2)) +
  theme(plot.background = element_rect(fill = "white")) +
  theme(axis.text = element_text(size = 10, color = "black")) +
  # We can add a title too
  ggtitle(label = "This Is the Plot Title!")
We can also easily view effects between groups by mapping a grouping variable
# Scatterplot of price against carat, colored by price and split into one panel
# per cut, each panel with its own fitted straight line and its own axis scales.
# NOTE(review): recent ggplot2 releases deprecate `size` for lines/borders in
# favour of `linewidth` — confirm the target ggplot2 version before migrating.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(
    aes(color = price),
    shape = 18, size = 3
  ) +
  xlab("Carat") +
  ylab("Price") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  # Some theme options to make it look more like a base R plot:
  theme(plot.title = element_text(hjust = .5), text = element_text(size = 12)) +
  theme(panel.border = element_rect(color = "black", fill = NA, size = .9)) +
  theme(panel.background = element_rect(color = "black", fill = "white")) +
  theme(panel.grid = element_line(color = "lightgrey", size = .2)) +
  theme(plot.background = element_rect(fill = "white")) +
  theme(axis.text = element_text(size = 10, color = "black")) +
  # We can add a title too
  ggtitle(label = "This Is the Plot Title!") +
  # Assign the grouping variable
  facet_wrap(~cut, scales = "free") +
  # And we can map the points to a new color palette
  scale_color_gradient(low = "gold", high = "chocolate")