Load the ggplot2 library, both for later plotting and to access the diamonds dataset:
library(ggplot2)
# Inspect the structure of `diamonds`; the dataset ships with ggplot2, so it is
# available as soon as the package has been attached.
str(diamonds) # diamonds was loaded automatically when we imported ggplot2
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
base R
# Default scatterplot of price against carat.
plot(diamonds$carat, diamonds$price)

# The same scatterplot with customised point symbols.
plot(
  diamonds$carat,
  diamonds$price,
  col = "chocolate", # point color
  pch = 21,          # point shape
  cex = 3            # point size
)
## Set the range of the x and y axes, add axis and plot titles and a legend.
# Scatterplot of price against carat with explicit axis ranges, axis labels
# and a title. Points for 'Fair'-cut diamonds are chocolate; all others blue.
plot(
  diamonds$carat,
  diamonds$price,
  main = "This Is The Plot Title!",                          # plot title
  xlab = "Carat",
  ylab = "Price",
  xlim = c(min(diamonds$carat), 1),                          # x-axis range
  ylim = c(min(diamonds$price), 10000),                      # y-axis range
  col = ifelse(diamonds$cut == "Fair", "chocolate", "blue"), # color by cut
  pch = 18,                                                  # point shape
  cex = 3                                                    # point size
)

# Key for the two colors. With legend()'s default justification, (x, y) is the
# position of the legend box's top-left corner, in plot coordinates.
legend(
  x = 0.2,                       # x-coordinate of the legend's top-left corner
  y = 9000,                      # y-coordinate of the legend's top-left corner
  legend = c("Fair", "Better"),  # labels shown in the legend
  col = c("chocolate", "blue"),  # colors, in the same order as the labels
  pch = 18                       # same symbol for every entry
)
## Adding a regression line
To add a regression line, we first fit a model using `lm`, and then supply that model (after generating the scatterplot) to the function `abline`.
# Fit a simple linear regression of price on carat.
# The columns are named in the formula and the data frame is passed via
# `data =` (instead of diamonds$price ~ diamonds$carat) so that the slope is
# labelled "carat" and the model can be used with predict(newdata = ...).
mod <- lm(price ~ carat, data = diamonds)

# Generate the plot again
plot(
  x = diamonds$carat,
  y = diamonds$price,
  pch = 18,                                                  # point shape
  col = ifelse(diamonds$cut == "Fair", "chocolate", "blue"), # color by cut
  cex = 3,                                                   # point size
  xlim = c(min(diamonds$carat), 1),                          # x-axis range
  ylim = c(min(diamonds$price), 10000),                      # y-axis range
  xlab = "Carat",
  ylab = "Price",
  main = "This Is The Plot Title!"                           # plot title
)

# With legend()'s default justification, (x, y) is the position of the legend
# box's top-left corner, in plot coordinates.
legend(
  x = 0.2,                       # x-coordinate of the legend's top-left corner
  y = 9000,                      # y-coordinate of the legend's top-left corner
  legend = c("Fair", "Better"),  # labels shown in the legend
  col = c("chocolate", "blue"),  # colors, in the same order as the labels
  pch = 18                       # same symbol for every entry
)

# Add the fitted regression line to the existing plot:
abline(reg = mod)
The regression line can be customized using parameters that govern the aesthetics of lines. These include (but are not limited to):
- `lty`: the type of the line, e.g., solid, dotted, dashed
- `lwd`: the width of the line
- `col`: the color of the line, which works the same as for modifying points.
plot(
x = diamonds$carat,
y = diamonds$price,
pch = 18, # shape
col = ifelse(diamonds$cut == 'Fair', 'chocolate', 'blue'),
cex = 3, # size
xlim = c(min(diamonds$carat), 1), # x-axis range
ylim = c(min(diamonds$price), 10000), # y-axis range
xlab = 'Carat',
ylab = 'Price',
main = 'This Is The Plot Title!' # set the plot title
)
# Key for the two point colors. With legend()'s default justification, (x, y)
# is the position of the legend box's top-left corner, in plot coordinates.
legend(
  x = 0.2,                       # x-coordinate of the legend's top-left corner
  y = 9000,                      # y-coordinate of the legend's top-left corner
  legend = c("Fair", "Better"),  # labels shown in the legend
  col = c("chocolate", "blue"),  # colors, in the same order as the labels
  pch = 18                       # same symbol for every entry
)

# Draw the fitted model as a thick, dotted, grey line on the existing plot.
abline(
  reg = mod,
  col = "grey", # line color
  lty = 3,      # line type
  lwd = 4       # line width
)
For a complete overview of all the graphical parameters (such as `col` and `lty`) and what they control on your graph, run:
?`graphical parameters`
ggplot2
ggplot2 or “ggplot” operates by building a graph one layer at a time, beginning with the plain white or grey square on which your data will be projected, up to the actual data points, and even additional visualizations such as regression lines that are computed directly from the data during plotting.
Pro-tip: layers in ggplot are separated by the "+" operator.
# Call ggplot() with no data and no aesthetic mappings.
ggplot()
We render just a plotting region, but the data isn’t mapped onto it anywhere or in any way.
# Supply the data and map carat/price to the x/y axes; no geom layer yet.
ggplot(diamonds, aes(x = carat, y = price))
Here we have mapped variables onto the axes, and as such, they have automatically adopted the scale/range of those variables that ggplot has computed for us internally, but we still have not mapped the data to a geometric shape.
Here we map the data on the x and y axes to be represented geometrically as a point:
# Add a point layer on top of the mapped axes. geom_point() is given no
# mapping of its own, so it inherits x and y from the ggplot() call.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()
As you may have started to pick up, ggplot works by mapping data to different elements of each layer that you add. The basic breakdown is as follows:
- Data are mapped by setting the `mapping` argument equal to a call to `aes`, in which each variable is assigned to an aesthetic.
- `geom_` functions determine how the data will be represented geometrically, and these shape which parameters need to be set inside `aes`.
ggplot2
# Recreate the base R scatterplot with ggplot2: points colored by whether the
# cut is 'Fair', a dashed linear fit, and theme settings chosen to look more
# like a base R plot.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines and borders in
# favour of `linewidth`, and >= 3.5.0 deprecates a numeric `legend.position`;
# confirm the target ggplot2 version before migrating these arguments.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  # The logical comparison becomes a factor with levels FALSE, TRUE; the
  # manual scale below assigns colors and labels in that order.
  geom_point(aes(color = as.factor(cut == "Fair")), shape = 18, size = 3) +
  # Straight-line fit drawn without its confidence band
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  scale_color_manual(
    values = c("blue", "chocolate"),
    labels = c("Better", "Fair")
  ) +
  guides(color = guide_legend(title = "", reverse = TRUE)) +
  # Restrict the visible axis ranges
  coord_cartesian(xlim = c(min(diamonds$carat), 1), ylim = c(0, 10000)) +
  # Axis labels and plot title
  labs(x = "Carat", y = "Price", title = "This Is the Plot Title!") +
  # Legend placement plus theme options to make it look more like a base R plot
  theme(
    legend.position = c(.2, .7),
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = "black", fill = NA, size = .9),
    panel.background = element_rect(color = "black", fill = "white"),
    panel.grid = element_line(color = "lightgrey", size = .2),
    plot.background = element_rect(fill = "white"),
    axis.text = element_text(size = 10, color = "black")
  )
A huge advantage of ggplot’s use of the mapping and layering technique is that you can easily map continuous values to sets of points.
Here we color-code by price itself, a continuous variable:
# Scatterplot of price against carat with the point color mapped to price
# itself, plus a dashed linear fit and base-R-like theme settings.
# This version uses no manual color scale and no axis zoom.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines and borders in
# favour of `linewidth`, and >= 3.5.0 deprecates a numeric `legend.position`;
# confirm the target ggplot2 version before migrating these arguments.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(aes(color = price), shape = 18, size = 3) +
  # Straight-line fit drawn without its confidence band
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  # Untitled color bar, drawn in reversed order
  guides(color = guide_colorbar(title = "", reverse = TRUE)) +
  # Axis labels and plot title
  labs(x = "Carat", y = "Price", title = "This Is the Plot Title!") +
  # Legend placement plus theme options to make it look more like a base R plot
  theme(
    legend.position = c(.1, .7),
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = "black", fill = NA, size = .9),
    panel.background = element_rect(color = "black", fill = "white"),
    panel.grid = element_line(color = "lightgrey", size = .2),
    plot.background = element_rect(fill = "white"),
    axis.text = element_text(size = 10, color = "black")
  )
We can also easily view effects between groups by mapping a grouping variable
# Scatterplot of price against carat, colored by price on a gold-to-chocolate
# gradient, with one panel per cut and a dashed linear fit in each panel.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines and borders in
# favour of `linewidth`; confirm the target ggplot2 version before migrating.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(aes(color = price), shape = 18, size = 3) +
  # Straight-line fit drawn without its confidence band
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "grey",
    size = 1.4,
    linetype = 2
  ) +
  # Assign the grouping variable: one panel per cut, each with free scales
  facet_wrap(~cut, scales = "free") +
  # And we can map the points to a new color palette
  scale_color_gradient(low = "gold", high = "chocolate") +
  # Axis labels and plot title
  labs(x = "Carat", y = "Price", title = "This Is the Plot Title!") +
  # Some theme options to make it look more like a base R plot
  theme(
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = "black", fill = NA, size = .9),
    panel.background = element_rect(color = "black", fill = "white"),
    panel.grid = element_line(color = "lightgrey", size = .2),
    plot.background = element_rect(fill = "white"),
    axis.text = element_text(size = 10, color = "black")
  )