Load the ggplot2 library, both for later plotting and to access the diamonds dataset

library(ggplot2)

Quick glance at the data

# The diamonds dataset ships with ggplot2, so it is available as soon as the
# package has been loaded with library()
str(object = diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...

Generate a scatterplot in base R

# First argument goes on the x-axis (carat), second on the y-axis (price)
plot(diamonds$carat, diamonds$price)

Change the shape, color, and size of the points

# Same scatterplot as before, now with custom point aesthetics
plot(
  x = diamonds$carat, y = diamonds$price,
  # Point aesthetics:
  col = 'chocolate', # point color
  pch = 21,          # plotting symbol (shape) code
  cex = 3            # point size
)


## Set the range of the x and y axes, add axis and plot titles and a legend.

# Scatterplot with custom axis ranges, axis labels, and a title; 'Fair' cuts
# are drawn in a different color from all other cuts.
plot(
  x = diamonds$carat,
  y = diamonds$price,
  pch = 18, # shape
  col = ifelse(diamonds$cut == 'Fair', 'chocolate', 'blue'), # 'Fair' vs. rest
  cex = 3, # size
  xlim = c(min(diamonds$carat), 1), # x-axis range
  ylim = c(min(diamonds$price), 10000), # y-axis range
  xlab = 'Carat',
  ylab = 'Price',
  main = 'This Is The Plot Title!' # set the plot title
)
# Add a legend that matches the two point colors used above.
legend(
  x = 0.2, # x-coordinate (in plot units) of the legend's top-left corner
  y = 9000, # y-coordinate (in plot units) of the legend's top-left corner
  legend = c('Fair', 'Better'), # the labels for the legend
  col = c('chocolate', 'blue'), # colors in the same order as the labels
  pch = 18 # all same shape
)


## Adding a regression line

To add a regression line, we first fit a model using lm, and then (after generating the scatterplot) supply that model to the function abline.

# Fit a simple linear regression of price on carat. Referring to the columns
# by name and passing `data =` (instead of writing diamonds$... inside the
# formula) gives clean coefficient names and lets predict() accept new data.
mod <- lm(price ~ carat, data = diamonds)
# Generate the plot again
plot(
  x = diamonds$carat,
  y = diamonds$price,
  pch = 18, # shape
  col = ifelse(diamonds$cut == 'Fair', 'chocolate', 'blue'), # 'Fair' vs. rest
  cex = 3, # size
  xlim = c(min(diamonds$carat), 1), # x-axis range
  ylim = c(min(diamonds$price), 10000), # y-axis range
  xlab = 'Carat',
  ylab = 'Price',
  main = 'This Is The Plot Title!' # set the plot title
)
legend(
  x = 0.2, # x-coordinate (in plot units) of the legend's top-left corner
  y = 9000, # y-coordinate (in plot units) of the legend's top-left corner
  legend = c('Fair', 'Better'), # the labels for the legend
  col = c('chocolate', 'blue'), # colors in the same order as the labels
  pch = 18 # all same shape
)
# Add the model to the plot: abline() draws the fitted line from the model's
# intercept and slope
abline(reg = mod)


The regression line can be customized using parameters that govern the aesthetics of lines. These include (but are not limited to):

  • lty: the type of the line, e.g., solid, dotted, dashed
  • lwd: width of line
  • col: color of the line, specified the same way as for points
# Redraw the scatterplot and legend, then overlay a customized regression line.
plot(
  x = diamonds$carat,
  y = diamonds$price,
  pch = 18, # shape
  col = ifelse(diamonds$cut == 'Fair', 'chocolate', 'blue'), # 'Fair' vs. rest
  cex = 3, # size
  xlim = c(min(diamonds$carat), 1), # x-axis range
  ylim = c(min(diamonds$price), 10000), # y-axis range
  xlab = 'Carat',
  ylab = 'Price',
  main = 'This Is The Plot Title!' # set the plot title
)
legend(
  x = 0.2, # x-coordinate (in plot units) of the legend's top-left corner
  y = 9000, # y-coordinate (in plot units) of the legend's top-left corner
  legend = c('Fair', 'Better'), # the labels for the legend
  col = c('chocolate', 'blue'), # colors in the same order as the labels
  pch = 18 # all same shape
)
# Add the model to the plot, customizing the line's width (lwd), type (lty),
# and color (col):
abline(reg = mod, lwd = 4, lty = 3, col = 'grey')

For a complete overview of all the graphical parameters (such as col and lty) and what they control on your graph, run:

# Open the help page that documents the graphical parameters
help('graphical parameters')

ggplot2

ggplot2, or “ggplot”, operates by building a graph one layer at a time: beginning with the plain white or grey square on which your data will be projected, then the actual data points, and even additional visualizations, such as regression lines, that are computed directly from the data during plotting.
Pro-tip: layers in ggplot are separated by the "+" operator

A quick demonstration of layers:

# With no data and no mappings, ggplot() draws only an empty plotting region
ggplot()


This renders just a plotting region; the data isn’t mapped onto it anywhere or in any way

# Base layer only: declare the data and the x/y mappings, but add no geom yet
ggplot(diamonds, aes(x = carat, y = price))

Here we have mapped variables onto the axes, and as such, the axes have automatically adopted the scale/range of those variables, which ggplot has computed for us internally. However, we still have not mapped the data to a geometric shape.

Here we map the data on the x and y axes to be represented geometrically as a point:

# Add a point layer on top of the base layer; geom_point() is given no mapping
# of its own, so it inherits the x/y mapping declared in ggplot()
ggplot(diamonds, aes(carat, price)) +
  geom_point()

As you may have started to pick up, ggplot works by mapping data to different elements of each layer that you add. The basic breakdown is as follows:

  • Layers are added sequentially, separated by the “+”
  • Mapping a variable to a given aesthetic element is achieved by setting the mapping argument equal to a call to aes, in which the variable is assigned
  • geom_ functions determine how the data will be represented geometrically; any property of the shape that should vary with the data (e.g., color) must be set inside aes, whereas fixed values (e.g., shape = 18) are set outside it

Recreating the regression plot above in ggplot2

# Recreate the base R regression plot: points colored by whether the cut is
# 'Fair', a linear fit, and theme tweaks that mimic the base graphics look.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  # cut == 'Fair' yields the levels FALSE/TRUE, so the manual scale below lists
  # the 'Better' (FALSE) color first and the 'Fair' (TRUE) color second
  geom_point(aes(color = as.factor(cut == 'Fair')),
             shape = 18, size = 3) +
  xlab('Carat') +
  ylab('Price') +
  scale_color_manual(values = c('blue', 'chocolate'), labels = c('Better', 'Fair')) +
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned
  guides(color = guide_legend(title = '', reverse = TRUE)) +
  # Zoom the axes without dropping any data from the regression fit
  coord_cartesian(xlim = c(min(diamonds$carat), 1), ylim = c(0, 10000)) +
  theme(legend.position = c(.2, .7)) +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines and borders; `size` is kept here so older versions still run.
  geom_smooth(method = 'lm',
              formula = y ~ x, # a formula object, not a string
              se = FALSE,
              color = 'grey',
              size = 1.4,
              linetype = 2) +
  # Some theme options to make it look more like a base R plot:
  theme(
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = 'black', fill = NA, size = .9),
    panel.background = element_rect(color = 'black', fill = 'white'),
    panel.grid = element_line(color = 'lightgrey', size = .2),
    plot.background = element_rect(fill = 'white'),
    axis.text = element_text(size = 10, color = 'black')
  ) +
  # We can add a title too
  ggtitle(label = 'This Is the Plot Title!')

A huge advantage of ggplot’s use of the mapping and layering technique is that you can easily map continuous values to sets of points.

Here we color code by price itself, a continuous variable

# Same plot over the full data range, but with point color mapped to the
# continuous variable price, which produces a color bar instead of a discrete
# legend.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(aes(color = price),
             shape = 18, size = 3) +
  xlab('Carat') +
  ylab('Price') +
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned
  guides(color = guide_colorbar(title = '', reverse = TRUE)) +
  theme(legend.position = c(.1, .7)) +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines and borders; `size` is kept here so older versions still run.
  geom_smooth(method = 'lm',
              formula = y ~ x, # a formula object, not a string
              se = FALSE,
              color = 'grey',
              size = 1.4,
              linetype = 2) +
  # Some theme options to make it look more like a base R plot:
  theme(
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = 'black', fill = NA, size = .9),
    panel.background = element_rect(color = 'black', fill = 'white'),
    panel.grid = element_line(color = 'lightgrey', size = .2),
    plot.background = element_rect(fill = 'white'),
    axis.text = element_text(size = 10, color = 'black')
  ) +
  # We can add a title too
  ggtitle(label = 'This Is the Plot Title!')


We can also easily compare effects between groups by faceting the plot on a grouping variable (here, cut)

# One panel per cut, each with its own points, linear fit, and axis scales;
# point color is mapped to price on a gold-to-chocolate gradient.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(aes(color = price),
             shape = 18, size = 3) +
  xlab('Carat') +
  ylab('Price') +
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines and borders; `size` is kept here so older versions still run.
  geom_smooth(method = 'lm',
              formula = y ~ x, # a formula object, not a string
              se = FALSE, # spelled out: F is an ordinary, reassignable variable
              color = 'grey',
              size = 1.4,
              linetype = 2) +
  # Some theme options to make it look more like a base R plot:
  theme(
    plot.title = element_text(hjust = .5),
    text = element_text(size = 12),
    panel.border = element_rect(color = 'black', fill = NA, size = .9),
    panel.background = element_rect(color = 'black', fill = 'white'),
    panel.grid = element_line(color = 'lightgrey', size = .2),
    plot.background = element_rect(fill = 'white'),
    axis.text = element_text(size = 10, color = 'black')
  ) +
  # We can add a title too
  ggtitle(label = 'This Is the Plot Title!') +
  # Assign the grouping variable; scales = 'free' lets each panel choose its
  # own x and y axis ranges
  facet_wrap(~cut, scales = 'free') +
  # And we can map the points to a new color palette
  scale_color_gradient(low = 'gold', high = 'chocolate')