# Remove all variables to start this lesson with a clean slate
rm(list = ls())
###################################################################.
#
# TERMINOLOGY: indexing/index vs subsetting/subscript
#
# In class I've been referring to code that extracts specific
# values out of a vector or out of a list as "indexing" the vector
# or the list. For example, I would refer to the following code
#
# someVector[1:3]
#
# as "indexing" the vector and I would refer to the 1:3 in the
# code above as the "index".
#
# An alternative terminology is to describe the code as
# performing "subsetting" of the vector and to describe the 1:3
# as a "subscript".
#
# Both indexing/index and subsetting/subscript are valid terminology
# and are alternate ways to describe the same things. Be aware
# of this if you try to search for additional information online.
#
###################################################################.
24 22. intro to “lists” - i.e. collections of any R objects
24.1 Terminology: indexing/subsetting index/subscript
24.2 Other sources of info
###################################################################.
#
# OTHER SOURCES OF INFORMATION
#
# Material in this file that discusses the rules
# for "indexing" (AKA "subsetting") lists and vectors can be confusing. I tried
# to make the material straightforward but there are always situations
# that will nevertheless be confusing. These are some other sources
# of information that may help.
#
#
# ADVANCED R, 2nd Edition by Hadley Wickham (available online)
#
# This page: https://adv-r.hadley.nz/subsetting.html
# from the book "Advanced R" by Hadley Wickham, describes these rules
# in detail. At this point, you may not be familiar with all of the
# terminology that is used on this page, but this is a good reference
# when you are trying to figure something out.
#
#
# OFFICIAL R-LANGUAGE DEFINITION
# (by the R Core Team - the people who create R)
#
# The following is a link to the section on Indexing in the
# "Official R Language Definition":
#
# https://stat.ethz.ch/R-manual/R-devel/doc/manual/R-lang.html#Indexing
#
# This material is NOT easy to read but it is the "official" word on how
# things work in R. The authors assume that you know a lot and they do
# not "hold your hand". It may be hard to read this material but it is
# good to know it exists.
#
#
# GOOGLE :) (yes, I know it's obvious)
#
# There are many many webpages and websites devoted to trying to explain R.
# Obviously, one of the first things to do when faced with a problem you don't
# understand is to describe the problem accurately in Google and see
# what you find. If you get an error or warning message when you run
# your code, type the error/warning message into the search. This usually
# works well to find a webpage that helps to clarify things.
#
#######################################################################.
24.3 list( OBJECT1, OBJECT2, etc)
#######################################################################.
#######################################################################.
##
## TOPIC : lists
##
## A list is a collection of ANY R "objects" ####
##
## So far we learned about the following R "objects".
## - character vectors
## - numeric vectors
## - logical vectors
## These R objects (as well as other types of R objects that
## we haven't learned about yet) can all be included in a "list".
##
## A "list" itself, is also an R "object". Therefore a list can contain other lists. ####
## A list that is contained in another list is called a "nested list".
##
## You can create a list with the list function.
## See examples below.
##
#######################################################################.
#######################################################################.
# create some vectors
students = c("bob", "charlie", "frank")   # student names
test1 = c(70,80,90)                       # grades from first test
test2 = c(75,85,88)                       # grades from second test
honors = c(TRUE, FALSE,FALSE)             # TRUE for honors students

# create a list (called gradebook_MathClass) that contains all the vectors.
gradebook_MathClass = list(students, test1, test2, honors)

# display the contents of the list
gradebook_MathClass
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 70 80 90
[[3]]
[1] 75 85 88
[[4]]
[1] TRUE FALSE FALSE
24.4 mode( SOME_LIST ) is “list”
#---------------------------------------------------------------------.
# mode( SOME_LIST ) is "list" ####
#
# So far, at this point we have seen that mode(SOME_OBJECT) can return one of the
# following values: "numeric", "logical", "character" and "list"
#---------------------------------------------------------------------.
mode(gradebook_MathClass) # "list"
[1] "list"
24.5 A LIST CONTAINS A SEPARATE COPY OF DATA FROM VARIABLES USED TO CREATE THE LIST
#-----------------------------------------------------------------------------.
# A LIST CONTAINS A SEPARATE COPY OF DATA FROM VARIABLES USED TO CREATE THE LIST ####
#
# If the variables that were used to create a list are removed
# the list continues to exist (with all the data intact).
#
# In other words, the existence of a list, is NOT dependent on the underlying
# variables, (e.g. students, test1, test2, honors) that were used to create it.
#-----------------------------------------------------------------------------.
# We can remove the variables that were used to create the list ...
rm( students, test1, test2, honors)
# ... the list, still exists
gradebook_MathClass
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 70 80 90
[[3]]
[1] 75 85 88
[[4]]
[1] TRUE FALSE FALSE
############################################################################.
#
# You can create a list without using any other variables.
# Just put the information directly in the list function.
# See examples below.
#
############################################################################.
# EXAMPLE - create another gradebook without using other variables.
# Each vector is written directly inside the call to list().
gradebook_HistoryClass = list( c("bob", "charlie", "frank"),  # student names
                               c(70,80,90),     # grades from first test
                               c(75,85,88),     # grades from second test
                               c(TRUE, FALSE,FALSE))  # TRUE for honors students

# display the contents of the list
gradebook_HistoryClass
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 70 80 90
[[3]]
[1] 75 85 88
[[4]]
[1] TRUE FALSE FALSE
###############################################################################.
# The data used to create a list can be generated using ANY functions
# not just the c() function
###############################################################################.
# EXAMPLE - the objects in the list are produced by seq(), c() and rep()
stuff = list ( seq(2, 10, by=2),
               c("apple", "orange", "pear"),
               rep(c(1,2,3), 2))

# display the contents of the list
stuff
[[1]]
[1] 2 4 6 8 10
[[2]]
[1] "apple" "orange" "pear"
[[3]]
[1] 1 2 3 1 2 3
24.6 length( SOME_LIST ) # returns the number of objects in the list
###########################################################################.
#
# length( SOME_LIST ) # returns the number of objects in the list ####
#
###########################################################################.
# There are 4 objects in the gradebook_HistoryClass list, i.e.
#
# - the character vector of student names
# - the numeric vector of the test1 grades
# - the numeric vector of the test2 grades
# - the logical vector of honors statuses
gradebook_HistoryClass
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 70 80 90
[[3]]
[1] 75 85 88
[[4]]
[1] TRUE FALSE FALSE
# The length function returns the number of objects in the list.
length(gradebook_HistoryClass) # 4
[1] 4
24.7 A list can contain other lists (recursive lists)
# Create a list that contains other lists.
# The 2nd and 3rd items are themselves lists (i.e. "nested lists").
gradebooks = list( "Fall 2023",              # 1st item in the list
                   gradebook_MathClass,      # 2nd item in the list
                   gradebook_HistoryClass)   # 3rd item in the list
length(gradebooks) # 3 - see comments above
[1] 3
gradebooks
[[1]]
[1] "Fall 2023"
[[2]]
[[2]][[1]]
[1] "bob" "charlie" "frank"
[[2]][[2]]
[1] 70 80 90
[[2]][[3]]
[1] 75 85 88
[[2]][[4]]
[1] TRUE FALSE FALSE
[[3]]
[[3]][[1]]
[1] "bob" "charlie" "frank"
[[3]][[2]]
[1] 70 80 90
[[3]][[3]]
[1] 75 85 88
[[3]][[4]]
[1] TRUE FALSE FALSE
24.8 str( SOME_OBJECT ) # displays info about object (very useful for lists)
###########################################################################.
# TOOLS TO HELP YOU UNDERSTAND THE STRUCTURE OF LISTS AND OTHER OBJECTS
#
# str( SOME_OBJECT) # shows "human readable" description of the structure of the object ####
#
# The str function shows the "structure" of an R object.
# It provides a more "human readable" description of what an object contains.
# It is usable with any R objects (i.e. vectors, lists, dataframes and other types)
# but is especially useful when trying to understand what is inside a
# complex list. Remember the str function. It could help you a lot
# at some point.
#
#
#
# View( SOME_OBJECT ) # (It's a CAPITAL "V" in "View") displays object in another tab ####
#
# The "View" function opens up the object in another tab in RStudio. The
# "View" tab for that object displays the contents of the object in a
# way that is easy to see and navigate through the data in the object.
# This is especially helpful for objects that contain a lot of data.
#
#
#
# THE ">" ARROW BUTTON NEXT TO LISTS IN THE ENVIRONMENT TAB
#
# Next to lists that appear in the Environment tab in RStudio, there is a
# small button that looks like a right pointing arrow ">". If you press
# on it, you will see the full contents of the list. This is similar
# to the output you get with the View(SOME_OBJECT) command but is
# easier to use for small to medium sized lists. The View command may be
# easier to use for very large lists.
#
###########################################################################.
str(gradebook_HistoryClass)
List of 4
$ : chr [1:3] "bob" "charlie" "frank"
$ : num [1:3] 70 80 90
$ : num [1:3] 75 85 88
$ : logi [1:3] TRUE FALSE FALSE
str(gradebooks)
List of 3
$ : chr "Fall 2023"
$ :List of 4
..$ : chr [1:3] "bob" "charlie" "frank"
..$ : num [1:3] 70 80 90
..$ : num [1:3] 75 85 88
..$ : logi [1:3] TRUE FALSE FALSE
$ :List of 4
..$ : chr [1:3] "bob" "charlie" "frank"
..$ : num [1:3] 70 80 90
..$ : num [1:3] 75 85 88
..$ : logi [1:3] TRUE FALSE FALSE
24.9 View( SOME_OBJECT )
# *** READ THIS BEFORE YOU RUN THE NEXT LINE OF CODE!!! ***
# The following command will open a new "tab" in RStudio that
# shows the contents of the gradebooks variable. This seems
# unnecessary for such a small set of data but the View tab can be very
# helpful for large collections of data. To get back to reading this file
# you can close the View tab or alternatively find the tab for this
# file and click on the tab.
# Uncomment the following line to run it. I commented it so that
# it shouldn't be run by mistake and confuse you when the new tab opens.
#View(gradebooks) # A new "tab" opens in RStudio that shows the contents of gradebooks
# Try pressing the ">" arrow button that appears in the Environment
# tab, next to the word "gradebook_HistoryClass" and
# you will see the full contents of the list.
24.10 Lists are indexed with [single-brackets], [[double-brackets]] and $dollar-signs
############################################################################.
############################################################################.
## Indexing with [single-brackets], [[double-brackets]] and $notation
##
## We have spent a lot of time up until now learning how to "index"
## (i.e. to select specific values from) a VECTOR. For vectors we
## used various forms of [single-bracket] notation to select only
## the values we wanted.
##
## With lists, you can also select just the values you want using
## the same [single-bracket-notation]. This notation is very similar to
## [single-bracket-notation] as used with vectors.
##
## We will also introduce two additional notations for selecting specific
## values from lists that we have not used with vectors.
## The new notations are [[double-bracket]] notation as well as
## $dollar-sign notation.
##
## First let's show you how the [single-bracket-notation] is used
## with lists in a very similar way to how the [single-bracket-notation]
## is used with vectors.
############################################################################.
############################################################################.
single brackets - i.e. - SOME_LIST[ SOME_VECTOR ]
###########################################################################.
# Indexing lists with [single-brackets] follows the same rules as
# indexing vectors with [single-brackets].
#
# You can use
# - positive position numbers - to get JUST the values in those positions
# - negative position numbers - to get everything except the values in those positions
# - TRUE/FALSE values - to get just the values that correspond to the TRUE's
#
# However,
# with VECTORS, you get back a new VECTOR that contains just the specified values.
#
# with LISTS, you get back a new LIST that contains just the specified values.
#
# See examples below.
#
###########################################################################.
gradebook_HistoryClass
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 70 80 90
[[3]]
[1] 75 85 88
[[4]]
[1] TRUE FALSE FALSE
# someList[c(1,3)] # returns a new list with objects 1 and 3 from someList ####
# EXAMPLE:
gradebook_HistoryClass[c(1,3)] # positive position numbers
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 75 85 88
# someList[-c(1,3)] # a new list with everything except for 1st and 3rd objects ####
# EXAMPLE:
gradebook_HistoryClass[-c(1,3)] # negative position numbers
[[1]]
[1] 70 80 90
[[2]]
[1] TRUE FALSE FALSE
# someList[c(TRUE, FALSE)] # new list with every other object starting with first
# EXAMPLE:
gradebook_HistoryClass[c(TRUE, FALSE)] # logicals - this example uses recycling rule
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 75 85 88
#------------------------------------------------.
# Each of the examples above returns a LIST.
# We can prove this with the mode function
#------------------------------------------------.
mode( gradebook_HistoryClass[c(1,3)] ) # list
[1] "list"
mode( gradebook_HistoryClass[-c(1,3)] ) # list
[1] "list"
mode( gradebook_HistoryClass[c(TRUE, FALSE)] ) # list
[1] "list"
#------------------------------------------------.
# You can assign the results to a new variable.
#------------------------------------------------.
# Single-bracket indexing returns a new list; store it in a variable ...
names_and_test2 = gradebook_HistoryClass[c(1,3)]

# ... and display the contents of the new list
names_and_test2
[[1]]
[1] "bob" "charlie" "frank"
[[2]]
[1] 75 85 88
mode( names_and_test2)
[1] "list"
24.11 Use only one vector between the brackets
######################################################################.
# Be careful. As with vectors you may only use ONE VECTOR between the
# [square-brackets]. See the examples below:
######################################################################.
rm(list=ls()) # start from scratch
# Set up a list of info about some movies.
# The list holds three vectors with one value per movie.
movies = list( c("Angels", "The Bat", "Fast Cars", "Dogs Eat Stuff"),
               c("PG", "R", "G", "PG"),
               c(102, 152, 73, 135)
             )

# display the contents of the list
movies
[[1]]
[1] "Angels" "The Bat" "Fast Cars" "Dogs Eat Stuff"
[[2]]
[1] "PG" "R" "G" "PG"
[[3]]
[1] 102 152 73 135
# The following contains ONE vector between the [brackets]
# since the c() combines the 1 and 3 into a single vector.
movies[c(1,3)] # returns a new list with the 1st and 3rd objects
[[1]]
[1] "Angels" "The Bat" "Fast Cars" "Dogs Eat Stuff"
[[2]]
[1] 102 152 73 135
# However, the following returns an error since the 1 and 3
# are two different vectors and there is only allowed to be
# one vector between the [square-brackets]
movies[1,3] # ERROR - may only have ONE vector between the [brackets]
Error in movies[1, 3]: incorrect number of dimensions