41  39. Practice Questions - TOPIC: Dataframes

41.1 Dataframes

##############################################################################################.
##############################################################################################.
##
##            TOPIC:  DATAFRAMES
##
##############################################################################################.
##############################################################################################.


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 46
# TOPICS: dataframes
#
# Write R code that creates a dataframe that has 2 columns.
#
# The first column should contain the even numbers 
# from 2 through 1000, i.e. 2, 4, 6, ... 1000.
#
# The 2nd column should contain the odd numbers
# from 999 counting down to 1, i.e. 999, 997, 995, ... 1
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
##############.
# ANSWER 1
##############.

# Build both columns inline in a single data.frame() call.
# evens is 2, 4, ..., 1000; odds is 1, 3, ..., 999 reversed so that it counts down.
numbers <- data.frame(                            # ANSWER
  evens = seq(from = 2, to = 1000, by = 2),
  odds  = rev(seq(from = 1, to = 999, by = 2))
)

# show the first 3 and last 3 rows of numbers (this is not part of the answer)
head(numbers,3)   # show the first 3 rows
  evens odds
1     2  999
2     4  997
3     6  995
tail(numbers,3)   # show the last 3 rows
    evens odds
498   996    5
499   998    3
500  1000    1
##############.
# ANSWER 2 
##############.

# Build each column as its own vector first, then combine them.
# data.frame() takes the column names from the names of the variables passed in.
odds <- rev(seq(from = 1, to = 999, by = 2))   # ANSWER (MULTIPLE LINES)
evens <- seq(from = 2, to = 1000, by = 2)      # ANSWER (MULTIPLE LINES)
numbers <- data.frame(evens, odds)             # ANSWER (MULTIPLE LINES)

# show the first 3 and last 3 rows of numbers (this is not part of the answer)
head(numbers,3)   # show the first 3 rows
  evens odds
1     2  999
2     4  997
3     6  995
tail(numbers,3)   # show the last 3 rows
    evens odds
498   996    5
499   998    3
500  1000    1

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 47
# TOPICS: dataframes
#
# Write R code that creates a dataframe that contains the following information:
#  
# The name of the 1st column should be: "department"
# The values in this column should be: produce, bakery, produce, produce, bakery. 
#
# The name of the 2nd column should be: "product"
# The values in this column should be: apple, chocolate cake, orange, pear, rye bread. 
#
# The name of the 3rd column should be: "price"
# The values in this column should be: 1.99 , 1.59, 0.99, 1.99, 4.99.
#
# Store the dataframe in a variable named "prices".
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
###########################################################.
# ANSWER 1 - create the data.frame with a single command
###########################################################.

# One data.frame() call; each named argument supplies one column.
prices <- data.frame(                                                       # ANSWER (MULTIPLE LINES)
  department = c("produce", "bakery", "produce", "produce", "bakery"),      # ANSWER (MULTIPLE LINES)
  product    = c("apple", "chocolate cake", "orange", "pear", "rye bread"), # ANSWER (MULTIPLE LINES)
  price      = c(1.99, 1.59, 0.99, 1.99, 4.99)                              # ANSWER (MULTIPLE LINES)
)                                                                           # ANSWER (MULTIPLE LINES)


# check our answer (this is NOT part of the answer)
prices
  department        product price
1    produce          apple  1.99
2     bakery chocolate cake  1.59
3    produce         orange  0.99
4    produce           pear  1.99
5     bakery      rye bread  4.99
###########################################################.
# ANSWER 2 - create variables for the columns first
###########################################################.

# Create one vector per column; data.frame() then reuses the variable names
# as the column names, in the order the variables are listed.
price <- c(1.99, 1.59, 0.99, 1.99, 4.99)                                 # ANSWER (MULTIPLE LINES)
product <- c("apple", "chocolate cake", "orange", "pear", "rye bread")   # ANSWER (MULTIPLE LINES)
department <- c("produce", "bakery", "produce", "produce", "bakery")     # ANSWER (MULTIPLE LINES)
prices <- data.frame(department, product, price)                         # ANSWER (MULTIPLE LINES)

# check our answer (this is NOT part of the answer)
prices
  department        product price
1    produce          apple  1.99
2     bakery chocolate cake  1.59
3    produce         orange  0.99
4    produce           pear  1.99
5     bakery      rye bread  4.99

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 48
# TOPICS: dataframes
#
# Write a command that changes the names of the columns in the dataframe
# from the previous question. The 2nd column (product) should now be named itemName and
# the 3rd column (price) should be named pricePerLb. Hint: use the colnames function.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
###############.
# ANSWER 1
###############.
# Rename columns 2 and 3 in one assignment; column 1 (department) is untouched.
colnames(prices)[2:3] <- c("itemName", "pricePerLb")  # ANSWER
# check ...
prices
  department       itemName pricePerLb
1    produce          apple       1.99
2     bakery chocolate cake       1.59
3    produce         orange       0.99
4    produce           pear       1.99
5     bakery      rye bread       4.99
################.
# ANSWER 2
################.
# Same result, renaming one column per statement (the two are independent).
colnames(prices)[3] <- "pricePerLb"   # ANSWER (MULTIPLE LINES)
colnames(prices)[2] <- "itemName"     # ANSWER (MULTIPLE LINES)
# check ...
prices
  department       itemName pricePerLb
1    produce          apple       1.99
2     bakery chocolate cake       1.59
3    produce         orange       0.99
4    produce           pear       1.99
5     bakery      rye bread       4.99

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 49
# TOPICS: dataframes
#
# Challenge: Write a command that changes the names of the rows in the
# "prices" dataframe from the previous question. The new names of the rows
# should be item1, item2 , etc. Write the code so that no matter how many rows
# the dataframe would contain the names of the rows would follow the same pattern.
# For example, if the dataframe happened to contain 1000 rows, then after your
# command executed, they would be named: item1,item2,item3,item4,... item1000
#
# Hint: As part of your answer, you will need to use the paste function
# with sep="" (nothing between the quotes). Note that we did not cover the paste
# function in class. You should look at the help for the paste function and try a
# few examples of your own. You can also type the R command example(paste) to
# automatically run the examples that appear at the end of the help page for the
# paste function.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the prices dataframe.

#############################.
# sample answer:
#############################.

# To generate the new rownames you can use the following command:
# ("item" is pasted in front of each number; sep="" means nothing is put between them)
paste("item", 1:length(rownames(prices)), sep="")
[1] "item1" "item2" "item3" "item4" "item5"
# or the following command
# (nrow(prices) is the number of rows, so this gives the same result)
paste("item", 1:nrow(prices), sep="")
[1] "item1" "item2" "item3" "item4" "item5"
# now just assign the result of one of these commands to the rownames:
rownames(prices) <- paste("item", 1:nrow(prices), sep="")    # ANSWER  (THIS IS THE ONLY LINE YOU NEED)

# check:
prices
      department       itemName pricePerLb
item1    produce          apple       1.99
item2     bakery chocolate cake       1.59
item3    produce         orange       0.99
item4    produce           pear       1.99
item5     bakery      rye bread       4.99

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 50 (a)
# TOPICS: dataframes
#
# a. Write a command that modifies the dataframe from the previous question.
#    The command should change the word "apple" to "mcintosh apple".
#
# b. Do this again, but this time DO NOT refer to the actual row number or 
#    column number in your answer. Your answer should work successfully, 
#    no matter what row contains "apple". 
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

# ANSWER - part a
# Row 1, column 2 (itemName) is the cell that holds "apple".
prices[1,2] <- "mcintosh apple"    # ANSWER

# ANSWER - part b
# The logical test selects whichever row(s) have itemName equal to "apple",
# and the column is selected by name, so no row or column number is needed.
# NOTE: if part a was already run, no row contains "apple" any more, so
# this command leaves the dataframe unchanged.
prices[prices$itemName == "apple" , "itemName"] <- "mcintosh apple"    # ANSWER
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       1.99
item2     bakery chocolate cake       1.59
item3    produce         orange       0.99
item4    produce           pear       1.99
item5     bakery      rye bread       4.99

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 51
# TOPICS: dataframes
#
# Write a command that modifies the dataframe from the previous question.
# The command should increase the pricePerLb of oranges by 10%. Round the increased
# price to the nearest penny.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the prices dataframe.

###############################.
# ANSWER 1 
#    - using $ notation  
#    - answer the question using two commands (may be easier to understand than a single command)
###############################.
# Step 1: build a logical vector that marks the row(s) whose itemName is "orange".
rows <- prices$itemName == "orange"                                      # ANSWER (MULTIPLE LINES)
rows
[1] FALSE FALSE  TRUE FALSE FALSE
# Step 2: raise the price in just those rows by 10% (multiply by 1.1) and
# round to 2 decimal places, i.e. to the nearest penny.
prices$pricePerLb[rows] <- round(1.1 * prices$pricePerLb[rows], digits = 2)  # ANSWER (MULTIPLE LINES)

# check our answer
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       1.99
item2     bakery chocolate cake       1.59
item3    produce         orange       1.09
item4    produce           pear       1.99
item5     bakery      rye bread       4.99
#########################################.
# ANSWER 2
#    - using [[double-bracket]] notation
#    - answer the question using two commands (may be easier to understand than a single command)
#########################################.
# NOTE: Answers 1, 2 and 3 are alternatives - run only ONE of them. In this
# transcript they were run one after another, so every answer raised the
# orange price again (the successive checks show 1.09, then 1.20, then 1.32).
#
# Step 1: build a logical vector that marks the row(s) whose itemName is "orange".
rows <- prices[["itemName"]] == "orange"                                 # ANSWER (MULTIPLE LINES)
rows
[1] FALSE FALSE  TRUE FALSE FALSE
# Step 2: raise the price in just those rows by 10% and round to the nearest penny.
prices[["pricePerLb"]][rows] <- round(1.1 * prices[["pricePerLb"]][rows], digits = 2)  # ANSWER (MULTIPLE LINES)

# check our answer
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       1.99
item2     bakery chocolate cake       1.59
item3    produce         orange       1.20
item4    produce           pear       1.99
item5     bakery      rye bread       4.99
#########################################.
# ANSWER 3
#    - using $ notation  
#    - answer using ONE command (avoids the need to create new variables that are otherwise unnecessary)
#########################################.

# The same update written as ONE command, so no helper variable is needed
# (for readability, the command is typed on two lines).
# NOTE: this is an alternative to Answers 1 and 2 - run only ONE of them.
# In this transcript all three were run in sequence, which is why the
# check below shows the orange price increased yet again.
prices$pricePerLb[prices$itemName == "orange"] <-                          # ANSWER (MULTIPLE LINES)
  round(1.1 * prices$pricePerLb[prices$itemName == "orange"], digits = 2)  # ANSWER (MULTIPLE LINES)

# check our answer
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       1.99
item2     bakery chocolate cake       1.59
item3    produce         orange       1.32
item4    produce           pear       1.99
item5     bakery      rye bread       4.99

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 52
# TOPICS: dataframes
#
# Write a command that modifies the "prices" dataframe from the previous question.
# The command should increase the price of all items by 10%. Round the increased prices
# to the nearest penny.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the prices dataframe.

##############################.
# ANSWER 1 - using $ notation
##############################.

# Multiply the whole pricePerLb column by 1.1 (a 10% increase) and round
# every value to 2 decimal places.
prices$pricePerLb <- round(1.1 * prices$pricePerLb, digits = 2)        # ANSWER

# check
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       2.19
item2     bakery chocolate cake       1.75
item3    produce         orange       1.45
item4    produce           pear       2.19
item5     bakery      rye bread       5.49
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.
# NOTE: this is an alternative to Answer 1 - run only ONE of them. In this
# transcript both were run, so the check below shows the prices increased
# a second time (e.g. 2.19 became 2.41).
prices[["pricePerLb"]] <- round(1.1 * prices[["pricePerLb"]], digits = 2)   # ANSWER

# check
prices
      department       itemName pricePerLb
item1    produce mcintosh apple       2.41
item2     bakery chocolate cake       1.93
item3    produce         orange       1.59
item4    produce           pear       2.41
item5     bakery      rye bread       6.04

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 53
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question. Write a command that adds
# a new column named salePrice. The new column should contain the values from
# the pricePerLb column reduced by 25%. Round the prices in the new column to the nearest penny.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the prices dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# Assigning to a column name that does not exist yet adds it as a new column.
# 25% off means keeping 75% of pricePerLb; round to the nearest penny.
prices$salePrice <- round(0.75 * prices$pricePerLb, digits = 2)     # ANSWER

# check 
prices
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item2     bakery chocolate cake       1.93      1.45
item3    produce         orange       1.59      1.19
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# Same calculation with [[double-bracket]] notation. If salePrice already
# exists (e.g. Answer 1 was run) it is simply overwritten with the same values.
prices[["salePrice"]] <- round(0.75 * prices[["pricePerLb"]], digits = 2)   # ANSWER

# check 
prices
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item2     bakery chocolate cake       1.93      1.45
item3    produce         orange       1.59      1.19
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 54
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays the itemName and pricePerLb of all items from the
# produce department. Write the code so that it will continue to work even if the
# specific data in the dataframe changes.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the prices dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
# (rows is a logical vector with one entry per row; TRUE marks a "produce" row)
rows <- prices$department == "produce"
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# now get the data for those rows from the specified columns:
# (the logical vector chooses the rows; the character vector chooses the columns by name)
prices[rows,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item3         orange       1.59
item4           pear       2.41
#
# Alternatively, we can do all this in one line:
prices[     prices$department == "produce"    ,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item3         orange       1.59
item4           pear       2.41
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# First identify the rows we want:
# (prices[["department"]] extracts the department column as a vector)
rows <- prices[["department"]] == "produce"
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# now get the data for those rows from the specified columns:
prices[rows,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item3         orange       1.59
item4           pear       2.41
# Alternatively, we can do all this in one line:
prices[     prices[["department"]] == "produce"    ,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item3         orange       1.59
item4           pear       2.41

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 55
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays the itemName and pricePerLb of all items whose pricePerLb
# is 2.00 or more. Write the code so that it will continue to work even if the
# specific data in the dataframe changes.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
# (TRUE marks each row whose pricePerLb is 2.00 or more)
rows <- prices$pricePerLb >= 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the data for those rows from the specified columns:
prices[rows,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item4           pear       2.41
item5      rye bread       6.04
# Alternatively, we can do all this in one line:
prices[     prices$pricePerLb >= 2    ,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item4           pear       2.41
item5      rye bread       6.04
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# First identify the rows we want:
# (TRUE marks each row whose pricePerLb is 2.00 or more)
rows <- prices[["pricePerLb"]] >= 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the data for those rows from the specified columns:
prices[rows,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item4           pear       2.41
item5      rye bread       6.04
#
# Alternatively, we can do all this in one line:
prices[     prices[["pricePerLb"]] >= 2    ,c("itemName","pricePerLb")]
            itemName pricePerLb
item1 mcintosh apple       2.41
item4           pear       2.41
item5      rye bread       6.04

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 56
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays all of the columns for all items whose
# pricePerLb is 2.00 or more. Write the code so that it will continue to work
# even if the specific data in the dataframe changes.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
rows <- prices$pricePerLb >= 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the data for those rows; leaving the column position empty
# (nothing after the comma) keeps ALL of the columns:
prices[rows,]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53
# Alternatively, we can do all this in one line:
prices[     prices$pricePerLb >= 2  ,   ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# First identify the rows we want:
rows <- prices[["pricePerLb"]] >= 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the data for those rows; leaving the column position empty
# (nothing after the comma) keeps ALL of the columns:
prices[rows,]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53
# Alternatively, we can do all this in one line:
prices[     prices[["pricePerLb"]] >= 2  ,   ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item4    produce           pear       2.41      1.81
item5     bakery      rye bread       6.04      4.53

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 57
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question. 
# Write a command that calculates the average pricePerLb of all
# items in the produce department.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.
# First identify the rows we want:
rows <- prices$department == "produce"
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# now take the pricePerLb values for just those rows and average them:
mean( prices[rows,"pricePerLb"] )
[1] 2.136667
# Alternatively, we can do all this in one line:
mean( prices[   prices$department == "produce"   ,"pricePerLb"] )
[1] 2.136667
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.
# First identify the rows we want:
rows <- prices[["department"]] == "produce"
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# now take the pricePerLb values for just those rows and average them:
mean( prices[rows,"pricePerLb"] )
[1] 2.136667
# Alternatively, we can do all this in one line:
mean( prices[   prices[["department"]] == "produce"   ,"pricePerLb"] )
[1] 2.136667

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 58
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question. 
# Write a command that displays all of the data from just the odd numbered rows.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

##########.
# ANSWER 
##########.

# First identify the rows we want:
# (start at 1 and count up by 2, stopping at the number of rows in the dataframe)
rows <- seq(1,nrow(prices),by=2)
rows
[1] 1 3 5
# now get the data for those rows (all of the columns):
prices[ rows , ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item3    produce         orange       1.59      1.19
item5     bakery      rye bread       6.04      4.53
# Alternatively, we can do all this in one line:
prices[  seq(1,nrow(prices),by=2)   ,  ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
item3    produce         orange       1.59      1.19
item5     bakery      rye bread       6.04      4.53

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 59
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays just the name of the items whose
# pricePerLb is at least 1.00 but not more than 2.00.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
# (& combines the two tests row by row; a row is TRUE only if BOTH tests are TRUE)
rows <- prices$pricePerLb >= 1 & prices$pricePerLb <= 2
rows
[1] FALSE  TRUE  TRUE FALSE FALSE
# now get the itemName values for those rows
# (asking for a single column displays a plain vector of names):
prices[ rows , "itemName"]
[1] "chocolate cake" "orange"        
# Alternatively, we can do all this in one line:
prices[    prices$pricePerLb >= 1 & prices$pricePerLb <= 2    , "itemName"]
[1] "chocolate cake" "orange"        
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.
# First identify the rows we want:
# (& combines the two tests row by row; a row is TRUE only if BOTH tests are TRUE)
rows <- prices[["pricePerLb"]] >= 1 & prices[["pricePerLb"]] <= 2
rows
[1] FALSE  TRUE  TRUE FALSE FALSE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "chocolate cake" "orange"        
# Alternatively, we can do all this in one line:
prices[    prices[["pricePerLb"]] >= 1 & prices[["pricePerLb"]] <= 2    , "itemName"]
[1] "chocolate cake" "orange"        

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 60
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays just the name of the items whose
# pricePerLb is either less than 1.00 or more than 2.00.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
# (| combines the two tests row by row; a row is TRUE if EITHER test is TRUE)
rows <- prices$pricePerLb < 1 | prices$pricePerLb > 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "mcintosh apple" "pear"           "rye bread"     
# Alternatively, we can do all this in one line:
prices[    prices$pricePerLb < 1 | prices$pricePerLb > 2    , "itemName"]
[1] "mcintosh apple" "pear"           "rye bread"     
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# First identify the rows we want:
# (| combines the two tests row by row; a row is TRUE if EITHER test is TRUE)
rows <- prices[["pricePerLb"]] < 1 | prices[["pricePerLb"]] > 2
rows
[1]  TRUE FALSE FALSE  TRUE  TRUE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "mcintosh apple" "pear"           "rye bread"     
# Alternatively, we can do all this in one line:
prices[    prices[["pricePerLb"]] < 1 | prices[["pricePerLb"]] > 2    , "itemName"]
[1] "mcintosh apple" "pear"           "rye bread"     

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 61
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a command that displays just the name of the produce whose pricePerLb is at
# least 1.00 but not more than 2.00.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want:
# (all three tests must be TRUE: produce department, at least 1.00, at most 2.00)
rows <- prices$department == "produce" & prices$pricePerLb >= 1 & prices$pricePerLb <= 2
rows
[1] FALSE FALSE  TRUE FALSE FALSE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "orange"
# Alternatively, we can do all this in one line:
prices[prices$department == "produce" & prices$pricePerLb >= 1 & prices$pricePerLb <= 2      , "itemName"]
[1] "orange"
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.

# First identify the rows we want:
# (all three tests must be TRUE: produce department, at least 1.00, at most 2.00)
rows <- prices[["department"]] == "produce" & prices[["pricePerLb"]] >= 1 & prices[["pricePerLb"]] <= 2
rows
[1] FALSE FALSE  TRUE FALSE FALSE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "orange"
# Alternatively, we can do all this in one line:
prices[prices[["department"]]=="produce" & prices[["pricePerLb"]]>=1 & prices[["pricePerLb"]]<=2  , "itemName"]
[1] "orange"

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 62
# TOPICS: dataframes
#
# Refer to the dataframe from the previous question.
# Write a single command that displays 
#      the name of the produce whose pricePerLb is less than 1.00 and also 
#      the produce whose pricePerLb is greater than 2.00
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.
# First identify the rows we want (notice the parentheses - they ARE necessary)
# R evaluates & before |, so without the parentheses the "produce" test would
# be combined only with the "< 1" test, and every item priced over 2.00
# (from any department) would be included.
rows <- prices$department == "produce" & ( prices$pricePerLb < 1 | prices$pricePerLb > 2 )
rows
[1]  TRUE FALSE FALSE  TRUE FALSE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "mcintosh apple" "pear"          
# Alternatively, we can do all this in one line:
prices[prices$department=="produce" & (prices$pricePerLb<1 | prices$pricePerLb>2 ) , "itemName"]
[1] "mcintosh apple" "pear"          
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.
# First identify the rows we want (notice the parentheses - they ARE necessary)
# R evaluates & before |, so the parentheses are what make the "produce" test
# apply to BOTH price tests.
rows<-prices[["department"]]=="produce" & (prices[["pricePerLb"]]<1 | prices[["pricePerLb"]]>2)
rows
[1]  TRUE FALSE FALSE  TRUE FALSE
# now get the itemName values for those rows:
prices[ rows , "itemName"]
[1] "mcintosh apple" "pear"          
# Alternatively, we can do all this in one line:
prices[prices[["department"]]=="produce" & (prices[["pricePerLb"]]<1 | prices[["pricePerLb"]]>2 ) , "itemName"]
[1] "mcintosh apple" "pear"          

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 63
# TOPICS: dataframes
#
# a. Write a command that displays all of the data for a "randomly chosen" row
#    from the dataframe. Your answer should work even if the data in the dataframe changes.
#
#    Hint: as part of your answer, use the nrow function to determine the number of rows
#    that are in the dataframe.
#
# b. Write a command that displays the rows in the reverse order (i.e. last row first).
#
# c. Write a command that displays the rows in the dataframe in a randomly
#    chosen order.
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############.
# Answer (a)
###############.

# First identify a random row number.
# You can do this with the sample function.

row  <- sample(1:nrow(prices), 1)
row
[1] 1
# Alternatively, the following uses the runif function to do the same thing.
# Either the code above or the following code will work just fine.
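# The "+1" in the code below is needed because trunc() rounds down: runif returns
# a number between 1 and nrow(prices)+1, which truncates to a whole number from
# 1 through nrow(prices).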
#
# row <- trunc( runif(1,1,nrow(prices) + 1) )
# row

# now display the data for the randomly chosen row:
prices[ row , ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
# Alternatively, we can do all this in one line.
# (Note that we might get a different random row this time.)
prices[   sample(1:nrow(prices), 1) ,   ]
      department       itemName pricePerLb salePrice
item1    produce mcintosh apple       2.41      1.81
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############.
# Answer (b)
###############.

# Get the row numbers in reverse order.
# You can do this with the sort function (using decreasing=TRUE).

rows  <- sort( 1:nrow(prices), decreasing=TRUE)
rows  
[1] 5 4 3 2 1
# now display the data for the rows in reverse order:
prices[ rows , ]
      department       itemName pricePerLb salePrice
item5     bakery      rye bread       6.04      4.53
item4    produce           pear       2.41      1.81
item3    produce         orange       1.59      1.19
item2     bakery chocolate cake       1.93      1.45
item1    produce mcintosh apple       2.41      1.81
# Alternatively, we can do all this in one line:
prices[   sort(1:nrow(prices), decreasing=TRUE) ,   ]
      department       itemName pricePerLb salePrice
item5     bakery      rye bread       6.04      4.53
item4    produce           pear       2.41      1.81
item3    produce         orange       1.59      1.19
item2     bakery chocolate cake       1.93      1.45
item1    produce mcintosh apple       2.41      1.81
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############.
# Answer (c)
###############.

# First choose a random order for the rows.
# You can do this with the sample function.

# To get ALL of the row numbers in a random order, the size must be the
# number of rows in the dataframe, i.e. nrow(prices).
rows  <- sample(1:nrow(prices), size=nrow(prices))
rows  
[1] 3 1 5 2 4
# Another way to do this with sample is just to leave out the size parameter.
rows  <- sample(1:nrow(prices))
rows  
[1] 5 3 2 4 1
# now display the data from the randomly chosen rows:
prices[ rows , ]
      department       itemName pricePerLb salePrice
item5     bakery      rye bread       6.04      4.53
item3    produce         orange       1.59      1.19
item2     bakery chocolate cake       1.93      1.45
item4    produce           pear       2.41      1.81
item1    produce mcintosh apple       2.41      1.81
# Alternatively, we can do all this in one line:
# (Note that we might get a different order this time.)
prices[   sample(1:nrow(prices))   ,   ]
      department       itemName pricePerLb salePrice
item2     bakery chocolate cake       1.93      1.45
item5     bakery      rye bread       6.04      4.53
item4    produce           pear       2.41      1.81
item1    produce mcintosh apple       2.41      1.81
item3    produce         orange       1.59      1.19

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 64
# TOPICS: dataframes
#
# Write a command that displays the average pricePerLb of those items
# whose pricePerLb is at least 1.50
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

###############################################.
# ANSWER 1 - using $ notation
###############################################.

# First identify the rows we want
rows <- prices$pricePerLb >= 1.5
rows
[1] TRUE TRUE TRUE TRUE TRUE
# now get the data for those rows from the specified columns:
mean( prices[ rows , "pricePerLb"] )
[1] 2.876
# Alternatively, we can do all this in one line:
mean( prices[ prices$pricePerLb >= 1.5 , "pricePerLb"] )
[1] 2.876
###############################################.
# ANSWER 2 - using [[double-bracket]] notation
###############################################.
# First identify the rows we want
rows <- prices[["pricePerLb"]] >= 1.5
rows
[1] TRUE TRUE TRUE TRUE TRUE
# now get the data for those rows from the specified columns:
mean( prices[ rows , "pricePerLb"] )
[1] 2.876
# Alternatively, we can do all this in one line:
mean( prices[ prices[["pricePerLb"]] >= 1.5 , "pricePerLb"] )
[1] 2.876

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 65
# TOPICS: dataframes
#
# Write a command that displays the number of items that are listed in
# the "produce" department.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

#########.
# ANSWER 
#########.

# First identify the rows we want as a logical vector
rows <- prices$department == "produce"
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# Now sum the logical vector to get the total number of TRUEs
sum(rows)
[1] 3
# Alternatively, we can do all this in one line:
sum(  prices$department == "produce"  )
[1] 3

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# QUESTION 66
# TOPICS: dataframes
#
# Write a command that displays the number of items that are listed
# in the "produce" department whose pricePerLb is at least 1.50.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE: see the answers to questions 47 and 48 for code to create the dataframe.

#########.
# ANSWER 
#########.

# First identify the rows we want as a logical vector
rows <- prices$department == "produce"  &  prices$pricePerLb >= 1.5
rows
[1]  TRUE FALSE  TRUE  TRUE FALSE
# Now sum the logical vector to get the total number of TRUEs
sum(rows)
[1] 3
# Alternatively, we can do all this in one line:
sum(     prices$department == "produce"  &  prices$pricePerLb >= 1.5    )
[1] 3