##################################################################.
#
# TOPICS
#
# - functions: sqrt abs max min ceiling floor sum mean
# trunc round
#
# - vector arithmetic and recycling rule
#
# - combining vectors with c function
#
# - functions: c length sum rep seq range
#
# - colon operator (e.g. 3:5 5:-3)
#
##################################################################.
# It's recommended to start coding sessions by removing all variables that
# you may have stored from the last time you've used R. This prevents confusion
# in case you may have a variable from last time.
rm( list=ls() )
4 4. Using some built-in functions
4.1 rm(list=ls())
4.2 sqrt() abs() NaN nesting function calls
############################################################.
#
# Intro to functions
#
# Intro to R vectors
#
############################################################.
#-----------------------------------------------------------.
# sqrt function - eg. sqrt(49) ####
#
# abs function - eg. abs(-49) ####
#
# NaN is "not a number" - eg. sqrt(-49) ####
#
# nesting function calls - eg. sqrt(abs(-49)) ####
#----------------------------------------------------------.
# To take the square-root of a number in R, use the sqrt function
# For example:
sqrt(25) # get the square root of 25
[1] 5
sqrt(10) # get the square root of 10
[1] 3.162278
sqrt(-5) # square roots of negative numbers return NaN (i.e. "not a number")
Warning in sqrt(-5): NaNs produced
[1] NaN
# sqrt is an example of a "function".
# A function takes some information as input (e.g. 25)
# and returns a value as output, (e.g. 5)
#
# To see R's help page for the sqrt function type the following:
#
# ?sqrt # show the help page for sqrt ####
# Some R help pages show information for multiple functions.
# The help page for sqrt also shows information about the abs function.
#
# abs gives you the absolute value of a number (i.e. the positive version of the number)
abs(2) # 2
[1] 2
abs(-2) # 2
[1] 2
# We can "nest" one function call inside another function call.
#
# When we do so the value that is "returned" by the "inner" function call
# is then "passed" to the "outer" function call.
sqrt(-49) # NaN
Warning in sqrt(-49): NaNs produced
[1] NaN
sqrt(abs(-49)) # 7
[1] 7
4.4 function call
#.......................................................................
# A particular use of a function is known as a "function call" ####
#.......................................................................
sqrt(100) # this is a function call of the sqrt function
[1] 10
sqrt(64) # this is a different function call of the sqrt function
[1] 8
4.5 return value
#.......................................................................
# The output of a function is known as the "return value" of the function. ####
#.......................................................................
sqrt(64) # The "return value" of this "function call" is 8
[1] 8
4.6 max() min() ceiling() floor() sum()
# Some functions can take more than one argument.
# However, all functions return exactly one item.
# (we will describe an exception to this later).
#
# max and min functions return the maximum and minimum value of all of their arguments. ####
# For example:
max(4,10,2,5) # four arguments, 4,10,2,5 - one return value, i.e. 10
[1] 10
min(4,10,2,5) # four arguments, 4,10,2,5 - one return value, i.e. 2
[1] 2
# another example
joesSalary <- 50
suesSalary <- 70
bobsSalary <- 60
# three arguments - joesSalary, suesSalary, bobsSalary
# one return value, i.e. 70
max(joesSalary, suesSalary, bobsSalary)
[1] 70
4.7 arguments (AKA parameters)
#.......................................................................
# The input values to a function are known as the argument(s) or the parameter(s) of
# a function. (Some people/books may draw a distinction between the word argument
# and the word parameter but for our purposes they mean the same thing.)
#.......................................................................
# In the following code:
# 36 is an argument (or parameter), i.e. 36 is "passed" to the sqrt function.
# the return value is 6
sqrt(36)
[1] 6
4.8 “passing values” to a function
#.......................................................................
# Specifying a value as an argument to a function is known as "passing" that value to the function. ####
#.......................................................................
sqrt(36) # 36 is being "passed" to the sqrt function.
[1] 6
#.......................................................................
# The arguments to a function may be expressions, not just single value. ####
#.......................................................................
2 * max ( pi ^ 2 , pi * 2) # 1st argument: pi^2 , 2nd argument: pi*2
[1] 19.73921
4.9 more functions: ceiling, floor, sum
ceiling(3.2) # ceiling rounds up to next higher number ####
[1] 4
ceiling(-3.2) # ... be careful with negatives
[1] -3
floor (3.2) # floor rounds down to nearest whole number ####
[1] 3
floor(-3.2) # ... be careful with negatives
[1] -4
sum(2,10,4) # sum returns the sum of its arguments ####
[1] 16
# we will speak about averages, or the "mean function" later ...
4.10 R’s “help” system ?someFunction ??anyWord
########################################################.
#
# R's "help" system ####
#
########################################################.
#----------------------------------------------------------------------------.
# To get more information about a particular function, you
# use the "help" function. You must put the name of the R function you
# want help with in "quotes". The "help page" or "manual page" for
# that function (or group of functions) will appear in the "help"
# window.
#
# help("sum") # show the R documentation page for the sum function.
#
# help(sum) # same thing - you don't need the quotes
#
# ?sum # same thing - ? is shorthand for the help function
#
# ?help # you can even get help on the help function
#
# ??max # The double question mark ?? searches for a particular word in any help page.
#----------------------------------------------------------------------------.
####
# Some help pages describe several different R functions in single page
#
# ?ceiling # this describes ceiling, floor and several other functions all in one help page
#
# ?floor # this shows the same thing
# NOTE:
#
# In posit.cloud you can press F1 when the cursor is on the name of a function ####
# (this only works in the "script" window)
4.11 pi
# pi is a built-in variable that contains the first few digits of the value of pi
pi # value of pi
[1] 3.141593
pi * 2 # pi times 2
[1] 6.283185
pi ^ 2 # pi squared
[1] 9.869604
4.12 trunc()
#-----------------------------------------------------------------------------.
# trunc function ####
#
# trunc stands for "truncate" which means to "shorten" or to "chop off"
# The trunc function "chops off" the values after the decimal point.
#-----------------------------------------------------------------------------.
trunc(3.2) # chops off the decimal points
[1] 3
trunc(-3.2) # compare this with "floor and ceiling" ... how are they different?
[1] -3
4.13 round() function
#-----------------------------------------------------------------------------.
# round function ####
#
# first argument - value to round
# second argument - which position to round
#-----------------------------------------------------------------------------.
# round a value to a particular number of decimal places
round(1.129, 2) # 1.13
[1] 1.13
round(1.129, 1) # 1.1
[1] 1.1
pi # display the value of pi ####
[1] 3.141593
round(pi, 2) # round a value to a particular number of decimal places
[1] 3.14
round(pi, 3) # round a value to a particular number of decimal places
[1] 3.142
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# if 2nd argument is 0, the number is rounded to the closest whole number
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
round(pi, 0) # round pi to the closest whole number
[1] 3
#..........................................................................
# You can also supply a negative value for digits
#..........................................................................
round(1939, -1) # negative values are allowed, e.g. round to closest multiple of 10
[1] 1940
round(1939, -2) # round to closest multiple of 100
[1] 1900
round(1939.1598, 2) # 1939.16
[1] 1939.16
round(1939.1598, -2) # 1900
[1] 1900
#.................................................................
# Default value for the digits argument of the round function
#.................................................................
# Some arguments for some functions have a "default value".
# The default value is used when the argument does not appear in the function call.
# For example, 0 is the "default value" for the digits argument of the round function.
#
# This is described in the Usage section on the help page for the round function (?round)
# The usage section includes the following information:
#
# USAGE:
# round(x, digits = 0)
#
# "digits = 0" means that the default value of the
# digits argument (i.e. the 2nd argument) is 0 (zero).
#
round(pi) # answer is 3 because 0 is the default number of digits
[1] 3
round(1.234) # answer is 1 because 0 is the default number of digits
[1] 1
# ?round # view the help page for the round function
4.14 Default values for arguments
#--------------------------------------------------------------------------------.
# NAMES AND DEFAULT VALUES OF ARGUMENTS ARE SHOWN ON THE HELP PAGES ####
#
# The arguments for each function have "names"
#
# Some arguments have "default values". The default value for an argument is
# used when the function call does NOT specify a value for that argument.
# (see examples below).
#--------------------------------------------------------------------------------.
# Every argument for every function in R has a "name".
# SOME arguments for SOME functions have a "default value".
# All of this information is shown in the "Usage" section on the help page
# for the function.
#
# FOR EXAMPLE
# Look at the help page for the round function (i.e. ?round).
# The "Usage" section includes the following information:
#
# USAGE:
# round(x, digits = 0)
#
# This means
#
# - The name of the 1st argument is "x"
#
# - The name of the 2nd argument is "digits".
# The default value for the "digits" argument is 0.
# This is shown in the documentation as "digits = 0".
#
# - Note that the first argument, x, does NOT have a default value.
#
# View the help page by typing:
#
# ?round # arguments are "x" and "digits", the default value for digits is 0
4.15 specifying arguments in function calls
# You may specify the names of the arguments when calling a function,
# but you don't have to (see examples below).
#
# Specifying the names of the arguments allows you to:
#
# (a) type the arguments out of order (see below) and/or
#
# (b) skip some arguments (examples of this to be shown later ...)
# The following function call will round 12345 to the
# nearest hundred (i.e. the 2nd argument is -2) to result in 12300.
#
# The arguments must be specified in same order as specified on the help page
# (see ?round). i.e. first the number to be rounded (12345 in this case)
# and then the position to round it to (-2 in this case).
round( 12345, -2) # round 12345 to the nearest hundred
[1] 12300
# You don't have to but you may specify the names of the arguments if you like.
round ( x=12345, digits=-2 )
[1] 12300
# If you specify the names of the arguments (see below), then you
# may write the arguments out of order.
#
# Otherwise, the arguments must be typed in the same order as they appear
# in the "Usage" section on the help page.
#
# In the following command the arguments are not in the order as specified
# on the help page. However, that is OK since we specified the names of the
# arguments.
round( digits = -2, x=12345) # specify arguments out of order, same result as above
[1] 12300
# You may omit the names of the first few arguments in a function call.
# If you do so then the first few arguments, without names in the function call,
# are assumed to be the first few arguments as specified on the help page.
#
# For example, in the following command the first argument, 12345,
# does not include a name. Since this is the first argument in the function
# call, it is assumed to be the "x" argument (which is the first argument
# specified in the help page (?round)).
round (12345, digits = -2) # you can specify names for some args but not others
[1] 12300
# If you want to, you MAY always specify the names of the arguments
# However, it is not necessary to type the names of the arguments as long as
# you type the arguments in the expected order (as defined in the help pages).
#
# Many R programmers choose to leave out names for the first argument or
# two and then specify names for the subsequent arguments,
# e.g. seq(2, 10, by=2) (this returns 2 4 6 8 10 - see ?seq).
#
# The reason for this is that the first argument or two
# of most functions are obvious as to their meaning. After that, it becomes
# less clear as to what the additional arguments mean. By specifying the names
# of these additional arguments it becomes easier to read the code.
4.16 What’s a vector? The is.vector() function.
###############################################.
#
# VECTORS
#
###############################################.
#-----------------------------------------------------------------------.
# A "vector" is a collection of values that can be processed as a group. ####
#-----------------------------------------------------------------------.
#-----------------------------------------------------------------------.
# The is.vector function returns TRUE if its argument is a vector ####
# and FALSE otherwise.
#-----------------------------------------------------------------------.
#-----------------------------------------------------------------------.
# The simplest vector is just a single value ... ####
# (it is technically a collection of just one value).
#-----------------------------------------------------------------------.
# View the help page by typing:
#
# ?is.vector
is.vector ( 3 )
[1] TRUE
is.vector( 99923141.32412431 )
[1] TRUE
# A variable that contains a vector is a vector ####
priceOfApple = 1.99
is.vector(priceOfApple) # TRUE
[1] TRUE
# The c() function is used to combine multiple values into a single vector. ####
#
# You can think of the "c" as standing for the word "combine".
# "c" actually stands for the word "concatenate" which
# is a technical fancy shmancy word for "combine things together".
# The following is a vector with multiple values.
# The c function combines (i.e. "concatenates") the multiple values into a
# single "vector"
c(100,200,300, 50, -2, 25)
[1] 100 200 300 50 -2 25
is.vector(c(100,200,300, 50, -2, 25)) # this works
[1] TRUE
is.vector(100,200,300, 50, -2, 25) # ERROR: use c() to tie together different values
Error in is.vector(100, 200, 300, 50, -2, 25): unused arguments (300, 50, -2, 25)
someNumbers = c(100,200,300, 50, -2, 25) # combine (or concatenate) values into one vector
someNumbers
[1] 100 200 300 50 -2 25
is.vector(someNumbers) # TRUE
[1] TRUE
4.17 range() function
#-----------------------------------------------------.
# Other functions can also create vectors.
#-----------------------------------------------------.
#.............................................................................
# The range function returns a vector
#
# The range function returns the minimum and maximum values that are in a vector ####
#.............................................................................
range(someNumbers)
[1] -2 300
is.vector(range(someNumbers))
[1] TRUE
# You can also capture the result in a variable
lowestAndHighest = range(someNumbers)
lowestAndHighest # -2 300
[1] -2 300
is.vector(lowestAndHighest) # TRUE
[1] TRUE
4.18 seq() function
#.............................................................................
# The seq function returns a vector. In its simplest use,
# seq returns the sequence starting with the 1st argument, ending with the 2nd argument ####
#
# NOTE - we will come back to the seq function to learn about
# much more complex ways of using it.
#.............................................................................
# Example 1
seq(5,10) # 5 6 7 8 9 10
[1] 5 6 7 8 9 10
is.vector( seq(5, 10) ) # TRUE
[1] TRUE
# Example 2
seq(10,5) # 10 9 8 7 6 5
[1] 10 9 8 7 6 5
is.vector( seq(10,5) ) # TRUE
[1] TRUE
# Example 3
seq(0.5, 2.5) # 0.5 1.5 2.5
[1] 0.5 1.5 2.5
is.vector( seq(0.5, 2.5) ) # TRUE
[1] TRUE
# We can also capture the results in variables
example1 = seq(5,10)
example1 # 5 6 7 8 9 10
[1] 5 6 7 8 9 10
is.vector(example1) # TRUE
[1] TRUE
example2 = seq(10,5)
example2 # 10 9 8 7 6 5
[1] 10 9 8 7 6 5
is.vector(example2) # TRUE
[1] TRUE
example3 = seq(.5, 2.5)
example3 # 0.5 1.5 2.5
[1] 0.5 1.5 2.5
is.vector(example3) # TRUE
[1] TRUE
seq(0.5, 3)
[1] 0.5 1.5 2.5
4.19 rep() function
#.............................................................................
# The rep function returns a vector ####
#
# In its simplest use, the rep function returns a vector of its first
# argument repeated the number of times specified by its 2nd argument.
#
# NOTE - we will come back to the rep function to learn about
# more complex ways of using it.
#.............................................................................
rep(100,3) # 100 100 100
[1] 100 100 100
rep( seq(1,3) , 2) # 1 2 3 1 2 3
[1] 1 2 3 1 2 3
# QUESTION
# Create a vector that has the numbers 1 3 1 3 1 3 etc. for a total
# of 20 numbers. Store the resulting vector into a variable named nums.
nums = rep( c(1,3) , 10) # ANSWER
4.20 Use c() to combine vectors
#-------------------------------------------------------------------------------------.
# DO NOT WRITE INDIVIDUAL VALUES WITHOUT COMBINING THEM TOGETHER WITH A FUNCTION CALL!
#-------------------------------------------------------------------------------------.
100,200,300 # ERROR - individual values separated by commas are meaningless to R ####
# REMEMBER - if no other function call is being used, you can use the
# c function to combine individual values
c(100,200,300) # 100 200 300 (no error)
#-------------------------------------------------------------.
#
# More about the c function ####
#
#-------------------------------------------------------------.
#..............................................................................
# If you "nest" calls to "c", ie. if you combine one vector inside of another
# vector by using the c function, the result is a single vector
#..............................................................................
c(100, 200, c(30, 20, 10), 600) # same as c(100,200,30,20,10,600)
c(100, 200, 30, 20, 10, 600) # same thing
#..............................................................................
# You can use the c function to combine multiple vectors into a single vector.
#..............................................................................
x <- c(10,20,30)
y <- c(40, 50)
z <- c(x, y) # combine the values from x and y into z
z
z <- x, y # ERROR - use the c function to combine vectors into a single vector
Error in parse(text = input): <text>:5:4: unexpected ','
4:
5: 100,
^
# QUESTION ####
# Find the sum of all the values that are in x and y, without using z
# ANSWER
sum(c(x,y)) # This works
Error: object 'x' not found
sum(x,y) # This works too - sum allows multiple vectors to be summed
Error: object 'x' not found
# QUESTION ####
#
# Find the average (i.e. mean) of all the values that are in x and y,
# without using z
# ANSWER
mean(c(x,y)) #This works
Error: object 'x' not found
mean(x,y) # ERROR
Error: object 'x' not found
# QUESTION
# Why did we get an error in the last example?
# ANSWER
#
# From the documentation for sum and mean (i.e. ?sum and ?mean) we can
# see that the sum function allows multiple vectors that contain the numbers to be
# passed as separate arguments. However, the mean function requires
# all of the numbers to be averaged to be in a single vector that is passed
# to the argument named x. It's true that one might expect these functions
# to be more similar in how they are called. However, the designers of the
# language decided otherwise. The underlying reasons for the difference in
# the design of these functions is irrelevant - bottom line is you
# need to know how to call the functions. The place to learn this is
# in the documentation for the functions (i.e. ?sum and ?mean)
#
# Look at the documentation for sum and for mean (i.e. ?sum and ?mean).
# The "Usage" section shows the names of the arguments and their default values.
# The "Arguments" section explains what each argument is expected to contain.
# The "Value" section explains how the return value for the function is calculated.
#
# It takes some time and practice to be proficient at reading R's help pages.
# However, understanding how to read and interpret R's help pages
# is a critical skill that allows you to become familiar with R's built in
# functions.
#
# An "ellipsis" (i.e. three periods, ... ) in the help pages
# stands for the ability to type several values in place of the
# ellipsis. For example, the ... in the help page for sum, indicates
# the ability to type several different values to be summed. This is
# described in the ARGUMENTS section where it explains that ... stands
# for "numeric or complex or logical vectors".
# View the help page by typing:
#
# ?sum
# USAGE: sum(..., na.rm = FALSE)
# ARGUMENTS:
# ... numeric or complex or logical vectors
# na.rm (see the help page)
# However, for the mean function, there is a single argument named x that
# is expected to contain the values to be averaged. The ellipsis shown
# in the help page for mean is used for a more subtle reason. It shows where
# additional arguments, not listed on this help page, might be specified
# (this is an advanced concept that we'll return to later).
# View the help page by typing:
#
# ?mean
# USAGE: mean(x, trim = 0, na.rm = FALSE, ...)
# ARGUMENTS:
# x An R object. (i.e. a vector - these are the numbers)
# trim (see help page)
# na.rm (see help page)
# ... further arguments passed to or from other methods.
# You can use the c function to combine values from different functions.
# Make sure that you match parentheses correctly.
c( rep(100,3) , seq(-5,-7) ) # 100 100 100 -5 -6 -7
[1] 100 100 100 -5 -6 -7
# DON'T FORGET THE c( ... )
#rep(100,3), seq(-5,-7) # ERROR
range( rep(100,3) , seq(990,1005) , seq(-5,-7) ) #
[1] -7 1005
range( c( rep(100,3) , seq(990,1005) , seq(-5,-7) ) )
[1] -7 1005
4.21 — Practice —
#----------------------------------------------------.
# QUESTION
# Write R code that takes the average of the first
# 200 even numbers.
#----------------------------------------------------.
4.22 non-vectors (later in the course).
#----------------------------------------------------------------------------.
# Things that aren't vectors (e.g. dataframes, factors, matrices, etc) ####
#----------------------------------------------------------------------------.
# A vector is the simplest arrangement of values in R.
# R allows for more complex arrangements of data, which we will learn about
# later in the course, such as factors, matrices, dataframes, etc.
# These more complex arrangements of data are created from vectors but are
# technically not vectors themselves. One example of such an arrangement
# of data is a data.frame.
# We will cover dataframes later in the course.
# For now, I just want to demonstrate that R has structures that are NOT vectors.
# A dataframe is made up of vectors, but it itself is NOT a vector.
example = data.frame(students = c("joe", "sue", "bob"),
                     test1 = c(71,85,90),
                     test2 = c(83, 92, 95), stringsAsFactors = FALSE)
example
students test1 test2
1 joe 71 83
2 sue 85 92
3 bob 90 95
is.vector(example) # FALSE
[1] FALSE
is.data.frame(example) # TRUE
[1] TRUE
4.23 Vector arithmetic
#--------------------.
# Vector arithmetic ####
#--------------------.
# When you perform arithmetic with a vector each item in the vector is operated upon
c(100,200,300) + 5 # return a vector that contains c(105, 205, 305)
[1] 105 205 305
# vector arithmetic also respects the order of operations
# In the following example the multiplication is done before the addition
# to yield the value c(205, 405, 605)
5 + c(100, 200, 300) * 2 # do the multiplication first
[1] 205 405 605
# This works as follows
#
# original: 5 + c(100, 200, 300) * 2
#
# do the *: 5 + c(200, 400, 600)
#
# then do the +: c(205, 405, 605)
#
# result is displayed as: 205 405 605
# we can change the order of operations with parentheses
# This yields a different result.
(5 + c(100,200,300)) * 2 # pay close attention to the parenthesis!!!
[1] 210 410 610
# This works as follows
#
# original: (5 + c(100, 200, 300)) * 2
#
# do the +: c(105, 205, 305) * 2
#
# then do the *: c(210, 410, 610)
#
# result is displayed as: 210 410 610
###########################################.
#
# You may assign a vector to a variable
#
###########################################.
grades <- c(72,95,79,85)
grades # show the values
[1] 72 95 79 85
# QUESTION:
#
# Modify the grades variable by adding 2 points to each grade
# ANSWER ####
grades = grades + 2 # you must assign the answer back to grades
grades
[1] 74 97 81 87
4.24 length( SOME_VECTOR )
#-----------------------------------------------------------------------.
#
# length(vector) returns the number of values in the vector ####
#
#-----------------------------------------------------------------------.
# Set the value of grades
grades <- c(72,95,79,85)
# the length function returns the number of values in a vector
length(grades) #4
[1] 4
length(c(25, 10)) #2
[1] 2
length(c(100,200,300)) #3
[1] 3
# A single value is a vector - but it doesn't need to be surrounded with c()
length(c(100)) # the length of a vector that contains a single item is 1
[1] 1
length(100) # ... same thing ... don't use the c - it's not necessary
[1] 1
c(100) # this is the same as just 100, the "c" is not necessary if you have just one value.
[1] 100
100 # same thing - don't use the c for a single value
[1] 100
grades # show all grades
[1] 72 95 79 85
grades + 5 # show what the values would be if we added 5 to each grade
[1] 77 100 84 90
grades # however, grades did NOT actually change
[1] 72 95 79 85
# If you want to change the value of grades, you need to
# use the = sign or the <- or the ->. For example:
grades # show grades
[1] 72 95 79 85
grades <- grades + 10 # add 10 to each grade and update grades with the new values
grades # grades now has the new values
[1] 82 105 89 95
prices = c(1.99, 2.99, 3.99)
doublePrices = 2 * prices
doublePrices
[1] 3.98 5.98 7.98
4.25 Counting arguments
#############################################################.
#
# Arguments (AKA "parameters") to a function. ####
#
# It is important to know how many arguments are being passed
# to a function. The arguments to a function appear in the (parentheses)
# next to the function name and are separated from each other with commas.
#
#########################################################################.
# Remember that the round function takes TWO arguments
#
# x is the values to round
#
# digits is the position to round to
round(100.729, 1) # 100.7
[1] 100.7
round(100.729, 2) # 100.73
[1] 100.73
round (100.729) # 101
[1] 101
# The first argument is allowed to be a vector with multiple values
round ( c(100.729, 200.618) , 1) # 100.7 200.6
[1] 100.7 200.6
grades = c(82, 105, 89, 95)
sum(grades) # one argument - add up all grades (not very useful for grading ...)
[1] 371
sum(c(82,105,89,95)) # also one argument - same exact thing, sum is given 1 vector
[1] 371
sum(82,105,89,95) # four arguments - same result, HOWEVER sum is given 4 different vectors - same answer
[1] 371
# The sum function will sum all of the values in all
# of its arguments. The following all produce the same
# result (i.e. 306) but in different ways.
sum( c(100,200) , c(1,2,3)) # 2 arguments
[1] 306
sum( c(100,200,1,2,3) ) # 1 argument
[1] 306
sum( 100,200,1,2,3 ) # 5 arguments
[1] 306
4.26 To get an average use the mean function
# IMPORTANT: the mean function works a little differently than the sum function.
#
# The mean function requires that all values being averaged are passed as a single vector. ####
grades # show all the grades
[1] 82 105 89 95
grades = c(82, 105, 89, 95)
mean(grades) # get the average
[1] 92.75
mean( c(82,105,89,95) ) # same thing - there is ONE argument, i.e. a single vector, c(82,105,89,95)
[1] 92.75
mean(82,105,89,95) # I didn't use the c() function here - there are 4 vectors!!!
[1] 82
# To summarize:
# sum and mean are not consistent in the way they handle multiple arguments
sum(1,2,3) # works as expected
[1] 6
mean(1,2,3) # does not work as most people would expect - answer is 1
[1] 1
# View the help page by typing:
#
# ?mean
# Examine the documentation for mean to see why. The Usage section of the
# documentation includes the following: mean(x, trim = 0, na.rm = FALSE, ...)
# The "x" corresponds to a single vector that contains
# the values to be averaged. If you pass the values without
# the c() function, then the 2nd value listed is actually
# passed to the "trim" argument of mean. If you want to know
# what the "trim" argument is used for, see the help
# page for "mean". If you don't specify any value for "trim"
# then "mean" will work as you expect.
# "x"
#
# View the help page by typing:
#
# ?mean # see the documentation for mean
# Arguments passed to mean:
#
# x - a vector that contains the values to be averaged
#
# trim - a fraction (0 to 0.5) of observations to be ignored (i.e. trimmed) from the beginning and end of the vector
#
# na.rm - WE WILL DISCUSS THIS LATER ...
# Return the average of the numbers in the vector.
#
# Return value is 400 , i.e. (100+200+300+500+900) / 5
mean(c(100,200,300,500,900))
[1] 400
# the code above does the same as the next line
sum(c(100,200,300,500,900)) / 5
[1] 400
# DO NOT DO THE FOLLOWING !!!!
# The mean function is being passed a SINGLE value and
# does nothing meaningful in this case.
mean(sum(100,200,300,500,900) / 5) # basically same as: sum(100,200,300,500,900) / 5
[1] 400
# This is because by the time, the mean function
# starts working the value: sum(100,200,300,500,900) / 5
# has already been calculated as 400.
# It would be just as ridiculous as running the following code
# which just returns the number 400 - the mean function does
# nothing meaningful in this case.
mean ( 400 ) # This is the same as 400 / 1
[1] 400
4.26.1 trim argument to mean
# the "trim" argument to mean ####
#
# trim (ie. remove) 0.2 (ie. 1/5) of the values (ie. 1 value)
# from the beginning and end of the vector
#
# Return value is 333.333, ie. mean(c(200,300,500))
mean(c(100,200,300,500,900), 0.2)
[1] 333.3333
mean(c(200,300,500)) # same result
[1] 333.3333
grades = c(5, 82, 85, 89, 105)
mean(grades) # mean ( c(5,82,85,89,105))
[1] 73.2
mean(grades, trim = 0.2) # mean(c(82,85,89))
[1] 85.33333
grades
[1] 5 82 85 89 105
# trim (ie. remove) 0.4 (ie. 2/5) of the values (i.e. 2 values)
# from the beginning and end of the vector
#
# Return value is 300, i.e. mean(300)
mean(c(100,200,300,500,900), 0.4) # trim 0.4 = 2/5 of the values from the beginning and end
[1] 300
mean(c(500,200,300,900,100), 0.4) # trim 0.4 = 2/5 of the values from the beginning and end
[1] 300
# In the following the result is 100
# This is because the arguments are assigned in the following order
#
# x, ie. the values to be averaged = first argument = 100
# trim = second argument = 200
# na.rm = 3rd argument = 300
# ... = all other arguments = c(500,900)
#
# Other than the x=100, the other arguments are really meaningless so
# the result is the average of 100, which is 100.
mean(100,200,300,500,900)
[1] 100
# PROBLEM:
#
# REMEMBER that mean requires that all values being averaged are in a SINGLE vector
# Therefore to take the average of the values in x and in y the following WILL NOT WORK:
mean(x, y) # will not work - y is passed to the "trim" argument instead of being averaged
Error: object 'x' not found
# SOLUTION:
#
# Remember that you can combine multiple vectors into a single vector with
# the c function.
x <- c(10,20,30)
y <- c(40, 50)
mean(c(x,y)) # combine x and y into a single vector and take the mean of that vector
[1] 30
#--------------------------------------------------------.
# QUESTION : ####
#
# Grades for class1 and class2 are as shown below.
#
# class1grades <- c(80,90,100)
# class2grades <- c(85, 88)
#
# (a) get the two averages, one for each class
# (b) get the average for all the students in both classes
#--------------------------------------------------------.