##########################################################.
##########################################################.
##
## NA is used for data that is "Not Available"        ####
##
##########################################################.
##########################################################.

# NA is a special value in R that represents values that are "not available" ####
# Do NOT put quotes around NA ####
# (see more info below)

# EXAMPLE: grades is a vector that contains the grades that the students in a
# class received on a test. Some students didn't take the test yet so their
# grades are recorded as NA (i.e. "not available").
grades <- c(NA, 72, NA, 98, 83)

# A variable can be set to NA, i.e. a vector of 1 element that just contains
# the value, NA.
x <- NA

#-----------------------------------------------------------------------------.
# Arithmetic and logical expressions that include even one NA result in NA. ====
#
# This is because if even one value in an expression is "not available",
# you simply cannot know the value of the entire expression.
#
# NOTE: there are a few exceptions, where the answer cannot depend on the
# missing value. For example, NA & FALSE is FALSE and NA | TRUE is TRUE.
#-----------------------------------------------------------------------------.

# EXAMPLES:
# (lines that start with "#>" show what R displays for the command above them)

3 + NA                              # NA
#> [1] NA

3 > NA                              # NA
#> [1] NA

100 + 15 - NA * 2 > 2^4 * 7         # NA
#> [1] NA

joesSalary <- 50
annesSalary <- NA

annesSalary + 5                     # NA
#> [1] NA

joesSalary > annesSalary            # NA
#> [1] NA

grades <- c(NA, 72, NA, 98, 83)
grades < 80                         # NA TRUE NA FALSE FALSE
#> [1]    NA  TRUE    NA FALSE FALSE
# How R evaluates  grades < 80  step by step:
#
# original command:                grades < 80
# replace variable name:           c(NA, 72, NA, 98, 83) < 80
# recycling rule:                  c(NA, 72, NA, 98, 83) < c(80, 80, 80, 80, 80)
#                                  c(NA<80 , 72<80 , NA<80 , 98<80 , 83<80)
# expressions with NA become NA:   c( NA   , TRUE  , NA    , FALSE , FALSE)
# displayed on screen:             NA TRUE NA FALSE FALSE

#-----------------------------------------------------------------------------.
# A more "typical" example
#-----------------------------------------------------------------------------.
# (lines that start with "#>" show what R displays for the command above them)

joesSalary <- 50
suesSalary <- 70
annesSalary <- NA
mikesSalary <- NA

joesSalary >= suesSalary                         # FALSE
#> [1] FALSE

joesSalary >= annesSalary                        # NA
#> [1] NA

annesSalary == joesSalary                        # NA
#> [1] NA

annesSalary != joesSalary                        # NA
#> [1] NA

joesSalary + 5                                   # 55
#> [1] 55

annesSalary + 5                                  # NA
#> [1] NA

# max() of a vector that contains an NA is NA, so the whole comparison is NA.
suesSalary >= 5 + max(joesSalary, annesSalary)   # NA
#> [1] NA
#-------------------------------------------------------------------------.
# As shown above, if an expression includes even one NA the value of the entire
# expression is NA. The most confusing part of this rule is the following:
#
#    The value of NA == NA is NA. ####
#
# Similarly:
#
#    The value of NA != NA is NA ####
#
# To test to see if a value is NA, use the function is.na( SOME_VECTOR ) ####
#-------------------------------------------------------------------------.
# (lines that start with "#>" show what R displays for the command above them)

# DO NOT TEST FOR NAs with == OR WITH !=
NA == NA              # NA
#> [1] NA

NA != NA              # NA
#> [1] NA

annesSalary <- NA

annesSalary == NA     # NA
#> [1] NA

annesSalary != NA     # NA
#> [1] NA

# To test if a value is NA or not use is.na(SOME_VALUE) ####
is.na(NA)             # TRUE
#-----------------------------------------------------------------.
# Check values for NA with the is.na(SOME_VECTOR) function ####
#-----------------------------------------------------------------.
# (lines that start with "#>" show what R displays for the command above them)

annesSalary <- NA               # assign NA to annesSalary
mikesSalary <- 50

# DON'T DO THIS
annesSalary == NA               # NA
#> [1] NA

annesSalary != NA               # NA
#> [1] NA

# DO THIS
is.na(annesSalary)              # TRUE
#> [1] TRUE

is.na(mikesSalary)              # FALSE
#> [1] FALSE

# Comparing an NA with an actual value also results in NA.
annesSalary == mikesSalary      # NA
#> [1] NA

annesSalary != mikesSalary      # NA
#> [1] NA
#----------------------------------------------------------------.
# is.na works with vectors that contain several values ====
#----------------------------------------------------------------.
# (lines that start with "#>" show what R displays for the command above them)

# Test for NAs with is.na( SOME_VECTOR )
is.na(c(100, NA, NA, 200))        # FALSE TRUE TRUE FALSE
#> [1] FALSE  TRUE  TRUE FALSE

# Test for values that are NOT NA with !is.na( SOME_VECTOR )
!is.na(c(100, NA, NA, 200))       # TRUE FALSE FALSE TRUE
#> [1]  TRUE FALSE FALSE  TRUE

#----------------------------------------------------------------------.
# is.na works with vectors of any mode. ====
#----------------------------------------------------------------------.

# The following uses a vector of logical values that includes some NAs
# (make sure you understand this)
is.na(c(TRUE, NA, NA, FALSE))     # FALSE TRUE TRUE FALSE
#> [1] FALSE  TRUE  TRUE FALSE

#----------------------------------------------------------------------.
# NEVER test for NA using == or !=
#----------------------------------------------------------------------.

# DON'T DO THIS !!!
c(100, NA, NA, 200) == NA         # NA NA NA NA
#> [1] NA NA NA NA

c(100, NA, NA, 200) != NA         # NA NA NA NA
#> [1] NA NA NA NA
############################################################################.
# 2023 - BEREN - UP TO HERE - AFTER CLASS 8
############################################################################.

#---------------------------------------------------------------------.
# The length of a vector that contains NAs INCLUDES the NA values.
#---------------------------------------------------------------------.
# (lines that start with "#>" show what R displays for the command above them)

# The length of the following vector includes the NA values
length(c(100, NA, NA, 200))     # 4
#> [1] 4

# The na.rm argument in the sum and mean functions allows
# you to "remove" (i.e. ignore) the NA values for purpose
# of the sum or the mean.
nums <- c(NA, 10, 5, NA, 20)

#--------------------------------------------------------------------------
# The na.rm argument in the sum and mean functions:
#
#    sum( ... , na.rm=FALSE)
#    mean (x, trim=0, na.rm=FALSE, ...)
#
# The sum and mean functions include an argument named na.rm
# If na.rm is TRUE then the NA's are ignored for the purpose of the function call
#--------------------------------------------------------------------------

# na.rm defaults to FALSE, so the NAs are NOT ignored here.
sum(nums)                       # NA