April 17, 2019

R Vectors

Vectors in R

A vector is a collection of data items (values) of the same type.
In R, a vector is a one-dimensional structure that holds its data in sequence.
Atomic vector types: logical, integer, double, character, complex and raw.
In R, nearly everything is a vector; even a single value is a vector of length one.

Different ways to create a vector in R:
vector1 <- 8:18
my_vector <- -6:14
age <- c(23, 55, 36, 87, 12)
character_vector <- c("a", "b", "c")
float_vector <- c(1, 6.888, pi, exp(2))
cards <- c(spades=11, hearts=12, diamonds=11, clubs=13)
vector(mode = "logical", length = 0)
output = vector("double", ncol(df))
char_vect <- vector(mode="character", length=10)
num_balls <- vector(mode="integer", length=total)
X <- seq(0, 3, length=10)
ZZZ <- rep(2:4, 3)

vect2 <- readLines("test.txt")
vector9 <- scan("file.txt")
char_vector = character(5)
a_numeric_vector <- rlnorm(50)
an_integer_vector = rpois(24, lambda = 5)
a_numeric_vector = rbeta(24, shape1 = 1, shape2 = 1)
my.vector<-c((-B+sqrt(B^2-4*A*C))/(2*A), (-B-sqrt(B^2-4*A*C))/(2*A))
logical_vector <- abs(my_vector) <3
as.vector(x, mode = "any")
as.vector(x)
r <- as.vector(sqrt(outer(x^2, y^2, "+")))
num_vect <- as.numeric(c("2", "3", "4"))

class(age) # numeric
y <- c(1.4, "ten")
c(1:5, 10.5, "next")
   coercion # converts all data items to the most general type present
   logical < numeric < character # least general ... to ... most general
class(c(FALSE))
class(c(FALSE, 3))
class(c(FALSE, 3, "twelve"))
mode(vect)

Accessing Vector elements:
There are three ways to access/subset vectors: by using the indices, by using the names (if the vectors are named) and by using logical vectors. 
my_vector[3] # 3rd element
vec[2:6] # 2,3,4,5,6 elements
vector1[-3] # negative index drops an element: all elements except the 3rd
vect[-c(1:3, 9:13)]
heights[1] <- "Ultra-short"

> x <- c(1, 5.4, TRUE, "hello")
> x[3]            # access 3rd element
> x[-1]          # access all but 1st element
> x[c(2, 4)]      # access 2nd and 4th element
> x[c(2.4, 3.54)]    # real numbers are truncated to integers
last <- tail(linkedin, 1)
> x[c(TRUE, FALSE, FALSE, TRUE)]
> x <- c("first"=3, "second"=0, "third"=9)
> x["second"]
> x[c("first", "third")]
ro_sel <- ro_vector[2:5]
> x[x<0] <- 5    # modify elements less than 0
> x <- x[1:4];        # truncate x to first 4 elements
> x <- NULL # deleting a vector
corrected_vector <- as.numeric(gsub("[*]", "NA", vect1))
corrected_vector <- as.numeric(gsub("\\*", "", vector))
phone_vector[(nchar(phone_vector))!= 10] <- NA
clean_vector <- replace(phone_vector, nchar(phone_vector) != 10 , NA)

Operations on Vector elements:
my_vector1 <- c(5, 2, 5, 7)
my_vector2 <- c(7, 4, 9, 8)
added_vector <- my_vector1 + my_vector2
sum=c(2, 6, 5) + c(4, 3, 5, 7)
diff=c(1, 8, 5, 4) - c(4, 3, 5, 7)
mul_vector <- my_vector1 * my_vector2
div=vect1/b
c(vect, 555, vect)
linkedin > facebook
B_greater_A <- B_votes > A_votes
sum(A_votes >= 4)
(A_votes > 3) & (B_votes > 3)

Recycling => when two vectors in an operation differ in length, the shorter one is repeated to match the length of the longer one.
If the length of the longer vector is an integral multiple of the length of the shorter vector, complete recycling is done.
If it is not an integral multiple, the operation is still performed, but R issues a warning; this is called partial recycling.
a = c(2 ,6) 
b = c(4, 3, 5, 7)
c = a + b
8 > c(2, 9, 6, 8, 10)
c(8, 5, 7, 1, 0) > c(2, 9, 6, 8, 0)
c(8, 5, 7, 1, 0) > c(2, 9, 8)

length(vector) # number of elements
names(some_vector) <- c("Name", "Profession") # give a name to the elements of a vector
poker_start <- poker_vector[c("Monday", "Tuesday", "Wednesday")]
tail(vect1)
head(vect2)

is.vector(x, mode = "any")
is.vector(x)
! is.vector(df, mode = "list")

sum(x, na.rm=TRUE)
prod(x, na.rm=TRUE)
> sample(10)
> sample(x, 2)
> sample(x, replace = TRUE)
> sample(c("H","T"),10, replace = TRUE)

min(my_vector1) # min value in vector
max(bank$age) # max value in vector
range(vector_9) # min & max value in vector
range(eigen(C2, only.values = TRUE)$values)
which.min() # index of min value in vector
which.max() # index of max value in vector
x[which.min(x)] # min value in vector
lil <- hotdogs[which.min(hotdogs$calories), ]

sort(x, decreasing = FALSE, na.last = NA, ...)
sort(my_vector1)
> sort(x, decreasing=TRUE)
sort(x, partial = c(10, 15))
sort(c(10:3, 2:12), method = "quick", index.return = TRUE)
sort.int(x, partial = NULL, na.last = NA, decreasing = FALSE, method = c("auto", "shell", "quick", "radix"), index.return = FALSE)
tail(sort.int(x, partial=length(x) - 4), 5)
is.unsorted(x) 
is.unsorted(x, strictly = TRUE)
> order(x) # index of the sorted vector
> order(x, decreasing=TRUE)
intersect(1:10, 7:20)
(xu <- x[!duplicated(x)])
duplicated(iris)[140:143]
anyDuplicated(x)

strsplit(vector[6], " ")
as.numeric(strsplit(vec[4],"-")[[1]]

is.na(my_vector)
total <- sum(shopping_bill)
sum(is.na(bank))
sum(is.na(bank$salary))
sum(is.na(diamonds$price < 250))
summary(vect)
vect[order(vect)]
round(my.vector, digits=1)

which(x, arr.ind = FALSE, useNames = TRUE)
which(x==10)
which(is.na(bank$salary))
which(div.3, arr.ind = FALSE)
which.max(bank$age)
bank[which.max(bank$age), ]
length(which(bank$my_decision == "yes"))
length(which(bank$y == "yes" & bank$marital == "single"))
length(which(airquality$Temp>mean(airquality$Temp)))
sort(x,partial=n-1)[n-1] # n largest
tail(sort(a), 5) # the 5 largest values
sort(unique(c))[1:N]
top_2_movie <- bollywood[order(bollywood$Tcollection,decreasing=TRUE)[2],1]
ndx <- order(x)[1:N] # indices of the N smallest values
x[order(x)[1:5]]
match(x, table, nomatch = NA_integer_, incomparables = NULL)

mean(x, trim = 0, na.rm = FALSE, ...)
mean(c(6, 7, 7, 7, 8, 8, 8, 9))
mean(A_votes)
mean(bank$age)
c(xm, mean(x, trim = 0.10))
mean(c(1:9, NA), trim=0.1, na.rm=TRUE)
mean(bank$age, na.rm = TRUE)
median(news$shares)
mode = names(sort(table(v), decreasing = TRUE))[1]
sd(bank$age)
sd(titanic_train$Age, na.rm=TRUE)

quantile(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 7, ...)
quantile(x <- rnorm(1001))
quantile(x, seq(0, 1, 0.25))
quantile(news$shares, seq(0.1, 0.9, 0.01))
quantile(x,  probs = c(0.1, 0.5, 1, 2, 5, 10, 50, NA)/100)
quantile(news$shares,  probs = 0.78)
news_without_outliers = news[!news$shares > quantile(news$shares,0.95),]
quantile(titanic_train$Age, probs=c(0.4, 0.6), na.rm = T)
var(x, y = NULL, na.rm = FALSE, use)
var(1:10)
var(1:5, 1:5)
cov(x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman"))
C1 <- cov(swiss)
cor(x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman"))
cor(1:10, 2:11)
round(cor(gold_silver$GoldPrice, gold_silver$SilverPrice), 2)
cor(x, y, use="pair")
cor(titanic_train$Age, titanic_train$Fare, use = "na.or.complete")
cov2cor(V)
symnum(clS <- cor(longley, method = "spearman"))
colnames(swM) <- abbreviate(colnames(swiss), min=6)

any(vect1)
all(vect2)


Arrays in R
Arrays are multi-dimensional generalisations of vectors.
Matrices are restricted to two dimensions, while arrays can have any number of dimensions.

array(data = NA, dim = length(data), dimnames = NULL)
array(1:3, c(2, 4))
integer_array_in_R = array(rbinom(24, size = 8, prob = 0.5), dim = c(2, 3, 4))
R_numeric_array = array(rweibull(24, shape = 1, scale = 1), dim = c(2, 3, 4))
array1 <- array(1:12, dim=c(2, 3, 2))
array(1:24, dim=c(4, 3, 2))

as.array(x, ...)
dim(as.array(letters))
is.array(x)
arrayInd(ind, .dim, .dimnames = NULL, useNames = FALSE)

outer(X, Y, FUN = "*", ...) # outer product
outer(y, x, "^")
outer(month.abb, 2015:2017, FUN = "paste")
X %o% Y
x %o% x %o% y[1:3]

aperm(a, perm, ...) # array permute
aperm(a, perm = NULL, resize = TRUE, keep.class = TRUE, ...)
xt <- aperm(x, c(2, 1, 3))
UCB <- aperm(UCBAdmissions, c(1, 2))

Related R Articles:  Lists in R      Lists in Python

No comments:

Post a Comment