- The source code and the accompanying documentation are available in the GitHub repository (https://github.com/mdtuhinsheikh/introR_ucsas2019).
- The workshop slides are available at the link.
- Prerequisites:
# R can be used as a calculator: each expression is evaluated and printed.
5 + 7   # 12
exp(5)  # 148.4132
log(2)  # 0.6931472
?function_name
, e.g.,?mean # Search mean function
help.search("text")
, e.g.,help.search("print")
<-
or =
or ->
. For example,x <- "Hello World" x # [1] "Hello World" x = 5 x # [1] 5 5 -> x x # [1] 5
c()
and []
is used for subsetting and indexing.x <- c(2, 5, 8) # [1] 2 5 8 x[2] # [1] 5 x[1:2] # [1] 2 5 x[1]**2 # [1] 4 exp(x) # [1] 7.389056 148.413159 2980.957987 x[1] %% 2 # [1] 0 x[1] == 2 # [1] TRUE x[1] < 2 # [1] FALSE x[1] != 2 # [1] FALSE
vector(), length(), names(), setNames(), rep(), rep.int(), rep_len(), seq(), seq.int(), seq_len(), seq_along(), %in%, match(), rev(), head(), tail(), paste(), paste0(), typeof(), is.character(), as.character(), is.numeric(), as.numeric(), is.logical(), as.logical(), ...
?rep
to get to know about this function and then use this function to print {1 2 3 1 2 3} and {1 1 2 2 3 3}.rep(c(1, 2, 3), times = 2) # 1 2 3 1 2 3 rep(c(1, 2, 3), each = 2) # 1 1 2 2 3 3
sort(), order(), rank(), quantile(), duplicated(), unique(), table(), split(), cut().
# A small numeric vector with repeated values, used to demonstrate the
# summarising helpers below.
x <- c(3, 3, 5, 5, 10, 20)

length(x)  # 6
unique(x)  # 3 5 10 20      (distinct values, in order of first appearance)
sort(x)    # 3 3 5 5 10 20  (sorting keeps the duplicates)
table(x)   # frequency of each distinct value
# x
#  3  5 10 20
#  2  2  1  1
Data Type | Also known as | Example |
---|---|---|
Numeric | float | 42, 3.14, -19.2 |
Character | string or text | "a", "block1", "red", "John" |
Logical | boolean | TRUE, FALSE |
[]
.x <- c(10.4, 5.6, 3.1, 6.4, 21.7) x[2] # Second element 5.6
[[]]
.x <- list(c(1, 2), c(5, 8, 9)) x[[1]] # [1] 1 2 x[[2]] # [1] 5 8 9
# Build a 3 x 2 x 2 array from the integers 1..12 (filled column by column),
# then extract the first 3 x 2 slice along the third dimension.
x <- array(1:12, dim = c(3, 2, 2))
x[, , 1]
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# Create a matrix with 2 rows from the integers 1..6; values fill the matrix
# column by column.
x <- matrix(1:6, nrow = 2)
x
##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6
# Two 2 x 2 matrices, filled row by row.
A <- matrix(c(1, 2, 3, 4), ncol = 2, byrow = TRUE)
B <- matrix(c(10, 20, 30, 40), ncol = 2, byrow = TRUE)

# Dimensions, reported as (rows, columns).
dim(A)  # 2 2
dim(B)  # 2 2

# Determinants; both are non-zero, so both matrices can be inverted.
det(A)  # -2
det(B)  # -200

# Matrix product (%*% is matrix multiplication, * would be element-wise).
A %*% B

# Matrix inverses.
solve(A)
solve(B)
# General template for defining a function: a name, the argument list and a
# body enclosed in braces.
function_name <- function(arguments) {
  # Some operation
}
# Return the absolute value of a single number.
#
# x: a single numeric value (the `if` condition needs exactly one TRUE/FALSE).
# Returns x itself when it is non-negative, otherwise its negation.
find_abs <- function(x) {
  # Early exit: a non-negative input is already its own absolute value.
  # Using >= keeps 0 as 0 rather than computing 0 * -1 (a negative zero).
  if (x >= 0) {
    return(x)
  }
  # Unary minus keeps the input type (an integer stays an integer).
  -x
}

find_abs(-5)  # 5
# Raise x to the power y.
#
# x: the base.
# y: the exponent.
# Returns x^y; a function returns the value of its last expression, so no
# explicit return() is needed here.
pow <- function(x, y) {
  x^y
}

pow(2, 3)  # 8
# Print the numbers 1 to 5 with a for loop: i takes each value of 1:5 in turn.
for (i in 1:5) {
  index <- i
  print(index)
}

# The same output with a while loop: the counter must be initialised before
# the loop and advanced inside it, otherwise the loop never terminates.
i <- 1
while (i <= 5) {
  index <- i
  print(index)
  i <- i + 1
}
Operation | Syntax |
---|---|
\(x \neq 5\) | x != 5 |
\(x = 5\) | x == 5 |
\(x < 5\) | x < 5 |
\(x \leq 5\) | x <= 5 |
\(x > 5\) | x > 5 |
\(x \geq 5\) | x >= 5 |
# Print the numbers 1 to 5, skipping 2: the body of the if only runs when the
# condition i != 2 is TRUE.
for (i in 1:5) {
  if (i != 2) {
    index <- i
    print(index)
  }
}
# Two 2 x 2 matrices, filled row by row.
A <- matrix(c(1, 2, 3, 4), ncol = 2, byrow = TRUE)
B <- matrix(c(10, 20, 30, 40), ncol = 2, byrow = TRUE)
dim(A)
dim(B)

# A %*% B is defined only when the number of columns of A equals the number
# of rows of B.
if (ncol(A) == nrow(B)) {
  print("The multiplication is possible")
} else {
  print("The multiplication is not possible")
}
install.packages("package_name")
library("package_name")
install.packages("Lahman")  # download and install the package (needed once)
library(Lahman)             # attach the package in the current session
data(package = "package_name")
data(package = "Lahman")
read.table(file = "location_of_file", header = TRUE)
write.table(dataName, file = "location_of_file")
read.csv(), read.csv2(), write.csv(), write.csv2(), etc.
data("dataName", package = "package_name")
data("Batting", package = "Lahman") # the data will be loaded if no error shows
# Inspect the structure of the data frame. Pass the object itself, not its
# name in quotes: str("Batting") would only describe a one-element character
# vector.
str(Batting)
# 'data.frame': 105861 obs. of 22 variables:
#  $ playerID: chr "abercda01" "addybo01" "allisar01" "allisdo01" ...
#  $ yearID  : int 1871 1871 1871 1871 1871 1871 1871 1871 1871 ...
#  $ stint   : int 1 1 1 1 1 1 1 1 1 1 ...
#  $ teamID  : Factor w/ 149 levels "ALT","ANA","ARI",..: 136 111 39 ...
#  $ lgID    : Factor w/ 7 levels "AA","AL","FL",..: 4 4 4 4 4 4 4 4 ...
#  $ G       : int 1 25 29 27 25 12 1 31 1 18 ...
#  $ AB      : int 4 118 137 133 120 49 4 157 5 86 ...
#  $ R       : int 0 30 28 28 29 9 0 66 1 13 ...
#  $ H       : int 0 32 40 44 39 11 1 63 1 13 ...
# ...
# Show the first six rows of the data frame.
head(Batting, n = 6)
#    playerID yearID stint teamID lgID  G  AB  R  H X2B X3B HR RBI ...
# 1 abercda01   1871     1    TRO   NA  1   4  0  0   0   0  0   0 ...
# 2  addybo01   1871     1    RC1   NA 25 118 30 32   6   0  0  13 ...
# 3 allisar01   1871     1    CL1   NA 29 137 28 40   4   5  0  19 ...
# 4 allisdo01   1871     1    WS3   NA 27 133 28 44  10   2  2  27 ...
# 5 ansonca01   1871     1    RC1   NA 25 120 29 39  11   3  0  16 ...
# 6 armstbo01   1871     1    FW1   NA 12  49  9 11   2   1  0   5 ...

# Select all rows of the first four columns by position.
Batting[, seq_len(4)]
#    playerID yearID stint teamID
# 1 abercda01   1871     1    TRO
# 2  addybo01   1871     1    RC1
# 3 allisar01   1871     1    CL1
# 4 allisdo01   1871     1    WS3
# 5 ansonca01   1871     1    RC1
# 6 armstbo01   1871     1    FW1
# ...
# Display all rows of the playerID variable (two equivalent forms).
Batting$playerID
Batting[, c("playerID")]

# Display the first 10 rows of the playerID and yearID variables.
Batting[1:10, c("playerID", "yearID")]
# Add a new column holding the row-wise sum of the CS and SO columns;
# with() lets the expression refer to the columns by name.
Batting$CS_SO <- with(Batting, CS + SO)

head(Batting[, c("CS", "SO", "CS_SO")])
#   CS SO CS_SO
# 1  0  0     0
# 2  1  0     1
# 3  1  5     6
# 4  1  2     3
# 5  2  1     3
# 6  1  1     2
# Display data from 1871 only. yearID is stored as an integer, so it is
# compared against a number rather than a quoted string.
Batting[Batting$yearID == 1871, ]

# Display data for player abercda01 from 1871. Both conditions are combined
# element-wise with &, so only rows satisfying both are kept.
Batting[Batting$yearID == 1871 & Batting$playerID == "abercda01", ]
# Count the distinct values of individual columns.
length(unique(Batting$playerID))  # [1] 19428
length(unique(Batting$teamID))    # [1] 149
length(unique(Batting$yearID))    # [1] 148
# Number of distinct values in every column. vapply() visits the columns of
# the data frame directly and guarantees one integer per column, whereas
# apply() would first coerce the whole data frame to a character matrix.
vapply(Batting, function(x) length(unique(x)), integer(1))
# playerID   yearID    stint   teamID ...
#    19428      148        5      149 ...
# Keep only the rows from 1872. yearID is an integer column, so compare it
# with a number instead of relying on implicit conversion to character.
Batting_1872 <- Batting[Batting$yearID == 1872, ]
install.packages("dplyr")  # download and install the package (needed once)
library(dplyr)             # attach the package in the current session
# Load the Salaries data set from the Lahman package and look at its first rows.
data("Salaries", package = "Lahman")
head(Salaries)
#   yearID teamID lgID  playerID salary
# 1   1985    ATL   NL barkele01 870000
# 2   1985    ATL   NL bedrost01 550000
# 3   1985    ATL   NL benedbr01 545000
# 4   1985    ATL   NL  campri01 633333
# 5   1985    ATL   NL ceronri01 625000
# 6   1985    ATL   NL chambch01 800000
# Keep only the listed columns, in the listed order.
salaries <- Salaries %>%
  select(playerID, yearID, teamID, salary)

head(salaries)
#    playerID yearID teamID salary
# 1 barkele01   1985    ATL 870000
# 2 bedrost01   1985    ATL 550000
# ...
# Add the salary column to the batting data. A left join keeps every row of
# Batting; rows with no matching player/year/team in salaries get NA.
batting <- Batting %>%
  left_join(salaries, by = c("playerID", "yearID", "teamID"))

# Average salary, ignoring the rows where no salary is available.
mean(batting[["salary"]], na.rm = TRUE)
# Sort the rows by player, then year, then stint.
batting <- batting %>%
  arrange(playerID, yearID, stint)

# Show the first rows of the sorted data.
head(batting)
#    playerID yearID stint teamID lgID ...  salary
# 1 aardsda01   2004     1    SFN   NL ...  300000
# 2 aardsda01   2006     1    CHN   NL ...      NA
# 3 aardsda01   2007     1    CHA   AL ...  387500
# 4 aardsda01   2008     1    BOS   AL ...  403250
# 5 aardsda01   2009     1    SEA   AL ...  419000
# 6 aardsda01   2010     1    SEA   AL ... 2750000
# Keep the rows from 1900 onwards with a BB value above 20; conditions
# separated by commas in filter() must all hold.
eligibleHitters <- filter(batting, yearID >= 1900, BB > 20)

# Order those rows from the largest BB value to the smallest.
goodPlayers <- arrange(eligibleHitters, desc(BB))