# 10 Days of Statistics my solution

my solution

## Day 1: Quartiles

Sample Input

Sample Output

Explanation

Lower half (L): 3, 5, 7, 8

Upper half (U): 13, 14, 18, 21

Sample Input

Sample Output

# R odds and ends R basics

examine type: class() (Python: type())
vectorc(...)one-dimension arrays
same type
name vectornames(vect)<-c(...)
slicing vectorvect[3]
vect[c(3,5,6)]
starting from 1 compares to 0 in Python
2:5 includes 5 (the end of the range is inclusive)
vect[3:5]
use names as indexvec[c('name1','name2',...)]
calculate average: mean() (in Python, you have to import another library: np.mean())
vectors comparison: c(2,3,4,5)>3 (in Python, this requires numpy or pandas)
logical selection vect[c(...)>n]
vect[vect2(logical)]
in Python, pandas is common
matrix matrix()
matrix(1:9, byrow = TRUE, nrow = 3)
two-dimensional
same data type
np.matrix()
Naming a matrixrownames(my_matrix) <- row_names_vector

colnames(my_matrix) <- col_names_vector
dimnames =
list(rowname, columnname)
Sum of values of each rowrowSums(some_matrix)ndarray.sum(axis=1)
df.sum(axis=1)
add column(s) to a matrixbigger<- cbind(matrix1, matrix2, ...)pd.concat([df1,df2],axis=1)
Adding a row(s) to a matrixrbind(matrix1, matrix2, ...)pd.concat([df1,df2],axis=0)
df1.append(df2)
Sum of values of each columnndarray.sum(axis=0)
df.sum(axis=0)
slicing Matrixmatrix[row,col]
my_matrix[1,2]
my_matrix[1:3,2:4]
my_matrix[ ,1]
my_matrix[2, ]
factorsfactor()categorical
Convert vector to factor: my_factor <- factor(vector, ...)
order/ non-ordertemp_vector <- c("High", "Low", "High","Low", "Medium")

factor_temp_vector <- factor(temp_vector, order = TRUE, levels = c("Low", "Medium", "High"))
nominal categorical variable

ordinal categorical variable.
s = pd.Series(["a","b","c","a"], dtype="category")

raw_cat = pd.Categorical(["a","b","c","a"], categories=["b","c","d"], .... ordered=False)
Factor levelslevels()
levels(factor_vector) <- c("name1", "name2",...)
summary()summary(my_var)df.describe()
Series.value_counts()
orderedfactor_speed_vector <-factor(speed_vector,ordered=TRUE,levels=c('slow','fast','insane'))
ordered factor can be compared
tail(df)
each column must be same data type
examine structure of a dataframestr(df)
create data framedata.frame(vectors)
slicing df[rows,columns]
df[row2,] entire row2
df[,column3] entire column3
use name slicingdf[2:5, 'name']
df['name', ]
df[ ,'name']
subset()
create
subset(planets_df, diameter<1)
==
planets_df[planets_df[,'diameter']<1,]
sortingorder()
returns ranked index not values
values: a[order(a)]
sorting df: indexes <- order(df$column3)

df[indexes, ]
listmy_list <- list(comp1, comp2 ...)
Creating a named listmy_list <- list(name1 = your_comp1,
name2 = your_comp2)
same as abovemy_list <- list(your_comp1, your_comp2)
names(my_list) <- c("name1", "name2")
selecting elements from a listshining_list[["reviews"]]
==
shining_list$reviews
list[[2]][1]
add data to listext_list <- c(my_list , my_val)
comparison& and
| or
! not
double sign only compares the first element (since R 4.3.0 it is an error to give && or || an operand longer than one)
&&
||
if syntax in Rif (condition)
{do sth}
else if (condition)
{do sth}
else
{do sth}
hotdogs2 <- read.delim("hotdogs.txt", header = FALSE, col.names = c("type", "calories", "sodium"), colClasses = c("factor", "NULL", "numeric"))
check environmentenvironment(func)
specify func without a name: (function(x){x+1})(2)

=> 3
mean()mean(c(1:9, NA),trim=0.1,na.rm=TRUE)trim -> remove outliers
environment> f<-function () x
> x<-99
> f()
[1] 99
exists()a<-5
exists("a")
TRUE
vector properties: typeof()
length()
null value in R: NULL (absence of the entire vector)
NA (absence of one value in a vector)
check for missing values: is.na() (use is.null() to check for NULL)
sequenceseq(1,10)
1:10
merge vectorc(vector1, vector2, singlevalue, ...)
paste()
paste0()
paste() sep=" "
paste0 sep=""
string.join(list)
paste0("year_", 1:5)[1] "year_1" "year_2" "year_3" "year_4" "year_5"
plottinghist(one_dim_data)
hist(df$column)
boxplot(multi_dim_data)
boxplot(df)