10 Days of Statistics my solution

Environment: Python2

Day 0: Mean, Median, and Mode

# Enter your code here. Read input from STDIN. Print output to STDOUT
wtf=raw_input()
wtf2=raw_input()

print wtf,type(wtf)
print wtf2, type(wtf2)

# Your Output (stdout)

10 <type 'str'>
64630 11735 14216 99233 14470 4978 73429 38120 51135 67060 <type 'str'>

my solution 

def quartile_1(l):
    return sorted(l)[int(len(l) * .25)]

def median(l):
    return sorted(l)[len(l)/2]

def quartile_3(l):
    return sorted(l)[int(len(l) * .75)]

 

li=wtf2.split()
ll=[float(i) for i in li]


def mean(x):
    return sum(x)/len(x)

def median(x):
    x.sort()
    while len(x)>2:
        x=x[1:-1]
    return sum(x)/len(x)
    
def mode(x):
 x=[int(i) for i in li]
 dic={}
 for i in x:
 dic[i]=0
 for i in x:
 dic[i]+=1
 m=max(dic.items(), key=lambda x: x[1])[1]
 
 c=[]
 for i in dic.items():
 if i[1]==m:
 c.append(i[0])
 return min(c)
 
print mean(ll)
print median(ll)
print mode(ll)


Environment: R

# Enter your code here. Read input from STDIN. Print output to STDOUT

x <- suppressWarnings(readLines(file("stdin")))
x <- strsplit(x,' ')
x <- lapply(x,as.numeric)[[2]]
#print(x)
print(mean(x))
print(median(x))

getmode <- function(v) {
   uniqv <- unique(v)
   uniqv[which.max(tabulate(match(v, uniqv)))]
}
print(min(x))

Day 0: Weighted Mean

# Enter your code here. Read input from STDIN. Print output to STDOUT
c=raw_input()
a=raw_input()
b=raw_input()

# Input (stdin)
# 5
# 10 40 30 50 20
# 1 2 3 4 5


a=[float(i) for i in a.split(' ')]
b=[float(i) for i in b.split(' ')]

up = [i*j for i,j in zip(a,b)]

print round(sum(up)/sum(b),1)

Day 1: Quartiles

Sample Input

9
3 7 8 5 12 14 21 13 18

Sample Output

6
12
16

Explanation

Lower half (L): 3, 5, 7, 8

Upper half (U): 13, 14, 18, 21

def quartile_1(l):
    return sorted(l)[int(len(l) * .25)]

def median(l):
    return sorted(l)[len(l)/2]

def quartile_3(l):
    return sorted(l)[int(len(l) * .75)]

 

# Enter your code here. Read input from STDIN. Print output to STDOUT

y=raw_input()
x=raw_input()

y=int(y)
x=[int(i) for i in x.split(' ')]
x.sort()

if y%2==0:
    print median(x[:x.index(median(x)[1])+1])[0]
else:
    
    print median(x[:x.index(median(x)[1])])[0]
 
print(median(x)[0])
    
if y%2==0:
     print median(x[x.index(median(x)[2]):])[0]
else:
     print median(x[x.index(median(x)[2])+1:])[0]

if y%2==0:
    print median(x[:x.index(median(x)[1])+1])[0]
else:
    
    print median(x[:x.index(median(x)[1])])[0]
 
print(median(x)[0])
    
if y%2==0:
     print median(x[x.index(median(x)[2]):])[0]
else:
     print median(x[x.index(median(x)[2])+1:])[0]


Or

# Enter your code here. Read input from STDIN. Print output to STDOUT


y=raw_input()
x=raw_input()

y=float(y)
x=[int(i) for i in x.split(' ')]
x.sort()

if y%2==1:
    m=round(y/2)
    l=(m-1)/2
    r=m+l
    
    m,l,r=int(m),int(l),int(r)
    
    print (x[l-1]+x[l])/2
    print x[m-1]
    print (x[r-1]+x[r])/2
    
if y%2==0:
    m=y/2
    l=round(m/2)
    r=m+l
    
    m,l,r=int(m),int(l),int(r)

    if m%2==0:
        print (x[l-1]+x[l])/2
        print (x[m-1]+x[m])/2
        print (x[r-1]+x[r])/2
    else:
        print x[l-1]
        print (x[m-1]+x[m])/2
        print x[r-1]        

Day 1: Standard Deviation

Sample Input

5
10 40 30 50 20

Sample Output

14.1
# Enter your code here. Read input from STDIN. Print output to STDOUT
y=raw_input()
y=int(y)
x=raw_input()


x=[int(i) for i in x.split(' ')]
x.sort()

m=sum(x)/float(len(x))
square=map(lambda x:(x-m)**2, x)
mu=(sum(square)/len(x))**0.5

print mu

 

R odds and ends R basics

DescriptionRCommentsPython
exam typeclass()type()
vectorc(...)one-dimension arrays
same type
name vectornames(vect)<-c(...)
slicing vectorvect[3]
vect[c(3,5,6)]
starting from 1 compares to 0 in Python
2:5!ERROR! unexpected operator '='includes 5
vect[3:5]!ERROR! illegal character '['
use names as indexvec[c('name1','name2',...)]
calculate averagemean()in Python, have to import other librariesnp.mean()
vectors comparison c(2,3,4,5)>3in Python, have to in numpy, pandas
logical selection vect[c(...)>n]
vect[vect2(logical)]
in Python, pandas is common
matrix matrix()
matrix(1:9, byrow = TRUE, nrow = 3)
two-dimensional
same data type
np.matrix()
Naming a matrixrownames(my_matrix) <- row_names_vector

colnames(my_matrix) <- col_names_vector
dimnames =
list(rowname, columnname)
Sum of values of each rowrowSums(some_matrix)ndarray.sum(axis=1)
df.sum(axis=1)
add column(s) to a matrixbigger<- cbind(matrix1, matrix2, ...)pd.concat([df1,df2],axis=1)
Adding a row(s) to a matrixrbind(matrix1, matrix2, ...)pd.concat([df1,df2],axis=0)
df1.append(df2)
Sum of values of each columnndarray.sum(axis=0)
df.sum(axis=0)
slicing Matrixmatrix[row,col]
my_matrix[1,2]
my_matrix[1:3,2:4]
my_matrix[ ,1]
my_matrix[2, ]
factorsfactor()categorical
Convert vector to factormy_factor<-(vector,
order/ non-ordertemp_vector <- c("High", "Low", "High","Low", "Medium")

factor_temp_vector <- factor(temp_vector, order = TRUE, levels = c("Low", "Medium", "High"))
nominal categorical variable

ordinal categorical variable.
s = pd.Series(["a","b","c","a"], dtype="category")

raw_cat = pd.Categorical(["a","b","c","a"], categories=["b","c","d"], .... ordered=False)
Factor levelslevels()
levels(factor_vector) <- c("name1", "name2",...)
summary()summary(my_var)df.describe()
Series.value_counts()
orderedfactor_speed_vector <-factor(speed_vector,ordered=TRUE,levels=c('slow','fast','insane'))
ordered factor can be compared
data frame head(df)
tail(df)
each column must be same data type
examine structure of a dataframestr(df)
create data framedata.frame(vectors)
slicing df[rows,columns]
df[row2,] entire row2
df[,column3] entire column3
use name slicingdf[2:5, 'name']
df['name', ]
df[ ,'name']
subset()
create
subset(planets_df, diameter<1)
==
planets_df[planets_df[,'diameter']<1,]
sortingorder()
returns ranked index not values
values: a[order(a)]
sorting dfindexes=order(df$column3)

df[indexes, ]
listmy_list <- list(comp1, comp2 ...)
Creating a named listmy_list <- list(name1 = your_comp1,
name2 = your_comp2)
same as abovemy_list <- list(your_comp1, your_comp2)
names(my_list) <- c("name1", "name2")
selecting elements from a listshining_list[["reviews"]]
==
shining_list$reviews
list[[2]][1]
add data to listext_list <- c(my_list , my_val)
comparison& and
| or
! not
double sign only compares the first element
&&
||
if syntax in Rif (condition)
{do sth}
else if (condition)
{do sth}
else
{do sth}
read dataread.table
read.delim
read.csv
read.csv2
hotdogs2 <- read.delim("hotdogs.txt", header = FALSE, col.names = c("type", "calories", "sodium"), colClasses = c("factor", "NULL", "numeric"))
check environmentenvironment(func)
specify func without a namefunction(x){x+1}(2)

=> 3
mean()mean(c(1:9, NA),trim=0.1,na.rm=TRUE)trim -> remove outliers
environment> f<-function () x
> x<-99
> f()
[1] 99
exists()a<-5
exists("a")
TRUE
vector propertiestypeoff()
length()
nun value in RNULL (absent of entire vector)
NA (absent of one value in vector)
check nunis.na()
sequenceseq(1,10)
1:10
merge vectorc(vector1, vector2, singlevalue, ...)
paste()
paste0()
paste() sep=" "
paste0 sep=""
string.join(list)
paste0("year_", 1:5)[1] "year_1" "year_2" "year_3" "year_4" "year_5"
plottinghist(one_dim_data)
hist(df$column)
boxplot(multi_dim_data)
boxplot(df)