##################################################
# Name: StochasticVariables
# Author: F.Nakai
# Version: 2018.12.05
# Description: This is a test code of R seminar
##################################################

#excersise 1
###########  please type the code below. #############


#example of function
YESNO <- function(a){       # assign the function to "YESNO"
  if(a > 5){           # if the input is larger than 5, return "Yes"
    return("Yes")
  }
  else if(a <= 5){      # if the input is smaller than 5, return "No"
    return("No")
  }
}

#See the result of function YESNO
YESNO(10)
YESNO(8)
YESNO(6)
YESNO(4)
YESNO(2)

#excersise 2
###########  please type the code below. ###########



###########  See the result of function  ###########
###########  please type the code below. ###########




#equipped functions
#Seeing what the function is
?mean

#convenient functions
#Generating vector
Only12 <- c(1,2,1,2,1,2,1) 
Only12
VYesNo <- c("yes","No","yes","yes","No") 

#Checking length of the vector
length(Only12)
length(VYesNo)

#Generating cross-tabulation table of the vector
ftable(Only12)
ftable(VYesNo)

#Generating regular vector
V1_100 <- c(1:100)
V1_100_10 <- seq(1, 100, by = 10)


#Extracting data ex)check the 3rd element of the vector
V1_100_10[3]

#Conditional expression ex) show elements whose value is smaller than 30
V1_100_10[V1_100_10 < 30]

#Returns the index of TRUE of the logical value object
which(VYesNo=="yes")

#excersise 3
###########  please type the code below. ###########





#excersise 4
###########  please type the code below. ###########




#see results
###########  please type the code below. ###########




#See the probability of binomial distribution
x4 <- dbinom(4, size=12, prob=0.2) 
x4

x0_4 <- dbinom(0:4, size=12, prob=0.2) 
x0_4
sum(x0_4)

###############################
#binomial distributions
###############################
#generating stochastic variables
rbinom(100, 50, 0.2)
rbinom(5, 50, 0.2)
rbinom(n=100, size=50, prob=0.2)


#Probability density function
dbinom(4, 12, prob=0.2)
pbinom(4, 12, prob=0.2)
qbinom(0.5, 12, prob=0.2)
k <- rbinom(1000, 100, prob=0.2)
hist(k)

#normal distribution
###########  please type the code below. ###########




#poisson distribution
###########  please type the code below. ###########



#graphics
x <- rnorm(100)
plot(x, main="Simple Time Series")

#binomial distribution
#drawing PDF
gdbinom <- dbinom(0:50, 12, prob=0.2)
plot(gdbinom,type="o")

#layering graphs
#B(1,0.2)
gdbinom <- dbinom(0:20, 1, prob=0.2)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=1)
par(new=T)

#B(5,0.2)
gdbinom <- dbinom(0:20, 5, prob=0.2)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=2)
par(new=T)

#B(10,0.2)
gdbinom <- dbinom(0:20, 10, prob=0.2)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=3)
par(new=T)

#B(15,0.2)
gdbinom <- dbinom(0:20, 15, prob=0.2)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=4)
par(new=T)

#advanced drawing techniques by par(new=T) and for()
for(i in 1:100){
  gdbinom <- dbinom(0:20, i, prob=0.2)
  plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=i)
  par(new=T)
}

#drawing PDF

#B(15,0.1)
gdbinom <- dbinom(0:50, 12, prob=0.1)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=1)
par(new=T)

#B(15,0.3)
gdbinom <- dbinom(0:50, 12, prob=0.3)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=2)
par(new=T)

#B(15,0.5)
gdbinom <- dbinom(0:50, 12, prob=0.5)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=3)
par(new=T)

#B(15,0.7)
gdbinom <- dbinom(0:50, 12, prob=0.7)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=4)
par(new=T)

#B(15,0.9)
gdbinom <- dbinom(0:50, 12, prob=0.9)
plot(gdbinom,type="b",xlim = c(0,25),ylim = c(0,0.9),col=5)
par(new=T)

#advanced drawing techniques by par(new=T) and for()
v <- seq(0,0.9,0.1)
for(i in v){
  gdbinom <- dbinom(0:20, 12, prob=i)
  plot(gdbinom,type="b",xlim = c(0,15),ylim = c(0,0.6),col=i*10)
  par(new=T)
}

#sampling from B(12,0.2)
gbinom <- rbinom(1000, 12, prob=0.2)
hist(gbinom)

###############################
#normal distribution
###############################
#drawing PDF
gdnorm <- dnorm(0:150, mean=72, sd=15.2)
plot(gdnorm,type="o")


#advanced drawing techniques by par(new=T) and for()
#change mean
a <- 0
v <- seq(50,80,5)
for(i in v){
  gdnorm <- dnorm(0:150, mean=i, sd=15.2)
  plot(gdnorm, type="l",xlim = c(0,150),ylim = c(0,0.03),col=a)
  par(new=T)
  a <- a + 1
}

#change sd
a <- 0
for(i in 1:20){
  gdnorm <- dnorm(0:150, mean=72, sd=i)
  plot(gdnorm, type="l",xlim = c(0,150),ylim = c(0,0.5),col=a)
  par(new=T)
  a <- a + 1
}

#sampling from N(72,15.2)
grnorm <- rnorm(1000, mean=72, sd=15.2)
hist(gbinom)


###############################
#poisson distribution
###############################
#drawing PDF
gpois <- dpois(0:50, lambda=12) 
plot(gpois,type="o")

#advanced drawing techniques by par(new=T) and for()
#change mean

for(i in 1:20){
  gpois <- dpois(0:50, lambda=i) 
  plot(gpois, type="b",xlim = c(0,60),ylim = c(0,0.5),col=i)
  par(new=T)
}

#sampling from N(72,15.2)
grpois <- rpois(100, lambda=12) 
hist(grpois)

# ------------------------------------------------
# データフレーム・行列の生成と操作：データの前処理編
# Rを使う多くの場面では、実際のデータを使用することになります。
# ------------------------------------------------

#Rには組み込みデータが用意されている。
#see what kind of data is available
data()

#we use iris data set today
#irisのデータ型を確認
class(iris)

#格納されているデータを確認
str(iris)

#view data
iris$Sepal.Length
iris$Sepal.Width
iris$Petal.Length
iris$Petal.Width
iris$Species

length(iris$Sepal.Length)

#plot and see their relationships intuitivelly
plot(iris)

#Summary of statistics
summary(iris)
boxplot(iris)


# 名古屋市の人口データを扱う
#データをデータフレームオブジェクトに付値
Nagoya_ku <- read.csv()

#データを様々な視点から見てみる
#データ型
class(Nagoya_ku)

#格納されているデータを確認
str(Nagoya_ku)

#全データ表示
Nagoya_ku

#昭和区のデータだけと取り出す。
#"Nagoya_ku"の"地域名"という列が"昭和区"に一致する要素を行番号に持つようなデータを抽出し、新たなオブジェクト"Showa"に付値する。
Showa <- Nagoya_ku[Nagoya_ku$地域名=="昭和区",]

#緑区のデータだけと取り出す。
#"Nagoya_ku"の"地域名"という列が"緑区"に一致する要素を行番号に持つようなデータを抽出し、新たなオブジェクト"Midori"に付値する。
Midori <- Nagoya_ku[Nagoya_ku$地域名=="緑区",]

#北区のデータだけと取り出す。
#"Nagoya_ku"の"地域名"という列が"北区"に一致する要素を行番号に持つようなデータを抽出し、新たなオブジェクト"Kita"に付値する。
Kita <- Nagoya_ku[Nagoya_ku$地域名=="北区",]

#Showaのクラスはデータフレームである。以下でチェックする。
class(Showa)

# ------------------------------------------------
# データフレーム・行列の生成と操作：可視化
# 関数plot()を使って、散布図や折れ線グラフを描くことができる。
# パッケージ"ggplot"を使えば、非常に美しい図を描くこともできる。
# ggplotについて（参考）：http://r4ds.had.co.nz/data-visualisation.html
# ------------------------------------------------

#昭和区
Showa_plot <- plot(Showa$西暦, Showa$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                   main = "昭和区の1世帯当たりの人員の推移（1998年～2018年）",  #グラフ上部のタイトルを指定
                   ylab="1世帯当たりの人員（人／1世帯）", #y軸の軸名を表示
                   xlab="年" #y軸の軸名を表示
)

#緑区
Midori_plot <- plot(Midori$西暦, Midori$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                    main = "緑区の1世帯当たりの人員の推移（1998年～2018年）", 
                    ylab="1世帯当たりの人員（人／1世帯）", 
                    xlab="年",
                    col="red"
)

#北区
Kita_plot <- plot(Kita$西暦, Kita$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                  main = "北区の1世帯当たりの人員の推移（1998年～2018年）", 
                  ylab="1世帯当たりの人員（人／1世帯）", 
                  xlab="年",
                  col="blue"
)

#重ね書きをすることで、視覚的に推移を比較する。
#xlim, ylimというパラメータで軸の範囲を揃える（たとえば、ylim=c(0,5)）ことで、スケールがばらばらになるのを防ぐ。
#typeというパラメータは、線分やプロットの形式を指定する。

#昭和区
Showa_plot <- plot(Showa$西暦, Showa$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                   ylab="", 
                   xlab="",
                   xlim=c(1920,2017),
                   ylim=c(0,5),
                   xaxt="n",
                   yaxt="n",
                   col="black",
                   type = "b"
)

par(new=T)

#緑区
Midori_plot <- plot(Midori$西暦, Midori$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                    ylab="", 
                    xlab="",
                    xlim=c(1920,2017),
                    ylim=c(0,5),
                    xaxt="n",
                    yaxt="n",
                    col="red",
                    type = "b"
)

par(new=T)

#北区
Kita_plot <- plot(Kita$西暦, Kita$X1世帯あたり人員_人by1世帯, #NagoyaPopulationの中の"density"（人口密度）の列のみをプロット
                  main = "昭和区・緑区・北区の1世帯当たりの人員の推移（1998年～2018年）", 
                  ylab="1世帯当たりの人員（人／1世帯）", 
                  xlab="年",
                  xlim=c(1920,2017),
                  ylim=c(0,5),
                  col="blue",
                  type = "b"
)