## Podiplomski studij statistike, 2002/2003 ## V. Batagelj: Informacijska tehnologija v analizi podatkov ## Slucajna zaporedja v R-ju ## 17.jan 03 / 06.dec 02 ## # ------------------------------------------------------------------ ## enakomerna porazdelitev na [0,1) random <- function(){return(runif(1,0,1))} dice <- function(n=6){return(1+trunc(n*random()))} s <- NULL; for(i in 1:10000) s <- c(s,dice(10)) t <- table(s); t/sum(t) plot(t) # ------------------------------------------------------------------ ## geometrijska porazdelitev geometric <- function(p){ if (p>=1) return(1) if (p<=0) return(Inf) return(trunc(log(1-random())/log(1-p))+1) } # ------------------------------------------------------------------ ## tabelaricna porazdelitev # preprosta tabelaR <- function(t){ x <- random(); k <- 0; while (x > 0) {k <- k+1; x <- x - t[[1]][k]} return(t[[2]][k]) } p <- c(1,2,3,2,1); n <- c("mon","tue","wed","thu","fri") t <- list( p/sum(p), n) s <- NULL; for (i in 1:10) s <- c(s, tabelaR(t)) p <- c(1,2,3,2,1,6); n <- c(" . "," : ","...",": :",":.:",":::") t <- list( p/sum(p), n) s <- NULL; for (i in 1:5000) s <- c(s, tabelaR(t)) z <- table(s); z <- z/sum(z) -------------------------------------------------------------------- # s predpripravo - urejanje pripravi <- function(x,l){ t <- sort(x,method="quick",index.return=TRUE) q <- rev(t$x); p <- rev(t$ix) q <- q/sum(q); s <- l[p[1]] for (i in 2:length(x)) {q[i] <- q[i-1]+q[i]; s <- c(s,l[p[i]])} return(list(q,s)) } tabrnd <- function(t){ q <- t[[1]]; r <- random(); i <- 1 while (r>q[i]) i <- i+1 return(t[[2]][i]) } q <- c(1,2,3,2,1); n <- c("mon","tue","wed","thu","fri") t <- pripravi(q,n) d <- NULL; for(i in 1:10000) d <- c(d,tabrnd(t)) f <- rep(0,5); for (i in 1:5) f[i] <- length(which(d==n[i])) q/sum(q); f/sum(f) #-------------------------------- ## Poisson PoissonRnd <- function(lambda){ k <- 0; p <- exp(-lambda); x <- random()-p while (x > 0) { k <- k+1; p <- p*lambda/k; x <- x-p } return(k) } s <- NULL; for (i in 1:10) s <- c(s, PoissonRnd(0.3)) s <- NULL; for (i in 1:10000) s <- c(s, PoissonRnd(0.3)) table(s) z <- rpois(10000,0.3) table(z) # -------------------------------------------------------------------- # Cauchyeva porazdelitev cauchyRnd <- function(a=1,b=0){ return( tan(pi*(random()-0.5))/a + b ) } s <- NULL; for(i in 1:5000) s <- c(s,cauchyRnd()) plot(s) # ------------------------------------------------------------------ ## von Neumannov postopek vonNeumann <- function(a,b,G,g,...){ repeat { s <- a + random()*(b-a) if (G*random()<=g(s,...)) return(s) } } trik <- function(x){if (x>2) return(0); if (x<0) return(0); if (x<1) return(x) else return(2-x) } vonNeumann(-0.5,2.5,1,trik) # =================================== # normalna (Gaussova) porazdelitev # gaussRnd <- function(st=1,m=0,s=1){ if (st==0){ new <- TRUE; Gauss <<- list(m=m,s=s,n=new,r=0) } else {m <- Gauss$m; s <- Gauss$s; new <- Gauss$n } if (new) {p <- sqrt(-2*log(random())); q <- 2*pi*random() x <- p*sin(q); Gauss$r <<- p*cos(q) } else x <- Gauss$r Gauss$n <<- !new return(m+s*x) } s <- gaussRnd(0,180,15) for(i in 1:10000) s <- c(s,gaussRnd()) hist(s,nclass=50) # ------------------------------------------------------------------ ## nakljucna smer v prostoru # dir3D <- function(){ return(c(2*pi*random(),acos(1-2*random()))) } vector3D <- function(dir){ return( c( sin(dir[2])*cos(dir[1]), sin(dir[2])*sin(dir[1]), cos(dir[2]) )) } points3D <- function(n){ cat(file="sphere.net",c("*vertices ",n,"\n")) for (i in 1:n) { cat(file="sphere.net", i, i, vector3D(dir3D()), "\n", append=TRUE) } } # ------------------------------------------------------------------ ## k-razse"zni enotski vektor - smer direction <- function(k=3){ x <- rnorm(k) return( x/sqrt(sum(x^2)) ) } m <- NULL; for(i in 1:1000) m <- rbind(m,direction(5)) pairs(m) # k-razsezni elipsoid elipsoid <- function(T){ k <- dim(T)[1]; r <- random()^(1/k) x <- rnorm(k); y <- x*r/sqrt(sum(x^2)) return(t(t(T) %*% y)) } R <- c( 4.0000000, 1.1395235, 1.775876, 0.7753723, 1.1395235, 4.0000000, 1.065415, 0.7881692, 1.7758761, 1.0654147, 4.000000, 1.5841046, 0.7753723, 0.7881692, 1.584105, 4.0000000 ) dim(R) <- c(4,4); T <- chol(R) s <- NULL; for (i in 1:2000) s <- rbind(s,elipsoid(T)) pairs(s) R <- matrix(0,4,4); diag(R) <- c(1,2,3,4); T <- chol(R) # k-razsezna normalna porazdelitev multinormal <- function(T){ return(t(t(T) %*% rnorm(dim(T)[1]))) } data(longley); pairs(longley) R <- cor(longley); T <- chol(R) s <- NULL; for (i in 1:2000) s <- rbind(s,multinormal(T)) pairs(s) (C <- cor(s)) R # ------------------------------------------------------------------ ## Levy x <- c(0,0); s <- NULL for (i in 1:100) { x <- x + direction(2); s <- rbind(s,x) } plot(s); lines(s) x <- c(0,0); s <- NULL for (i in 1:1000) { x <- x + cauchyRnd(3)*direction(2); s <- rbind(s,x) } plot(s); lines(s) x <- c(0,0); s <- NULL for (i in 1:2000) { x <- x + cauchyRnd()*direction(4); s <- rbind(s,x) } plot(s); lines(s) x <- c(0,0); s <- NULL; for (i in 1:1000) { x <- x + cauchyRnd(0.1)*direction(2); s <- rbind(s,x)} plot(s); # ------------------------------------------------------------------ ## vzorci shrink <- function(s){ t <- s[1] for(i in 2:length(s)) if(s[i-1] != s[i]) t <- c(t,s[i]) return(t) } vzorecP <- function(n,m){ s <- NULL for(i in 1:m) s <- c(s,dice(n)) return(s) } t <- NULL; for(k in 1:100) t <- c(t,length(shrink(sort(vzorecP(100,10))))) vzorecV <- function(n,m){ s <- NULL for(i in 1:n) if (n*random() < m) s <- c(s,i) return(s) } # vzorec m razlicnih izmed n vzorecN <- function(n,m){ k <- 0; s <- NULL for(i in 0:n) if ((n-i)*random() < m-k) { s <- c(s,i); k <- k+1 if (k==m) return(s) } } #-------------------------------- ## Piran u <- NULL; k <- 0; n <- 2000 for(i in 1:n){ if (random()^2+random()^2 < 1) k <- k+1; u <- c(u,k)} p <- u/seq(n)*4 plot(p,pch=20) lines(c(-10,n),c(pi,pi),col="red") # =================================== # Monte Carlo - Sandokan # sandokan <- function(n,m=-1,r=100){ if (m<0) m <- n s <- NULL for ( i in 1:r ){ album <- rep(TRUE,n) razlicnih <- 0; slikic <-0 while (razlicnih < m) { k <- dice(n); slikic <- slikic + 1 if (album[k]) { album[k] <- FALSE; razlicnih <- razlicnih + 1 } } s <- c(s, slikic) } } sandokanT <- function(n,m=-1){ if (m<0) m <- n h <- 0 for (i in (n-m+1):n) h <- h + 1/i return(n*h) } slika1 <- function(t,s,...){ n <- length(s) sredina <- cumsum(s)/1:n odklon <- sqrt(cumsum(s^2)/1:n - sredina^2) plot(s,pch=16,cex=0.2,...) lines(1:n,sredina,col="red") lines(1:n,sredina+odklon,col="blue") lines(1:n,sredina-odklon,col="blue") abline(h=t,col="goldenrod") } slika2 <- function(s,breaks=50,...){ t<-NULL; n <- length(s) for(x in min(s):max(s)) {t <- c(t,5*n*dnorm(x,mean(s),sqrt(var(s))))} hist(s,breaks=breaks,...) lines(min(s):max(s),t) } s <- sandokan(400,350,2000) slika1(sandokanT(400,350),s) slika2(s) pdf("sandokan.pdf") slika1(sandokanT(400,350),s,ylim=c(820,835)) dev.off() # =================================== # Bootstrap # v <- c(2.3, 3.4, 2.5, 3.2, 2.7, 2.6, 3.1, 3.5, 2.9, 2.5 ) plot(sort(v),1:10/10,type="S") n <- 20; delta <- 0.01; s <- NULL for(k in 1:n){s <- c(s,mean(sample(v,length(v),replace=TRUE)))} s0 <- mean(v); s1 <- mean(s); bs <- s1 - s0 ses <- sqrt(sum((s-s1)^2)/(n-1)) ns <- (2*ses/delta)^2; pc <- 2*ses/sqrt(n) s0; s1; bs; ses; ns; pc # mean -> median