I have the following situation:
A variable moves +1 with prob=0.5 and -1 with prob=-0.5 ... if this Markov Chain starts at position=5
I tried to do this as follows (I created a tracking variable to see where the simulation gets stuck):
# the simulations can take a long time to run, I interrupted them
library(ggplot2)
library(gridExtra)
n_sims <- 100
times_to_end_0 <- numeric(n_sims)
times_to_end_0_after_10 <- numeric(n_sims)
paths_0 <- vector("list", n_sims)
paths_0_after_10 <- vector("list", n_sims)
for (i in 1:n_sims) {
print(paste("Running simulation", i, "for situation 1..."))
y <- 5
time <- 0
path_0 <- c(y)
while(y > 0) {
step <- sample(c(-1, 1), 1, prob = c(0.5, 0.5))
y <- y + step
path_0 <- c(path_0, y)
time <- time + 1
if (y == 0) {
times_to_end_0[i] <- time
paths_0[[i]] <- data.frame(time = 1:length(path_0), y = path_0, sim = i)
break
}
}
print(paste("Running simulation", i, "for situation 2..."))
y <- 5
time <- 0
reached_10 <- FALSE
path_0_after_10 <- c(y)
while(y > 0 || !reached_10) {
step <- sample(c(-1, 1), 1, prob = c(0.5, 0.5))
y <- y + step
path_0_after_10 <- c(path_0_after_10, y)
time <- time + 1
if (y == 10) {
reached_10 <- TRUE
}
if (y == 0 && reached_10) {
times_to_end_0_after_10[i] <- time
paths_0_after_10[[i]] <- data.frame(time = 1:length(path_0_after_10), y = path_0_after_10, sim = i)
break
}
}
}
df1 <- data.frame(time = times_to_end_0)
df2 <- data.frame(time = times_to_end_0_after_10[times_to_end_0_after_10 > 0])
mean1 <- mean(log(df1$time))
mean2 <- mean(log(df2$time))
p1 <- ggplot(df1, aes(x = log(time))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean1)), color = "red", linetype = "dotted") +
labs(title = paste("Density of Times to Reach 0 - Mean Time:", round(exp(mean1), 2)), x = "Time", y = "Density") + theme_bw()
p2 <- ggplot(df2, aes(x = log(time))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean2)), color = "red", linetype = "dotted") +
labs(title = paste("Density of Times to Reach 0 After Reaching 10 - Mean Time:", round(exp(mean2), 2)), x = "Time", y = "Density") + theme_bw()
plot_df_0 <- do.call(rbind, paths_0)
p3 <- ggplot(plot_df_0, aes(x = log(time), y = y, group = sim)) +
geom_line() +
labs(title = "Paths of First Simulation", x = "Time", y = "Y") +
theme_bw()
plot_df_0_after_10 <- do.call(rbind, paths_0_after_10)
p4 <- ggplot(plot_df_0_after_10, aes(x = log(time), y = y, group = sim)) +
geom_line() +
labs(title = "Paths of Second Simulation", x = "Time", y = "Y") +
theme_bw()
grid.arrange(p1, p2, p3, p4, ncol = 2)

My Question: Is there anything I can do to improve the efficiency of this simulation?
Thanks!
It is well known that the expected time to move from k to k + 1 in a symmetric random walk is infinite. We can still simulate right-censored walk times.
Below is a more direct approach to the simulation. Think about moving from 5 to 6. It will always take an odd number of steps. The probability of it taking 2*n - 1 steps is abs(choose(0.5, n)). Once it reaches 6, the number of steps to go from 6 to 7 is independent of the number of steps it took to go from 5 to 6.
To speed up the simulation, first pre-calculate the CDF of the number of steps to move 1 space. If we want to simulate how many steps it takes to move n spaces total, we can sum n random samples from the pre-calculated CDF. (The distribution is very heavy tailed, so we will let it run up to a maximum number of steps, at which the simulation will be terminated early and return Inf.)
Implementing the simulation as a function:
max.steps <- 1e8 + 1
p <- c(0, cumsum(abs(choose(0.5, 1:((max.steps + 1)/2)))), 1)
f <- function(n, path) {
m <- matrix(2*findInterval(runif(sum(abs(diff(path)))*n), p) - 1, n)
m[m > max.steps] <- Inf
rowSums(m)
}
Simulating situation 1:
# moving from 5 to 0
path <- c(5, 0)
f(10, path)
#> [1] 21 189 25 15 19 31 137 25 43 1199
# probability of stopping the simulation early (steps = Inf)
1 - p[length(p) - 1]^sum(abs(diff(path)))
#> [1] 0.0003988786
Simulating situation 2:
path <- c(5, 10, 0)
f(10, path)
#> [1] 34459 33 545 115 237 109 143 14287 1251545
#> [10] 613
# probability of stopping the simulation early (steps = Inf)
1 - p[length(p) - 1]^sum(abs(diff(path)))
#> [1] 0.001196159
The pre-calculations take some time, but they need only be computed once. And once they are complete, the simulation runs very fast:
# pre-calculation time
system.time(p <- c(0, cumsum(abs(choose(0.5, 1:((max.steps + 1)/2)))), 1))
#> user system elapsed
#> 18.02 0.47 18.50
# time 1M simulation runs
system.time(f(1e6, c(5, 10, 0)))
#> user system elapsed
#> 0.83 0.02 0.86
Calling sample() in each cycle of your for/while loop creates a lot of overhead
and causes your simulations to run slow.
The solution below runs sample() once per simulation with a size of 1e6 (1,000,000).
You could also go bigger, 1e7 seems reasonably fast on my machine and all cycles
have hit 0 for several runs with 1e7. At 1e8 things get slow and
at 1e9 is likely too much for most machines. The smaller you set the sample size
the more runs that don’t hit 0 you’ll get.
This function relies on vectorization to speed things up.
sim <- function(y = 5,
size = 1e6) {
steps <- sample(c(-1, 1),
size,
prob = c(0.5, 0.5),
replace = TRUE)
y_steps <- c(y, steps)
y_steps_cum <- cumsum(y_steps)
times <- which(y_steps_cum == 0)
times_to_0 <- times[1]
path <- if (!is.na(times_to_0))
y_steps_cum[1:times_to_0]
else
NA
touched_10 <- which(y_steps_cum == 10)[1]
times_to_0_after_10 <-
if (!is.na(touched_10))
times[times > touched_10][1]
else
NA
path_to_0_after_10 <-
if (!is.na(times_to_0_after_10))
y_steps_cum[1:times_to_0_after_10]
else
NA
list(
times_to_0 = times_to_0,
path = list(path),
touched_10 = touched_10,
times_to_0_after_10 = times_to_0_after_10,
path_to_0_after_10 = list(path_to_0_after_10)
)
}
A single simulation:
sim()
#> $times_to_0
#> [1] 314
#>
#> $path
#> $path[[1]]
#> [1] 5 4 3 2 3 4 3 4 3 2 3 4 5 6 7 8 9 8 9 10 9 10 9 8 7
#> [26] 6 7 6 5 4 5 6 7 8 9 10 9 10 11 12 11 12 11 12 13 12 11 12 13 12
#> [51] 13 12 11 12 11 10 11 10 11 10 11 12 13 12 13 14 13 12 11 10 9 8 7 6 7
#> [76] 6 7 8 9 8 7 8 9 8 7 8 7 6 7 8 9 8 9 8 7 6 5 6 5 4
#> [101] 3 4 5 6 5 6 5 6 7 8 7 8 7 6 7 6 7 8 9 8 7 8 9 8 9
#> [126] 8 7 6 5 6 7 8 9 8 7 8 7 8 9 10 9 10 9 8 9 8 9 8 9 10
#> [151] 11 12 11 12 11 10 11 10 11 12 11 12 13 12 13 14 15 16 15 14 15 14 13 12 13
#> [176] 14 15 14 15 16 17 18 19 18 19 20 19 18 17 16 15 14 13 12 11 10 11 10 9 10
#> [201] 9 10 11 12 11 10 11 10 9 10 11 10 11 12 13 12 13 14 15 14 15 16 17 16 15
#> [226] 14 15 14 13 12 11 10 9 8 9 8 7 8 9 10 11 10 11 12 11 10 11 10 9 10
#> [251] 9 10 11 12 11 12 11 10 9 10 11 12 11 12 13 12 11 12 13 12 13 14 13 12 13
#> [276] 12 11 12 11 10 9 8 7 6 7 8 7 6 7 6 5 6 5 4 5 4 3 2 3 4
#> [301] 5 4 5 4 5 4 5 4 3 2 1 2 1 0
#>
#>
#> $touched_10
#> [1] 20
#>
#> $times_to_0_after_10
#> [1] 314
#>
#> $path_to_0_after_10
#> $path_to_0_after_10[[1]]
#> [1] 5 4 3 2 3 4 3 4 3 2 3 4 5 6 7 8 9 8 9 10 9 10 9 8 7
#> [26] 6 7 6 5 4 5 6 7 8 9 10 9 10 11 12 11 12 11 12 13 12 11 12 13 12
#> [51] 13 12 11 12 11 10 11 10 11 10 11 12 13 12 13 14 13 12 11 10 9 8 7 6 7
#> [76] 6 7 8 9 8 7 8 9 8 7 8 7 6 7 8 9 8 9 8 7 6 5 6 5 4
#> [101] 3 4 5 6 5 6 5 6 7 8 7 8 7 6 7 6 7 8 9 8 7 8 9 8 9
#> [126] 8 7 6 5 6 7 8 9 8 7 8 7 8 9 10 9 10 9 8 9 8 9 8 9 10
#> [151] 11 12 11 12 11 10 11 10 11 12 11 12 13 12 13 14 15 16 15 14 15 14 13 12 13
#> [176] 14 15 14 15 16 17 18 19 18 19 20 19 18 17 16 15 14 13 12 11 10 11 10 9 10
#> [201] 9 10 11 12 11 10 11 10 9 10 11 10 11 12 13 12 13 14 15 14 15 16 17 16 15
#> [226] 14 15 14 13 12 11 10 9 8 9 8 7 8 9 10 11 10 11 12 11 10 11 10 9 10
#> [251] 9 10 11 12 11 12 11 10 9 10 11 12 11 12 13 12 11 12 13 12 13 14 13 12 13
#> [276] 12 11 12 11 10 9 8 7 6 7 8 7 6 7 6 5 6 5 4 5 4 3 2 3 4
#> [301] 5 4 5 4 5 4 5 4 3 2 1 2 1 0
100 simulations:
library(tidyverse)
tictoc::tic() # tic() and toc() are for benchmarking
res <-
lapply(rep(5, 100),
sim) |>
bind_rows(.id = "sim")
tictoc::toc()
#> 2.596 sec elapsed
res
#> # A tibble: 100 × 6
#> sim times_to_0 path touched_10 times_to_0_after_10 path_to_0_after_10
#> <chr> <int> <list> <int> <int> <list>
#> 1 1 3202 <dbl> 26 3202 <dbl [3,202]>
#> 2 2 1690 <dbl> 16 1690 <dbl [1,690]>
#> 3 3 32 <dbl [32]> 11586 12012 <dbl [12,012]>
#> 4 4 386 <dbl> 54 386 <dbl [386]>
#> 5 5 280 <dbl> 20 280 <dbl [280]>
#> 6 6 20 <dbl [20]> 272 2890 <dbl [2,890]>
#> 7 7 3520 <dbl> 20 3520 <dbl [3,520]>
#> 8 8 160 <dbl> 8 160 <dbl [160]>
#> 9 9 141814 <dbl> 8 141814 <dbl [141,814]>
#> 10 10 1474 <dbl> 10 1474 <dbl [1,474]>
#> # ℹ 90 more rows
library(ggplot2)
library(gridExtra)
#>
#> Attaching package: 'gridExtra'
#> The following object is masked from 'package:dplyr':
#>
#> combine
mean1 <- mean(log(res$times_to_0), na.rm = TRUE)
mean2 <- mean(log(res$times_to_0_after_10), na.rm = TRUE)
p1 <-
ggplot(res, aes(x = log(times_to_0))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean1)),
color = "red",
linetype = "dotted") +
labs(
title = paste("Density of Times to Reach 0 - Mean Time:", round(exp(mean1), 2)),
x = "Time",
y = "Density"
) + theme_bw()
p2 <-
ggplot(res, aes(x = log(times_to_0_after_10))) +
geom_density() +
geom_vline(aes(xintercept = exp(mean2)),
color = "red",
linetype = "dotted") +
labs(
title = paste(
"Density of Times to Reach 0 After Reaching 10 - Mean Time:",
round(exp(mean2), 2)
),
x = "Time",
y = "Density"
) + theme_bw()
p3 <-
res |>
select(sim, path) |>
unnest(path) |>
mutate(time = 1:n(), .by = sim) |>
ggplot(aes(
x = log(time),
y = path,
group = sim
)) +
geom_line(linewidth = .05) +
labs(title = "Paths of First Simulation", x = "Time", y = "Y") +
theme_bw()
p4 <-
res |>
select(sim, path_to_0_after_10) |>
unnest(path_to_0_after_10) |>
mutate(time = 1:n(), .by = sim) |>
ggplot(aes(
x = log(time),
y = path_to_0_after_10,
group = sim
)) +
geom_line(linewidth = .05) +
labs(title = "Paths of Second Simulation", x = "Time", y = "Y") +
theme_bw()
grid.arrange(p1, p2, p3, p4, ncol = 2)
#> Warning: Removed 1 rows containing non-finite values (`stat_density()`).
#> Warning: Removed 3 rows containing non-finite values (`stat_density()`).
#> Warning: Removed 1 row containing missing values (`geom_line()`).
#> Warning: Removed 3 rows containing missing values (`geom_line()`).

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With