# simulator
# do not modify! (except final few lines)

from eps_greedy import eps_greedy
from ucb import ucb
from thompson import thompson
from multiprocessing import Pool
import matplotlib.pyplot as plt
import numpy as np


def simulate(algorithm, probs, horizon, num_sims=50):
  """Return the mean result of `num_sims` parallel runs of one algorithm.

  The single-run function selected by `algorithm` is called as
  ``single_sim(i, probs, horizon)`` for every ``i`` in ``range(num_sims)``
  on a pool of 10 worker processes, and the returned values are averaged.

  Args:
    algorithm: one of "Eps_Greedy", "UCB" or "Thompson".
    probs: forwarded unchanged to the single-run function (presumably the
      arm success probabilities -- confirm against the algorithm modules).
    horizon: forwarded unchanged to the single-run function (presumably
      the number of pulls per run -- confirm against the algorithm modules).
    num_sims: number of runs to average over.

  Returns:
    ``np.mean`` over the values returned by the individual runs
    (presumably per-run regrets).

  Raises:
    ValueError: if `algorithm` is not one of the supported names.
  """
  if algorithm == "Eps_Greedy":
    single_sim = eps_greedy
  elif algorithm == "UCB":
    single_sim = ucb
  elif algorithm == "Thompson":
    single_sim = thompson
  else:
    # Fail before any worker process is spawned; without this branch the
    # error would only surface later as an unbound local variable.
    raise ValueError(
      f"unknown algorithm {algorithm!r}; "
      "expected 'Eps_Greedy', 'UCB' or 'Thompson'"
    )

  # The first argument is the run index (presumably used as a per-run seed
  # by the single-run functions -- confirm there).
  run_args = [(i, probs, horizon) for i in range(num_sims)]
  with Pool(10) as pool:
    regrets = pool.starmap(single_sim, run_args)

  return np.mean(regrets)


def task(probs, num_sims=50):
  """Print and plot the simulated regret of each algorithm against horizon.

  For "Eps_Greedy", "UCB" and "Thompson" (in that order) this calls
  ``simulate`` once per horizon in 2**7, 2**8, ..., 2**16, prints the
  resulting list for each algorithm, and then shows a single matplotlib
  figure with one curve per algorithm.

  Args:
    probs: forwarded unchanged to ``simulate``.
    num_sims: forwarded unchanged to ``simulate``.
  """
  horizon_grid = [2**exponent for exponent in range(7, 17)]

  # (name passed to simulate, name printed, line colour, legend label)
  series = (
    ("Eps_Greedy", "Epsilon-Greedy", "r", "Eps-Greedy"),
    ("UCB", "UCB", "g", "UCB"),
    ("Thompson", "Thompson", "b", "Thompson"),
  )

  # All simulations finish before anything is printed or plotted.
  curves = [
    [simulate(key, probs, horizon, num_sims) for horizon in horizon_grid]
    for key, _, _, _ in series
  ]

  for (_, printed_name, _, _), curve in zip(series, curves):
    print(printed_name, curve)

  for (_, _, colour, legend_label), curve in zip(series, curves):
    plt.plot(horizon_grid, curve, color=colour, label=legend_label)

  plt.title("Regret vs Horizon")
  plt.xlabel("Horizon")
  plt.ylabel("Regret")
  plt.legend()

  plt.show()


if __name__ == '__main__':

    # Note - every plot is generated for one fixed bandit instance:
    # 20 arms whose means are evenly spaced (0, 1/20, ..., 19/20)
    num_arms = 20
    probs = [arm / num_arms for arm in range(num_arms)]
    task(probs)
