import io
import numpy

# State space:  S = {0, 1}
# Action space: A = {0, 1}
# Tabular model of a small MDP with two states and two actions.  Both tables
# are indexed [s][a][sPrime]: current state, action taken, next state.
# They start zero-filled and are populated entry by entry below.  The outer
# levels are built with comprehensions so that every row is its own list and
# writing one entry never aliases another.
T = [[[0] * 2 for _ in range(2)] for _ in range(2)]
R = [[[0] * 2 for _ in range(2)] for _ in range(2)]

# Transition probabilities T[s][a][sPrime].  The second entry of each row is
# derived as 1 minus the first, so every (s, a) row sums to 1.
T[0][0][0] = 0.25
T[0][0][1] = 1 - T[0][0][0]
T[0][1][0] = 1
T[0][1][1] = 1 - T[0][1][0]
T[1][0][0] = 0.5
T[1][0][1] = 1 - T[1][0][0]
T[1][1][0] = 1
T[1][1][1] = 1 - T[1][1][0]

# Rewards R[s][a][sPrime], using the same layout as T.
R[0][0][0] = 1
R[0][0][1] = 0
R[0][1][0] = 2
R[0][1][1] = 0  # Dummy: T[0][1][1] is 0, so this value is never weighted in.
R[1][0][0] = -1
R[1][0][1] = 10
R[1][1][0] = 0
R[1][1][1] = 0  # T[1][1][1] is 0 as well, so this entry has no effect either.

# Discount factor multiplying the next-state value in the Bellman backup.
gamma = 0.5

def bellmanOptimalityOperator(V, transitions=None, rewards=None, discount=None):
    """Apply one Bellman optimality backup to the value estimates ``V``.

    For every state ``s`` the new value is the largest action value

        q(s, a) = sum over sPrime of
                  transitions[s][a][sPrime]
                  * (rewards[s][a][sPrime] + discount * V[sPrime])

    The number of states, actions and successor states is taken from the
    shape of the tables instead of being fixed at two.

    Args:
        V: Sequence of current value estimates, indexed by state.
        transitions: Table indexed ``[s][a][sPrime]``.  Defaults to the
            module-level ``T``.
        rewards: Table with the same layout as ``transitions``.  Defaults to
            the module-level ``R``.
        discount: Weight applied to the successor value.  Defaults to the
            module-level ``gamma``.

    Returns:
        A new list holding one backed-up value per state; ``V`` itself is
        not modified.

    Raises:
        IndexError: If ``V`` has fewer entries than a row of ``transitions``.
        ValueError: If a state has no actions.
    """
    # Fall back to the module-level model so the one-argument call used by
    # the script keeps working unchanged.
    if transitions is None:
        transitions = T
    if rewards is None:
        rewards = R
    if discount is None:
        discount = gamma

    Vnext = []
    for Ts, Rs in zip(transitions, rewards):
        qValues = []
        for Ta, Ra in zip(Ts, Rs):
            # Plain left-to-right accumulation starting from 0 keeps the
            # floating-point result identical to the hand-unrolled version.
            q = 0
            for sPrime, (p, r) in enumerate(zip(Ta, Ra)):
                q += p * (r + discount * V[sPrime])
            qValues.append(q)

        # Greedy step: keep the value of the best action in this state.
        Vnext.append(max(qValues))

    return Vnext

# Value iteration: begin with an all-zero estimate (one entry per state) and
# apply the Bellman optimality operator a fixed 1000 times, then print the
# resulting values.  Another starting point that was tried:
# [10345.3, 21.346576].
V = [0, 0]

for _ in range(1000):
    V = bellmanOptimalityOperator(V)

print(V)
