import math import numpy as np import matplotlib.pyplot as plt # settings: p = 0.16 N = 10 file = open("cv5/upf_cv5.dat","r") def P_binomial(k,n,p): """function to return binomial probablity""" P = 1.0 for i in range(1,n+1): # avoid unnecessary factorial calculations if i <= k and i <= n-k: P = P / i elif i > k and i > n-k: P = P * i return P * pow(p,k) * pow(1-p,n-k) def P_poisson(k,nu): """function to return Poissonian probablity""" P = math.exp(-nu) for i in range(1,k+1): # cancel divergencies (avoid overflow) P *= nu / i return P n = 0 # counter of samples i = 0 # counter of lines within a sample k = 0 # number of CZ010 (Prague) in the N lines histo = np.zeros(N) # array (0,0,...0) = histogram for line in file: if line[0] == "#": continue columns = line.split() if i == N: histo[k] = histo[k] + 1 # increment k-th bin of the histogram i = 0 k = 0 n = n + 1 if len(columns) > 2 and columns[3] == "CZ010": # if the region is Prague k = k + 1 i = i + 1 nExp = np.empty(N) nExpPois = np.empty(N) for k in range(N): nExp[k] = P_binomial(k,N,p) * n nExpPois[k] = P_poisson(k,N*p) * n print(k,histo[k],"\t",nExp[k]) plt.bar(range(N),histo,alpha=0.5) plt.title("Binomial distribution p = " + str(round(p,2)) + ", N = " + str(N)) plt.xlabel("Frequency of Prague region in " + str(N) + " COVID cases") plt.plot(range(N),nExp,color='red',marker='o') plt.plot(range(N),nExpPois,color='green',marker='x') plt.show()