import math import numpy as np import matplotlib.pyplot as plt # settings: p = 0.00123 # DE N = 10000 file = open("cv5/upf_cv5.dat","r") def P_poisson(k,nu): """function to return Poissonian probablity""" P = math.exp(-nu) for i in range(1,k+1): # cancel divergencies (avoid overflow) P *= nu / i return P n = 0 # counter of samples i = 0 # counter of lines within a sample k = 0 # number of 'DE' in the N lines nu = N * p nBins = int(math.sqrt(math.ceil(nu))) * 10 histo = np.zeros(nBins) # array (0,0,...0) = histogram print("nu = " + str(round(nu,2)) + ", nBins = " + str(nBins)) for line in file: if line[0] == "#": continue columns = line.split() if i == N: if (k < nBins): histo[k] = histo[k] + 1 # increment k-th bin of the histogram i = 0 k = 0 n = n + 1 if len(columns) > 4 and columns[4] == "DE": # if the origin is DE k = k + 1 i = i + 1 nExp = np.empty(nBins) for k in range(nBins): P = P_poisson(k,nu) nExp[k] = P * n print(k,histo[k],"\t",nExp[k]) plt.bar(range(nBins),histo,alpha=0.5) plt.title("Poissonian distribution nu = " + str(round(nu,2))) plt.xlabel("Frequency of DE in " + str(N) + " COVID cases") plt.plot(range(nBins),nExp,color='red',marker='o') plt.show() plt.bar(range(nBins),histo,alpha=0.5) plt.title("Poissonian distribution nu = " + str(round(nu,2))) plt.xlabel("Frequency of DE in " + str(N) + " COVID cases") plt.plot(range(nBins),nExp,color='red',marker='o') plt.yscale("log") plt.show()