#
# Stats.PY
# functions to draw probability distribution function of fractianal polarization of sources.
#
# date: 2017-12-21
# author: GIUSEPPE PUGLISI
#
# Copyright (C) 2017 Giuseppe Puglisi giuspugl@sissa.it
#
import pylab as pl
import healpy as hp
import numpy as np
import h5py as h5
import sys
#from polarization_pdf import *
import scipy as sp
from scipy import interpolate
from scipy import stats
from pointsource_utilities import *
import glob
from scipy import special as spec
[docs]def lognormal_distribution(X,A,Xm, sigma ):
"""
Compute log-normal distribution on `X`:
.. math::
\mathcal{P} (\Pi ) =A/ (\sqrt{2\pi \sigma^2} \Pi) \exp [ - ( \ln(\Pi / \mu))^2 /{2\sigma^2} ]
**Parameters**
- `X`:{array or float}
polarization fraction array
- `A`:{float}
constant
- `Xm`:{float}
- `sigma`:{float}
"""
pi=np.pi
return A* 1./(X*sigma *np.sqrt(2*pi)) * np.exp(- (np.log(X/Xm ))**2 /2./sigma**2)
[docs]def compute_poissonian_uncertainties(bc,Ntot,S,perr):
"""
Compute Poissonian upper and lower limit uncertainties from Gehrels 1986.
**Parameters**
- `bc`:{array}
counts within each bin
- `Ntot`: {int}
total number of data
- `S`: {int}
the confidence level (CL) for uncertainties, we consider S=1, meaning 68% CL, S=2 for the 95%, etc...
- `perr`:{array}
array where upper and lower uncertainties are stored, `shape= 2 x size(bc)`.
"""
ppl=[lambda N:N * (1. - 1. / (9. * N) - S / (3. * np.sqrt(N))) ** 3,
lambda N: N + S * np.sqrt(N + 1) + (S**2 + 2) / 3.] # Approximations Gehrels 1986
for i in range(len(bc)) :
n=bc[i]
if n<=100:
perr[1][i]= (ppl[1] (n) - n)/Ntot
if n==0:
perr[0][i]= perr[1][i]
else:
perr[0][i]=(n- ppl[0](n))/Ntot
else:
perr[0][i]= S*np.sqrt(n)/Ntot
perr[1][i]= S*np.sqrt(n)/Ntot
return perr
[docs]def bootstrap_resampling(I, sigmaI, P, sigmaP,nsamples,upper_limit=False):
"""
It performs the bootstrap resampling from observations. For each group of values
(I,P, sigmaI, sigmaP) it resamples `nsamples` values with Gaussian random values `Isamples` and `Psamples`
with center around I, or P and width sigmaI and sigmaP. If data are upper limit it resamples by assuming a uniform
distribution of random number from 0 to :math:`\sqrt{P^2 +\sigma_P^2}`.
**Parameters**
- `I, sigmaI, P, sigmaP`={floats}
values for resampling
- `nsamples`:{int}
how many resampling to do
- `upper_limit`:{bool}
**Returns**
- `f`:{array}
resampled fractional polarization
"""
Isamples=abs(np.random.normal(loc=I, scale=sigmaI, size=nsamples))
if upper_limit:
Psamples=np.random.uniform(low=0.0, high=np.sqrt(P**2+sigmaP**2), size=nsamples)#
else:
Psamples=abs(np.random.normal(loc=P, scale=sigmaP, size=nsamples))
f= Psamples/Isamples
m=np.ma.masked_less_equal(Psamples/Isamples,1.) # mask the sources whose P>I
return f[m.mask]
[docs]def pi2(mu, sigma, apply_correction=False ) :
"""
Compute :math:`< \Pi^2 >` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.
.. math::
<\Pi^2 > = \mu^2 e^{2\sigma^2}
"""
if apply_correction:
t= np.log(1./mu)/ np.sqrt(2)/sigma
factor=( (1+ spec.erf(t+ (np.sqrt(2) *sigma ) ) ) / (1 +spec.erf(t) ) )
else:
factor=1
pi2= mu**2 *np.exp(2.*sigma**2) * factor
return pi2
[docs]def pi(mu, sigma, apply_correction=False) :
"""
Compute :math:`< \Pi >` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.
.. math::
<\Pi> = \mu e^{\sigma^2/2}
"""
if apply_correction:
t= np.log(1./mu)/ np.sqrt(2)/sigma
factor=( (1+ spec.erf(t+ (sigma/np.sqrt(2) ) ) ) / (1 +spec.erf(t) ) )
else:
factor=1
pi= mu *np.exp(.5*sigma**2) * factor
return pi
[docs]def pimed(mu, sigma, apply_correction=False ):
"""
Compute :math:`\Pi_{med}` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.
"""
if apply_correction:
t= np.log(1./mu)/ np.sqrt(2)/sigma
factor=np.exp( np.sqrt(2)*sigma * spec.erfinv( .5 * (spec.erf(t) -1 ) ) )
else:
factor=1.
pi= mu *factor
return pi
[docs]def err_p2(mu, sigma, cov):
"""
Compute errors on :math:`< \Pi^2 >` propagating uncertainties on :math:`\mu, \sigma` parameters.
"""
dfdmu=2.* mu*np.exp(2.*sigma**2)
dfdsig=4.* mu**2 *np.exp(2.*sigma**2) * sigma
deltap2= np.sqrt(dfdmu**2 * cov[1,1] + 2. * dfdmu* dfdsig* cov[2,1] + dfdsig**2* cov[2,2] )
return deltap2
[docs]def err_pi (mu, sigma,cov ):
"""
Compute errors on :math:`< \Pi >` propagating uncertainties on :math:`\mu, \sigma` parameters.
"""
dfdmu= np.exp(.5*sigma**2)
dfdsig=mu*sigma *np.exp(.5*sigma**2)
deltap= np.sqrt(dfdmu**2 * cov[1,1] + 2. * dfdmu* dfdsig* cov[2,1] + dfdsig**2* cov[2,2] )
return deltap
[docs]def chisquare(obs,exp,sigma):
return np.sum(((obs-exp)/sigma)**2)
[docs]def fitting_lognormal_from_fluxes(flux, polflux,eI, eP, idstring,
fig=None, nbins=15, resampling =True,workdir='./',
**kwargs):
"""
Fit logrnormal distribution of fractional polarization from catalog encoding total intensity and polarized fluxes.
**Parameters**
- `flux, eI`:{array}
total intensity fluxes and errors
- `polflux, eP`:{array}
polarization flux and errors
- `idstring`: {string}
string used for plotting
- `fig`:{int}
id of figure
- `nbins`:{int}
fractional polarization histogram bins
- `resampling`:{bool}
if `True` resample the catalog with 1000 resampling, otherwise it does not.
- `workdir`:{string}
path to store plots and files.
- `saveplot`:{string}
name and format to store the plot of the lognormal fit in `workdir`
- `bestfitparams2file`:{bool}
save the computed best fit parameters, the average fractional polarizations to binary file `*.npy` in `workdir`.
**Returns**
- `Pim, Pi2, Pi`:{floats}
:math:`\Pi_{med},< \Pi^2 >,< \Pi >`
- `deltapim, deltap2 deltap`
errors on the computed quantities.
"""
if not 'saveplot' in kwargs:
kwargs['saveplot']=None
if not 'verbose' in kwargs:
kwargs['verbose']=True
if not 'bestfitparams2file' in kwargs:
kwargs['bestfitparams2file']=True
fpol=np.array(polflux/flux)
if resampling :
ziplist=zip(flux, polflux,eI, eP)
resampled=[]
nresampl=1000
for I,P,sigmaI,sigmaP in ziplist :
resampled.append(p_resampling(I,sigmaI,P,sigmaP,nresampl))
resampled=np.concatenate(resampled)
else :
resampled=fpol
if min(fpol)/max(fpol)< 1e-3:
if kwargs['verbose']:print "using a not uniform binning"
edges= np.concatenate([np.linspace(min(fpol),min(fpol)*5., 3), np.linspace(min(fpol)*5., max(fpol), nbins-2)])
else:
edges=np.linspace(min(fpol),max(fpol),nbins+1)
pdf,edges=np.histogram(resampled,bins= edges,normed=False )
bins=np.array([.5*(edges[i+1] +edges[i] ) for i in range(nbins-1 )])
maskbins=np.digitize(resampled,bins)
mbins=np.digitize(fpol,bins)
Ntot_r=1.*len(resampled)
Ntot= 1.*len(fpol)
pdf= 1.*pdf /Ntot_r
pmean=np.zeros(nbins)
deltab=np.array([.5*(edges[i+1] -edges[i] ) *100. for i in xrange(nbins)])
bc= np.bincount(mbins)
S=1 # 1 sigma 68% CL
perr=np.zeros(2*(nbins)).reshape(2,nbins )
perr=compute_poissonian_uncertainties(bc,Ntot,S,perr)
for i in range(max(maskbins)+1) :
idx=np.where(maskbins == i)[0]
pmean[i]= np.mean(resampled[idx])
try:
pl.figure(fig)
pl.errorbar(pmean*100.,pdf,yerr=[perr[0],perr[1]],fmt='go')
popt,pcov=so.curve_fit(lognormal_distribution, (pmean)*100., pdf,sigma= np.sqrt(perr[1]**2 + perr[0]**2))
chi2=chisquare(pdf ,lognormal_distribution(pmean*100. ,*popt) , np.sqrt(perr[1]**2 + perr[0]**2))
DOF=(nbins-len(popt))
redchi2=chi2/DOF
pte = 1- stats.chi2.cdf(chi2,DOF)
if kwargs['verbose']:
print "----"*20,"\n","Fitting lognormal distribution on "+idstring+" with %d data"%(len(fpol))
print "A= %g\nmu =%g +- %g \nsigma=%g per cent +- %g"%(popt[0],(popt[1]),np.sqrt(pcov[1,1]),popt[2],np.sqrt(pcov[2,2]))
print "Reduced Chi^2 : %g\nPTE :%g"%(redchi2, pte )
print "----"*20
x=np.linspace(0.1,max(fpol)*100.,1024)
pl.plot(x,lognormal_distribution(x,*popt),label=idstring + r' $A= %.2f ,\, \mu= %.2f\, \sigma=%.2g $'%(popt[0],(popt[1]),popt[2]))
Pi_m=popt[1]
deltapim=np.sqrt(pcov[1,1])
sigma=popt[2]
Pi2= Pi_m**2*np.exp(2.*sigma**2)
deltap2=err_p2(Pi_m, sigma, pcov)
Pi= Pi_m*np.exp(sigma**2/2.)
deltap =err_pi(Pi_m, sigma, pcov)
except RuntimeError:
print "Unable to fit Lognormal too few data points"
Pi_m=np.median(resampled)
Pi2= np.mean(resampled**2)
deltap2=0.
Pi= np.mean(resampled)
deltap=0.
deltapim=0.
pass
finally:
pl.xticks(fontsize=14)
pl.yticks(fontsize=15)
pl.xlabel(r'$\Pi$'+' [ % ] ' ,fontsize=14)
pl.ylabel(r'$P\,(\Pi)$ ',fontsize=15)
pl.legend(loc='best', fontsize=14)
pl.tight_layout()
#pl.title(idstring)
if max(fpol)>.5:
pl.xlim([0.,30.])
else:
pl.xlim([0,max(fpol)*100.])
if kwargs['saveplot'] is not None :
pl.savefig(workdir+idstring+kwargs['saveplot'])
pl.close()
if kwargs['bestfitparams2file']:
np.save(workdir+'bestfit_params_'+idstring, [popt, pcov, redchi2, pte, Pi_m, deltapim, Pi, deltap, Pi2, deltap2])
return Pi_m, Pi2, deltap2,deltapim, Pi, deltap
[docs]def resampling_fpol(fpol, sigma,flag, nresampl=100):
"""
Resampling a catalogue starting from fractional polarization values, errors and flags into the data. Flags identify which data have to be
considered as upper limits.
**Parameters**
- `fpol`:{float}
- `sigma`:{float}
- `flag`:{bool}
- `nresampl`:{int}
how many resampling to do
**Returns**
- `fpolres`:{array}
array of resampled fractional polarizations.
"""
if flag:
fpolres=np.random.uniform(low=0.,high=fpol, size=nresampl)
else:
fpolres=abs(np.random.normal(loc=fpol,scale=sigma, size=nresampl))
return fpolres
[docs]def fitting_lognormal_from_fpol(fpol,errfpol, flags,idstring,
fig=None, nbins=15, resampling=True,workdir='./', **kwargs):
"""
Fit logrnormal distribution of fractional polarization from catalog encoding fractional polarization and errors on it.
**Parameters**
- `pol`:{array}
fractional polarization
- `errfpol`:{array}
errors of fractional polariz.
- `flag`:{array}
if `flag=1 ` or `flag=True` it means that data are flagged and they are considered as upper limits.
The rest of the parameters are the very same as in :func:`fitting_lognormal_from_fluxes`
"""
if not 'saveplot' in kwargs:
kwargs['saveplot']=None
if not 'verbose' in kwargs:
kwargs['verbose']=True
if not 'bestfitparams2file' in kwargs:
kwargs['bestfitparams2file']=True
if resampling :
resampled=[]
nresampl=1000
for P,eP, F in zip(fpol,errfpol, flags):
resampled.append(resampling_fpol(P,eP,F,nresampl=nresampl ))
resampled=np.concatenate(resampled)
else :
resampled=(fpol)
if min(fpol)/max(fpol)< 1e-3:
if kwargs['verbose']:print "using a not uniform binning"
edges= np.concatenate([np.linspace(min(fpol),min(fpol)*5., 3), np.linspace(min(fpol)*5., max(fpol), nbins-2)])
else:
edges=np.linspace(min(fpol),max(fpol),nbins+1)
pdf,edges=np.histogram(resampled,bins= edges,normed=False )
bins=np.array([.5*(edges[i+1] +edges[i] ) for i in range(nbins-1 )])
maskbins=np.digitize(resampled,bins)
mbins=np.digitize(fpol,bins)
Ntot_r=1.*len(resampled)
Ntot= 1.*len(fpol)
pdf= 1.*pdf /Ntot_r
pmean=np.zeros(nbins)
deltab=np.array([.5*(edges[i+1] -edges[i] ) *100. for i in xrange(nbins)])
bc= np.bincount(mbins)
S=1 # 1 sigma 68% CL
perr=np.zeros(2*(nbins)).reshape(2,nbins )
perr=compute_poissonian_uncertainties(bc,Ntot,S,perr)
for i in range(max(maskbins)+1) :
idx=np.where(maskbins == i)[0]
pmean[i]= np.mean(resampled[idx])
pl.figure(fig)
pl.errorbar(pmean*100.,pdf,yerr=[perr[0],perr[1]],fmt='go')
try:
popt,pcov=so.curve_fit(lognormal_distribution, (pmean)*100., pdf,sigma= np.sqrt(perr[1]**2 + perr[0]**2))
mu=popt[1]
sigma=popt[2]
Pi_m = pimed(mu, sigma)
deltapim=np.sqrt(pcov[1,1])
Pi2= pi2(mu, sigma)
Pi=pi (mu, sigma )
chi2=chisquare(pdf ,lognormal_distribution(pmean*100. ,*popt) , np.sqrt(perr[1]**2 + perr[0]**2))
DOF=(nbins-len(popt))
redchi2=chi2/DOF
pte = 1- stats.chi2.cdf(chi2,DOF)
if kwargs['verbose']:
print "----"*20,"\n","Fitting lognormal distribution on "+idstring+" with %d data"%(len(fpol))
print "A= %g\nmu =%g +- %g \nsigma=%g per cent +- %g"%(popt[0],(popt[1]),np.sqrt(pcov[1,1]),popt[2],np.sqrt(pcov[2,2]))
print "Reduced Chi^2 : %g\nPTE :%g"%(redchi2, pte )
print "----"*20
x=np.linspace(0.1,max(fpol)*100.,1024)
pl.plot(x,lognormal_distribution(x,*popt),label=idstring+r' $A= %.2f ,\, \mu= %.2f, \, \sigma=%.2g $'%(popt[0],(popt[1]),popt[2]))
deltap2=err_p2(Pi_m, sigma, pcov)
deltap =err_pi(Pi_m, sigma, pcov)
except RuntimeError:
print "Unable to fit Lognormal too few data points"
Pi_m=np.median(resampled)
Pi2= np.mean(resampled**2)
Pi= np.mean(resampled)
deltap2=0.
deltap=0.
deltapim=0.
pass
finally:
pl.xticks(fontsize=14)
pl.yticks(fontsize=15)
pl.xlabel(r'$\Pi$'+' [ % ] ' ,fontsize=14)
pl.ylabel(r'$P\,(\Pi)$ ',fontsize=15)
pl.legend(loc='best', fontsize=14)
#pl.title(idstring)
pl.tight_layout()
if max(fpol)>.5:
pl.xlim([0.,10.])
else:
pl.xlim([0,max(fpol)*100.])
if kwargs['saveplot'] is not None :
pl.savefig(workdir+idstring+kwargs['saveplot'])
pl.close()
if kwargs['bestfitparams2file']:
np.save(workdir+'bestfit_params_'+idstring, [popt, pcov, redchi2, pte, Pi_m, deltapim, Pi, deltap, Pi2, deltap2])
return Pi_m, Pi2, deltap2,deltapim, Pi, deltap