Source code for Stats

#
#   Stats.PY
#   functions to draw probability distribution function of fractianal polarization of sources.
#
#   date: 2017-12-21
#   author: GIUSEPPE PUGLISI
#
#   Copyright (C) 2017   Giuseppe Puglisi    giuspugl@sissa.it
#

import pylab as pl
import healpy as hp
import numpy as np
import h5py as h5
import sys
#from polarization_pdf import *
import scipy as sp
from scipy import interpolate
from scipy import stats
from pointsource_utilities import *
import glob
from scipy import special as spec

[docs]def lognormal_distribution(X,A,Xm, sigma ):
    """
	Compute log-normal distribution on `X`:

	.. math::

		\mathcal{P} (\Pi ) =A/ (\sqrt{2\pi \sigma^2} \Pi)  \exp [  - ( \ln(\Pi /  \mu))^2 /{2\sigma^2} ]

	**Parameters**

	- `X`:{array or float}
	   polarization fraction array
	- `A`:{float}
	   constant
	- `Xm`:{float}
	- `sigma`:{float}

    """

    pi=np.pi
    return A* 1./(X*sigma *np.sqrt(2*pi)) * np.exp(- (np.log(X/Xm ))**2 /2./sigma**2)

[docs]def compute_poissonian_uncertainties(bc,Ntot,S,perr):
    """
	Compute Poissonian upper and lower limit uncertainties from Gehrels 1986.

	**Parameters**

	- `bc`:{array}
	   counts within each bin
	- `Ntot`: {int}
	   total number of data
	- `S`: {int}
	   the confidence level (CL) for uncertainties, we consider S=1, meaning 68% CL, S=2 for the 95%, etc...
	- `perr`:{array}
	   array where upper and lower uncertainties are stored, `shape= 2 x size(bc)`.

    """

    ppl=[lambda N:N * (1. - 1. / (9. * N) - S  / (3. * np.sqrt(N))) ** 3,
		 lambda N: N + S * np.sqrt(N + 1) + (S**2 + 2) / 3.] 			# Approximations Gehrels 1986

    for i in range(len(bc)) :
		n=bc[i]
		if n<=100:
			perr[1][i]= (ppl[1] (n) - n)/Ntot
			if n==0:
				perr[0][i]= perr[1][i]
			else:
				perr[0][i]=(n- ppl[0](n))/Ntot
		else:
			perr[0][i]= S*np.sqrt(n)/Ntot
			perr[1][i]= S*np.sqrt(n)/Ntot

    return perr

[docs]def bootstrap_resampling(I, sigmaI, P, sigmaP,nsamples,upper_limit=False):
    """
	It performs the bootstrap resampling  from observations. For each group  of values
	(I,P, sigmaI, sigmaP) it resamples `nsamples` values with Gaussian random values `Isamples` and `Psamples`
	with center around I, or P and width sigmaI and sigmaP. If data are upper limit  it resamples by assuming a uniform
    distribution of random number from 0 to  :math:`\sqrt{P^2 +\sigma_P^2}`.

	**Parameters**

	- `I, sigmaI, P, sigmaP`={floats}
	   values for resampling
	- `nsamples`:{int}
	   how many resampling to do
	-  `upper_limit`:{bool}

	**Returns**

	- `f`:{array}
	   resampled fractional polarization

    """
    Isamples=abs(np.random.normal(loc=I, scale=sigmaI, size=nsamples))
    if upper_limit:
		Psamples=np.random.uniform(low=0.0, high=np.sqrt(P**2+sigmaP**2), size=nsamples)#
    else:
		Psamples=abs(np.random.normal(loc=P, scale=sigmaP, size=nsamples))
    f= Psamples/Isamples
    m=np.ma.masked_less_equal(Psamples/Isamples,1.) # mask the sources whose P>I
    return f[m.mask]

[docs]def pi2(mu, sigma, apply_correction=False ) :
    """
	Compute  :math:`< \Pi^2 >` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.

	.. math::

	   <\Pi^2 > = \mu^2 e^{2\sigma^2}

    """
    if apply_correction:
		t= np.log(1./mu)/ np.sqrt(2)/sigma
		factor=( (1+ spec.erf(t+ (np.sqrt(2) *sigma ) ) ) / (1 +spec.erf(t) )  )
    else:
		factor=1
    pi2= mu**2 *np.exp(2.*sigma**2) * factor
    return pi2

[docs]def pi(mu, sigma, apply_correction=False) :
    """
	Compute  :math:`< \Pi >` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.

	.. math::

		<\Pi> = \mu e^{\sigma^2/2}
    """
    if apply_correction:
		t= np.log(1./mu)/ np.sqrt(2)/sigma
		factor=( (1+ spec.erf(t+ (sigma/np.sqrt(2) ) ) ) / (1 +spec.erf(t) )  )
    else:
        factor=1
    pi= mu *np.exp(.5*sigma**2) * factor
    return pi

[docs]def pimed(mu, sigma, apply_correction=False ):
    """
	Compute  :math:`\Pi_{med}` from :math:`\mu, \sigma` lognormal best fit parameters, following Battye et al. 2011.
    """
    if apply_correction:
        t= np.log(1./mu)/ np.sqrt(2)/sigma
        factor=np.exp( np.sqrt(2)*sigma * spec.erfinv( .5 * (spec.erf(t) -1 )  ) )
    else:
        factor=1.
    pi= mu *factor
    return pi

[docs]def err_p2(mu, sigma, cov):
    """
	Compute errors on :math:`< \Pi^2 >` propagating uncertainties on  :math:`\mu, \sigma`  parameters.
    """
    dfdmu=2.* mu*np.exp(2.*sigma**2)
    dfdsig=4.* mu**2 *np.exp(2.*sigma**2) * sigma
    deltap2= np.sqrt(dfdmu**2 * cov[1,1] + 2. * dfdmu* dfdsig* cov[2,1]  + dfdsig**2* cov[2,2] )
    return deltap2
[docs]def err_pi (mu, sigma,cov ):
    """
	Compute errors on :math:`< \Pi >` propagating uncertainties on  :math:`\mu, \sigma`  parameters.
    """
    dfdmu= np.exp(.5*sigma**2)
    dfdsig=mu*sigma *np.exp(.5*sigma**2)
    deltap= np.sqrt(dfdmu**2 * cov[1,1] + 2. * dfdmu* dfdsig* cov[2,1]  + dfdsig**2* cov[2,2] )
    return deltap

[docs]def chisquare(obs,exp,sigma):

    return np.sum(((obs-exp)/sigma)**2)

[docs]def fitting_lognormal_from_fluxes(flux, polflux,eI, eP, idstring,
										fig=None, nbins=15, resampling =True,workdir='./',
										 **kwargs):
    """
	Fit logrnormal distribution of fractional  polarization from catalog encoding total intensity and polarized fluxes.

	**Parameters**

	- `flux, eI`:{array}
	   total intensity fluxes and errors
	- `polflux, eP`:{array}
	   polarization flux and errors
	- `idstring`: {string}
	   string used for plotting
	- `fig`:{int}
	   id of figure
	- `nbins`:{int}
	   fractional polarization histogram bins
	- `resampling`:{bool}
	   if `True` resample the catalog with 1000 resampling, otherwise it does not.
	- `workdir`:{string}
	   path to store plots and files.
	- `saveplot`:{string}
	   name and format to store the plot of the lognormal fit in `workdir`
	- `bestfitparams2file`:{bool}
	   save the computed best fit parameters, the average fractional polarizations to binary file `*.npy` in `workdir`.

	**Returns**

	- `Pim, Pi2, Pi`:{floats}
	   :math:`\Pi_{med},< \Pi^2 >,< \Pi >`
	- `deltapim, deltap2 deltap`
		errors on the computed quantities.

    """

    if not 'saveplot' in kwargs:
        kwargs['saveplot']=None
    if not 'verbose' in kwargs:
		kwargs['verbose']=True
    if not 'bestfitparams2file' in kwargs:
        kwargs['bestfitparams2file']=True
    fpol=np.array(polflux/flux)
    if resampling :
        ziplist=zip(flux, polflux,eI, eP)
        resampled=[]
        nresampl=1000
        for I,P,sigmaI,sigmaP  in ziplist :
            resampled.append(p_resampling(I,sigmaI,P,sigmaP,nresampl))
        resampled=np.concatenate(resampled)
    else :
        resampled=fpol

    if min(fpol)/max(fpol)< 1e-3:
        if kwargs['verbose']:print "using a not uniform binning"
        edges= np.concatenate([np.linspace(min(fpol),min(fpol)*5., 3), np.linspace(min(fpol)*5., max(fpol), nbins-2)])
    else:
        edges=np.linspace(min(fpol),max(fpol),nbins+1)
    pdf,edges=np.histogram(resampled,bins= edges,normed=False )
    bins=np.array([.5*(edges[i+1] +edges[i] ) for i in range(nbins-1 )])

    maskbins=np.digitize(resampled,bins)
    mbins=np.digitize(fpol,bins)
    Ntot_r=1.*len(resampled)
    Ntot= 1.*len(fpol)
    pdf= 1.*pdf /Ntot_r
    pmean=np.zeros(nbins)
    deltab=np.array([.5*(edges[i+1] -edges[i] ) *100. for i in xrange(nbins)])
    bc= np.bincount(mbins)

    S=1 # 1 sigma 68% CL

    perr=np.zeros(2*(nbins)).reshape(2,nbins )
    perr=compute_poissonian_uncertainties(bc,Ntot,S,perr)

    for i in range(max(maskbins)+1) :
        idx=np.where(maskbins == i)[0]
        pmean[i]= np.mean(resampled[idx])
    try:
        pl.figure(fig)
        pl.errorbar(pmean*100.,pdf,yerr=[perr[0],perr[1]],fmt='go')
        popt,pcov=so.curve_fit(lognormal_distribution, (pmean)*100., pdf,sigma= np.sqrt(perr[1]**2 + perr[0]**2))
        chi2=chisquare(pdf ,lognormal_distribution(pmean*100. ,*popt) ,  np.sqrt(perr[1]**2 + perr[0]**2))
        DOF=(nbins-len(popt))
        redchi2=chi2/DOF
        pte =  1- stats.chi2.cdf(chi2,DOF)
        if kwargs['verbose']:
			print "----"*20,"\n","Fitting lognormal distribution on "+idstring+" with %d data"%(len(fpol))
			print "A= %g\nmu =%g +- %g \nsigma=%g per cent +- %g"%(popt[0],(popt[1]),np.sqrt(pcov[1,1]),popt[2],np.sqrt(pcov[2,2]))
			print "Reduced Chi^2 : %g\nPTE :%g"%(redchi2, pte )
			print "----"*20
        x=np.linspace(0.1,max(fpol)*100.,1024)

        pl.plot(x,lognormal_distribution(x,*popt),label=idstring + r' $A= %.2f ,\, \mu= %.2f\, \sigma=%.2g $'%(popt[0],(popt[1]),popt[2]))
        Pi_m=popt[1]
        deltapim=np.sqrt(pcov[1,1])
        sigma=popt[2]
        Pi2= Pi_m**2*np.exp(2.*sigma**2)
        deltap2=err_p2(Pi_m, sigma, pcov)
        Pi= Pi_m*np.exp(sigma**2/2.)
        deltap =err_pi(Pi_m, sigma, pcov)
    except RuntimeError:
        print "Unable to fit Lognormal too few data points"
        Pi_m=np.median(resampled)
        Pi2= np.mean(resampled**2)
        deltap2=0.
        Pi= np.mean(resampled)
        deltap=0.
        deltapim=0.
        pass
    finally:
        pl.xticks(fontsize=14)
        pl.yticks(fontsize=15)
        pl.xlabel(r'$\Pi$'+' [ % ] ' ,fontsize=14)
        pl.ylabel(r'$P\,(\Pi)$ ',fontsize=15)
        pl.legend(loc='best', fontsize=14)
        pl.tight_layout()

        #pl.title(idstring)
        if max(fpol)>.5:
            pl.xlim([0.,30.])
        else:
            pl.xlim([0,max(fpol)*100.])
	    if kwargs['saveplot'] is not None :
			pl.savefig(workdir+idstring+kwargs['saveplot'])
        pl.close()
        if kwargs['bestfitparams2file']:
			np.save(workdir+'bestfit_params_'+idstring, [popt, pcov, redchi2, pte, Pi_m, deltapim, Pi, deltap, Pi2, deltap2])

        return Pi_m, Pi2, deltap2,deltapim, Pi, deltap
[docs]def resampling_fpol(fpol, sigma,flag, nresampl=100):
    """
	Resampling a catalogue starting from fractional polarization values, errors and flags into the data. Flags identify which data have to be
	considered as upper limits.

	**Parameters**

	- `fpol`:{float}
	- `sigma`:{float}
	- `flag`:{bool}
	- `nresampl`:{int}
        how many resampling to do

	**Returns**

	- `fpolres`:{array}
	   array of resampled fractional polarizations.

    """

    if flag:
        fpolres=np.random.uniform(low=0.,high=fpol, size=nresampl)

    else:
        fpolres=abs(np.random.normal(loc=fpol,scale=sigma, size=nresampl))
    return fpolres

[docs]def fitting_lognormal_from_fpol(fpol,errfpol, flags,idstring,
 								fig=None, nbins=15, resampling=True,workdir='./', **kwargs):

    """
	Fit logrnormal distribution of fractional  polarization from catalog encoding fractional polarization and errors on it.

	**Parameters**

	- `pol`:{array}
		fractional polarization
	- `errfpol`:{array}
		errors of fractional polariz.
	- `flag`:{array}
		if `flag=1 ` or `flag=True` it means that data are flagged and they are considered as upper limits.

	The rest of the parameters are the very same as in :func:`fitting_lognormal_from_fluxes`
    """
    if not 'saveplot' in kwargs:
		kwargs['saveplot']=None
    if not 'verbose' in kwargs:
		kwargs['verbose']=True

    if not 'bestfitparams2file' in kwargs:
        kwargs['bestfitparams2file']=True


    if resampling :
        resampled=[]
        nresampl=1000
        for P,eP, F in zip(fpol,errfpol, flags):
            resampled.append(resampling_fpol(P,eP,F,nresampl=nresampl ))
        resampled=np.concatenate(resampled)
    else :
        resampled=(fpol)


    if min(fpol)/max(fpol)< 1e-3:
        if kwargs['verbose']:print "using a not uniform binning"
        edges= np.concatenate([np.linspace(min(fpol),min(fpol)*5., 3), np.linspace(min(fpol)*5., max(fpol), nbins-2)])
    else:
        edges=np.linspace(min(fpol),max(fpol),nbins+1)

    pdf,edges=np.histogram(resampled,bins= edges,normed=False )
    bins=np.array([.5*(edges[i+1] +edges[i] ) for i in range(nbins-1 )])

    maskbins=np.digitize(resampled,bins)
    mbins=np.digitize(fpol,bins)
    Ntot_r=1.*len(resampled)
    Ntot= 1.*len(fpol)
    pdf= 1.*pdf /Ntot_r
    pmean=np.zeros(nbins)
    deltab=np.array([.5*(edges[i+1] -edges[i] ) *100. for i in xrange(nbins)])
    bc= np.bincount(mbins)

    S=1 # 1 sigma 68% CL

    perr=np.zeros(2*(nbins)).reshape(2,nbins )
    perr=compute_poissonian_uncertainties(bc,Ntot,S,perr)

    for i in range(max(maskbins)+1) :
        idx=np.where(maskbins == i)[0]
        pmean[i]= np.mean(resampled[idx])
    pl.figure(fig)
    pl.errorbar(pmean*100.,pdf,yerr=[perr[0],perr[1]],fmt='go')

    try:
        popt,pcov=so.curve_fit(lognormal_distribution, (pmean)*100., pdf,sigma= np.sqrt(perr[1]**2 + perr[0]**2))
        mu=popt[1]
        sigma=popt[2]
        Pi_m = pimed(mu, sigma)
        deltapim=np.sqrt(pcov[1,1])
        Pi2= pi2(mu, sigma)
        Pi=pi (mu, sigma )
        chi2=chisquare(pdf ,lognormal_distribution(pmean*100. ,*popt) ,  np.sqrt(perr[1]**2 + perr[0]**2))
        DOF=(nbins-len(popt))
        redchi2=chi2/DOF
        pte =  1- stats.chi2.cdf(chi2,DOF)
        if kwargs['verbose']:
            print "----"*20,"\n","Fitting lognormal distribution on "+idstring+" with %d data"%(len(fpol))
            print "A= %g\nmu =%g +- %g \nsigma=%g per cent +- %g"%(popt[0],(popt[1]),np.sqrt(pcov[1,1]),popt[2],np.sqrt(pcov[2,2]))
            print "Reduced Chi^2 : %g\nPTE :%g"%(redchi2, pte )
            print "----"*20
        x=np.linspace(0.1,max(fpol)*100.,1024)
        pl.plot(x,lognormal_distribution(x,*popt),label=idstring+r' $A= %.2f ,\, \mu= %.2f, \, \sigma=%.2g $'%(popt[0],(popt[1]),popt[2]))
        deltap2=err_p2(Pi_m, sigma, pcov)
        deltap =err_pi(Pi_m, sigma, pcov)
    except RuntimeError:
        print "Unable to fit Lognormal too few data points"
        Pi_m=np.median(resampled)
        Pi2= np.mean(resampled**2)
        Pi= np.mean(resampled)
        deltap2=0.
        deltap=0.
        deltapim=0.
        pass
    finally:
        pl.xticks(fontsize=14)
        pl.yticks(fontsize=15)
        pl.xlabel(r'$\Pi$'+' [ % ] ' ,fontsize=14)
        pl.ylabel(r'$P\,(\Pi)$ ',fontsize=15)
        pl.legend(loc='best', fontsize=14)
        #pl.title(idstring)
        pl.tight_layout()
        if max(fpol)>.5:
            pl.xlim([0.,10.])
        else:
            pl.xlim([0,max(fpol)*100.])
	    if kwargs['saveplot'] is not None :
			pl.savefig(workdir+idstring+kwargs['saveplot'])
        pl.close()
        if kwargs['bestfitparams2file']:
			np.save(workdir+'bestfit_params_'+idstring, [popt, pcov, redchi2, pte, Pi_m, deltapim, Pi, deltap, Pi2, deltap2])

        return Pi_m, Pi2, deltap2,deltapim, Pi, deltap
Navigation

Source code for Stats

Quick search

Navigation