Source code for source_release.prime_mcmc

import sys
import numpy as npy
import scipy.stats
import scipy.linalg
import math
import uuid
import matplotlib.pyplot as plt

#---------------------------------------------------------------------------------------
#  Adaptive Metropolis Markov Chain Monte Carlo
#---------------------------------------------------------------------------------------
def _ucov(spl,splmean,cov,lastup):
    r"""
    Update proposal covariance
    """
    if len(spl.shape) == 1:
        nspl = 1;
        ndim = spl.shape[0]
    else:
        (nspl,ndim)=spl.shape

    if nspl>0:
        for i in range(nspl):
            iglb    = lastup+i
            splmean = (iglb*splmean+spl[i])/(iglb+1)
            rt = (iglb-1.0)/iglb
            st = (iglb+1.0)/iglb**2
            cov = rt*cov+st*npy.dot(npy.reshape(spl[i]-splmean,(ndim,1)),npy.reshape(spl[i]-splmean,(1,ndim)))

    return lastup + nspl, splmean, cov


[docs]
def ammcmc(opts,likTpr,lpinfo,progress=True):
    r"""
    
    Adaptive Metropolis Markov Chain Monte Carlo

    
    Parameters
    ----------
    opts : dictionary of parameters

        - nsteps : no. of mcmc steps
        - nburn  : no. of mcmc steps for burn-in (proposal fixed to initial covariance)
        - nadapt : adapt every nadapt steps after nburn
        - nfinal : stop adapting after nfinal steps
        - inicov : initial covariance
        - coveps : small additive factor to ensure covariance matrix is positive definite (only added to diagonal if covariance matrix is singular without it)
        - burnsc : factor to scale up/down proposal if acceptance rate is too high/low
        - gamma  : factor to multiply proposed jump size with in the chain past the burn-in phase (Reduce this factor to get a higher acceptance rate. Defaults to 1.0)
        - spllo  : lower bounds for chain samples
        - splhi  : upper bounds for chain samples
        - rnseed : Optional seed for random number generator (needs to be integer >= 0) If not specified, then random number seed is not fixed and every chain will be different.
        - tmpchn : Optional; if present, will save chain state every 'ofreq' to ascii file. Filename is randomly generated if tmpchn is set to 'tmpchn', or set to the string passed through this option if not present, chain states are not saved during the MCMC progress

    cini : starting mcmc state

    likTpr : log-posterior function; it takes two input parameters as follows
        - first parameter is a 1D array containing the chain state at which the posterior will to be evaluated
        - the second parameter contains settings the user can pass to this function; see below info for 'lpinfo'
        - this function is expected to return log-Likelihood and log-Prior values (in this order)

    lpinfo : info to be passed to the log-posterior function

        this object can be of any type (e.g. None, scalar, list, array, dictionary, etc) 
        as long as it is consistent with settings expected inside the 'likTpr' function

    Returns
    -------
    mcmcRes: results dictionary

        - 'chain' : chain samples (nsteps x chain dimension)
        - 'cmap' :  MAP estimate 
        - 'pmap' :  MAP log posterior
        - 'accr' :  overall acceptance rate
        - 'accb' : fraction of samples inside bounds
        - 'rejAll' : overall no. of samples rejected
        - rejOut' : no. of samples rejected due to being outside bounds
        - 'minfo' : meta_info, acceptance probability, log likelihood, log prior
        - 'final_cov' : the covariance matrix at the end of the run
    """
    # -------------------------------------------------------------------------------
    # Parse options
    # -------------------------------------------------------------------------------
    nsteps = opts['nsteps']
    nburn  = opts['nburn' ]
    nadapt = opts['nadapt']
    nfinal = opts['nfinal']
    inicov = opts['inicov']
    coveps = opts['coveps']
    burnsc = opts['burnsc']
    spllo  = opts['spllo' ]
    splhi  = opts['splhi' ]
    cini   = opts['inistate']

    if 'gamma' not in opts:
        gamma = 1.0 # default
    else:
        gamma  = opts['gamma' ]

    if 'ofreq' not in opts:
        ofreq = 10000 # default
    else:
        ofreq  = opts['ofreq' ]

    if 'logfile' not in opts:
        log_file = 'logMCMC.txt'
    else:
        log_file = opts['logfile']

    f = open(log_file, "w")
    f.write("nsteps: %d\n"%(nsteps))
    f.write("gamma: %f\n"%(gamma))
    f.close()

    if 'tmpchn' not in opts:
        tmp_file = 'None'
    else:
        if opts['tmpchn'] == 'tmpchn':
            tmp_file = str(uuid.uuid4())+'.dat'
        else:
            tmp_file = opts['tmpchn']
        f = open(log_file, "a")
        f.write('Saving intermediate chains to %s\n'%(tmp_file))
        f.close()

    # If desired, fix random number seed to make chain reproducible
    if 'rnseed' in opts:
        iseed = opts['rnseed']
        if isinstance(iseed, (int,long)) and iseed >= 0:
            npy.random.seed(iseed)
            print('\nam-mcmc::Fixing the random number seed to ', iseed)
        else:
            print('\nWARNING: am-mcmc::invalid random number seed specified: ', iseed)
            print('Will proceed without fixing random number seed.\n')

    rej    = 0;                        # Counts number of samples rejected
    rejlim = 0;                        # Counts number of samples rejected as out of prior bounds
    rejsc  = 0;                        # Counts number of rejected samples since last rescaling
    # -------------------------------------------------------------------------------
    # Pre-processing
    # -------------------------------------------------------------------------------
    cdim   = cini.shape[0]             # chain dimensionality
    cov    = npy.zeros((cdim,cdim))    # covariance matrix
    spls   = npy.zeros((nsteps,cdim))  # MCMC samples
    meta_info = npy.zeros((nsteps,3))  # Column for acceptance probability and posterior prob. of current sample
    na     = 0                         # counter for accepted jumps
    sigcv  = 2.4*gamma/npy.sqrt(cdim)  # covariance factor
    spls[0] = cini                     # initial sample set
    p1LikPri = likTpr(spls[0],lpinfo)  # and posterior probability of initial sample set
    if not isinstance(p1LikPri, list):
        print('\nERROR: This version requires the model return both log-likelihood and log-prior')
        return {}
    p1 = p1LikPri[0]+p1LikPri[1]
    meta_info[0] = [0.e0,p1LikPri[0],p1LikPri[1]]     # Arbitrary initial acceptance and posterior probability of initial guess
    pmode = p1                         # store current chain MAP probability value
    cmode = spls[0]                    # current MAP parameter Set
    nref  = 0                          # Samples since last proposal rescaling
    # -------------------------------------------------------------------------------
    # optional progress bar
    # -------------------------------------------------------------------------------
    if progress == True:
        try:
            from tqdm import tqdm
            mcmc_iterator = tqdm(range(nsteps-1))
        except:
            print("*For a progress bar, please install tqdm.")
            mcmc_iterator = range(nsteps-1)
    # -------------------------------------------------------------------------------
    # Main loop
    # -------------------------------------------------------------------------------
    for k in mcmc_iterator:
        # Deal with covariance matrix
        if k == 0:
            splmean   = spls[0];
            propcov   = inicov ;
            Rchol     = scipy.linalg.cholesky(propcov) ;
            lastup    = 1;      # last covariance update
        else:
            if (nadapt>0) and ((k+1)%nadapt)==0:
                if k<nburn:
                    if float(rejsc)/nref>0.95:
                        Rchol = Rchol/burnsc # scale down proposal
                        print('   Scaling down the proposal at step %d'%(k))
                    elif float(rejsc)/nref<0.05:
                        Rchol = Rchol*burnsc # scale up proposal
                        print('   Scaling up the proposal at step %d'%(k))
                    nref  = 0 ;
                    rejsc = 0 ;
                else:
                    lastup,splmean,cov=_ucov(spls[lastup:lastup+nadapt,:],splmean,cov,lastup)
                    try:
                        Rchol = scipy.linalg.cholesky(cov)
                    except scipy.linalg.LinAlgError:
                        try:
                            # add to diagonal to make the matrix positive definite
                            Rchol = scipy.linalg.cholesky(cov+coveps*npy.identity(cdim))
                        except scipy.linalg.LinAlgError:
                            print('WARNING: Covariance matrix is singular even after the correction')
                    Rchol = Rchol*sigcv
        #-Done with covariance matrix
        nref = nref + 1
        #
        # generate proposal and check bounds
        #
        u  = spls[k]+npy.dot(npy.random.randn(1,cdim),Rchol)[0];
        if npy.any(npy.less(u,spllo)) or npy.any(npy.greater(u,splhi)):
            outofbound = True
            accept     = False
            p2 = -1.e100        # Arbitrarily low posterior likelihood
            pr = -1.e100        # Arbitrarily low acceptance probability
        else:
            outofbound = False
        if not outofbound:
            p2Lik,p2Pri = likTpr(u,lpinfo)
            p2 = p2Lik+p2Pri
            pr = npy.exp(p2-p1);
            if (pr>=1.0) or (npy.random.random_sample()<=pr):
                spls[k+1] = u.copy();                # Store accepted sample
                meta_info[k+1] = [pr,p2Lik,p2Pri]    # and its meta information
                p1 = p2;
                if p1 > pmode:
                    pmode = p1 ;
                    cmode = spls[k+1] ;
                accept = True
            else:
                accept = False
        #
        # See if we can do anything about a rejected proposal
        #
        if not accept:
            # if 'am' then reject
            spls[k+1]=spls[k];
            meta_info[k+1,0] = pr               # acceptance probability of failed sample
            meta_info[k+1,1:] = meta_info[k,1:] # Posterior probability of sample k that has been retained
            rej   = rej   + 1;
            rejsc = rejsc + 1;
            if outofbound:
                rejlim  = rejlim + 1;
            # Done with if over methods
        # Done with if over original accept
        if ((k+2)%ofreq==0 and tmp_file != 'None'):
            f = open(log_file, "a")
            acc_rate = float(k+1)/float(k+1+rej)*100
            f.write("No. steps: %d, No. of rej:%d, acc. rate:%d%%\n"%(k+1,rej,round(acc_rate)))
            f.close()
            fout = open(tmp_file, 'ab')
            dataout = npy.concatenate((spls[k-ofreq+1:k+1,:], meta_info[k-ofreq+1:k+1,1:]), axis=1)
            npy.savetxt(fout, dataout, fmt='%.8e',delimiter=' ', newline='\n')
            fout.close()
    # Done loop over all steps

    # return output dictionary: samples, MAP sample and its posterior probability, overall acceptance probability
    # and probability of having sample inside prior bounds, overall number of samples rejected, and rejected
    # due to being out of bounds.
    mcmcRes={}
    mcmcRes['chain' ] = spls                     # chain
    mcmcRes['cmap'  ] = cmode                    # MAP state
    mcmcRes['pmap'  ] = pmode                    # MAP log posterior
    mcmcRes['accr'  ] = 1.0-float(rej)/nsteps    # acceptance rate (overall)
    mcmcRes['accb'  ] = 1.0-float(rejlim)/nsteps # samples inside bounds
    mcmcRes['rejAll'] = rej                      # overall no. of samples rejected
    mcmcRes['rejOut'] = rejlim                   # no. of samples rejected due to being outside bounds
    mcmcRes['minfo' ] = meta_info                # acceptance probability, log likelihood, log prior
    mcmcRes['final_cov'] = cov                   # the covariance matrix at the end of the run.
    return mcmcRes



[docs]
def save_mcmc_chain(fout,mcmc_chain,filetype="h5"):
    """
    save mcmc output
    """
    if filetype=="h5":
        import h5py
        f = h5py.File(fout, "w")
        dset = f.create_dataset("chain",     data=mcmc_chain['chain'],     compression="gzip")
        mdIn = f.create_dataset("mode",      data=mcmc_chain['cmap'],      compression="gzip")
        mdPs = f.create_dataset("modepost",  data=[mcmc_chain['pmap']],    compression="gzip")
        minf = f.create_dataset("minfo",     data=mcmc_chain['minfo'],     compression="gzip")
        accr = f.create_dataset("accr",      data=[mcmc_chain['accr']],    compression="gzip")
        fcov = f.create_dataset("final_cov", data=mcmc_chain['final_cov'], compression="gzip")
        f.close()
    elif filetype=="pkl":
        output = open(fout, 'wb')
        pickle.dump(mcmc_chain, output)
        output.close()
    else:
        print("save_mcmc_chain(): Unknown filetype ",filetype," ->quit!")
        sys.exit()