Source code for source_release.prime_mcmc

import sys
import numpy as npy
import scipy.stats
import scipy.linalg
import math
import uuid
import matplotlib.pyplot as plt

#---------------------------------------------------------------------------------------
#  Adaptive Metropolis Markov Chain Monte Carlo
#---------------------------------------------------------------------------------------
def _ucov(spl,splmean,cov,lastup):
    r"""
    Update proposal covariance
    """
    if len(spl.shape) == 1:
        nspl = 1;
        ndim = spl.shape[0]
    else:
        (nspl,ndim)=spl.shape

    if nspl>0:
        for i in range(nspl):
            iglb    = lastup+i
            splmean = (iglb*splmean+spl[i])/(iglb+1)
            rt = (iglb-1.0)/iglb
            st = (iglb+1.0)/iglb**2
            cov = rt*cov+st*npy.dot(npy.reshape(spl[i]-splmean,(ndim,1)),npy.reshape(spl[i]-splmean,(1,ndim)))

    return lastup + nspl, splmean, cov

[docs] def ammcmc(opts,likTpr,lpinfo,progress=True): r""" Adaptive Metropolis Markov Chain Monte Carlo Parameters ---------- opts : dictionary of parameters - nsteps : no. of mcmc steps - nburn : no. of mcmc steps for burn-in (proposal fixed to initial covariance) - nadapt : adapt every nadapt steps after nburn - nfinal : stop adapting after nfinal steps - inicov : initial covariance - coveps : small additive factor to ensure covariance matrix is positive definite (only added to diagonal if covariance matrix is singular without it) - burnsc : factor to scale up/down proposal if acceptance rate is too high/low - gamma : factor to multiply proposed jump size with in the chain past the burn-in phase (Reduce this factor to get a higher acceptance rate. Defaults to 1.0) - spllo : lower bounds for chain samples - splhi : upper bounds for chain samples - rnseed : Optional seed for random number generator (needs to be integer >= 0) If not specified, then random number seed is not fixed and every chain will be different. - tmpchn : Optional; if present, will save chain state every 'ofreq' to ascii file. Filename is randomly generated if tmpchn is set to 'tmpchn', or set to the string passed through this option if not present, chain states are not saved during the MCMC progress cini : starting mcmc state likTpr : log-posterior function; it takes two input parameters as follows - first parameter is a 1D array containing the chain state at which the posterior will to be evaluated - the second parameter contains settings the user can pass to this function; see below info for 'lpinfo' - this function is expected to return log-Likelihood and log-Prior values (in this order) lpinfo : info to be passed to the log-posterior function this object can be of any type (e.g. None, scalar, list, array, dictionary, etc) as long as it is consistent with settings expected inside the 'likTpr' function Returns ------- mcmcRes: results dictionary - 'chain' : chain samples (nsteps x chain dimension) - 'cmap' : MAP estimate - 'pmap' : MAP log posterior - 'accr' : overall acceptance rate - 'accb' : fraction of samples inside bounds - 'rejAll' : overall no. of samples rejected - rejOut' : no. of samples rejected due to being outside bounds - 'minfo' : meta_info, acceptance probability, log likelihood, log prior - 'final_cov' : the covariance matrix at the end of the run """ # ------------------------------------------------------------------------------- # Parse options # ------------------------------------------------------------------------------- nsteps = opts['nsteps'] nburn = opts['nburn' ] nadapt = opts['nadapt'] nfinal = opts['nfinal'] inicov = opts['inicov'] coveps = opts['coveps'] burnsc = opts['burnsc'] spllo = opts['spllo' ] splhi = opts['splhi' ] cini = opts['inistate'] if 'gamma' not in opts: gamma = 1.0 # default else: gamma = opts['gamma' ] if 'ofreq' not in opts: ofreq = 10000 # default else: ofreq = opts['ofreq' ] if 'logfile' not in opts: log_file = 'logMCMC.txt' else: log_file = opts['logfile'] f = open(log_file, "w") f.write("nsteps: %d\n"%(nsteps)) f.write("gamma: %f\n"%(gamma)) f.close() if 'tmpchn' not in opts: tmp_file = 'None' else: if opts['tmpchn'] == 'tmpchn': tmp_file = str(uuid.uuid4())+'.dat' else: tmp_file = opts['tmpchn'] f = open(log_file, "a") f.write('Saving intermediate chains to %s\n'%(tmp_file)) f.close() # If desired, fix random number seed to make chain reproducible if 'rnseed' in opts: iseed = opts['rnseed'] if isinstance(iseed, (int,long)) and iseed >= 0: npy.random.seed(iseed) print('\nam-mcmc::Fixing the random number seed to ', iseed) else: print('\nWARNING: am-mcmc::invalid random number seed specified: ', iseed) print('Will proceed without fixing random number seed.\n') rej = 0; # Counts number of samples rejected rejlim = 0; # Counts number of samples rejected as out of prior bounds rejsc = 0; # Counts number of rejected samples since last rescaling # ------------------------------------------------------------------------------- # Pre-processing # ------------------------------------------------------------------------------- cdim = cini.shape[0] # chain dimensionality cov = npy.zeros((cdim,cdim)) # covariance matrix spls = npy.zeros((nsteps,cdim)) # MCMC samples meta_info = npy.zeros((nsteps,3)) # Column for acceptance probability and posterior prob. of current sample na = 0 # counter for accepted jumps sigcv = 2.4*gamma/npy.sqrt(cdim) # covariance factor spls[0] = cini # initial sample set p1LikPri = likTpr(spls[0],lpinfo) # and posterior probability of initial sample set if not isinstance(p1LikPri, list): print('\nERROR: This version requires the model return both log-likelihood and log-prior') return {} p1 = p1LikPri[0]+p1LikPri[1] meta_info[0] = [0.e0,p1LikPri[0],p1LikPri[1]] # Arbitrary initial acceptance and posterior probability of initial guess pmode = p1 # store current chain MAP probability value cmode = spls[0] # current MAP parameter Set nref = 0 # Samples since last proposal rescaling # ------------------------------------------------------------------------------- # optional progress bar # ------------------------------------------------------------------------------- if progress == True: try: from tqdm import tqdm mcmc_iterator = tqdm(range(nsteps-1)) except: print("*For a progress bar, please install tqdm.") mcmc_iterator = range(nsteps-1) # ------------------------------------------------------------------------------- # Main loop # ------------------------------------------------------------------------------- for k in mcmc_iterator: # Deal with covariance matrix if k == 0: splmean = spls[0]; propcov = inicov ; Rchol = scipy.linalg.cholesky(propcov) ; lastup = 1; # last covariance update else: if (nadapt>0) and ((k+1)%nadapt)==0: if k<nburn: if float(rejsc)/nref>0.95: Rchol = Rchol/burnsc # scale down proposal print(' Scaling down the proposal at step %d'%(k)) elif float(rejsc)/nref<0.05: Rchol = Rchol*burnsc # scale up proposal print(' Scaling up the proposal at step %d'%(k)) nref = 0 ; rejsc = 0 ; else: lastup,splmean,cov=_ucov(spls[lastup:lastup+nadapt,:],splmean,cov,lastup) try: Rchol = scipy.linalg.cholesky(cov) except scipy.linalg.LinAlgError: try: # add to diagonal to make the matrix positive definite Rchol = scipy.linalg.cholesky(cov+coveps*npy.identity(cdim)) except scipy.linalg.LinAlgError: print('WARNING: Covariance matrix is singular even after the correction') Rchol = Rchol*sigcv #-Done with covariance matrix nref = nref + 1 # # generate proposal and check bounds # u = spls[k]+npy.dot(npy.random.randn(1,cdim),Rchol)[0]; if npy.any(npy.less(u,spllo)) or npy.any(npy.greater(u,splhi)): outofbound = True accept = False p2 = -1.e100 # Arbitrarily low posterior likelihood pr = -1.e100 # Arbitrarily low acceptance probability else: outofbound = False if not outofbound: p2Lik,p2Pri = likTpr(u,lpinfo) p2 = p2Lik+p2Pri pr = npy.exp(p2-p1); if (pr>=1.0) or (npy.random.random_sample()<=pr): spls[k+1] = u.copy(); # Store accepted sample meta_info[k+1] = [pr,p2Lik,p2Pri] # and its meta information p1 = p2; if p1 > pmode: pmode = p1 ; cmode = spls[k+1] ; accept = True else: accept = False # # See if we can do anything about a rejected proposal # if not accept: # if 'am' then reject spls[k+1]=spls[k]; meta_info[k+1,0] = pr # acceptance probability of failed sample meta_info[k+1,1:] = meta_info[k,1:] # Posterior probability of sample k that has been retained rej = rej + 1; rejsc = rejsc + 1; if outofbound: rejlim = rejlim + 1; # Done with if over methods # Done with if over original accept if ((k+2)%ofreq==0 and tmp_file != 'None'): f = open(log_file, "a") acc_rate = float(k+1)/float(k+1+rej)*100 f.write("No. steps: %d, No. of rej:%d, acc. rate:%d%%\n"%(k+1,rej,round(acc_rate))) f.close() fout = open(tmp_file, 'ab') dataout = npy.concatenate((spls[k-ofreq+1:k+1,:], meta_info[k-ofreq+1:k+1,1:]), axis=1) npy.savetxt(fout, dataout, fmt='%.8e',delimiter=' ', newline='\n') fout.close() # Done loop over all steps # return output dictionary: samples, MAP sample and its posterior probability, overall acceptance probability # and probability of having sample inside prior bounds, overall number of samples rejected, and rejected # due to being out of bounds. mcmcRes={} mcmcRes['chain' ] = spls # chain mcmcRes['cmap' ] = cmode # MAP state mcmcRes['pmap' ] = pmode # MAP log posterior mcmcRes['accr' ] = 1.0-float(rej)/nsteps # acceptance rate (overall) mcmcRes['accb' ] = 1.0-float(rejlim)/nsteps # samples inside bounds mcmcRes['rejAll'] = rej # overall no. of samples rejected mcmcRes['rejOut'] = rejlim # no. of samples rejected due to being outside bounds mcmcRes['minfo' ] = meta_info # acceptance probability, log likelihood, log prior mcmcRes['final_cov'] = cov # the covariance matrix at the end of the run. return mcmcRes
[docs] def save_mcmc_chain(fout,mcmc_chain,filetype="h5"): """ save mcmc output """ if filetype=="h5": import h5py f = h5py.File(fout, "w") dset = f.create_dataset("chain", data=mcmc_chain['chain'], compression="gzip") mdIn = f.create_dataset("mode", data=mcmc_chain['cmap'], compression="gzip") mdPs = f.create_dataset("modepost", data=[mcmc_chain['pmap']], compression="gzip") minf = f.create_dataset("minfo", data=mcmc_chain['minfo'], compression="gzip") accr = f.create_dataset("accr", data=[mcmc_chain['accr']], compression="gzip") fcov = f.create_dataset("final_cov", data=mcmc_chain['final_cov'], compression="gzip") f.close() elif filetype=="pkl": output = open(fout, 'wb') pickle.dump(mcmc_chain, output) output.close() else: print("save_mcmc_chain(): Unknown filetype ",filetype," ->quit!") sys.exit()