The Markov Chain Monte Carlo Algorithm

pdf version

Chapter 8: MCMC continued

Section 8.1: Tree Proposals

Subsection 8.1.1: Updating node ages

First, let us implement a move that updates the node ages.

void MCMC::updateNodeAges( void )
{
    // store the current probabilities
    // we need these to compute the acceptance probabilities
    double current_prior = my_model.getTreePrior();
    double current_likelihood = my_model.getLikelihood();

    // we need to pick an internal node randomly
    Tree* my_tree = my_model.getTree();
    int num_nodes = my_tree->getNumberOfNodes();
    int num_tip_nodes = ((num_nodes + 1) / 2);
    int num_internal_nodes = num_tip_nodes - 2;

    const std::vector<TreeNode*> my_nodes = my_tree->getNodes();

    // pick a random node which is not the root and neithor the direct descendant of the root
    double u = rng.uniform01();
    int index = num_tip_nodes + int( std::floor(num_internal_nodes * u) );

    TreeNode* node = my_nodes[index];
    TreeNode* parent = node->getParent();

    // we need to work with the times
    double parent_age  = parent->getAge();
    double my_age      = node->getAge();
    double child_Age   = node->getLeftChild()->getAge();
    if ( child_Age < node->getRightChild()->getAge())
    {
        child_Age = node->getRightChild()->getAge();
    }

    // draw new ages and compute the hastings ratio at the same time
    double my_new_age = (parent_age-child_Age) * rng.uniform01() + child_Age;

    // set the age
    node->setAge( my_new_age );

    double new_prior = my_model.getTreePrior();
    double new_likelihood = my_model.getLikelihood();

    double acceptance_prob = new_prior - current_prior + new_likelihood - current_likelihood;

    u = log( rng.uniform01()) ;

    if ( isnan(acceptance_prob) || u > acceptance_prob )
    {
        // reject -> reset the value
        node->setAge( my_age );
    }
    else
    {
        // accept! nothing to do, because we changed our value already
    }


}

Subsection 8.1.2: Updating the root age

void MCMC::updateRootAge( void )
{
    // store the current probabilities
    // we need these to compute the acceptance probabilities
    double current_prior = my_model.getTreePrior() + my_model.getRootPrior();
    double current_likelihood = my_model.getLikelihood();

    // we need to pick an internal node randomly
    Tree* my_tree = my_model.getTree();
    TreeNode* root = my_tree->getRootNode();

    // we need to work with the times
    double my_age      = root->getAge();
    double child_Age   = root->getLeftChild()->getAge();
    if ( child_Age < root->getRightChild()->getAge())
    {
        child_Age = root->getRightChild()->getAge();
    }

    // define the window size
    double delta = 15.0;

    // draw new ages and compute the hastings ratio at the same time
    double my_new_age = delta * (0.5 - rng.uniform01()) + my_age;

    // only proceed of the proposed root age is older than both its children
    if ( my_new_age > child_Age )
    {

        // set the age
        root->setAge( my_new_age );

        double new_prior = my_model.getTreePrior() + my_model.getRootPrior();
        double new_likelihood = my_model.getLikelihood();

        double acceptance_prob = new_prior - current_prior + new_likelihood - current_likelihood;

        double u = log( rng.uniform01()) ;

        if ( isnan(acceptance_prob) || u > acceptance_prob )
        {
            // reject -> reset the value
            root->setAge( my_age );
        }
        else
        {
            // accept! nothing to do, because we changed our value already
        }

    }

}

Subsection 8.1.2: Nearest neighbor interchange for topology updates

void MCMC::updateTopologyNNI( void )
{
    // store the current probabilities
    // we need these to compute the acceptance probabilities
    double current_prior = my_model.getTreePrior();
    double current_likelihood = my_model.getLikelihood();

    // we need to pick an internal node randomly
    Tree* my_tree = my_model.getTree();
    int num_nodes = my_tree->getNumberOfNodes();
    int num_tip_nodes = ((num_nodes + 1) / 2);
    int num_internal_nodes = num_tip_nodes - 2;

    const std::vector<TreeNode*> my_nodes = my_tree->getNodes();

    // pick a random node which is not the root and neithor the direct descendant of the root
    double u = rng.uniform01();
    TreeNode* node;
    do {
        double u = rng.uniform01();
        size_t index = size_t( std::floor(num_nodes * u) );
        node = my_nodes[index];
    } while ( node->isRoot() || node->getParent()->isRoot() );

    TreeNode* parent = node->getParent();
    TreeNode* grandparent = parent->getParent();
    TreeNode* uncle = grandparent->getLeftChild();
    // check if we got the correct child
    if ( uncle == parent )
    {
        uncle = grandparent->getRightChild();
    }

    // we need to work with the times
    double parent_age   = parent->getAge();
    double uncles_age   = uncle->getAge();

    if ( uncles_age < parent_age )
    {

        // now exchange the two nodes
        if ( grandparent->getLeftChild() == uncle )
        {
            grandparent->setLeftChild( node );
        }
        else
        {
            grandparent->setRightChild( node );
        }

        if ( parent->getLeftChild() == node )
        {
            parent->setLeftChild( uncle );
        }
        else
        {
            parent->setRightChild( uncle );
        }

        node->setParent( grandparent );
        uncle->setParent( parent );


        double new_prior = my_model.getTreePrior();
        double new_likelihood = my_model.getLikelihood();

        double acceptance_prob = new_prior - current_prior + new_likelihood - current_likelihood;

        u = log( rng.uniform01()) ;

        if ( isnan(acceptance_prob) || u > acceptance_prob )
        {
            // reject -> reset the value
            if ( grandparent->getLeftChild() == node )
            {
                grandparent->setLeftChild( uncle );
            }
            else
            {
                grandparent->setRightChild( uncle );
            }

            if ( parent->getLeftChild() == uncle )
            {
                parent->setLeftChild( node );
            }
            else
            {
                parent->setRightChild( node );
            }

            node->setParent( parent );
            uncle->setParent( grandparent );
        }
        else
        {
            // accept! nothing to do, because we changed our value already
        }

    }

}

Section 8.2: Tree summaries

Subsection 8.2.1: Definition

#ifndef TreeSummary_H
#define TreeSummary_H

#include <string>
#include <vector>

class Tree;
class TreeNode;

/**
 * Newick tree reader.
 *
 * The newick tree reader provides convenience methods for reading trees in Newick format.
 *
 *
 *
 */
class TreeSummary {

public:
    TreeSummary(const std::string &fn);

    void                                summarize(double b);

private:
    std::string                         filename;

};


#endif

Subsection 8.2.1: Implementation

#include "NewickTreeReader.h"
#include "Tree.h"
#include "TreeSummary.h"

#include <stdio.h>
#include <iostream>
#include <map>

TreeSummary::TreeSummary(const std::string &fn) :
    filename(fn)
{

}

void TreeSummary::summarize( double b )
{
    NewickTreeReader reader = NewickTreeReader();
    std::vector<Tree> trees = reader.readTrees( filename );

    int burnin    = b * trees.size();
    int n_samples = trees.size() - burnin;

    std::map<std::string,int> topology_counts;
    for (int i=burnin; i<trees.size(); ++i)
    {
        std::string newick = trees[i].getNewickRepresentation( true );
        topology_counts[ newick ]++;
    }

    for (std::map<std::string,int>::const_iterator it=topology_counts.begin(); it!=topology_counts.end(); ++it)
    {
        std::cout << double(it->second) / n_samples << "\t\t" << it->first << std::endl;
    }


}

References