{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Apply a non-stationary nucleotide model to an alignment with a tree\n", "\n", "We analyse an alignment with sequences from 7 primates." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Chimpanzee',\n", " 'Galago',\n", " 'Gorilla',\n", " 'HowlerMon',\n", " 'Human',\n", " 'Orangutan',\n", " 'Rhesus']" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from cogent3.app import io\n", "\n", "reader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n", "aln = reader(\"../data/primate_brca1.fasta\")\n", "aln.names" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Specify the tree via a tree instance" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "model(type='model', sm='GN', tree='root', name=None, sm_args=None, lf_args=None, time_het=None, param_rules=None, opt_args=None, split_codons=False, show_progress=False, verbose=False)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from cogent3 import load_tree\n", "from cogent3.app import evo\n", "\n", "tree = load_tree(\"../data/primate_brca1.tree\")\n", "gn = evo.model(\"GN\", tree=tree)\n", "gn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Specify the tree via a path." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "model(type='model', sm='GN', tree='../data/primate_brca1.tree', name=None, sm_args=None, lf_args=None, time_het=None, param_rules=None, opt_args=None, split_codons=False, show_progress=False, verbose=False)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gn = evo.model(\"GN\", tree=\"../data/primate_brca1.tree\")\n", "gn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Apply the model to an alignment" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GN
keylnLnfpDLCunique_Q
-6987.883425True
\n" ], "text/plain": [ "GN\n", "============================================\n", "key lnL nfp DLC unique_Q\n", "--------------------------------------------\n", " -6987.8834 25 True \n", "--------------------------------------------\n", "\n", "1 rows x 5 columns" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fitted = gn(aln)\n", "fitted" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In the above, no value is shown for `unique_Q`. This can happen because of numerical precision issues.\n", "\n", "**NOTE:** in the display of the `lf` below, the \"length\" parameter is not the expected number of substitutions (ENS). It is, instead, just a scalar." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "

GN

\n", "

log-likelihood = -6987.8834

\n", "

number of free parameters = 25

\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Global params
A>CA>GA>TC>AC>GC>TG>AG>CG>TT>A
0.87003.66690.91111.59252.12646.03238.21781.22880.62941.2498
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
T>C
3.4136
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Edge params
edgeparentlength
Galagoroot0.1735
HowlerMonroot0.0450
Rhesusedge.30.0215
Orangutanedge.20.0078
Gorillaedge.10.0025
Humanedge.00.0061
Chimpanzeeedge.00.0028
edge.0edge.10.0000
edge.1edge.20.0033
edge.2edge.30.0121
edge.3root0.0077
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Motif params
ACGT
0.37560.17680.20780.2398
\n" ], "text/plain": [ "GN\n", "log-likelihood = -6987.8834\n", "number of free parameters = 25\n", "============================================================================\n", " A>C A>G A>T C>A C>G C>T G>A G>C\n", "----------------------------------------------------------------------------\n", "0.8700 3.6669 0.9111 1.5925 2.1264 6.0323 8.2178 1.2288\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "==========================\n", " G>T T>A T>C\n", "--------------------------\n", "0.6294 1.2498 3.4136\n", "--------------------------\n", "\n", "==============================\n", " edge parent length\n", "------------------------------\n", " Galago root 0.1735\n", " HowlerMon root 0.0450\n", " Rhesus edge.3 0.0215\n", " Orangutan edge.2 0.0078\n", " Gorilla edge.1 0.0025\n", " Human edge.0 0.0061\n", "Chimpanzee edge.0 0.0028\n", " edge.0 edge.1 0.0000\n", " edge.1 edge.2 0.0033\n", " edge.2 edge.3 0.0121\n", " edge.3 root 0.0077\n", "------------------------------\n", "====================================\n", " A C G T\n", "------------------------------------\n", "0.3756 0.1768 0.2078 0.2398\n", "------------------------------------" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fitted.lf" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.1" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }