{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Apply a non-stationary nucleotide model to an alignment with a tree\n",
    "\n",
    "We analyse an alignment with sequences from 7 primates."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Chimpanzee',\n",
       " 'Galago',\n",
       " 'Gorilla',\n",
       " 'HowlerMon',\n",
       " 'Human',\n",
       " 'Orangutan',\n",
       " 'Rhesus']"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from cogent3.app import io\n",
    "\n",
    "reader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n",
    "aln = reader(\"../data/primate_brca1.fasta\")\n",
    "aln.names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify the tree via a tree instance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "model(type='model', sm='GN', tree='root', name=None, sm_args=None, lf_args=None, time_het=None, param_rules=None, opt_args=None, split_codons=False, show_progress=False, verbose=False)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from cogent3 import load_tree\n",
    "from cogent3.app import evo\n",
    "\n",
    "tree = load_tree(\"../data/primate_brca1.tree\")\n",
    "gn = evo.model(\"GN\", tree=tree)\n",
    "gn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify the tree via a path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "model(type='model', sm='GN', tree='../data/primate_brca1.tree', name=None, sm_args=None, lf_args=None, time_het=None, param_rules=None, opt_args=None, split_codons=False, show_progress=False, verbose=False)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gn = evo.model(\"GN\", tree=\"../data/primate_brca1.tree\")\n",
    "gn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Apply the model to an alignment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">GN</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>key</th>\n",
       "<th>lnL</th>\n",
       "<th>nfp</th>\n",
       "<th>DLC</th>\n",
       "<th>unique_Q</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td></td>\n",
       "<td style=\"font-family: monospace !important;\">-6987.8834</td>\n",
       "<td style=\"font-family: monospace !important;\">25</td>\n",
       "<td>True</td>\n",
       "<td></td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "GN\n",
       "============================================\n",
       "key           lnL    nfp     DLC    unique_Q\n",
       "--------------------------------------------\n",
       "       -6987.8834     25    True            \n",
       "--------------------------------------------\n",
       "\n",
       "1 rows x 5 columns"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fitted = gn(aln)\n",
    "fitted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In the summary table above, no value is shown for `unique_Q`. This can happen because of numerical precision issues.\n",
    "\n",
    "**NOTE:** in the display of the `lf` below, the \"length\" parameter is not the expected number of substitutions (ENS). It is, instead, just a scalar."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<h4>GN</h4>\n",
       "<p>log-likelihood = -6987.8834</p>\n",
       "<p>number of free parameters = 25</p>\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Global params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>A&gt;C</th>\n",
       "<th>A&gt;G</th>\n",
       "<th>A&gt;T</th>\n",
       "<th>C&gt;A</th>\n",
       "<th>C&gt;G</th>\n",
       "<th>C&gt;T</th>\n",
       "<th>G&gt;A</th>\n",
       "<th>G&gt;C</th>\n",
       "<th>G&gt;T</th>\n",
       "<th>T&gt;A</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"font-family: monospace !important;\">0.8700</td>\n",
       "<td style=\"font-family: monospace !important;\">3.6669</td>\n",
       "<td style=\"font-family: monospace !important;\">0.9111</td>\n",
       "<td style=\"font-family: monospace !important;\">1.5925</td>\n",
       "<td style=\"font-family: monospace !important;\">2.1264</td>\n",
       "<td style=\"font-family: monospace !important;\">6.0323</td>\n",
       "<td style=\"font-family: monospace !important;\">8.2178</td>\n",
       "<td style=\"font-family: monospace !important;\">1.2288</td>\n",
       "<td style=\"font-family: monospace !important;\">0.6294</td>\n",
       "<td style=\"font-family: monospace !important;\">1.2498</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "<table>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>T&gt;C</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"font-family: monospace !important;\">3.4136</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Edge params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>edge</th>\n",
       "<th>parent</th>\n",
       "<th>length</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Galago</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.1735</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">HowlerMon</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0450</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Rhesus</td>\n",
       "<td>edge.3</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0215</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Orangutan</td>\n",
       "<td>edge.2</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0078</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Gorilla</td>\n",
       "<td>edge.1</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0025</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Human</td>\n",
       "<td>edge.0</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0061</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">Chimpanzee</td>\n",
       "<td>edge.0</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0028</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.0</td>\n",
       "<td>edge.1</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0000</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.1</td>\n",
       "<td>edge.2</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0033</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.2</td>\n",
       "<td>edge.3</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0121</td>\n",
       "</tr>\n",
       "<tr>\n",
       "<td style=\"background: rgba(161, 195, 209, 0.25); font-weight: 600;\">edge.3</td>\n",
       "<td>root</td>\n",
       "<td style=\"font-family: monospace !important;\">0.0077</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n",
       "\n",
       "<table>\n",
       "<style>\n",
       "tr:last-child {border-bottom: 1px solid #000;} tr > th {text-align: center !important;} tr > td {text-align: left !important;}\n",
       "</style>\n",
       "<caption style=\"color: rgb(250, 250, 250); background: rgba(30, 140, 200, 1); align=top;\"><span style=\"font-weight: bold;\">Motif params</span><span></span></caption>\n",
       "<thead style=\"background: rgba(161, 195, 209, 0.75); font-weight: bold; text-align: center;\">\n",
       "<th>A</th>\n",
       "<th>C</th>\n",
       "<th>G</th>\n",
       "<th>T</th>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr>\n",
       "<td style=\"font-family: monospace !important;\">0.3756</td>\n",
       "<td style=\"font-family: monospace !important;\">0.1768</td>\n",
       "<td style=\"font-family: monospace !important;\">0.2078</td>\n",
       "<td style=\"font-family: monospace !important;\">0.2398</td>\n",
       "</tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "GN\n",
       "log-likelihood = -6987.8834\n",
       "number of free parameters = 25\n",
       "============================================================================\n",
       "   A>C       A>G       A>T       C>A       C>G       C>T       G>A       G>C\n",
       "----------------------------------------------------------------------------\n",
       "0.8700    3.6669    0.9111    1.5925    2.1264    6.0323    8.2178    1.2288\n",
       "----------------------------------------------------------------------------\n",
       "\n",
       "continued: \n",
       "==========================\n",
       "   G>T       T>A       T>C\n",
       "--------------------------\n",
       "0.6294    1.2498    3.4136\n",
       "--------------------------\n",
       "\n",
       "==============================\n",
       "      edge    parent    length\n",
       "------------------------------\n",
       "    Galago      root    0.1735\n",
       " HowlerMon      root    0.0450\n",
       "    Rhesus    edge.3    0.0215\n",
       " Orangutan    edge.2    0.0078\n",
       "   Gorilla    edge.1    0.0025\n",
       "     Human    edge.0    0.0061\n",
       "Chimpanzee    edge.0    0.0028\n",
       "    edge.0    edge.1    0.0000\n",
       "    edge.1    edge.2    0.0033\n",
       "    edge.2    edge.3    0.0121\n",
       "    edge.3      root    0.0077\n",
       "------------------------------\n",
       "====================================\n",
       "     A         C         G         T\n",
       "------------------------------------\n",
       "0.3756    0.1768    0.2078    0.2398\n",
       "------------------------------------"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fitted.lf"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}