{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# `natsel_timehet` -- a test of branch heterogeneity\n",
"\n",
"We employ codon models to test whether the mode of natural selection affecting human and chimpanzee lineages is distinctive. This is done by specifying the edges of interest ([Yang 1998](https://www.ncbi.nlm.nih.gov/pubmed/9580986)). (Note I'm setting `optimise_motif_probs=False` to speed up execution of the examples, not because it's a good idea!)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"Statistics\n",
"\n",
"LR | \n",
"df | \n",
"pvalue | \n",
"\n",
"\n",
"\n",
"4.9248 | \n",
"1 | \n",
"0.0265 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"\n",
"hypothesis | \n",
"key | \n",
"lnL | \n",
"nfp | \n",
"DLC | \n",
"unique_Q | \n",
"\n",
"\n",
"\n",
"null | \n",
"'GNC-null' | \n",
"-6713.2733 | \n",
"23 | \n",
"True | \n",
" | \n",
"
\n",
"\n",
"alt | \n",
"'GNC-alt' | \n",
"-6710.8109 | \n",
"24 | \n",
"True | \n",
" | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"Statistics\n",
"======================\n",
" LR df pvalue\n",
"----------------------\n",
"4.9248 1 0.0265\n",
"----------------------\n",
"=================================================================\n",
"hypothesis key lnL nfp DLC unique_Q\n",
"-----------------------------------------------------------------\n",
" null 'GNC-null' -6713.2733 23 True \n",
" alt 'GNC-alt' -6710.8109 24 True \n",
"-----------------------------------------------------------------"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from cogent3.app import io, evo\n",
"\n",
"loader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n",
"aln = loader(\"../data/primate_brca1.fasta\")\n",
"\n",
"hc_differ = evo.natsel_timehet(\"GNC\",\n",
" tree=\"../data/primate_brca1.tree\",\n",
" optimise_motif_probs=False,\n",
" tip1=\"Human\", tip2=\"Chimpanzee\")\n",
"result = hc_differ(aln)\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"GNC-alt
\n",
"log-likelihood = -6710.8109
\n",
"number of free parameters = 24
\n",
"\n",
"\n",
"Global params\n",
"\n",
"A>C | \n",
"A>G | \n",
"A>T | \n",
"C>A | \n",
"C>G | \n",
"C>T | \n",
"G>A | \n",
"G>C | \n",
"G>T | \n",
"T>A | \n",
"\n",
"\n",
"\n",
"0.8620 | \n",
"3.5361 | \n",
"0.9790 | \n",
"1.6698 | \n",
"2.2059 | \n",
"6.2630 | \n",
"7.9209 | \n",
"1.2265 | \n",
"0.8024 | \n",
"1.2882 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"T>C | \n",
"\n",
"\n",
"\n",
"3.0675 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"Edge params\n",
"\n",
"edge | \n",
"parent | \n",
"length | \n",
"omega | \n",
"\n",
"\n",
"\n",
"Galago | \n",
"root | \n",
"0.5237 | \n",
"0.7906 | \n",
"
\n",
"\n",
"HowlerMon | \n",
"root | \n",
"0.1339 | \n",
"0.7906 | \n",
"
\n",
"\n",
"Rhesus | \n",
"edge.3 | \n",
"0.0640 | \n",
"0.7906 | \n",
"
\n",
"\n",
"Orangutan | \n",
"edge.2 | \n",
"0.0233 | \n",
"0.7906 | \n",
"
\n",
"\n",
"Gorilla | \n",
"edge.1 | \n",
"0.0075 | \n",
"0.7906 | \n",
"
\n",
"\n",
"Human | \n",
"edge.0 | \n",
"0.0182 | \n",
"2.6351 | \n",
"
\n",
"\n",
"Chimpanzee | \n",
"edge.0 | \n",
"0.0085 | \n",
"2.6351 | \n",
"
\n",
"\n",
"edge.0 | \n",
"edge.1 | \n",
"0.0000 | \n",
"0.7906 | \n",
"
\n",
"\n",
"edge.1 | \n",
"edge.2 | \n",
"0.0100 | \n",
"0.7906 | \n",
"
\n",
"\n",
"edge.2 | \n",
"edge.3 | \n",
"0.0366 | \n",
"0.7906 | \n",
"
\n",
"\n",
"edge.3 | \n",
"root | \n",
"0.0238 | \n",
"0.7906 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"Motif params\n",
"\n",
"AAA | \n",
"AAC | \n",
"AAG | \n",
"AAT | \n",
"ACA | \n",
"ACC | \n",
"ACG | \n",
"ACT | \n",
"AGA | \n",
"AGC | \n",
"\n",
"\n",
"\n",
"0.0556 | \n",
"0.0235 | \n",
"0.0344 | \n",
"0.0556 | \n",
"0.0228 | \n",
"0.0046 | \n",
"0.0008 | \n",
"0.0289 | \n",
"0.0231 | \n",
"0.0286 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"AGG | \n",
"AGT | \n",
"ATA | \n",
"ATC | \n",
"ATG | \n",
"ATT | \n",
"CAA | \n",
"CAC | \n",
"CAG | \n",
"CAT | \n",
"\n",
"\n",
"\n",
"0.0140 | \n",
"0.0381 | \n",
"0.0186 | \n",
"0.0070 | \n",
"0.0128 | \n",
"0.0192 | \n",
"0.0196 | \n",
"0.0052 | \n",
"0.0238 | \n",
"0.0221 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"CCA | \n",
"CCC | \n",
"CCG | \n",
"CCT | \n",
"CGA | \n",
"CGC | \n",
"CGG | \n",
"CGT | \n",
"CTA | \n",
"CTC | \n",
"\n",
"\n",
"\n",
"0.0195 | \n",
"0.0062 | \n",
"0.0006 | \n",
"0.0263 | \n",
"0.0011 | \n",
"0.0009 | \n",
"0.0023 | \n",
"0.0032 | \n",
"0.0137 | \n",
"0.0078 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"CTG | \n",
"CTT | \n",
"GAA | \n",
"GAC | \n",
"GAG | \n",
"GAT | \n",
"GCA | \n",
"GCC | \n",
"GCG | \n",
"GCT | \n",
"\n",
"\n",
"\n",
"0.0125 | \n",
"0.0105 | \n",
"0.0755 | \n",
"0.0105 | \n",
"0.0303 | \n",
"0.0315 | \n",
"0.0158 | \n",
"0.0096 | \n",
"0.0014 | \n",
"0.0137 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"GGA | \n",
"GGC | \n",
"GGG | \n",
"GGT | \n",
"GTA | \n",
"GTC | \n",
"GTG | \n",
"GTT | \n",
"TAC | \n",
"TAT | \n",
"\n",
"\n",
"\n",
"0.0161 | \n",
"0.0090 | \n",
"0.0067 | \n",
"0.0133 | \n",
"0.0148 | \n",
"0.0070 | \n",
"0.0069 | \n",
"0.0213 | \n",
"0.0023 | \n",
"0.0101 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"TCA | \n",
"TCC | \n",
"TCG | \n",
"TCT | \n",
"TGC | \n",
"TGG | \n",
"TGT | \n",
"TTA | \n",
"TTC | \n",
"TTG | \n",
"\n",
"\n",
"\n",
"0.0221 | \n",
"0.0082 | \n",
"0.0015 | \n",
"0.0251 | \n",
"0.0018 | \n",
"0.0040 | \n",
"0.0201 | \n",
"0.0212 | \n",
"0.0078 | \n",
"0.0108 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"TTT | \n",
"\n",
"\n",
"\n",
"0.0187 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"GNC-alt\n",
"log-likelihood = -6710.8109\n",
"number of free parameters = 24\n",
"============================================================================\n",
" A>C A>G A>T C>A C>G C>T G>A G>C\n",
"----------------------------------------------------------------------------\n",
"0.8620 3.5361 0.9790 1.6698 2.2059 6.2630 7.9209 1.2265\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"==========================\n",
" G>T T>A T>C\n",
"--------------------------\n",
"0.8024 1.2882 3.0675\n",
"--------------------------\n",
"\n",
"========================================\n",
" edge parent length omega\n",
"----------------------------------------\n",
" Galago root 0.5237 0.7906\n",
" HowlerMon root 0.1339 0.7906\n",
" Rhesus edge.3 0.0640 0.7906\n",
" Orangutan edge.2 0.0233 0.7906\n",
" Gorilla edge.1 0.0075 0.7906\n",
" Human edge.0 0.0182 2.6351\n",
"Chimpanzee edge.0 0.0085 2.6351\n",
" edge.0 edge.1 0.0000 0.7906\n",
" edge.1 edge.2 0.0100 0.7906\n",
" edge.2 edge.3 0.0366 0.7906\n",
" edge.3 root 0.0238 0.7906\n",
"----------------------------------------\n",
"============================================================================\n",
" AAA AAC AAG AAT ACA ACC ACG ACT\n",
"----------------------------------------------------------------------------\n",
"0.0556 0.0235 0.0344 0.0556 0.0228 0.0046 0.0008 0.0289\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" AGA AGC AGG AGT ATA ATC ATG ATT\n",
"----------------------------------------------------------------------------\n",
"0.0231 0.0286 0.0140 0.0381 0.0186 0.0070 0.0128 0.0192\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" CAA CAC CAG CAT CCA CCC CCG CCT\n",
"----------------------------------------------------------------------------\n",
"0.0196 0.0052 0.0238 0.0221 0.0195 0.0062 0.0006 0.0263\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" CGA CGC CGG CGT CTA CTC CTG CTT\n",
"----------------------------------------------------------------------------\n",
"0.0011 0.0009 0.0023 0.0032 0.0137 0.0078 0.0125 0.0105\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" GAA GAC GAG GAT GCA GCC GCG GCT\n",
"----------------------------------------------------------------------------\n",
"0.0755 0.0105 0.0303 0.0315 0.0158 0.0096 0.0014 0.0137\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" GGA GGC GGG GGT GTA GTC GTG GTT\n",
"----------------------------------------------------------------------------\n",
"0.0161 0.0090 0.0067 0.0133 0.0148 0.0070 0.0069 0.0213\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" TAC TAT TCA TCC TCG TCT TGC TGG\n",
"----------------------------------------------------------------------------\n",
"0.0023 0.0101 0.0221 0.0082 0.0015 0.0251 0.0018 0.0040\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"==============================================\n",
" TGT TTA TTC TTG TTT\n",
"----------------------------------------------\n",
"0.0201 0.0212 0.0078 0.0108 0.0187\n",
"----------------------------------------------"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.alt.lf"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:c3dev] *",
"language": "python",
"name": "conda-env-c3dev-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}