{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# `natsel_sitehet` -- a test of site heterogeneity\n",
"\n",
"This app evaluates evidence for whether sites differ in their mode of natural selection ([Nielsen and Yang 1998](https://www.ncbi.nlm.nih.gov/pubmed/9539414))."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"Statistics\n",
"\n",
"LR | \n",
"df | \n",
"pvalue | \n",
"\n",
"\n",
"\n",
"1.4048 | \n",
"2 | \n",
"0.4954 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"\n",
"hypothesis | \n",
"key | \n",
"lnL | \n",
"nfp | \n",
"DLC | \n",
"unique_Q | \n",
"\n",
"\n",
"\n",
"null | \n",
"'GNC-null' | \n",
"-6708.3119 | \n",
"24 | \n",
"True | \n",
" | \n",
"
\n",
"\n",
"alt | \n",
"'GNC-alt' | \n",
"-6707.6095 | \n",
"26 | \n",
"True | \n",
" | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"Statistics\n",
"======================\n",
" LR df pvalue\n",
"----------------------\n",
"1.4048 2 0.4954\n",
"----------------------\n",
"=================================================================\n",
"hypothesis key lnL nfp DLC unique_Q\n",
"-----------------------------------------------------------------\n",
" null 'GNC-null' -6708.3119 24 True \n",
" alt 'GNC-alt' -6707.6095 26 True \n",
"-----------------------------------------------------------------"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from cogent3.app import io, evo\n",
"\n",
"loader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n",
"aln = loader(\"../data/primate_brca1.fasta\")\n",
"\n",
"sites_differ = evo.natsel_sitehet(\"GNC\",\n",
" tree=\"../data/primate_brca1.tree\",\n",
" optimise_motif_probs=False)\n",
"\n",
"result = sites_differ(aln)\n",
"result"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The models have been constructed such that site-class bins have names indicating the mode of natural selection: -ve is purifying (omega<1); neutral (omega=1); and +ve is positive natural selection (omega>1). The two parameters of interest relating to these are the `bprobs` (the maximum likelihood estimate of the frequency of the site-class) and the corresponding value of omega."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"GNC-alt
\n",
"log-likelihood = -6707.6095
\n",
"number of free parameters = 26
\n",
"\n",
"\n",
"Global params\n",
"\n",
"A>C | \n",
"A>G | \n",
"A>T | \n",
"C>A | \n",
"C>G | \n",
"C>T | \n",
"G>A | \n",
"G>C | \n",
"G>T | \n",
"T>A | \n",
"\n",
"\n",
"\n",
"0.8530 | \n",
"3.5646 | \n",
"0.9734 | \n",
"1.6404 | \n",
"2.1801 | \n",
"6.3218 | \n",
"8.0814 | \n",
"1.2347 | \n",
"0.7829 | \n",
"1.2798 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"T>C | \n",
"\n",
"\n",
"\n",
"3.0292 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"Bin params\n",
"\n",
"bin | \n",
"bprobs | \n",
"omega | \n",
"\n",
"\n",
"\n",
"-ve | \n",
"0.1043 | \n",
"0.0000 | \n",
"
\n",
"\n",
"neutral | \n",
"0.8052 | \n",
"1.0000 | \n",
"
\n",
"\n",
"+ve | \n",
"0.0905 | \n",
"20.0000 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"Edge params\n",
"\n",
"edge | \n",
"parent | \n",
"length | \n",
"\n",
"\n",
"\n",
"Galago | \n",
"root | \n",
"0.5463 | \n",
"
\n",
"\n",
"HowlerMon | \n",
"root | \n",
"0.1364 | \n",
"
\n",
"\n",
"Rhesus | \n",
"edge.3 | \n",
"0.0649 | \n",
"
\n",
"\n",
"Orangutan | \n",
"edge.2 | \n",
"0.0235 | \n",
"
\n",
"\n",
"Gorilla | \n",
"edge.1 | \n",
"0.0075 | \n",
"
\n",
"\n",
"Human | \n",
"edge.0 | \n",
"0.0182 | \n",
"
\n",
"\n",
"Chimpanzee | \n",
"edge.0 | \n",
"0.0085 | \n",
"
\n",
"\n",
"edge.0 | \n",
"edge.1 | \n",
"0.0000 | \n",
"
\n",
"\n",
"edge.1 | \n",
"edge.2 | \n",
"0.0099 | \n",
"
\n",
"\n",
"edge.2 | \n",
"edge.3 | \n",
"0.0364 | \n",
"
\n",
"\n",
"edge.3 | \n",
"root | \n",
"0.0233 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"\n",
"Motif params\n",
"\n",
"AAA | \n",
"AAC | \n",
"AAG | \n",
"AAT | \n",
"ACA | \n",
"ACC | \n",
"ACG | \n",
"ACT | \n",
"AGA | \n",
"AGC | \n",
"\n",
"\n",
"\n",
"0.0556 | \n",
"0.0235 | \n",
"0.0344 | \n",
"0.0556 | \n",
"0.0228 | \n",
"0.0046 | \n",
"0.0008 | \n",
"0.0289 | \n",
"0.0231 | \n",
"0.0286 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"AGG | \n",
"AGT | \n",
"ATA | \n",
"ATC | \n",
"ATG | \n",
"ATT | \n",
"CAA | \n",
"CAC | \n",
"CAG | \n",
"CAT | \n",
"\n",
"\n",
"\n",
"0.0140 | \n",
"0.0381 | \n",
"0.0186 | \n",
"0.0070 | \n",
"0.0128 | \n",
"0.0192 | \n",
"0.0196 | \n",
"0.0052 | \n",
"0.0238 | \n",
"0.0221 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"CCA | \n",
"CCC | \n",
"CCG | \n",
"CCT | \n",
"CGA | \n",
"CGC | \n",
"CGG | \n",
"CGT | \n",
"CTA | \n",
"CTC | \n",
"\n",
"\n",
"\n",
"0.0195 | \n",
"0.0062 | \n",
"0.0006 | \n",
"0.0263 | \n",
"0.0011 | \n",
"0.0009 | \n",
"0.0023 | \n",
"0.0032 | \n",
"0.0137 | \n",
"0.0078 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"CTG | \n",
"CTT | \n",
"GAA | \n",
"GAC | \n",
"GAG | \n",
"GAT | \n",
"GCA | \n",
"GCC | \n",
"GCG | \n",
"GCT | \n",
"\n",
"\n",
"\n",
"0.0125 | \n",
"0.0105 | \n",
"0.0755 | \n",
"0.0105 | \n",
"0.0303 | \n",
"0.0315 | \n",
"0.0158 | \n",
"0.0096 | \n",
"0.0014 | \n",
"0.0137 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"GGA | \n",
"GGC | \n",
"GGG | \n",
"GGT | \n",
"GTA | \n",
"GTC | \n",
"GTG | \n",
"GTT | \n",
"TAC | \n",
"TAT | \n",
"\n",
"\n",
"\n",
"0.0161 | \n",
"0.0090 | \n",
"0.0067 | \n",
"0.0133 | \n",
"0.0148 | \n",
"0.0070 | \n",
"0.0069 | \n",
"0.0213 | \n",
"0.0023 | \n",
"0.0101 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"TCA | \n",
"TCC | \n",
"TCG | \n",
"TCT | \n",
"TGC | \n",
"TGG | \n",
"TGT | \n",
"TTA | \n",
"TTC | \n",
"TTG | \n",
"\n",
"\n",
"\n",
"0.0221 | \n",
"0.0082 | \n",
"0.0015 | \n",
"0.0251 | \n",
"0.0018 | \n",
"0.0040 | \n",
"0.0201 | \n",
"0.0212 | \n",
"0.0078 | \n",
"0.0108 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
"TTT | \n",
"\n",
"\n",
"\n",
"0.0187 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"GNC-alt\n",
"log-likelihood = -6707.6095\n",
"number of free parameters = 26\n",
"============================================================================\n",
" A>C A>G A>T C>A C>G C>T G>A G>C\n",
"----------------------------------------------------------------------------\n",
"0.8530 3.5646 0.9734 1.6404 2.1801 6.3218 8.0814 1.2347\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"==========================\n",
" G>T T>A T>C\n",
"--------------------------\n",
"0.7829 1.2798 3.0292\n",
"--------------------------\n",
"\n",
"============================\n",
" bin bprobs omega\n",
"----------------------------\n",
" -ve 0.1043 0.0000\n",
"neutral 0.8052 1.0000\n",
" +ve 0.0905 20.0000\n",
"----------------------------\n",
"==============================\n",
" edge parent length\n",
"------------------------------\n",
" Galago root 0.5463\n",
" HowlerMon root 0.1364\n",
" Rhesus edge.3 0.0649\n",
" Orangutan edge.2 0.0235\n",
" Gorilla edge.1 0.0075\n",
" Human edge.0 0.0182\n",
"Chimpanzee edge.0 0.0085\n",
" edge.0 edge.1 0.0000\n",
" edge.1 edge.2 0.0099\n",
" edge.2 edge.3 0.0364\n",
" edge.3 root 0.0233\n",
"------------------------------\n",
"============================================================================\n",
" AAA AAC AAG AAT ACA ACC ACG ACT\n",
"----------------------------------------------------------------------------\n",
"0.0556 0.0235 0.0344 0.0556 0.0228 0.0046 0.0008 0.0289\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" AGA AGC AGG AGT ATA ATC ATG ATT\n",
"----------------------------------------------------------------------------\n",
"0.0231 0.0286 0.0140 0.0381 0.0186 0.0070 0.0128 0.0192\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" CAA CAC CAG CAT CCA CCC CCG CCT\n",
"----------------------------------------------------------------------------\n",
"0.0196 0.0052 0.0238 0.0221 0.0195 0.0062 0.0006 0.0263\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" CGA CGC CGG CGT CTA CTC CTG CTT\n",
"----------------------------------------------------------------------------\n",
"0.0011 0.0009 0.0023 0.0032 0.0137 0.0078 0.0125 0.0105\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" GAA GAC GAG GAT GCA GCC GCG GCT\n",
"----------------------------------------------------------------------------\n",
"0.0755 0.0105 0.0303 0.0315 0.0158 0.0096 0.0014 0.0137\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" GGA GGC GGG GGT GTA GTC GTG GTT\n",
"----------------------------------------------------------------------------\n",
"0.0161 0.0090 0.0067 0.0133 0.0148 0.0070 0.0069 0.0213\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"============================================================================\n",
" TAC TAT TCA TCC TCG TCT TGC TGG\n",
"----------------------------------------------------------------------------\n",
"0.0023 0.0101 0.0221 0.0082 0.0015 0.0251 0.0018 0.0040\n",
"----------------------------------------------------------------------------\n",
"\n",
"continued: \n",
"==============================================\n",
" TGT TTA TTC TTG TTT\n",
"----------------------------------------------\n",
"0.0201 0.0212 0.0078 0.0108 0.0187\n",
"----------------------------------------------"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.alt.lf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting the individual site posterior probabilities\n",
"\n",
"I'm just displaying the posterior-probabuilities from the first 20 positions only."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
" | \n",
"0 | \n",
"1 | \n",
"2 | \n",
"3 | \n",
"4 | \n",
"5 | \n",
"6 | \n",
"7 | \n",
"8 | \n",
"9 | \n",
"\n",
"\n",
"\n",
"-ve | \n",
"0.149 | \n",
"0.084 | \n",
"0.000 | \n",
"0.131 | \n",
"0.115 | \n",
"0.157 | \n",
"0.084 | \n",
"0.119 | \n",
"0.102 | \n",
"0.080 | \n",
"
\n",
"\n",
"neutral | \n",
"0.773 | \n",
"0.813 | \n",
"0.864 | \n",
"0.785 | \n",
"0.796 | \n",
"0.767 | \n",
"0.812 | \n",
"0.793 | \n",
"0.803 | \n",
"0.814 | \n",
"
\n",
"\n",
"+ve | \n",
"0.078 | \n",
"0.103 | \n",
"0.136 | \n",
"0.084 | \n",
"0.089 | \n",
"0.076 | \n",
"0.103 | \n",
"0.088 | \n",
"0.095 | \n",
"0.106 | \n",
"
\n",
"\n",
"
\n",
"\n",
"\n",
" | \n",
"10 | \n",
"11 | \n",
"12 | \n",
"13 | \n",
"14 | \n",
"15 | \n",
"16 | \n",
"17 | \n",
"18 | \n",
"19 | \n",
"\n",
"\n",
"\n",
"-ve | \n",
"0.076 | \n",
"0.157 | \n",
"0.094 | \n",
"0.000 | \n",
"0.156 | \n",
"0.517 | \n",
"0.080 | \n",
"0.070 | \n",
"0.115 | \n",
"0.122 | \n",
"
\n",
"\n",
"neutral | \n",
"0.816 | \n",
"0.767 | \n",
"0.807 | \n",
"0.865 | \n",
"0.767 | \n",
"0.482 | \n",
"0.814 | \n",
"0.820 | \n",
"0.796 | \n",
"0.791 | \n",
"
\n",
"\n",
"+ve | \n",
"0.108 | \n",
"0.076 | \n",
"0.099 | \n",
"0.135 | \n",
"0.077 | \n",
"0.000 | \n",
"0.106 | \n",
"0.111 | \n",
"0.089 | \n",
"0.087 | \n",
"
\n",
"\n",
"
\n"
],
"text/plain": [
"===============================================================================================================================================================================================================\n",
" 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19\n",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
" -ve 0.1491 0.0839 0.0000 0.1313 0.1146 0.1569 0.0843 0.1191 0.1020 0.0798 0.0760 0.1569 0.0937 0.0000 0.1563 0.5173 0.0798 0.0695 0.1146 0.1216\n",
"neutral 0.7725 0.8127 0.8643 0.7851 0.7961 0.7668 0.8125 0.7927 0.8032 0.8141 0.8164 0.7668 0.8070 0.8655 0.7670 0.4824 0.8141 0.8197 0.7961 0.7909\n",
" +ve 0.0784 0.1034 0.1357 0.0837 0.0893 0.0764 0.1032 0.0882 0.0948 0.1061 0.1076 0.0764 0.0993 0.1345 0.0766 0.0003 0.1061 0.1108 0.0893 0.0875\n",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bprobs = result.alt.lf.get_bin_probs()\n",
"bprobs[:, :20]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:c3dev] *",
"language": "python",
"name": "conda-env-c3dev-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}