{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# `natsel_sitehet` -- a test of site heterogeneity\n", "\n", "This app evaluates evidence for whether sites differ in their mode of natural selection ([Nielsen and Yang 1998](https://www.ncbi.nlm.nih.gov/pubmed/9539414))." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Statistics
LRdfpvalue
1.404820.4954
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
hypothesiskeylnLnfpDLCunique_Q
null'GNC-null'-6708.311924True
alt'GNC-alt'-6707.609526True
\n" ], "text/plain": [ "Statistics\n", "======================\n", " LR df pvalue\n", "----------------------\n", "1.4048 2 0.4954\n", "----------------------\n", "=================================================================\n", "hypothesis key lnL nfp DLC unique_Q\n", "-----------------------------------------------------------------\n", " null 'GNC-null' -6708.3119 24 True \n", " alt 'GNC-alt' -6707.6095 26 True \n", "-----------------------------------------------------------------" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from cogent3.app import io, evo\n", "\n", "loader = io.load_aligned(format=\"fasta\", moltype=\"dna\")\n", "aln = loader(\"../data/primate_brca1.fasta\")\n", "\n", "sites_differ = evo.natsel_sitehet(\"GNC\",\n", " tree=\"../data/primate_brca1.tree\",\n", " optimise_motif_probs=False)\n", "\n", "result = sites_differ(aln)\n", "result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The models have been constructed such that site-class bins have names indicating the mode of natural selection: -ve is purifying (omega<1); neutral (omega=1); and +ve is positive natural selection (omega>1). The two parameters of interest relating to these are the `bprobs` (the maximum likelihood estimate of the frequency of the site-class) and the corresponding value of omega." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "

GNC-alt

\n", "

log-likelihood = -6707.6095

\n", "

number of free parameters = 26

\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Global params
A>CA>GA>TC>AC>GC>TG>AG>CG>TT>A
0.85303.56460.97341.64042.18016.32188.08141.23470.78291.2798
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
T>C
3.0292
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Bin params
binbprobsomega
-ve0.10430.0000
neutral0.80521.0000
+ve0.090520.0000
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Edge params
edgeparentlength
Galagoroot0.5463
HowlerMonroot0.1364
Rhesusedge.30.0649
Orangutanedge.20.0235
Gorillaedge.10.0075
Humanedge.00.0182
Chimpanzeeedge.00.0085
edge.0edge.10.0000
edge.1edge.20.0099
edge.2edge.30.0364
edge.3root0.0233
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
Motif params
AAAAACAAGAATACAACCACGACTAGAAGC
0.05560.02350.03440.05560.02280.00460.00080.02890.02310.0286
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
AGGAGTATAATCATGATTCAACACCAGCAT
0.01400.03810.01860.00700.01280.01920.01960.00520.02380.0221
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
CCACCCCCGCCTCGACGCCGGCGTCTACTC
0.01950.00620.00060.02630.00110.00090.00230.00320.01370.0078
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
CTGCTTGAAGACGAGGATGCAGCCGCGGCT
0.01250.01050.07550.01050.03030.03150.01580.00960.00140.0137
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GGAGGCGGGGGTGTAGTCGTGGTTTACTAT
0.01610.00900.00670.01330.01480.00700.00690.02130.00230.0101
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
TCATCCTCGTCTTGCTGGTGTTTATTCTTG
0.02210.00820.00150.02510.00180.00400.02010.02120.00780.0108
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
TTT
0.0187
\n" ], "text/plain": [ "GNC-alt\n", "log-likelihood = -6707.6095\n", "number of free parameters = 26\n", "============================================================================\n", " A>C A>G A>T C>A C>G C>T G>A G>C\n", "----------------------------------------------------------------------------\n", "0.8530 3.5646 0.9734 1.6404 2.1801 6.3218 8.0814 1.2347\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "==========================\n", " G>T T>A T>C\n", "--------------------------\n", "0.7829 1.2798 3.0292\n", "--------------------------\n", "\n", "============================\n", " bin bprobs omega\n", "----------------------------\n", " -ve 0.1043 0.0000\n", "neutral 0.8052 1.0000\n", " +ve 0.0905 20.0000\n", "----------------------------\n", "==============================\n", " edge parent length\n", "------------------------------\n", " Galago root 0.5463\n", " HowlerMon root 0.1364\n", " Rhesus edge.3 0.0649\n", " Orangutan edge.2 0.0235\n", " Gorilla edge.1 0.0075\n", " Human edge.0 0.0182\n", "Chimpanzee edge.0 0.0085\n", " edge.0 edge.1 0.0000\n", " edge.1 edge.2 0.0099\n", " edge.2 edge.3 0.0364\n", " edge.3 root 0.0233\n", "------------------------------\n", "============================================================================\n", " AAA AAC AAG AAT ACA ACC ACG ACT\n", "----------------------------------------------------------------------------\n", "0.0556 0.0235 0.0344 0.0556 0.0228 0.0046 0.0008 0.0289\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " AGA AGC AGG AGT ATA ATC ATG ATT\n", "----------------------------------------------------------------------------\n", "0.0231 0.0286 0.0140 0.0381 0.0186 0.0070 0.0128 0.0192\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " CAA CAC CAG CAT CCA CCC CCG CCT\n", "----------------------------------------------------------------------------\n", "0.0196 0.0052 0.0238 0.0221 0.0195 0.0062 0.0006 0.0263\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " CGA CGC CGG CGT CTA CTC CTG CTT\n", "----------------------------------------------------------------------------\n", "0.0011 0.0009 0.0023 0.0032 0.0137 0.0078 0.0125 0.0105\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " GAA GAC GAG GAT GCA GCC GCG GCT\n", "----------------------------------------------------------------------------\n", "0.0755 0.0105 0.0303 0.0315 0.0158 0.0096 0.0014 0.0137\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " GGA GGC GGG GGT GTA GTC GTG GTT\n", "----------------------------------------------------------------------------\n", "0.0161 0.0090 0.0067 0.0133 0.0148 0.0070 0.0069 0.0213\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "============================================================================\n", " TAC TAT TCA TCC TCG TCT TGC TGG\n", "----------------------------------------------------------------------------\n", "0.0023 0.0101 0.0221 0.0082 0.0015 0.0251 0.0018 0.0040\n", "----------------------------------------------------------------------------\n", "\n", "continued: \n", "==============================================\n", " TGT TTA TTC TTG TTT\n", "----------------------------------------------\n", "0.0201 0.0212 0.0078 0.0108 0.0187\n", "----------------------------------------------" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.alt.lf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Getting the individual site posterior probabilities\n", "\n", "I'm just displaying the posterior-probabuilities from the first 20 positions only." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
0123456789
-ve0.1490.0840.0000.1310.1150.1570.0840.1190.1020.080
neutral0.7730.8130.8640.7850.7960.7670.8120.7930.8030.814
+ve0.0780.1030.1360.0840.0890.0760.1030.0880.0950.106
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
10111213141516171819
-ve0.0760.1570.0940.0000.1560.5170.0800.0700.1150.122
neutral0.8160.7670.8070.8650.7670.4820.8140.8200.7960.791
+ve0.1080.0760.0990.1350.0770.0000.1060.1110.0890.087
\n" ], "text/plain": [ "===============================================================================================================================================================================================================\n", " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19\n", "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", " -ve 0.1491 0.0839 0.0000 0.1313 0.1146 0.1569 0.0843 0.1191 0.1020 0.0798 0.0760 0.1569 0.0937 0.0000 0.1563 0.5173 0.0798 0.0695 0.1146 0.1216\n", "neutral 0.7725 0.8127 0.8643 0.7851 0.7961 0.7668 0.8125 0.7927 0.8032 0.8141 0.8164 0.7668 0.8070 0.8655 0.7670 0.4824 0.8141 0.8197 0.7961 0.7909\n", " +ve 0.0784 0.1034 0.1357 0.0837 0.0893 0.0764 0.1032 0.0882 0.0948 0.1061 0.1076 0.0764 0.0993 0.1345 0.0766 0.0003 0.1061 0.1108 0.0893 0.0875\n", "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bprobs = result.alt.lf.get_bin_probs()\n", "bprobs[:, :20]" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:c3dev] *", "language": "python", "name": "conda-env-c3dev-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.1" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }