{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# QC and processing raw barcode counts for Library 2 with the Rho promoter\n", "This notebook takes raw barcode counts and processes them into activity scores for each replicate. Then, we perform statistical tests to determine which sequences are significantly different from the Rho promoter alone. There are three biological replicates of RNA and one DNA sample from the input plasmid pool. This notebook should be nearly identical to notebook 01 for Library 1 with Rho." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy import stats\n", "\n", "sys.path.insert(0, \"utils\")\n", "from utils import modeling, plot_utils, quality_control" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "plot_utils.set_manuscript_params()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load in data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | label | \n", "DNA | \n", "RNA1 | \n", "RNA2 | \n", "RNA3 | \n", "
---|---|---|---|---|---|
barcode | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
AACAACAAG | \n", "chr7-141291911-141292075_UPPP_MUT-allCrxSites | \n", "132 | \n", "0 | \n", "1 | \n", "1 | \n", "
AACAACGTT | \n", "chr19-16380352-16380516_CPPN_MUT-allCrxSites | \n", "1779 | \n", "36 | \n", "17 | \n", "46 | \n", "
AACAACTAC | \n", "chr1-44147572-44147736_UPPP_MUT-allCrxSites | \n", "2928 | \n", "433 | \n", "802 | \n", "510 | \n", "
AACAACTCG | \n", "chr12-116230818-116230982_CPPE_WT | \n", "2822 | \n", "3043 | \n", "2967 | \n", "3013 | \n", "
AACAACTGT | \n", "chr5-65391346-65391510_CPPP_MUT-allCrxSites | \n", "1810 | \n", "1572 | \n", "2281 | \n", "1559 | \n", "
AACAAGCTT | \n", "chr15-97965790-97965954_CPPP_MUT-allCrxSites | \n", "1047 | \n", "1259 | \n", "1531 | \n", "1310 | \n", "
AACAAGGCG | \n", "chr1-59164069-59164233_CPPE_WT | \n", "4966 | \n", "17124 | \n", "17046 | \n", "14499 | \n", "
AACAAGTAG | \n", "chr2-158513919-158514083_CPPE_WT | \n", "2690 | \n", "10607 | \n", "12288 | \n", "9783 | \n", "
AACAAGTCT | \n", "chr11-58097684-58097848_UPCP_MUT-allCrxSites | \n", "4049 | \n", "1589 | \n", "2030 | \n", "1541 | \n", "
AACAATAGG | \n", "chr16-33682556-33682720_CPNE_MUT-allCrxSites | \n", "2623 | \n", "2510 | \n", "2215 | \n", "2097 | \n", "
AACAATCAC | \n", "chr8-120493050-120493214_CSRE_MUT-shape | \n", "1105 | \n", "23 | \n", "14 | \n", "137 | \n", "
AACAATGAG | \n", "chr1-162570536-162570700_CSPP_MUT-shape | \n", "564 | \n", "217 | \n", "563 | \n", "371 | \n", "
AACAATGCT | \n", "chr12-86553474-86553638_CPRN_WT | \n", "1139 | \n", "176 | \n", "72 | \n", "87 | \n", "
AACAATGTC | \n", "chr4-59668207-59668371_CPPE_MUT-allCrxSites | \n", "1168 | \n", "814 | \n", "1323 | \n", "1242 | \n", "
AACAATTCA | \n", "chr1-111864309-111864473_UPCQ_MUT-allCrxSites | \n", "2963 | \n", "2004 | \n", "2144 | \n", "2143 | \n", "
AACACAGCC | \n", "chr9-27191668-27191832_CPPE_WT | \n", "1589 | \n", "1 | \n", "1 | \n", "1 | \n", "
AACACAGGG | \n", "chr13-41249684-41249848_CSPP_scrambled | \n", "580 | \n", "47 | \n", "118 | \n", "53 | \n", "
AACACATAC | \n", "chr9-59660499-59660663_CPPP_WT | \n", "5276 | \n", "48243 | \n", "41743 | \n", "32514 | \n", "
AACACATGT | \n", "chr19-3321456-3321620_UPPE_WT | \n", "1947 | \n", "1397 | \n", "1485 | \n", "1005 | \n", "
AACACCAAT | \n", "chr4-119142639-119142803_UPPE_WT | \n", "3350 | \n", "588 | \n", "1101 | \n", "779 | \n", "