{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# QC and processing raw barcode counts for Library 1 with the Rho promoter\n", "This notebook takes raw barcode counts and processes them into activity scores for each replicate. Then, we perform statistical tests to determine which sequences are significantly different from the Rho promoter alone. There are three biological replicates of RNA and one DNA sample from the input plasmid pool." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from scipy import stats\n", "\n", "sys.path.insert(0, \"utils\")\n", "from utils import modeling, plot_utils, quality_control" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "plot_utils.set_manuscript_params()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load in data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | label | \n", "DNA | \n", "RNA1 | \n", "RNA2 | \n", "RNA3 | \n", "
---|---|---|---|---|---|
barcode | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
AACAACAAG | \n", "chr16-87432635-87432799_CPPQ_scrambled | \n", "3019 | \n", "148 | \n", "325 | \n", "97 | \n", "
AACAACCGC | \n", "chr4-119112319-119112483_CPPE_WT | \n", "4117 | \n", "24493 | \n", "25950 | \n", "23406 | \n", "
AACAACGGG | \n", "chr7-128854234-128854398_UPCE_WT | \n", "86 | \n", "76 | \n", "39 | \n", "233 | \n", "
AACAACTAC | \n", "chr4-138107597-138107761_UPPE_WT | \n", "827 | \n", "926 | \n", "857 | \n", "659 | \n", "
AACAACTGT | \n", "chr5-31298508-31298672_CPPE_WT | \n", "7170 | \n", "492 | \n", "392 | \n", "149 | \n", "
AACAAGCGG | \n", "chr16-37868024-37868188_UPPP_MUT-allCrxSites | \n", "1199 | \n", "98 | \n", "147 | \n", "206 | \n", "
AACAAGGAC | \n", "chr9-72409911-72410075_CBPE_MUT-shape | \n", "909 | \n", "1920 | \n", "1107 | \n", "995 | \n", "
AACAAGGCG | \n", "chr8-70212225-70212389_CPPP_MUT-allCrxSites | \n", "98 | \n", "33 | \n", "43 | \n", "19 | \n", "
AACAAGTAG | \n", "chr15-72652217-72652381_CPPE_WT | \n", "1851 | \n", "2557 | \n", "3203 | \n", "2688 | \n", "
AACAAGTCT | \n", "chr2-104122476-104122640_CBPE_WT | \n", "305 | \n", "386 | \n", "357 | \n", "204 | \n", "
AACAATACC | \n", "chr2-5669535-5669699_CPPE_WT | \n", "518 | \n", "1 | \n", "1 | \n", "5 | \n", "
AACAATAGG | \n", "chr9-102717090-102717254_UPPP_MUT-allCrxSites | \n", "3577 | \n", "1750 | \n", "1563 | \n", "2113 | \n", "
AACAATCAC | \n", "chr17-34197537-34197701_CPPQ_MUT-allCrxSites | \n", "1627 | \n", "282 | \n", "52 | \n", "30 | \n", "
AACAATGAG | \n", "chr4-44576957-44577121_UPPE_WT | \n", "7030 | \n", "2706 | \n", "3000 | \n", "2267 | \n", "
AACAATGCT | \n", "chr17-74221859-74222023_CPPE_WT | \n", "2481 | \n", "1418 | \n", "1722 | \n", "1725 | \n", "
AACAATGTC | \n", "chr11-3422348-3422512_CPRE_MUT-allCrxSites | \n", "2912 | \n", "4651 | \n", "3679 | \n", "5035 | \n", "
AACAATTCA | \n", "chr2-164427118-164427282_CPPE_MUT-allCrxSites | \n", "2296 | \n", "2218 | \n", "2237 | \n", "1850 | \n", "
AACACACAT | \n", "BASAL | \n", "6067 | \n", "926 | \n", "1183 | \n", "697 | \n", "
AACACAGCC | \n", "chr10-61695319-61695483_CPPP_MUT-allCrxSites | \n", "195 | \n", "22 | \n", "0 | \n", "36 | \n", "
AACACAGGG | \n", "chr12-83444664-83444828_CPPE_MUT-allCrxSites | \n", "1680 | \n", "1700 | \n", "1539 | \n", "1886 | \n", "