1
0
Fork 0
tidy-data/6_case_study.ipynb

1000 lines
310 KiB
Text
Raw Permalink Normal View History

2018-08-27 11:48:50 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Case Study: Unusual Deaths in Mexico\n",
"\n",
"> The following case study illustrates how tidy data and tidy tools make data analysis easier by easing the transitions between manipulation, visualisation and modelling. You will not see any code that exists solely to get the output of one function into the right format to input to another.\n",
"\n",
"> The case study uses individual-level mortality data from Mexico. The goal is to find causes of death with unusual temporal patterns within a day."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
2018-08-27 11:48:50 +02:00
"source": [
"%load_ext lab_black"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/webartifex/repos/tidy-data/.venv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:11: FutureWarning: pandas.core.index is deprecated and will be removed in a future version. The public classes are available in the top-level namespace.\n",
" from pandas.core.index import Index as PandasIndex\n"
]
}
],
2018-08-27 11:48:50 +02:00
"source": [
"import math\n",
"import textwrap\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"from matplotlib import pyplot as plt\n",
"from rpy2 import robjects # leads to a FutureWarning that can be safely ignored\n",
"from rpy2.robjects import pandas2ri\n",
2018-08-27 11:48:50 +02:00
"from sklearn.linear_model import HuberRegressor"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"sns.set()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"deaths = pandas2ri.ri2py(robjects.r[\"readRDS\"](\"data/deaths.rds\"))\n",
"deaths = deaths[(deaths[\"yod\"] == 2008) & (deaths[\"mod\"] != 0) & (deaths[\"dod\"] != 0)]\n",
"deaths = deaths[~(deaths[\"hod\"] < 0)]\n",
2018-08-27 11:48:50 +02:00
"deaths = deaths.reset_index(drop=True)\n",
"\n",
"assert set(deaths[\"hod\"].unique()) <= set(range(24))"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(502520, 5)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"deaths.shape"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>yod</th>\n",
" <th>mod</th>\n",
" <th>dod</th>\n",
" <th>hod</th>\n",
" <th>cod</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2008</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>B20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2008</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>B22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2008</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>C18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2008</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>C34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2008</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>C50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" yod mod dod hod cod\n",
"0 2008 1 1 1 B20\n",
"1 2008 1 1 1 B22\n",
"2 2008 1 1 1 C18\n",
"3 2008 1 1 1 C34\n",
"4 2008 1 1 1 C50"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"deaths.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# The file contains 7 duplicates that are discarded.\n",
"codes = pd.read_csv(\"data/icd-main.csv\")\n",
"codes = codes[(codes[\"code\"] != codes[\"code\"].shift())].set_index(\"code\")"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1851, 1)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"codes.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>disease</th>\n",
" </tr>\n",
" <tr>\n",
" <th>code</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A00</th>\n",
" <td>Cholera</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A01</th>\n",
" <td>Typhoid and paratyphoid fevers</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A02</th>\n",
" <td>Other salmonella infections</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A03</th>\n",
" <td>Shigellosis</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A04</th>\n",
" <td>Other bacterial intestinal infections</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" disease\n",
"code \n",
"A00 Cholera\n",
"A01 Typhoid and paratyphoid fevers\n",
"A02 Other salmonella infections\n",
"A03 Shigellosis\n",
"A04 Other bacterial intestinal infections"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"codes.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Counts\n",
"\n",
"Count the number of deaths by `\"hod\"` (=\"hour of the day\") and `\"cod\"` (=\"cause of death\"), and also join in the more descriptive labels for the various causes."
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>hod</th>\n",
" <th>cod</th>\n",
" <th>freq</th>\n",
" <th>disease</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>A01</td>\n",
" <td>3</td>\n",
" <td>Typhoid and paratyphoid fevers</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>A02</td>\n",
" <td>3</td>\n",
" <td>Other salmonella infections</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>A04</td>\n",
" <td>7</td>\n",
" <td>Other bacterial intestinal infections</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>A05</td>\n",
" <td>1</td>\n",
" <td>Other bacterial foodborne intoxications, not e...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>A06</td>\n",
" <td>2</td>\n",
" <td>Amebiasis</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" hod cod freq disease\n",
"0 1 A01 3 Typhoid and paratyphoid fevers\n",
"1 1 A02 3 Other salmonella infections\n",
"2 1 A04 7 Other bacterial intestinal infections\n",
"3 1 A05 1 Other bacterial foodborne intoxications, not e...\n",
"4 1 A06 2 Amebiasis"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"counts = (\n",
" pd.DataFrame(deaths.groupby([\"hod\", \"cod\"]).size(), columns=[\"freq\"])\n",
2018-08-27 11:48:50 +02:00
" .reset_index()\n",
" .join(codes, on=\"cod\")\n",
2018-08-27 11:48:50 +02:00
")\n",
"# This is to ensure that no duplicates are created\n",
"# because of duplicate entries in the codes DataFrame.\n",
"assert counts[\"cod\"].value_counts().max() <= 24\n",
2018-08-27 11:48:50 +02:00
"\n",
"# Keep only causes where a death happened in every hour.\n",
"counts = counts[counts[\"cod\"].isin(list((counts[\"cod\"].value_counts() == 24).index))]\n",
2018-08-27 11:48:50 +02:00
"\n",
"counts.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add a `\"prop\"` (=\"proportion\") column indicating the relative frequency of a given cause of death on an hourly basis."
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>hod</th>\n",
" <th>cod</th>\n",
" <th>freq</th>\n",
" <th>disease</th>\n",
" <th>prop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>A01</td>\n",
" <td>3</td>\n",
" <td>Typhoid and paratyphoid fevers</td>\n",
" <td>0.062500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>A02</td>\n",
" <td>3</td>\n",
" <td>Other salmonella infections</td>\n",
" <td>0.048387</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>A04</td>\n",
" <td>7</td>\n",
" <td>Other bacterial intestinal infections</td>\n",
" <td>0.051095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>A05</td>\n",
" <td>1</td>\n",
" <td>Other bacterial foodborne intoxications, not e...</td>\n",
" <td>0.050000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>A06</td>\n",
" <td>2</td>\n",
" <td>Amebiasis</td>\n",
" <td>0.024390</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" hod cod freq disease prop\n",
"0 1 A01 3 Typhoid and paratyphoid fevers 0.062500\n",
"1 1 A02 3 Other salmonella infections 0.048387\n",
"2 1 A04 7 Other bacterial intestinal infections 0.051095\n",
"3 1 A05 1 Other bacterial foodborne intoxications, not e... 0.050000\n",
"4 1 A06 2 Amebiasis 0.024390"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"counts = counts.set_index(\"cod\")\n",
"counts[\"prop\"] = counts[\"freq\"] / deaths.groupby([\"cod\"]).size().reindex(counts.index)\n",
2018-08-27 11:48:50 +02:00
"counts = counts.reset_index()\n",
"# Re-order the columns as in the paper.\n",
"counts = counts[[\"hod\", \"cod\", \"freq\", \"disease\", \"prop\"]]\n",
2018-08-27 11:48:50 +02:00
"\n",
"counts.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add `\"freq_all\"` and `\"prop_all\"` columns that show the absolute number of deaths for a given hour of day (disregarding cause of death) and the proportion of deaths for a certain hour of day with respect to the whole day."
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>hod</th>\n",
" <th>cod</th>\n",
" <th>freq</th>\n",
" <th>disease</th>\n",
" <th>prop</th>\n",
" <th>freq_all</th>\n",
" <th>prop_all</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>A01</td>\n",
" <td>3</td>\n",
" <td>Typhoid and paratyphoid fevers</td>\n",
" <td>0.062500</td>\n",
" <td>20038</td>\n",
" <td>0.039875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>A02</td>\n",
" <td>3</td>\n",
" <td>Other salmonella infections</td>\n",
" <td>0.048387</td>\n",
" <td>20038</td>\n",
" <td>0.039875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>A04</td>\n",
" <td>7</td>\n",
" <td>Other bacterial intestinal infections</td>\n",
" <td>0.051095</td>\n",
" <td>20038</td>\n",
" <td>0.039875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>A05</td>\n",
" <td>1</td>\n",
" <td>Other bacterial foodborne intoxications, not e...</td>\n",
" <td>0.050000</td>\n",
" <td>20038</td>\n",
" <td>0.039875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>A06</td>\n",
" <td>2</td>\n",
" <td>Amebiasis</td>\n",
" <td>0.024390</td>\n",
" <td>20038</td>\n",
" <td>0.039875</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" hod cod freq disease \\\n",
"0 1 A01 3 Typhoid and paratyphoid fevers \n",
"1 1 A02 3 Other salmonella infections \n",
"2 1 A04 7 Other bacterial intestinal infections \n",
"3 1 A05 1 Other bacterial foodborne intoxications, not e... \n",
"4 1 A06 2 Amebiasis \n",
"\n",
" prop freq_all prop_all \n",
"0 0.062500 20038 0.039875 \n",
"1 0.048387 20038 0.039875 \n",
"2 0.051095 20038 0.039875 \n",
"3 0.050000 20038 0.039875 \n",
"4 0.024390 20038 0.039875 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"counts = counts.set_index(\"hod\")\n",
"counts[\"freq_all\"] = deaths.groupby(\"hod\").size()\n",
"counts[\"prop_all\"] = counts[\"freq_all\"] / deaths.shape[0]\n",
2018-08-27 11:48:50 +02:00
"counts = counts.reset_index()\n",
"\n",
"counts.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Distance between temporal Patterns\n",
"\n",
"> Next we compute a distance between the temporal pattern of each cause of death and the overall temporal pattern. There are many ways to measure this distance, but I found a simple mean squared deviation to be revealing. We also record the sample size, the total number of deaths from that cause. To ensure that the diseases we consider are sufficiently representative we’ll only work with diseases with more than 50 total deaths (∼2/hour)."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>disease</th>\n",
" <th>n</th>\n",
" <th>dist</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A02</th>\n",
" <td>Other salmonella infections</td>\n",
" <td>62</td>\n",
" <td>0.000738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A04</th>\n",
" <td>Other bacterial intestinal infections</td>\n",
" <td>137</td>\n",
" <td>0.000208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A06</th>\n",
" <td>Amebiasis</td>\n",
" <td>82</td>\n",
" <td>0.000405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A09</th>\n",
" <td>Diarrhea and gastroenteritis of infectious origin</td>\n",
" <td>3016</td>\n",
" <td>0.000028</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A16</th>\n",
" <td>Respiratory tuberculosis, not confirmed bacter...</td>\n",
" <td>1642</td>\n",
" <td>0.000029</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" disease n dist\n",
"A02 Other salmonella infections 62 0.000738\n",
"A04 Other bacterial intestinal infections 137 0.000208\n",
"A06 Amebiasis 82 0.000405\n",
"A09 Diarrhea and gastroenteritis of infectious origin 3016 0.000028\n",
"A16 Respiratory tuberculosis, not confirmed bacter... 1642 0.000029"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"devi = (\n",
" codes.join(deaths.groupby(\"cod\").count()[\"yod\"].to_frame(), how=\"inner\")\n",
2018-08-27 11:48:50 +02:00
" .join(\n",
" counts.groupby(\"cod\")\n",
" .apply(lambda x: ((x[\"prop\"] - x[\"prop_all\"]) ** 2).mean())\n",
2018-08-27 11:48:50 +02:00
" .to_frame(),\n",
" how=\"inner\",\n",
2018-08-27 11:48:50 +02:00
" )\n",
" .rename(columns={\"yod\": \"n\", 0: \"dist\"})\n",
2018-08-27 11:48:50 +02:00
")\n",
"devi = devi[(devi[\"n\"] > 50)]\n",
2018-08-27 11:48:50 +02:00
"\n",
"devi.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot `\"dist\"` vs. `\"n\"`. Not a whole lot can be seen here."
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='n', ylabel='dist'>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEQCAYAAAAkgGgxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcHklEQVR4nO3dfWxT59038G9s3tJCSu0lxh5pgbVlZiTtSp8+Yi3dCE6cghOnjNRVRtSJEjQRFo1qW9NNJQSo2iCtWseIdE90TJEqhqyu0LhpyFJuNpLnKYWVhjIDpWlCgDgv2GThLTQ5vu4/anzHhMZO8OvF9yNViv27bP/OqfvtdR0fHycJIQSIiCSkinUDRESRwoAjImkx4IhIWgw4IpIWA46IpMWAIyJpRS3g2traYLPZYDabYbPZ0N7ePmKMoiiorKyEyWRCdnY27HZ7SDUAqKurQ15eHiwWC/Ly8nDhwoVIbxIRxTsRJcXFxWLPnj1CCCH27NkjiouLR4x59913xapVq4SiKMLtdotFixaJs2fPBq0dO3ZMPP3006Knp0cIIUR/f78YGBiI0pYRUbyKygzO7XbD6XTCYrEAACwWC5xOJzweT8C4uro6FBYWQqVSQaPRwGQyob6+PmjtL3/5C1atWoXU1FQAwLRp0zB58uRobBoRxbGoBJzL5YJOp4NarQYAqNVqpKWlweVyjRhnMBj8t/V6Pbq6uoLWWltbcfbsWfzkJz/BM888g+rqagh+QYPojjch1g2Eg6IoOHXqFHbu3ImvvvoKq1evhsFgQEFBQaxbI6IYikrA6fV6dHd3Q1EUqNVqKIqCnp4e6PX6EeM6OzuRmZkJIHDWNlrNYDAgNzcXkyZNwqRJk7BkyRIcO3ZsTAF38eIVeL2JM+vTaqfC7b4c6zZClmj9Auw50lSqJNx7792RfY2IPruPVquF0WiEw+EAADgcDhiNRmg0moBxubm5sNvt8Hq98Hg8aGxshNlsDlqzWCxoamqCEAKDg4P46KOP8N3vfndMPXq9IqH+SbSeE61f9hy9fiMpakvUjRs3ory8HNXV1UhJSUFVVRUAoKSkBGVlZcjIyIDVakVLSwtycnIAAKWlpUhPTweAUWvLli3D8ePHsXTpUqhUKjz55JNYsWJFtDaNiOJUkuDReACA2305Kv9HCZfU1Gno7b0U6zZClmj9Auw50lSqJGi1UyP7GhF9diKiGGLAEZG0GHBEJC0GHBFJiwHnc+36UKxbIKIwY8D5ONs9wQcRUUJhwPnMm6UJPoiIEgoDzid5shRfyyWiYRhwRCQtBhwRSYsBR0TSYsARkbQYcEQkLQYcEUmLAUdE0mLAEZG0GHBEJC0GHBFJiwFHRNJiwBGRtBhwRCQtBhwRSYsBR0TSYsARkbQYcEQkLQacD390hkg+DDgf/ugMkXyiFnBtbW2w2Wwwm82w2Wxob28fMUZRFFRWVsJkMiE7Oxt2uz2k2rZt27Bw4UJYrVZYrVZUVlaOuT/+6AyRfKL2SysVFRUoKiqC1WrF3r17sWHDBtTU1ASMqa2tRUdHBxoaGtDX14eCggIsXLgQM2fOHLUGAAUFBXjppZfG3V/y5AnwesVtbSMRxZeozODcbjecTicsFgsAwGKxwOl0wuMJXBbW1dWhsLAQKpUKGo0GJpMJ9fX1QWtERLcSlYBzuVzQ6XRQq9UAALVajbS0NLhcrhHjDAaD/7Zer0dXV1fQGgC8//77yMvLw6pVq3D06NFIbg4RJQgpfgz0ueeew89+9jNMnDgRzc3NWLt2Lerq6nDvvfeG/Bxa7dQIdhgZqanTYt3CmCRavwB7TnRRCTi9Xo/u7m4oigK1Wg1FUdDT0wO9Xj9iXGdnJzIzMwEEztpGq6Wmpvqf44knnoBer8fp06fx+OOPh9yj2305oY7BpaZOQ2/vpVi3EbJE6xdgz5GmUiVFfGIRlSWqVquF0WiEw+EAADgcDhiNRmg0gZ9c5ubmwm63w+v1wuPxoLGxEWazOWitu7vb/xwnTpzA+fPnMXv27GhsGhHFsagtUTdu3Ijy8nJUV1cjJSUFVVVVAICSkhKUlZUhIyMDVqsVLS0tyMnJAQCUlpYiPT0dAEatvfHGG/j3v/8NlUqFiRMnYuvWrQGzOiK6MyUJIRJnXRZBXKJGVqL1C7DnSJNmiUpEFAsMOCKSFgOOiKTFgCMiaTHgiEhaDDgikhYDjoikxYAjImkx4IhIWgw4IpIWA46IpMWAIyJpMeCISFoMOCKSFgOOiKTFgCMiaTHgiEhaDDgikhYDjoikxYAjImkx4IhIWgw4IpIWA46IpMWA87l2fSjWLRBRmDHgfJztnli3QERhxoDzmTdLE+sWiCjMGHA+yZMnxLoFIgozBhwRSStqAdfW1gabzQaz2QybzYb29vYRYxRFQWVlJUwmE7Kzs2G320Oq3fDll1/i4YcfRlVVVSQ3hYgSRNQCrqKiAkVFRdi3bx+KioqwYcOGEWNqa2vR0dGBhoYG7N69G9u2bcO5c+eC1oCvA7CiogImkylam0REcS4qAed2u+F0OmGxWAAAFosFTqcTHk/gJ5d1dXUoLCyESqWCRqOByWRCfX190BoA/OlPf8KPfvQjzJo1KxqbREQJICpH1l0uF3Q6HdRqNQBArVYjLS0NLpcLGo0mYJzBYPDf1uv16OrqClo7efIkmpqaUFNTg+rq6nH1qNVOHdfjYik1dVqsWxiTROsXYM+JLuE/OhwcHMQrr7yC1157zR+g4+F2X4bXK8LYWWSlpk5Db++lWLcRskTrF2DPkaZSJUV8YhGVgNPr9eju7oaiKFCr1VAUBT09PdDr9SPGdXZ2IjMzE0DgrO2bar29vejo6MCaNWsAAP39/RBC4PLly9i8eXM0No+I4lRUjsFptVoYjUY4HA4AgMPhgNFoDFieAkBubi7sdju8Xi88Hg8aGxthNptHrRkMBhw6dAj79+/H/v378fzzz+PZZ59luBFR9JaoGzduRHl5Oaqrq5GSkuI/laOkpARlZWXIyMiA1WpFS0sLcnJyAAClpaVIT08HgFFrRES3kiSESJwDTxHEY3CRlWj9Auw50qJxDI7fZCAiaTHgiEhaDDgikhYDjoikxYAjImkx4IhIWgw4IpIWA46IpMWAIyJpMeCISFoMOCKSFgOOiKTFgCMiaTHgiEhaDDgikhYDjoikxYAjImkx4IhIWgw4IpIWA46IpMWAIyJpMeCISFohB1xBQcEt71++fHm4eiEiCquQA+7MmTMj7hNC4Ny5c2FtiIgoXIL+sv2vf/1rAMDg4KD/7xvOnz+PBx54IDKdERHdpqABd999993ybwB49NFHkZubG/6uiIjCIGjArVu3DgDw8MMPY9GiRRFviIgoXEI+Bjdx4kScPXsWANDb24uXXnoJL7/8Mnp7e0N6fFtbG2w2G8xmM2w2G9rb20eMURQFlZWVMJlMyM7Oht1uD6n2zjvvIC8vD1arFXl5eaipqQl1s4hIYiEHXGVlJdRqNQDg9ddfx9DQEJKSkvDKK6+E9PiKigoUFRVh3759KCoqwoYNG0aMqa2tRUdHBxoaGrB7925s27bN/yHGaDWz2Yz33nsPe/fuxa5du7Bz506cPHky1E0jIkmFHHDd3d0wGAwYGhpCU1MTNm3ahI0bN+Lo0aNBH+t2u+F0OmGxWAAAFosFTqcTHo8nYFxdXR0KCwuhUqmg0WhgMplQX18ftDZ16lQkJSUBAAYGBjA4OOi/TUR3rpADburUqbhw4QIOHz6M73znO7j77rsBAENDQ0Ef63K5oNPp/DNAtVqNtLQ0uFyuEeMMBoP/tl6vR1dXV9AaAHz44YdYtmwZFi9ejNWrV2Pu3LmhbhoRSSrohww3rFy5EitWrMDg4CB+85vfAAA++eQTzJkzJ2LNjcWSJUuwZMkSdHZ2orS0FE899dSYetNqp0awu8hITZ0W6xbGJNH6Bdhzogs54NasWYPs7Gyo1Wr/6SI6nQ5btmwJ+li9Xo/u7m4oigK1Wg1FUdDT0wO9Xj9iXGdnJzIzMwEEztpGqw1nMBiQkZGBAwcOjCng3O7L8HpFyONjLTV1Gnp
2018-08-27 11:48:50 +02:00
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(4, 4))\n",
"ax.set_xlim(0, 50000)\n",
"ax.set_ylim(0, 0.006)\n",
"sns.regplot(x=\"n\", y=\"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The relationship becomes more obvious if one plots the same points on a `\"log\"`-`\"log\"` scale."
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:xlabel='n', ylabel='dist'>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASMAAAEZCAYAAADcwUPmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAApA0lEQVR4nO3dfXBTZcI28CtJ0+/W0DYJQdpCaSkFEUu7CkrxLXVlUXYQWQHBAu4+4wzz+sys+4fD7LgiI/vRXdfdmd1Xxxk/GHWVhXkUlvKqfcXuIqhooQJaGtIPsIWQJq2lLf1Kk/P+Ec4xSdMmaZvkJL1+M47l5CS5z8C5et/3uT8UgiAIICKKMGWkC0BEBDCMiEgmGEZEJAsMIyKSBYYREckCw4iIZIFhRESywDAiIlmI2jDq7u7Gww8/jOLi4kgXhYimQNSGUUpKCl5//XUsWbIk0kUhoikQtWGkVquh0WgiXQwimiJhDaOqqiqsWrUKhYWFuHjxonS8tbUVmzZtwurVq7Fp0yZcunQpnMUiIhmIC+eXVVRUYNu2bdi6davH8d27d2PLli1Yt24dDh8+jGeffRZvvvkmAKCpqQl79uzxOL+srAxPPPFE2MpNRKEX1jAqLS0ddayzsxMNDQ144403AABr167F888/j66uLmRkZCA/Px9vvfXWlJfl++9vwOkM/4IFmZmp6OzsC/v3RtJ0u2Zer29KpQIzZqSM+XpYw8gXs9kMvV4PlUoFAFCpVNDpdDCbzcjIyBj3vTt27MCFCxewY8cO/PrXv8b8+fMD/l6nU4hIGInfPd1Mt2vm9QYv4mE0Gfv27Yt0EYhoikT8aZrBYIDFYoHD4QAAOBwOdHR0wGAwRLhkRBROEQ+jzMxMFBUVobq6GgBQXV2NoqIiv000IootinAuO7t3717U1NTAZrNhxowZ0Gg0OHr0KJqbm7Fr1y709PQgPT0dVVVVyMvLC2lZOjv7ItKu12rTYLX2hv17I2m6XTOv1zelUoHMzNQxXw9rGMkJwyh8pts183p98xdGEW+mEREBDCMikgmGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikoWoDaO6ujps3LgRmzdvxuuvvx7p4hDRJEVtGGVnZ+Ptt9/G/v37UVtbi4GBgUgXiYgmIS7SBZgovV4v/axSqaBURm2uEhHCXDOqqqrCqlWrUFhYiIsXL0rHW1tbsWnTJqxevRqbNm3CpUuXAv7MkydPIicnBwkJCSEoMRGFS1hrRhUVFdi2bRu2bt3qcXz37t3YsmUL1q1bh8OHD+PZZ5/Fm2++CQBoamrCnj17PM4vKyvDE088gWvXruGVV17Byy+/HLZrIKIQESKgvLxcMBqNgiAIgs1mE0pKSoSRkRFBEARhZGREKCkpETo7O8f9jKGhIWH79u1Cc3NzyMtLRKEX8T4js9kMvV4PlUoFwNX/o9PpYDabkZGRMeb7jhw5gqamJuzevRsA8MILL3j0I/nT2dkHp1OYXOEnQKtNg9XaG/bvjaTpds28Xt+USgUyM1PHfD3iYTRRGzZswIYNGyJdDCKaIhF/BGUwGGCxWOBwOAAADocDHR0dMBgMES4ZEYVTxMMoMzMTRUVFqK6uBgBUV1ejqKho3CYaEcUehSAIYes42bt3L2pqamCz2TBjxgxoNBocPXoUzc3N2LVrF3p6epCeno6qqirk5eWFtCzsMwqf6XbNvF7f/PUZhTWM5IRhFD7T7Zp5vb75C6OIN9OIiACGERHJBMOIiGSBYUREssAwIiJZYBhNQP+gHSfPm9E/aI90UYhiBsNoAupNNuw/ZkK9yRbpohDFjKidmxZJxQVZHv/3p3/QjnqTLeDziaYjhtEEJCeqcc/iwOfOiTUpAMjN5jQXIl8YRmEQbE2KaDpiGIVBsDUpoumIHdhEJAsMowDxcT5RaDGMAsTH+UShxT6jALETmii0GEYBYic0UWixmUZEssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjIhIFhhGRCQLDCMikgWGERHJAsOIiGSBYUREssAwIiJZYBgRkSwwjEKA2xoRBY9hFALc1ogoeNwdJATktq1R/6Ad9SYbiguykJyojnRxiHxizSgExG2N5HLjB1pTY/OSIingMHrooYd8Hn/44YenqiwUIsUFWdhcUeC3psbmJUVSwM20y5cvjzomCALa29untEA09QLdgFJuzUuaXvyG0dNPPw0AsNvt0s+iK1euID8/PzQlixKx1B/DXXMpkvyGUU5Ojs+fAWDp0qX4yU9+MvWliiJi0wbAqBs5loKKKNT8htGTTz4JAFiyZAnKyspCXqBAnDt3Dr/73e8AAHfddReeeuqpiJVlvKbNeEFFRJ4C7jNSq9Voa2tDdnY2rFYrXnjhBSiVSvzqV7+CVqsNZRlHKSoqwv79+wEA27dvR19fH1JTU8NaBmD8mk//oB1DdgceXpnHPhiiAAT8NG3Pnj1QqVQAgD/84Q8YGRmBQqHAb37zm5AVbixqtevGdzgc0Ol0SExMDHsZgB9qPqcaLKMeidebbHj/eAvi1So20YgCEHDNyGKxYNasWRgZGcGJEyfwySefQK1WB9x0q6qqwkcffYQrV67gyJEjmD9/PgCgtbUVu3btQnd3NzQaDaqqqjBnzhy/n3fkyBH8/e9/x4oVKxAXF5mxm2KNZ8juGNUc45MpoiAJASorKxOsVqvw2WefCY8++qggCIIwNDQkLF26NKD3f/XVV8LVq1eF8vJywWg0SscrKyuFQ4cOCYIgCIcOHRIqKyul10wmk/DYY495/PfKK69IrzscDuHJJ58UGhsbA72MkOjtHxaOfXVZ6O0fnpLzplIkvpNoIgKuUjz22GP42c9+Brvdjl//+tcAgDNnziAvLy+g95eWlo461tnZiYaGBrzxxhsAgLVr1+L5559HV1cXMjIykJ+fj7feemvU+4aHhxEfHw+lUomUlBQkJCQEehlu390Hp1MI+Hx/T8YW587AQN8gBvoGx/yMk+fNOFDbhJ6ewbB1aJ88b8b+Y6awfqc3rTYNVmtvRL47Eni9vimVCmRmjt23G3AYPfHEE/jxj38MlUolPeLX6/XYu3dvoB8xitlshl6vl/qiVCoVdDodzGYzMjIyxnzfsWPH8M4778DpdKK0tDSgZt1kTcWTscJsDe6+3YDCbM0Ulmx8bC5StAiqs2Xu3Lnj/jlc1qxZgzVr1oT1O3N0qZitTUGOLvCndmJtqjBbA2NbN4btDnx2zozZmSnI0iQF9RkTHaskx4GMHH9FvowbRmvWrMEHH3wAALj33nuhUCh8nvfvf/97Ql9uMBhgsVjgcDigUqngcDjQ0dEBg0FeNw8AHDvTDmPbdRw7044da4r8nt8/aMeB2iZ8dcGCHH0a2q03sH5lHv5r3W2Ypw880GJxrFIsXhNN3rhh9Pzzz0s//+lPf5ryL8/MzERRURGqq6uxbt06VFdXo6ioaNwmWqRULJ2Nq7YbSEtS45Mz7Vi2UO/xW939tz0AHKhtQl2jFbdqU9Fi7sGPFuixbKEeudkZQfUnxGIzKxaviSZv3DA6efIkTp486fdD7rzzTr/n7N27FzU1NbDZbHj88ceh0Whw9OhRPPfcc9i1axdeeuklpKeno6qqKvDSh9F3HX24bOlFy9UeqJTAd5ZerF0+B8a2bhQXZHn8tgeAusYOlC7QwZCRgraOG5g3Kz1mmlmTFYvXRJM3bhhdu3ZN+nloaAg1NTW47bbbcOutt+Lq1as4f/487r///oC+6JlnnsEzzzwz6vi8efNw8ODBIIsdfsUFWbhwWY+6xg7k6FNR19g
2018-08-27 11:48:50 +02:00
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(4, 4))\n",
"ax.set_xscale(\"log\")\n",
"ax.set_yscale(\"log\")\n",
2018-08-27 11:48:50 +02:00
"ax.set_xlim(30, 150000)\n",
"ax.set_ylim(0.00001, 0.1)\n",
"sns.regplot(\"n\", \"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> We are interested in points that have high y-values, relative to their x-neighbours. Controlling for the number of deaths, these points represent the diseases which depart the most from the overall pattern. To find these unusual points, we fit a robust linear model and plot the residuals, Figure 3. The plot shows an empty region around a residual of 1.5. So somewhat arbitrarily, we’ll select those diseases with a residual greater than 1.5."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Note that the HuberRegressor is not the exact\n",
"# same method as in the paper but close.\n",
"X = np.log(devi[\"n\"]).values[:, np.newaxis]\n",
"y = np.log(devi[\"dist\"]).values\n",
2018-08-27 11:48:50 +02:00
"rlm = HuberRegressor()\n",
"rlm.fit(X, y)\n",
"devi[\"residuals\"] = y - rlm.predict(X)"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the threshold for \"unusual\" deaths, set arbitrarily at 1.5."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.LineCollection at 0x7f5528c90550>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASIAAAEWCAYAAADCVZoNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAv4ElEQVR4nO2de3hTZbb/v0ma0pZQ2tI2pBAohVoKFkWQi4BMoXIZ4bToVAuClx+XUdCZ85x5HJgzc/A68zxwPKjjOONR0DlnHBWYw8UiKBYYgSJggUHkUsql0NLQKx0obWhI8/uj7u3OTnayd7KTvZO9Ps/jYy87e6+35P1mvetd71o6l8vlAkEQhILolTaAIAiChIggCMUhISIIQnFIiAiCUBwSIoIgFIeEiCAIxYlR2gAuS5cuRW1tLfR6PRISEvAf//EfyM3NdbvG6XTi1Vdfxb59+6DT6bBkyRIUFxcrZDFBEHKgU1Me0Y0bN9CrVy8AQFlZGd5++21s3rzZ7ZotW7agtLQU7733HlpbW1FUVISPPvoI/fv3V8JkgiBkQFVLM0aEAKCtrQ06nc7jmu3bt6O4uBh6vR4pKSkoKCjA559/Hk4zCYKQGVUtzQDg17/+NcrLy+FyubB27VqP39tsNmRkZLDfWywWXL16NZwmEgQhM6oTot/+9rcAupdgq1evxnvvvSf7M65du4muruBXpH36mNDc3CaDRZEFjVtbyDVuvV6H5OSeXn+nOiFiKCoqwsqVK3Ht2jUkJyezP7dYLKirq8OIESMAeHpIYujqcskiRMy9tAh/3O12B45VNWFkdioS4owKWRV66N87NKgmRnTz5k3YbDb2+927d6N3795ISkpyu27GjBnYuHEjurq60NLSgrKyMkyfPj3M1hJ8jlU14ZNdVThW1aS0KUQEohqPqKOjAz//+c/R0dEBvV6P3r1745133oFOp8PixYvxs5/9DHl5eSgsLMTx48cxbdo0AMCyZctgtVoVtp4YmZ3q9n+CkIKqtu/DRXNzmyyuZlpaLzQ23pDBosiCxq0t5Bq3Xq9Dnz4m778L+u4EQRBBQkJEEITikBARBKE4JEQEQSgOCRFBEIpDQkQQhOKQEBEEoTgkRARBKA4JEUEQikNCRBCE4pAQEQShOCREBEEoDgkRQRCKQ0JEEITikBARBKE4JEQEQSgOCRFBEIpDQhTFtNsdKD9hQ7vdobQpBOET1dSsBoBr167hl7/8JS5fvozY2FgMHDgQL7/8MlJSUtyuW7FiBQ4cOMB295gxYwaeeeYZJUxWNUxBewCYkGdR2BqCEEZVQqTT6bBo0SKMHTsWALBq1Sq89tpr+N3vfudx7ZIlSzB//vxwmxhRUEF7IlJQ1dIsKSmJFSEAuPvuu1FXV6egRZFNQpwRE/IsUd1njIgOVOURcenq6sLHH3+MKVOmeP39Bx98gPXr18NqteIXv/gFBg8eLPreQp0EAiEtrZds94okaNzaItTjVm07oZdeegn19fX4wx/+AL3e3XGrr69HWloa9Ho9tmzZgjfffBNlZWUwGAyi7k3thIKDxq0tNNtOaNWqVbh06RLeeOMNDxECALPZzP68qKgI7e3tuHr1alhso50ogpAf1QnRmjVr8N133+Htt99GbGys12vq6+vZr/ft2we9Xg+z2RwW+6i1MkHIj6piRFVVVfjv//5vZGZmoqSkBADQv39/vP322ygsLMS7774Ls9mM5cuXo7m5GTqdDiaTCX/6058QExOeodBOFEHIj2pjRKGEYkTBoaZxt9sdOFbVhJHZqSHfHVTTuMOJZmNEBCEWWipHB6pamhHRRTi8FVoqRwfkEREhIxzeCiVtRgfkEREhg7wVQiwkRETIYLwVgvAHLc0iEEqqJKINEqIIhHaKiGiDlmYRCMVeiGiDhCgCodgLEW3Q0owgCMUhISIIQnFIiAiCUBwSIoIgFIeEiCAIxdG0EFFiIEGoA00LESUGRgf0gRL5aDqPiBIDowNqJBn5aFqIKDEwOqAPlMhH00JERAf0gRL5qEaIxPa97+jowK9+9SucPHkSBoMBy5cvR35+vkJWEwQhB6oJVjN977/44guUlpbCarXitdde87hu3bp1MJlM+PLLL/HOO+/gN7/5DW7evKmAxQRByIVqhEhs3/sdO3bg0UcfBQBkZmbizjvvxN69e8NmJ0EQ8qOapRkXX33v6+rq0K9fP/Z7i8UiucurUEuTQKBe6NqCxh0aVClEr7zyChISEjB//vyQ3J/6mgUHjVtbaLKvmb++9xkZGbhy5Qr7vc1mQ9++fcNpIkEQMqMqIRLT937GjBlYv349AKC6uhonTpzApEmTwmkmQRAyoxohYvreNzQ0oKSkBIWFhVi2bBkAoLCwEPX19QCAhQsX4vr163jggQfw05/+FC+//DJMJvliPgRBhB+dy+UKPlgSYVCMKDho3NpCkzEigiC0BwkRQRCKQ0IUYVDJCyIaISGKMKiGEhGNqDKhkRCGSl4Q0Qh5RBEGU/IiIc6otCkhgZae2oSEiFAVtPTUJrQ0I1QFLT21iSY9oiOVDeT6q5RoX3oS3tGkEJWWV5PrTxAqQpNCNHtCZtS7/hT0JSIJTQrRqJz0qHf9KehLRBIUrI5SKOhLRBIkRFEKtdghIglNLs0IglAXJEQEQSgOCRFBEIpDQkQQhOKoSohWrVqFKVOmICcnB2fPnvV6zVtvvYXx48ejsLAQhYWFeOmll8JsJUEQcqOqXbOpU6fi8ccfx2OPPebzuqKiIixfvjxMVhEEEWpUJUSjR48Oy3Pe2fodrl2/FfR9jLEGODqdMlgUWdC4tYVc405O7IFf/79xXn+nKiESy2effYb9+/cjLS0Nzz33HEaOHCnp9TFGA4yxBllskes+kQaNW1vIMe4Yo/A9Ik6ISkpK8PTTT8NoNKK8vBxLly7F9u3bkZycLPoei36cS+2EgoDGrS3kbCck+Lug7x5m0tLSYDR2nxObMGECLBYLqqqqFLaKIIhgiDghYjq+AsDp06dx5coVDBo0SEGLCIIIFtFLs4MHD6Jfv36wWq1oaGjAf/3Xf0Gv1+Pf/u3fkJaWJosxr776Knbu3ImmpiY89dRTSEpKwmeffYbFixfjZz/7GfLy8rBmzRqcPHkSer0eRqMRq1evlu35BEEog+iW0zNnzsS6deuQkZGBX/ziFwCAHj16oKWlBe+8805IjZQbajkdHDRubRGOltOiPaL6+npkZGTg9u3b2L9/P3bv3g2j0YhJkyYFbSBBENpGtBCZTCY0NTWhqqoKgwcPRs+ePdHZ2Ynbt2+H0j6CIDSAaCGaP38+fvKTn8DhcODf//3fAQBHjx5FVlZWyIwjCEIbiI4RAcDFixdhMBgwYMAA9vvOzk7k5OSEzMBQQDGi4KBxawtVxYgAeGyT07Y5QRBy4FOIJk+eDJ1OOBuS4e9//7tc9hAEoUF8CtF//ud/hssOgiA0jE8hGjNmTLjsIAhCw0iKEZ0+fRoVFRW4du0auDHun//857IbRhCENNrtDhyrasLI7NSI69sn+qzZ+vXrMXfuXBw8eBDvvfcezp49iw8++ACXL18OpX0EQV1rRRLJTTVFe0Rr167F2rVrMXr0aNx77714++238dVXX2H79u2htI8g2AkGgHq1+SCSm2qKFqLm5ma2gqJer0dXVxcmT56M559/PmTGEQQQ2RMsnERyU03RS7O+ffuitrYWAJCZmYldu3ahoqKCrQ1EEGKRutRiJlikxT0I8YgWokWLFuH8+fMAgKVLl+L555/HE088gWXLloXMuGhDTbEOJW2J5FgGERpEL80eeugh9uvJkyfj8OHDcDgc6NmzZ0gMUwNy70KoKdahpC201CL4iBairq4u9xfGxCAmJgZdXV3Q6yOu0KMo5J6sapqAStoSybEMIjSIFqJhw4YJHvc4ffq0bAapCbknq5omoJpsIQjRQrRr1y637xsbG/Huu+8iPz9fdqPUQrgmayQnohGEHIheU/Xr18/tv7vvvhurVq3C2rVrZTNGTMtpp9OJl156CQUFBXjggQewceNG2Z6vFEoHb9UURCe0SVB9zdra2tD
2018-08-27 11:48:50 +02:00
"text/plain": [
"<Figure size 288x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(4, 4))\n",
"ax.set_xscale(\"log\")\n",
2018-08-27 11:48:50 +02:00
"ax.set_xlim(50, 200000)\n",
"ax.set_ylim(-1, 3)\n",
"sns.regplot(\"n\", \"residuals\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})\n",
"ax.hlines(1.5, 0, 200000)"
2018-08-27 11:48:50 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"> Finally, we plot the temporal course for each unusual cause, Figure 4. We split the diseases into two plots because of differences in variability. The top plot shows diseases with over 350 deaths and the bottom with under 350. The causes of death fall into three main groups: murder, drowning, and transportation related. Murder is more common at night, drowning in the afternoon, and transportation related deaths during commute times. The pale gray line in the background shows the temporal course across all diseases."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA60AAAMECAYAAABUtsP5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3hUVfrA8e/U9N4DoZNCDyShd5RelSKKBQULiF1ZV2Xta9mfruiKomJBAZHeuxSlBWkCoUOA9N4zk5n7+2PIQEiHhBTez/PkSZlb3jMzOXPee849R6UoioIQQgghhBBCCFELqWs6ACGEEEIIIYQQojSStAohhBBCCCGEqLUkaRVCCCGEEEIIUWtJ0iqEEEIIIYQQotaSpFUIIYQQQgghRK0lSasQQgghhBBCiFpLklYhhBCiltq7dy9BQUHMnj27pkMRQgghaoy2pgMQQghRd3zzzTfs3buXs2fPkpqaikqlokGDBnTr1o1HHnkEX1/fYvsEBQWVerz27dvz66+/lvjYtm3b+O677zh+/Dhms5kWLVowceJERo8eXeF4L1++TP/+/Yv8zcbGBgcHBwICAmjTpg1DhgwhLCyswsesSoXxjR49mn//+981EkN5pk6dyqFDh/jzzz/Raos3G/7++28mTJiAl5cXK1aswNnZucTjPPXUU2zZsoXXXnuNSZMmVXfYQggh6hFJWoUQQlTYokWLsLe3Jzw8HA8PDwoKCjhx4gTff/89v/32Gz/99BOtWrUqtl+DBg1KTDZLSnIB5s+fz9tvv42rqysjRoxAp9OxYcMGZs6cyalTp3jllVcqFbeTkxMPPfQQAAUFBaSnpxMVFcWCBQv4+eef6dGjBx988AGenp6VOm59l5WVxe7duxkyZEiJCStAmzZtmD59Op988glvvvkm//nPf4pts3jxYrZs2UKPHj144IEHqjtsIYQQ9YwkrUIIUc8cPnyYNm3aoNFoqvzYq1evxsbGptjff/31V15//XU++eQT5s6dW+zxBg0a8PTTT1foHJcvX+aDDz7A1dWVJUuW0LBhQwCmTZvGvffey3fffcfdd99NaGhoheN2dnYu8fyXLl3i1VdfZdeuXTz22GMsWrSoxPLdqXbs2IHBYOCuu+4qc7upU6eyY8cOVq9eTZ8+fRg+fLj1sejoaN577z1cXV15//33UalU1R22EEKIekbuaRVCiHrmueeeo3fv3rz//vv8/fffVXrs0hK6wYMHA3Dx4sVbPseSJUswGAzcf//91oQVwMXFhccffxyAhQsX3vJ5AAICAvj6669p1qwZJ06cYMGCBcW2iYuL46233qJ///60adOGzp0788QTT3DkyJFi28bHx/P5558zYcIEunfvTps2bejRowcvvPACZ86cKbLt7NmzrUOXly1bRlBQkPVr6dKlxY594sQJpk6dSlhYGO3bt+eBBx7gr7/+KrZdVlYWX3zxBcOGDaNjx46EhoYyYMAAnn322Uq/HzZt2oSdnR09evQoczu1Ws2HH36Io6Mjb731FrGxsQCYTCZefvllcnJyeOedd/D29gbg7NmzzJw5k969e9OmTRu6devGCy+8wLlz54od+/z583z88ceMGTOGLl260KZNG/r27cvrr79OXFxcse2vvw/4yJEjTJ06lYiICIKCgrh8+XKlyi+EEKJ2kKRVCCHqmcmTJ+Ph4cH333/PPffcw6BBg/jiiy+Ijo6utnNu3boVKP3+1YyMDH777TfmzJnDzz//zKFDh0o91p49ewDo2bNnscd69epVZJuqYGdnx+TJkwFYtWpVkceOHTvGyJEj+eWXX2jatCmTJk2ib9++REZGMnHiRLZv315k+8jISObOnYuzszN33303Dz30EB06dGDDhg2MHTuWqKgo67YRERE8+OCDAAQHBzN9+nTrV0hISJHjFt43mp+fz9ixY+nTpw8HDhzg4YcfLpLoKYrCY489xmeffYajoyNjx47lvvvuo3379kRGRpb5vN/IYDCwfft2evToga2tbbnbN2zYkNdff52MjAxefvllzGYzc+bM4eDBg9xzzz3W3todO3YwZswYVq1aRdu2bXnwwQfp2rUrGzduZOzYsRw7dqzIcTdt2sTChQvx8/Nj2LBhTJo0iebNm7N48WLuvfde4uPjS4zn0KFDTJw4kfz8fO655x5Gjx6NTqercPmFEELUIooQQoh66cyZM8r//d//Kf3791cCAwOVwMBAZfz48cr8+fOV5OTkWzr2r7/+qnz22WfKv//9b2Xy5MlKcHCw0rdvX+X8+fPFti08941fI0aMUKKioopt37lzZyUwMFBJSUkp8dwdOnRQAgMDlZycnHLjvHTpkhIYGKj07du3zO0uXryoBAYGKiEhIYrRaFQURVGMRqMyYMAApU2bNsrevXuLbB8XF6f06NFD6d69u5Kfn2/9e1JSkpKZmVns+CdOnFA6dOigPProoyXG98orr5QY1549e6zP15IlS4o8tmDBAiUwMFCZNWuW9W9RUVFKYGCg8tRTTxU7lslkUtLS0sp8Hq63bds2JTAwUFm+fHmF91EURZkxY4YSGBiozJw5U2ndurUyYMAAJSsrS1EURUlLS1PCwsKUiIgI5fTp00X2O3nypNKhQwdl1KhRRf4eFxdX5DkutHPnTiU4OFh54403ivz9+udswYIFlYpdCCFE7SQ9rUIIUU81b96c5557js2bN7No0SImTZrEpUuXeOutt+jZsydTp05l1apV5ObmVvrYixcv5vPPP+e7775j165dtG7dmnnz5tGkSZNi2z7yyCMsWLCA3bt389dff/Hbb78xcOBAoqKieOihh4r1lGVlZQGWyZNK4ujoCEBmZmal4y6Nj48PYBnOmp6eDsDvv/9OdHQ0DzzwABEREcW2f+yxx0hMTGT37t3Wv3t4eFjju15wcDCdO3dm7969GI3GSsfXsWNHxowZU+Rv99xzD1qttsRhyiX1jKrValxcXCp8zk2bNqHT6ejbt2+lYn3rrbfw8fFh6dKlmM1mPvroIxwcHABYvnw5GRkZzJgxgxYtWhTZLzAwkLFjx3L8+PEiQ6l9fHzQ6/XFztOjRw9atGjBrl27SowjJCSECRMmVCp2IYQQtZNMxCSEEHeADh060KFDB/7xj3+we/duVq5cyapVq9i+fTtjxozh/fffr9TxCpepSU1N5fjx43zyySeMGTOGTz/9tNiw3pkzZxb5vW3btnz22WfMmDGDDRs28O233/Lqq6/eWgFvkaIo1p8LJwoqHEobExNT4jqpFy5cACz3Z/bu3dv6999//52FCxfy999/k5qaSkFBQZH9UlNTrfd2VlSbNm2K/U2n0+Hh4UFGRob1by1atCAkJITVq1dz5coV+vfvT6dOnWjTpk2JiV9pzGYzW7duJSIiotQlbErj4uLCE088wZtvvsndd99Nhw4drI8VPqdRUVHlPqeFSa2iKKxcuZJly5YRFRVFRkYGJpPJuk9pQ37btWtXqbiFEELUXpK0CiHEHeTYsWPs3LmT3bt3Yzab0el0NG3a9KaP5+bmRvfu3Wnbti2DBw/m5ZdfZtu2bRW6B3LChAls2LCByMjIIn93dHQkNTWVzMxM3Nzciu1XXk/szUhISABAo9FYk7S0tDQA1q9fX+a+OTk51p9/+OEH3nvvPVxcXOjWrRt+fn7Y2dmhUqnYvHkzUVFRGAyGSsdXWuKo1Woxm83W3zUaDT/88ANffPEFGzZs4OOPPwbAwcGB0aNH8/zzz1t7PcsSGRlJSkoKAwYMqHSscK2n98b3QeFzWtravIWuf07ff/99fvjhB7y8vOjRowc+Pj7W4y5btowrV66UeAxZvkgIIeoPSVqFEKKeO3PmDKtXr2bNmjVER0ejUqkICwtj+vTpDBo0qFJDRkvj7OxMhw4d2Lx5M6dPn6Zt27bl7uPu7g4UTVAAmjZtSmpqKhcuXCiWtCYkJJCTk4Ovry92dna3HHehvXv3AtC6dWvreqSFSfH//vc/6yy/ZSkoKODzzz/Hy8uLpUuXFutNrcwkSLfCxcWFV199lVdffZWLFy+yb98+Fi1axPz588nIyOC
2018-08-27 11:48:50 +02:00
"text/plain": [
"<Figure size 1152x864 with 8 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA60AAAMECAYAAABUtsP5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3iT5foH8G9296SFliXDQtmFQoGWXdkyZRwFByqiIIqT40Dh514oODiioB6PqIiiIIKiTAeKsqHMUkYX3W2anef3R5qXhq60TZu0/X6uy8vSvDN98yZ37vu5H5kQQoCIiIiIiIjIA8ndfQBEREREREREFWHQSkRERERERB6LQSsRERERERF5LAatRERERERE5LEYtBIREREREZHHYtBKREREREREHotBKxERkYfat28fOnXqhJUrV7r7UIiIiNxG6e4DICKihuODDz7Avn37cPbsWeTm5kImk6Fly5YYOHAg7rjjDrRo0aLMOp06dapwez179sSXX35Z7mM7duzAmjVrcPz4cVitVnTs2BE333wzJk+e7PTxXrp0CSNGjHD4nUajga+vL1q3bo1u3bph7NixiI2NdXqbrmQ/vsmTJ+Oll15yyzFUZe7cuTh48CB+++03KJVlPzYcPXoUM2fORFhYGL799lsEBASUu5377rsPP//8M5566inMnj27rg+biIgaEQatRETktC+++AI+Pj7o27cvQkNDYTabceLECXz00Uf46quv8N///hddunQps17Lli3LDTbLC3IB4NNPP8X//d//ISgoCBMmTIBKpcK2bduwePFinDp1Co8//ni1jtvf3x+33XYbAMBsNiM/Px9JSUlYt24d/ve//yEhIQEvv/wymjVrVq3tNnZFRUX4/fffMXbs2HIDVgDo1q0bFixYgOXLl2Pp0qV4/fXXyyyzfv16/Pzzz0hISMCsWbPq+rCJiKiRYdBKRNSEabVaXLx4EZ07d3Zq+c2bN0Oj0ZT5/Zdffomnn34ay5cvx+rVq8s83rJlS9x///1O7ePSpUt4+eWXERQUhA0bNqBVq1YAgPnz5+Omm27CmjVrMHLkSMTExDi1PQAICAgod/8XL17EE088gb179+Kuu+7CF198Ue75NVW7d++G0WjEDTfcUOlyc+fOxe7du7F582YMHToUN954o/TYhQsX8MILLyAoKAgvvvgiZDJZXR82ERE1MhzTSkTUxJjNZuzatQsPP/ww4uPj8dFHHzm9bkUB3ZgxYwAAKSkptT6+DRs2wGg04pZbbpECVgAIDAzEPffcAwD4/PPPa70fAGjdujXef/99tG/fHidOnMC6devKLJOeno5ly5ZhxIgR6NatG+Li4jBv3jwcPny4zLIZGRl4++23MXPmTMTHx6Nbt25ISEjAww8/jDNnzjgsu3LlSql0+ZtvvkGnTp2k/77++usy2z5x4gTmzp2L2NhY9OzZE7NmzcI///xTZrmioiK88847GD9+PHr37o2YmBgkJibiwQcfxNGjR6v1/Pz000/w9vZGQkJCpcvJ5XK88sor8PPzw7Jly5CWlgYAsFgseOyxx1BcXIznnnsO4eHhAICzZ89i8eLFGDJkCLp164aBAwfi4Ycfxrlz58psOzk5Ga+99hqmTJmC/v37o1u3bhg2bBiefvpppKenl1m+9Djgw4cPY+7cuejXrx86deqES5cuVev8iYjIMzDTSkTURBw4cACbNm3Cli1bkJubC4VCgQEDBmD8+PG13vYvv/wCoOLxqwUFBfjqq6+QlZUFf39/dO3aFb169Sp32T/++AMAMGjQoDKPDR482GEZV/D29sacOXPw1FNPYdOmTbj99tulx44dO4Y5c+YgPz8fCQkJGDlyJHJzc7F9+3bcfPPNeOeddzBkyBBp+f3792P16tWIi4vDyJEj4ePjg5SUFGzbtg2//PIL1q1bJ2W1+/Xrh1tvvRWffPIJOnfujMTERGk70dHRDsd49OhRfPDBB+jVqxemTZuG1NRU/Pjjj7j99tuxceNGtG/fHgAghMBdd92FAwcOICYmBtOmTYNCoUBGRgb27duH2NhYdOvWzannxWg0YteuXUhISICXl1eVy7dq1QpPP/00Hn/8cTz22GP4+OOPsWrVKhw4cABTp06VsrW7d+/G/fffD7PZjGHDhqFNmzbIyMjAjz/+iJ07d+KTTz5B165dpe3+9NNP+PzzzxEXF4fevXtDpVLh9OnTWL9+PXbs2IENGzagefPmZY7n4MGD+M9//oM+ffpg6tSpyM3NhUqlcurciYjIwwgiImq0zp49K958800xYsQIERUVJaKiosT06dPFJ598IrKysmq83S+//FKsWLFCvPTSS2LOnDmic+fOYtiwYSI5ObnMsvb9XvvfhAkTRFJSUpnl4+LiRFRUlMjJySl337169RJRUVGiuLi4yuO8ePGiiIqKEsOGDat0uZSUFBEVFSWio6OFyWQSQghhMplEYmKi6Natm9i3b5/D8unp6SIhIUHEx8cLg8Eg/T4rK0sUFhaW2f6JEydEr169xJ133lnu8T3++OPlHtcff/whPV8bNmxweGzdunUiKipKPPPMM9LvkpKSRFRUlLjvvvvKbMtisYi8vLxKn4fSduzYIaKiosTGjRudXkcIIRYuXCiioqLE4sWLRdeuXUViYqIoKioSQgiRl5cnYmNjRb9+/cTp06cd1jt58qTo1auXmDRpksPv09PTHZ5juz179ojOnTuLJUuWOPy+9HO2bt26ah07ERF5JmZaiYgamczMTGzZsgXfffcdjh07BgCIiorCokWLMG7cOLRu3brW+1i/fj0OHTok/bt79+54/fXX0bZt2zLL3nHHHRg5ciSuu+46aDQanDt3DqtXr8a2bdtw22234dtvv3XIlBUVFQGwNU8qj5+fH4qLi1FYWAhvb+9anwsAaf8WiwX5+fkIDQ3Fzp07ceHCBcyZMwf9+vUrs/xdd92FF154Ab///ruUbQ0NDS13+507d0ZcXBx+/fVXmEymamf8evfujSlTpjj8burUqfi///u/csuUy8uMyuVyBAYGOr3Pn376CSqVCsOGDavWsS5btgwHDhzA119/DYVCgVdffRW+vr4AgI0bN6KgoABLlixBx44dHdaLiorCtGnT8PHHH+PMmTPS4+VlUQEgISEBHTt2xN69e8t9PDo6GjNnzqzWsRMRkWdi0EpE1MjMnDkTly9fRmBgIO6++26MHz/e6UZLzrJPU5Obm4vjx49j+fLlmDJlCt58880yZb2LFy92+Hf37t2xYsUKLFy4ENu2bcOHH36IJ554wqXHV11CCOlne6OggwcPAgBSU1PLnSf1/PnzAGzjM0uXCO/cuROff/45jh49itzcXJjNZof1cnNzpbGdziqvpFelUiE0NBQFBQXS7zp27Ijo6Ghs3rwZly9fxogRI9CnTx9069YNarXa6f1ZrVb88ssv6NevX4VT2FQkMDAQ8+bNw9KlSzFy5EiHMnD7c5qUlFTlc2oPWoUQ+O677/DNN98gKSkJBQUFsFgs0joVfQHQo0ePah03ERF5LgatRESNTFRUFC5fvoz8/Hzs3bsXgYGB8Pf3R8uWLV2+r+DgYMTHx6N79+4YM2YMHnvsMezYscOpMZAzZ87Etm3bsH//foff+/n5ITc3F4WFhQgODi6zXlWZ2JrIzMwEACgUCilIy8vLAwBs3bq10nWLi4ulnz/++GO88MILCAwMxMCBAxEREQFvb2/IZDJs374dSUlJMBqN1T6+igJHpVIJq9Uq/VuhUODjjz/GO++8g23btuG1114DAPj6+mLy5Ml46KGHpKxnZfbv34+cnByHcbbVYf/7X3sd2J/TiubmtSv9nL744ov4+OOPERYWhoSEBDRv3lza7jfffIPLly+Xuw1OX0RE1HgwaCUiamRWrVqFixcv4rvvvsOmTZvw2muv4fXXX0dMTAzGjx+P0aNHV1jGWlMBAQHo1asXtm/fjtOnT6N79+5VrhMSEgLAMUABgHbt2iE3Nxfnz58vE7RmZmaiuLgYLVq0cFlpMGDrOAsAXbt2leYjtQfF7777rtTltzJmsxlvv/0
2018-08-27 11:48:50 +02:00
"text/plain": [
"<Figure size 1152x864 with 5 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Get all temporarily \"unusual\" deaths.\n",
"unusual = devi.loc[(devi[\"residuals\"] > 1.5), [\"disease\", \"n\"]].sort_values(\"disease\")\n",
2018-08-27 11:48:50 +02:00
"# Helper dataset for easy indexing / value retrieval.\n",
"plot_data = counts[[\"cod\", \"hod\", \"prop\", \"prop_all\"]].set_index(\"cod\")\n",
2018-08-27 11:48:50 +02:00
"# Divide the plots in two big categories.\n",
"for header, cond, ylim in [\n",
" (\"> 350 Deaths / Year\", (unusual[\"n\"] > 350), 0.125),\n",
" (\"< 350 Deaths / Year\", (unusual[\"n\"] <= 350), 0.3),\n",
2018-08-27 11:48:50 +02:00
"]:\n",
" nrows = math.ceil(len(unusual[cond]) / 3)\n",
" fig = plt.figure(figsize=(16, 12),)\n",
2018-08-27 11:48:50 +02:00
" for i, (cod, (disease, _)) in enumerate(unusual[cond].iterrows(), 1):\n",
" ax = fig.add_subplot(nrows, 3, i)\n",
" ax.set_title(\"\\n\".join(textwrap.wrap(disease, 40)))\n",
2018-08-27 11:48:50 +02:00
" ax.set_xlim(0, 24)\n",
" ax.set_ylim(0, ylim)\n",
" ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop\"])\n",
" ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop_all\"])\n",
2018-08-27 11:48:50 +02:00
" # Show only lower and left axes.\n",
" if i not in (3 * nrows - 2, 3 * nrows - 1, 3 * nrows):\n",
" plt.setp(ax.get_xticklabels(), visible=False)\n",
" if i % 3 != 1:\n",
" plt.setp(ax.get_yticklabels(), visible=False)\n",
" fig.suptitle(header, fontsize=20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
2018-08-27 11:48:50 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 4
2018-08-27 11:48:50 +02:00
}