ames-housing/2_pairwise_correlations.ipynb

1050 lines
187 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pair-wise Correlations\n",
"\n",
"The purpose is to identify variables that are strongly correlated with the sales price and with each other, in order to get an idea of which variables could be good predictors and where collinearity could become an issue."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-09-02 23:23:32 CEST\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.5.0\n",
"\n",
"matplotlib 3.0.0rc2\n",
"numpy 1.15.1\n",
"pandas 0.23.4\n",
"seaborn 0.9.0\n"
]
}
],
"source": [
"# Record the date, time, time zone, interpreter, and library versions of this run.\n",
"# Note: no space after '%'; IPython 7+ no longer accepts '% magic'.\n",
"%load_ext watermark\n",
"%watermark -d -t -v -z -p matplotlib,numpy,pandas,seaborn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from utils import (\n",
" CONTINUOUS_VARIABLES,\n",
" DISCRETE_VARIABLES,\n",
" NUMERIC_VARIABLES,\n",
" ORDINAL_VARIABLES,\n",
" TARGET_VARIABLE,\n",
" load_clean_data,\n",
" print_column_list,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Render figures inside the notebook (no space after '%'; IPython 7+ rejects '% magic').\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"pd.set_option(\"display.max_columns\", 100)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Use seaborn's plain \"white\" theme for all figures below.\n",
"sns.set_style(style=\"white\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data\n",
"\n",
"A subset of the previously cleaned data is used in this analysis. It does not make sense to calculate correlations involving nominal variables."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Load only the continuous, discrete, and ordinal columns (no nominal ones).\n",
"# ordinal_encoded=True presumably yields the integer codes shown further below;\n",
"# see utils.load_clean_data to confirm.\n",
"df = load_clean_data(\n",
"    ordinal_encoded=True,\n",
"    subset=CONTINUOUS_VARIABLES + DISCRETE_VARIABLES + ORDINAL_VARIABLES,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Lot Area</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Pool Area</th>\n",
" <th>Screen Porch</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>Year Built</th>\n",
" <th>Year Remod/Add</th>\n",
" <th>Yr Sold</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>1656.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>31770.0</td>\n",
" <td>0.0</td>\n",
" <td>112.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>62.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>1080.0</td>\n",
" <td>210.0</td>\n",
" <td>1960</td>\n",
" <td>1960</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>896.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>11622.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>120.0</td>\n",
" <td>5</td>\n",
" <td>882.0</td>\n",
" <td>140.0</td>\n",
" <td>1961</td>\n",
" <td>1961</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>1329.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>14267.0</td>\n",
" <td>0.0</td>\n",
" <td>108.0</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>36.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1329.0</td>\n",
" <td>393.0</td>\n",
" <td>1958</td>\n",
" <td>1958</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>2110.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>11160.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>8</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>1968</td>\n",
" <td>1968</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>1629.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>13830.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>34.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>928.0</td>\n",
" <td>212.0</td>\n",
" <td>1997</td>\n",
" <td>1998</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 2nd Flr SF 3Ssn Porch Bedroom AbvGr \\\n",
"Order PID \n",
"1 526301100 1656.0 0.0 0.0 3 \n",
"2 526350040 896.0 0.0 0.0 2 \n",
"3 526351010 1329.0 0.0 0.0 3 \n",
"4 526353030 2110.0 0.0 0.0 3 \n",
"5 527105010 928.0 701.0 0.0 3 \n",
"\n",
" Bsmt Full Bath Bsmt Half Bath Bsmt Unf SF BsmtFin SF 1 \\\n",
"Order PID \n",
"1 526301100 1 0 441.0 639.0 \n",
"2 526350040 0 0 270.0 468.0 \n",
"3 526351010 0 0 406.0 923.0 \n",
"4 526353030 1 0 1045.0 1065.0 \n",
"5 527105010 0 0 137.0 791.0 \n",
"\n",
" BsmtFin SF 2 Enclosed Porch Fireplaces Full Bath \\\n",
"Order PID \n",
"1 526301100 0.0 0.0 2 1 \n",
"2 526350040 144.0 0.0 0 1 \n",
"3 526351010 0.0 0.0 0 1 \n",
"4 526353030 0.0 0.0 2 2 \n",
"5 527105010 0.0 0.0 1 2 \n",
"\n",
" Garage Area Garage Cars Gr Liv Area Half Bath \\\n",
"Order PID \n",
"1 526301100 528.0 2 1656.0 0 \n",
"2 526350040 730.0 1 896.0 0 \n",
"3 526351010 312.0 1 1329.0 1 \n",
"4 526353030 522.0 2 2110.0 1 \n",
"5 527105010 482.0 2 1629.0 1 \n",
"\n",
" Kitchen AbvGr Lot Area Low Qual Fin SF Mas Vnr Area \\\n",
"Order PID \n",
"1 526301100 1 31770.0 0.0 112.0 \n",
"2 526350040 1 11622.0 0.0 0.0 \n",
"3 526351010 1 14267.0 0.0 108.0 \n",
"4 526353030 1 11160.0 0.0 0.0 \n",
"5 527105010 1 13830.0 0.0 0.0 \n",
"\n",
" Misc Val Mo Sold Open Porch SF Pool Area Screen Porch \\\n",
"Order PID \n",
"1 526301100 0.0 5 62.0 0.0 0.0 \n",
"2 526350040 0.0 6 0.0 0.0 120.0 \n",
"3 526351010 12500.0 6 36.0 0.0 0.0 \n",
"4 526353030 0.0 4 0.0 0.0 0.0 \n",
"5 527105010 0.0 3 34.0 0.0 0.0 \n",
"\n",
" TotRms AbvGrd Total Bsmt SF Wood Deck SF Year Built \\\n",
"Order PID \n",
"1 526301100 7 1080.0 210.0 1960 \n",
"2 526350040 5 882.0 140.0 1961 \n",
"3 526351010 6 1329.0 393.0 1958 \n",
"4 526353030 8 2110.0 0.0 1968 \n",
"5 527105010 6 928.0 212.0 1997 \n",
"\n",
" Year Remod/Add Yr Sold \n",
"Order PID \n",
"1 526301100 1960 2010 \n",
"2 526350040 1961 2010 \n",
"3 526351010 1958 2010 \n",
"4 526353030 1968 2010 \n",
"5 527105010 1998 2010 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the numeric columns.\n",
"df[NUMERIC_VARIABLES].head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ordinal variables are encoded as integers (with greater values indicating a higher sales price by \"gut feeling\"; refer to the [data documentation](https://www.amstat.org/publications/jse/v19n3/decock/DataDocumentation.txt) to see the un-encoded values) and take part in the analysis."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Electrical</th>\n",
" <th>Exter Cond</th>\n",
" <th>Exter Qual</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Functional</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Heating QC</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Shape</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool QC</th>\n",
" <th>Utilities</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Bsmt Cond Bsmt Exposure Bsmt Qual BsmtFin Type 1 \\\n",
"Order PID \n",
"1 526301100 4 4 3 4 \n",
"2 526350040 3 1 3 3 \n",
"3 526351010 3 1 3 5 \n",
"4 526353030 3 1 3 5 \n",
"5 527105010 3 1 4 6 \n",
"\n",
" BsmtFin Type 2 Electrical Exter Cond Exter Qual Fence \\\n",
"Order PID \n",
"1 526301100 1 4 2 2 0 \n",
"2 526350040 2 4 2 2 3 \n",
"3 526351010 1 4 2 2 0 \n",
"4 526353030 1 4 2 3 0 \n",
"5 527105010 1 4 2 2 3 \n",
"\n",
" Fireplace Qu Functional Garage Cond Garage Finish \\\n",
"Order PID \n",
"1 526301100 4 7 3 3 \n",
"2 526350040 0 7 3 1 \n",
"3 526351010 0 7 3 1 \n",
"4 526353030 3 7 3 3 \n",
"5 527105010 3 7 3 3 \n",
"\n",
" Garage Qual Heating QC Kitchen Qual Land Slope Lot Shape \\\n",
"Order PID \n",
"1 526301100 3 1 2 2 2 \n",
"2 526350040 3 2 2 2 3 \n",
"3 526351010 3 2 3 2 2 \n",
"4 526353030 3 4 4 2 3 \n",
"5 527105010 3 3 2 2 2 \n",
"\n",
" Overall Cond Overall Qual Paved Drive Pool QC Utilities \n",
"Order PID \n",
"1 526301100 4 5 1 0 3 \n",
"2 526350040 5 4 2 0 3 \n",
"3 526351010 5 5 2 0 3 \n",
"4 526353030 4 6 2 0 3 \n",
"5 527105010 4 4 2 0 3 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the integer-encoded ordinal columns.\n",
"df[ORDINAL_VARIABLES].head(n=5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Correlations\n",
"\n",
"The pair-wise correlations are calculated based on the type of the variables:\n",
"- **continuous** variables are assumed to be either linearly related to the target and to each other or not related at all: **Pearson's correlation coefficient**\n",
"- **discrete** (because of the low number of distinct realizations as seen in the data cleaning notebook) and **ordinal** (low number of distinct realizations as well) variables are assumed to be either monotonically related to the target and to each other or not related at all: **Spearman's rank correlation coefficient**\n",
"\n",
"Furthermore, a \"rule of thumb\" classification into *weak* and *strong* correlations is applied to the variables. The identified variables will be used in the prediction modelling part to speed up the feature selection. A correlation between 0.33 and 0.66 is considered *weak* while a correlation above 0.66 is considered *strong*."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"strong = 0.66\n",
"weak = 0.33"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two heatmaps below (implemented in the reusable `plot_correlation` function) help visualize the correlations.\n",
"\n",
"Obviously, many variables are pair-wise correlated. This could render the regression coefficients *imprecise* and not usable / interpretable. At the same time, this does not lower the predictive power of a model as a whole. In contrast to the pair-wise correlations, *multi-collinearity* is not checked here."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def plot_correlation(data, title):\n",
"    \"\"\"Visualize a correlation matrix as a lower-triangular heatmap.\n",
"\n",
"    Args:\n",
"        data: square correlation matrix (e.g., the result of DataFrame.corr());\n",
"            its row and column labels become the tick labels.\n",
"        title: text shown as the figure title.\n",
"    \"\"\"\n",
"    _, ax = plt.subplots(figsize=(12, 12))\n",
"    ax.set_title(title, fontsize=24)\n",
"    # Blank out the diagonal and the upper triangular part of the matrix.\n",
"    # The builtin bool is used as the np.bool alias was removed in NumPy 1.24.\n",
"    mask = np.triu(np.ones(data.shape, dtype=bool))\n",
"    # Use a diverging color map.\n",
"    cmap = sns.diverging_palette(240, 0, as_cmap=True)\n",
"    # Plot it.\n",
"    sns.heatmap(\n",
"        data, vmin=-1, vmax=1, cmap=cmap, center=0, linewidths=.5,\n",
"        cbar_kws={\"shrink\": .5}, square=True, mask=mask, ax=ax,\n",
"    )\n",
"    # Adjust the labels' font size only after plotting: seaborn sets the ticks\n",
"    # and their labels itself, so labels assigned beforehand get replaced.\n",
"    ax.tick_params(axis=\"both\", labelsize=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pearson\n",
"\n",
"Pearson's correlation coefficient measures the strength of the linear relationship between two variables."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Pair-wise Pearson correlations among the continuous variables and the target.\n",
"pearson = (\n",
"    df[CONTINUOUS_VARIABLES + TARGET_VARIABLE]\n",
"    .corr(method=\"pearson\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsQAAAKPCAYAAABqytwsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xl8Tdf+//HXOSGDBElMUaQknBgqiCqqpUq16Kg1Jy4tbbUoagjVmmqs0tLKNRYxD6Ek2ntr6NfQUnqpmqnQxBSzJCInw/794Zdz5Qoi4YSc9/PxyOOR7L32+qy9cx4enyyfvZbJMAwDEREREREHZc7rAYiIiIiI5CUlxCIiIiLi0JQQi4iIiIhDU0IsIiIiIg5NCbGIiIiIODQlxCIiIiLi0Ark9QBE5P6IjY2lSZMmWZ4zmUw4Ozvj6elJtWrVePPNN2natKmdR/joCw0NZeXKlTz11FOEh4c/0FiGYbB+/XrWrFnDnj17OHfuHAULFuSxxx6jfv36BAcHU758+Qc6hgctICAAgDVr1mCxWHLdX0JCAomJiZQqVcp2bMqUKXzzzTe8+OKLTJ48OdcxRCR/UkIskg898cQTODs72342DAOr1UpsbCwbNmxgw4YNdOjQgaFDh+bhKOV2zp49S9++fdm5cycAhQoVws/Pj6SkJI4fP87Ro0dZvHgxAwYMoFOnTnk82odDZGQkY8eOZeTIkZkSYhGR7FBCLJIPff3115QtW/aW4ykpKXzzzTf885//ZOHChTz77LM8//zzeTDCR5PJZHrgMWJjY2nbti3nz5+natWq9OnTh4YNG9rOX7x4kalTpxIeHs6oUaMoUKAAHTp0eODjethNnDiRc+fO3XK8Y8eOtGjRAg8PjzwYlYg8KlRDLOJAChYsSJ8+fahVqxYACxcuzOMRPVoyZh4fe+yxB9J/eno6AwYM4Pz58wQFBbFgwYJMyTCAt7c3Q4YM4b333gNg/PjxnD179oGMJz/w9vbG399fs8YickdKiEUcUOPGjQH4888/83gkj5ZKlSoB/619vd++//57fv/9dwoUKMD48eMpVKjQbdt+8MEHeHt7k5SUxLJlyx7IeEREHIVKJkQcUMZ/HycmJt5yLiYmhhkzZrBlyxbi4uJwd3enZs2adO7cmfr162fZX0xMDPPmzWPbtm2cPHkSq9WKp6cnNWrUICQkhHr16mVqHxISwm+//caiRYtYs2YN33//PQCBgYHMnj0bs9nM9u3bmTt3Lrt27eLq1asUKVKEqlWr8uabb9KiRYssx/Hvf/+bJUuW8Oeff3Lt2jVKlChB/fr16datGxUqVMjUNiIigkGDBtGxY0d69OjBN998w4YNGzh//jzFihXjueee48MPP6RkyZK2aypWrAhwywtgBw4cYObMmfz2229cvHgRd3d3LBYLr7zyCm+++SYFCmTvn9oVK1YA8MILL1CuXLk7tnV1dWXs2LG4u7sTGBh4y/lt27YRHh5ue36enp7UqVOHt99+m+rVq9/2WdSrV48vvviCM2fOULp0acaMGcOJEyfueL527doAXLhwgZkzZ7JhwwZOnz6Ni4sLVatWpX379rz00kvZegYAaWlpREZGEhUVxf79+7l8+TLOzs6UK1eOJk2a0KVLFwoXLpxp7Bnef/99AMaMGUOrVq3u+FJdXFwcs2fPZuPGjZw6dQpnZ2csFgtvvPEGrVq1uuX39vzzz3Py5El+/fVXdu7cyZw5czh48CCGYRAQEEBISAgtW7bM9n2KyMNDCbGIA/r7778BKF26dKbjmzdvplevXly7dg03NzcqVarExYsX+fnnn/n555/p2bMnPXr0yHTNli1b+PDDD7l+/TqFCxfG19eX5ORkYmJiWLduHevXr2fChAm8/PLLt4xj3Lhx7N69G4vFwuXLlylRogRms5k1a9YwYMAA0tPT8fHxoXLlypw/f54tW7awZcsW/v
zzTwYOHGjrJ6PUYM2aNbb7KleuHNHR0axYsYLIyEgmTJhAs2bNbhlDXFwcrVq14syZM5QpU4by5ctz5MgRFi9ezObNm1m1ahVFihQBbswMHzp0KNP1v/32G++88w5Wq5VixYpRuXJlrly5wo4dO9ixYwdbt27N1uoGycnJ7Nq1C+C2f3j8r0aNGmV5/Msvv2T69OkAFC9enMqVKxMTE8PatWv58ccf+eSTTwgODr7lut27d7NkyRI8PT0pX748J0+eJCAggBMnTtzxPMC+ffvo1q0bFy5cwNnZmQoVKnDt2jW2bdvGtm3baNWqFaNHj75rHXZKSgrdu3dn8+bNAPj6+lKqVCnOnDnDwYMHOXjwIOvWrWP58uU4OztTrFgxgoKC2Lt3L1arlYoVK1KkSBGKFSt2xzi7du3i/ffftyXblSpVIjExkf/85z/85z//ISoqiqlTp+Lu7n7LtWFhYcybN49ChQpRvnx5Tp06xa5du9i1axfnzp2jc+fOd4wtIg8hQ0TyhZiYGMNisRgWi8WIiYm5bbvLly8b9erVMywWizFy5MhM1wcFBRkWi8X46quvjOTkZNu5devW2c799NNPtuPJycnGM888Y1gsFmP06NGZrjl37pzRuXNnw2KxGM2bN880huDgYNtY//3vfxuGYRhpaWnGpUuXjLS0NOPpp582LBaLERUVlem6lStXGgEBAUblypUz3eOUKVMMi8Vi1K5d29iwYYPteFJSkjF69GjDYrEY1atXNw4dOmQ7t2LFCtsYmjVrZuzdu9d27j//+Y9Ro0YNw2KxGNOnT7/9QzcMo1WrVobFYjFmzZplpKWl2Y5v2bLFqF69umGxWIwdO3bcsQ/DMIzDhw/bxvP777/ftf3trFy50rBYLEa1atWMZcuWGenp6YZhGEZqaqoxffp0IyAgwAgICDC2bNliu+bmZ9GzZ0/DarUahmEYFy5cyNb5q1evGo0aNTIsFovxySefGPHx8ba+d+7cafuMfPfdd5nGmtHnzb+XefPmGRaLxXj66aeNAwcOZGq/du1ao3Llyll+Nho3bmxYLJZMv3/DMIzJkyfbxp3h0qVLxlNPPWVYLBajV69exqVLl2zn/vjjD9u99OvXL8sYFovFmDhxou3znpycbPTp08f2Gcx4PiLy6FANsYgDMAyDq1evsmnTJrp27crFixcpXLgw77zzjq3N7NmzSUhI4PXXX+ejjz7KtGxbkyZN+PjjjwH45ptvbMf37t3LtWvXKFWqFAMGDMh0TfHixfnwww8BiI6OJj09/ZZx1apVixdeeAEAs9mMp6cnFy5c4Pz58xQtWpTmzZtnav/666/Tpk0bWrZsSUJCAgDXrl1j9uzZAIwYMcJWHw03ygoGDRpEkyZNSE5OZurUqVk+n/Hjx1OtWrVM48r4r+8//vjjts8V4PDhwwC89dZbmM3//Se1QYMGdO3alRYtWpCSknLHPgCuXr1q+97T0/Ou7W8n4/fTq1cv3nrrLduMrJOTE926dSMkJATDMPjqq6+yvL5Pnz4ULFgQuPFCWnbOL126lNOnT/PUU08xcuTITCs61K5dm88//xyA6dOn3/VZbNu2DScnJ3r27EnlypUznWvevDl169YF4K+//rrzg7iDBQsWcPnyZSwWC19++WWm5x0YGMjUqVMxmUysWbOGo0eP3nL9s88+S58+fWyfd2dnZwYMGABAfHx8rsYmInlDJRMi+dDtNujI4OXlxeTJkzOVTGzYsAHgtjWQLVu2ZMSIERw4cIBz585RokQJgoKC+P3337l+/TpOTk63XOPm5gbcKGlITk62/ZyhZs2aWY6tcOHCXLlyhcGDB/P222/bXmaDG0nvzXbu3EliYiLe3t63rVMNCQlh/fr1bNq0ibS0tExjzah1/l8ZNccZifft+Pr6cvToUQYMGMCHH37IE088YUtCe/Xqdcdrb3bzs0lLS8v2dTf766+/iImJwWw2065duyzbdOrUiXnz5rFnzx4uXL
iQqbTA09Pzllrrm93u/Pr16wFo0aJFliURDRs2pGjRoly4cIF9+/Zl+XvP8O2335KSkpJlP2lpabYShqSkpNv2cTf
"text/plain": [
"<Figure size 864x864 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_correlation(data=pearson, title=\"Pearson's Correlation\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Correlation of each continuous variable with the target. Only the strength\n",
"# matters for the weak / strong classification below, so the sign is dropped;\n",
"# otherwise strongly negative correlations would be silently excluded.\n",
"corrs = pearson.loc['SalePrice'].drop('SalePrice').abs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the continuous variables that are weakly and strongly correlated with the sales price."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1st Flr SF First Floor square feet\n",
"BsmtFin SF 1 Type 1 finished square feet\n",
"Garage Area Size of garage in square feet\n",
"Mas Vnr Area Masonry veneer area in square feet\n",
"Total Bsmt SF Total square feet of basement area\n"
]
}
],
"source": [
"# Keep the variables whose correlation lies in the half-open interval (weak, strong].\n",
"pearson_weakly_correlated = corrs[(corrs > weak) & (corrs <= strong)].index.tolist()\n",
"print_column_list(pearson_weakly_correlated)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gr Liv Area Above grade (ground) living area square feet\n"
]
}
],
"source": [
"# Keep the variables whose correlation exceeds the strong threshold.\n",
"pearson_strongly_correlated = corrs[corrs > strong].index.tolist()\n",
"print_column_list(pearson_strongly_correlated)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Spearman\n",
"\n",
"Spearman's correlation coefficient measures the strength of the monotonic (rank-order) relationship between two variables."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Pair-wise Spearman correlations among the alphabetically sorted discrete and\n",
"# ordinal variables, with the target appended as the last row / column.\n",
"spearman = (\n",
"    df[sorted(DISCRETE_VARIABLES + ORDINAL_VARIABLES) + TARGET_VARIABLE]\n",
"    .corr(method=\"spearman\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsYAAAKRCAYAAABX42/6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xtcz+f/+PHHO52ckkSSUwflMGJOxebUMBkbm0qrOW0+axuzLZTDlmNTrJE5ayijItnEZsM0TEZ8+MhhSpSSHBJR79L794dfr6/3Kh0N2/N+u3X7vHe9Xtf1ul6v9/v98ezqeV2XSqPRaBBCCCGEEOJfTudpd0AIIYQQQohngQTGQgghhBBCIIGxEEIIIYQQgATGQgghhBBCABIYCyGEEEIIAUhgLIQQQgghBAC6T7sDQjyP9u/fz/bt2zlx4gTXr19HX1+fRo0a0b17d958801eeOGFp91F8Qg7OzsA/P39GT58+BNrvzT6+vo0aNCANm3a4OHhQc+ePau9D+WVmpqKk5MTAPHx8dSuXbva2s7IyCA8PJzffvuNpKQkcnNzqVevHm3atGHIkCEMGTKEGjVqVNv1/m7BwcEsXbqUgQMHsmTJkmppMzExEWtra62yos/TDz/8gK2tbbVcRwhRPhIYC1EBBQUFeHt7s2vXLgAaN26MnZ0d2dnZpKamkpiYyKZNmxgzZgxTp059yr0VfzdbW1vq1KlTrDw7O5vk5GTS09PZu3cvkydP5t13330KPXxyIiIimDt3Lnl5eejo6GBmZkbz5s25cuUKBw4c4MCBA2zYsIFly5bRuHHjp93dpy4zM5N58+aRkpLC1q1bn3Z3hBD/nwTGQlTA119/za5du7C0tOSrr76ibdu2yrHc3Fw2bNhAUFAQISEhNGnSBE9Pz6fYW/FXKpXqibY/Y8YMunfvXuKxGzduMG3aNH799VcWLVpE3759i40UPq8CAwNZs2YNenp6vPfee4wdOxYTExPl+K+//sr8+fM5ffo0o0aNYsuWLdStW/cp9vjp++2339i1axft2rUrdmznzp0ANGvW7O/ulhD/epJjLEQ53bt3j40bNwIPA+RHg2IAQ0NDxo8fj5eXFwArV66ksLDwb++nKK5ohNLc3Pyp9aFBgwYEBgZibGxMYWEhUVFRT60v1enAgQOsXbuWGjVqsHDhQry9vbWCYoA+ffoQGhpK/fr1SU5OJjg4+Cn19vlgbW2NtbU1+vr6T7srQvzrSGAsRDklJydz79499PX1ad26dannjRgxAnj4p9L09PS/q3viMWxsbICyc4GfNCMjIzp27Ag8zC193hUWFjJr1iw0Gg3Dhg3j1VdfLfVcMzMzxo8fD0BkZCT379//u7ophBDlJqkUQpSTru7Dr4tareb333/H0dGxxPPMzc2Jjo7GyMhIK5cyKioKX19fXF1d+fDDDwkICODgwYPk5eVhaWmJm5sbb731Fjo6xX9fVavVfPfdd/zwww8kJSWh0WiwtLTktddew8PDAwMDg2J18vLy2LJlC7t37+b8+fPcuXMHQ0NDLC0tGTRoEB4eHlojUkX9e/vtt3FwcCAwMJCrV69ibm6Ov78/BQUFvPPOOzg5OREQEMCyZcv48ccfyczMpFGjRrzxxht4eXmhq6vLrl27+Pbbbzl//jy6urp069YNb29vrKysivXz7NmzhIWF8ccff3Dt2jUKCgpo0KABnTt3ZuzYscX+1Ozp6cmRI0fYsmUL9+7dY9WqVZw6dYq8vDysrKx46623GDlypNZzbNWqFefOnaN+/fpKmUajYcuWLWzbto2zZ8+iVqsxNTWlc+fOjBo1ig4dOpT2UaiSx6VzXL9+ndDQUH777TdSUlK4f/8+devWpW3btri4uDBw4ECt84smg02ZMkWZEHbo0CGysrJo3LgxAwcO5P333y932sK6devw9/dHT0+P4OBg+vbt+9jzjxw5wu
XLlwEYO3Zsme0PGzYMExMTunXrRs2aNbWOZWdns379enbv3s3ly5fR0dHB0tISZ2dnPDw8MDQ01Dq/X79+XLlyhZ9//pmgoCD27duHnp4effv2JSAgoMzjRX755Rc2bdrE//73P+7du4eZmRl9+vRh/PjxNGrUqFzPDSAlJYUNGzZw+PBhrly5glqtxtjYGHt7ezw9PXFwcCjWd4DTp09jZ2eHhYUFe/fuBR4/+W737t2Eh4dz6tQp7t27R8OGDXF0dOS9997D0tJS69xHv9MfffQRS5cuZe/evVy/fp0GDRrQp08fPvzwwwrdpxD/dBIYC1FOVlZWmJmZkZGRwYcffsioUaMYMmRIicFemzZtSm0nMzMTFxcXrl69irW1NYWFhZw+fZqZM2dy4MABvvrqKyUIB8jKyuK9997j5MmT6Ojo0KxZMwwNDTl37hwJCQnExMSwdu1araDvzp07jBo1itOnT1OjRg2aN2+Oubk5V65c4eTJk5w8eZJDhw6xZs2aYv07ceIE4eHhGBsb07JlS65cuYKdnR2nT58GHgYwrq6uymz6Ro0akZqaytKlS7l+/TqmpqYsXbqU+vXrY2lpyZ9//smePXs4ceIEMTExWv3ctm0b06dP58GDB8r5d+/eJTU1lR07dvDTTz/x7bff0rVr12L93L59O2FhYRgYGNCyZUtu3LhBQkICs2fP5uLFi8yYMUM518fHBx8fH636X3zxBeHh4ahUKlq0aEHt2rWV6+7atYslS5bwyiuvlPo+VkZWVhZxcXEAxVYuOXPmDGPGjOHWrVvUqlWLpk2bAg8DrqLJa5999pky6vqoP//8k+XLl3Pv3j3lXpKTk1mzZg2///47ERERWp+pkkRGRvLll1+ip6fH119/XWZQDHD48GEAGjZsWK586fr16/PGG28UK09OTmbMmDGkpaVRo0YNWrVqRWFhIQkJCZw+fZrvv/+etWvX0rBhw2J1J0+ezKlTp7C1teXq1as0adKkXMc1Gg2ff/45ERERyj20atWKixcvEhoaSkxMDKtWraJ9+/Zl3teBAwf48MMPyc3NpW7dujRv3py8vDxSUlL45Zdf2LNnDwsXLuS1114DHr73enp6JCcnU6tWLVq3bl3ivT2qsLCQKVOm8MMPPwAPfwFv1qwZFy9eZOvWrezYsYOFCxcyYMCAYnWvXbvG8OHDuXr1KhYWFrRs2ZI///yTzZs389tvvym/yAshAI0Qotx2796tsbOz09ja2io/ffr00UyZMkWzdetWTUZGRql1t27dqtTp1q2b5siRI8qxQ4cOaV588UWNra2tZsOGDVr1/vOf/2hsbW01rq6umkuXLinlaWlpGnd3d42tra3Gy8tLq46/v7/G1tZWM2jQIE1qaqpSXlBQoFm3bp3Sj//+978l9m/ChAkatVqt0Wg0mhs3bmg0Go3m8OHDyvGePXtqTp06pdRdunSpxtbWVtO6dWuNnZ2d5ttvv9UUFhZqNBqN5uLFi5pu3bppbG1tNevXr1fqZGZmauzt7TW2traatWvXagoKCpRjly9f1gwdOlRja2ureffdd7XuzcPDQ+mHr6+v5s6dO8q9ffnllxpbW1tNmzZtHvtenD9/XmNra6txcHDQnD9/XinPy8vT+Pn5aWxtbTVOTk6l1v+rov4cPny41HMuXbqkvF9du3bVXLt2Tev4sGHDNLa2tppJkyYp96TRaDR37tzRfPbZZxpbW1tN586dlfdFo9FolixZolzbxcVF6/Px6Gc1JiZGKU9JSVHq3L17V6PRaDQxMTGa1q1ba9q2bav56aefyn3fEyZM0Nja2mrGjBlT7jp/pVarNQMHDtTY2tpqPDw8NOnp6cqxpKQkzZAhQzS2trYad3d3rXp9+/bV2Nraal544QVNfHy80lbRsyvreEhIiMbW1lbz0ksvaQ4dOqS0m5OTo3wGevfurfVeFD3vCRMmKGV5eXmal156SWNra6uZP3++Ji8vTzmWmZmpGT16tP
JdfFTR923YsGHFnknR+3Pu3DmlLDg4WPkM7N27Vym/f/++Zv78+RpbW1tN+/btteo8+p0eMGCA5n//+59yLD4+Xvn
"text/plain": [
"<Figure size 864x864 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_correlation(data=spearman, title=\"Spearman's Rank Correlation\")"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Correlation of each discrete / ordinal variable with the target. Only the\n",
"# strength matters for the weak / strong classification below, so the sign is\n",
"# dropped; otherwise strongly negative correlations would be silently excluded.\n",
"corrs = spearman.loc['SalePrice'].drop('SalePrice').abs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show the discrete and ordinal variables that are weakly and strongly correlated with the sales price."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Bsmt Exposure Refers to walkout or garden level walls\n",
"BsmtFin Type 1 Rating of basement finished area\n",
"Fireplace Qu Fireplace quality\n",
"Fireplaces Number of fireplaces\n",
"Full Bath Full bathrooms above grade\n",
"Garage Cond Garage condition\n",
"Garage Finish Interior finish of the garage\n",
"Garage Qual Garage quality\n",
"Half Bath Half baths above grade\n",
"Heating QC Heating quality and condition\n",
"Paved Drive Paved driveway\n",
"TotRms AbvGrd Total rooms above grade (does not include bathrooms)\n",
"Year Remod/Add Remodel date (same as construction date if no remodeling or additions)\n"
]
}
],
"source": [
"# Keep the variables whose correlation lies in the half-open interval (weak, strong].\n",
"spearman_weakly_correlated = corrs[(corrs > weak) & (corrs <= strong)].index.tolist()\n",
"print_column_list(spearman_weakly_correlated)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Bsmt Qual Evaluates the height of the basement\n",
"Exter Qual Evaluates the quality of the material on the exterior\n",
"Garage Cars Size of garage in car capacity\n",
"Kitchen Qual Kitchen quality\n",
"Overall Qual Rates the overall material and finish of the house\n",
"Year Built Original construction date\n"
]
}
],
"source": [
"# Keep the variables whose correlation exceeds the strong threshold.\n",
"spearman_strongly_correlated = corrs[corrs > strong].index.tolist()\n",
"print_column_list(spearman_strongly_correlated)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the Weakly and Strongly Correlated Variables"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Persist both lists (Pearson and Spearman results merged and sorted by name)\n",
"# as a single JSON object.\n",
"with open(\"weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
"    json.dump(\n",
"        {\n",
"            \"weakly_correlated\": sorted(\n",
"                pearson_weakly_correlated + spearman_weakly_correlated\n",
"            ),\n",
"            \"strongly_correlated\": sorted(\n",
"                pearson_strongly_correlated + spearman_strongly_correlated\n",
"            ),\n",
"        },\n",
"        file,\n",
"    )"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}