ames-housing/3_descriptive_visualizations.ipynb

5015 lines
1.9 MiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Descriptive Visualizations\n",
"\n",
"The purpose of this notebook is to visually examine the nominal features, discard the useless ones among them, and create new factor variables.\n",
"\n",
"The \"main\" plot used in this notebook is *Gr Liv Area* vs. *SalePrice*, as the overall living area is the predictor most strongly correlated with the sales price (which is also very intuitive). Many of the nominal variables significantly change the slopes of the regression lines for sub-groups of data points."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-09-05 18:04:02 CEST\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.5.0\n",
"\n",
"matplotlib 3.0.0rc2\n",
"numpy 1.15.1\n",
"pandas 0.23.4\n",
"seaborn 0.9.0\n"
]
}
],
"source": [
"# Log the run date, time, and timezone plus the Python and package versions for reproducibility.\n",
"# Note: line magics must be written without a space after the percent sign.\n",
"%load_ext watermark\n",
"%watermark -d -t -v -z -p matplotlib,numpy,pandas,seaborn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from sklearn.ensemble import IsolationForest\n",
"\n",
"from utils import (\n",
" ALL_COLUMNS,\n",
" NOMINAL_VARIABLES,\n",
" TARGET_VARIABLES,\n",
" load_clean_data,\n",
" encode_ordinals,\n",
" print_column_list,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Render matplotlib figures inline, directly below the cells that create them.\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"random_state = np.random.RandomState(42)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"pd.set_option(\"display.max_columns\", 120)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"sns.set_style(\"white\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df = load_clean_data(\"data/data_clean_with_transformations.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 83)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Alley</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bldg Type</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Central Air</th>\n",
" <th>Condition 1</th>\n",
" <th>Condition 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Exter Cond</th>\n",
" <th>Exter Qual</th>\n",
" <th>Exterior 1st</th>\n",
" <th>Exterior 2nd</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Foundation</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Garage Type</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Heating</th>\n",
" <th>Heating QC</th>\n",
" <th>House Style</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Contour</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Config</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>MS SubClass</th>\n",
" <th>MS Zoning</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Mas Vnr Type</th>\n",
" <th>Misc Feature</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Neighborhood</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Roof Matl</th>\n",
" <th>Roof Style</th>\n",
" <th>Sale Condition</th>\n",
" <th>Sale Type</th>\n",
" <th>Screen Porch</th>\n",
" <th>Street</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.0)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>Year Built</th>\n",
" <th>Year Remod/Add</th>\n",
" <th>Yr Sold</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0.0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>BrkFace</td>\n",
" <td>Plywood</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1656.0</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>Fa</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>112.0</td>\n",
" <td>Stone</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>Names</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>7.914252</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>1960</td>\n",
" <td>1960</td>\n",
" <td>2010</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>2</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>Y</td>\n",
" <td>Feedr</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>896.0</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>Inside</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RH</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>120.0</td>\n",
" <td>Pave</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>7.483244</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>1961</td>\n",
" <td>1961</td>\n",
" <td>2010</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>Wd Sdng</td>\n",
" <td>Wd Sdng</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1329.0</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>108.0</td>\n",
" <td>BrkFace</td>\n",
" <td>Gar2</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>7.885329</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>1958</td>\n",
" <td>1958</td>\n",
" <td>2010</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>BrkFace</td>\n",
" <td>BrkFace</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>2110.0</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Ex</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>Corner</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>8.347590</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>1968</td>\n",
" <td>1968</td>\n",
" <td>2010</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>PConc</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1629.0</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Gd</td>\n",
" <td>2Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>Inside</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>060</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gilbert</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>7.846590</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>1997</td>\n",
" <td>1998</td>\n",
" <td>2010</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 2nd Flr SF 3Ssn Porch Alley Bedroom AbvGr \\\n",
"Order PID \n",
"1 526301100 1656.0 0.0 0.0 NA 3 \n",
"2 526350040 896.0 0.0 0.0 NA 2 \n",
"3 526351010 1329.0 0.0 0.0 NA 3 \n",
"4 526353030 2110.0 0.0 0.0 NA 3 \n",
"5 527105010 928.0 701.0 0.0 NA 3 \n",
"\n",
" Bldg Type Bsmt Cond Bsmt Exposure Bsmt Full Bath \\\n",
"Order PID \n",
"1 526301100 1Fam Gd Gd 1 \n",
"2 526350040 1Fam TA No 0 \n",
"3 526351010 1Fam TA No 0 \n",
"4 526353030 1Fam TA No 1 \n",
"5 527105010 1Fam TA No 0 \n",
"\n",
" Bsmt Half Bath Bsmt Qual Bsmt Unf SF BsmtFin SF 1 \\\n",
"Order PID \n",
"1 526301100 0 TA 441.0 639.0 \n",
"2 526350040 0 TA 270.0 468.0 \n",
"3 526351010 0 TA 406.0 923.0 \n",
"4 526353030 0 TA 1045.0 1065.0 \n",
"5 527105010 0 Gd 137.0 791.0 \n",
"\n",
" BsmtFin SF 2 BsmtFin Type 1 BsmtFin Type 2 Central Air \\\n",
"Order PID \n",
"1 526301100 0.0 BLQ Unf Y \n",
"2 526350040 144.0 Rec LwQ Y \n",
"3 526351010 0.0 ALQ Unf Y \n",
"4 526353030 0.0 ALQ Unf Y \n",
"5 527105010 0.0 GLQ Unf Y \n",
"\n",
" Condition 1 Condition 2 Electrical Enclosed Porch Exter Cond \\\n",
"Order PID \n",
"1 526301100 Norm Norm SBrkr 0.0 TA \n",
"2 526350040 Feedr Norm SBrkr 0.0 TA \n",
"3 526351010 Norm Norm SBrkr 0.0 TA \n",
"4 526353030 Norm Norm SBrkr 0.0 TA \n",
"5 527105010 Norm Norm SBrkr 0.0 TA \n",
"\n",
" Exter Qual Exterior 1st Exterior 2nd Fence Fireplace Qu \\\n",
"Order PID \n",
"1 526301100 TA BrkFace Plywood NA Gd \n",
"2 526350040 TA VinylSd VinylSd MnPrv NA \n",
"3 526351010 TA Wd Sdng Wd Sdng NA NA \n",
"4 526353030 Gd BrkFace BrkFace NA TA \n",
"5 527105010 TA VinylSd VinylSd MnPrv TA \n",
"\n",
" Fireplaces Foundation Full Bath Functional Garage Area \\\n",
"Order PID \n",
"1 526301100 2 CBlock 1 Typ 528.0 \n",
"2 526350040 0 CBlock 1 Typ 730.0 \n",
"3 526351010 0 CBlock 1 Typ 312.0 \n",
"4 526353030 2 CBlock 2 Typ 522.0 \n",
"5 527105010 1 PConc 2 Typ 482.0 \n",
"\n",
" Garage Cars Garage Cond Garage Finish Garage Qual \\\n",
"Order PID \n",
"1 526301100 2 TA Fin TA \n",
"2 526350040 1 TA Unf TA \n",
"3 526351010 1 TA Unf TA \n",
"4 526353030 2 TA Fin TA \n",
"5 527105010 2 TA Fin TA \n",
"\n",
" Garage Type Gr Liv Area Half Bath Heating Heating QC \\\n",
"Order PID \n",
"1 526301100 Attchd 1656.0 0 GasA Fa \n",
"2 526350040 Attchd 896.0 0 GasA TA \n",
"3 526351010 Attchd 1329.0 1 GasA TA \n",
"4 526353030 Attchd 2110.0 1 GasA Ex \n",
"5 527105010 Attchd 1629.0 1 GasA Gd \n",
"\n",
" House Style Kitchen AbvGr Kitchen Qual Land Contour \\\n",
"Order PID \n",
"1 526301100 1Story 1 TA Lvl \n",
"2 526350040 1Story 1 TA Lvl \n",
"3 526351010 1Story 1 Gd Lvl \n",
"4 526353030 1Story 1 Ex Lvl \n",
"5 527105010 2Story 1 TA Lvl \n",
"\n",
" Land Slope Lot Area Lot Config Lot Shape Low Qual Fin SF \\\n",
"Order PID \n",
"1 526301100 Gtl 31770.0 Corner IR1 0.0 \n",
"2 526350040 Gtl 11622.0 Inside Reg 0.0 \n",
"3 526351010 Gtl 14267.0 Corner IR1 0.0 \n",
"4 526353030 Gtl 11160.0 Corner Reg 0.0 \n",
"5 527105010 Gtl 13830.0 Inside IR1 0.0 \n",
"\n",
" MS SubClass MS Zoning Mas Vnr Area Mas Vnr Type Misc Feature \\\n",
"Order PID \n",
"1 526301100 020 RL 112.0 Stone NA \n",
"2 526350040 020 RH 0.0 None NA \n",
"3 526351010 020 RL 108.0 BrkFace Gar2 \n",
"4 526353030 020 RL 0.0 None NA \n",
"5 527105010 060 RL 0.0 None NA \n",
"\n",
" Misc Val Mo Sold Neighborhood Open Porch SF Overall Cond \\\n",
"Order PID \n",
"1 526301100 0.0 5 Names 62.0 5 \n",
"2 526350040 0.0 6 Names 0.0 6 \n",
"3 526351010 12500.0 6 Names 36.0 6 \n",
"4 526353030 0.0 4 Names 0.0 5 \n",
"5 527105010 0.0 3 Gilbert 34.0 5 \n",
"\n",
" Overall Qual Paved Drive Pool Area Pool QC Roof Matl \\\n",
"Order PID \n",
"1 526301100 6 P 0.0 NA CompShg \n",
"2 526350040 5 Y 0.0 NA CompShg \n",
"3 526351010 6 Y 0.0 NA CompShg \n",
"4 526353030 7 Y 0.0 NA CompShg \n",
"5 527105010 5 Y 0.0 NA CompShg \n",
"\n",
" Roof Style Sale Condition Sale Type Screen Porch Street \\\n",
"Order PID \n",
"1 526301100 Hip Normal WD 0.0 Pave \n",
"2 526350040 Gable Normal WD 120.0 Pave \n",
"3 526351010 Hip Normal WD 0.0 Pave \n",
"4 526353030 Hip Normal WD 0.0 Pave \n",
"5 527105010 Gable Normal WD 0.0 Pave \n",
"\n",
" TotRms AbvGrd Total Bath Total Bsmt SF Total Porch SF \\\n",
"Order PID \n",
"1 526301100 7 2.0 1080.0 272.0 \n",
"2 526350040 5 1.0 882.0 260.0 \n",
"3 526351010 6 1.5 1329.0 429.0 \n",
"4 526353030 8 3.5 2110.0 0.0 \n",
"5 527105010 6 2.5 928.0 246.0 \n",
"\n",
" Total SF Total SF (box-cox-0.0) Utilities Wood Deck SF \\\n",
"Order PID \n",
"1 526301100 2736.0 7.914252 AllPub 210.0 \n",
"2 526350040 1778.0 7.483244 AllPub 140.0 \n",
"3 526351010 2658.0 7.885329 AllPub 393.0 \n",
"4 526353030 4220.0 8.347590 AllPub 0.0 \n",
"5 527105010 2557.0 7.846590 AllPub 212.0 \n",
"\n",
" Year Built Year Remod/Add Yr Sold SalePrice \\\n",
"Order PID \n",
"1 526301100 1960 1960 2010 215000.0 \n",
"2 526350040 1961 1961 2010 105000.0 \n",
"3 526351010 1958 1958 2010 172000.0 \n",
"4 526353030 1968 1968 2010 244000.0 \n",
"5 527105010 1997 1998 2010 189900.0 \n",
"\n",
" SalePrice (box-cox-0.0) \n",
"Order PID \n",
"1 526301100 12.278393 \n",
"2 526350040 11.561716 \n",
"3 526351010 12.055250 \n",
"4 526353030 12.404924 \n",
"5 527105010 12.154253 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Newly created variables are collected in the *new_variables* list."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"new_variables = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Throughout this notebook, predictors that show \"interesting\" visual patterns are collected in the *interesting_variables* list. Together with the previously identified predictors that are weakly or strongly correlated with the price, they will be used for a naive feature selection in the next notebooks."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"interesting_variables = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Derived Characteristics\n",
"\n",
"Certain characteristics of a house are assumed to have a \"binary\" influence on the sales price. For example, the existence of a pool could be an important predictor while the exact size of the pool can be deemed not so important.\n",
"\n",
"The below cell creates boolean factor variables out of a set of numeric variables."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Map each new 0/1 indicator column to the numeric column it is derived from.\n",
"indicator_sources = {\n",
"    \"has 2nd Flr\": \"2nd Flr SF\",\n",
"    \"has Bsmt\": \"Total Bsmt SF\",\n",
"    \"has Fireplace\": \"Fireplaces\",\n",
"    \"has Garage\": \"Garage Area\",\n",
"    \"has Pool\": \"Pool Area\",\n",
"    \"has Porch\": \"Total Porch SF\",\n",
"}\n",
"# An indicator is 1 where the underlying value is strictly positive and 0 otherwise\n",
"# (missing values compare as False and therefore also map to 0).\n",
"for factor_column, column in indicator_sources.items():\n",
"    df[factor_column] = (df[column] > 0).astype(\"int64\")\n",
"# Keep only the names of the new columns; this list is used to select them from df.\n",
"derived_variables = list(indicator_sources)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(derived_variables)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch \n",
"Order PID \n",
"1 526301100 1 \n",
"2 526350040 1 \n",
"3 526351010 1 \n",
"4 526353030 0 \n",
"5 527105010 1 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[derived_variables].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2nd Floors\n",
"\n",
"A second floor clearly has a positive effect on the sales price. However, having a second floor correlates with overall living space. The individual effect is therefore not as clear as it seems in the plot below."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4lFXawOHf9J6eAElISOhVCF1CB1EURCyAZVXEghRRUBRXED9XUVdQcFXsK4gKgh1hBUQIKCBNCEVqQgiQXmYymfp+f5yQEGmBJCTAua/Li+HNmXdOYphnTnmeo1IURUGSJEmSqoC6pjsgSZIkXTlkUJEkSZKqjAwqkiRJUpWRQUWSJEmqMjKoSJIkSVVGBhVJkiSpysigIkmSJFUZGVQkSZKkKiODiiRJklRltDXdgUutc+fOREVF1XQ3JEmSLitHjx5lw4YN52131QWVqKgolixZUtPdkCRJuqwMHTq0Qu3k9JckSZJUZWRQkSRJkqqMDCqSJElSlbnq1lTOxOPxkJaWRnFxcU135YpiNBqJjo5Gp9PVdFckSbpEZFAB0tLSsNlsNGjQAJVKVdPduSIoikJ2djZpaWnExcXVdHckSbpE5PQXUFxcTGhoqAwoVUilUhEaGipHf5J0lZEjlRIyoFQ9+TOVqo0zF+yZoDeDKUT8KdUKcqQiSdLlJ/kb+E9HePMaKDha072RTiFHKrXAkiVLOHjwIJMmTar0vTweD1OmTOHo0aO43W5Gjx5N3759K/TcO+64g5kzZxIdHV16bc6cOfzwww9ERESUXnvyySdZsGABAwcOpEePHpXusyRdEJ8XjpRkdvu9kHMIwhqfvb2igBw1XzIyqFxhvvvuO4KCgnjttdfIy8tjyJAhFQ4qZ3PfffcxYsSIctcWLFhQqXtK0kXTaKH3s2DPgIBIiEo4cztHNmz+CNxF0PVRsIRf2n5epWRQqSW2b9/OyJEjycnJYcSIEQwbNoxly5bx2Wef4fV6UalUvPXWWwBMmDABRVFwuVxMnz6d5s2bl97n+uuvZ8CAAYDYgaXRaAC45557aNasGfv27cNut/Pmm28SFRXFrFmzWLt2LXXr1iU3N/eC+71kyRIWL16M3+9n/PjxdO3atQp+GpJ0HkH14faPQa0FveXMbQ6sglUvisd6C/So/EyAdH4yqNQSWq2WDz/8kKNHj/LQQw8xbNgwDh8+zHvvvYfJZGLq1KkkJSUREBBAUFAQr776Kvv376eoqKjcfSwW8Q/Mbrczfvx4JkyYUPq1Nm3a8OyzzzJr1ix+/PFHunbtyqZNm/jqq68oKiriuuuuO2PfPvnkE5YuXQpAkyZNeO6558p9PSAggHfeeacqfxySdH7GwHN/3Rxc9liOUi4ZGVRqiRYtWqBSqQgPDy/dhhsaGsrkyZOxWCwcPHiQtm3b0qNHDw4fPsyjjz6KVqtl9OjRp93r2LFjjBkzhjvvvJNBgwaVew2AunXrkpWVxeHDh2nVqhVqtRqr1UqTJk3O2LczTX+dSuahSLVSZHu4byl4nRDZrqZ7c9WQQaWW+Pv228LCQmbPns3q1asBuP/++1EUhQ0bNhAREcFHH33E1q1bmTlzJvPmzSt9XlZWFiNHjmTq1KnnnYpq1KgRn332GX6/n+LiYvbv339RfVer5SZCqRYyB0ODbjXdi6uODCq1lNVqJSEhgWHDhqHVagkICCAjI4M+ffrwxBNP8Pnnn+P1ehkzZky557377rsUFBTw9ttv8/bbbwPw/vvvn/E1mjdvTo8ePbjtttuIiIggNDS02r8vSZKubCpFUZSa7sSlNHTo0NPOU9m9e3e5xW6p6sifrSRdGc703nkmct5CkiRJqjIyqEiSJElVRgYVSZIkqcrIoCJJkiRVmWrZ/bVkyRK+/vprAFwuF7t372bevHn861//QqPRkJiYyNixY/H7/Tz//PPs3bsXvV7Piy++SG
xsLNu2batUW0mSJKmGKNXs+eefV7744gtl8ODBSkpKiuL3+5VRo0YpycnJyvLly5XJkycriqIoW7duVR555BFFUZRKtz2XW2655bRru3btqspvWTqF/NlK0pXhTO+dZ1Kt0187duxg//793HjjjbjdbmJiYlCpVCQmJrJ+/Xo2b95M9+7dAWjbti07d+7EbrdXuu3lyO/3M3XqVIYNG8Y999xDSkpKTXdJkiTpglVrUJk7dy5jxozBbrdjtVpLr1ssFgoLC0+7rtFoqqRtdftm61G6zVhF3NM/0m3GKr7ZWvnzHFasWIHb7ebLL79k4sSJzJgxowp6KkmSdGlVW0Z9QUEBhw4dokuXLtjtdhwOR+nXHA4HAQEBFBcXl7vu9/uxWq2Vbludvtl6lGeW7MDp8QFwNM/JM0t2ADCkXdRF3/dMIzFJkqTLTbWNVDZt2lRae8pqtaLT6UhNTUVRFJKSkujQoQMJCQmsWbMGgG3bttGkSZMqaVudXlu+tzSgnOT0+Hht+d5K3fdMIzGv11upe0qSJF1q1TZSOXToULkTBKdPn86kSZPw+XwkJiZyzTXX0Lp1a9atW8fw4cNRFIWXXnqpStpWp/Q85wVdr6i/j7r8fj9arSzNJknS5aXa3rVGjRpV7u9t27Zl4cKF5a6p1WpeeOGF055b2bbVKTLIxNEzBJDIIFOl7puQkMAvv/zCwIEDS0dikiRJlxuZ/HiBnhzQFJNOU+6aSafhyQFNK3Xf/v37o9frGT58OC+//DLPPPNMpe4nSZJUE+T8ygU6uRj/2vK9pOc5iQwy8eSAppVapIezj8QkSZIuJzKoXIQh7aIqHUQkSZKuRHL6S5IkSaoyMqhIkiRJVUYGFUmSJKnKyKAiSZIkVRkZVCRJkqQqI4NKLbJ9+3buueeemu6GJEnSRZNbii/Gnwth5QuQnwaB0dB3KrS5o1K3fP/99/nuu+8wmSqXmS9JklST5EjlQv25EL4fD/lHAEX8+f14cb0SYmJimDNnTtX0UZIkqYbIoHKhVr4Anr/V/vI4xfVKGDBggCwgKUnSZU8GlQuVn3Zh1yXpcuZz13QPpMuMDCoXKjD6wq5L0uXIVQi7v4evH4HMPeD313SPpMuEDCoXqu9U0P1tMV1nEtcl6UrhzIOF98DOxfDFXVCUVdM9ki4TMqhcqDZ3wKDZEFgfUIk/B82u9O4vgOjo6Et6Nox0OkVRyCgsJrOwGJ9fqenu1ByNFvQ28dhWD9Sac7eXpBJyZfhitLmjSoKIVHOKPT7yijyoVBBq0aPViM9XqTlF3Pbub/j9Cgsf6UrDcOt57gQenx+tWoVKparubl865nAYvQ5O7IKoBDCH1nSPpMuEHKlIV6U9xwro8eov9H39V1Jzikqvr9idQWahi2yHm++2pZ/3PqnZDiYu3M7CP46Q7/RUZ5cvLY0WgmKg6fVgjajp3kiXkWobqcydO5dVq1bh8XgYMWIEnTp14umnn0alUtG4cWOmTZuGWq3mrbfeYvXq1Wi1WqZMmUKbNm1ISUmpdNsLpSjKlfVJsxZQlNo7ffT9n8dw+/y4fX42HMohvmRE0rtpOG+u0OLzKwxsXe+c98gtcvP4wu1sTsnlu+3pdI4PJdCkuxTdl6Raq1pGKhs2bGDr1q18/vnnzJs3j+PHj/Pyyy8zYcIEFixYgKIorFy5kuTkZDZu3MiiRYuYOXMm06dPB6h02wtlNBrJzs6u1W+ClxtFUcjOzsZoNNZ0V87ors4xRAWZaBhupVeT8NLrsSFmVjzRk18m9SI+3HLOe+jUKqKCxPdn0KoxauXAX5KqZaSSlJREkyZNGDNmDHa7naeeeoqFCxfSqVMnAHr06MG6deuIi4sjMTERlUpFZGQkPp+PnJwckpOTK9W2f//+F9Tf6Oho0tLSyMzMrNofxFXOaDQSHV07t1o3CLXwzZhrUaEizGYova7RqIkIqFggtBp1TBvUklsTomkQZiHYrK+u7krSZaNagkpubi
7p6em8++67pKWlMXr06HLTSxaLhcLCQux2O0FBQaXPO3m9sm0vlE6nIy4urjLfsnQZcXt9ON0+gs1lC/QXK9RqoGd
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has 2nd Flr\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"interesting_variables.append(\"has 2nd Flr\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basements\n",
"\n",
"Nearly all houses in Ames, IA, have a basement. Therefore, *has Bsmt* is most likely not an important predictor."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnWd0VFXbhq8p6T2EFmpCFekgRQIKiAgKIogURQWxIKC8NpBXKX4qlldsWBDFAkpTQEREkV4EFClKUZROqAlpk56Z78edEEoIgUxIgH2txcrkzJkzewbYz3na/VhcLpcLg8FgMBjcgLW4F2AwGAyGKwdjVAwGg8HgNoxRMRgMBoPbMEbFYDAYDG7DGBWDwWAwuA1jVAwGg8HgNoxRMRgMBoPbMEbFYDAYDG7DGBWDwWAwuA17cS/gUtO8eXMqVKhQ3MswGAyGy4qDBw+ybt2685531RmVChUqMHv27OJehsFgMFxWdO/evUDnmfCXwWAwGNyGMSoGg8FgcBvGqBgMBoPBbVx1OZW8yMjI4MCBA6Smphb3Ui4bvL29qVixIh4eHsW9FIPBUIIwRgU4cOAAAQEBVK1aFYvFUtzLKfG4XC5iYmI4cOAAERERxb0cg8FQgjDhLyA1NZVSpUoZg1JALBYLpUqVMp6dwWA4C+OpZGMMyoVhvi9DsZJyApKOgacv+ITqp6FEYDwVg8Fw+bF1Lrx3HbzdABIOFvdqDKdgjEoJYPbs2fzvf/9zy7XWrVtHy5Yt6devH/fccw933XUX27ZtK9Q1p06d6pa1GQxuISsT9md3djszIXZ3/ue7XEW/JsNJjFG5AmnRogVTpkxh6tSpPPbYY7z99tuFut4HH3zgppUZDG7AZoe2/4Vq7aFRP6jQOO/zHDGw4nVY/AI4jl3aNV7FmJxKCWHz5s0MGDCA2NhY+vTpQ69evVi4cCFffvklmZmZWCwWJkyYAMCwYcNwuVykpaUxduxYrrnmmnNeNyEhgdDQUAC+/PJL5s6di9VqpV69ejz33HOMGDECu91OdHQ06enpdO7cmaVLl3Lo0CHef/99vv/+e+Lj4xkzZgxjxoy5FF+FwXB+gitBz0/BagdPv7zP+XcJLHlRjz39oM1Tl259VzHGUykh2O12PvnkEyZMmMDnn38OwJ49e/joo4+YNm0a1atXZ9WqVWzZsoXg4GAmTZrEqFGjSE5OPutaa9eupV+/fvTq1Ytnn32WW2+9FVCY7fnnn2fGjBlERkaSmZkJSA9t8uTJREZGcuDAASZNmsTNN9/MkiVLGDRoEEFBQcagGEoe3kHnNigAviG5j/1KF/16DIDxVEoMderUwWKxULp06ZOluqVKlWL48OH4+fmxa9cuGjZsSJs2bdizZw+PPvoodrudQYMGnXWtFi1a8OabbwKwa9cuevfuzYoVKxg3bhyTJ0/mtddeo2HDhriyY8116tQBIDAwkMjIyJOP09PTL8VHNxiKhvAmcP8CyEyB8EbFvZqrBmNUSghnlugmJibyzjvvsGzZMgD69++Py+Vi3bp1lClThsmTJ7Nx40bGjx/PlClTznndsLCwk49nzpzJ2LFj8fLy4oEHHmDjxo15vveZuEyi03A54hsCVVsV9yquOoxRKaH4+/vTuHFjevXqhd1uJzAwkKNHj9KuXTueeOIJpk2bRmZmJoMHDz7rtTnhL6vVisPhYMSIEXh7e1OrVi369u2Ln58fZcuWpUGDBgUaA1CtWjWeeuopt1WoGQyGKxeL6yq7De3evftZG+n27dvzTXYb8sZ8bwbD1UNee2demES9wWAwGNyGMSoGg8FgcBvGqBgMBoPBbRijYjAYDAa3USTVX7Nnz2bOnDkApKWlsX37dqZMmcJLL72EzWYjKiqKIUOG4HQ6GTNmDH/99Reenp68+OKLVKlShU2bNhXqXIPBYDAUE64iZsyYMa7p06e7unbt6tq7d6/L6X
S6Bg4c6Nq6davrxx9/dA0fPtzlcrlcGzdudD3yyCMul8tV6HPz44477jjr2LZt29z5ka8azPdmMFw95LV35kWRhr/++OMP/vnnH2699VbS09OpXLkyFouFqKgo1qxZw4YNG2jdujUADRs25M8//yQpKanQ516OOJ1ORo0aRa9evejXrx979+4t7iUZDAbDBVOkRmXixIkMHjyYpKQk/P39Tx738/MjMTHxrOM2m80t5xY1czcepNUrS4gY8T2tXlnC3I2Fn+fw888/k56ezowZM3jyySd55ZVX3LBSg8FguLQUWUd9QkICu3fvpkWLFiQlJeFwOE4+53A4CAwMJDU19bTjTqcTf3//Qp9blMzdeJBnZ/9BSkYWAAfjUnh29h8AdGtU4aKvm5cnZjAYDJcbReap/Prrr7Rs2RKQ5IiHhwf79u3D5XKxatUqmjZtSuPGjVmxYgUAmzZtombNmm45tyh5/ce/ThqUHFIysnj9x78Kdd28PLEcFWGDwWC4XCgyT2X37t1UrFjx5O9jx47lqaeeIisri6ioKBo0aEC9evVYvXo1vXv3xuVy8fLLL7vl3KIkOi7lgo4XlDO9LqfTid1upNkMBsPlRZHtWgMHDjzt94YNGzJz5szTjlmtVl544YWzXlvYc4uS8GAfDuZhQMKDfQp13caNG7N06VI6d+580hMzGAyGyw3T/HiBPN2xFj4ettOO+XjYeLpjrUJdt0OHDnh6etK7d2/GjRvHs88+W6jrGQwGQ3Fg4isXSE4y/vUf/yI6LoXwYB+e7lirUEl6OLcnZjAYDJcTxqhcBN0aVSi0ETEYDIYrERP+MhgMBoPbMEbFYDAYDG7DGBWDwWAwuA1jVAwGg8HgNoxRMRgMBoPbMEalBLF582b69etX3MswGAyGi8aUFF8MW2bC4hcg/gAEVYT2o6D+XYW65KRJk5g3bx4+PoXrzDcYDIbixHgqF8qWmfDdYxC/H3Dp53eP6XghqFy5Mu+++6571mgwGAzFhDEqF8riFyDjDO2vjBQdLwQdO3Y0ApIGg+GyxxiVCyX+wIUdNxguZ7LSi3sFhssMY1QulKCKF3bcYLgcSUuE7d/BnEfg2A5wOot7RYbLBGNULpT2o8DjjGS6h4+OGwxXCilxMLMf/PkNTL8bko8X94oMlwnGqFwo9e+CLu9AUCXAop9d3il09RdAxYoVL+lsGEMeuFyQdER/nFnnP/9KxWYHzwA9DigPVlv+5xsM2ZjM8MVQ/y63GBFDMZKRCimxgAX8SmsTBTixByZ3BFcW9F8IYTXOf62sdLB6gMVSlCu+tPiWhkGr4cg2qNAYfEsV94oMlwnGUzFcnRz5E95uAO9dJ0OSw18/yEtxHIc/vj7/dWJ3w9xBsHGKQkZXCjY7BFeGWreAf5niXo3hMqLIPJWJEyeyZMkSMjIy6NOnD82aNWPEiBFYLBZq1KjB6NGjsVqtTJgwgWXLlmG32xk5ciT169dn7969hT73QnG5XFiupDvNIsblchX3EgrH1tnyMLLSYe8qCKuu4zU6wPJXFPqqc3v+10iOhTkPw/51MkBVo8AnuOjXbjCUYIrEU1m3bh0bN25k2rRpTJkyhcOHDzNu3DiGDRvGV199hcvlYvHixWzdupX169cza9Ysxo8fz9ixYwEKfe6F4u3tTUxMzOW/UV4iXC4XMTExeHt7F/dSLp4mA5QPC6sJ1TvkHg+JhMHrYeiG84e+rB7ZuTXA7gX2y/j7MBjcRJF4KqtWraJmzZoMHjyYpKQknnnmGWbOnEmzZs0AaNOmDatXryYiIoKoqCgsFgvh4eFkZWURGxvL1q1bC3Vuhw4dzrm2vKhYsSIHDhzg2LFj7v0irmC8vb2pWPEyLqMOjYSBi5UHOTW8Y7NBQLmCXcM7ADq9Ag376Ho+oUWzVoPhMqJIjMqJEyeIjo7mww8/5MCBAwwaNOi08JKfnx+JiYkkJSURHJwbLsg5XthzLxQPDw8iIiIK85ENlxOZ6Z
DhUPLZVsj/An6lofpN7lmXwXAFUCThr+DgYKKiovD09CQyMhIvL6/TNnuHw0FgYCD+/v44HI7TjgcEBJyWE7mYcw2
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Bsmt\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fireplaces\n",
"\n",
"Bigger houses are more likely to have a fireplace. Thus, the variable *has Fireplace* might be an interesting predictor."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd4U+X7h++sNmm6B6Mto2WKzFKWFJAtKIgoMhQFBFSGoqgoKoJfBXHgQH+KAweoCIq4EAfIlqLIkDJklEIp0D2SrjQ5vz+eljJLoS0t8N7X1Yv05M3JmwDnOc/6PDpN0zQUCoVCoSgH9JW9AYVCoVBcPSijolAoFIpyQxkVhUKhUJQbyqgoFAqFotxQRkWhUCgU5YYyKgqFQqEoN5RRUSgUCkW5oYyKQqFQKMoNZVQUCoVCUW4YK3sDl5t27doREhJS2dtQKBSKK4qjR48SHR19wXXXnFEJCQlh6dKllb0NhUKhuKIYOHBgqdap8JdCoVAoyg1lVBQKhUJRbiijolAoFIpy45rLqZwLh8NBfHw8ubm5lb2Vaw6z2UxoaCgmk6myt6JQKMoBZVSA+Ph4vLy8qFu3LjqdrrK3c82gaRopKSnEx8cTFhZW2dtRKBTlgAp/Abm5uQQEBCiDcpnR6XQEBAQoD1GhuIpQnkohyqBUDup7V1wSOWlgSwI3D7D4y5+KKoHyVBQKxZVHzDJ4pw282QIyj1b2bhSnoDyVKsDSpUs5ePAgjz32WJnPFR8fT//+/bn++utPHmvXrh3du3dn5cqVTJgw4ZLP/eSTT9K3b186d+5c5n0qFJeMswCOFHZ2uwogNRYCG5x/vaaB8ogvG8qoXIXUr1+fBQsWnHX8uuuuq4TdKBTljMEIXZ8GWyJ4B0NIxLnX2VNgy3zIz4YO48AadHn3eY2ijEoVYfv27YwaNYrU1FSGDh3K4MGDWbFiBZ9//jkFBQXodDrefvttACZNmoSmaeTl5TFjxoxSGYvo6GgWLVrE66+/TteuXQkPD6devXqMHDmSZ599lry8PNzd3fnf//6H0+nk4YcfJigoiBMnTtC5c2ceeeSRk+ey2Ww8/fTTZGVlkZiYyLBhwxg2bBjbt29n5syZuFwuqlevzquvvkpcXBwvvPACAL6+vsycORMvL6+K+RIV1w6+tWDQx6A3gpv13GsOrIJV8m8PNyt0LnskQHFhlFGpIhiNRj766COOHj3K2LFjGTx4MIcOHeL999/HYrEwbdo01q9fj7e3N76+vrz88svs37+f7Ozss861f/9+hg8ffvL3V1999bTnjx07xtKlS/Hz82PSpEkMHz6cLl268Oeff/Lqq6/yyCOPcPToUT766CO8vLwYNmwYMTExJ18fFxfHzTffTK9evThx4gTDhw9n2LBhTJs2jTlz5lCvXj2WLFnCgQMHmDFjBjNnzqR+/fosWbKEDz/88DQDpVBcMmafkp/38Ct+rLyUy4YyKlWEJk2aoNPpCAoKOlliGxAQwJQpU7BarRw8eJCWLVvSuXNnDh06xLhx4zAajTz44INnnetc4a9Dhw6dfOzn54efn/yH+++//5g3bx4ffvghmqZhNMo/icaNG+Pr6wtA8+bNiY2NPfn6wMBAPv30U3799Vc8PT0pKCgAIDk5mXr16gEwaNAggJOGBaTJtG7dumX9qhSK0hHcGkYsh4IcCG5V2bu5ZlBGpYpwZmltVlYWb731FqtXrwZg5MiRaJpGdHQ01apVY/78+WzdupU5c+acM39SEnp9cdFfeHg4o0aNIiIiggMHDvDXX38BYgxycnJwc3Njx44d3H777axfvx6A+fPn07JlS4YNG8amTZtYs2YNANWqVePQoUPUrVuX999/n7CwMMLCwpg9ezbBwcFs2bKFpKSkS/2KFIqLw8MP6nas7F1ccyijUkXx9PQkIiKCwYMHYzQa8fb2JjExkW7duvHoo4/y5ZdfUl
BQwPjx48v0PlOmTGH69Onk5eWRm5vL008/DYDJZOLhhx8mOTmZm266icaNG598TdeuXXnhhRdYvnw5Xl5eGAwG8vPzmTFjBlOnTkWv1xMUFMSIESOoWbMmU6ZMOZkXevHFF8u0X4VCUbXRaZqmVfYmLicDBw48a57K7t27VWXUKcTHx/Poo4+yePHiy/J+6vtXKKo+57p2ngvV/KhQKBSKckMZFcVZhIaGXjYvRaFQXF0oo6JQKBSKckMZFYVCoVCUGxVS/bV06VK+/fZbAPLy8ti9ezcLFizgxRdfxGAwEBUVxYQJE3C5XEyfPp29e/fi5ubGCy+8QJ06ddi2bVuZ1ioUCoWiktAqmOnTp2uLFi3S+vfvr8XFxWkul0sbPXq0FhMTo/3yyy/alClTNE3TtK1bt2oPPPCApmlamdeWxG233XbWsV27dpXnR1ZcJOr7VyiqPue6dp6LCg1//fvvv+zfv5+bb76Z/Px8ateujU6nIyoqio0bN7JlyxY6deoEQMuWLdm5cyc2m63Ma69EXC4X06ZNY/DgwQwfPpy4uLjK3pJCoVBcNBVqVObNm8f48eOx2Wx4enqePG61WsnKyjrruMFgKJe1Fc2yrUfp+NIqwp78iY4vrWLZ1rLPc/j999/Jz8/nq6++YvLkybz00kvlsFOFQqG4vFRYR31mZiaxsbG0b98em82G3W4/+Zzdbsfb25vc3NzTjrtcLjw9Pcu8tiJZtvUoTy39lxyHE4Cj6Tk8tfRfAAa0Crnk857LE1MoFIorjQrzVP766y86dOgAiOSIyWTi8OHDaJrG+vXriYyMJCIigrVr1wKwbds2GjZsWC5rK5JXftl70qAUkeNw8sove8t03nN5YkVCjQqFQnGlUGGeSmxsLKGhoSd/nzFjBo899hhOp5OoqChatGhBs2bN2LBhA0OGDEHTNGbOnFkuayuShPScizpeWs70ulwu10nFYIVCobhSqLCr1ujRo0/7vWXLlmd1aev1ep5//vmzXlvWtRVJsK+Fo+cwIMG+ljKdNyIigj/++IO+ffue9MQUCoXiSkM1P14kj/duhMVkOO2YxWTg8d6NynTenj174ubmxpAhQ5g1axZPPfVUmc6nUCgUlYGKr1wkRcn4V37ZS0J6DsG+Fh7v3ahMSXo4vyemUCgUVxLKqFwCA1qFlNmIKBQKxdWICn8pFAqFotxQRkWhUCgU5YYyKgqFQqEoN5RRUSgUCkW5oYyKQqFQKMoNZVSqENu3b2f48OGVvQ2FQqG4ZFRJ8aWwYzGsfB4y4sEnFLpPg+Z3lumUH3zwAd9//z0WS9k68xUKhaIyUZ7KxbJjMfzwEGQcATT584eH5HgZqF27NnPnzi2fPSoUCkUloYzKxbLyeXCcof3lyJHjZaB3795KQFKhUFzxKKNysWTEX9xxheJKxplf2TtQXGEoo3Kx+IRe3HGF4kokLwt2/wDfPgBJe8DlquwdKa4QlFG5WLpPA9MZyXSTRY4rFFcLOemweDjs/AYW3QXZyZW9I8UVgjIqF0vzO6HfW+BTC9DJn/3eKnP1F0BoaOhlnQ2jOAeaBrYT8uNyXnj91YrBCG5e8tirJugNJa9XKApRmeFLofmd5WJEFJWIIxdyUgEdWIPkIgqQdgjm9wbNCSNXQGCDC5/LmQ96E+h0Fbnjy4tHEDy4AU7sgpAI8Aio7B0prhCUp6K4NjmxE95sAe+0EUNSxN6fxUuxJ8O/X1/4PKmxsOxB2LpAQkZXCwYj+NaGRjeBZ7XK3o3iCqLCPJV58+axatUqHA4HQ4cOpW3btjz55JPodDoaNGjAc889h16v5+2332b16tUYjUamTp1K8+bNiYuLK/Pai0XTNHRX053mFYKmaZXzxjFLxcNw5kPcegisL8cb9IQ1L0noq8mtJZ8jOxW+vR+ORIsBqhsFFt+K37tCUYWpEE8lOjqarVu38uWXX7JgwQKOHz/OrFmzmDRpEl988QWaprFy5UpiYmLYvHkzS5
YsYc6cOcyYMQOgzGsvFrPZTEpKSuVd4K5RNE0jJSUFs9l8+d+89SjJhwU2hPo9i4/7hcP4zTBxy4VDX3pTYW4NMLq
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Fireplace\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"interesting_variables.append(\"has Fireplace\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garages\n",
"\n",
"Holding the overall living area fixed adding a garage seems to affect the price positively. Thus, *has Garage* seems like an interesting predictor as well."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnWd0lNUWhp8p6T2hJrSEKlJDl4ACIoKCiCJFsSCoKChiQbkK6lXBxrWgqNhBQVBEUESRIk1AkSJNQSCUUFNIMumZuT/ehBAIIZiEBDjPWqxMZs58c2aAs2e3d1tcLpcLg8FgMBhKAGtZb8BgMBgMFw/GqBgMBoOhxDBGxWAwGAwlhjEqBoPBYCgxjFExGAwGQ4lhjIrBYDAYSgxjVAwGg8FQYhijYjAYDIYSwxgVg8FgMJQY9rLewPmmTZs2hIWFlfU2DAaD4YLiwIEDrFmz5qzrLjmjEhYWxuzZs8t6GwaDwXBB0adPnyKtM+Evg8FgMJQYxqgYDAaDocQwRsVgMBgMJcYll1MpiMzMTPbv309aWlpZb+WixNPTk2rVquHm5lbWWzEYDKWMMSrA/v378fPzo1atWlgslrLezkWFy+UiNjaW/fv3Ex4eXtbbMRgMpYwJfwFpaWmEhIQYg1IKWCwWQkJCjBdoMFwiGE8lB2NQSg/z2RpKnNR4SD4K7t7gFayfhnKB8VQMBsOFx5Y58HYreKMpJB4o690YTsJ4KuWA2bNns2vXLh599NESud6+fft45ZVXOHToEJ6ennh6evLYY49Rt27dErm+wVCmZGfBvpzObmcWxO2GCoX823a5wHjL5w1jVC4yUlNTGTZsGP/9739p3rw5AJs2beK5555j6tSpZbw7g6EEsNmh038g+Qj4h0JYZMHrHLGw7iPISIF294NPxfO7z0sUY1TKCRs3bmTw4MHExcUxYMAA+vXrx4IFC/j888/JysrCYrEwadIkAEaOHInL5SI9PZ1nn32Wyy677MR1lixZQtu2bU8YFIAmTZrw2WefAfD3338zYcIEsrOziY+P55lnniEyMpJOnToRERFB7dq1ufnmmwtcM2vWLD7//HMCAgJwc3OjR48e9OzZk3HjxhEdHY3T6WTkyJG0adPm/H54hkuPwOrQ92Ow2sHdp+A1/yyGxc/rtrsPdCyZSIChcIxRKSfY7XY+/PBDDhw4wD333EO/fv3Ys2cP77//Pl5eXowdO5YVK1bg7+9PYGAgL7/8Mjt37iQlJSXfdfbv30+NGjVO/D5s2DCSk5M5cuQIn376KTt37mT06NHUr1+fefPmMXv2bCIjIzl48CCzZ88mKCiI+fPnn7amVq1afPDBB8yZMwd3d3duv/12AGbNmkVQUBAvvvgi8fHx3HbbbXz//ffn9bMzXKJ4BhT+uHdQ3m3jpZw3jFEpJzRs2BCLxULFihVPlN+GhIQwevRofHx82LVrF82aNaNjx47s2bOH+++/H7vdzrBhw/Jdp0qVKmzevPnE75MnTwbglltuISsri0qVKvHOO+/g6emJw+HA19cXgKCgIIKC9J+woDV79+6ldu3aeHl5AZzwhP7++2/WrVvHpk2bAMjKyiIuLo7g4OBS/LQMhiIQ2gLunA9ZqRDa/OzrDSWCMSrlhFPLbpOSknjzzTdZunQpAHfddRcul4s1a9ZQqVIlPvroI9avX8/EiRPz5Uq6dOnClClT2LBhA82aNQMgOjqaQ4cOYbFYeOGFF3j11VepXbs2b775JgcOqHLGas0rBCxoTY0aNdi1axdpaWm4u7uzadMmIiIiiIiIoEqVKtx3332kpaUxefJkAgMDS/nTMhiKgHcQ1Gpf1ru45DBGpZzi6+tLZGQk/fr1w2634+/vz5EjR+jcuTOjRo1i+vTpZGVl8cADD+R7no+PD5MnT+a1117j1VdfJSsrC5vNxpNPPklYWBi9evXioYcewt/fnypVqhAfH3
/aaxe0Jjg4mKFDhzJw4EACAwNJT0/HbrfTv39/nnrqKW677TaSk5MZOHBgPgNlMBguLSwul8tV1ps4n/Tp0+e0eSrbtm3Ll+w2nE5WVhZTpkxh2LBhuFwubr31Vh5++GFatWpVpOebz9hguLAp6OwsCOOpGIqE3W4nNTWVG2+8ETc3N5o0aULLli3LelsGg6GcYYyKociMGjWKUaNGlfU2DAZDOcYEvw0Gg8FQYhijYjAYDIYSo1TCX7Nnz+abb74BID09nW3btjF16lReeOEFbDYbUVFRDB8+HKfTyTPPPMNff/2Fu7s7zz//PDVr1mTDhg3FWmswGAyGMsJVyjzzzDOuGTNmuHr16uWKjo52OZ1O15AhQ1xbtmxx/fjjj67Ro0e7XC6Xa/369a777rvP5XK5ir22MG688cbT7tu6dWtJvmVDAZjP2GC4sCno7CyIUg1//fnnn+zcuZPrrruOjIwMatSogcViISoqilWrVrFu3To6dOgAQLNmzdi8eTPJycnFXnsh4nQ6GTt2LP369WPQoEFER0eX9ZYMBoPhnClVo/Lee+/xwAMPkJycfEIOBNSgl5SUdNr9NputRNaWNnPWH6D9hMWEP/E97ScsZs764s9z+Pnnn8nIyODLL7/kkUceYcKECSWwU4PBYDi/lFpJcWJiIrt376Zt27YkJyfjcDhOPOZwOPD39yctLS3f/U6nE19f32KvLU3mrD/Ak7P/JDUzG4ADCak8OftPAHo3D/vX1y3IEzMYDIYLjVLzVH777TfatWsHSHLEzc2NvXv34nK5WLFiBS1btiQyMpJly5YBsGHDBurVq1cia0uTV37864RBySU1M5tXfvyrWNctyBPLysoq1jUNBoPhfFNqnsru3bupVq3aid+fffZZHn30UbKzs4mKiqJp06Y0btyYlStX0r9/f1wuFy+++GKJrC1NYhJSz+n+onKq1+V0OrHbTW+qwWC4sCi1U2vIkCH5fm/WrBkzZ87Md5/VauW555477bnFXVuahAZ6caAAAxIa6FWs60ZGRrJkyRJ69OhxwhMzGAyGCw3T/HiOPNatPl5utnz3ebnZeKxb/WJdt2vXrri7u9O/f3/Gjx/Pk08+WazrGQwGQ1lg4ivnSG4y/pUf/yImIZXQQC8e61a/WEl6OLMnZjAYDBcSxqj8C3o3Dyu2ETEYDIaLERP+MhgMBkOJYYyKwWAwGEoMY1QMBoPBUGIYo2IwGAyGEsMYFYPBYDCUGMaolCM2btzIoEGDynobBoPB8K8xJcX/hk0zYdFzcHw/BFSDLmOhyS3FuuSUKVOYO3cuXl7F68w3GAyGssR4KufKppkw70E4vg9w6ee8B3V/MahRowZvvfVWyezRYDAYyghjVM6VRc9B5inaX5mpur8YdOvWzQhIGgyGCx5jVM6V4/vP7X6D4UImO6Osd2C4wDBG5VwJqHZu9xsMFyLpSbBtHnxzHxzdDk5nWe/IcIFgjMq50mUsuJ2STHfz0v0Gw8VCagLMHASbv4YZt0LKsbLekeECwRiVc6XJLdDzTQioDlj0s+ebxa7+AqhWrdp5nQ1jKACXC5IP648z++zrL1ZsdnD3022/qmC1Fb7eYMjBZIb/DU1uKREjYihDMtMgNQ6wgE9FHaIA8Xvgo27gyoa7FkCFume/VnYGWN3AYinNHZ9fvCvCsJVweCuERYJ3SFnvyHCBYDwVw6XJ4c3wRlN4u5UMSS5//SAvxXEM/vzq7NeJ2w1zhsH6qQoZXSzY7BBYA+pfC76Vyno3hguIUvNU3nvvPRYvXkxmZiYDBgygdevWPPHEE1gsFurWrcu4ceOwWq1MmjSJpUuXYrfbGTNmDE2aNCE6OrrYa88Vl8uF5WL6plmOcLlcZb2F09kyWx5GdgZEr4AKdXR/3a7wywSFvhreUPg1UuLgm3th3xoZoFpR4BVY+ns3GMoxpeKprFmzhvXr1zN9+nSmTp3KoUOHGD9+PCNHjuSLL77A5XKxaNEitm
zZwtq1a5k1axYTJ07k2WefBSj22nPF09OT2NjY8nn4XeC4XC5iY2Px9PQs663kp8Vg5cMq1IM6XfPuD4qAB9bCiHV
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Garage\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"interesting_variables.append(\"has Garage\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pools\n",
"\n",
"Unfortunately, almost no one in Ames, IA, has a pool. The predictor *has Pool* seems quite uninteresting."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4VFX6wPHvnV6TSSeF9FBChwBB6cUOKhbA3hURRVFBV0Fc+67oz7J2112wgaJrVxQQAlKkE4q0JCQhvc5k+r2/PwYGEIRAEghwPs/j43DnzJ0zw3Dfe9p7JEVRFARBEAShGahOdQUEQRCEM4cIKoIgCEKzEUFFEARBaDYiqAiCIAjNRgQVQRAEodmIoCIIgiA0GxFUBEEQhGYjgoogCILQbERQEQRBEJqN5lRX4GTr27cv8fHxp7oagiAIp5WioiJWrFhxzHJnXVCJj49n3rx5p7oagiAIp5XRo0c3qpzo/hIEQRCajQgqgiAIQrMRQUUQBEFoNmfdmIogCGcnr9dLYWEhLpfrVFelVTMYDCQkJKDVak/o9SKoCIJwVigsLMRqtZKcnIwkSae6Oq2SoihUVlZSWFhISkrKCZ1DdH8JgnBWcLlcREREiIByFJIkERER0aTWnGipCIJw2qlp8FBp92DUqQkzaTHqGncpEwHl2Jr6HYmWiiAIp53vNu5l2MxfGfjCQvbWijGS1kS0VARBOK34/DKr86sDj2WFvMoGUqMsf/0CRYFW0EKZN28eu3bt4sEHH2zyuVasWMGkSZNIT08HwO12M3LkSK6//vrjOs+rr75KZGQk48aNa3Kd9hNBRRCE04pGreKBEe0pr3cTG2qkW0LokQs6KmH1++BpgH53n9xKngTZ2dm89NJLAHg8Hi644AIuvfRSQkJCTmm9RFARBOG0Ex9m5LVreqJRS5j+ajxl5wJY8FTgsc4MURefvAr+hfXr13PLLbdQVVXFuHHjGDNmDD/88AMffvghPp8PSZJ47bXXAJg0aRKKouB2u5kxYwYdO3b8y/Pa7XZUKhVqtZrNmzfz97//HbVajV6v5+9//ztxcXG8//77fPvtt2g0GrKysnjooYda5DOKoCIIwmkpxHiMdRSmsAOPzVEtW5lG0mg0vPfeexQVFXHHHXcwZswY8vLyePvttzEajUybNo2cnBxCQkKw2Wy88MIL7Nixg4aGhsPOtXz5cq6//nokSUKr1fL4449jNpt57LHHePrpp+nYsSM///wzzz33HBMmTOD777/nk08+QaPRMHHiRBYuXNgyn7FFzioIgnCqxfWCm74DnxPiekB+2amuEZmZmUiSRFRUVHDabkREBFOmTMFsNrNr1y66d+/OwIEDycvL4+6770aj0TB+/PjDznVw99fBysrKgq2a3r178+KLL7Jr1y66desWXNCYlZXF9u3bW+QzitlfgiCcmUxhkHwupA8HU8Sprg1w+HTd+vp6XnnlFV566SWeeuop9Ho9iqKwYsUKoqOjef/99xk/fjwzZ85s9HtER0ezdetWAFatWkVycjKpqals2LABn8+HoiisWrXqhBc3HotoqQiCIJwiFouFnj17MmbMGDQaDSEhIZSVlTF06FAeeOABPv74Y3w+HxMmTGj0OZ966in+/ve/oygKarWaZ555hrZt23LhhRcybtw4ZFmmV69eDB8+PBh8mpOkKIrS7GdtxUaPHi32UxGEs9CWLVuOOtgtHHCk76qx107R/SUIgiA0GxFUBEEQhGYjgoogCILQbERQEQRBEJpNi8z+mjdvHl988QUQyEmzZcsWZs2axdNPP41araZ///7cc889yLLME088wbZt29DpdDz11FMkJSWxbt26JpUVBEEQThGlhT3xxBPKJ598oowaNUrJz89XZFlWbrvtNiU3N1f58ccflSlTpiiKoihr165V7rrrLkVRlCaXPZrLL7+8BT+tIAit1ebNm091FU
4bR/quGnvtbNHur40bN7Jjxw4uvvhiPB4PiYmJSJJE//79WbZsGatXr2bAgAEAdO/enU2bNmG325tcVhAEobWRZZlp06YxZswYrr/+evLz8091lVpEiwaVt956iwkTJmC327FYDqSmNpvN1NfXH3ZcrVY3S1lBEISm+nJtEec+t4CUqd9y7nML+HJtUZPO9/PPP+PxePj000+ZPHkyzz33XDPVtHVpsRX1dXV17N69m+zsbOx2Ow6HI/icw+EgJCQEl8t1yHFZlrFYLE0uKwiC0BRfri3ikXkbcXr9ABTVOHlk3kYALusRf0LnPFJvy5moxVoqq1atol+/fkAgFYFWq6WgoABFUcjJySErK4uePXuyePFiANatW0e7du2apawgCEJT/OPHbcGAsp/T6+cfP2474XMeqbfF5/Od8PlaqxZrqezevZuEhITgn2fMmMGDDz6I3++nf//+dOvWjS5durB06VLGjh2Loig888wzzVJWEAShKYprnMd1vDH+3LMiyzIazZmXfrHFPtFtt912yJ+7d+/OnDlzDjmmUql48sknD3ttU8sKgiA0RZzNSNERAkiczXjC5+zZsycLFy7koosuCva2nInE4kdBEIQ/eej89hi16kOOGbVqHjq//Qmfc8SIEeh0OsaOHcuzzz7LI4880tRqtkpnXttLEAShifYPxv/jx20U1ziJsxl56Pz2JzxID3/d23KmEUFFEAThCC7rEd+kIHK2Et1fgiAIQrMRQUUQBEFoNiKoCIIgCM1GBBVBEASh2YigIgiCIDQbEVQEQRBOovXr13P99def6mq0GDGlWBAE4Ug2zIFfnoTaQghNgGHToOvVTTrlO++8w1dffYXReOIr81s70VIRBEH4sw1z4Ot7oXYPoAT+//W9geNNkJiYyKuvvto8dWylRFARBEH4s1+eBO+fcn95nYHjTXD++eefkUkkDyaCiiAIf8nrl091FU6N2sLjOy4EiaAiCMJh7C4vP2wqYfKc9WwvrUeWlVNdpZMrNOH4jgtBIqgIgnCYWqeP8R+u5qv1xdw5azWVDveprtLJNWwaaP80mK41Bo4LRyWCiiAcTFHAXhr4T/Yfu/wZSqOWsOgCff/RIXo0qrPsUtH1ahj5CoS2BaTA/0e+0uTZXwAJCQln9B5QZ/aIkSD8BZfXT02DF0mCCLMOjXrfRbM6D94/HxQ/3PwDRGYc81xev4xGJSFJUstW+iSKMOv4/r4BbCutp1uCjTCz7lRX6eTrenWzBJGzzVl2+yEIAVv31jHwhYUMe/FXCqoaDjyx7ftAK8VRARs/O+Z5CiodTJ6znjm/76HW6W3BGp9cGrWKhHATwzrGEGnVn+rqCKeRFmupvPXWWyxYsACv18u4cePo06cPU6dORZIkMjIymD59OiqVitdee41Fixah0Wh49NFH6dq1K/n5+U0uKwhH8/WGvXj8Mh6/zIrdVaRGWQJPZIyAX58LdH1lXnrUc1Q3eLh/znpW51fz1fpi+qZGEGrUnoTaCydKUZQzqkXZEhSlaZMyWuTqu2LFCtauXcvHH3/MrFmzKCkp4dlnn2XSpEl89NFHKIrCL7/8Qm5uLitXrmTu3LnMnDmTGTNmADS5rCAcy7V9E4m3GUmLsjC4XdSBJ8JSYcJKmLj6mF1fWpVEvM0AgF6jwqARNzOtmcFgoLKysskXzTOZoihUVlZiMBhO+Bwt0lLJycmhXbt2TJgwAbvdzsMPP8ycOXPo06cPAAMHDmTp0qWkpKTQv39/JEkiLi4Ov99PVVUVubm5TSo7YsSIlvhYwhkkOcLMlxPOQUI6tHtHrQZrm0adw2LQMn1kJ67omUBypJkw01k47nAaSUhIoLCwkPLy8lNdlVbNYDCQkHDiU6dbJKhUV1dTXFzMm2++SWFhIePHjz+k2Wk2m6mvr8dut2Oz2YKv23+8qWUF4Wg8Pj9Oj58w00ED9CcowqJnUPvoZqqZ0JK0Wi0pKSmnuhpnvBZpr9tsNvr374
9OpyM1NRW9Xn/Ixd7hcBASEoLFYsHhcBxy3Gq1HjImciJlBeGv2F1evttYwu2zVrN0RwUu79k7bVgQWkKLBJVevXq
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Pool\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Porch\n",
"\n",
"Most houses have a porch."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd4U2Ubh+/MJt0bKGW07L0qQwrIEkFlKRZUFBBUEJRPVBQVxAkO3AouVFAZggioIKBsAUVmAWWWUaCLjqQ7Od8fT0tAoLS0pS2893VxNT15c/ImwHnOs36PTtM0DYVCoVAoSgB9WW9AoVAoFNcOyqgoFAqFosRQRkWhUCgUJYYyKgqFQqEoMZRRUSgUCkWJoYyKQqFQKEoMZVQUCoVCUWIoo6JQKBSKEkMZFYVCoVCUGMay3sDVpk2bNlStWrWst6FQKBQVihMnTrB58+bLrrvujErVqlVZuHBhWW9DoVAoKhT9+/cv1DoV/lIoFApFiaGMikKhUChKDGVUFAqFQlFiXHc5lYuRk5PD8ePHyczMLOutVEgsFguhoaGYTKay3opCoShjlFEBjh8/jpeXFzVr1kSn05X1dioUmqaRmJjI8ePHCQsLK+vtKBSKMkaFv4DMzEwCAgKUQbkCdDodAQEBystTKBSA8lTOogzKlaO+O8VVJ+MM2OLB7A5Wf/mpKBcoT0WhUFQ8ohfBhzfAu80g9URZ70ZxDsqolAMWLlzIm2++WSLn2rx5M+3atWPw4MEMHjyYu+66i1mzZhXrnIMHD+bgwYMlsj+Fotg4cuFYXme3MxeSDhe8XtNKf0+Ks6jw1zVI27ZtefvttwHIzs7mlltuoU+fPnh7e5fxzhSKEsBghM7Pgi0OvEOgasuLr7MnwtYvIDsd2o0Cj6Cru8/rFGVUygk7duxg2LBhJCUlMWjQIKKioli2bBnffPMNubm56HQ6PvjgAwDGjh2LpmlkZWUxefJkGjRocMnz2mw29Ho9BoOBPXv28NJLL2EwGHBzc+Oll17C6XQycuRIfH196dixI61bt+bVV1/F6XRSqVKlsx7Uhx9+SEJCAhkZGUybNo1q1apdle9FobgovtVgwEzQG8HscfE1B3+D316Wx2YP6PjE1dvfdYwyKuUEo9HI559/zokTJ3jwwQeJioriyJEjfPLJJ1itViZOnMj69evx9vbG19eX119/nQMHDpCenn7BuTZt2sTgwYPR6XSYTCaef/55PDw8eO6553jllVdo0KABK1euZMqUKTz11FPEx8ezYMECzGYzffr0Ydq0adSqVYv58+efDXt16tSJPn368P7777Ns2TJGjBhxtb8iheJ8LD4FP+/u53qsvJSrhjIq5YSGDRui0+kICgo6W54bEBDA+PHj8fDw4NChQzRv3pyOHTty5MgRRo0ahdFoZOTIkRec69zw17nExcWd9WpuuOEG3nrrLQBCQ0Mxm80AJCQkUKtWLQAGDBhw9rWNGzcGIDAwkISEhBL85ApFKRHSCob8DLkZENKirHdz3aCMSjnhv2W5aWlpvPfee6xevRqAoUOHomkamzdvJjg4mC+++IJt27Yxbdq0Qifig4OD2bdvH/Xr1+fPP/+kZs2aAOj1+vPWHDlyhJo1a/LJJ5+ohkZFxcXdD2q2L+tdXHcoo1JO8fT0pGXLlkRFRWE0GvH29iYuLo4uXbrw+OOP891335Gbm8sjjzxS6HO+/PLLvPTSS2iahsFg4NVXX71gzeTJk5kwYQJ6vZ6goCCGDBnC119/XZIfTaFQXMPoNO36qrfr37//BfNU9u7dW2CyW3F51HeoUFzbXOzaeTFUn4pCoVAoSgxlVBQKhUJRYiijolAoFIoSQxkVhUKhUJQYpVL9tXDhQn744QcAsrKy2Lt3L7NmzeKVV17BYDAQGRnJ6NGjcTqdvPDCC/zzzz+YzWZefvllatSowfbt24u1VqFQKBRlhF
bKvPDCC9qcOXO03r17azExMZrT6dSGDx+uRUdHa8uXL9fGjx+vaZqmbdu2TXv44Yc1TdOKvbYg+vXrd8GxPXv2lORHvi5R36FCcW1zsWvnxSjV8NeuXbs4cOAAt956K9nZ2VSvXh2dTkdkZCQbN25k69atdOjQAYDmzZuze/dubDZbsddWRJxOJxMnTiQqKorBgwcTExNT1ltSKBSKIlOqRmXGjBk88sgj2Gw2PD09zx738PAgLS3tguMGg6FE1pY2i7adoP2U3wh7+ifaT/mNRduKP89h5cqVZGdnM3fuXMaNG8eUKVNKYKcKhUJxdSm1jvrU1FQOHz5M27Ztsdls2O32s8/Z7Xa8vb3JzMw877jT6cTT07PYa0uTRdtO8MzCXWTkOAA4kZzBMwt3AdC3RdUrPu/FPDGFQqGoaJSap/Lnn3/Srl07QCRHTCYTR48eRdM01q9fT0REBC1btmTt2rUAbN++nbp165bI2tLkjeX/nDUo+WTkOHhj+T/FOu/FPLHc3NxinVOhUCiuNqXmqRw+fJjQ0NCzv0+ePJknnngCh8NBZGQkzZo1o0mTJmzYsIGBAweiadpZLariri1NYpMzinS8sPzX63I6nRiNSppNoVBULErtqjV8+PDzfm/evDnz5s0775her+fFF1+84LXFXVuahPhaOXERAxLiay3WeVu2bMnvv/9Or169znpiCoVCUdFQzY9F5Mke9bCaDOcds5oMPNmjXrHO2717d8xmMwMHDuS1117jmWeeKdb5FAqFoixQ8ZUikp+Mf2P5P8QmZxDia+XJHvWKlaSHS3tiCoVCUZFQRuUK6NuiarGNiEKhUFyLqPCXQqFQKEoMZVQUCoVCUWIoo6JQKBSKEkMZFYVCoVCUGMqoKBQKhaLEUEalHLFjxw4GDx5c1ttQKBSKK0aVFF8JO+fBqhch5Tj4hELXidD0rmKd8tNPP2Xx4sVYrcXrzFcoFIqyRHkqRWXnPFjyKKQcAzT5ueRROV4Mqlevzvvvv18ye1QoFIoyQhmVorLqRcj5j/ZXToYcLwY9evRQApIKhaLCo4xKUUk5XrTjCkVFxpFd1jtQVDCUUSkqPqFFO65QVESy0mDvEvjhYYjfB05nWe9IUUFQRqWodJ0Ipv8k001WOa5QXCtkJMO8wbB7Acy5B9ITynpHigqCMipFpeldcPt74FMN0MnP298rdvUXQGho6FWdDaO4CJoGttPyx+m4/PprFYMRzF7y2KsK6A0Fr1co8lCZ4Suh6V0lYkQUZUhOJmQkATrwCJKLKMCZI/BFD9AcMHQZBNa5/Lkc2aA3gU5Xmju+urgHwcgNcHoPVG0J7gFlvSNFBUF5Korrk9O74d1m8OENYkjy+ecX8VLsCbDr+8ufJ+kwLBoJ22ZJyOhawWAE3+pQ7xbwDC7r3SgqEKXmqcyYMYPffvuNnJwcBg0aROvWrXn66afR6XTUqVOHSZMmodfr+eCDD1i9ejVGo5EJEybQtGlTYmJiir22qGiahu5autO8imiaVtZbKDrRC8XDcGRDzHoIrC3H63SHNVMk9NWwT8HnSE+CHx6CY5vFANWMBKtv6e9doSjHlIqnsnnzZrZt28Z3333HrFmzOHXqFK+99hpjx47l22+/RdM0Vq1aRXR0NFu2bGH+/PlMmzaNyZMnAxR7bVGxWCwkJiZWzItjGaNpGomJiVgslrLeStFoNUzyYYF1oXZ313G/cHhkC4zZevnQl96Ul1sDjG5grGDfgUJRCpSKp7J+/Xrq1q3LI488gs1m46mnnmLevHm0bt0agI4dO7JhwwbCwsKIjIxEp9MREhKCw+EgKSmJ6OjoYq3t3r37Jfd2MUJDQzl+/Djx8fEl+0VcJ1gsFkJDK1hJtX84DF8leZBzwzsGA3hVLtw5LF7Qcwo0HyTns/qXzl4VigpEqRiVM2fOEBsby/Tp0zl+/DgjR448L7zk4eFBWloaNpsNX19XuCD/eHHXFhWTyURYWFhxPrKiIp
GbDTl2ST4bivlfwCMIancrmX0pFNcApRL+8vX1JTIyErPZTHh4OG5ubudd7O12O97e3nh6emK328877uXldV5O5Er
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Porch\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Neighborhoods\n",
"\n",
"The instructors' notes say:\n",
"\n",
"> For instructors who cover nominal variables in their class, I would suggest incorporating the neighborhood variable into their models by converting it to a set of dummy (indicator) variables. I have found that the coefficients for the continuous variables tend to have values with more realistic interpretations when used in conjunction with the neighborhood variable.\n",
"\n",
"Indeed, plotting the price distributions by neighborhood reveals significant differences in the price level."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAIcCAYAAABmaR8bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xtck3X/P/DXxkAFRGEpCaGJOtQ8ZSZpKgoyMy0PeWfOu+6+djRFrSAPt2mWpzxUpmknzTKW545mgpCaWmiWt3lWMFRQ1DEPoILbrt8f/HbFZIOxXWNjez0fDx91HXbt7Zjstc/1OcgEQRBARERERD5D7u4CiIiIiKhmMQASERER+RgGQCIiIiIfwwBIRERE5GMYAImIiIh8DAMgERERkY9hACSqYWfPnkVMTIzVP61bt0aHDh3Qq1cvjB49Glu3bq329Tdu3IiYmBgMHTrUBdXXHPNrcvz4cbc8/5NPPomYmBgkJibi5s2blZ4r5WuelZWFmJgYxMbGOn2txYsXIyYmBuPGjbP7MeXfn8XFxU7X4AxH6vckQ4cORUxMDDZu3OjuUogqULi7ACJf1q5dOwQEBIjbgiCgtLQUZ8+eRWZmJjIzM6HRaDB9+nQ3VunbTp8+jcWLFyMlJcXdpRARSYYBkMiNFi1ahLvuuqvC/lu3bmHJkiX48MMPodVq0bNnT8THx9t1zcTERHTs2BF169aVulyftXLlSgwYMABt27Z1+XN16NABP/74IxQK/nomItfhLWAiD+Tv74+XX34Z9957LwBAq9Xa/dj69eujRYsWiIyMdFV5PkUmk8FgMGDq1KkwGo0uf7569eqhRYsWaNasmcufi4h8FwMgkQfr06cPAOCvv/5ycyW+67HHHoNCocChQ4fw2WefubscIiJJMAASebDg4GAAsOiMb+4Yv3z5cvH2cIcOHTBw4EDk5uZWOiChqKgIH3/8MYYMGYLOnTujU6dOeOyxx6DVamEymSqcX1paipUrV+Kxxx7Dvffei06dOmHIkCFYvnw5SkpKrNaclZWFl156Cd26dcM999yDbt264ZlnnsGPP/7o0GtgMpnw+eefo3///mjfvj3i4uIwffp0nD9/3uK8pKQkxMTE4M0337R5raeeegoxMTH45ptv7H7+Nm3a4P/+7/8AlL32Z86cqfbfYe/evRgzZgy6d++Odu3aIS4uDlOmTEFubm6FcysbBHLlyhUsWrQIDz30kDhY6K233sLly5fFQStZWVlWazh79iwmT56MHj16oH379khMTMSCBQsqHehRWlqK999/HwkJCeJj5s2bhytXrlg932QyYf369Rg5ciTuu+8+tG/fHmq1GrNmzUJBQUGF8ydNmoSYmBhs3rwZixcvRmxsLDp16oShQ4eiqKjIqfovXLiAuXPnol+/fmjfvj3uu+8+jBgxAmvXroXBYLD6mJKSEnz22WcYNmwY7r33XnTs2BEDBw7Ee++9Z/PvLAgC1q9fLz6mW7dumDx5Mi5cuGDzdSXyBOxkQuTBTp8+DQBo0qRJhWNpaWnYv38/IiMjERkZievXryMqKgr79u2zeq28vDw899xzyM7Ohp+fH1q0aIFbt27h4MGDOHjwIP73v//h7bffFs+/fPkynnvuORw4cAByuRxRUVGoW7cujh07hsOHD2PTpk1Yvnw5QkNDxcd8//33eO2112AymXDnnXeidevWuHTpEnbu3ImdO3fir7/+wsSJE6v1Grz55pvYt28fwsLC0KpVK2RnZ2P16tX46aef8Pnnn6N169YAgEGDBiEtLQ0//fQT/vvf/8LPz8/iOufPn8fevXsRGBgItVpdrRqSkpKQlpaG3NxcvP7661i5cqXdj126dCkWLVoEAAgNDYVKpcKZM2ewYcMG/Pjjj1i0aBHi4uKqvE5BQQGefvpp5OTkwN/fHyqVCpcvX8aXX36JX375Bf7+/jYfm52djaFDh+
LatWto2bIl6tSpg9OnT+OTTz7B7t27sWbNGquPHzNmDPbt24fw8HC0atUKx48fx/Lly/HTTz8hNTXV4n158+ZNvPDCC/jtt98AAM2aNUNwcDBOnjyJL774At9++y2WLl2KLl26VHielStXYv/+/bj77rthMBhQt25d8cuPI/X/+eefePHFF3H58mUEBASgVatWKC4uxh9//IE//vgDmzZtwtKlSxEUFCQ+prCwEE8//TSOHTsGmUyG6OhoBAQE4MSJE1i2bBm+/fZbfPrpp2jRooX4GJPJhOTkZGzatAkAEB0dDYVCga+//hq//vor5HK2sZAHE4ioRp05c0ZQqVSCSqUSzpw5Y/O8y5cvCw888ICgUqmEt956S9z//vvvi4+fNWuWYDKZBEEQBJ1OJwiCIGzYsEFQqVTCkCFDLK7373//W1CpVMLjjz8unD17VtyflZUldOrUSVCpVMK3334r7n/hhRcElUolDB8+XMjNzRX35+fnCxqNRlCpVMLo0aPF/UajUejevbugUqmETZs2WTz3119/LcTExAitW7eu9O9cnvnvqFKphEWLFgkGg0EQBEHQ6/XCqFGjBJVKJTz88MPi/tLSUiE2NlZQqVTCjh07Klzvk08+EVQqlZCSkmLX85tfr1WrVgmCIAi//vqrWM/69estzrX1mm/ZskVQqVRC586dLV6T0tJS4YMPPhCP5eXlicd+++03QaVSCV27drW41vPPPy+oVCph2LBhQn5+vsVzdOjQQaztt99+E4+Vf68MGjRIOHXqlHgsPT1diImJqfDzKv/+bNOmjfDVV1+J77H8/Hxh8ODBgkqlEkaNGmVR36RJkwSVSiX07NlT+PPPP8X9V65cEV5++WVBpVIJsbGxwoULF8RjEydOFJ/rs88+E/eb38uO1K/X64WuXbsKKpVKGDdunKDX68Vj//vf/4S4uDhBpVIJycnJFvU/+eSTgkqlEgYOHCicPHlS3H/+/HnhqaeeElQqlaBWq4WbN2+Kx7RaraBSqYT7779f2Lt3r7j/8OHDQq9evcTaN2zYIBB5Gn49IfIggiDg6tWr2LFjB5599lkUFhaifv36eOaZZyqc6+/vj/Hjx0MmkwEAwsLCbF73jz/+wJ49exAYGIilS5daDBDp2rUrxo4dCwD49ttvAZT1Ofz5558RGhqKpUuXomnTpuL5TZo0wfvvv4/AwEBkZGTg6NGjAACdTodLly6hQYMG6N+/v8XzDx48GI8//jgGDBhQ4dZeVfr164dx48aJLXoNGzbEe++9h4YNG+LkyZPYvn27+HoMGDAAAMQWmfK+++47AGUthY544IEHMGzYMADAvHnzoNPpqnzM+++/DwCYMmUKHn74YXG/v78/XnrpJfTv3x9FRUVVtigePnwY27ZtQ2BgID744AOLlje1Wl3lFDV+fn5YsmQJ7r77bnFf37590bNnTwBl7w9r/vOf/+CJJ54Q32Pmn71CocDOnTtx7NgxAGW3Z8231RcvXoxOnTqJ1wgJCcH8+fNxzz33QK/XW/27hoeH4z//+Y+4fft7uTr1p6am4vLly1CpVFi4cCEaNmwoHuvQoQOWLl0KmUyG77//HidPngQA/P7778jKykKdOnXw4YcfWrTyhYeH44MPPsCdd96Jv//+Gxs2bBCPffrppwDKbmWXb9ls06aNRWs6kSdiACRyo4SEhAoTQd9///3irVdzALN2C1ilUlncwqqMOST16dMHSqWywvHhw4dj06ZNWLZsGQAgIyMDANCtWzerwVKpVKJbt24AgB07dgAou71Zv359XLlyBVOmTMGJEycsHvPmm29iwYIF4i1be40YMaLCvvr16yMxMREA8Msvv4j7zeEuPT3doo/i8ePHcezYMTRu3Fis2xGvvfYa7rjjDly+fBlvvfVWpeeePn0aJ06cgFwutwh/5Q0cOBDAP6+hLZmZmQCA3r17o3HjxhWODxs2zGI+ydu1bt3a6nRDLVu2BFB2u98ajUZTYV
9UVBS6du0K4J/X/pdffoHJZMI999yDjh07VniMn5+feK1t27ZVON6xY0cxZDpbv/m9/vjjj1udSqdt27a47777IAi
"text/plain": [
"<Figure size 720x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(10, 8))\n",
"sns.boxplot(x=\"Neighborhood\", y=\"SalePrice\", data=df, ax=ax)\n",
"ax.set_title(\"Prices by Neighborhood\", fontsize=24)\n",
"ax.set_xlabel(\"Neighborhood\", fontsize=18)\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=45)\n",
"ax.set_ylabel(\"House Price\", fontsize=18);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The 28 neighborhoods are encoded as factor variables."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"neighborhood = pd.get_dummies(df[\"Neighborhood\"], prefix=\"nhood\")\n",
"df = pd.concat([df, neighborhood], axis=1)\n",
"del df[\"Neighborhood\"]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(neighborhood.columns)\n",
"interesting_variables.extend(neighborhood.columns)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 28)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].shape"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nhood_Blmngtn nhood_Blueste nhood_BrDale nhood_BrkSide \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_ClearCr nhood_CollgCr nhood_Crawfor nhood_Edwards \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Gilbert nhood_Greens nhood_GrnHill nhood_IDOTRR \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Landmrk nhood_MeadowV nhood_Mitchel nhood_Names \\\n",
"Order PID \n",
"1 526301100 0 0 0 1 \n",
"2 526350040 0 0 0 1 \n",
"3 526351010 0 0 0 1 \n",
"4 526353030 0 0 0 1 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_NoRidge nhood_NPkVill nhood_NridgHt nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_OldTown nhood_SWISU nhood_Sawyer nhood_SawyerW \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Somerst nhood_StoneBr nhood_Timber nhood_Veenker \n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nominal Features\n",
"\n",
"This section investigates the rest of the nominal variables with regard to which realizations / encoding might be a useful predictor."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alley Type of alley access to property\n",
"Bldg Type Type of dwelling\n",
"Central Air Central air conditioning\n",
"Condition 1 Proximity to various conditions\n",
"Condition 2 Proximity to various conditions (if more than one is present)\n",
"Exterior 1st Exterior covering on house\n",
"Exterior 2nd Exterior covering on house (if more than one material)\n",
"Foundation Type of foundation\n",
"Garage Type Garage location\n",
"Heating Type of heating\n",
"House Style Style of dwelling\n",
"Land Contour Flatness of the property\n",
"Lot Config Lot configuration\n",
"MS SubClass Identifies the type of dwelling involved in the sale.\n",
"MS Zoning Identifies the general zoning classification of the sale.\n",
"Mas Vnr Type Masonry veneer type\n",
"Misc Feature Miscellaneous feature not covered in other categories\n",
"Roof Matl Roof material\n",
"Roof Style Type of roof\n",
"Sale Condition Condition of sale\n",
"Sale Type Type of sale\n",
"Street Type of road access to property\n"
]
}
],
"source": [
"print_column_list(set(NOMINAL_VARIABLES) - set([\"Neighborhood\"]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Alleys\n",
"\n",
"Almost no house has access to an alley."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3WdgVFXawPH/9JnMZNIgCUkIJHSE0EIACSK9CUgHFSwg6iIrKrsBlK6Ii4IrKAqIIChIl46IID2g9N4TAqT3yfS574e8jGbBEEhCKOf3KXPn3HtPwnCfOe05MkmSJARBEAShBMjLugKCIAjCo0MEFUEQBKHEiKAiCIIglBgRVARBEIQSI4KKIAiCUGJEUBEEQRBKjAgqgiAIQokRQUUQBEEoMSKoCIIgCCVGWdYVuN+aNGlCcHBwWVdDEAThoXLt2jViY2PvWO6xCyrBwcGsWrWqrKshCILwUOnZs2eRyonuL0EQBKHEiKAiCIIglBgRVARBEIQS89iNqQiCINwLu91OQkICFoulrKtSqrRaLSEhIahUqns6XwQVQRCEIkhISMDT05PKlSsjk8nKujqlQpIk0tLSSEhIICws7J6uIbq/BEEQisBiseDn5/fIBhQAmUyGn59fsVpjoqUiCMJDJ8uaRbolHa1Si7fGG51Sd1/u+ygHlJuK+zuKloogCA+dn+N+ptuabnRe2ZkkU1JZV0f4CxFUBEF4qDhcDo4kHcn/WXIQnxNfaHlJku5Hte6ruXPnEh0djdVqBWDgwIFcvHiRmTNnsmTJkjKtm+j+EgThoaKUK3mzwZukWlIJ8AigTrk6ty2XYclg+bnl5NnzGFR7EL463/tc09Kzdu1aOnfuzIYNG4q80v1+ES0VQRAeOhUMFfik5SeMihqFr/b2wWLv9b3MPDyTb058w8rzK+9zDUtPbGwsoaGh9O/fn++///5vy3366acMGDCAfv36sWnTJnJycmjbti1OpxOAadOmsXHjxhKvnwgqgiA8lDzVnnioPP72fW+Nt/vnvws8D6Ply5fTp08fwsPDUavVHD169JYyv/32GwkJCSxZsoTvvvuOr776CkmSaNSoEbt378bpdLJz507atm1b4vUT3V+CIDyS6pSrw7cdvsXitFDH7/ZdZA+brKwsdu7cSXp6OosWLSI3N5fFixffUu7cuXOcPHmSgQMHAuBwOLh27Rp9+vRh0aJFuFwunnzySdRqdYnXUQQVQRAeSV4aLyIDI8u6GiVq7dq19OrVi5iYGADMZjNt2rTBx8enQLnw8HCaNGnC5MmTcblcfPnll1SsWBGDwcCUKVNYsWIFI0aMKJU6iu4vQRCEh8Ty5cvp3r27+7VOp6N9+/bExcUVKNe6dWs8PDx47rnn3AP5BoMBgK5du5Kamkq1atVKpY6ipSIIgvCQWLt27S3HJkyYwIQJEwAYPny4+/jo0aNvew2n00mfPn1KpX4ggoogCMJjY9SoUSQnJ/PVV1+V2j1EUBEEQXhMTJ06tdTvIcZUBEEQhBIjgoogCIJQYkql+2vVqlWsXr0aAKvVyunTp1m0aBEffvghCoWC6Oho3nzzTVwuFxMmTODs2bOo1Wo++OADKlWqxJEjR4pVVhAEQSgjUimbMGGCtHTpUqlbt25SXFyc5HK5pCFDhkgnT56UtmzZIsXExEiSJEmHDx+WXn/9dUmSpGKXLUyPHj1K8bcVBOFRderUqbKuwn1zu9+1qM/OUu3+On78OBcuXKBLly7YbDZCQ0ORyWRER0ezd+9e/vjjD1q0aAFA/fr1OXHiBLm5ucUuKwiC8Ki6evUq//znP+nbty+DBg1i6NChnD9//o7nxcbG8vbbb5d6/Up19tfXX3/NsGHDyM3NdS+8AdDr9Vy9evWW4wqFokTKCoIglLU1h68xbctZrmeaCfLW8a8ONXi2QXCxrmk2m3njjTeYPHkyDRo0AODYsWNMmj
SJRYsWlUS1i63Ugkp2djaXL1+madOm5ObmYjKZ3O+ZTCaMRiMWi6XAcZfLhcFgKHZZQRCEsrTm8DVGrzqO2Z6fEfhappnRq44DFCuwbN++naZNm7oDCkBERATfffcdo0aNIjMzk8zMTMLCwoiKiqJHjx6kpKTw2muvuVO7lLZS6/46ePAgzZo1A/LTA6hUKuLj45Ekid27dxMZGUnDhg3ZuXMnAEeOHKF69eolUlYQBKEsTdty1h1QbjLbnUzbcrZY101ISCA0NNT9+o033mDgwIF07NiRxMREmjZtytKlSxk8eLB7stRPP/10X/dcKbWWyuXLlwkJCXG/njhxIiNHjsTpdBIdHU29evWoW7cue/bsoX///kiSxJQpU0qkrCAIQlm6nmm+q+NFFRgYyIkTJ9yvZ8+eDUDfvn0JDAwkLCwMgKpVq+J0Orl27RobN25kwYIFnD59ulj3LqpSCypDhgwp8Lp+/fosW7aswDG5XM6kSZNuObe4ZQVBEMpSkLeOa7cJIEHeumJdt02bNsydO5cjR45Qv359AOLi4khMTESj0SCTydxle/fuzbRp06hatep9HRYQix8FQRBK2L861ECnUhQ4plMp+FeHGsW6rl6vZ/bs2SxcuJAXXniB/v37M2bMGEaPHk1wcMGxmo4dO7J79+5STR55OyL3lyAIQgm7ORhf0rO/AEJCQpgxY8Ytxzt16lTgtU6n4/fff3e/btKkCU2aNCn2/e9EBBVBEIRS8GyD4BIJIg8b0f0lCIIglBgRVARBEIQSI4KKIAiCUGJEUBEEQRBKjAgqgiAIQokRs78EQRAeErGxsYwYMYKqVasC+ftVde3alYEDB5Zxzf4kgoogCEJpOLYMtk2CrATwCoE24yCib7Ev27RpU/c6FZvNRseOHenevfsDk0xXBBVBEISSdmwZrPsn2P8/VUvW1fzXUCKB5abc3Fzkcjlnzpxh1qxZSJKEyWTi008/ZdeuXWRnZ/Pmm29is9no1q0ba9eu5ccff2T9+vXIZDI6d+7MoEGDSqw+IIKKIAhCyds26c+AcpPdnH+8mEFl//79DBw4EJlMhkqlYuzYsZw/f55p06YREBDAV199xebNm3nuued47rnnGDZsGNu2baNVq1bEx8ezceNGfvjhBwBefvlloqOjCQ8PL1ad/koEFUEQ/pbdaUelUJV1NR4+WQl3d/wu/LX766ZffvmFDz/8EA8PD5KSkmjYsCFeXl7UqlWLP/74g9WrVxMTE8PZs2e5fv06L730Un51srKIi4sTQUUQhNKVa8sl9kYsW+K28FrEa4R5hSGXicmiReYVkt/ldbvjpWDs2LFs3boVg8FATEwMkiQB+SnxFy5ciMVioUqVKtjtdqpWrcq8efOQyWQsWLCAGjWKl+Tyf4lPiSAIt8ix5fD2jrfZdHkTI7aPIN2SXtZVeri0GQeq/0lzr9LlHy8F3bp14/nnn6d///6YTCaSk5MBiIqK4ty5c+5NumrWrEmzZs0YMGAAPXv25MqVKwQEBJRoXURLRRD+QpIk0sxpIAMfjQ8KueLOJz2CFHIFepWeXHsu5XXlUcrEo+Ku3Bw3KeHZX3+XaXj06NF/e86WLVsKvB4yZMgt+12VJPFJER5LVoeVTGsmMpkMX60vSnn+f4WE3AQGbRqES3KxoOMCwrzC7ngtu9OOUq4ssEHSw85X68uKbiu4kHGBOuXq4K31LusqPXwi+pboTK+Hhej+Eh5LZzPO0mlVJ7qt6UZCzp+Dpzuu7iDVnEq6JZ1Nlzfd8TpXc67y/p73WX1hNdnW7NKs8n2llCsJNgTTsmJL/HR+ZV0d4SFSai2Vr7/+ml9//RW73c6AAQOIiopi1KhRyGQyqlWrxvjx45HL5cyaNYsdO3agVCoZM2YMERERxMXFFbusIBRmy5Ut2F127C47vyf9TmWvygC0CG7B7COzcUpO2lVqV+g1Mi2ZjNk1hiMpR9h4eSORAZEYNQ/GAjRBKCul8vSNjY
3l8OHDLFmyhEWLFpGYmMhHH33EiBEj+OGHH5AkiW3btnHy5EkOHDjA8uXLmT59OhMnTgQodllBuJM+1ftQQV+BMK8
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Alley\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Alley\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Building Type\n",
"\n",
"The type of a building clearly affects the valuation. The two types of townhouses are merged into one category, and the 2-family conversion and duplex types are merged into another. This makes sense a) semantically, and b) by looking at the two sub-clusters in the scatter plot."
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4k1X7wPFvdtIk3S1dUNqykbIKFCl7OkBElKF99UXlVRFBZYnK8KeCC1Rw4gYFQXELOACRjWwqoKxC90hX0uw8vz8CgUqFlrbM87kur6s9OXlyEkvu56z7yCRJkhAEQRCEWiC/1A0QBEEQrh4iqAiCIAi1RgQVQRAEodaIoCIIgiDUGhFUBEEQhFojgoogCIJQa0RQEQRBEGqNCCqCIAhCrRFBRRAEQag1ykvdgIutU6dOREdHX+pmCIIgXFEyMzPZsmXLeetdc0ElOjqa5cuXX+pmCIIgXFGGDBlSpXpi+EsQBEGoNSKoCIIgCLVGBBVBEASh1lxzcyqVcTqdZGRkYLPZLnVTrkharZaYmBhUKtWlboogCJeYCCpARkYGRqORhg0bIpPJLnVzriiSJFFYWEhGRgZxcXGXujmCIFxiYvgLsNlshISEiIByAWQyGSEhIaKXJwgCIHoqPiKgXDjx2QkXW3G5g0KzA51aQZCfCp1afJVdLkRPRRCEK86Pe7PpPec3ur24huwS0Uu+nIigchnYsmULnTt3JjU1lbvuuos77riDP//8E4DU1FQOHz5cof7hw4dJTU2t0rUff/xxUlNT6dWrF/379yc1NZX/+7//q/X3IAgXi8vtYXt6kfdnj8SxwvJz1pck6WI0SzhJ9BkvE8nJycydOxeA9evX89prr/HOO+/U+LqvvPIKAPPmzSM0NJQRI0bU+JqCcCkpFXIe69uU/DI7kQE6WscEVFrPZHHw2ZZ0yh1u7k2JI8SgucgtvTaJoHIZKi0tJTg4uEJZXl4eEyZMQJIkwsLCfOVr1qzh9ddfx2AwEBAQQNOmTRk7dux5X+Pxxx9n4MCB9OjRg8OHD/PCCy8wYMAAfvnlFywWC0VFRYwZM4b+/fuzdetW5s6di0KhoH79+jzzzDNi+bBwSUUH6Zg/sh1KhQy/f5lP+f2vfF7+6S8A9BolY3o2uphNvGaJ4a/LxObNm0lNTWXYsGE88cQT3HTTTRUef/vtt7n55ptZuHAhffr0AcDtdvPss8+yYMECFi5ciEZT9Tux22+/na+++gqAL774gqFDhwJgtVr58MMP+eCDD5g9ezZOp5Onn36a+fPns2jRIurVq+d7niBcSv461b8GFIBA/ekbnxCD+mI0SUAElctGcnIyCxcu5PPPP+err77iscceq7BM99ixYyQmJgLQrl07AEwmEwaDgdDQUACSkpKq/HqdOnXi8OHDmEwmNmzYQM+ePQHo0KEDcrmc0NBQ/P39ycvLIy8vj/Hjx5OamsqGDRvIzMysrbctCHWmdUwgn49O5uP/dqBfi4hL3Zxrhhj+ugydChJnSkhIYOfOnTRr1oy9e/cCEBISgsViwWQyERwczO7du6uc1l8mkzFo0CCeffZZunTp4hvOSktLA6CgoACz2UxERAQRERG8+eabGI1Gfv31V/z8/GrpnQpC3Qn0U9MpPuRSN+OaI4LKZeLU8JdcLsdisTBlyhS0Wq3v8QcffJCJEyfy448/EhMTA4BcLufpp5/m/vvvx2g04vF4iI2NrfJrDhkyhB49evDNN9/4ygoKCrj77rspKytj+vTpKBQKnnzySUaPHo0kSej1el588cXae+OCIFxVRFC5DHTq1IlNmzZV+tjChQt9P7///vtnPX7gwAEWL16MWq1mwoQJREZGVnqdyibv3W437du3JyEhwVfWoUMHJkyYUKFeSkoKKSkpVXovgiBc20RQucLp9XruuOMOtFot0dHR3HjjjVV63k8//cS8efOYMWNG3T
ZQEIRriggqV7i77rqLu+66q9rP69evH/369atQVtWT3QRBEP6NWP0lCIIg1BoRVARBEIRaUyfDX8uXL/dtkLPb7ezfv5+FCxfy3HPPoVAoSElJ4eGHH8bj8TBjxgwOHjyIWq3m2WefJTY2ll27dtWoriAIgnCJSHVsxowZ0pIlS6RBgwZJ6enpksfjke677z4pLS1NWrVqlTR58mRJkiRp586d0gMPPCBJklTjuudy6623nlX2559/1uZbviaJz1AQrm6VfXdWpk6Hv/bu3cuhQ4e46aabcDgcNGjQAJlMRkpKChs3bmT79u107doVgDZt2rBv3z7MZnON616pdu/e7cs+PG/ePF9W4VP/7dmz5xK3UBAE4dzqdPXXO++8w5gxYzCbzRgMBl+5Xq/nxIkTZ5UrFIpaqVvXvt6ZyUurDpJVbCUqUMfE/k0Z3LZqO9n/zYIFC/j222/R6XS+snvuuUdkFRYE4YpSZ0GltLSUo0ePkpycjNlsxmKx+B6zWCz4+/tjs9kqlHs8HgwGQ43r1qWvd2byxPK9WJ1uADKLrTyx3Js2pSaBpUGDBsybN49Jkyads95ff/3F7NmzcbvdFBUVMWPGDNq1a0ffvn1p27Ytx44do3PnzpSVlbFnzx7i4uJ46aWXLrhdgiAI1VFnw1/btm2jc+fOABgMBlQqFcePH0eSJNavX09SUhLt2rVj3bp1AOzatYsmTZrUSt269NKqg76AcorV6ealVQdrdN3+/fujVFaM8R999JFv6OvUwVqHDh1i8uTJfPzxx9x///0sX74cgMzMTMaPH8+nn37KJ598wsiRI1m2bBnbt2+ntLS0Rm0TBEGoqjrrqRw9etSXowpg5syZTJgwAbfbTUpKCq1bt6ZVq1Zs2LCB4cOHI0kSzz//fK3UrUtZxdZqlddEZcNf4eHhvPnmm2i1WiwWi2/4LzAwkKioKAD8/Pxo1Mh7doTRaMRut9d62wRBECpTZ0Hlvvvuq/B7mzZtWLp0aYUyuVzOM888c9Zza1q3LkUF6sisJIBEBeoqqV37nnvuOV5++WUSEhJ4/fXXfWnoZTLZRXl9QRCEcxFpWqppYv+mFeZUAHQqBRP7N70orz9o0CDGjRuHv78/ERERFBUVXZTXFQRBqAqZJEnSpW7ExTRkyBDfPMQp+/fvp3nz5lW+Rl2s/rrSVfczFAThylLZd2dlRE/lAgxuG33NBxFBEITKiNxfgiAIQq0RQUUQBEGoNSKoCIIgCLVGBBVBEASh1oigIgiCINQasfrrMuF0Opk6dSqZmZk4HA4efPBB/vzzT77//nvCw8N99SZOnEhiYuJ5r5ednc3s2bMxmUzYbDZatmzJ1KlTUavVdfk2BEG4xomgciH2LIVfn4GSDAiIgd7TIPGOGl3y22+/JTAwkJdeeoni4mIGDx7MbbfddkGZit1uNw899BAzZszwpa159tlnef3115kwYUKN2ikIgnAuIqhU156l8N0j4DyZqqXkhPd3qFFgGTBgAP379wdAkiQUCsW/1p0yZQpKpZKsrCwcDgc33ngja9asITs7mzfffJOcnBwiIiIq5EGbOHEiHo8HgA8++IAffvgBpVJJUlISEydOZN68eWRkZFBYWEhWVhZPPPGE7/waQRCEqhJzKtX16zOnA8opTqu3vAb0ej0GgwGz2cwjjzzC+PHjgcozFQNER0fzwQcfEB8fT0ZGBgsWLKBfv36sXr2avLw86tevX+H6Go0GnU7HwYMHWbFiBUuWLGHJkiWkp6ezZs0aANRqNe+99x5PPvkkH330UY3ejyAI1ybRU6mukozqlVdDdnY2Y8aMYeTIkQwcOJB58+b96/BXixYtAPD39yc+Pt73s8PhICoqip9++qlC/aKiInbu3Indbqd169aoVCoAkpKS+PvvvwF8aVYiIiJwOBw1fj/Clc/p9qBSiHtPoerEX0t1BcRUr7yKCgoKGDVqFBMnTmTo0KHnrX+urMRt2rQhIyPDd/ywJE
nMnz+fP/74g/j4ePbs2YPL5UKSJLZt20ZcXNx5rylcW8w2Jyv35fD40t38nVuGx3NNpQgUakD0VKqr97SKcyoAKp2
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Bldg Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# Unify the two townhouse types into one.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"Twnhs\" if x in (\"TwnhsE\", \"TwnhsI\") else x\n",
")\n",
"# Unify the two kinds of 2-family homes.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"2Fam\" if x in (\"2FmCon\", \"Duplx\") else x\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"build_type = pd.get_dummies(df[\"Bldg Type\"], prefix=\"build_type\")\n",
"df = pd.concat([df, build_type], axis=1)\n",
"del df[\"Bldg Type\"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(build_type.columns)\n",
"interesting_variables.extend(build_type.columns)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" build_type_1Fam build_type_2Fam build_type_Twnhs\n",
"Order PID \n",
"1 526301100 1 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 1 0 0\n",
"4 526353030 1 0 0\n",
"5 527105010 1 0 0"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[build_type.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Air Conditioning\n",
"\n",
"Air conditioning clearly increases the valuation (\"steeper\" regression line with respect to the overall living area)."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd4U2Ubh+8k3U0nu2W1TJENAkpBWSKoiAwRFBUEHAxxoqiAfirgwIUDt4KCLBEUB4JsAUURZSjIpowO6Eh3k++PX0spo7Q0hQLvfV1cTU9OTt4EeJ/zrN9jcblcLgwGg8FgcAPW870Ag8FgMFw8GKNiMBgMBrdhjIrBYDAY3IYxKgaDwWBwG8aoGAwGg8FtGKNiMBgMBrdhjIrBYDAY3IYxKgaDwWBwG8aoGAwGg8FteJzvBZxrWrZsSXh4+PlehsFgMFxQ7N+/n7Vr157xvEvOqISHhzN37tzzvQyDwWC4oOjRo0ehzjPhL4PBYDC4DWNUDAaDweA2jFExGAwGg9u45HIqpyIzM5N9+/aRlpZ2vpdywePj40PlypXx9PQ830sxGAznAWNUgH379hEQEED16tWxWCznezkXLC6Xi7i4OPbt20dERMT5Xo7BYDgPmPAXkJaWRpkyZYxBKSYWi4UyZcoYj89guIQxnkoOxqC4B/M9Gs4JqUcgOQa8/MA3VD8NpQLjqRgMhguPTfPgrSvg9UaQuP98r8ZwHMaolBK2bdvGkCFD6N+/Pz179uSNN97A5XIV+TpffvklmZmZhTp37dq1PPjgg6d8buHChTRu3JhDhw4dO/bee++xcePGIq/JYHAr2VmwN6ez25kF8TsLPv8s/h8Zzh5jVEoBiYmJPPTQQ4wePZqpU6cyc+ZM/v33X2bMmFHka02ZMgWn01nsNc2aNYv+/fszc+bMY8eGDBlCw4YNi31tg6FY2Dyg3ZNQowM06Q/hTU99niMOlr8Ei58FR8y5XeMljMmplAIWL15My5YtqV69OgA2m42JEyceK8t95ZVX+O2333A6ndx111106dKF/v37U7duXbZt20ZycjKvv/46q1evJiYmhgcffJA777yTl19+GU9PT2655RZ8fHz4/PPPycrKwmKxMHny5NOuZ+/evSQkJDB48GB69OjBvffei6enJ48//jhdu3YlNjaWOXPm4HQ6GTFiBFdeeeW5+JoMhjyCq0Dvj8HqAV7+pz7nvyWw5Dk99vKHto+cu/VdwhhPpRRw+PBhqlSpku+Yv78/Xl5eLFu2jH379jF9+nQ+++wz3n33XRITEwFo2LAhn3zyCa1bt+bbb7+ld+/elCtXjldffRWA9PR0vvjiC7p3786uXbt47733mD59OjVr1mTlypWnXc/s2bPp2bMngYGBNG7cmEWLFp10TmBgINOnTzcGxXD+8Ak6vUEB8AvJe+xfruTXYwCMp1IqCAsLY/PmzfmO7d27l4MHD/Lvv/+yadMm+vfvD0BWVhb79ysxWa9ePQAqVqxIbGzsSdc9vlekTJkyjBo1Cn9/f3bs2EHjxo1PuZbs7GwWLFhAeHg4S5YsISEhgWnTptG1a9fTXttgKJWENYO7FkJWKoQ1Od+ruWQwRqUU0K5dO6ZMmULfvn2pWrUqmZmZTJgwgauuuorIyEhatmzJ//73P5xOJ2+//fZJXs3xWCyWYzkVq1WOaFJSEm+88QZLly4FYMCAAactAli2bBn169fnjTfeOHasc+fObN26Nd95udc2GEotfiFQvfX5XsUlhzEqpQC73c6ECRN46qmncLlcOBwO2rVrR79+/QBYt24d/fr1IyUlhY4dO2K32097rebNmzNkyBCGDh2a7/pNmzalT58+eHh4EBgYyOHDh6lcufJJr585cya9e/fOd6xXr158/vnnbvq0BoPhYsbiOpu61QuYHj16nDRPZcuWLVx22WXnaUUXH+b7NBguPk61d54KE8MwGA
wGg9swRsVgMBgMbsMYFYPBYDC4DWNUDAaDweA2SqT6a+7cuXz11VeAGvC2bNnC1KlTef7557HZbERFRTFs2DCcTifjxo3jn3/+wcvLi+eee45q1aqxYcOGYp1rMBgMhvOEq4QZN26ca8aMGa5u3bq5du/e7XI6na5Bgwa5Nm3a5Prhhx9co0aNcrlcLtcff/zhuvfee10ul6vY5xbEzTfffNKxzZs3u/MjX/KY79NguPg41d55Kko0/PXXX3+xfft2rr/+ejIyMqhatSoWi4WoqChWr17N+vXradOmDQCNGzfm77//Jjk5udjnXoisXbuWZs2aceDAgWPHXn755UKV8BkMBkNpoUSNypQpUxg6dCjJycn5Gvb8/f1JSko66bjNZnPLuSXNvD/203rCEiIe/5bWE5Yw7w/3zHPw8vLiiSeeOCvJe4PBYCgNlJhRSUxMZOfOnbRq1Qq73Y7D4Tj2nMPhIDAw8KTjTqfTLeeWJPP+2M8Tc/9i/9FUXMD+o6k8MfcvtxiWVq1aERQUZLrXDQbDBUuJGZVff/31mIKt3W7H09OTPXv24HK5WLlyJc2bN6dp06YsX74cgA0bNlC7dm23nFuSvPTDP6RmZuc7lpqZzUs//OOW648bN45PPvmE3bt3u+V6BoPBcC4pMe2vnTt35tOWeuaZZ3jkkUfIzs4mKiqKRo0a0aBBA1atWsWtt96Ky+XihRdecMu5JUn00dQiHS8qISEhjB49mlGjRtG06WmGDxkMBkMppcSMyqBBg/L93rhx43xTBEFKt88+++xJry3uuSVJWLAv+09hQMKCfd32Hu3bt2fRokV89dVXPProo267rsFgMJQ0pvmxiDzauQ6+nrZ8x3w9bTzauY5b3+fJJ5/Ex8fHrdc0GAyGksZI3xeR7k3CAeVWoo+mEhbsy6Od6xw7fra0bNmSli1bHvvdbrfz888/F+uaBoPBcK4xRuUs6N4kvNhGxGAwGC5GTPjLYDAYDG7DGBWDwWAwuA1jVAwGg8HgNoxRMRgMBoPbMEbFYDAYDG7DGJVSwogRI5gyZcqx35OTk+ncuTNbt249j6syGAyGomGMytmwcSa8Wh/GBevnxuJ39I8bN44ZM2awfft2AF588UX69OlD3bp1i31tg8FgOFcYo1JUNs6EBSMgYS/g0s8FI4ptWEJDQ3n66ad56qmnWLt2Lfv27WPAgAHuWbPBYDCcI4xRKSqLn4XME7S/MlN1vJi0b9+eiIgInnjiCcaPH4/FYin2NQ0Gg+FcYjrqi0rCvqIdLyLdu3cnLS2NChUquOV6BkOxyM4Am9f5XoXhAsJ4KkUlqHLRjhsMFyLpSbBlAXx1L8RsBafzfK/IcIFgjEpR6TAGPE+Quff01XGD4WIh9SjM7A9/z4EZt0FK7PlekeECwYS/ikrDW/Rz8bMKeQVVlkHJPV5MTlQrNpxjXC5wHNZjv7JgtRV8/sWKzQO8AiA9EQIqXbrfg6HIGKNyNjS8xW1GxHCeyEyD1HjAAv7ltIkCHNkFH3UGVzYM+B7K1jrztbIzwOoJF1NhhV85uG8VHNoM4U3Br8z5XpHhAsGEvwyXJof+htcbwVtXyJDk8s93kHwIHLHw1+wzXyd+J8y7D/6YqpDRxYLNA4KrQp3rwF7+fK/GcAFRYp7KlClTWLJkCZmZmfTt25cWLVrw+OOPY7FYqFWrFmPHjsVqtTJ58mSWLl2Kh4cHo0ePpmHDhuzevbvY5xYVl8tlSnjdgMvlOt9LKByb5srDyM6A3SuhbE0dr9UJlk0AZzbUu6nga6TEw1f3wN61MkDVo8A3uOTXbjCUYkrEU1m7di1//PEH06dPZ+rUqRw8eJDx48czcuRIvvjiC1wuF4sXL2bTpk2sW7eOWbNmMWnSJJ555hmAYp9bVHx8fIiLi7twNsRSisvlIi4u7sIYg9xsIARVgbK1oWanvOMhkTB0HQxff+bQl9VT1wDw8AaPC+BzGwwlTIl4KitXrqR27doMHT
qU5ORkHnvsMWbOnEmLFi0AaNu2LatWrSIiIoKoqCgsFgthYWFkZ2cTHx/Ppk2binVup06dTru2U1G5cmX27dtHTEy
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Central Air\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use a new variable name to clearly show that the variable's *dtype* is changed from *str* to *int*."
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"df[\"air_cond\"] = df[\"Central Air\"].apply(lambda x: 1 if x == \"Y\" else 0).astype(int)\n",
"del df[\"Central Air\"]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"new_variables.append(\"air_cond\")\n",
"interesting_variables.append(\"air_cond\")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>air_cond</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" air_cond\n",
"Order PID \n",
"1 526301100 1\n",
"2 526350040 1\n",
"3 526351010 1\n",
"4 526353030 1\n",
"5 527105010 1"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"air_cond\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \"Proximity to various Conditions\"\n",
"\n",
"The columns *Condition 1* and *Condition 2* have the same realizations and can be regarded as \"tags\" given to a house indicating the nearby presence of a) a major street, b) a railroad, or c) a park.\n",
"\n",
"The default tag \"Norm\" (implying no \"condition\") is given to 86% of the houses (this realization should therefore not be regarded as a tag!).\n",
"\n",
"From the comparison of the grouped scatter plots below, it can be assumed that the proximity of a major street decreases the valuation (lower regression slope through the cloud of blue and orange dots). Therefore, a factor variable *major_street* is extracted indicating the proximity of an \"artery\" or \"feeder\" street.\n",
"\n",
"Further, a factor variable *railway* is extracted as a relatively high proportion of the houses has such a tag. From the plots, a railway seems to not affect the valuations strongly.\n",
"\n",
"Lastly, a factor variable *park* is extracted. From the plots, this does not seem to affect the valuation much."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the \"raw\" realizations:"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Feedr 174\n",
"Artery 97\n",
"RRAn 48\n",
"PosN 43\n",
"RRAe 29\n",
"PosA 24\n",
"RRNn 11\n",
"RRNe 6\n",
"dtype: int64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(df[\"Condition 1\"].value_counts() + df[\"Condition 2\"].value_counts()).sort_values(\n",
" ascending=False\n",
")[1:]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# Condition 2 is only filled with anything other than \"Norm\"\n",
"# if Condition 1 already has such a tag.\n",
"assert not ((df[\"Condition 1\"] == \"Norm\") & (df[\"Condition 2\"] != \"Norm\")).any()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"86.0"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 86% of the houses actually have no tag.\n",
"round(100* (df[\"Condition 1\"] == \"Norm\").sum() / df.shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From a simple scatter plot it is hard to see any significant impact by a predictor."
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3WdgVMXawPH/1uymbHonkITeQgtNQAUpItIFggqogOJFroAFUEGKcONV0CtFAUWUKiBKFZSiVANKb6EIhFDS226273k/5GUxJkAgG0Jgfp/Y2TnnTJbkPHumPCOTJElCEARBEFxAXt4NEARBEB4cIqgIgiAILiOCiiAIguAyIqgIgiAILiOCiiAIguAyIqgIgiAILiOCiiAIguAyIqgIgiAILiOCiiAIguAyyvJuwL3WvHlzwsPDy7sZgiAIFcrly5dJSEi4bb2HLqiEh4ezevXq8m6GIAhChdKrV68S1RPdX4IgCILLiKAiCIIguIwIKoIgCILLPHRjKoIgPBisVivJycmYTKbybsoDRaPRUKlSJVQq1V0dL4KKIAgVUnJyMl5eXkRGRiKTycq7OQ8ESZLIyMggOTmZqKiouzqH6P4SBKFCMplM+Pv7i4DiQjKZDH9//1I9/YmgIghChZNjzsHmsGF1WHFIjvJuzgOltEFaBBVBECqcny/+TGp+KmezzmK1W8u7OcLfiKAiCEKFYnPYOJRyCAAJCYvDcsv6kiSVWVvOnDnDyy+/zIABA+jduzefffZZqa83atQoEhIS2LFjB9999x0A3333HVarlZMnTzJr1qxSt9toNBIXF8e5c+dKfa5/EgP1giBUKEq5ktcavca50+fw0figVWqLrWdz2MgyZeGQHPhr/VHKXXu7y83NZfTo0cycOZPIyEjsdjuvv/46y5cvp3///qU+/6OPPur899y5c+nRowe1a9emdu3apTrv0aNHef/990lJSSltE4slgoogCBVOqGcomZpMQtxDUMgVxdbRW/Wk5qcCIJfJCXQPdGkbtm7dSvPmzYmMjARAoVDw4YcfOqfixsfH8+effwLw9NNPM2jQIMaOHYtareby5cukpqYSHx9P3bp1WbJkCStXriQwMJCMjAwAVq9ezV9//UWVKlVIS0tj1KhRDBo0iOXLl/PJJ5+wdu1avvnmG9RqNZGRkUyePJl169bx22+/YTKZSEpKYujQoUXSq1gsFmbPns3bb7/t0s/jOtH9JQhChSSXyW8aUACUshvfmV39lAKQmppKREREoTIPDw/UajXbt28nOTmZFStWsHTpUtavX09iYiIAYWFhfPXVVwwYMIDvvvuO9PR0vv32W1asWMGcOXOwWguPEfXp04fAwEA++eQTZ1lWVhYzZ87km2++YdmyZXh5eTm7yvR6PXPnzuXzzz9n3rx5RdrdpEkTQkNDXf1xOImgIgjCA0mj1BDpHUllXWW81F4uP39YWBjXrl0rVHbp0iX279/PuXPniI2NRSaToVKpaNCggXP84nr3VUhICBaLhaSkJKpVq4ZarUalUhETE3Pba1+6dIlq1arh6ekJQNOmTTlz5gwAtWrVAiA0NBSL5dbjTWVBBBVBEB5ISrkSD5UHXmqvMnlSadu2LTt37iQpKQkoWOEfHx/P6dOnqVq1qrPry2q1cvDgQapUqQIUnbIbGRnJ2bNnMZlM2O12Tp48WeRaMpkMh+PG1OlKlSpx7tw58vPzAdi3b59zsWJ5r9sRYyqCIAh3wdPTk/j4eN577z0kScJgMNC2bVueffZZZDIZ+/bto1+/flitVp588knq1q1b7Hn8/PwYOnQocXFx+Pn5odUWnXgQGxvLyy+/zPDhw53HjBgxgoEDByKXy6lcuTJvvvkmGzZsKNOfuSRkUlnOt7sP9erVS+ynIggPgJMnT5Z6JpRQvOI+25LeO0X3lyAIguAyIqgIgiAILiOCiiAIguAyIqgIgi
AILlMms79Wr17NDz/8AIDZbObkyZMsWrSIqVOnolAoaN26Na+99hoOh4OJEyeSmJiIWq3mgw8+oEqVKhw6dKhUdQVBEIRyIpWxiRMnSsuXL5e6desmXbx4UXI4HNKQIUOk48ePS5s3b5bGjBkjSZIkHTx4UBo2bJgkSVKp695Kz549y/CnFQThXjlx4kR5N+GBVdxnW9J7Z5l2fx09epSzZ8/SpUsXLBYLlStXRiaT0bp1a/bs2cOff/5JmzZtAGjYsCHHjh1Dr9eXuq4gCMK9NH/+fFq3bo3ZbC7yntlsZuXKleXQqvJRpkFl7ty5DB8+HL1e70wnAAX5cfLy8oqUKxQKl9QVBEH4px8PXqZV/Daixm6gVfw2fjx42WXnXrt2LU899VSxiw/T0tIeqqBSZivqc3NzOX/+PC1atECv12MwGJzvGQwGdDodJpOpULnD4cDT07PUdQVBEP7ux4OXGbf6KEarHYDL2UbGrT4KQI9G4aU6d0JCApUrVyYuLo633nqLXr16MWDAAPz8/MjJyaFSpUqcPXuWWbNmMWjQIN59912ysrIAeO+996hZsyZt27YlOjqaqlWrsn37dlauXImPjw9Lly7FYDAwdOjQ0n0A91CZPans37+fli1bAgXpDFQqFUlJSUiSxK5du4iNjaVx48bs2LEDgEOHDlGjRg2X1BUEQfi7jzYnOgPKdUarnY82J5b63CtXrqRPnz5ER0ejVqs5fPgwUJDufuHChQwbNoxq1arx2muv8cUXX9CiRQsWLVrElClTmDhxIgBXr17l448/5p133qFr167OJ561a9fSs2fPUrfxXiqzJ5Xz589TqVIl5+tJkybx5ptvYrfbad26NQ0aNKB+/frs3r2buLg4JEli2rRpLqkrCILwd1eyjXdUXlI5OTns2LGDzMxMFi1ahF6vZ/HixQDOBI9/d/r0aX7//Xd++ukn5/EAvr6++Pr6AtC7d29Gjx5N06ZNCQgIICAgoFRtvNfKLKgMGTKk0OuGDRuyYsWKQmVyuZzJkycXOba0dQVBEP4uzEfL5WICSJhP8btGltTatWvp3bs3Y8aMAQq26X3iiSfw9fV1ZguWy+XODMPR0dF069aNrl27kpGR4RxrkctvdBqFh4fj5eXFF198wTPPPFOq9pUHsfhREIQH3ludaqJVFd7QS6tS8FanmqU678qVK+nevfuNc2q1dOzYkYsXLzrL/P39sVqtfPTRRwwbNoyffvqJAQMGMGTIEKpXr17sefv27csff/zhnPFakYjU94IgPPCuD8Z/tDmRK9lGwny0vNWpZqkH6deuXVukbOLEic6xEgA3NzfWrFnjfD1nzpwix+zevbvQa7vdTu/evVEobr6z5f1KBBVBEB4KPRqFlzqI3AszZswgISGBL774orybcldEUBEEQbiPjB49urybUCpiTEUQBEFwGRFUBEEQBJcRQUUQBEFwGRFUBEEQBJcRA/WCIAh3KTk5mW7dulG3bl1nWfPmze96X6dWrVoVmV5c0YigIgjCw+HICtg6GXKSwbsSPDEBYvqW+rTVqlVj0aJFLmjgg0EEFUEQHnxHVsC6f4P1/1O15FwqeA0uCSz/NH36dP744w8cDgcvvPACnTt3JjExkQ8++AAAHx8fpk2bhru7O+PHj+fs2bNERERgsVgAGDt2LNnZ2WRnZzN37ly8vb1d3sayIoKKIAgPvq2TbwSU66zGgvJSBpWzZ88yYMAA5+s+ffqQnJzMsmXLMJvN9O3bl1atWjF+/HimTZtGtWrVWLlyJV9++SW1a9fGbDazYsUKrly5wubNm53nadGiBS+88EKp2lYeRFARBOGmrHYrKoWqvJtRejnJd1Z+B/7Z/TV//nyOHz/uDDQ2m43Lly9z7tw5Jk2aBIDVaiUyMhKtVktMTAwAYWFhhIaGOs9TXJbjikAEFUEQitBb9CRcTWDzxc28EvMKUd5RyGUVeLKod6WCLq/iyl0sOjqa5s2bM2XKFBwOB3
PmzCEiIoKoqCg+/PBDwsLC+PPPP0lLS0OpVLJhwwYGDRpESkoKKSkpzvNcz3Jc0YigIghCEXmWPEb9OgoJiZMZJ/n
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Condition 1\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"However, plotting the groups separately reveals different slopes."
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZ4AAAQ+CAYAAABfgSrEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3XlYlXX+//HXYVXZFCvNNa2ANFFBMBM3yqyxzKVGmrLJFkfLKTVDJ1wiRRPL0bRyqqlpXMpcmqzUUsclNZUwy1S0Rs0VNVH2/dy/P/ye82P3AAcOy/NxXV7lfT73fb+58foALz7n/TEZhmEIAAAAAAAAAAA7cXJ0AQAAAAAAAACAuoXgGQAAAAAAAABgVwTPAAAAAAAAAAC7IngGAAAAAAAAANgVwTMAAAAAAAAAwK4IngEAAAAAAAAAdkXwDLs5ePCgpk2bpnvvvVedO3dWUFCQIiIitGzZMuXl5Tm6PP3rX/+Sv7+/1qxZYz02YsQI+fv7KyUlxXosNTVVS5cuLXRuSeMcKSMjQ+Hh4YqJianW+y5cuFD+/v4aMWJEqWNSUlKuOQaA7Zhbq4+j5taiRo4cKX9/f40ePbrC1/jxxx+1a9cuO1YF1F3Ms9XHUfPsmjVr5O/vf80/mzZtqta6Cnr22Wfl7++v06dPO6wGwJ6YW6tPTZxbO3XqpF69eum5557T/v37i50bHh5e4nkBAQHq1q2bBg8erEWLFikrK6vQeWQStY+LowtA7Wc2m7Vw4UK98847cnV1Ve/evdWvXz+lpqZqx44devXVV7Vhwwa99957atCggaPLLWTIkCEKDQ2Vu7u79diAAQN0/fXX67HHHitznKPk5eVp4sSJOnPmjMNq2Lt3r1auXKmHH37YYTUAdR1za/WqCXOrJJ0/f167d+9Ww4YNtX37dl24cEE33HBDua6xefNmPffcc5oyZYruvPPOKqoUqP2YZ6tXTZhnQ0NDFRoaWurr7dq1q8ZqgLqJubV61dS5NSUlRT/99JM2bdqkrVu36qOPPlK3bt2KnTt27NhCf8/Pz9fZs2e1efNmLVy4UD/++KPeffddmUymQuPIJGoPgmdU2uLFi/X222+rS5cuevPNN9WsWTPrazk5OXr55Zf1xRdfaPLkyZo/f74DKy1u6NChxY5dunRJ119//TXHOcKVK1c0YcIE7dy509GlaO7cuerXr5+uu+46R5cC1EnMrdWnJs2tn3/+ucxms55++mktXLhQa9asKffK56SkJBmGUUUVAnUH82z1qSnzbGhoqP761786tAagrmNurT61YW5dsGCB3n77bb3++uv65JNPir1e2nnnz5/X0KFDtX37du3cuVNhYWHFxpBJ1A602kClHD9+XG+//bZ8fX313nvvFfqiIklubm6aPXu2WrZsqQ0bNuh///ufgyqt/b788kv94Q9/0M6dO9WzZ0+H1tKhQwclJydr5syZDq0DqKuYW6tPTZpbJWnt2rVq0qSJnnrqKXl4eGj16tWEyEAVYJ6tPjVtngVQdZhbq09tmVvHjBkjV1dX/fDDD8rMzLT5vGbNmmn48OGSpO+++67Y62QStQfBMyrlP//5j3Jzc/Xoo4/K29u7xDGurq6aOnWqZs2apSZNmhR6bd26dYqIiFCXLl3UtWtXRURE6Kuvvip2DX9/f02ePFn79u3TiBEj1LVrV4WEhGjcuHEl9kHbtGmThg8fri5duqhPnz565513ZDabi40r2Jtpz5498vf3lyQlJCTI399fCxcuLDbOwmw2a/ny5Ro8eLACAwMVHByskSNHFvtt4+nTp63X2rx5sx566CEFBgaqR48emjJlipKSkq7xlK9asWKFGjRooMWLF+svf/mLTecUreFafwr2tyrLM888o3bt2mn9+vXasmWLTefY+rwsn4fly5drwoQJCgwMVFhYmOLj4639nE6cOKHY2FiFhYWpc+fOioiI0I
EDB2Q2m/Xee+8pPDxcXbp00UMPPaQ9e/aU61kBNQFza/2cW3/++Wf98ssv6tmzpxo2bKjw8HCdPHlSu3fvLjZ25cqV8vf31zfffKORI0fq9ttvV3h4uEaMGKEpU6ZIkmbMmCF/f38lJiZaz9u5c6eeeOIJBQUFqUuXLoqIiNDGjRsLXfu3336Tv7+/Fi1apOjoaHXp0kV33HGHvv76a+u/maIMw1B4eLjCw8MJylErMM/Wz3m2PAzD0Mcff6whQ4YoMDBQISEhGj16tA4dOlTi+PXr1ysiIkJdu3ZVUFCQ/vznP5c4f+fn5+v999/XgAEDFBgYqAceeEDffPNNqR/7ggULNHPmTHXp0kXdu3fX+vXr7f6xAvbC3MrcWpSbm5s8PT0lSbm5ueU6t2nTppKurpQvqryZhOXzuWbNGq1atUoPPPCAOnXqpN69e2vOnDnlCsVRPrTaQKV8++23kqRevXqVOa5fv37Fjs2ZM0cffPCBrr/+et1///2SpK1bt2rChAk6dOiQXnrppULjDx48qMcff1zBwcF65JFH9NNPP2n9+vX6+eeftW7dOrm5uUm6+sP4lClT1LRpUw0aNEiZmZlavHixvLy8yqyxZcuWGjt2rBYtWqTrrrtOERERpfaAM5vNGj9+vDZs2KDWrVtr2LBhysjI0ObNm/XUU09p6tSpevTRRwuds2XLFr399tvq27evunfvrp07d2rlypX69ddfS3zLSVHPPvusgoKC5O7uXu4w1dvbu1jvpJLcdtttNl3Pzc1NM2bM0IgRIxQdHa3Q0FB5eHiUOr4iz+utt95So0aN9Nhjj+nXX39Vx44drRtljRs3TsnJyRo4cKDOnTunr7/+Wk8//bTCw8O1detWDRgwQNnZ2Vq7dq1Gjx6tDRs2FPttO1CTMbfWz7n1P//5jyTpvvvukyQNHDhQX3zxhVatWqUePXqUeE50dLSaNWumESNG6OzZsxo4cKA8PDy0ZcsW9e7dW4GBgdZv9j/55BO98soratq0qQYOHKhGjRpp48aNGjt2rCZOnKhnnnmm0LU//vhjmUwmPfLIIzp27Ji6dOmiFi1aaOPGjYqOji7U1/D777/XmTNnNGbMmGI9+ICaiHm2fs6z5TFp0iR9/vnnuvXWWxUREaHMzExruPyPf/yj0LxseSt5y5YtNWTIEJlMJm3YsEEjR47Ua6+9pgcffNA6dvLkyVq7dq1uueUWDR8+XCdOnNDzzz9vDViK+vTTTyWp0FwM1FTMrcytRf3888+6fPmyWrRoUeovI0pj+fcUEBBQ7LXyZhIWS5cu1dGjR3XPPfeoV69e2rhxoz744ANduHBBb7zxRrnqg40MoBJ69Ohh+Pn5GVeuXCnXeXFxcYafn58xePBg49KlS9bjly5dMu6//37Dz8/P2Lt3r/W4n5+f4efnZ7z33nvWY2az2XjyyScNPz8/Y9u2bYZhGEZycrIRHBxs9O7d2zh37px17E8//WQEBgYafn5+xurVq63HH3vsMcPPz89ITk4udK9BgwYVqrfouM8++8zw8/MznnzySSM9Pd067uTJk0bPnj2NDh06GCdPnjQMwzBOnTplrX/dunXWsTk5OcbAgQMNPz8/49dffy3X89u9e7fh5+dnzJw5s1znVdabb75p+Pn5GRs3bjQMwzCmTp1q+Pn5GTNmzLCOSU5ONvz8/IzHHnvMeqw8z8vysXXu3Nm4cOFCiffv169foc/ZhAkTDD8/PyMoKMhITEy0Hl+4cKHh5+dnLFu2zL4PAqhizK31a241DMPIzc017rjjDqNbt25Gdna2YRhXP5bQ0FCjU6dOxf4tfPrpp9b5MDMzs8TXlixZYj12+vRpo2PHjsb9999vXL582Xo8IyPDePjhh43bbrvN+rxOnDhh+Pn5Gf7+/sbRo0cLXXvevHmGn5+fsWHDhkLHLV8Pjh07VvmHAVQD5tn6Nc+uXr3a+v3pm2++WeKfU6dOWcevW7fO8PPzMyZMmGDk5u
Zaj588edIIDQ01evXqZZ2rf/zxR8Pf39947LHHjIyMDOvYpKQko3///kbnzp2t/1a+++476/O3nG8YhrF06VLrs7b
"text/plain": [
"<Figure size 1440x1080 with 9 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"street = [\"Artery\", \"Feedr\"]\n",
"railway = [\"RRNn\", \"RRAn\", \"RRNe\", \"RRAe\"]\n",
"park = [\"PosA\", \"PosN\"]\n",
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Condition 1\", hue=\"Condition 1\",\n",
" col_order=[\"Norm\"] + street + railway + park,\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract factor variables *major_street*, *railway*, and *park*."
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"df[\"major_street\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(street) | df[\"Condition 2\"].isin(street),\n",
" \"major_street\",\n",
"] = 1"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"df[\"railway\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(railway) | df[\"Condition 2\"].isin(railway),\n",
" \"railway\",\n",
"] = 1"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"df[\"park\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(park) | df[\"Condition 2\"].isin(park),\n",
" \"park\",\n",
"] = 1"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Condition 1\"]\n",
"del df[\"Condition 2\"]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"major_street\", \"railway\", \"park\"])\n",
"interesting_variables.append(\"major_street\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"major_street 264\n",
"railway 94\n",
"park 60\n",
"dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>major_street</th>\n",
" <th>railway</th>\n",
" <th>park</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" major_street railway park\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exterior\n",
"\n",
"This dimension describes the main material the houses are made of. The category is too diverse, and the various grouped scatter plots did not reveal differing slopes. For simplicity, this variable is dropped.\n",
"\n",
"This variable actually also represents tags that could be associated with a house (up to two different tags, stored in *Exterior 1st* and *Exterior 2nd*)."
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEVCAYAAAAo63jjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4FNX6wPHvlG3Z3fQCIZSEJgICkaYEELGjqChSJDbsggIqICI2VFDQewXkYsECNlDw51WsWOhFBaVJkxZSgPTdbJ2Z3x8LC7m0QBIIcD7P43OTyZmZs9zdefe090iGYRgIgiAIQhWQT3cFBEEQhLOHCCqCIAhClRFBRRAEQagyIqgIgiAIVUYEFUEQBKHKiKAiCIIgVBkRVARBEIQqI4KKIAiCUGVEUBEEQRCqjHq6K3CqdejQgTp16pzuagiCIJxRdu/ezfLly49b7pwLKnXq1GHOnDmnuxqCIAhnlF69elWonOj+EgRBEKqMCCqCIAhClRFBRRAEQagy59yYiiAIZ4dAIEBWVhZer/d0V+WsYrVaSUlJwWQyndT5IqgIgnBGysrKwul00qBBAyRJOt3VOSsYhkF+fj5ZWVmkpqae1DVE95cgCGekMk8Z0THRIqBUIUmSiIuLq1TrTwQVQRDOOLvcuynwFZHtzSOoB093dc4qlQ3SIqgIgnBGCepB/rNpBgEjgCvgxqOJMZWaRAQVQRDOKKqs0jmxAxD6Vm2RzUcsZ+gGRlAP/acbVV6P5cuXc9FFF5GZmRn+7+GHHz5q+Y0bN7Jy5coKX3/QoEEnXTePx0Pfvn3ZunXrMcvNnDnzpO9xNGKgXhCEM06nxHbsKNpOI2cDFEk5cqGggZ7jBwPkWiawHqVcJXTs2JHXXnutQmW///574uPjadeuXYXKT548+aTqtGbNGp5++mny8vKOW3bq1KkMGDDgpO5zNKKlIgjCGcdpcqDICibZhCwd+TFm+HTY30AxyvRTVrdgMEjfvn1ZuHAh+/bto0ePHuzevZu5c+fy3nvv8ddff7FixQr69evHgAEDeOKJJwgEAsyZM4dbb72Vfv36sXTpUjp16gTA+vXrw2UHDhxIdnY2WVlZXHfddWRmZvLWW2+Vu7/f72fKlCmkpaWFj23bto2+ffsyYMAA+vfvT05ODlOnTqW4uJhnnnmmSl+/aKkIgnBWkmwKWDQwQIqs+lYKwLJly8jMzAz/3rVrV+6++24mTJjA/fffT0JCAsOHD6dOnTrceOONxMfH07JlS6666io++ugj4uLi+Ne//sXcuXNRVZXIyEimTp1a7h6jR4/mhRdeoFmzZvz444+MGzeO4cOHs3fvXj7//HPM5vLdfxdeeOFh9VyyZAkXXHABjz/+OL/99hulpaU88MADzJw5UwQVQRCEipBUCRJDD1xJqZ5px0fr/kpJSSE9PZ3Vq1fTpUuXcn8rKChgz549DBkyBACv18vFF19M/fr1j7g2ZM+ePTRr1gyAdu3aMXHixPA9/jegHM3NN9/MW2+9xd13343T6WTo0KEn9DpPhOj+EgThrCUpUrUFlGNZvXo1mzdvpl27dkyfPj1UF0lC13ViYmKoVasWb7zxBjNmzOD++++nY8eOAMjy4Y/kxMRE/v77bwBWrlxJgwYNjlr2aObPn8+FF17I+++/z1VXXcXbb78NhBY7VjXRUhEEQThJ/9v9BfDGG2/w5JNPMnnyZJKTk+nduzft27enRYsWvPzyyzRs2JAnn3ySe++9F8MwsNvtvPzyy+Tk5BzxHmPHjuX555/HMAwUReHFF1884Xq2aNGCESNGMHXqVHRd54knngCgYcOGPPbYY0yYMOHEX/xRSEZ1hKoarFevXmI/FUE4C2zYsCHcLSRUrSP921b02Sm6vwRBEIQqI4KKIAiCUGVEUBEEQRCqjAgqgiAIQpWpltlfc+bMYe7cuQD4fD42bNjAjBkzeOGFF1
AUhYyMDAYNGoSu6zzzzDNs3LgRs9nM2LFjqV+/PqtXr65UWUEQBOE0MarZM888Y3zyySdGz549jR07dhi6rht33323sW7dOuO7774zRowYYRiGYaxatcq4//77DcMwKl32WG688cZqfLWCIJwq69evP91VOGsd6d+2os/Oau3+WrNmDVu2bKFHjx74/X7q1auHJElkZGSwZMkSfv/9dzp37gxA69atWbt2LS6Xq9JlBUEQ/lfw9/V4n/8P3mEv433+PwR/X19l137rrbfIyMjA5/MdtczIkSNZsGDBYcd//fVXbr/9dm677TZuueUWvvzySwAmTZrExx9/XGV1PFWqdfHjtGnTeOihh3C5XDgcjvBxu93Orl27DjuuKEqVlBUEQThU8Pf1BGd9C4H9G3oVloR+B9QLz6/09b/88kuuueYavv76a3r16nVC5z799NN8+eWXREZG4nK5uP7668PJJM9E1RZUSkpK2LZtGx07dsTlcuF2u8N/c7vdREZG4vV6yx3XdR2Hw1HpsoIgCIcKzltwMKAcEAgSnLeg0kFl+fLl1KtXj759+/L444/Tq1cvPvzwQ7744gtkWaZly5aMHj0agI8++oh33nkHTdN44YUXqF+/Pk6nkw8++IArr7ySRo0a8c0334Rzes2fP59vv/2WoqIiHnnkES699FKuuOIK0tPT2bZtG3FxcUyaNIlAIMDw4cPZs2cPtWvXZuXKlSxatKhSr+tkVVv318qVK7nooosAcDgcmEwmdu7ciWEYLFq0iLZt25Kenh5uDq5evZomTZpUSVlBEIRyCktO7PgJmD17Nr179yYtLQ2z2cyff/7JnDlzeOqpp/j0009JS0sjGAwFtPT0dN5//33uueceXnnlFQCmT5+Ox+Nh2LBhZGRkMG3atHBOrqSkJN5//31GjRoV7grbtWsXjzzyCJ9++ikFBQWsWbOGTz/9lJSUFD755BMGDRpEfn5+pV/Xyaq2lsq2bdtISUkJ//7ss8/y2GOPoWkaGRkZtGrVipYtW7J48WL69u2LYRjhnDaVLSsIglBOTOSRA0hM5Xo2iouLWbBgAQUFBcyYMQOXy8XMmTN56aWXmD59Oi+//DKtW7cOB4kDX3rbtGnDyy+/THFxMdnZ2Tz++OM8/vjj5OXlMXjwYJo3bw4Q/t/4+Hi83tC2yTExMdSuXRuA2rVr4/P52Lp1azgbcsOGDYmNja3U66qMagsqd999d7nfW7duzaxZs8odk2WZ55577rBzK1tWEAThUOo1XcqPqQCYVNRruhz9pAr48ssvuemmmxgxYgQQ2sa3e/fuOBwOnn32WSwWCwMHDmTVqlUA/PXXX6Snp/Pbb7/RuHFj/H4/Q4cOZdasWcTHx5OQkEB8fHy4+0uSDs+wfKRjTZo0YdWqVVx22WXs3LmTwsLCSr2uyhBZigVBOOsdGDcJzlsQarHERKJe06XS4ymzZ8/m5ZdfDv9us9m44ooriIuLo3///tjtdpKSkmjVqhVz5szhzz//5LbbbkOSJF588UUSEhJ48sknue+++1BVFU3TuOSSS8jIyAgHooq4+eabGTlyJLfeeivJyclYLJZKva7KEFmKBUE4I4ksxQf98ccflJWVkZGRwfbt27n77rv58ccfT/p6lclSLFoqgiAIZ7i6desybNgwJk+eTDAYZMyYMaetLiKoCIIgnOESEhKYMWPG6a4GIBJKCoIgCFVIBBVBEAShyoigIgiCIFQZEVQEQRCEKiOCiiAI54TgH9/je+kmfCM643vpJoJ/fF9l165IluIjycrK4pZbbjnsuNfrZeTIkdx1113069ePhx9+OLygsaYnmxRBRRCEs17wj+/R5oyHojzAgKI8tDnjqyywHJqluCp8/vnnxMfHM336dD7++GPS09OZMmVKlVy7uokpxYIgnPW076ZB4H9aEQEf2nfTUNOvqNS1K5qleOTIkRiGQU5ODmVlZYwfPx6LxUJBQQEPPvgge/fupWnTpowdO5
b4+Hg+++wz0tPTad++PZmZmeH8YX6/n0cffZTs7Gyio6N5/fXX+c9//kNWVhb5+flkZ2fzxBNPhPefOtVES0UQhLN
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 1st\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEVCAYAAAAo63jjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3WdgFNXawPH/zGzNbnqDEEpCEwGBSFMQROx6UVCkaMTeIIqogIgoiDRBvQJ60SuKoFTB14IFsVAURAUvTUCkhRBK+m62zsz7YclCpCVkQxI4vy+G2TMzZ2N2nj3tOZKu6zqCIAiCEAJyVVdAEARBOH+IoCIIgiCEjAgqgiAIQsiIoCIIgiCEjAgqgiAIQsiIoCIIgiCEjAgqgiAIQsiIoCIIgiCEjAgqgiAIQsgYqroC51qHDh2oU6dOVVdDEAShRtm/fz9r1649Y7kLLqjUqVOHxYsXV3U1BEEQapRevXqVqZzo/hIEQRBCRgQVQRAEIWREUBEEQRBC5oIbUxEEoeby+XxkZmbidruruirnLYvFQnJyMkaj8azOF0FFEIQaIzMzk/DwcBo0aIAkSVVdnfOOruvk5OSQmZlJSkrKWV1DdH8JglBjuN1uYmNjkSQJv+ZH1dSqrtJ5RZIkYmNjK9QSFEFFEIQaRZIkvKqXvc797Hdl49f8VV2l80pFW4AiqAiCUKPous5hTw5u1YPD58SlivGV6kSMqQiCUKNIkoTdYKPAW4QkSZhl00nLabqOqmuAjizJKFLFv0OvXbuWwYMH06hRo+Cx6Oho3njjjZOW37ZtG4WFhbRr165M1x80aBDTpk0rd70+//xzZs2ahaIoNGnShBdffBFZPvP7nTt3LkeOHCEjI6Pc9zwVEVQEQahx7AYbjcJTkCRQJOWkZbyan11FRwCderZYbEZzSO7dsWNHXnvttTKV/eabb4iLiytzUDmbgOJ2u3n99df57LPPsFqtDBkyhO+//57u3buX+1qhIIKKIAg1jiIrKJw8mJRw+b3o6AA4/J6QBZWT8fv93HXXXQwcOJBmzZoxYMAA3n77bZYsWYLRaKR58+a43W5ee+01FEWhbt26jBkzhs8++4yPP/4YTdN4/PHHefrpp1m9ejVbtmzhpZdeQlEUzGYzL730Epqm8eijjxIVFUWXLl148MEHATCZTMybNw+r1Rqsi9lsZvHixfz444+43W727t3Lgw8+SK9evfj1118ZN24cERERKIpC69atQ/q7EEFFEITzkt1oIczrQkMn2hQWsuuuWbOG9PT04L+7du3KAw88wOTJk3nkkUeIj49n6NCh1KlTh549exIXF0fLli25/vrr+eijj4iNjeX1119nyZIlGAwGIiIieOutt0rdY+TIkbz88ss0a9aMb7/9lgkTJjB06FAOHz7Mxx9/jMl0rMtPlmXi4uIAmD17NsXFxXTq1IklS5bgcDh499132b17N4888gi9evVi9OjRvPHGG6SkpPDCCy+E7PdSQgQVQRDOS0ZZIdkWDYBBPn2rpjxO1f2VnJxMWloaGzZsoEuXLqVey83N5dChQwwePBgIdFldfvnl1K9f/6TrQQ4dOkSzZs0AaNeuHVOmTAne4/iAUkLTNF555RV27drF1KlTgzO4LrroIgBq166N1+sF4MiRI8F7pqWlsXfv3rP6PZyKmP0lCMJ5yyArIQ0op7NhwwZ27NhBu3btmDlzJhCYVKBpGtHR0dSqVYs333yT2bNn88gjj9CxY0eAkw6oJyQk8OeffwKwbt06GjRocMqyAKNGjcLj8fDmm28Gu8FK7v9PiYmJ7Ny5E4CNGzee/Rs+BdFSEQRBKId/dn8BvPnmmzz33HNMmzaNpKQkevfuTfv27WnRogWTJk2iYcOGPPfcczz00EPouo7NZmPSpEkcOHDgpPcYO3YsL730ErquoygK48aNO2V9Nm/ezKJFi2jbti0DBgwA4O
677z5l+TFjxjB06FDsdjs2m43IyMiz+C2cmqTruh7SK1ZzvXr1EvupCEINtXXr1mC3kFB5TvZ7LuuzU3R/CYIgCCEjgoogCIIQMiKoCIIgCCEjgoogCIIQMpUy+2vx4sUsWbIEAI/Hw9atW5k9ezYvv/wyiqLQuXNnBg0ahKZpvPjii2zbtg2TycTYsWOpX78+GzZsqFBZQRAEoYrolezFF1/U582bp/fo0UPfs2ePrmma/sADD+ibN2/Wv/76a33YsGG6ruv6+vXr9UceeUTXdb3CZU+nZ8+elfhuBUGoTFu2bKnqKlwQTvZ7Luuzs1K7vzZu3Mhff/3FTTfdhNfrpV69ekiSROfOnfnpp5/47bffuOKKKwBo3bo1mzZtwuFwVLisIAgCgP+3Lbhf+g/uIZNwv/Qf/L9tCdm133nnHTp37ozH4zllmeHDh7NixYoTjv/4448MGDCAu+++mzvuuINPP/0UgKlTpzJ37tyQ1bEqVOrixxkzZjBw4EAcDgd2uz143GazsW/fvhOOK4oSkrKCIAj+37bgX/AV+I5u4pVXGPg3YLj04gpf/9NPP+XGG2/kiy++oFevXuU694UXXuDTTz8lIiICh8PBLbfcQqdOnSpcp+qg0oJKYWEhu3btomPHjjgcDpxOZ/A1p9NJREQEbre71HFN07Db7RUuKwiC4F+64lhAKeHz41+6osJBZe3atdSrV4++ffvyzDPP0KtXLz788EM++eQTZFmmZcuWjBw5EoCPPvqId999F1VVefnll6lfvz7h4eF88MEHXHfddTRq1Igvv/wymNNr+fLlfPXVV+Tn5/PEE09w1VVXce2115KWlsauXbuIjY1l6tSp+Hw+hg4dyqFDh6hduzbr1q1j1apVFXpfoVBp3V/r1q3jsssuA8But2M0Gtm7dy+6rrNq1Sratm1LWlpasGm4YcMGmjRpEpKygiAI5BWW73g5LFy4kN69e5OamorJZOKPP/5g8eLFPP/888yfP5/U1FT8/kBAS0tLY9asWTz44IO88sorAMycOROXy8WQIUPo3LkzM2bMQD+a3CQxMZFZs2YxYsSIYFfYvn37eOKJJ5g/fz65ubls3LiR+fPnk5yczLx58xg0aBA5OTkVfl+hUGktlV27dpGcnBz89+jRo3n66adRVZXOnTvTqlUrWrZsyerVq+nbty+6rgfz21S0rCAIAtERJw8g0RXrzSgoKGDFihXk5uYye/ZsHA4Hc+bMYfz48cycOZNJkybRunXrYJAo+aLbpk0bJk2aREFBAVlZWTzzzDM888wzHDx4kIyMDJo3bw4Q/G9cXBxud2Cr5OjoaGrXrg0EMg57PB527twZzIbcsGFDYmJiKvS+QqXSgsoDDzxQ6t+tW7dmwYIFpY7JssyYMWNOOLeiZQVBEAw3dik9pgJgNGC4scupTyqDTz/9lNtuu41hw4YB4HK56N69O3a7ndGjR2M2m7n//vtZv349AP/73/9IS0vj119/pXHjxni9Xp588kkWLFhAXFwc8fHxxMXFBbu/TpZZ+GTHmjRpwvr167n66qvZu3cveXl5FXpfoSKyFAuCcF4qGTfxL10RaLFER2C4sUuFx1MWLlzIpEmTgv+2Wq1ce+21xMbG0r9/f2w2G4mJibRq1YrFixfzxx9/cPfddyNJEuPGjSM+Pp7nnnuOhx9+GIPBgKqqXHnllXTu3DkYiMri9ttvZ/jw4dx5550kJSVhNlfezpblIbIUC4JQY4gsxcf8/vvvFBcX07lzZ3bv3s0DDzzAt99+G5JrVyRLsWipCIIg1EB169ZlyJAhTJs2Db/fz6hRo6q6SoAIKoIgCDVSfHw8s2fPrupqnEAklBQEQRBCRgQVQRAEIWREUBEEQRBCRgQVQRAEIWREUBEE4bzl//0bPONvwzPsCjzjb8P/+zchu3ZZshSfTGZmJnfccccJx91uN8OHD+e+++6jX79+PP7448EFjTUp2aQIKoIgnJf8v3+Dungi5B
8EdMg/iLp4YsgCy/FZikPh448/Ji4ujpkzZzJ37lzS0tKYPn16SK59LokpxYIgnJfUr2eA7x+tCJ8H9esZGNKurdC
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 2nd\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Exterior 1st\"]\n",
"del df[\"Exterior 2nd\"]\n",
"# Also discard the associated ordinal variables.\n",
"del df[\"Exter Cond\"]\n",
"del df[\"Exter Qual\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Foundation\n",
"\n",
"The type of foundation appears to have an effect. However, only three of the six realizations occur in large numbers. Factor variables *found_BrkTil*, *found_CBlock*, and *found_PConc* are extracted for these three but are not regarded as \"interesting\"."
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd8U1X/wPFPdtO9SymrlCF7WHZZKqAiW6CoBWSo/AQF5ZEhRUCBIgoqKIIiIChTQJYgAsoUkCWjMgpdjO6VNDv390ceIn2opaWtrPN+vXyZ3px770lI8+1Z3yOTJElCEARBEMqA/F5XQBAEQXh4iKAiCIIglBkRVARBEIQyI4KKIAiCUGZEUBEEQRDKjAgqgiAIQpkRQUUQBEEoMyKoCIIgCGVGBBVBEAShzCjvdQX+bS1atCAkJOReV0MQBOGBcvXqVQ4fPnzHco9cUAkJCWH9+vX3uhqCIAgPlN69exernOj+EgRBEMqMCCqCIAhCmRFBRRAEQSgzj9yYSmEsFgvJyckYjcZ7XZWHiouLC5UqVUKlUt3rqgiC8C8RQQVITk7Gw8ODatWqIZPJ7nV1HgqSJJGRkUFycjKhoaH3ujqCIPxLRPcXYDQa8fPzEwGlDMlkMvz8/ETrTxAeMaKl8l8ioJQ98Z4K5SXHlEOmMRMXpQveGm+0Su29rpLwX6KlIgjCA+fnhJ/pvrE7z/7wLCn6lHtdHeEWIqjcJ5KTk2natClRUVHO/+bPn1+m9zCZTDzxxBNFllm9ejUWi4XY2Ngyv78glAWr3crJlJOOx5KVxLzEIstLkvRvVEv4L9H9dR+pUaMGy5cvv6d1WLhwIT179qROnTrUqVPnntZFEAqjlCsZ2WQk6cZ0glyDqO9fv9ByWcYs1l5YS74ln4F1B+Kr9f2Xa/poEkHlPhcTE8OxY8cAeO655xg0aBDjx4/n2WefpV27duzdu5dt27YRExND586dadq0KVeuXMHPz4958+ZhNBoZO3Ysubm5VKlSxXndI0eOMH/+fCRJQq/X8/HHH/PHH3+QlpbGmDFjGDRoEKtWrWLu3Lls2rSJZcuWoVarqVatGtOmTWPz5s389ttvGI1GEhMTGT58eLHTOAhCaQW7B/NR+49QyBS4qlwLLXPw2kHmnZgHgJvKjeENh/+bVXxkiaByH7l06RJRUVHOn3v37k1ycjJr1qzBarXywgsv0LJly388PykpiWXLlhEcHExkZCSnT5/m2LFj1KpVizFjxnDq1ClnQriLFy8ye/ZsgoKC+PLLL9m+fTsjRoxgwYIFzJ07l5MnHd0LWVlZzJs3jw0bNuDu7s6MGTNYvXo1rq6u6HQ6Fi9eTHx8PK+99poIKsK/ykPtUeTz3hpv52NfF9FK+beIoHIf+d/ur6+//prw8HBkMhkqlYpGjRoRFxdX4Jxb+4t9fHwIDg4GIDg4GJPJRHx8PO3btwegUaNGKJWOf/KgoCCmT5+Oq6srKSkpNG3atNA6JSUlUaNGDdzd3QFo1qwZ+/fvp1GjRjz22GPOe5nN5jJ6FwShbNT3r8+SLksw2ozU9yu8i0woe2Kg/j4WFhbm7PqyWCycOHGCqlWrolarSUtLA+DcuXPO8oVN4Q0LC3O2Os6dO4fVagUgOjqaGTNmEBMTQ2BgoDM4yWQy7Ha78/xKlSoRFxdHfn4+4Og2u7mYUUwZFu5nXhovwiuEExESgbeL951PEMqECCr3sY4dO1KpUiX69+9P//796dKlC/Xq1aNv374sXbqUwYMHk5JS9HTKAQMGkJSUxIABA/juu++cKVO6d+/Oiy++SGRkJHq9ntTUVADCw8N55ZVXnEHG19eXUaNGMXDgQPr160dWVhYDBgwo3xcuCMIDSyY9YvPtevfufdt+KrGxsWKmUzkR760gPBwK++4sjGipCIIgCGVGBBVBEAShzIigIgiCIJQZEVQEQRCEMlMu61TWr1
/Phg0bAEe+qdjYWJYvX8706dNRKBREREQwcuRI7HY7U6ZM4fz586jVaj744AOqVq3KyZMnS1VWEARBuEekcjZlyhRp1apVUvfu3aWEhATJbrdLw4YNk86ePSvt2LFDGjdunCRJknTixAnptddekyRJKnXZovTq1eu2Y+fOnSvLlyzcQry3gvBwKOy7szDluqL+9OnTXLp0ibfffpulS5c6c09FRERw8OBB0tLSaNu2LQCNGzfmzJkz6HQ6zGZzqcrWrVu3PF9WuTh8+DCjR4+mRo0aSJKE2WxmypQpBV7L+vXruXz5MmPHjnUei4mJ4ezZs6SlpWE0GqlcuTI+Pj6MGDGCXbt2MXLkSNq0acOBAwfuxcsSBOERU65BZeHChbz++uvodDpnmg8ANzc3kpKSbjuuUCjKpGx523jiKrN3nOdatoGK3lr+06U2PZuElPq6LVu2ZO7cuQDs37+fTz/9lIULFxZ5zvjx44HCA45YHyIIwr+t3Abqc3NzuXLlCi1btsTd3R29Xu98Tq/X4+npedtxu91eJmXL08YTV5mw/jRXsw1IwNVsAxPWn2bjiatlep/c3Fx8fX2JiorizTffZPDgwdhsNgAyMzOJjIzk0KFD/3j+4cOHGTNmTJnWSRAE4U7KLagcPXqUVq1aAeDu7o5KpSIxMRFJkti/fz/h4eE0bdqUvXv3AnDy5Elq1apVJmXL0+wd5zFYbAWOGSw2Zu84X+pr//7770RFRdG/f38mTJhA165dAUfK+6VLl6JQKMjIyGDEiBFMmDDB+f4KgiDcL8qt++vKlStUqlTJ+fPUqVMZO3YsNpuNiIgIGjVqRIMGDThw4ACRkZFIksSMGTPKpGx5upZtKNHxkri1++vy5ctERkZStWpVZwJHgH379hEQEFAg6aMgCML9otyCyrBhwwr83LhxY9asWVPgmFwuZ9q0abedW9qy5amit5arhQSQit7aMr2Pv7+/8/Gt2YB79uxJjx49GD16NGvXrsXVtfANigRBEO4FsfixhP7TpTZalaLAMa1KwX+61C71tW92fw0aNIghQ4Ywfvx4XFxcbitXs2ZNunfvzsyZM0t9T0EQhLIkshRT8ky65TX762EkshQLwsOhuFmKxc6Pd6FnkxARRARBEAohur8EQRCEMiOCiiAIglBmRFARBEEQyowIKoIgCEKZEUFFEARBKDNi9td95OLFi8yePRuDwUB+fj7t27enV69e9OjRg3r16iFJEvn5+bz99tu0adOG8ePH8+yzz9KuXbti3yMqKoopU6YQFhZWjq9EEIRHlQgqd+PPNbBrGuQkg1cleHIyNOxXqkvm5uby1ltvMW/ePKpVq4bNZuPNN99k//791KhRg+XLlwOO9DejRo1iy5YtZfFKBEEQypTo/iqpP9fA5jcgJwmQHP/f/IbjeCns2rWLFi1aUK1aNcCR2n/WrFm0bNmyQLmb2YtvZbFYGDt2LJGRkfTt25dt27YBcOrUKfr370/fvn0ZOXIkRqPRec7u3buJiooiNze3VPUWBEG4lWiplNSuaWD5n9xfFoPjeClaK6mpqVSuXLnAMTc3N1QqFZcuXSIqKgqr1UpsbCyTJk0qUG716tX4+vry0UcfodPp6N27Ny1btmTy5MnMmTOHsLAw1q5dS1xcHAA7d+7k6NGjLFy4UOQOEwShTImgUlI5ySU7XkwVK1bk3LlzBY4lJSVx48aNAt1faWlp9OrVq0Da+7i4OFq3bg04thkICwsjKSmJ9PR059hJ3759neUPHTqETqdDqRT//ELRLDYLKoXqXldDeICI7q+S8qpUsuPF1LFjR/bt20diYiLg6NKKiYnhwoULBW/j5YVGo3Fu2AUQFhbGH3/8AYBOp+PChQtUqlSJwMBA4uPjAVi0aBE7d+4EYPLkyURERPDZZ5+Vqs7Cw0tn1rErYRfvHniXuOw47JLYakEoHvGnakk9OdkxhnJrF5hK6zheCu7u7sTExDBp0iQkSUKv19OxY0
fatWvHxx9/TFRUFDKZDIPBQL9+/ahSpYrz3H79+hEdHc2AAQMwmUyMHDkSPz8/pk6dysSJE5HL5QQEBDB48GC+/fZ
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Foundation\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PConc 1282\n",
"CBlock 1242\n",
"BrkTil 310\n",
"Slab 48\n",
"Stone 11\n",
"Wood 5\n",
"Name: Foundation, dtype: int64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Foundation\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABEgAAAFuCAYAAACSti1aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xlc1OX6//HXMCyyqWHu4tYpyV1xK1GTNI/mqbRF02/lybJNLNPQMi13o9IUO2p26qillaalpVYupVgqkRxNxU4uuYGoBMKwDDDz+4PfTCCL7OPA+9nDBzGf7ZoPPO5hrrnu6zZYrVYrIiIiIiIiIiLVmIujAxARERERERERcTQlSERERERERESk2lOCRERERERERESqPSVIRERERERERKTaU4JERERERERERKo9JUhEREREREREpNpTgkRKZP369bRq1eqa/7Zt2+boUItl9uzZtGrVin379pX6HBERERw8eND+/b59+2jVqhWzZ88ujxAdZvLkyQX+bNu2bcttt93G6NGj+eGHHwo9/sKFC4SHh3PffffRtWtX2rdvz913381bb73Fn3/+WYnPRMQ5aHzNr6qOr7nt37+fCRMm0L9/f9q1a0fXrl159NFH2bhxI1arNc++Z8+eLXRcvuOOO5g8eTJnzpzJd43g4GC6dOlSoc/DFtuzzz5bodcRqQgaf/OrquNvYX/fdu3alWHDhvHZZ5/lG3uLc76jR48Wuk9xf79s/6Dg+10ZY7mAq6MDEOfUrVs3unXrVuj2Fi1aVGI0jrN69WqmT5/Ou+++a3+scePGjB07lg4dOjgwsvIzZMgQGjdubP8+KyuL2NhYtm7dSkREBLNmzeLBBx/Mc8x3333H5MmTSUlJoVu3btx7770A/PLLLyxfvpwvvviCjz/+mGbNmlXqcxFxBhpfc1T18dVsNjNjxgzWrl2Lt7c3ffr04a677iIhIYEdO3bw0ksv8f333/PWW2/h4pL386zGjRszZMgQ+/dpaWmcPn2aTZs2sXPnTtatW4e/v39lPyURp6fxN0dVH38h79+3WVlZJCUl8dNPPzF16lR+//13XnnllXK71q233srYsWPzPLZt2zZiYmLy/Z1tU9XutzNRgkRKpVu3boSEhDg6DIe7fPlyvseaNGlSpe7NkCFD6N69e77Hhw0bxsiRI5k3bx6DBw/G09MTgMjISMaNG0ft2rX59NNP6dixY57jPvroI2bOnMk///lPtmzZgoeHR6U8DxFnofE1R1UfX6dPn866desIDg5m7ty51K5d274tJSWF5557jq+//pqGDRvy0ksv5Tm2cePGBd6HrVu38vzzzxMeHk5YWFiFPweRqkbjb46qPv5CwX/fms1mhg0bxsqVKxk5cmS5fZB36623cuutt+Z57Ny5c/YESUF/Z1e1++1MNMVGREolMDCQTp06kZKSQlRUFAAWi4XJkydjsVhYvHhxvuQIwP/93/8xePBgzp07x/r16ys7bBERh9u7dy/r1q3j5ptvZuHChXmSIwA+Pj4sXLgQLy8vPv7442JPSxwwYAC+vr5ERkZWRNgiIlWau7s799xzD1arlZ9//tnR4YiDKEEiFc5sNrN06VIGDRpE27Zt6d69O8888wyHDh3Ks59tft5//vOffOd45JFHaNWqFVeuXAH+muscHh7O9u3beeCBB2jfvj233XYbr776KgkJCfnOsW7dOu655x46dOjAXXfdxSeffFJozDt37uSJJ56gR48etGnThh49evDss8/mmV/4yCOPsHjxYgCee+65IucMApw8eZKJEydy++2307ZtW/r160dYWBjJycl59rPNZUxKSuK1116jZ8+etGvXjqFDh/LNN98UcafJc/1r/SvLvFSb+vXrA5CYmAjk/NF/9uxZunfvTmBgYKHHPf3007zyyiv5MuYHDx7k2WefpXv37rRr145BgwaxdOlSzGZznv0eee
QRgoODiYuLY8KECXTv3p0OHTowcuTIAp9XQkICc+bMITg4mPbt2zNgwAAWLFiAyWQq6y0QcSiNrzmcbXxdt24dAE888QTu7u4F7lO7dm1ef/11Zs+ejZub2zVjAzAYDLi4uBR6ztwsFgurV6/mvvvuo3379gQGBvLPf/6TPXv2FLj/tm3beOSRR+jSpQvdu3dn1KhR10zEWK1WXn75ZVq1asWkSZOwWCzFeh4izkDjbw5nG3+vxWg0AuQZR20/wy1btjB69GjatWtH3759C+z5BBATE0PXrl3p2rUrv/76a6niqCo9X5yRpthIhcrIyOCf//wnUVFR3HLLLTz88MNcunSJbdu2sXv3bt555x369etX6vPv3LmTf/3rX9xxxx10796dPXv2sHbtWn7//fc8LxDvvPMOS5YsoXHjxjzwwANcvHiRGTNm4Ofnl++ctikgTZs2ZfDgwbi5uXHo0CG2b9/O3r172bp1K/Xq1bPP/96/fz+DBg2iZcuWhcb53//+l1GjRpGenk7fvn3x9/cnOjqaf//73+zcuZM1a9bk+wTxn//8J4mJiQwcOJDU1FQ2bdrE888/z/vvv09QUFCh17LNWbyWguY7ltTp06eBvxIlu3btAigyPoCbb76Zm2++Oc9j27Zt4/nnn8fFxYV+/fpx4403snfvXhYsWMDu3bv58MMP87xYmUwmRowYgaenJ/fddx+XLl1i8+bNjB49mg0bNtjPf/HiRYYNG8a5c+fo3r07AwYM4MiRIyxdupT//ve/vP/++7i6aigU56PxNYczjq+7d+8Grj1W2vo3Fde2bdtISkpi+PDhRe5nsVgYP348W7duxd/fn/vvv5/U1FS2b9/O6NGjmTp1KiNHjrTvv2zZMubPn0+dOnUYMGAAHh4efPXVV4waNYr33nuPnj17FnidN954g/Xr1zN48GDmzp2br5eKiLPS+JvDGcffopjNZjZt2oSXl1eBscyaNYt69erxyCOPcPbs2QJ7PZ06dYrRo0eTnZ3Nhx9+SNu2bUsdjziG3hVIqezfv5/w8PACtw0ZMoQmTZoA8P777xMVFcXQoUOZOXOm/Y3o4cOHGTFiBC+//DI9evTAx8enVHEcPnyYd955h4EDBwLwwgsvMGTIEA4cOMDx48e56aabOHXqFMuXL+fWW29l5cqV1KxZE8h58XnmmWfynM9sNrNgwQKaN2/Ohg0b8PLysm97/fXXWbNmDTt37mTYsGEMHTqUc+fOsX//fu6+++5CXwizs7MJDQ3FbDazbNkyevfubd/21ltvsXz5csLCwpgzZ06e44xGI1999ZU9httuu42JEyfy+eefF/kCUllzFrdt28bhw4epV6+evYFUXFwcUPImZikpKbzyyivUqFGDlStX0qZNGyCnadbkyZPZtGkTy5cv57nnnrMfk5iYSGBgIAsXLrR/unrzzTezYMECvvzySyZOnAjAm2++yblz53j55ZcZNWqU/fhp06bx6aefsmPHDu66665S3weR8qbxtWqPr+np6SQmJuLj48ONN95YqnOcO3cuz+9IZmYmp06dYvv27fTs2TPPWFmQjRs3snXrVoKCgggPD7ffhzNnzvDwww8zZ84cevfujb+/PydPnmTRokW0bNmSlStXUrduXQAee+wx7rvvPubNm8emTZvyXeNf//oXH374IQMGDOCNN95QckScgsbfqj3+5rZhwwb2798P5CSNk5OT2b17N/Hx8bz55pvccMMN+Y5xdXVl9erV9r57V7tw4QKPP/44qampLF++XA1WnZQSJFIq+/fvtw8qV+vWrZv9BWTDhg14enoyZcqUPJ/St2nThhEjRvDBBx/w7bffMnTo0FLF4e/vb3/xAHBzc+O2227jf//7H+fOneOmm25i69atZGVl8fTTT9tfPAD69u1LUFCQ/ZM8yBnsZ86cSb169fK8eNie15o1awpsXFWUAwcOcOrUKe699948Lx4A48aNY+PGjWzatInXX389T4XEyJ
Ej88TQp08fIOcP48qU+wUEcl5kjx07RkREBK6urnnitpVTent7l+gatk89n332WXtyBHJeiF555RW+++47Pv/883x
"text/plain": [
"<Figure size 1440x360 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Foundation\", hue=\"Foundation\",\n",
" col_order=[\"PConc\", \"CBlock\", \"BrkTil\"],\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"foundation = pd.get_dummies(df[\"Foundation\"], prefix=\"found\")\n",
"# Only keep the top 3 realizations.\n",
"del foundation[\"found_Slab\"]\n",
"del foundation[\"found_Stone\"]\n",
"del foundation[\"found_Wood\"]\n",
"df = pd.concat([df, foundation], axis=1)\n",
"del df[\"Foundation\"]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"found_BrkTil\", \"found_CBlock\", \"found_PConc\"])"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" found_BrkTil found_CBlock found_PConc\n",
"Order PID \n",
"1 526301100 0 1 0\n",
"2 526350040 0 1 0\n",
"3 526351010 0 1 0\n",
"4 526353030 0 1 0\n",
"5 527105010 0 0 1"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[foundation.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garage Type\n",
"\n",
"As can be expected, the *Garage Type* looks very similar to the above *has Garage* variable. Therefore, it is dropped."
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd4VNXWh9+paZOQRgslEHqHiBQJKE0QP5qIFAkXEfAqHVQQkKKIICIqKCCIIlUi5YIgKCjSDGBBlN4ChCSkl5lMJlPO98dOAQkhpJiE7Pd58uTMmT377BnIWbNX+S2VoigKEolEIpEUAuriXoBEIpFIHh6kUZFIJBJJoSGNikQikUgKDWlUJBKJRFJoSKMikUgkkkJDGhWJRCKRFBrSqEgkEomk0JBGRSKRSCSFhjQqEolEIik0tMW9gH+b1q1bU6VKleJehkQikZQqbt68ybFjx+47rswZlSpVqrB169biXoZEIpGUKp555pk8jZPuL4lEIpEUGtKoSCQSiaTQkEZFIpFIJIVGmYupSCSS0oPVaiU8PJy0tLTiXkqZwdnZmapVq6LT6fL1emlUJBJJiSU8PBx3d3dq1KiBSqUq7uU89CiKQlxcHOHh4dSsWTNfc0j3l0QiKbGkpaXh4+MjDcq/hEqlwsfHp0A7Q7lTkUgkJZocDYrDBnYbqNSg1ogfSaFQUAMujYpEIil9mBMh6QaggvL1pVEpQUijIpFISheKAummzAdgt4DOOffxBfj2fePGDRYuXEhUVBTOzs44Ozvz2muvUadOnXzPmV+2b9/Oli1bsFgsXLp0iUaNGgHw/vvvU7FixX99PTkhjYpEIildqFTgXhkcVtDoQeea8zi7DVJjQXGAW3nQPHg2k9ls5uWXX+btt9+mRYsWAJw6dYq33nqLtWvXFuRd5Is+ffrQp08fwsPDmTRpUrGs4X5IoyKRSEofWj141QBU93Z9WZIhJVIcq9TgXumBL/PTTz/Rpk2bLIMC0LRpU7766isALly4wPz587Hb7SQkJDB79mwCAwPp2LEjAQEB1KpVi2effTbHMSEhIaxfv55y5cqh0+no0aMHPXv2ZNasWVy7dg2Hw8GECRNo3br1fdd5+PBhNm/ezMcffwzAwIED+eijjxg8eDDNmjXj+vXr1KlTh3feeQeTycT06dNJSEgAYMaMGdSrV++BP5t7IY2KRCIpnajvc/u6/fn7jb0H4eHhVK9ePevxyy+/jNFoJDo6mjVr1nDp0iWmTJlCvXr12LlzJ1u3biUwMJDIyEi2bt2Kl5cXu3fvvmtMjRo1WLVqFdu3b0ev1zN06FAAQkJC8PLyYt68eSQkJDBkyBB27dp133W2a9eOuXPnkpSURHR0NF5eXlSsWJFbt24xfvx4/P39GT9+PPv27ePPP/+kTZs2DB48mLCwMN544w02btyYr88nJ6RRkUgkDyc6V/CpI9xf93KR3YdKlSrx999/Zz1etmwZAM899xw2m40KFSrw6aef4uzsjMlkwmAwAODl5YWXlxdAjmOuX79OrVq1cHFxAcjaCV24cIHffvuNU6dOAWCz2YiPj8fb2zvXdapUKnr16sW3335LeHg4zz77LACVK1fG398/6xpXr17lwoULhIaG8t133wGQlJSUr8/mXsg6FYlE8nCi0YKTAZw9xHE+6Ny5M7/88gsnT57MOnft2jWioqJQqVS88847jBs3jgULFlC3bl0URQFArc6+teY0pnr16ly5coW0tDQcDkeWEQkICODpp59m7dq1rFy5ku7du+Pp6Zmntfbr1489e/Zw4sQJHn/8cQBu3bpFTEwMAL///ju1a9cmICCAYcOGsXbtWj788EN69eqVr8/mXsidikQikdwDNzc3li1bxqJFi3j//fex2WxoNBreeOMNqlSpQq9evRg/fjweHh5UqlQpK05xOzmN8fb2ZuTIkQwePBhPT08sFgtarZ
aBAwcyY8YMhgwZgtFoZPDgwXcYqNyoWLEibm5uNG/eHK1W3Nr1ej1vv/02kZGRNGvWjE6dOhEYGMj06dPZvHkzRqORMWPGFOpnhlLG6Nu3b3EvQSKR5JEzZ84U9xKKBKvVqnz66aeKoiiKw+FQBg0apBw/frzA844aNUoJCwvLevzYY4/la56cPve83jvlTkUikUj+ZbRaLWazmb59+6LT6WjatCktW7bM93xpaWkMHjyY1q1bZ8VQigtpVCQSiaQYmDRpEpMmTSqUuZydnXPsaHvkyJFCmf9BkIF6iUQikRQa0qhIJBKJpNAoEvfX1q1b2bZtGwAWi4WzZ8+ydu1a3nnnHTQaDUFBQYwZMwaHw8Hs2bM5f/48er2euXPn4u/vz8mTJws0ViKRSCTFRL5SAx6A2bNnK5s2bVJ69eqlXLt2TXE4HMqIESOU06dPK3v37lWmTJmiKIqi/PHHH8p///tfRVGUAo/NDZn9JZGUHh7W7K+STonN/vrrr7+4dOkSkydP5ssvv8ySOwgKCuLo0aPExMTQvn17AJo3b87ff/+N0WgkPT29QGMbNmxYlG9LIpGUEaxWK9OmTePmzZukp6fz/PPPZwXEz549S40aNXBxcaFXr17079+/mFdbMihSo7JixQpGjx6N0WjMki8AUVB048aNu85rNJpCGSuRSMom2/+4ycK954lINOPn6cJr3erRp0WVfM+3Y8cOPD09WbhwIYmJifTp04cDBw4AEBwczOzZs6lVq1Yhrf7hoMiMSnJyMlevXqVNmzYYjUZMJlPWcyaTCQ8PD9LS0u4473A4MBgMBR4rkUjKHtv/uMkbW//CbLUDcDPRzBtb/wLIt2Hp3r073bp1A0T/do0mZ0Vkh8NBt27dCAkJwdPTkw0bNmAymbh8+TKKohAZGUlqaioLFiygVq1arF27lm+//RaVSkWPHj0YOnQo33//PStXrkSr1VKhQgUWL16c52r6kkSRrfjEiRO0bdsWAIPBgE6n4/r16yiKwuHDh2nZsiWBgYEcPHgQgJMnT1K3bt1CGSuRSMoeC/eezzIomZitdhbuPZ/vOd3c3DAYDBiNRsaNG8eECRNyHKdWq+nZs2eWovCOHTvo27cvANWqVeOrr75i7NixLFy4kEuXLrF79242bNjA+vXr2bdvH1euXOHbb7/lxRdfZOPGjXTs2BGj0ZjvdRcnRbZTuXr1KlWrVs16PGfOHF599VXsdjtBQUE0a9aMJk2acOTIEQYOHIiiKMybN69QxkokkrJHRKL5gc7nlcjISEaPHs3gwYPp2bPnPcf169ePSZMm8eijj+Lr64uvry8Abdq0AYRK8Lx587hw4QIREREMGzYMECrB165d44033mDFihWsW7eOgIAAunTpUqB1FxdFZlRGjBhxx+PmzZuzefPmO86p1Wreeuutu15b0LESiaTs4efpws0cDIifp0u+54yNjWX48OHMnDkzy/NyL6pUqYK7uzvLly/Pkp4HOH36NC1btuT333+nTp06BAQEULt2bVatWoVKpeLLL7+kXr16fP3114wdOxYfHx9mzpzJDz/8kLXbKU1ImRaJRPJQ8Fq3enfEVABcdBpe65b/robLly8nOTmZTz/9lE8//RSAlStX4uzsnOP45557jrlz57Jw4cKscwcPHmT//v04HA7effddqlWrRtu2bRk0aBDp6ek0bdqUihUr0rRpU1566SXc3NxwdXXliSeeyPe6ixNpVCQSyUNBZjC+MLO/ZsyYwYwZM3J8Lqf+8Ha7nX79+t0R0P/Pf/5Dhw4d7hg3YsSIu7w5nTp1olOnTvlea0lBGhWJRPLQ0KdFlQIZkYLwwQcfcOzYMZYvX14s1y8pSKMikUgkhUBOisPz588vhpUUL6UvCVoikUgkJRZpVCQSiURSaEijIpFIJJJCQxoViUQikRQa0qhIJBJJHli5ciVBQUFYLBYAzp8/z4kTJwAhS3Xu3Ll7vrZdu3a5zn358mWCg4MLb7
HFiDQqEonk4eHUZljcGGZ7it+nCk9tY8eOHfTo0SNL3+v777/n0qVLAGzZsoXo6OhCu1ZpRqYUSySSh4NTm2HnOLB
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Garage Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Garage Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Heating\n",
"\n",
"Most of the houses have gas heating, so this variable is not helpful."
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd8zPcfx583klz2NBMj9qaxK6IoSq0atUpRtJTSamv8SlV3Fa1qjVZbo7Vas1Wq9qjYRYyakSASWXKXnbvfH+8MKSKSiwSf5+Phkbvvfe57nwu+7+97vd4ai8ViQaFQKBQKK6At7A0oFAqF4tFBGRWFQqFQWA1lVBQKhUJhNZRRUSgUCoXVUEZFoVAoFFZDGRWFQqFQWA1lVBQKhUJhNZRRUSgUCoXVUEZFoVAoFFZDX9gbeNA0btwYb2/vwt6GQqFQPFRcuXKFwMDAe6577IyKt7c3q1atKuxtKBQKxUNFt27dcrVOhb8UCoVCYTWUUVEoFAqF1VBGRaFQKBRW47HLqSgUCkUGKSkphIaGkpiYWNhbKTIYDAZ8fHywsbHJ0/uVUVEoFI8toaGhODs7U758eTQaTWFvp9CxWCxERkYSGhqKr69vns6hwl8KheKxJTExEU9PT2VQ0tFoNHh6eubLc1OeikKhePhIiAZjBNg6gL2H/MwjyqBkJ7+/D+WpKBSKh4+gNfB1Q/iyLty8Uti7UdyCMioKheLhIi0VQtI7u82pEHUx5/UWS8HvKY8EBgby+uuvZzv2+eef33eD9vLly0lJSeHUqVPMnj3bmlu8b1T4S6FQPFzo9NDyf2AMB5fS4O1353WmSDj0PSTHQ9MR4Fjswe7zATJv3jy6du1K9erVqV69eqHuRRkVhULx8OFWBnr+AFo92Dreec35rbD1A3ls6wgBbz64/VmB6dOnc/DgQcxmMwMHDqR9+/bs37+f2bNnY7FYMJlMmWsiIiJ4/fXXefHFF1m2bBkzZ86kbdu2+Pn5cfHiRTw9Pfnqq69ISUnh7bffJjw8nFKlSnHgwAF2795t1X0ro6JQKB5ODK45v+7gnvW4CHsp+/bto3///pnPQ0JCGDZsGKGhoSxdupSkpCSef/55mjVrxtmzZ5k2bRolSpRg7ty5bNy4keHDhzNnzhxmzpzJ0aNHs51n4cKFlCpVit69e3P8+HH++ecffHx8mDVrFufPn6djx45W/z7KqCgUikeT0vVh4AZITYDSTxT2bu5KkyZNmDlzZubzzz//HJPJRFBQUKaxSU1N5cqVK5QoUYIPP/wQBwcHrl+/jp/fXUJ/gLu7O6VKlQKgVKlSJCUlcf78eQICAgCoWLEiHh4eVv8+yqgoFIpHEwd3KN+ssHeRJ+zs7GjcuDHvv/8+ZrOZb775hjJlyjB48GA2b96Mk5MT48aNw5JehKDRaDCbzdnOcafS4CpVqnDkyBGefvppLl++THR0tNX3rqq/FAqFoojh6OiIg4MDffv2zZScd3JyonPnzvTr14/evXtjMpkIDw8HoEGDBgwbNizTyNyNHj16cOXKFfr168dXX32FnZ2d1feusdxrF48Y3bp1U/NUFAoFAKdOnSr0aqkHyeHDh4mPj8ff359Lly4xZMgQ/vrrr9vW3en3kttrpwp/KRQKxWNCmTJleOONN5g9ezapqalMnjzZ6p+hjIpCoVA8JhQrVozFixcX6GeonIpCoVAorIYyKgqFQqGwGgUS/lq1ahWrV68GICkpiVOnTrF48WI+/PBDdDod/v7+jBw5ErPZzJQpUzhz5gy2trZ88MEHlCtXjqNHj+ZrrUKhUCgKCUsBM2XKFMuyZcssnTt3tgQHB1vMZrNlyJAhlqCgIMumTZss48aNs1gsFsuRI0csr7zyisViseR7bU4899xzBfhtFQrFw8TJkycLewtFkjv9XnJ77SzQRP3x48c5d+4cY8eO5ccff6Rs2bIA+Pv7s3
fvXiIiImjevDkA9erV48SJExiNRpKTk/O1tkaNGgX5tRQKhcIqhIaG0rlzZ2rWrJl5rHHjxpw/fz5bl/3DRIEalXnz5vHqq69iNBpxcnLKPO7o6EhISMhtx3U6nVXWKhQKRUGw5sgVpm06w9WYBEq72fNWu6p0fcI7X+esVKlStoqswMBAzp8/n9+tFhoFZlRu3rzJxYsXadKkCUajEZPJlPmayWTCxcWFxMTEbMfNZjNOTk75XqtQKBTWZs2RK0xYdZyElDQArsQkMGHVcYB8G5a7sW7dOhYuXIitrS3ly5dn6tSpAEyYMIHQ0FDS0tIYNGgQHTp0oH///nh4eBAbG8uCBQvQ6XQFsqd7UWDVXwcOHKBp06aAyAvY2Nhw+fJlLBYLu3fvpkGDBvj5+bFz504Ajh49SpUqVayyVqFQKKzNtE1nMg1KBgkpaUzbdCZf5z137hz9+/fP/HP9+nUAoqOj+eqrr1i4cCFLly7F2dmZ5cuXs3z5cjw8PFi2bBk//PADX3zxBVFRUQB07NiRH3/8sdAMChSgp3Lx4kV8fHwyn7/33nu8+eabpKWl4e/vT926dalduzZ79uyhd+/eWCwWPvroI6usVSgUCmtzNSbhvo7nljuFv0Ck6ytVqpQZ4m/YsCG7d+9Gq9Xy5JNPAnLDXrFixcywv6+vb772Yg0KzKgMGTIk2/N69eqxYsWKbMe0Wm2mO2fNtQqFQmFtSrvZc+UOBqS0m32BfJ6Pjw/nz58nPj4eBwcH9u/fj6+vL3q9noMHD9KmTRuMRiP//vtv5g38nZSJHzRKpkWhUChywVvtqmbLqQDY2+h4q13VAvk8Dw8PRo0axYABA9BqtZQtW5Y333wTjUbDpEmT6NOnD0lJSYwcORJPT88C2UNeUCrFCoXiseV+VYoLovqrKKJUihUKheIB0PUJ70fSiFgTpf2lUCgUCquhjIpCoVAorIYyKgqFQqGwGsqoKBQKhcJqKKOiUCgUCquhqr8UCoWiEAkJCWHatGmEhYVhMBgwGAy89dZbVK5c+b7Ok5SURKtWrRg0aNBtzecPEuWpKBQKRW45tgJm1oIpbvLzWP7UPBISEhg+fDiDBg1ixYoVLFq0iJEjR95RPeRebNq0iQ4dOrB69WrMZnO+9pUflFFRKBSK3HBsBax/DWJDAIv8XP9avgzLtm3baNKkCU888UTmsTp16rBo0SL+/fdfBg8ezIsvvkjnzp05fPgwIArFffv2pVu3bqxZsybzfStXrqR79+5Uq1aNHTt25HlP+UWFvxQKhSI3bJkKKf/R/kpJkON1ns/TKUNDQzOHDAIMHz4co9FIeHg4r7zyCuPGjaNq1aqsX7+eVatWUaVKFQ4cOJCpd7hnzx4ALl26REJCAtWqVaN79+58//33tGzZMm/fM58oo6JQKO5OWjLobAt7F0WD2ND7O54LSpYsyYkTJzKfz5kzB4Dnn3+eMmXK8M0332AwGDCZTDg5OeHk5MTEiROZNGkSRqORzp07A+KlJCQk8NJLLwFw+PBhgoODKVeuXJ73lleUUVEoFLeTFAcXtkPQamjxNnhWAe1jHi139UkPfd3heB5p3bo13377LUePHqVevXoABAcHExYWxttvv823335LxYoVmTVrFleuXCE8PJygoCC+/vprkpKSaNGiBZ07d2bDhg2sXr0aNzc3QIzTzz//zIQJE/K8t7yijIpCobidhBhY0R8sFrj2Dwz6A5yKF/auCpfWkyWHcmsIzMZejucRR0dH5syZw/Tp0/n8889JTU1Fp9MxYcIEwsLCGD16NC4uLpQsWZLo6GiKFStGREQEvXv3RqvVMnjwYLZt20bNmjUzDQqI+GOXLl0YM2YM9vYFI81/N5RRUShuxWIBU7g8dvACbeFN0CtUdHqwdYakm+Bc6vH9PdxKRt5ky1QJebn6iEHJYz4lAx8fH2bOnHnH1wYNGnTbsTtVhrVt2zbb8xIlSrBv37587SuvKKOieDxJSYSEKE
ADjsXkIgoQfQm+bweWNBi0Ebxy0SuQlgxaGygCA5KshkMxGL4Hrp8Ebz9wKDrzOgqVOs/n24g86jzmQVLFY8v1E/B
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Heating\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Heating\"]\n",
"# Also discard the associated ordinal variable.\n",
"del df[\"Heating QC\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### House Style\n",
"\n",
"In summary, this variable is very similar to the above derived variable *has 2nd Flr*. Therefore, it is dropped."
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4FNUawOHfbEuyJb3SQkLoIsVIUXoRlc4FAhEQUbEAChaaVAt20YuigCAKIjWgQa80UekgRRAQaQkEAullk+07948NC5EAgSSEwHmfx+cms2dmzu4N8+1p35FkWZYRBEEQhFKgKO8KCIIgCHcOEVQEQRCEUiOCiiAIglBqRFARBEEQSo0IKoIgCEKpEUFFEARBKDUiqAiCIAilRgQVQRAEodSIoCIIgiCUGlV5V+BWa9asGZUrVy7vagiCIFQoZ8+eZefOndctd9cFlcqVKxMXF1fe1RAEQahQevfuXaxyovtLEARBKDUiqAiCIAilRgQVQRAEodTcdWMqgiBUXDabjaSkJMxmc3lX5Y7l6elJlSpVUKvVN3W+CCqCIFQYSUlJGAwGqlevjiRJ5V2dO44sy6Snp5OUlERERMRNXUN0fwmCUGGYzWYCAgJEQCkjkiQREBBQopagaKkIglChSJKEbLcjOxwgSUgqFZJCfD8uLSUN2CKoCIJQ4ThycrCdOweShEdUFJKHR3lXSSggwrsgCBWKLMs48/Mv/oJstV63fGnZuXMno0ePLnTsgw8+uGULqs1mM+PGjWPo0KEMGDCAF154gczMTADWr1/PhQsXijwvKSmJfv363ZI6iqAiCEKFIkkSquBgFHo9Sj8/FF5eRZaT7XZsKSnYL1xAtttvcS3LxsqVKwkMDGT+/Pl89913NGnShM8++wyAb775BqPRWM41FEFFEIQKSKHRoKlaFXVoKJKq6F58h9GIPSUFe1oa9ozMW1Kvd955h759+9K3b1++/vprAMaNG8fvv/8OwO+//864ceMAGD9+PLGxsfTu3ZvVq1cDsGvXLgYMGMDAgQMZP348Nput0PUDAwPZunUrv/zyC0ajkUGDBjFu3Dh+/fVXjhw5wtixY1m6dCnvvvuu6zNwOOjWrRsWi8V9jevdo6TEmIogCBWSpFQW+3VJde2yN2LHjh0MGjTI/fuZM2d44YUX2LRpE0lJSSxbtgy73U5sbCzNmzcv8hpGo5Hdu3ezbNkyALZu3Yosy0yaNInFixcTEBDAxx9/zKpVqwp1W3Xu3BlJklixYgXjx4+nVq1aTJw4kbZt21K3bl2mTp1KSEgIvXv35pVXXmHz5s00a9YMj4Ixp+Lco6REUBEE4Y6k8PJCExEBTudVu8huRvPmzZkxY4b79w8++ACAEydOEB0djSRJqNVqGjZsyIkTJwqde3F8R6/XM2HCBCZNmoTRaKR79+5kZGSQkpLCqFGjANf4yQMPPFDo/H379tGiRQseeughHA4H33//PePHjy80pqPX67n//vvZsmULcXFxPP/88+7XinOPkhJBRRCEO5KkUqG8StdYWahRowZxcXEMGTIEm83Gvn376NWrFxqNhtTUVAAOHz4MQEpKCocOHeKzzz7DYrHQpk0bunXrRmhoKLNmzcJgMLBx40a0Wm2he/z444/4+voyYsQIlEoltWvXRqPRuN6vJLmDVr9+/Zg7dy6ZmZnUqVOHpKQkAPz8/K57j5ISQUUQBKEUtGvXjl27dhETE4PNZuPhhx+mfv369O3blwkTJhAfH0/16tUBCAoKIjU1lf79+6NQKBg6dCgajYbXXnuNYcOGIcsyOp2O9957r9A9Ro0axRtvvEGPHj3w8vJCq9Xy1ltvAdC4cWPGjBnD/PnzadiwIYmJiTz22GOFzlcoFNe9R0lJcmnOt6sAevfuLfZTEYQK6siRI9StW7e8q3HbczqdDBgwgHnz5qHX62/4/KI+5+I+O8XsL0EQhD
vImTNn6NWrF48++uhNBZSSEt1fgiAId5CqVavy/fffl9v9RUtFEARBKDUiqAiCIAilpky6v+Li4li1ahUAFouFI0eOsHDhQt566y2USiUtW7ZkxIgROJ1Opk6dytGjR9FoNLz55puEh4ezf//+EpUVBEEQyolcxqZOnSovWbJE7t69u5yYmCg7nU75qaeekg8dOiSvXbtWHjt2rCzLsrxv3z752WeflWVZLnHZa+nVq1cZvltBEMrS4cOHy7sKd4WiPufiPjvLtPvr4MGDHD9+nC5dumC1WqlWrRqSJNGyZUu2bdvGnj17aNWqFQCNGjXir7/+wmg0lrisIAhCWfrzzz/dqVoyMjIYOXIkQ4cOpX///rz22mvuTa6WLl1a6rm1bndlGlRmz57N8OHDMRqNhaa26XQ6cnNzrziuVCpLpawgCALA6n1nefCdX4gY9yMPvvMLq/edLfE1586dy8SJE91JGr/88kseeOAB5s+fz5IlS9BqtSxZsgRwPQOdTmeJ71mRlNmU4pycHE6dOkXz5s0xGo3k5eW5X8vLy8Pb2xuz2VzouNPpRK/Xl7isIAjC6n1nGR93EJPNAcDZLBPj4w4C0LNx5Zu+brVq1Zg5cyZjxowBXJmD165dS3h4OE2aNGHs2LFIksTy5ctJTU1l9OjRzJo1i3feeYc9e/YA0LVrVx5//HHGjRtHVlYWWVlZ1K5dm1q1avHYY4+RnZ3NE088USEXapdZS2X37t20aNECcCU4U6vVnD59GlmW2bJlC9HR0TRp0sSdEnr//v3UqlWrVMoKgiC8v/aoO6BcZLI5eH/t0RJdt3Pnzqguyyk2ZMgQunbtyrx582jVqhUjRowgJSWFvn37EhQUxIwZMwplMF68eDFr1qzh6FFXPZo3b86SJUt46qmn3Cnw16xZQ7du3UpUz/JSZi2VU6dOUaVKFffv06ZN45VXXsHhcNCyZUsaNmxIgwYN2Lp1K/3790eWZaZPn14qZQVBEM5lmW7o+M3asWMHPXv2pE+fPlitVubOncv06dOZOXOmu8y1MhhHREQArkWLOp2O48ePEx8fz6xZs0q1nrdKmQWVp556qtDvjRo1cu8dcJFCoeD111+/4tySlhUEQajk68XZIgJIJd/SS4MPrh0XU1JS6NmzJxqNhpo1a3Ly5EnAlTnY6XReNYPxxTIX9evXj1mzZhESEoK/v3+p1vNWEYsfBUG4I73auTZe6sKbc3mplbzauXap3mfatGmsW7eOnj170r9/f1atWuUeb4mOjmbYsGG0bduWKlWqEBMTQ0xMDJ07d6Z+/fpXXKtjx45s27aNPn36lGodbyWRpVgQhArjRrMUr953lvfXHuVclolKvl682rl2iQbpy5rJZGLgwIEsX74chaL8vvOXJEuxSCgpCMIdq2fjyrd1ELnc3r17mTJlCsOHDy/XgFJSIqgIgiDcBpo0aUJ8fHx5V6PEKm44FARBEG47IqgIgiAIpUYEFUEQBKHUiKAiCIIglBoxUC8IgnCD/vzzTz744AMWLlxY6PiCBQtYvny5e+HitGnTSE1NZdSoUURFRbnLde3albCwMJKTk4mJibmldS9rIqgIgnDnOrAMNr4O2UngUwU6TIZ7+5XoknPnzuWHH37Ay+vKlfl//fUX7777Lvfcc4/7WGpqKs2bN2fGjBklum9FIbq/BEG4Mx1YBvEvQPYZQHb9b/wLruMlcDFLcVEOHTrEnDlzGDBgALNnz77mdeLi4vjggw9ISkoiJiaGF198kd69ezNlypQS1a+8iZaKIAh3po2vg+1fub9sJtfxErRWOnfuTFJSUpGvdenShdjYWPR6PSNGjGDTpk1otVp27Njh3tQLXN1kl0tISGDevHl4eXnRsWNHUlNTCQoKuuk6licRVARBuCqnzYZCrS7vatyc7KIf/Fc9XkKyLPP4449jMBgAaNOmDYcPHyY6Ovq63V/VqlVzbzgYFBTk3gCsIhLdX4IgXMFhNJKzfj3J48
ZhOXECuSLuXuhT5caOl5DRaKRr167k5eUhyzI7d+4sNLZyLZdnKq7oRFARBOEKzpwczr7wIjk//kTSiBE4MjLKu0o
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"House Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"del df[\"House Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Land Contour\n",
"\n",
"This variable is assumed to contain the same information as the ordinal variable *Land Slope* and is dropped."
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3XdgTef/wPH33TN7yCISxKoVQdSmaGlVlaKtDh2qKP2WUv3W+vZH22+rg3476aClqA5V1aFKqFFbkBoJMmSve3P3Pb8/LtGURiI3BM/rr+Tc55zz3Cvu5zzr88gkSZIQBEEQBC+QX+0KCIIgCNcPEVQEQRAErxFBRRAEQfAaEVQEQRAErxFBRRAEQfAaEVQEQRAErxFBRRAEQfAaEVQEQRAErxFBRRAEQfAa5dWuwJXWqVMnIiMjr3Y1BEEQrikZGRls3779kuVuuKASGRnJ6tWrr3Y1BEEQrilDhgypUjnR/SUIgiB4jQgqgiAIgteIoCIIgiB4zQ03pnIxDoeD9PR0rFbr1a7KDUmr1RIVFYVKpbraVREEoYZEUAHS09Px8fGhYcOGyGSyq12dG4okSeTn55Oenk5MTMzVro4gCDUkur8Aq9VKUFCQCChXgUwmIygoSLQSBeE6IVoqZ4mAcvWIz16orqIyO/kmOzq1ggC9Cp1afJXVFaKlIgjCNef7A1n0mf8b3V/5laxi0cqtS0RQqQO2b9/O008/7bXrbdq0iWnTpl1wPCsri4kTJzJq1CiGDRvGrFmzsNvt1b7+zp07OXLkiDeqKgjV5nS52XWy0POzWyItv6zS8pIkXYlqCWeJoHKDcLlcPPnkk4wePZolS5awcuVKlEolb731VrWv9eWXX5KTk1MLtRSES1Mq5Pyrb1O6NwlmeEJ92kT5XbRcgdnOwg1H+e/6FPJNtitcyxuX6Iisw3744Qc+++wznE4nMpmMhQsXcvToUT744ANUKhXp6ekMGDCAsWPHcvz4caZPn45Op0On0+HnV/E/2q5duwgLC6NNmzblx6ZMmYLb7QZg8eLFrF27FqVSSUJCAlOmTGHBggWkp6eTn59PZmYmzz33HAEBAWzevJnk5GQaN27MH3/8wSeffIJaraZhw4bMmTOHNWvWcOLECSZPnozNZuO2225jw4YNjBo1isDAQIqLi1m0aBEKheKKfp7C9SMyQMfCe+NRKmTo/2E8ZfOfubz6458AGDRKxvVqfCWreMMSQaUOS0tL4/3330en0zFjxgySkpKoV68emZmZfPvtt9jtdrp168bYsWN55ZVXeOqpp+jSpQvvv/8+J06cqHCtnJwc6tevX+GYRqMBICUlhXXr1rF8+XKUSiUTJkzg119/BUCtVvPhhx+yZcsWFi9ezKJFi+jWrRsDBgxAp9OxYMECvvrqK4xGI3PnzuWLL75Ar9f/43u6/fbb6du3r5c/KeFG5KurfF2Tv+H860FGdW1XRzhLBJU6LCgoiKlTp2IwGDhx4gRt27YFIC4uDqVSiVKpRKvVAp4A1Lp1awDi4+MvCCoRERH8+OOPFY4VFhayZ88ebDYbbdq0KV98mJCQwNGjRwFo3rw5AGFhYReMv5w+fZrGjRtjNBoB6NChA0lJSRVaQ3/vzxZrUYQrpU2UP188nojV4aJVlP/Vrs4NQ4yp1FGlpaW89dZbvP7667z44otoNJryL+iLTcFt1KgRe/bsAeDgwYMXvN62bVvS09PZv38/4PmyX7hwIX/88QexsbHs378fp9OJJEns3Lmz/Mv/YveSyWRIkkRUVBTHjx+nrMwzULpjxw5iYmLQaDTk5uYCkJycfMG5gnAl+OvVdIoNokfTUAINoqVypYiWSh2xZcuWCqmlX331VeLj4xk+fDhKpRJfX19ycnKIioq66PnTpk1j6tSpLFq0iMDAwPKurXPkcjlvvvkmc+bMwWKxUFZWRtu2bZk0aRJqtZrbbruNkS
NH4na7ad++Pbfccss/zvBq06YNr776Km+88QYTJkzggQceQC6X06BBg/JxlGXLljFy5EhatmyJwWDw3gclCEKdJpNusPl2Q4YMuWA/lcOHD5d38whXh/g3EIS67WLfnRcjur8EQRAErxFBRRAEQfAaEVQEQRAErxFBRRAEQfCaWpn9tXr1ar766isAbDYbhw8fZsmSJfzf//0fCoWCrl27Mn78eNxuN7NmzSIlJQW1Ws2LL75IdHQ0e/furVFZQRAE4SqRatmsWbOk5cuXS4MGDZJOnjwpud1u6dFHH5WSk5Ol9evXS1OnTpUkSZL27NkjPfHEE5IkSTUuW5m77rrrgmOHDh3y5lsWLoP4NxCEuu1i350XU6vdXwcOHODYsWMMHDgQu91OgwYNkMlkdO3ala1bt7Jr1y66desGeBbnHTx4EJPJVOOy16KqZiru3bs3NptIjicIQt1Uq4sf33vvPcaNG4fJZCpP5QFgMBg4ffr0BccVCoVXyta2r/dk8N/1KWQWWYjw1zGlf1MGt4us9fsKgiDUdbXWUikpKSE1NZXExESMRiNms7n8NbPZjK+v7wXH3W63V8rWpq/3ZPDc6gNkFFmQgIwiC8+tPsDXezK8ep8jR44watSo8t/HjBnDoUOHvHoPQRAEb6u1oLJz5046d+4MgNFoRKVScerUKSRJIikpiYSEBOLj49m0aRMAe/fuJS4uzitla9N/16dgcbgqHLM4XPx3fYpX79OsWTPsdjsZGRnk5ORQWFhIixYtvHoPQRAEb6u17q/U1NQKeapmz57N5MmTcblcdO3alTZt2tCqVSu2bNnCiBEjkCSJuXPneqVsbcosslTreE0MHTqUr7/+GrVaXSEvmCAIQl1Va0Hl0UcfrfB727ZtWbFiRYVjcrmcOXPmXHBuTcvWpgh/HRkXCSAR/jqv32vAgAE89NBDyOVyFi1a5PXrC4IgeJvIUlxNU/o35bnVByp0gelUCqb0b1rja/89U/Frr71Gs2bNcDqdFSYkCIIg1FUiqFTTuVle3p791alTJ3bs2HHB8f/85z8Vft+wYUON7iMIglCbRFC5DIPbRYopxIIgCBchcn8JgiAIXiOCiiAIguA1IqgIgiAIXiOCiiAIguA1IqgIgiAIXiOCSh2xfft2OnfuzKhRo7j//vu55557/jHXV3p6Ovfcc88VrqEgCMKliSnFl2P/CvhlDhSng18U9JkBrWv+JZ+YmMjrr78OQFJSEm+++Sbvvfdeja8rCIJwpYigUl37V8Cap8BxNlVL8WnP7+CVwHJOSUkJgYGBjBo1imbNmnH06FFMJhNvvvlmeRmXy8W0adNo0qQJjz/+uNfuLQiCcLlE91d1/TLnfEA5x2HxHK+hbdu2MWrUKIYPH85zzz3HwIEDAWjdujUff/wxXbp0Ye3atQA4nU4mT55M27ZtRUARBKHOEC2V6ipOr97xavhr99eJEycYMWIE0dHR5Snvw8LCyMvLAyAlJQWj0UhZWVmN7ysI/8ThcqNSiGdPoerEX0t1+UVV7/hlCg4OrvT1li1b8v777/Ptt99y5MgRr95bEExWBz8cPMMzK/ZxNLsUt1u62lUSrhEiqFRXnxmg+luae5XOc7yGznV/Pfjgg4wePZpp06ah1Wr/sbxWq2XmzJlMnToVu91e4/sLwjnFFidjP9vFt/syGbNkF/lm29WuknCNEN1f1XVuMN7Ls786derE77//fsHxv6bCHzlyZPnP5/aQSUhI4JtvvqnRvYXzJEki35IPMgjQBKCQK652la4KpUKGUa2k1OYk1FeDUi6eP4WqEUHlcrS+x6szvYQrz+pwUVTmQCaDIIMa5dlxg3RTOg+sewC35ObjWz8mxi/mktdyuNwo5TJkMlltV/uKCTKoWTexGynZpbSJ8ifAoL7aVRKuEeLxQ7ghHckqofsrv9Lntd84VXB+ssPG0xvJs+RRYC1gXeq6S17nVL6ZZ1bsY8Ufpym2OGqzyleUUiEnKl
BPn+b1CPbRXO3qCNeQWmupvPfee2zYsAGHw8HIkSPp2LEj06ZNQyaT0aRJE2bOnIlcLmfhwoVs3LgRpVLJ9OnTad2
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Land Contour\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Land Contour\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Lot Configuration\n",
"\n",
"This variable shows no good pattern and is dropped."
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd0VNXawOHf9JpeCEkgBRJ6lSrNgAVBqlIF8VIUFASU4kWl+HntKIhdVBQQBAQFUVBEpBdp0juEkN4zk+lzvj8GghEMCckQyn7WYhF29pyzZ0jmnd3eLZMkSUIQBEEQKoC8shsgCIIg3D5EUBEEQRAqjAgqgiAIQoURQUUQBEGoMCKoCIIgCBVGBBVBEAShwoigIgiCIFQYEVQEQRCECiOCiiAIglBhlJXdgButZcuWREREVHYzBEEQbikXLlxgx44d16x3xwWViIgIli9fXtnNEARBuKX07t27VPXE8JcgCIJQYURQEQRBECqMCCqCIAhChbnj5lSuxuFwkJSUhNVqreym3LK0Wi2RkZGoVKrKboogCJVIBBUgKSkJHx8foqOjkclkld2cW44kSWRlZZGUlERMTExlN0cQhEokhr8Aq9VKUFCQCCjXSSaTERQUJHp6giCInsolIqCUj3j9hBvKkgOmDFDrQRfo+Vu4KYieiiAIt55D38MHzWF2I8i/UNmtEf5GBJWbwI4dOxg/fnyp6y9YsOCq5evWrWPw4MEMHjyYPn36sGbNmutqz7PPPsvDDz/MokWL+Pbbb6/rGoLgNS4nnL+4s9vthOwzJdeXJO+3SSgihr9uQR999BGDBg0qVrZnzx7mzZvHJ598gsFgICcnh379+lGzZk1q1qxZputv3bqV7du3V2STBaHiKJSQ8AKY0sE3HCKaXr2eOQt2fwH2Qmj9FBhCbmw771AiqNzEtmzZwqxZs9BoNPj7+/Pqq6+ycOFC8vLymD59OtOnTy+qu3TpUoYMGYLBYAAgICCApUuX4uvrS35+PhMnTsRkMuFyuRg7diytW7emW7dutGjRgmPHjiGTyfjwww+ZOXMmJpOJUaNGcd9993H69GkmTJjABx98wLp16wgMDMRisTB27FhatmxZSa+McMfzrwZ9vgS5EtSGq9c5tR7Wv+L5Wm2A9hNuXPvuYGL46yYlSRIvvfQS77//PgsWLKB58+Z89NFHjBo1Cj8/v2IBBSA9PZ1q1aoVK/Pz80Mmk/HRRx9x9913s3DhQmbPns0LL7yAJEmYzWa6du3KggULCA0NZePGjUyfPh0/Pz8++uijouscPXqUTZs2sWzZMj744AMyMjJuxEsgCCXT+v17QAHQB1z+WvRSbhgRVG5SOTk5GI1GqlSpAkDz5s05ceLEv9YPDw8nJSWlWNnu3bs5d+4cp06donnz5gBUqVIFo9FIVlYWAHXr1gWgatWq2Gy2q1771KlTNGjQAIVCgVarpX79+uV+foLgdeF3weM/waDvoM5Dld2aO4YIKjepgIAATCYT6enpAOzcuZPo6GjA04v5p969e/P5559TWFgIQFZWFlOmTMFisVCjRg3+/PNPANLS0sjPz8ff3x8o3VLgmjVrcuDAAdxuN3a7ncOHD1fEUxQE79IHQHQbqHkv6IMquzV3DDGncpPYsmVLsdTSM2fO5JVXXmHMmDHIZDL8/Px47bXXAKhRowYTJkzg7bffLqrfpEkT+vbty9ChQ1EqlVitVp599llq165NWFgYU6ZMYe3atVitVl5++WWUytL/19eqVYsOHTrQt29fAgICUKlUZXq8IAh3Dpl0tY+9t7HevXtfcZ7KkSNHqFOnTiW16OaXlZXFmjVrePTRR7Hb7XTt2pWvvvqK8PDwYvXE6ygIt6+rvXdejfi4KVxTQEAABw8e5OGHH0Ymk9GnT58rAoogCAKIoCKUglwuLxp6EwRBKImYqBcEQRAqjAgqgiAIQoXxyvDX8u
XLWbFiBQA2m40jR44wf/58/ve//6FQKGjbti2jR4/G7XYzffp0jh07hlqt5pVXXiEqKop9+/aVq64gCIJQSSQvmz59urR48WKpe/fu0rlz5yS32y0NHz5cOnTokLR27Vpp8uTJkiRJ0t69e6WRI0dKkiSVu25JevXqdUXZ4cOHK/Ip37HE6ygIt6+rvXdejVeHvw4cOMDJkyfp2rUrdrud6tWrI5PJaNu2LVu3bmX37t20a9cOgMaNG3Pw4EFMJlO5696KkpKS6Nu3b5kec7Ve2aJFi5gzZ05FNUsQBKFMvLr665NPPuHpp5/GZDJhNBqLyg0GA+fPn7+iXKFQVEhdb/t+7wXeWnuM5FwL4f46Jj5Qi55NIrx+3396//33b/g9BUEQSuK1oJKfn8+ZM2do1aoVJpMJs9lc9D2z2Yyvry9Wq7VYudvtxmg0lruuN32/9wL/XX4Ai8MFwIVcC/9dfgCgQgLL4MGDqV27NidOnMBkMjF79myCg4MZO3YsJpMJi8XC+PHjadu2LW3atGHLli38+eefvPrqq/j6+qJQKGjcuDEA8+fP58cff0Qmk9GlSxcee+yxcrdPEAShJF4b/tq1axetW7cGwGg0olKpSExMRJIkNm/eTLNmzWjatCkbN24EYN++fcTHx1dIXW96a+2xooByicXh4q21xyrsHg0bNmTevHm0adOG1atXk5iYSG5uLh9//DHvvPMOLlfx+8+YMYOZM2cyb948IiMjATh58iQ//fQT33zzDQsXLmTdunWcPn26wtooCIJwNV7rqZw5c6boDQ48b3wTJkzA5XLRtm1bGjVqRIMGDdiyZQv9+/dHkiReffXVCqnrTcm5ljKVX49LmYPDwsLIzMwkLi6Ofv368eyzz+J0Ohk8eHCx+pmZmcTExADQtGlTEhMTOX78OMnJyTz++OMA5OXlce7cOWJjYyusnYIgCP/ktaAyfPjwYv9u3LgxS5YsKVYml8t5+eWXr3hseet6U7i/jgtXCSDh/jqv3fPYsWOYzWY+/fRT0tPT6d+/PwkJCUXfr1KlCqdOnaJGjRocOHAAPz8/YmNjqVmzJnPnzkUmkzFv3jxq1arltTYKgiCASNNSZhMfqFVsTgVAp1Iw8QHvvWFHR0fzwQcf8PPPP+N2u3nmmWeKff/ll19m0qRJGI1GDAYDfn5+1K5dm9atWzNgwADsdjsNGzYsOptFEATBW0SWYsqeXfdmWf11sxFZigXh9iWyFHtRzyYRIogIgiBchcj9JQiCIFQYEVQEQRCECiOCiiAIglBhRFARBEEQKowIKoIgCEKFEau/biInTpzgrbfewmKxUFhYSIcOHRgzZgwymayymyYIglAqoqdyPf5aAu/Wh+n+nr//Kv+O/vz8fJ599lmmTJnC/PnzWbJkCcePH2fx4sUV0GBBEIQbQ/RUyuqvJbDqGXBcTNWSd97zb4CGZTsP5e9+++03WrZsSXR0NOBJ7f/GG2+gUql4/fXX2b17NwAPPfQQQ4YM4fnnnyc3N5fc3FyGDRvGt99+i0qlIikpiS5dujBq1ChSUlJ46aWXsNlsaDQa/u///g+Xy8WoUaPw9/enffv2jBgxojyvhiAIQjEiqJTVby9fDiiXOCye8nIElfT0dKpVq1aszGAw8Pvvv5OUlMSSJUtwOp0MHDiQVq1aAdCqVSsef/xxduzYQXJyMitXrsRut9OuXTtGjRrFG2+8weDBg+nQoQPbtm3j7bffZvz48WRkZPDdd9+hVquvu72CIAhXI4JKWeUlla28lMLDwzl8+HCxsvPnz3Po0CGaNWuGTCZDpVLRqFEjTp06BVCUmRggPj4epVKJUqlEq9UCcPz4cT755BPmzp2LJEkolZ7/7sjISBFQhFJxuByoFKrKboZwCxFzKmXlF1m28lJKSEhg06ZNJCYmAuBwOHj99dfx9fUtGvpyOBzs3buXqKgogGIT+FebzI+NjWXChAnMnz+fGTNm0LlzZ8CT8VkQSmKym/
jt3G+8sOUFTuWewi25K7tJwi1C9FTKqtPU4nMqACqdp7wcjEYjr7/+Oi+++CKSJGE2m0lISGDw4MGkpKTQr18/HA4
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Lot Config\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Lot Config\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS SubClass\n",
"\n",
"By looking at this variable's realizations, one can see that several distinct features are lumped together in one. In particular, the above variables *has 2nd Flr* and *build_type_\\** and the future age related features at the bottom of this notebook together should comprise the same patterns in a more advantagous way. Thus, the column is dropped."
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1-STORY 1946 & NEWER ALL STYLES',\n",
" '1-STORY 1945 & OLDER',\n",
" '1-STORY W/FINISHED ATTIC ALL AGES',\n",
" '1-1/2 STORY - UNFINISHED ALL AGES',\n",
" '1-1/2 STORY FINISHED ALL AGES',\n",
" '2-STORY 1946 & NEWER',\n",
" '2-STORY 1945 & OLDER',\n",
" '2-1/2 STORY ALL AGES',\n",
" 'SPLIT OR MULTI-LEVEL',\n",
" 'SPLIT FOYER',\n",
" 'DUPLEX - ALL STYLES AND AGES',\n",
" '1-STORY PUD (Planned Unit Development) - 1946 & NEWER',\n",
" '1-1/2 STORY PUD - ALL AGES',\n",
" '2-STORY PUD - 1946 & NEWER',\n",
" 'PUD - MULTILEVEL - INCL SPLIT LEV/FOYER',\n",
" '2 FAMILY CONVERSION - ALL STYLES AND AGES']"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(ALL_COLUMNS[\"MS SubClass\"][\"lookups\"].values())"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS SubClass\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS Zoning\n",
"\n",
"This variable is dropped as most houses are located in a \"residential\" zone."
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RL 2252\n",
"RM 459\n",
"FV 131\n",
"RH 27\n",
"C 25\n",
"I 2\n",
"A 2\n",
"RP 0\n",
"Name: MS Zoning, dtype: int64"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"MS Zoning\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xl8jNf+wPHP7Gsmm5DNksS+iyWiaFG01FK1ttWq7r9WqxuqreJWUa1uuvd2QymqVaVoLbU2aCliJ5ZIIiSZJLNvz++PMOTaQiYict6vV183eebMec7MlfnO2b5HJkmShCAIgiAEgLy8GyAIgiDcPERQEQRBEAJGBBVBEAQhYERQEQRBEAJGBBVBEAQhYERQEQRBEAJGBBVBEAQhYERQEQRBEAJGBBVBEAQhYJTl3YDrLSkpiZiYmPJuhiAIQoVy4sQJUlJSrliu0gWVmJgYFi5cWN7NEARBqFD69etXonJi+EsQBEEIGBFUBEEQhIARQUUQBEEImEo3pyIIws3H7XaTnp6Ow+Eo76ZUeFqtltjYWFQq1TU9XwQVQRAqvPT0dIKCgqhVqxYymay8m1NhSZJETk4O6enpxMXFXVMdYvhLEIQKz+FwEB4eLgJKKclkMsLDw0vV4xM9FUEQKhyHpRBbvhmlRosuKAhABJQAKe37KIKKIAgVzr6/NvDHFzOQKxQ8+PZH5d0c4Txi+EsQhArF5/WQsW/3mZ+9mLMyL1tekqQyb1NKSgr16tVjyZIlxa736tWLMWPGALBjxw6GDx/OsGHDGDBgAF999dUF9Xz77bcMHTrU/19SUhLTpk27qrbs2bOHGTNmXPuLKSXRUxEEoUKRK5TcMvB+rPlmgsLCiaxdl6MnMi4o5/V6sBcUIPl86INDUCjL9uMuPj6eJUuW0LNnTwD27duH3W73Pz5x4kSmTp1KQkICbrebwYMH07ZtWxo2bOgv8+CDD/Lggw8CsHnzZl577TUefvjhq2pHgwYNaNCgQQBe0bURQUUQhArHFFGVXs+ORqaQo9bq4CJBxWWzY8nNAUAml2MMDSvTNtWvX5+0tDQKCwsJCgril19+oVevXmRmFvWkqlSpwuzZs+nXrx8NGjRgzpw5qNXqi9aVkZHBmDFj+OijjwgLC6OgoICXXnoJi8WC1+vl2WefJTk5mV69etGmTRv27duHTCbj448/Zvfu3cydO5d3332Xbt26kZiYSFpaGuHh4Xz44Ye43W5GjRpFdnY2UVFRbNmyhfXr1wfsfRDDX4IgVEgag6EooFyCXCE/72fF9WgS3bp1Y8WKFUiSxI4dO2jRooX/sbfffpvw8HDGjx9Pu3btmDp1Ki6X64I6nE4nTz/9NC+88IK/x/HJJ5/Qrl07Zs+ezfvvv88rr7yCJElYrVZ69uzJrFmzqFq1KmvXri1W1/Hjx3n22Wf54YcfyM3NZefOnfzwww/ExsYyd+5cnn76aXJycgL6HoigIgjCTUml0RIWHUtoVDRag/G63LNXr14sXbqULVu20KpVK/91p9NJamoqTz31FAsWLGD58uVkZGTwww8/XFDHuHHjSE5O9g+jARw6dIjWrVsDUK1aNYxGoz8YnB0+i4qKwul0FqsrNDSUqKioYo8fOnSIxMREABISEggLC2wPTgQVQRBuSnKFArVOh0ZvuG49lerVq2Oz2Zg5cya9e/f2X5fJZLz00kukpaUBEBISQkxMzAXDX9999x2nT5/mhRdeKHY9ISGBrVu3AnDy5EkKCgoICQnx130pF3usbt26bNu2DYBjx46Rl5d3Da/00sSciiAIQgD16NGDRYsWERcXx/HjxwFQq9W89957jB07Fo/Hg0wmo0mTJtxzzz3Fnjt16lTq1avnn6wHSExM5PHHH2fs2LEsX74ch8PBxIkTUV7jwoP+/fszZswY7rvvPqKjo9FoNNf+Yi9CJl2P9XY3kH79+onzVAThJrNnz5
5yXfFUkfzzzz/YbDbat2/PkSNHeOSRR/jjjz+KlbnY+1nSz07RUxEEQahEqlevzvPPP8+MGTPweDyMGzcuoPWLoCIIglCJREREMHPmzDKrX0zUC4IgCAEjgoogCIIQMGUy/LVw4UJ++uknoGh99p49e5g5cyaTJk1CoVDQvn17nn76aXw+H+PHj2ffvn2o1WreeOMNatasyfbt20tVVhAEQSgnUhkbP368NHfuXKl3797S0aNHJZ/PJz3yyCNSamqqtHz5cmn06NGSJEnStm3bpCeeeEKSJKnUZS/n7rvvLsNXKwhCedi9e3d5N+GmcrH3s6SfnWU6/LVz504OHjxIz549cblc1KhRA5lMRvv27dm4cSN///03HTp0AKB58+bs2rULi8VS6rKCIAjl4YsvvqB9+/YX7GyvTMo0qHz22Wc89dRTWCwWjMZzaRIMBgOFhYUXXFcoFAEpKwiCcDk/bzvBLVNWETdmCbdMWcXP204EpN5ffvmFHj16XJACvzIps6BSUFBAWloabdu2xWg0YrVa/Y9ZrVZMJtMF130+X0DKCoIgXMrP207w8sKdnDDbkYATZjsvL9xZ6sCSkpJCjRo1GDx4MLNnzw5MYyugMgsqW7ZsITk5GQCj0YhKpeLYsWNIksT69etp1aoViYmJ/qya27dvp27dugEpKwiCcCnTlu/D7vYWu2Z3e5m2fF+p6p0/fz4DBgwgPj4etVrNv//+W6r6Kqoy2/yYlpZGbGys//cJEybw4osv4vV6ad++Pc2aNaNJkyZs2LCBwYMHI0kSb775ZkDKCoIgXEqG2X5V10siPz+ftWvXkpuby8yZM7FYLMyaNatSfh6J3F+CIFR4V5P765YpqzhxkQASE6Jjw5jO13T/mTNnkpGRwejRowGw2+106dKFX3/9NeCp5a+H0uT+EpsfBUGoVF7qXg+dqngqfJ1KwUvd611znfPnz6dPnz7n6tPp6NatG/PmzbvmOisqkftLEIRKpW+LGKBobiXDbCc6RMdL3ev5r1+LX3755YJr48ePv+b6KjIRVARBqHT6togpVRARLk0MfwmCIAgBI4KKIAiCEDAiqAiCIAgBI4KKIAiCEDAiqAiCIAgBI1Z/CYIgBMCBAweYNm0adrsdm83GrbfeyogRI5DJZOXdtOtK9FQEQah8dsyDdxvD+JCi/91Ruk2KBQUFPP/884wdO5aZM2cyb9489u/fz9y5cwPU4IpD9FQEQahcdsyDxc+A+0yqlvzjRb8DNB14TVWuXLmSpKQkatWqBRQdzTF16lRUKlUAGlyxiJ6KIAiVy8qJ5wLKWW570fVrlJ2dTfXq1YtdMxgMqNXqa66zohJBRRCES/J63OXdhMDLT7+66yUQHR1NVlZWsWvHjx9ny5Yt11xnRSWCiiAIF3DabBzYvJFlH71LTvoxJJ+vvJsUOMGxV3e9BDp16sS6des4duwYAG63mylTprB///5rrrOiEnMqgiBcwGmz8Mv0ySBJnEw7xKDxUzCEhJZ3swKjy7jicyoAKl3R9WtkNBqZMmUKr776KpIkYbVa6dSpE/fee28AGlyxiKAiCOeRJAlbvhkAncmEXK64wjNuTnKFArVWh8tuwxAWhlxxE70PZyfjV04sGvIKji0KKNc4SX9W48aN+e677wLQwIpNBBWhUvK4XNgthcgAXXAIijMfmvnZWcwdNwqfz8fgCVMJi77ykIjX40auUN5U+xF0phAemPYhp48dJTKhDrogU3k3KbCaDix1EBEuTsypCJXSqaNp/HfEw3z9/BPkZ5+bYD20NQWrOQ97QT57N6y9Yj3mk1ks+/g9dq1egcNSWJZNvq4UCgXBEdVIaNnm5hn2Eq6LMuupfPbZZ6xatQq3282QIUNo06YNY8aMQSaTUadOHV5//XXkcjkzZsxgzZo1KJVKxo4dS9OmTTl69GipywrC5ezdtA6vx4PX4yF9907CoorO1ohr0YpNC+bg8/mom3TLZeuwFxbw20fvkL
FvD3s3/ElswyZojUHXo/mCcMMqk0/flJQUtm3bxpw5c5g5cyZZWVlMnjyZkSNH8v333yNJEitXriQ1NZXNmzczf/5
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"MS Zoning\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS Zoning\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Masonry Veneer Type\n",
"\n",
"None of the groups have a slope differing from the overall one."
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd4VMUawOHf2Z7dTQ8JhBKS0DsxNA0gIjaK9CZRREVRuKB4pSOIUhQVFC9iRUGliQWxo0iVIiBILyEQSCN9ezv3jwOBAEJCEgMy7/PwsDk7ZzIblv0y7RtJlmUZQRAEQSgDqopugCAIgvDvIYKKIAiCUGZEUBEEQRDKjAgqgiAIQpkRQUUQBEEoMyKoCIIgCGVGBBVBEAShzIigIgiCIJQZEVQEQRCEMqOp6Ab801q1akXVqlUruhmCIAg3lFOnTrFly5arlrvpgkrVqlVZuXJlRTdDEAThhtKzZ89ilRPDX4IgCEKZEUFFEARBKDMiqAiCIAhl5qabU7kct9tNSkoKDoejoptyUzMYDFSrVg2tVlvRTREE4RqJoAKkpKTg7+9PzZo1kSSpoptzU5JlmaysLFJSUoiOjq7o5giCcI3E8BfgcDgIDQ0VAaUCSZJEaGio6C0Kwg1O9FTOEgGl4ol/A6HY7DlgyQSdEfxClL+F64LoqQiCcOPZ+yW81QLmNoX8UxXdGuECIqhcB7Zs2ULdunVZvXp1ketdu3Zl7Nix11SnLMt07NiREydOFLk+bNgwNm3aVKK6vvzySxITE+nbty9xcXEkJiaSmJhIenr6NbVNEErF64GTZ3d2+zyQnXTF4rIs/wONEs4Rw1/XiZiYGFavXk3nzp0BOHjwIHa7/ZrrkySJXr168dVXXzFixAgAzpw5Q1JSEm3atClRXd27d6d79+6kpKTwzDPPsGjRomtulyCUmloDHSaAJQMCIqFq3GWLeXJyyF2yFJ/NRsjgh9CEhv7DDb05iaBynahXrx5JSUkUFBTg7+/P119/TdeuXUlNTQVg8eLF/Pjjj9jtdoKDg5k3bx6nTp1i3LhxaDQafD4fr776KlWqVCmss1evXjz44IOFQeXLL7+kZ8+eSJJE165dadmyJQcPHkSSJP73v/+xb98+Zs+ejVarpW/fvnTv3v2Kbd6wYQPLli3jjTfeAKB///7MnTuXgQMH0rRpU06cOEHt2rV56aWXsFqtTJgwgZycHAAmTpxI3bp1y+NHKdwMgqpDnw9BpQGd6bJFrBs2kjl3LgAqk4mwJx7/J1t40xLDX9eRu+66ix9//BFZltm9ezfNmzcHwOfzkZuby8KFC1m+fDler5c9e/awadMmmjRpwocffsiIESMoKCgoUl9ERATR0dH88ccfAKxataowf4/VaqVz584sXryY8PBw1q1bB4DT6eTTTz+9akABuO222zh06BB5eXkcPnyY4OBgIiIiSE9PZ+TIkaxYsQKbzcbPP//M22+/TevWrVm0aBHTpk1jypQpZfiTE25KhsC/DSgA6qCg849FL+UfI3oq15GuXbsyZcoUqlevTnx8fOF1lUqFVqvlmWeewWg0kpaWhsfjoXfv3rz77rs8+uij+Pv78/TTT19SZ9++ffnqq69Qq9VERUURFhZW+FyDBg0AqFKlCk6nE6BEe0QkSaJbt2588803pKSk0Lt378L6oqKiAGjevDlJSUkcOnSI33//ne+++w6AvLy8Ev50BKFkDE0aU2PRx8gOB4ZGjSq6OTcN0VO5jlSvXh2bzcaiRYvo1q1b4fUDBw7w888/M2fOHCZNmoTP50OWZdasWcMtt9zCRx99xD333MN77713SZ3t27dn586dfPHFF/Tr16/Ic5dbwqtSlewt0atXL77//nu2bdtG+/btAUhPTyczMxOAHTt2UKtWLWJiYhg8eDCLFi1izpw5RV6fIJQHTWAgphYtMLdtiyY4uKKbc9MQPZXrzH
333cdXX31FdHQ0J0+eBCAqKgo/Pz/69+8PQKVKlcjIyKBZs2aMGTOG+fPn4/P5GDdu3CX1qdVqOnbsyPfff18uQ04RERGYTCaaNWuGRqO8nXQ6HdOmTSM1NZWmTZtyxx13EBcXx4QJE1i2bBkWi4Xhw4eXeVsEQah4knyTrbfr2bPnJeep7N+/n/r161dQi258jz/+OOPHjy8c8rrtttvYuHHjNdUl/i0E4fp0uc/OyxHDX8I1czgc9OzZk5iYmMKAIgjCzU0MfwnXzGAwXPY3l2vtpQiCcOMTPRVBEAShzIigIgiCIJSZchn+WrlyJV988QWgbKbbv38/ixYt4qWXXkKtVpOQkMDw4cPx+XxMmTKFgwcPotPpePHFF4mKimLXrl2lKisIgiBUELmcTZkyRV6yZIncrVs3OTk5Wfb5fPKjjz4q7927V/7hhx/kMWPGyLIsyzt37pSfeOIJWZblUpe9kh49elxybd++fWX5koVSEP8WgnB9utxn5+WU6/DXnj17OHLkCJ07d8blclGjRg0kSSIhIYFNmzbxxx9/0LZtWwCaNWvGX3/9hcViKXXZG9GWLVto06YNiYmJDBo0iL59+7Jv374iZVauXMns2bMvuffw4cMMHTqUxMREevXqxRtvvCEyswqCUCHKdfXXggULeOqpp7BYLJjN5sLrJpOJkydPXnJdrVaXSdny9uXOU7zyw0FO59qJDPLjv3fXpXvzqqWut3Xr1rz++uuAkqxx7ty5LFiw4Ir35Ofn88wzz/Dmm29Ss2ZNvF4vI0eOZMmSJQwYMKDUbRIEQSiJcgsq+fn5JCUl0bp1aywWC1artfA5q9VKQEAADoejyHWfz4fZbC512fL05c5TjFu5B7vbC8CpXDvjVu4BKJPAck5+fj4hISEkJiYSEhJCXl5eYVr87OxsnnzySUaOHElaWhqtWrWiZs2agBJsZ82ahVarZcuWLbzzzjtotVrS0tLo378/v//+OwcOHODBBx9k4MCBl81W7O/vX2avQxCEm0u5DX9t27at8NwOs9mMVqvlxIkTyLLMhg0biI+PJy4urjA77q5du6hTp06ZlC1Pr/xwsDCgnGN3e3nlh4Olrvv3338nMTGRfv36MW7cuMIg0qVLFxYuXIharSYrK4thw4Yxbtw42rRpQ0ZGBtWrVy9Sj8lkQqfTAZCWlsabb77JlClTmD9/Pi+//DLvvvsuS5cuBf4+W7EgCMK1KLeeSlJSEtWqVSv8eurUqTz77LN4vV4SEhJo2rQpjRs3ZuPGjfTv3x9Zlpk+fXqZlC1Pp3Mvf3DW310viQuHv44dO0b//v2Jiooqkjl4/fr1VKpUCZ/PB0BkZOQlcy8nT54kLS0NgNq1a6PVavH396dGjRrodDoCAwMLsxLD5bMVC4IgXItyCyqPPvpoka+bNWvGsmXLilxTqVS88MILl9xb2rLlKTLIj1OXCSCRQX5l+n0uTFF/YTbh7t27c//99zNq1CiWL19Ohw4dWLBgAQMGDKBGjRq43W5mzpzJrbfeSq1atS6bifhixSkjCIJQHCJNSwn99+66ReZUAPy0av57d+lPMTw3/KVSqbBarYwdO7Zwv8+FateuTbdu3ZgxYwbTpk1j5syZTJw4EVmWsVqtdOjQgYEDB7J169ZSt0kQBKEkRJZiSp4Zt7xWfwkiS7EgXK+Km6VY9FSuQffmVUUQEQRBuAyR+0sQBEEoMyKoCIIgCGVGBBVBEAShzIigIgiCIJQZEVQEQRCEMiNWf10ntmzZwqhRo6hVqxayLONyuZgyZUrhbndQshQfO3aMZ599tvBaSkoK3bp1o2HDhoXXWrVqJc6VEQShQoigci12L4M1L0BeCgRWg46ToUnfUld7LVmKAWrVqsWiRYtK/f0FQRBKSwSVktq9DFb9B9xnU7XknVS+hjIJLOcUN0vxxckkz/F6vUyePJm0tDQyMjK44447ePrppzl+/DgTJ07E7XZjMBh4/fXXcTqdTJo0Ca
fTiV6vZ9q0aVSpUqXMXosgCDcPEVRKas0L5wPKOW67cr2UQeVcmhaXy8WBAwd46623WLBgAV26dKFTp06sXLmyMEv
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Mas Vnr Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Mas Vnr Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Miscellaneous Features\n",
"\n",
"This variable is basically a \"other\" field with no pattern."
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3Xd409UawPFvdprVRQcdQFv2piBLQNlXhgzZiqK4uIrgVQGRrUwFHCA4Lg5EEBAnCip7DwWBslehtIWutM1sxu/+EQj0sgptZZ3P8/hYfj355aSUvDnrfWWSJEkIgiAIQgmQ3+oOCIIgCHcPEVQEQRCEEiOCiiAIglBiRFARBEEQSowIKoIgCEKJEUFFEARBKDEiqAiCIAglRgQVQRAEocSIoCIIgiCUGOWt7sA/rVGjRkRHR9/qbgiCINxRzpw5w7Zt267b7p4LKtHR0SxbtuxWd0MQBOGO0r179yK1E9NfgiAIQokRQUUQBEEoMSKoCIIgCCVGBBVBEAShxIigIgiCIJQYEVQEQRCEEnPPbSkWBOHO57DkY8s1o9RoCTAaUWm0t7pLwnkiqAiCcMc5tHUTf3wyC7lCwRPvzCYkKuZWd0k4T0x/CYJwR/F63KQe2n/+aw/m9LRrtpck6Z/olnCeGKkIgnBHkSuU3N/rMay5ZowhoURWrHzFdra8XPb8sQKXw0H9jl3QBQb9wz29N4mgIgjCHccUFk7nIcORKeSotQFXbJO8ZxebvpkPgDpAS6Nuvf/JLt6zRFARBOGOpNHrr/l9rdHo/1qMUv45IqgIgnBXikyoTK+xU3AXOIlMqHSru3PPEEFFEIS7UoDBSGz1mre6G/ccsftLEARBKDEiqAiCIAglRgQVQRAEocSIoCIIgiCUGBFUBEEQhBIjgoogCIJQYkplS/GyZcv47rvvAHA6nRw4cID58+czceJEFAoFzZo148UXX8Tr9TJu3DgOHTqEWq3mrbfeonz58uzevbtYbQVBEIRbRCpl48aNkxYtWiQ9/PDDUnJysuT1eqWnn35aSkpKklauXCkNHz5ckiRJ2rVrl/T8889LkiQVu+21dOvWrRRfrSAIwt2pqO+dpTr9tXfvXo4ePUrHjh0pKCigXLlyyGQymjVrxubNm/nzzz9p3rw5AHXr1mXfvn1YLJZitxUEQRBujVINKh999BEvvPACFosFg8Hgv67X68nPz7/sukKhKJG2giAIwq1Ramla8vLyOHHiBI0bN8ZisWC1Wv3fs1qtmEwmHA5HoeterxeDwVDstoIgCMKtUWojlR07dtCkSRMADAYDKpWKU6dOIUkSGzdupEGDBiQmJrJ+/XoAdu/eTeXKlUukrSAIgnBrlNpI5cSJE8TEXCzxOX78eF599VU8Hg/NmjWjTp061KpVi02bNtGnTx8kSWLSpEkl0lYQBEG4NWSSdG/V2uzevTvLli271d0QBEG4oxT1vVMcfhQEQRBKjAgqgiAIQokRQUUQBEEoMSKoCIIgCCVGBBVBEAShxIigIgiCIJQYEVQEQRCEEiOCiiAIglBiRFARBEEQSowIKoIgCEKJEUFFEARBKDEiqAiCIAglRgQVQRCuyuN23eouCHcYEVQEQbiM02bjyPbNrJg9k6yUU0he763uknCHKLV6KoIg3LmcNgs/zpgMksTZE8foPW4K+qDgW90t4Q4gRiqCcAlJkrCac7Cac/B6Pbe6O7eMXKFArQ0AQB8SglyhuMU9Eu4UYqQi3JPcBQXYLfnIgIDAIBTn3zRzz6WzaMwwvF4vfcZPJSQq5to3wrfuIFcokclkpdzrf06AKYjH3/6AzFPJRCZUIsBoutVdEu4QYqQi3JMykk/w38ED+ew/z5N7Lt1//djObVjNOdjzcjm4af1172M+m86KD99l35rfcFjyS7PL/yiFQkFgWAQJ9RuKaS/hhpTaSOWjjz5i9erVuFwu+vbtS8OGDR
kxYgQymYxKlSoxduxY5HI5s2bNYu3atSiVSkaOHEnt2rVJTk4udltBuJaDWzbgcbvxuN2k7N9LSNloAOLqNWDL0oV4vV4qN7r/mvew5+fx6+zppB46wMFN64ipXgutwfhPdF8Qblul8u67bds2du3axcKFC5k/fz7p6elMnjyZoUOH8vXXXyNJEqtWrSIpKYnt27ezZMkSZsyYwfjx4wGK3VYQrqdOm4cwlgkjJDqGuLr1/deDIsoyYMYcnnr3I4Kjo695D7lCiSk0HACFSoVSpS7VPgvCnaBURiobN26kcuXKvPDCC1gsFoYNG8bixYtp2LAhAC1atGDTpk3ExcXRrFkzZDIZUVFReDwesrOzSUpKKlbbtm3blsbLEu4iwZFleXTiDJDJ0AcG+a/LFQoMwSFFuodGp6PlgGeo/kArgiOjxLqDIFBKQSUnJ4fU1FTmzp1LSkoKgwYNQpIk/0KmXq8nPz8fi8VCUNDFf9AXrhe3rSBci8flwuV0ojWa/Av0N0sXGFRopCMI97pSmf4KCgqiWbNmqNVq4uPj0Wg0hd7srVYrJpMJg8GA1WotdN1oNBZaE7mZtoJwNQV2G4e3buT7d97k1N7duAuct7pLgnBXKZWgUr9+fTZs2IAkSZw9exa73U6TJk3Ytm0bAOvXr6dBgwYkJiayceNGvF4vqampeL1eQkJCqF69erHaCsLVOO12fpk9gzMHkvhxxiScl3xQuRfZ8/Ow5ppvdTeEu0ipTH+1bNmSHTt20KNHDyRJYsyYMcTExDB69GhmzJhBfHw87du3R6FQ0KBBA3r37o3X62XMmDEADB8+vFhtBeFq5HI5QeGRmM+mEVYuDlkpH+pz2m3kpKaQk5ZK+Vr10AUGlurz3QirOYef3p2C02qly6tvEBRR9lZ3SbgLyCRJkm51J/5J3bt3Z9myZbe6G8ItZDXnYMnJxhASWmiRvjTkpKcyb+hzIEnUatWOVk8+h1KtKdXnLKr9G9bw66zpANRp24E2T//7FvdIuJ0V9b1TnKgX7jn6oOB/7ECfu6AAzn9uc9iseG+jxIyRCZVQqjW4XQVUvK/xre6OcJcQQUUQSpEhJJQOL77K2ZPHaNCpmz+f1u3AFBbBwPc+xuv1oNWLQ5tCyRBBRRBKUYDBSLXmD1Kt+YO3uiuXUapUGEJCb3U3hLuMCCqCINy2XC4XKSkpOByOW92Ve4ZWqyUmJgaVSnVTjxdBRRCE21ZKSgpGo5EKFSrcVVmgb1eSJJGVlUVKSgpxcXE3dQ+ReVG4513YDeZ2lW7pXFteLke2bSb96GGctnv7fExRORwOQkNDRUD5h8hkMkJDQ4s1MhQjFeGelp+dyaIxw7Dl5dJz1ESiKldF8nqx5ppxOR1o9YYSyenl9bjZ+dMydvz4LQBPvD0bTTl9se97LxAB5Z9V3J+3GKkI97Szx46Ql3EOt9PJrpU/4/V4sOaamT/8JeYNeZYdPy2joATm8yWvhC031/9np91W7HsKpW/btm1UqVKF5cuXF7reuXNnRowYAcCLL7540/cfMWIEnTt3pn///v7/UlNTb+geqamprF69+qb7UNLESEW4p0XEV8IYWgZ7Xh5123VArlBQYLdhO5+6JHnPrvNbgbXFeh6FSkXzfk+g0esJLhtdpIqSJcWWl0vq4YMERURiCoso9mu518THx7N8+XI6duwIwKFDh7Db7f7vz5o1q1j3f+2112jRosVNP37r1q0cP36cVq1aFasfJUUEFeGeZgwtw6OTZiJJElqDAQCtwUiddh04c3A/bQb+G63eUCLPpQ8KpuUTz5TIvYrK5XSycdF89q5agUwmZ+D7n4igcoOqVq3KiRMnyM/Px2g08uOPP9K5c2fS0tIAuP/++9m0aRMLFizg+++/Ry6XU6tWLUaNGsXJkycZNWoULpcLrVbLzJkzCQm5fmmFQ4cO8dZbbwG+BL2TJk1Cp9MxZswY0tPTOXfuHK1ateKll17i448/xuFwUK9ePT
7//HPGjRtHQkICCxcuJDMzk27dujFo0CCCgoJo0aIFLVq0uOzeRmPJnVMS01/CPU8fFIwhOMRfZEtnCqRFvyfpOeo
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Misc Feature\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Misc Feature\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Roof\n",
"\n",
"Roofs in Ames, IA, are not special enough to make a difference in the price. Even \"hip\" roofs seem already priced in bigger houses."
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd0VFXXh5+Z9F4INUAg9CJdOiggiDQRxIAQQAREARFEQZCiL2oUEBU/ihTRoDSJIiCCgkgPivTeIRCSkJA2aZOZ+/2xkwmBEFIm1POslZXJnTPnnhvC3ffs8ts6TdM0FAqFQqGwAvr7vQCFQqFQPDooo6JQKBQKq6GMikKhUCishjIqCoVCobAayqgoFAqFwmooo6JQKBQKq6GMikKhUCishjIqCoVCobAayqgoFAqFwmrY3u8F3GuaNGmCr6/v/V6GQqFQPFRcuXKF0NDQu4577IyKr68vISEh93sZCoVC8VDRo0ePPI1T7i+FQqFQWA1lVBQKhUJhNZRRUSgUCoXVeOxiKgqF4tHHaDQSFhZGSkrK/V7KQ4ejoyNly5bFzs6uQJ9XRkWhUDxyhIWF4ebmRoUKFdDpdPd7OQ8NmqYRHR1NWFgYFStWLNAcyv2lUCgeOVJSUihWrJgyKPlEp9NRrFixQu3w1E5FoVA8dCQnJ5OYmIi9vT1OTk7Y29vfNkYZlIJR2N+bMioKheKh4+jRo6xbtw69Xs8bb7yBj4/P/V6SIgPl/lIoFA8VJpOJy5cvA2A2m4mJicn9A5p2D1aVndDQUJo1a0ZgYCCBgYH06NGDN998k7S0tHzPtXTpUp577jl+++23bPNXq1aN9evXZxvbtWtXxo8ff8e5UlNTWbVqFQCzZ89m2bJl+V7P3VBGRaFQPFTY2NjQpk0bKlWqRP369e8su2RKh4RrkBAOJuO9XSTQtGlTgoODCQ4OJiQkBDs7O7Zs2ZLveTZt2sQXX3xBp06dsh339/fPZlROnjxJcnJyrnNFRUVZjEpRodxfCoXiocPT05NevXqh1+tzjKcAkBovBgVApwe3UvdugbeQlpZGZGQkHh4eAAQFBbFv3z4AunTpwoABAwgLC2PChAmYTCZ0Oh3vv/8+Bw8e5NixY0ycOJFZs2ZRrlw5y5zVq1fn/PnzJCQk4Obmxq+//krXrl0JD5drXrp0KZs2bSI5ORkvLy++/vpr5s2bx5kzZ/j666+L7FrVTkWhUDyUODo63tmgAOhtc359j9izZw+BgYF06tSJHj160L59e5o1a8Zff/1FWFgYK1eu5Mcff2TdunWcPHmSzz77jP79+/PDDz8wceJEJkyYQEBAADVq1ODTTz/NZlAy6dChA5s2bULTNA4dOkT9+vUBcQvGxsayZMkSVq1ahclk4vDhwwwbNozKlSszYsSIIrtutVNRKBSPJnbOUKwKaGZ5fY9p2rQps2bN4saNGwwaNIiyZcsCcPbsWRo1aoROp8POzo66dety9uxZzp49y5NPPglAjRo1uHbt2l3P0bVrV6ZOnUq5cuVo1KiR5bher8fOzo4xY8bg7OzMtWvXSE9PL5oLvQW1U1EoFI8mNrbg4AqO7vL6PuHl5cX06dN5//33iYyMpFKlShbXl9FoZP/+/fj5+VGpUiX+/fdfAI4fP56njLZy5cqRlJREcHAw3bp1sxw/ceIEf/75J1988QWTJk3CbDajaRp6vR6z2Vw0F5qB2qkoFApFEVO5cmUCAwOZNm0aX331FXv37iUgIACj0UjHjh2pVasW7777LpMmTWLx4sWkp6fz0Ucf5WnuTp06sWbNGipWrGjJivPz88PJyYnevXsDULx4cSIjI6lfvz5Go5Hp06fj6OhYJNeq07T7kG93H+nRo4fqp6JQPOIcP36cGjVq3O9lPLTk9PvL671Tub8UCoVCYTWUUVEoFAqF1VBGRaFQKBRWQxkVhUKhUFiNIsn+CgkJ4e
effwZEa+b48eMEBwfz0UcfYWNjQ8uWLRkxYgRms5mpU6dy8uRJ7O3tmTZtGn5+fhw4cKBQYxUKhUJxn9CKmKlTp2rLly/XunXrpl28eFEzm83a4MGDtaNHj2obN27Uxo0bp2mapu3fv18bNmyYpmlaocfmxgsvvFCEV6tQKB4Ejh07dr+X8FCT0+8vr/fOInV/HT58mDNnztC5c2fS0tIoX748Op2Oli1bsmvXLvbt20erVq0AqFevHkeOHCExMbHQYxUKheJ+c/r0aYYOHUpgYCA9e/bkq6++Ys+ePYwePTrH8Tt37rSoGteuXdvy+siRI4wePZq0tDTGjx/Ptm3b7vGV5I8iLX6cP38+w4cPJzExEVdXV8txFxcXLl++fNtxGxsbq4xVKBSK/PDL/itM33iSq7HJlPF04p1nq9G9/h3Uj/NAfHw8Y8aMYfbs2VSoUAGTycSoUaMoXrz4HT/TokULWrRoYXkdHBxseW/WrFkFXsu9psh2KvHx8Zw/f56mTZvi6uqKwWCwvGcwGHB3d7/tuNlstspYhUKhyCu/7L/CeyGHuRKbjAZciU3mvZDD/LL/SoHn3Lx5M02aNKFChQqAPATfLAq5Y8cO3nzzTcv43r17ExERccf52rZtS2pqquVno9HIhAkT6Nu3L3369CE0NLTAa7U2RWZU/vnnH5o1awaAq6srdnZ2XLp0CU3T2LFjB40aNaJBgwaWrdyBAweoWrWqVcYqFApFXpm+8STJRlO2Y8lGE9M3nizwnJGRkbepCru4uGBnZwfITuTUqVPExcVx+vRpvLy8KFmyZJ7nX7VqFV5eXvzwww/MmTOHDz/8sMBrtTZF5v46f/68RZUT4IMPPmDs2LGYTCZatmxJ3bp1eeKJJ9i5cye9e/dG0zQ+/vhjq4xVKBSKvHI1NufGVnc6nhfKlCnDsWPHsh27fPky//zzDyB94Lt168a6desICwvjxRdfzNf8p06dYt++fRw6dAiA9PR0YmJi8Pb2LvCarUWRGZXBgwdn+7levXqsXLky2zG9Xp+jhS3sWIVCocgrZTyduJKDASnj6VTgOdu0acP8+fPp06cP5cuXx2g0EhQURPPmzS1jevbsydixY0lOTubtt9/O1/z+/v6UKlWKYcOGkZKSwty5c/H09Czweq2JKn5UKBSPNe88Ww0nO5tsx5zsbHjn2WoFntPV1ZWgoCDef/99AgMDCQgIoHr16lSqVMkypmTJkri4uNCsWTNsbfP3fN+7d2/OnTtHv3796N27N76+vuj1D8btXEnfKxSKx5rMLC9rZn8B1K5dm++///62402bNrW81jQtR9fXzp07s/2c2ds+KCjIcuyzzz4r1PqKCmVUFArFY0/3+r6FNiL5ISUlhZdffpkmTZrg5+d3z857L1BGRaFQKO4xjo6Oj2xfpwfDCadQKBSKRwJlVBQKhUJhNZRRUSgUCoXVUEZFoVAoFFZDBeoVCoWiCDh9+jTTp08nOTmZpKQknnrqKUaOHIlOp7PaOWJiYpgyZQoGg4GkpCQqVarEpEmTuH79OmPGjLkvheFqp6JQKBSHVsKs2jDVU74fKtzNOFOleMKECQQHB7Ny5UpOnTrF8uXLrbRgYeHChTRv3pzFixezfPlynJ2drX6O/KJ2KgqF4vHm0EpY+yYYM6Ra4i7LzwB1XirQlHdSKbazsyMoKIh9+/YB0KVLFwYMGMD48eOxtbXl6tWrpKWl0alTJ/766y/Cw8OZM2cO4eHhzJs3D71eT1RUFAEBAfTt2xcfHx82btyIn58fDRo0YNy4ceh0OsLDw4mJieGNN94gKiqKatWqMW3aNC5evGg5l6+vL1euXMkmsW8N1E5FoVA83mz+MMugZGJMluMF5E4qxTt37iQsLIyVK1fy448/sm7dOk6eFDVkX19fFi9ejL+/P2FhYSxYsIAOHTpYqukjIiKYO3cuK1euZMmSJURHRzNw4EC6dOnCokWLaNWqFSNGjCAyMhKAxMREPv
nkE1asWMHu3buJjo7ms88+Y9iwYQQHB9OgQYMCX19uKKOiUCjujCntfq+g6IkLy9/xPFCmTBmuXbuW7djly5c5evQ
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Matl\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3XlcVFX/wPHPnY0Bhh0REETAJVNRyUJ/oT1qaquVZS6lmWXp49YqLrlvmaktlllpbmUuWW49ZWnlmmZq7pqEKLLvDAzDLPf3x8VRcgNhBPW8Xy9fDXfOvffMNMyXs32PJMuyjCAIgiBUAVV1V0AQBEG4dYigIgiCIFQZEVQEQRCEKiOCiiAIglBlRFARBEEQqowIKoIgCEKVEUFFEARBqDIiqAiCIAhVRgQVQRAEocpoqrsCN1pMTAx16tSp7moIgiDcVM6dO8fu3buvWe62Cyp16tRhzZo11V0NQRCEm0q3bt3KVU50fwmCIAhVRgQVQRAEocqIoCIIgiBUmdtuTOVyLBYLSUlJFBcXV3dVbmp6vZ6QkBC0Wm11V0UQhGoiggqQlJSEh4cH9erVQ5Kk6q7OTUmWZbKyskhKSiI8PLy6qyMIQjUR3V9AcXExfn5+IqBUgiRJ+Pn5idaeINzmREullAgolSfeQ+GGMeWAMQN0buDqq/xXqBFES0UQhJvPke/go7vh/eaQf666ayNcRASVGmD37t20adOGPn360KdPH7p168awYcMoKSmp8LWWLVvGgw8+yPfff1/m+Keffkq/fv149tln6dOnD4cPHwbgxIkT/PHHH1e8XocOHTCbzRWuhyA4jc0KZ0tXdtutkJ1w1eKyLN+ASgnnie6vGqJ169bMmTPH8fPrr7/Oli1beOCBByp0nU2bNvHee+/RqFEjx7FTp06xZcsWli9fjiRJHDt2jLi4ONatW8emTZvw9/fn7rvvrrLXIghOpdZA+zFgTAfPYKgTfdli1pwccr9egb2oCN9+z6Hx87vBFb09iaBSA5WUlJCeno6XlxcAb7/9Nn/++ScAjzzyCM899xxJSUmMHj0am82GJEm89dZb/PXXXxw9epQxY8YwZ84cQkNDAfDw8CA5OZnVq1fTrl07GjduzOrVq0lLS+Pbb79Fq9XSpEkTJk2axOrVqwF45ZVX6N+/v6NOKSkpjB07FrPZjIuLC5MnTyYoKOgGvzOCUMo7FLp/ASoN6NwvW6Rw+w4y3n8fAJW7O/4DX76RNbxtiaBSQ/z+++/06dOHrKwsVCoVTz/9NG3atOGXX34hKSmJlStXYrVa6d27N61bt+ajjz6ib9++3H///Rw7dozRo0ezZs0aNmzYwIQJExwBBaB27drMmzePZcuW8dFHH6HX63n11Vfp0qULTzzxBP7+/kRFRaHX6zl16hT+/v4kJSURFRXluMaMGTPo06cP9913H7t27eLdd99l1qxZ1fFWCYJC73XVp9Xe3hcei1bKDSOCSg1xvvsrJyeH/v37ExISAkB8fDytWrVCkiS0Wi3NmzcnPj6e+Ph4R5dV48aNSU1NveK1ExMTMRgMTJ8+HYBDhw4xYMAAYmJiypTr3r07a9asITg4mK5du5Z57uTJk8yfP5/PP/8cWZbRaMRHR6jZ9FHNqLt0CXJxMfqmTau7OrcNMVBfw/j4+DBz5kzeeust0tPTiYyMdHR9WSwW9u/fT1hYGJGRkezduxeAY8eO4e/vf8VrnjhxgkmTJjkG/sPDw/H09EStViNJEna7HYAHHniAHTt28NNPP10SVCIiInjjjTdYunQpEydOrPBYjyDcaBovL9zvvhtD27ZofHyquzq3DfHnZg1Uv359+vTpw5QpU/jggw/Ys2cPPXr0wGKx8MADD9CkSRNGjBjB2LFjWbhwIVarlalTp17xep07dyY+Pp6nnnoKNzc3ZFlmxIgReHh40LRpU9555x0iIyNp3bo1d999N9
nZ2Xhf1HUAEBcXx4QJEzCbzRQXFzNmzBhnvw2CINyEJPk2m2/XrVu3S/ZTOXbsGI0bN66mGtUsEydOpHPnzrRp0+a6zhfvpSDcmi733Xk5ovtLcOjfvz/5+fnXHVAEQRBE95fgsHDhwuqugiAINznRUhEEQRCqjAgqgiAIQpVxSvfXmjVr+PbbbwEwm80cO3aMpUuXMnXqVNRqNbGxsQwZMgS73c6ECRM4ceIEOp2OKVOmEBYWxoEDBypVVhAEQagmspNNmDBB/vrrr+WuXbvKiYmJst1ul1988UX5yJEj8o8//ijHxcXJsizL+/fvlwcOHCjLslzpslfzxBNPXHLs6NGjVfmSb2vivRSEW9Plvjsvx6kD9YcOHeLUqVO8/vrrLFq0iLp16wIQGxvLzp07ycjIoG3btgC0aNGCw4cPYzQaKSkpqVTZO++805kvyymSkpLo2rUrTZo0cRyLiYkhPj6+TKLJi5nNZtatW0f37t1vVDUFQRCuyqlBZf78+QwePBij0YjBYHAcd3d35+zZs5ccV6vVVVLW2b7bf46ZP54gOddEsLcrb3ZpxOMt61T6uvXr12fp0qWOn3fv3k18fPwVy2dkZLBq1SoRVARBqDGcNlCfn59PQkICrVu3xmAwUFhY6HiusLAQT0/PS47b7fYqKetM3+0/x6g1hziXa0IGzuWaGLXmEN/td+5GQcuWLaNv3750796dl156iZKSEj755BNOnTrF3LlznXpvQRCE8nJaUPnjjz8ci+gMBgNarZYzZ84gyzLbt2+nVatWREdHs3XrVgAOHDhAw4YNq6SsM8388QQmi63MMZPFxswfT1T62qdOnXJs1NWnTx/S0tIAJYDm5uayaNEiVq1ahc1m49ChQwwcOJD69euLyQmCINQYTuv+SkhIcGTaBSX9xxtvvIHNZiM2NpbmzZvTrFkzduzYQc+ePZFlmWnTplVJWWdKzjVV6HhFXK77C0ClUqHVannttddwc3MjNTUVq9Va6fsJgiBUNacFlRdffLHMzy1atGDlypVljqlUKiZNmnTJuZUt60zB3q6cu0wACfZ2ddo9jx8/zs8//8yqVaswmUx069YNWZZRqVSODMOCIAg1gVj8WEFvdmmEq1Zd5pirVs2bXRpd4YzKCwsLw9XVlZ49e/L8889Tq1Yt0tPT8fPzw2KxMHPmTKfdWxAEoSJE7q8KOj/Lq6pnf4WEhFzS4oqJiXFspLVkyZLLnrd27dpK3VcQBKEqiaByHR5vWadKphALgiDcakT3lyAIglBlRFARBEEQqowIKoIgCEKVEUFFEARBqDIiqAiCIAhVRsz+qkHOnj3LzJkzSU1NRa/Xo9frefPNN2nQoMFly997773s2LGjzLEPP/wQf39/evXqdSOqLAiCUIYIKtfj4ErYPAnyksArBDqOg6inK3VJk8nEoEGDmDx5Mi1btlRuc/AgkyZNKpO6RRAEoSYTQaWiDq6E9cPAUpqqJe+s8jNUKrD88ssvtG7d2hFQAKKioliyZAknT57k7bffxmazkZOTw4QJE4iOjqakpIRXX32VlJQUGjVqxIQJE8pcc9asWezduxe73U6/fv148MEHr7t+giAI5SGCSkVtnnQhoJxnMSnHKxFUkpKSHJuNAQwaNAij0Uh6ejoDBw4kLi6ORo0asX79etasWUN0dDTFxcW88cYb1KlTh+HDh7NlyxbH+b/99htJSUksX74cs9nM008/zb333uv0rQEEQbi9iaBSUXlJFTteToGBgRw+fNjx87x58wB4+umnCQ0N5eOPP0av11NYWOjYmCw4OJg6dZSV/S1btiQhIcFx/smTJzly5Ah9+vQBwGq1cu7cORFUhAqxWyyotNrqroZwExGzvyrKK6Rix8upY8eO7Nq1iwMHDjiOJSYmkpqayogRIxg2bBgzZsygYcOGyLIMQGpqKunp6QDs27evzIB+REQEMTExLF26lMWLF/Pggw8SGhpaqToKtw+b0Uj+Tz+RMn
Ik5vh4ZJENWygn0VKpqI7jyo6pAGhdleOV4O7uzrx585g1axbvvvsuVqsVtVrNqFGjSE1NZfjw4Xh6ehIYGEhOTg4
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Roof Matl\"]\n",
"del df[\"Roof Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sale Info\n",
"\n",
"Partial and abnormal (= foreclosure) sales seem to make a difference, with higher and lower prices respectively. These two types are encoded in the factor variables *partial_sale* and *abnormal_sale*. The impact does not seem to be big, though."
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Normal 2396\n",
"Partial 233\n",
"Abnorml 189\n",
"Family 46\n",
"Alloca 22\n",
"AdjLand 12\n",
"Name: Sale Condition, dtype: int64"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Sale Condition\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3XmcjWX/wPHP2ffZjTEzjBljzzbmsWQolPyikhQtSptURBEi65PSIhUlKdWDCKkUkTVZsssuxhiN2fc5+3b//jgcJozRzFiv9+vVqzP3uc51rhkz53tf2/eSSZIkIQiCIAiVQH61GyAIgiDcOERQEQRBECqNCCqCIAhCpRFBRRAEQag0IqgIgiAIlUYEFUEQBKHSiKAiCIIgVBoRVARBEIRKI4KKIAiCUGmUV7sBV1rr1q2Jioq62s0QBEG4rpw6dYqtW7destxNF1SioqJYsmTJ1W6GIAjCdaVnz57lKieGvwRBEIRKI4KKIAiCUGlEUBEEQRAqjQgqgiAIQqURQUUQBEGoNCKoCIIgCJXmpltSLAjC9a/Q6iTP7ESnVhCsV6FTi4+ya4XoqQiCcN1Zvi+Dzu//Rod31pFRZL/azRHOIYKKIAjXFbfHy87UAt9jr8SJPGvZL5CkK9Aq4QzRZxQE4bqiVMh55c765JQ4qBGoo1l04IULWvJg52xwWqHtC2CodmUbepMSQUUQhOtOVLCO6Y8koFTI0F9sPiV5Lax9w/dYbYAOw65cA29iIqgIgnBdCtCpyi6gDz77WPRSrhgRVARBuDFFtoR+y8Ftg8gWV7s1Nw0RVARBuDHpg6F2u6vdipuOWP0lCIIgVBoRVARBEIRKI4KKIAiCUGlEUBEEQRAqjQgqgiAIQqURQUUQBEGoNFWypHjJkiV8//33ADgcDg4dOsScOXOYNGkSCoWCpKQkBg4ciNfrZfz48Rw5cgS1Ws0bb7xBTEwMe/bsqVBZQRAE4SqRqtj48eOlBQsWSPfee6+Umpoqeb1e6ZlnnpEOHDggrVy5UhoxYoQkSZK0e/duacCAAZIkSRUuW5b777+/Cr9bQRCEG1N5PzurdPhr3759HDt2jG7duuF0OqlVqxYymYykpCQ2b97Mzp07ad++PQDNmzdn//79mM3mCpcVBEEQro4qDSozZ87kxRdfxGw2YzQa/dcNBgMlJSXnXVcoFJVSVhAEQbg6qixNS3FxMSkpKbRp0waz2YzFYvE/Z7FYCAgIwG63l7ru9XoxGo0VLisIgiBcHVXWU9m+fTtt27YFwGg0olKpOHnyJJIksXHjRhITE0lISGDDhg0A7Nmzh3r16lVKWUEQBOHqqLKeSkpKCtHR0f6vJ0yYwLBhw/B4PCQlJdGsWTOaNGnCpk2b6NOnD5Ik8eabb1ZKWUEQBOHqkEnSzXXWZs+ePVmyZMnVboYgCMJ1pbyfnWLzoyAIglBpRFARBEEQKo0IKoIgCEKlEUFFEARBqDQiqAiCIAiVRgQVQRAEodKIoCIIgiBUGhFUBEEQhEojgoogCIJQaURQEQRBECqNCCqCIAhCpRFBRRAEQag0IqgIgnBRLo/3ajdBuM6IoCIIwnnMdhcr9mcydOGfHM0qweu9qZKZCxVQZeepCIJw/SqyuXl+3k4kCfafKuLb59pQzaS92s0SrgOipyII55AkiewSOzkldjw38d25UiHDqPbdc4YHaFDKxUeFUD6ipyLclNxOJzZzCTJAFxiEQqEA4GS+lV6fbsHrlVg4oC11qhkvWZfL40UplyGTyaq41VdOqEHNL4PbcySrhGbRQQQb1Fe7ScJ1Qtx+CDelnNQUvhj0NF++MoCi7Ez/9dWHsskpcZBncbJ0T/ol6zmZZ2Howj9ZuONvimyuqmzyFaVUyIkO0dO5YXXCTJqr3RzhOlJlPZWZM2eydu1aXC4XDz/8MK1atWLkyJHIZDLq1q3LuHHjkMvlTJ8+nfXr16
NUKhk1ahRNmzYlNTW1wmUFoSyHt/yOx+3G43aTdnAfITWiAOhYvxofrlbi8Urc3aRGmXUUWJ28vPBPdqYWsPTPdFrHhRKoU12J5gvCNatKPn23bt3K7t27mT9/PnPmzCEzM5O33nqLIUOG8M033yBJEmvWrOHAgQNs27aNRYsW8f777zNhwgSACpcVhEtpdsf/YQqrRkhUNLHNW/qvx4ToWf3Kbawbdjtx1Qxl1qGSy4gK8k1ea5RytEpxMyMIVdJT2bhxI/Xq1ePFF1/EbDYzfPhwFi5cSKtWrQDo0KEDmzZtIjY2lqSkJGQyGZGRkXg8HvLz8zlw4ECFyt55551V8W0JN5DgiBo8Oul9kMkwBAb5rysUcsIDyrfKyahVMe6exjyQEE3tMAPBejHvIAhVElQKCgpIT0/n008/JS0tjeeffx5JkvwTmQaDgZKSEsxmM0FBZ/+gz1yvaFlBKIvH5cLlcKA1Bfgn6P+tUKOG2+qHV1LLBOH6VyX99aCgIJKSklCr1cTFxaHRaEp92FssFgICAjAajVgsllLXTSZTqTmRf1NWEC7GabPy1x8b+eG9/3Jy3x7cTsfVbpIg3FCqJKi0bNmS33//HUmSyMrKwmaz0bZtW7Zu3QrAhg0bSExMJCEhgY0bN+L1eklPT8fr9RISEkKjRo0qVFYQLsZhs7H84/c5degAS99/E8c5Nyo3owKLk1yzCKxC5amS4a+OHTuyfft2evXqhSRJjB07lujoaMaMGcP7779PXFwcd911FwqFgsTERHr37o3X62Xs2LEAjBgxokJlBeFi5HI5QeERFGZlUK1WLLIKDn9disNmpSA9jYKMdGKatEAfGFil73c5ckrsvPjNboptLmb2bUlMaNkLEwShPGSSJN1U24Z79uzJkiVLrnYzhKvIUliAuSAfY0hoqUn6qlCQmc7sIc+BJNGkUxc6PfkcSvW1se/jh92nGPLtHgAea1OLN3o0ucotEq5l5f3sFDvqhZuOISgYQ1DwFXkvt9MJp+/b7FYLXu+1k/W3SXQgWpUch9vLnY0irnZzhBuECCqCUIWMIaHcPXAYWSeSSex+P2qt7mo3ya9msI71wzrikSSxaVOoNCKoCEIV0hlNNGx/Ow3b3361m3IetVJBRGDVzikJNx8RVARBuKpcLhdpaWnY7far3RQB0Gq1REdHo1L9u96rCCqCIFxVaWlpmEwmateufUNler4eSZJEXl4eaWlpxMbG/qs6RLIi4aaXU+Igu9iO0+2p0vexFhdxdOtmMo/9hcN6c++POZfdbic0NFQElGuATCYjNDS0Qr1G0VMRbmqZRXZ6fbqZPLOTec+2JqFWMHi9YMkGpxV0gaAPrfD7eD1udvy0hO1LvwPgiXc/RlNL7As5QwSUa0dF/y1ET0W4qe1NKyStwIbN5eHrzSdwe04HlE+TYFoL2DQNnBXvVUheCWtRkf9rh81a4TqFyvPZZ5/Rr18/HnvsMfr27cv+/fsvWjYtLY2HHnqo3HWvXr2avn370rdvXx588EFWrFhR4fa2a9fO3+69e/ficDhYtGgRAEuWLLmq2dpFT0W4qTWJDqRGoJZ8i5O+bWJQKuTgKAFLjq/A8XVw60BQV6xXoVCpaP/IE2gMBoJrRBESGV0JrS8fa3ER6X8dJqh6BAHVqqPWirPmz3Xs2DHWrl3L/PnzkclkHDp0iBEjRrB06dIK171r1y6++uorZs6cicFgoKCggN69exMfH098fHyF6+/fvz/gC3SLFi3iwQcfpGfPnhWutyJEUBFuajUCdSwdmIRXks6mrtcFw3+egZNboNsU0FbORklDUDAdn3i2UuoqL5fDwcYFc9i3ZgUymZynP5olgso/mEwm0tPTWbx4MR06dKBhw4YsXrwYgG3btjF9+nQkScJisTBlypRSq6K2bdvG1KlTUSgU1KxZk4kTJ5Z6ftGiRTzxxBMYDL6bkuDgYBYtWkRAQADFxcW8+uqrmM1mPB4PgwcPpm3btt
xzzz20atWKI0eOIJPJ+OSTT9Dr9YwZM4Zjx45Rs2ZNnE4nACNHjuTuu+/m119/5dixY/62hoWF8fDDDzN58mR27tw
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Condition\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZ4AAALWCAYAAADGRoYRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3XlcVPX+x/H3wAiigITdSrNSy6FG3Ek0ccFMI7XESrPStNVWNSvNG5qX1Ost0Vtmyy3NMk1xIbPM61ZqJppFqbh0zSUVV0Q2Wef8/uA3kyOLgAMj8no+Hj6Qc75z5jNn8MPxM9/z+ZoMwzAEAAAAAAAAAICLeLg7AAAAAAAAAADA5YXCMwAAAAAAAADApSg8AwAAAAAAAABcisIzAAAAAAAAAMClKDwDAAAAAAAAAFyKwjMAAAAAAAAAwKUoPFczq1ev1lNPPaX27dsrODhYYWFhevrpp7V69eqLOu7ixYsVFBSkTz75xDWBniczM1Pz58/Xww8/rLCwMAUHBys8PFyjR4/W77//XiHPWVYhISHq2rWr4/v4+HgFBQVpwoQJTuM2bNig33777YLjqprRo0crKChIo0aNKnbMzp07FRQUpNGjR1diZBWja9euCgkJcXcYcCPyacWp7vn0fD169FBQUJDeeOONYsdMnTpVQUFBWrt2bSVGVrkGDBigoKAgZWRkuDsUVBLybMUhzzorTZ4tSatWrXTHHXc4vo+NjVVQUJDmzJnj2GbPYUePHr3oeF0tOTlZQUFBGjx4sLtDwSWGPFxxyMPOynO9m5eXp6CgIPXt27eywkQ5UHiuRqKjo/XMM8/o999/1+23364hQ4botttu088//6xnnnlGUVFR7g6xSP/73//Ut29fjR07VqdPn1aXLl00cOBA3XTTTYqLi1NkZKRWrVrl7jALufbaa/Xcc8+pY8eOjm1z587VY489puPHj5c4riqLi4vTxo0b3R0GUKHIp5WruuZTSfrll1+0f/9++fj46KuvvlJOTo67QwIqBXm2cpFnXZtnrVarnnvuOTVv3twFEQLuQR6uXORhrncvV2Z3B4DKER8frzlz5qhHjx6KiYmR2fzXW5+WlqZBgwZpwYIF6ty5s7p16+bGSJ0lJydr8ODBSk5OVnR0tO6//36ZTCbH/oSEBD366KMaPny4YmNjdcstt7gxWmcNGjTQ888/77Tt1KlTpRpX1Y0bN05fffWVatas6e5QAJcjn1a+6pxP4+LiZDKZ9Oijj+rdd9/VihUr1Lt3b3eHBVQo8mzlI8+6Ns82bdpUTZs2dVGEQOUjD1c+8jDXu5crZjxXE999950k6aGHHnL6pSFJfn5+GjlypCRp5cqVlR1aiSZPnqwTJ07ohRdeUL9+/Zx+aUhSy5YtNWrUKOXm5urDDz90U5Q4l9Vq1cGDB/XOO++4OxSgQpBPUVlycnK0fPlyWa1Wx3sWGxvr7rCACkeeRWUhzwJFIw+jspCHL38UnquJ3NxcSdKePXuK3B8SEqJp06YV6uuVnJysyZMnKyIiQi1atFCLFi3Us2dPvf/++8rLy7vg8544cUKvv/66OnXqpODgYHXt2lVvvvmm0tPTL/jY9PR0ffvtt6pdu7YGDRpU7Lg+ffpo2LBheuihh5y2Hz9+XGPHjlXnzp0VHByszp07a+zYsU63qUjSO++8o6CgIO3du1cxMTHq0qWLgoOD1bNnT82bN6/Q8yUnJ2v8+PHq2LGjWrRoocGDB2v37t2Fxp3fe2ngwIGaPn26JOnZZ59VUFBQkePs9u3bp5deekm33XabgoOD1a1bN/3rX/9SWlqa0zh7b+UzZ85o3Lhx6tChg5o1a6a+fftqxYoVxZ638+O80J/4+PgLHkuSXn75ZV1xxRX65JNPtHPnzlI9JicnR++//77uuusuBQcHKzQ0VE8//bS2bdvmNM7eC2z58uV67LHH1KxZM4WHh+vPP/
/U6NGjZbVadfr0ab322mtq166dWrVqpccee0wHDx5UTk6O3nzzTYWFhal169YaOHCgdu3aVSiWtWvX6vHHH1e7du3UtGlTtWvXTs8880ypXwsuf+RT8mlxXJ1P165dqzNnzigsLEzXXHONWrVqpc2bN+vgwYPFPubs2bOaMGGC2rdvr1atWmnQoEHasmWL05iNGzcqKChIX375pWJjY9W7d281a9ZMnTt31r/+9S9lZWUVOu6yZcvUv39/tWzZUq1atdIDDzyg5cuXO405cOCAgoKCNH36dI0fP14tW7ZUu3bt9N///tfR7/Snn37Se++9p65du6p58+aKjIzUDz/8IElasGCB7rzzTrVo0UK9e/fWf//731KdJ1x+yLPk2eK4O88mJydr3LhxCgsLU8uWLTVkyJAi+8UW1eO5rH766Sc9++yz6tChg4KDg9W2bVs9+uij2rx5s9O4l156SVarVSkpKRo7dqw6dOig5s2b69577y2yncCff/6pkSNHOn5PvPDCCzp27Fi548TliTxMHi6Ou/NwaWRnZ2vGjBmKiIhw1BeeffZZ7dixo8jxsbGxuu+++9SqVSuFhYXp2WefLfSzn56erunTp+vuu+9Wq1at1KxZM3Xv3l1vvfWWzp49W+5YqwNabVQTHTp00GeffabJkydr//796tWrl5o3by5PT09JUs2aNRUREeH0mLS0NPXr109JSUnq2rWrunXrpuTkZK1cuVJTp07VmTNnSlxI7siRIxowYICOHTum8PBw3Xjjjdq5c6c++ugjbdy4UZ9//rlq1apV7OM3b96srKwsdezYscRx3t7eeuaZZ5y2HTx4UAMGDNDJkyd12223KSIiQrt379b8+fO1Zs0azZs3T9ddd53TY15++WUdOXJE3bt3l9ls1tKlS/X666/L09NT/fr1kyRlZGTo4Ycf1t69e9W+fXtZLBZt3rxZAwcOVFZWlvz9/YuNMzIy0vG67rrrLjVu3LjYsb/++qsGDx6srKwshYeH67rrrlNCQoI+/vhjrV27VvPmzVNAQIDTY4YMGaKUlBRFREQoMzNTX331lYYNG6aPPvpIYWFhxT6XvUfUhVx77bUXHCNJV1xxhV599VW98soreu2117RgwQLHz1lRsrOzNWTIEG3dulUWi8Xxvq1atUrr16/XtGnTCt2+9cYbb+iqq67SwIEDdejQIcd7aRiGBg0aJJvNpsjISO3Zs0cbNmzQU089pRtuuEF79uzRnXfeqRMnTujbb7/Vk08+qRUrVsjHx0eSNGfOHEVHR+v6669Xr169VKNGDW3btk2rV6/Wpk2b9O233+qqq64q1XnA5Yt8Sj4tjqvzaVxcnCTprrvukiT17NlTP//8sxYuXKgXX3yxyMdMnDhRubm56tWrlzIyMrR8+XINHjxY7733njp16uQ0dvbs2dqzZ4969OihsLAwrVy5Uh9//LFOnTqlyZMnO8ZNmDBBn376qf72t7+pd+/estls+u677zR8+HDt2rVLI0aMcDruvHnzZDKZNGDAAP3xxx9q0aKF1q1bJ6kgfx87dkw9e/ZUenq6vvzySw0dOlT9+vXT4sWL1bNnT5nNZsXFxWnYsGGKi4tz/EcL1Qd5ljxbHHfm2fT0dD344IPat2+fbrvtNjVp0kTx8fF6+OGHHUU6V1mxYoWGDx+uK6+8UnfccYdq166t3bt3a/369YqPj3dMxrAzDENDhgzRmTNnFBERoYyMDC1btkzPPfecZs2apfbt20uSDh8+7PhZu/3221WvXj2tW7dOTzzxhEvjR9VHHiYPF+dSuN4tSVZWlgYPHqxffvlFQUFBGjBggE6cOKFVq1bp+++/1zvvvKPw8HDH+DFjxmjRokVq0KCB+vTpo+zsbH399dfatGmTvvjiCzVp0kS5ubl65JFHtGPHDnXs2FEdO3ZUenq6Vq9erf/85z86fPiwpk6dWuZYqw0D1ca4ceMMi8Xi+NO6dWvjiSeeMGbNmmUkJSUVGv
/BBx8YFovFWLBggdP2I0eOGMHBwUaHDh0c2xYtWmRYLBZj1qxZjm1PPPGEERQUZKxdu9bp8bNnzzYsFosxefLkEuP
"text/plain": [
"<Figure size 1440x720 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Sale Condition\", hue=\"Sale Condition\",\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"df[\"partial_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Partial\" else 0)\n",
"df[\"abnormal_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Abnorml\" else 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Homes that are sold for the first time are clearly priced higher. A factor variable *new_home* is introduced."
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3WdgVMXawPH/9mRbek9IoXcEXkIJQVABQUERpCgWVNSriAUBGwI20OsVBXunSBULiKLSCQhIN/QWCOnZtN0kW8/7YWEhEiCwCSEwvy9mJ3POmY3LPudMeUYmSZKEIAiCIFQDeW03QBAEQbh2iKAiCIIgVBsRVARBEIRqI4KKIAiCUG1EUBEEQRCqjQgqgiAIQrURQUUQBEGoNiKoCIIgCNVGBBVBEASh2ihruwFXWmJiIlFRUbXdDEEQhDrl5MmTbNq06aL1rrugEhUVxeLFi2u7GYIgCHXKgAEDqlRPdH8JgiAI1UYEFUEQBKHaiKAiCIIgVJvrbkxFEIRrm91uJz09nfLy8tpuSp3k4+NDdHQ0KpXqso4XQUUQhGtKeno6BoOBuLg4ZDJZbTenTpEkifz8fNLT04mPj7+sc4juL0EQrinl5eUEBQWJgHIZZDIZQUFBXj3liScVQRDqnMJSG/lmG75qBQFaFb7qil9lIqBcPm//duJJRRCEOmfZ7kxu+t8akt9eRWaRGDu5mognFUEQ6hSH08XWtAL3zy6JY/mlJIToz1tfkqQr/uTy2WefsWHDBhwOBzKZjHHjxtGiRYtK66anp/Pss8+yYMGCi573/vvvx+VyceTIEQIDA/H396dz5848/vjj1f0WLpsIKoIg1ClKhZxnb2lMbomVCD9fWkf7VVrP4XRhsthwSRLBeg1KxZXpmDl06BArV65k7ty5yGQy9u7dy7hx4/j555+9Pve3334LwPjx4+nTpw/Jyclen7O6iaAiCEKdExXgy4xhbVEqZGjVlX+Nma0OsordXWNymYxQo88VaZvBYCAjI4NFixaRnJxM06ZNWbRoEQCbN29mxowZSJKExWLh3XffrTB1d/Pmzbz33nsoFApiYmKYPHlylab2DhkyhNdee42GDRuyZs0aVq1aRVBQEEeOHCE/P5/i4mJefvll2rdvz6+//so333yDXC6nXbt2jBkzplrfvxhTEQShTjL6qs4bUAAU8jNdXkrFlev+CgsL4+OPP2bbtm0MHjyY3r17s2rVKgAOHjzIO++8w6xZs+jZsye//fab5zhJknjllVeYMWMGs2fPJiwsjB9++KFK1xw0aJCn7vfff8+gQYMA95qTmTNn8s477zB58mQKCwuZPn0633zzDXPnziU7O5uUlJRqff/iSUUQhGuSr0pBQogeSZLwVSmu2HXT0tLQ6/W89dZbAOzevZtHHnmExMREwsLCeOONN9BqtWRnZ9O2bVvPcSaTiZycHJ5++mnAPTW6c+fOVbrmrbfeyoABA3jooYfIzs6mefPmrFy5ko4dOwLQsGFD8vLyOH78OCaTiZEjRwJgsVg4fvw4Xbp0qbb3L4KKIAjXJKVCjv4KjaOcbf/+/cyfP5+PP/4YtVpNfHw8RqMRhULBK6+8wh9//IFer2fcuHFIkuQ5LiAggPDwcD766CMMBgMrVqxAq9VW6ZparZbExETeeOMN+vXr5ylPTU2lf//+HDhwgLCwMKKjo4mIiOCrr75CpVKxePFimjZtWq3vXwQVQRCEatSzZ08OHz7MwIED0Wq1SJLE2LFjMRgM9OvXj3vuuQdfX1+Cg4PJycnxHCeXy3nppZcYOXIkkiSh0+l4++23q3zdu+++m2HDhjFx4kRP2d69e7n//vspKyvjtddeIzAwkAceeIDhw4fjdDqJiori1ltvrc63j0w6O1ReBwYMGCD2UxGEa9jevXur/e67Lti1axezZ8/2BKLp06cTHBzM0KFDL/lclf0Nq/rdKZ5UBE
EQ6rjZs2ezaNEipk2bVttNEUFFEAShrrv33nu59957K5SNGjWqVtoiphQLgiAI1UYEFUEQBKHa1Ej31+LFiz0LcaxWK3v37mXWrFm88cYbKBQKkpKSePLJJ3G5XEycOJH9+/ejVqt5/fXXiY2NZceOHV7VFQRBEGqJVMMmTpwozZs3T+rXr5+UlpYmuVwu6eGHH5ZSU1Ol5cuXS+PGjZMkSZK2b98uPfbYY5IkSV7XvZA777yzBt+tIAi1bc+ePbXdhDqvsr9hVb87a7T7a/fu3Rw6dIi+fftis9moV68eMpmMpKQkNmzYwNatW+natSsAbdq04Z9//sFsNntdVxAEobbcf//97Nq1CwCbzUa7du344osvPL8fPnw47du3Z+DAgQwfPpwhQ4YwZswYCgoKaqvJ1apGg8qnn37KE088gdlsRq8/k5pap9NRUlJyTrlCoaiWuoIgCFX14/aTdJmykvjxv9Blykp+3H7Sq/N16dKFv//+G4CtW7eSlJTEmjVrAPdwwMmTJ2nSpAlTp05l1qxZzJs3j+TkZCZMmOD1e7ka1FhQKS4u5ujRo3Ts2BG9Xo/FYvH8zmKxYDQazyl3uVzVUlcQBKEqftx+khcW7+ZkYRkScLKwjBcW7/YqsHTu3NkTVNasWcOgQYMoKSmhpKSE7du306FDh3P2d+nXrx+pqalYrVZv3s5VocaCypYtW+jUqRMAer0elUrF8ePHkSSJ9evX0759e9q2bcvatWsB2LFjB40aNaqWuoIgCFXxzvL9lNmdFcrK7E7eWb7/ss/ZrFkzjhw5giRJbNmyhQ4dOtCpUyc2bNjA5s2bPd34/2Y0GikuLr7s614tamzx49GjR4mOjva8njRpEmPGjMHpdJKUlETr1q1p2bIlKSkpDBkyBEmSePPNN6ulriAIQlVkFJZdUnlVyOVymjRpwtq1awkJCUGtVpOcnMzq1avZt28f9913H/PmzatwjCRJ5OXlERQUdNnXvVrUWFB5+OGHK7xu06bNOdtlyuVyJk+efM6x3tYVBEGoikh/X05WEkAi/X29Om+XLl349NNP6du3LwDt2rXjo48+AsDf3/+c+osWLaJjx47I5XV/6aBI0yIIwnXr+V6NeWHx7gpdYL4qBc/3auzVeTt37szLL7/sSe6oVqsxGAwVkjSOGzcOX1938AoLC+PVV1/16ppXCxFUBEG4bt1xQxTgHlvJKCwj0t+X53s19pRfrqioKPbvrzguc/pJBWDWrFlenf9qJoKKIAjXtTtuiPI6iAhn1P0OPEEQBOGqIYKKIAiCUG1EUBEEQRCqjQgqgiAIQrURQUUQBEGoNmL2lyAIQg04ePAg77zzDmVlZZSWltKtWzeysrJISkqiT58+ANx666107NjRs0Zl/Pjx3Hzzzfz555+kpqbi7++Pw+EgICCAF154gZiYmNp8S1UinlQEQbi+7VoA77WAif7u/+7yPkNHcXExzz77LC+++CKzZs1iwYIFHDhwgLi4OLZu3QrA8ePHqVevHlu2bPEct23bNjp27AjA888/z6xZs5g7dy4jRozg6aef9rpdV4IIKoIgXL92LYAlT0HRCUBy/3fJU14HlhUrVpCYmEhcXBzg3qpj6tSpDBgwgG3btgHuDMY9evQgKiqKQ4cOceLECcLCwips53Fa+/btUalUpKWledWuK0EEFUEQrl8rJoP9X7m/7GXuci/k5OSc01Wl0+kIDg5GJpNRUlLC2rVrSU5OJjk5mbVr114wgzFAUFBQndjISwQVQRDOy+501XYTalZR+qWVV1FkZCRZWVkVyk6cOOHZEmTDhg0UFBQQERFBcnIy27dvZ8uWLRcMKhkZGYSHh3vVritBBBVBEM5hLrfz2z9ZPLdgJwezS3C5pNpuUs3wi7608irq3r0769at4/jx4wDY7XamTJnCgQMH6NKlC99++y0dOnQAICYmhsLCQtLS0mjSpEml50tJScHHx6dOBBUx+0sQhHMUlTl4fM5WJA
n+OVnE/Ec7EmLwqe1mVb+bJrjHUM7uAlP5usu9oNfrmTJlCi+//DKSJGGxWOjevTvDhg3DZrORmprK6NGjPfWbNGm
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"df[\"new_home\"] = df[\"Sale Type\"].apply(lambda x: 1 if x == \"New\" else 0)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"partial_sale\", \"abnormal_sale\", \"new_home\"])\n",
"interesting_variables.append(\"new_home\")"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Sale Condition\"]\n",
"del df[\"Sale Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"partial_sale 233\n",
"abnormal_sale 189\n",
"new_home 227\n",
"dtype: int64"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>partial_sale</th>\n",
" <th>abnormal_sale</th>\n",
" <th>new_home</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" partial_sale abnormal_sale new_home\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 0 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Street Name\n",
"\n",
"Looking at the value counts, this variable is pretty useless: almost all streets are paved (2,886 vs. 12 gravel)."
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Pave 2886\n",
"Grvl 12\n",
"Name: Street, dtype: int64"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Street\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"del df[\"Street\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Age & Remodeling\n",
"\n",
"The dataset was put together over several years, so the houses were sold in different years. Therefore, the variables with year numbers are converted into ages relative to the year in which the house was sold."
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# For one house the year of being remodeled is one year\n",
"# before it was built. That input error is corrected.\n",
"input_error = (df[\"Year Remod/Add\"] < df[\"Year Built\"])\n",
"assert input_error.sum() == 1\n",
"df.loc[input_error, \"Year Remod/Add\"] = df.loc[input_error, \"Year Built\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Introduce a factor variable *remodeled*. Almost half the houses were remodeled at some point in time."
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"46.0"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"remodeled = (df[\"Year Remod/Add\"] > df[\"Year Built\"])\n",
"df[\"remodeled\"] = 0\n",
"df.loc[remodeled, \"remodeled\"] = 1\n",
"round(100 * remodeled.sum() / df.shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create discrete variables *years_since_built* and *years_since_remodeled*."
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"df[\"years_since_built\"] = df[\"Yr Sold\"] - df[\"Year Built\"]\n",
"df[\"years_since_remodeled\"] = df[\"Yr Sold\"] - df[\"Year Remod/Add\"]"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAEFCAYAAADpIfy5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzt3XtYVNX+P/D3ZgZEuTwc8tgTXhBSj6hRx3g0c0S7KObR1MQLXjKxMtNR8qsCI0IqmqRpCoF2O+d50DIV69jpKTOSFFDsWGogXup4SUHLCykTMMPM+v3Rz0kuM4MwMHs279dfsPfMns8wa97sWbP2WpIQQoCIiBTHzdkFEBFR82DAExEpFAOeiEihGPBERArFgCciUigGPBGRQjHgZeKHH37AvHnzWvQx4+Li8N57793Vfe6s8877p6Wl4auvvnJ4jSRPzmivzvTFF19g2rRpdm/3+OOP44cffrirYy9fvhypqamNLc0mBrxMPPDAA9i4caOzy7DLWp0FBQWorq52QkXkDK7SXls7tbMLaAkJCQnw9/fHggULAAC7d+/Gnj17MG7cOGRkZMBoNMLT0xOxsbH4+9//jqtXryIxMRHXrl3Dr7/+io4dO+LNN9/EPffcg8cffxyhoaE4deoUFixYgF9//RXbtm2Du7s72rRpg+XLl6Nbt25Wa9Hr9YiPj8f58+fh5uaG3r17Y/ny5fj222+xYsUK/Oc//0FcXBy8vb1x6tQpXL58GcHBwVi3bh28vLxw7NgxJCcno6KiAu7u7li8eDEGDBiAn376CStXrkRZWRlMJhOmTZuGyMhIu3+bI0eOYM+ePSgvL8fAgQMRGxsLtVqNv/3tbzh48CD8/f0BwPL7mTNnLHXetnXrVhQWFuL111+HSqXC0KFDm/iKtW5sr/WLi4tDWVkZfv75ZwwZMgTz58/H2rVr8e2338JkMqFXr15ISEiAt7c3Hn/8cYwcORI5OTkoKyuDVqvFd999h6KiIqjVamRkZODee+/FmTNnsHz5cpSVlUGSJERHR2PMmDEAgA0bNuDTTz+Fn58fAgMDLXUYDAarj3unr7/+ut7Xq7y8HEuWLMHJkyfRoUMHqFQqPPzww41tLraJVuDEiRNi4MCBwmg0CiGEmDx5svjwww/FyJEjxfXr14UQQpw+fVoMHDhQ6PV68a9//Uts3rxZCCGE2WwWzz//vHjvvfeEEEI89thjIi0tTQghRHV1tejdu7e4cuWKEEKIjz/+WGzbts1mLR9//LGIjo623H/JkiXi3Llz4tChQ+If//iHEEKI2NhYMXHiRFFVVSUMBoMYM2aM2LlzpzAYDGLgwIFi3759QgghfvjhBzFy5EhRVVUlRowYIQoLC4UQQty8eVM89dRT4vvvv7dZS2xsrBg7dqzQ6/WiqqpKTJ06VWzdulUIIUSPHj3EtWvXLLe9/XvtOt99910hhBBTp04Vn3/+uc3Ho4Zhe61fbGysmD59uuX31NRUsXr1amE2m4UQQrzxxhsiKSnJ8rxXrVolhBDis88+Ez179hTFxcVCCCFefvllkZGRIYxGo3jiiSfEnj17hBBCXL58WQwaNEh89913Yu/evWLEiBHi1q1bwmg0ihdffFFMnTq1QY97/PhxcfbsWauv18qVK8XixYuF2WwW165dE+Hh4WLjxo02n3tjtYoz+JCQEHTq1Ak5OTkICgrCL7/8ApPJhF9++QXPPfec5XaSJOHChQuYPn06/vvf/+Kf//wnzp07hzNnzuDBBx+03C4sLAwAoFKpMHz4cEyaNAlDhgzBwIEDMWrUKJu1PPzww1i/fj2mTZuGRx99FNOnT0dgYCAuX75c43aDBg2Ch4cHAKBHjx747bffcPr0abi5uWHIkCEAgD59+uDTTz/Fjz/+iAsXLkCn01nuX1lZiRMnTuChhx6yWc/o0aPRrl07AMDTTz+Nb775BpMnT7b9B6Vmxf
Zqu57bcnJycOvWLeTn5wMAjEYj7rnnHsv+YcOGAQA6d+6M9u3bo2fPngCALl264LfffsO5c+dQVVVlud29996LYcOG4cCBA/jtt98wdOhQy1n5uHHjkJmZ2aDHBYC8vDyrr9fBgweh0+kgSRL8/f2b9RNvqwh4AJgyZQqysrLQtWtXTJgwAWazGQMGDMCbb75puU1paSk6dOiANWvW4Pjx4xg3bhz69++P6upqiDum7LkdiACwdu1anD59Gvn5+XjnnXewc+dOZGRkWK2jc+fO2Lt3LwoKCnDo0CHMmDEDCQkJ+Mtf/lLjdp6enpafJUmCEAIqlQqSJNW43enTpyGEgK+vL/79739btl+9ehU+Pj52/y4qlarG72p13SZhMBjsHocci+21fnc+F7PZDJ1Oh8GDBwP4ozupqqrKsv/2PxwAcHd3r3Mss9lcZ5sQAtXV1ZbncNud7xN7j3v7NtZer9uPU9+xHa3VfMkaERGB4uJifPnllxg3bhweeeQR5OXl4aeffgIAfPPNN3j66adRVVWF3NxcTJ8+HWPGjME999yD/Px8mEymOse8fv06Bg8eDD8/Pzz33HOIiYnBqVOnbNbxwQcfID4+HhqNBosWLYJGo8GZM2ca9ByCg4MhSRLy8vIAAEVFRZg+fTqCgoLQpk0byxumtLQUI0eORGFhod1jfvbZZzAYDKiqqsKuXbsQHh4OAPD397eMBti7d6/d46hUKn7J6kBsr/ZpNBps3boVBoMBZrMZS5cuxbp16xp8/6CgILi7u+PLL78EAFy5cgV79uzBo48+ikGDBuGLL77AzZs3YTaba/wzasjj2nq9Bg0ahJ07d8JsNuO3335Ddnb2XT3vu9FqzuA9PDwQERGBq1evwt/fH/7+/li+fDkWLFgAIYTli5d27dphzpw5eP3115Geng6VSoW+ffviwoULdY7p7++P2bNn47nnnoOnpydUKhWSk5Nt1jFmzBgcPnwYI0aMQNu2bREQEIBnn30WJ0+ebNBzSE1NxapVq/D666/D3d0dqamp8PDwQHp6OlauXIl3330X1dXVmD9/foO+uOnUqROioqLw+++/Y+jQoRg7diyAP77oW758OXx9ffHoo4/ir3/9q83jPPbYY0hJSYHRaLQcgxqP7dW+l19+GSkpKRg7dixMJhNCQkIQFxfX4Pu7u7sjPT0dycnJSE1Nhclkwpw5c/DII48AAE6dOoVx48bB19cXPXv2xI0bNxr8uN27d7f6emm1WiQlJeGpp56Cv78/evTocVfP+25IQrSO6YJ///13TJ06FUlJSTX6J4nkiO2VHKFVnMEfOHAA//d//4dx48a1yJslJiYGZ8+erXff+vXrERwc3Ow1AMD//vc/vPLKK/XuCwoKqtE/SPLB9loX22vjtJozeCKi1qbVfMlKRNTaMOCJiBTKqX3w/fv3R8eOHevdZzAYaoxjdUWu/hxcof5Lly6hoKDA2WU0mK02X5sr/P1vc5VaXaVOwHatDW33Tg34jh07YteuXfXuKy4uRkhISAtX5Fiu/hxcof5nnnnG2SXcFVttvjZX+Pvf5iq1ukqdgO1aG9ru2UVDRKRQDHgiIoViwBMRKRQDnohIoRjwREQKxYAnIlIoBjwRkUIx4ImIFIoBT0SkUA26knXz5s34+uuvYTQaERUVhX79+iEuLg6SJKF79+5ISkqCm5sb0tLSkJOTA7VaDZ1Oh9DQ0EYVVWk0Wb2Cq9Jogqd78y1xReQstdv27fcA2zw1lt2ALygowPfff48PP/wQFRUVeP/99/Haa68hJiYG/fv3R2JiIrKzsxEQEIDDhw9jx44dKC0thVarRVZWVqOK8nRXoWvcZ/XuO7f6H406JpHcWWv3bPPUWHYDPjc3Fz169MCcOXNQXl6OxYsXY/v27ejXrx8AIDw8HHl5eQgKCoJGo4EkSQgICIDJZML169fh7+9v9dgGgwHFxcV1ttubK6K++8hRZWWly9RaH1evn6i1sxvwN2
7cQElJCTZt2oSLFy9i9uzZEEJYVkv38vLCrVu3UF5eDj8/P8v9bm+3FfAeHh6NmvhHCZMFuQJXr5+otbMb8H5+fgg
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[[\"years_since_built\", \"years_since_remodeled\"]].hist(bins=20);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two factor variables *recently_built* and *recently_remodeled* are created indicating that the corresponding action took place in the last 10 years. The two scatter plots below suggest that these groups of \"recent vs. old\" affect the price."
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"df[\"recently_built\"] = df[\"years_since_built\"].apply(lambda x: 1 if x <= 10 else 0)\n",
"df[\"recently_remodeled\"] = df[\"years_since_remodeled\"].apply(lambda x: 1 if x <= 10 else 0)"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXd0VNXah58zLWUmvUESEhJ6hxCaBJAmoiJFEFCwUZSmKEoUryBer4J+FxtXxK6gUgQRlCKCSJMivQsYCCEQQvpMZiZTzvfHDgkIhEASQtnPWlmZnNlzzs6s5Lzztt+rqKqqIpFIJBJJOaCp7A1IJBKJ5NZBGhWJRCKRlBvSqEgkEomk3JBGRSKRSCTlhjQqEolEIik3pFGRSCQSSbkhjYpEIpFIyg1pVCQSiURSbkijIpFIJJJyQ1fZG7jetGrVioiIiMrehkQikdxUnDx5ks2bN19x3W1nVCIiIli4cGFlb0MikUhuKvr06VOqdTL8JZFIJJJyQxoViUQikZQb0qhIJBKJpNy47XIql8LhcJCSkoLNZqvsrdx2eHp6EhkZiV6vr+ytSCSSckAaFSAlJQUfHx+qV6+OoiiVvZ3bBlVVycjIICUlhZiYmMrejkQiKQdk+Auw2WwEBQVJg3KdURSFoKAg6SFKJLcQ0lMpRBqUykG+75JrITu/gAxzAV4GLQHeerwM8lZ2oyA9FYlEctOxdM8pOk/7nfZv/capHOnp3khIo3KbYLfbmT9/PgAffPAB33333VW9/sUXX2Tt2rWlWrtw4UJWrVrF5s2befbZZwFYuXIlaWlpV7dpieQSOF1uth3PEo/dKscy8ktcr6rq9diWpBBpVG4T0tPTi4xKRdOnTx86d+58wbGvv/4as9l8Xa4vubXRaTU817UO7WsF0z++Gk0i/S65LtNSwPTVh3l7xSEyzPbrvMvbFxmIvEFYuHAhCxYswO12M3jwYL766is0Gg3Nmzfn+eefJzMzk8TERPLy8lBVlalTpxIUFMTLL79MVpb41Pavf/2LOnXqcNdddxEXF0dSUhJBQUF88MEHfPTRRxw5coTp06cXXXPatGmEhYXx8MMPk5OTw+OPP16ihM23337LZ599hsvl4j//+Q9arZbnnnuOefPmAfDggw8ybdo0fvjhB4KDg4mNjQVgzZo1HDhwgMTERL799lsMBkMFvpOS24GIAC+mPxSHTqvgfZl8yrq/0vm/X/4CwOihY1THmtdzi7ct0lO5gfD19WXGjBlMnz6dL7/8ku+++460tDQ2bNjAhx9+SKdOnZgzZw6JiYns3r2bjz76iNatWzNr1iz+/e9/8+qrrwJw4sQJnnnmGebOnUtmZiZ79uzhqaeeombNmowePbroev369WPRokUA/PTTT/To0aPE/cXFxfHVV18xbNgw3n777VL/XnfeeSf16tVj6tSp0qBIyg1fL/1lDQqAv7G49ynIJP/urhfSU7mBiImJITk5mczMTIYPHw6AxWIhOTmZpKQk+vbtC4ibe1xcHMOGDWPTpk0sW7YMgJycHAACAgKoWrUqAFWrVsVuv7TrX61aNYxGI0eOHGHJkiV8+OGHJe4vPj4egGbNmvHWW29d9LyMXUtuJJpE+jN3eGtsDheNIv0rezu3DdKo3EBoNBoiIyOpWrUqn3/+OXq9noULF1KvXj2SkpLYs2cPdevWZevWraxZs4bY2Fjuv/9+evToQUZGRlHO5FJluhqNBrfbfdHxBx98kA8//JCwsDACAwNL3N/u3buJi4vjzz//pFatWnh4eJCRkYHL5cJisZCSknLZ1yqKIo2O5Lri722gVWxQZW/jtkMalRuMwMBAHnvsMQYPHozL5SIiIoLu3bvz1FNPMWHCBBYvXgzAG2+8gclk4uWXX2bevHmYzeYLQlv/JCgoCIfDwdtvv42np2fR8S5duvDaa6+VKpy1a9cuHnnkERRF4Y033i
AkJIS2bdvSt29fqlWrRnR09GVf26xZM8aPH8/nn3+Ov7/81CiR3Koo6m328bFPnz4XJaMPHDhAvXr1KmlHlYvVamXQoEHMnz8fjaZyUmy38/svkdwsXOreeSmkp3Ibs337diZNmsSoUaPQaDQUFBQwZMiQi9bFxMTw2muvVcIOJRLJzYY0KrcxcXFxLFmypOhng8HArFmzKnFHEonkZkeWFEskEomk3JBGRSKRSCTlRoWEvxYuXMgPP/wACM2pAwcOMGvWrKIu7ISEBEaPHo3b7ebVV1/l0KFDGAwGXn/9daKjo9m5c2eZ1kokEomkklArmFdffVWdM2eOev/996vHjx9X3W63OnToUHXfvn3qihUr1MTERFVVVXXHjh3qU089paqqWua1JdG7d++Lju3fv788f2XJVSLff4nkxudS985LUaHhrz179nDkyBHuvfdeCgoKiIqKQlEUEhIS2LhxI9u2baNdu3YANG3alL1792I2m8u89mbE7XYzceJE+vfvz+DBgzl+/Hhlb0kikUiumgo1KjNnzmTUqFGYzWZMJlPRcaPRSF5e3kXHtVptuaytaBbtOEnbKauJefFn2k5ZzaIdJ8t8zl9//ZWCggLmzp3LuHHjmDJlSjnsVCKRSK4vFVZSnJubS1JSEq1bt8ZsNmOxWIqes1gs+Pr6YrPZLjjudrsxmUxlXluRLNpxkpcW7sHqcAFwMtvKSwv3ANCrWcQ1n/dSnphEIpHcbFSYp7J161batGkDgMlkQq/Xk5ycjKqqrF+/nvj4eOLi4ooGP+3cuZPatWuXy9qK5O0Vh4oMyjmsDhdvrzhUpvNeyhNzOp1lOqdEIpFcbyrMU0lKSiIyMrLo58mTJ/P888/jcrlISEigSZMmNGrUiA0bNjBgwABUVeWNN94ol7UVSWq29aqOl5Z/el1utxudTvamSiSSm4sKu2sNHTr0gp+bNm1aNMzpHBqN5pLyH2VdW5GE+3tx8hIGJNzfq0znjYuL47fffuOee+4p8sQkEonkZkM2P14lL3Srg5dee8ExL72WF7rVKdN5u3btisFgYMCAAbz55pu89NJLZTqfRCKRVAYyvnKVnEvGv73iEKnZVsL9vXihW50yJenh8p6YRCKR3ExIo3IN9GoWUWYjIpFIJLciMvwlkUgkknJDGhWJRCKRlBvSqEgkEomk3JBGRSKRSCTlhjQqEolEIik3pFG5gdi1axeDBw+u7G1IJBLJNSNLiq+F3fNg1WuQkwJ+kdB5IjR+sEyn/OSTT1i8eDFeXmXrzJdIJJLKRHoqV8vuebDkacg5Aaji+5KnxfEyEBUVxQcffFA+e5RIJJJKQhqVq2XVa+D4h/aXwyqOl4Fu3bpJAUmJRHLTI43K1ZKTcnXHJZKbGIfLXdlbkNxkSKNytfhFXt1xieQmxGxzsHzvacbN28XhtDzcbrWytyS5SZBG5WrpPBH0/0im673EcYnkFiHH6mTEN9tYvCuVJ2dtI8Nir+wtSW4SpFG5Who/CD3eB79qgCK+93i/zNVfAJGRkdd1NozkEqgqmNPEl9t15fW3KDqtgskgcnyhvh7oNPJWISkdMjN8LTR+sFyMiKQScdjAmgkoYAwBbeG/QtYx+LwbqC54fDkE17ryuVwFoNGDolTkjq8rQUYDy55px6G0PJpE+hNgNFT2liQ3CfLjh+T2JG0vvNcE/tdCGJJzHFomvBTLWdjz/ZXPk5kEi0bAjllgza6w7V5vdFoNkYHedK4XRrCPR2VvR3ITUWGeysyZM1m9ejUOh4OBAwfSsmVLXnzxRRRFoVatWkyaNAmNRsP06dNZs2YNOp2OCRMm0LhxY44fP17mtVeLqqoot9AnzZsFVa2kBPC+hcLDcBXA8fUQXFMcr9UVfp8iQl/1e5Z8jvxM+OFJOLFZGKDqCeDlX/F7l0huYCrEU9m8eTM7duzgu+++Y9asWZw+fZo333yTsWPH8u2336KqKqtWrWLfvn1s2bKF+fPnM23aNC
ZPngxQ5rVXi6enJxkZGZV3g7tNUVWVjIwMPD09r//Fmz8h8mHBtaFm1+LjAbEwaguM2Xbl0JdGX5hbA3QeoKuE30M
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_built\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzsnXdclXX7x9/3WYxz2CgKOMCZK8WdaLkyLcssc5RlaZarbGnZk6b1lD49jSf9pba0rFxppubINHOVe6VpThBxIPvAAQ7n3L8/LgQ1RRQQx/f9evni5j5f7vMF4b7ua30uTdd1HYVCoVAoSgBDWW9AoVAoFDcPyqgoFAqFosRQRkWhUCgUJYYyKgqFQqEoMZRRUSgUCkWJoYyKQqFQKEoMZVQUCoVCUWIoo6JQKBSKEkMZFYVCoVCUGKay3sC1pnnz5oSFhZX1NhQKheKG4vjx42zcuPGy6245oxIWFsb8+fPLehsKhUJxQ9G9e/cirVPhL4VCoVCUGMqoKBQKhaLEUEZFoVAoFCXGLZdTuRhOp5O4uDiysrLKeiuKUsbT05Pw8HDMZnNZb0WhuClRRgWIi4vDx8eHqlWromlaWW9HUUrouk5iYiJxcXFERESU9XYUipsSFf4CsrKyCAoKUgblJkfTNIKCgpRHqlCUIspTyUMZlFsD9f98c5CSmUOiPQcvi5EAbzNeFnUru15QnopCobjhWLL7BO0/+I02//mVE6nK87yeUEblFiE7O5u5c+cCMHHiRGbOnFnGO7o4M2fOZOLEiZd8/dVXX2XNmjVFutZ///tf1eh6E5LrcrM1JlmO3TpHEzMLXa/r+rXYliIPZVRuERISEvKNikJxI2MyGnixYy3a1AimZ5NK3B7ud9F1SRk5TFp1gPeW7yfRnn2Nd3nrogKR1wnz589n3rx5uN1u+vbty1dffYXBYKBx48a8/PLLJCUlMXLkSNLT09F1nQkTJhAUFMTrr79OcrI8tf3rX/+iVq1a3H333URFRXHkyBGCgoKYOHEiU6ZM4eDBg0yaNCn/PT/44ANCQkJ49NFHSU1N5cknn7zkk/2rr75KSkoKKSkpTJ06lc8//5wtW7bgdrvp168fnTt3pm/fvtSqVYsDBw7g7e1NkyZNWLduHWlpaXz55Zd4e3vz2muvERcXh8vl4sknn6RLly5s2bKFd955B19fX4xGIw0bNgRgxowZLF68GE3T6NKlC48//nj+fpxOJ2PGjCEmJga3283w4cNp3rw5y5cvZ/LkyQQGBuJ0OomMjCzF/zVFWREW4MWkPlGYjBrel8inrP07gf/+/DcAVg8TQ9pWv5ZbvGVRnsp1hK+vL5MnT2bSpElMnz6dmTNncurUKdavX88nn3xCu3btmDVrFiNHjmTXrl1MmTKFFi1aMGPGDN566y3efPNNAI4dO8bzzz/P7NmzSUpKYvfu3Tz77LNUr16doUOH5r9fjx49WLBgAQCLFy+ma9euhe6vRYsWzJo1ix07dhAXF8fMmTP5+uuvmTJlCmlpaQA0aNCAr776ipycHDw9PZk2bRrVq1dn8+bNzJ49m8DAQGbNmsW0adP46KOPSEpKYuzYsbz//vtMnz6d8PBwAA4ePMiSJUv47rvv+Pbbb/nll184fPhw/l7mzp1LQEAA3377LZ988gnjxo3D6XQyfvx4pk2bxhdffIGnp2dJ/vcorjN8vcyXNCgA/taCXqQgm+VabEmB8lSuKyIiIoiNjSUpKYmBAwcCkJGRQWxsLEeOHOHhhx8GICoqiqioKJ5++mn++OMPli5dCkBqaioAAQEBVKxYEYCKFSuSnX1x179SpUpYrVYOHjzIokWL+OSTTy67P4C///6bPXv20LdvXwByc3M5fvw4AHXr1gXEQFavXj3/ODs7m0OHDnHHHXcAYLPZqFatGseOHePMmTP5146KiiI2Npa///6b+Ph4+vXrl/+9xcTE5O
/l77//ZuvWrezatSt/DwkJCfj5+REQEABAo0aNLvMTV9zM3B7uz+yBLchyuqgf7l/W27llUEblOsJgMBAeHk7FihX58ssvMZvNzJ8/n9tuu40jR46we/duateuzebNm1m9ejWRkZHcf//9dO3alcTExPycycXKZg0GA263+x/nH3nkET755BNCQkIIDAwsdH9nrxsZGUnz5s156623cLvdfPLJJ1SqVOmy31+1atXYsmULHTt2xG638/fffxMeHk5ISAiHDh2iWrVq7N69Gz8/PyIjI6levTqff/45mqYxffp0atWqxfLly/P3UKFCBZ599lmysrKYPHkywcHBpKWlkZSURGBgILt376ZChQqX3Zfi5sTf20LzyKCy3sYthzIq1xmBgYH069ePvn374nK5CAsLo3Pnzjz77LOMGjWKhQsXAvDOO+9gs9l4/fXXmTNnDna7/bzQ1oUEBQXhdDp57733zgsLdejQgXHjxvHee+8VeY/t2rVj06ZN9OnTh8zMTDp06IDNZrvs1z3yyCO88cYb9O7dm+zsbIYOHUpQUBDjxo1jxIgR2Gw2rFYrfn5+1K5dm5YtW9K7d29ycnJo0KABISEh+dfq1asX//rXv3jsscew2+306dMHi8XC6NGj6d+/P35+fphM6tdbobjWaPotVm/XvXv3fySj//rrL2677bYy2lHZ4nA4eOyxx5g7dy4Gw62RYruV/78ViqvlYvfOi6Ee5W5htm3bxpgxYxgyZAgGg4GcnBz69+//j3URERGMGzeuDHaoUChuNJRRuYWJiopi0aJF+Z9bLBZmzJhRhjtSKBQ3OrdGvEOhUCgU1wRlVBQKhUJRYpRK+Gv+/Pn88MMPgGhO/fXXX8yYMYN///vfGI1GoqOjGTp0KG63mzfffJP9+/djsVh4++23qVKlCjt27CjWWoVCoVCUEXop8+abb+qzZs3S77//fj0mJkZ3u936gAED9D179ujLly/XR44cqeu6rm/fvl1/9tlndV3Xi722MB588MF/nNu7d29JfsuK6xz1/61QXDkXu3dejFINf+3evZuDBw9y7733kpOTQ+XKldE0jejoaDZs2MDWrVtp3bo1AA0bNuTPP//EbrcXe+2NiNvtZvTo0fTs2ZO+ffue1z2uUCgUNwqlalSmTp3KkCFDsNvt5zXHWa1W0tPT/3HeaDSWyNrSZsH247Qav4qIV3+i1fhVLNh+vNjX/OWXX8jJyWH27Nm89NJLjB8/vgR2qlAoFNeWUispTktL48iRI7Ro0QK73U5GRkb+axkZGfj6+pKVlXXeebfbjc1mK/ba0mTB9uO8Nn83DqcLgOMpDl6bvxuAbo3Crvq6F/PEFAqF4kaj1DyVzZs307JlS0DEA81mM7Gxsei6zrp162jSpAlRUVH5A5d27NhBzZo1S2RtafLe8v35BuUsDqeL95bvL9Z1L+aJ5ebmFuuaCoVCca0pNU/lyJEj+TLmAGPHjuXll1/G5XIRHR3N7bffTv369Vm/fj29evVC13XeeeedEllbmsSnOK7ofFG50Otyu91Ku0qhUNxwlNpda8CAAed93rBhQ+bMmXPeOYPBcFH5j+KuLU1C/b04fhEDEurvVazrRkVF8euvv9KlS5d8T0yhUChuNFTz4xXySqdaeJmN553zMht5pVOtYl23Y8eOWCwWevXqxbvvvstrr71WrOspFApFWaDiK1fI2WT8e8v3E5/iINTfi1c61SpWkh4u7YkpFArFjYQyKldBt0ZhxTYiCoVCcTOiwl8KhUKhKDGUUVEoFApFiaGMikKhUChKDGVUFAqFQlFiKKOiUCgUihJDGZXriJ07d9K3b9+y3oZCoVBcNaqk+GrYNQdWjoPUOPALh/ajocEjxbrkZ599xsKFC/HyKl5nvkKhUJQlylO5UnbNgUXPQeoxQJePi56T88WgcuXKTJw4sWT2qFAoFGWEMipXyspx4LxA+8vpkPPFoFOnTkpAUqFQ3PAoo3KlpMZd2X
mF4kbGlVPWO1DcYCijcqX4hV/ZeYXiRiQ7Hf5aBD88Cwn7wO0u6x0pbhCUUblS2o8G8wXJdLOXnFcobhYcKTCnL/w
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_remodeled\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"# Remove the raw year columns from the DataFrame in place.\n",
"for year_column in (\"Yr Sold\", \"Year Built\", \"Year Remod/Add\"):\n",
"    del df[year_column]"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"age_columns = [\n",
"    \"remodeled\", \"years_since_built\", \"years_since_remodeled\",\n",
"    \"recently_built\", \"recently_remodeled\",\n",
"]\n",
"# Register the age columns, skipping names that are already listed so that\n",
"# re-running this cell does not add duplicates to either list.\n",
"new_variables.extend(\n",
"    [name for name in age_columns if name not in new_variables]\n",
")\n",
"interesting_variables.extend(\n",
"    [name for name in age_columns if name not in interesting_variables]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" recently_built recently_remodeled \n",
"Order PID \n",
"1 526301100 0 0 \n",
"2 526350040 0 0 \n",
"3 526351010 0 0 \n",
"4 526353030 0 0 \n",
"5 527105010 0 0 "
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[age_columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Outliers\n",
"\n",
"The instructors' notes state:\n",
"\n",
"> **Five observations** that an instructor may wish to remove from the data set before giving it to students (a plot of SALE PRICE versus GR LIV AREA will quickly indicate these\n",
"points). Three of them are true **outliers** (Partial Sales that likely don’t represent actual market values) and two of them are simply unusual sales (very large houses priced\n",
"relatively appropriately). I would **recommend removing any houses with more than\n",
"4000 square feet** from the data set (which eliminates these five unusual observations)\n",
"before assigning it to students.\n",
"\n",
"To apply a more \"rigorous\" approach, outlier detection is conducted with a so-called Isolation Forest."
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"# Use only numeric columns that are strongly correlated with the target.\n",
"# This mitigates the risk that a \"not so good\" chosen factor variable introduced\n",
"# in this notebook causes an observation to be removed as an outlier.\n",
"with open(\"data/correlated_variables.json\", \"r\") as file:\n",
"    content = json.load(file)\n",
"strongly_correlated = content[\"strongly_correlated\"]\n",
"# Select the columns in the DataFrame's own order. Iterating over a set of\n",
"# strings yields a different order in each Python session (hash randomization),\n",
"# so a set-based selection feeds the forest a different feature order on every\n",
"# run; that is presumably why the seed appeared to have no effect (to be confirmed).\n",
"outlier_features = [column for column in df.columns if column in strongly_correlated]\n",
"df_encoded = encode_ordinals(df[outlier_features])\n",
"iso = IsolationForest(\n",
"    n_estimators=100, bootstrap=True, behaviour=\"new\",\n",
"    contamination=0.005, random_state=random_state\n",
")\n",
"# Negative predictions mark outliers; store them as 1 and everything else as 0.\n",
"outliers = pd.DataFrame(\n",
"    {\"outlier\": (iso.fit_predict(df_encoded) < 0).astype(int)}, index=df.index\n",
")\n",
"# assign() overwrites an existing \"outlier\" column, so re-running this cell\n",
"# does not append a second one the way pd.concat() would.\n",
"df = df.assign(outlier=outliers[\"outlier\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The five aforementioned outliers are among the ones detected."
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEICAYAAACXo2mmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADx0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wcmMyLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvMCCy2AAAIABJREFUeJzs3WdgVFXawPH/nV6TSSeFdHqH0EMXO6ioFHuvsHZBdwXxVVDXRdeyin0XXBQUWbuCiJCAgHRCkZqQhPQ6k+n3vh8GRhAMgSSEcn5fmNw5986ZMJnnnvYcSVEUBUEQBEFoAqqWroAgCIJw7hBBRRAEQWgyIqgIgiAITUYEFUEQBKHJiKAiCIIgNBkRVARBEIQmI4KKIAiC0GREUBEEQRCajAgqgiAIQpPRtHQFTre+ffsSHx/f0tUQBEE4qxQUFLB69eoTljvvgkp8fDwLFy5s6WoIgiCcVcaMGdOgcqL7SxAEQWgyIqgIgiAITUYEFUEQBKHJnHdjKoIgCA3h9XrJz8/H5XK1dFVOK4PBQEJCAlqt9pTOF0FFEAThOPLz87FarSQnJyNJUktX57RQFIXy8nLy8/NJSUk5pWuI7i9BEITjcLlcREREnDcBBUCSJCIiIhrVOhMtFUEQzj7OSrCXgs4ExvDAv83gfAoohzX2PYuWiiAIZ5+cRfBGb/hnN6gpaOnaCEcQQUUQhLOL3wcHDq3sln1Qsa/+8orS/HVqYZ988gler5fVq1fz0EMPATBx4sQWqYsIKoIgnF3UGhj2V0gbAT1uhPiexy/nKIflf4cfnwFH6emt42k2e/ZsZFk+6tjrr7/eInURYyqCIJx9bK3h2g9ApQGd+fhl9iyFpc8GHuvMMPjR01e/JuD1enniiSfIz8/H7/dz6623Mm/ePJ5++mnS0tKYN28eZWVltGrVitLSUh566CFuvvnm4PkDBw4kOzubnTt38uyzgd+DzWZjxowZbNu2jZdeegmtVsvYsWO58sorm6zeIqgIgnB2MoTW/7wp7PfH5qjmrUsz+OSTTwgPD+ell17CbrczZswYdDrdMeWuvfZa3nzzTV5++WU2btx4zPNPPfUUM2bMID09nQULFvDuu+8yYMAA3G43CxYsaPJ6i6AiCMK5Ka4X3PIN+JwQ16Ola3PS9uzZw4ABAwCwWCykpaWRnZ0dfF5p4FjRnj17mD59OhBo/SQnJwOc8jqUExFBRRCEc5MpDJIHtnQtTllaWhq//vorI0eOxG6389tvv9G9e3dKS0tJS0tj27ZtxMTEAIFpwH8cUzksJSWFF154gbi4ONatW0dpaWB8SaVqniF1EVQEQRDOQGPHjuWpp55iwoQJuN1uJk6cSHh4ONOnTycuLo7o6Ohg2YyMDO666y7uv//+Y67z9NNPM3nyZHw+H5Ik8dxzz1FSUtJs9ZaUhrahzhFjxowR+6kIgnBC27dvp0OHDi1djRZxvPfe0O9OMaVYEARBaDIiqAiCIAhNRgQVQRAEocmIoCIIgiA0mWaZ/bVw4UI+//xzANxuN9u3b2fOnDk899xzqNVqMjMzmThxIrIs8/TTT7Nz5050Oh3PPvssSUlJbNy4sVFlBUEQhBaiNLOnn35a+fjjj5XRo0crubm5iizLyh133KHk5OQo33//vTJ58mRFURRlw4YNyj333KMoitLosvW56qqrmvHdCoJwrti2bVtLV6HFHO+9N/S7s1m7v7Zs2cLu3bu57LLL8Hg8JCYmIkkSmZmZrFy5knXr1jFo0CAAunfvztatW7Hb7Y0uKwiCcLaTZZmpU6cybtw4brzxRnJzc1u6Sg3SrEFl9uzZ3H///djtdiwWS/C42Wymtrb2mONqtbpJygqCIJxuizYUMPD5paRM+ZqBzy9l0YbG7fOyZMkSPB4Pn3zyCY888gjPP/98E9W0eTXbivqamhr27dtHv379sNvtOByO4HMOh4OQkBBcLt
dRx2VZxmKxNLqsIAjC6bRoQwFPLNyC0+sHoKDKyRMLtwBwZY/4U7rm8XpnzgbN1lJZu3Yt/fv3BwLJ0LRaLXl5eSiKQlZWFhkZGfTs2ZPly5cDsHHjRtq2bdskZQVBEE6nv3+/MxhQDnN6/fz9+52nfM3j9c74fL5Tvt7p0mwtlX379pGQkBD8efr06Tz66KP4/X4yMzPp1q0bXbp0ITs7m/Hjx6MoCjNmzGiSsoIgCKdTYZXzpI43xB97YmRZRqM589M1NlsN77jjjqN+7t69O/Pnzz/qmEql4plnnjnm3MaWFQRBOJ3ibEYKjhNA4mzGU75mz549+emnn7j00kuDvTNnA7H4URAEoZEeu6gdRq36qGNGrZrHLmp3ytccOXIkOp2O8ePHM3PmTJ544onGVvO0OPPbUoIgCGe4w4Pxf/9+J4VVTuJsRh67qN0pD9LDn/fOnOlEUBEEQWgCV/aIb1QQOVeI7i9BEAShyYigIgiCIDQZEVQEQRCEJiOCiiAIgtBkRFARBEEQmowIKoIgCGewTZs2ceONN7Z0NRpMTCkWBEFoCpvnw4/PQHU+hCbAiKnQdWyjLvnOO+/wxRdfYDSe+sr80020VARBEBpr83z48i9QfQBQAv9++ZfA8UZITEzktddea5o6niYiqAiCIDTWj8+A9w+5v7zOwPFGuOiii86KJJJHEkFFEIQ/5/e0dA3ODtX5J3f8HCaCiiAIx3LXwvYv4fN7oHQHyHJL1+jMFppwcsfPYSKoCIJwLGcVzL8Rtn4GH18PdWUtXaMz24ipoP3DYLrWGDh+nhFBRRCOoCgKJbUuSmtd+GWlpavTctQa0FkDj62xoFLXX/5813UsjHoVQlsDUuDfUa82evYXQEJCwlm1Z9TZNQIkCE3E5fVTVedFkiDCrEOjDtxf5VXUcc1bq5Blhfn39CctynKCKxEYd1BpQZKaudankSkK7s2G4m0Q3xNMES1dozNf17FNEkTOdqKlIpyXdhysYfCLPzHiHz+TV1EXPL5kewmltW7KHR6+2Fh44gtV7INF98KGOYEuo3OFWgO2RGh3MViiW7o2wlmk2Voqs2fPZunSpXi9XiZMmECfPn2YMmUKkiTRpk0bpk2bhkql4vXXX2fZsmVoNBqefPJJunbtSm5ubqPLCkJ9vtx8EI9fxuOXWb2vgtRDLZJh7aL45xINflnh0i6x9V+krgI+vxsOrIYtn0JyJhhtp6H2wumiKArSudQCbQBFaVy3b7N8+65evZoNGzYwb9485syZQ1FRETNnzuTBBx/kv//9L4qi8OOPP5KTk8OaNWtYsGABs2bNYvr06QCNLisIJ3J930TibUbSoiwMbRsVPJ4UbmLJw0P46dGhpEaZ67+ISnuoDx3Q6EFjaMYaC6ebwWCgvLy80V+yZxNFUSgvL8dgOPXPcrO0VLKysmjbti33338/drudxx9/nPnz59OnTx8ABg8eTHZ2NikpKWRmZiJJEnFxcfj9fioqKsjJyWlU2ZEjRzbH2xLOIckRZhbdPwAJiUirPnhcrVYRHdLAPyiDFS55HrpPgPBUMIY3U22FlpCQkEB+fj6lpaUtXZXTymAwkJBw6lOhmyWoVFZWUlhYyFtvvUV+fj733nvvUc1Is9lMbW0tdrsdm+337oLDxxtbVhDq4/H5cXr8hJl+H6A/ZeYoSL+gaSomnFG0Wi0pKSktXY2zTrN0f9lsNjIzM9HpdKSmpqLX64/6snc4HISEhGCxWHA4HEcdt1qtR42JnEpZQfgzdpeXb7YUceecdWTvLsPl9bd0lQThnNIsQaVXr16sWLECRVEoLi7G6XTSv39/Vq9eDcDy5cvJyMigZ8+eZGVlIcsyhYWFyLJMeHg4HTt2bFRZQfgzdrefh+ZvZM2+Cu6Zu54al7elq9Sy6srBfn517wjNq1m6v4YNG8batWu55pprUBSFqVOnkpCQwFNPPcWsWbNITU3loosuQq1Wk5GRwbhx45BlmalTA6tPJ0
+e3KiygvBn1CqJxHATueV1tI+1om7mmT21Li97yxzsL3WQ2SaSCIv+xCedLvZiWHALuKph3EcQLrp6hMaTlPNpagM
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"outlier\", s=15, data=df);"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the observations that were not flagged as outliers.\n",
"df = df.loc[df[\"outlier\"] == 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the \"interesting\" Variables"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"# Serialize the list of interesting variables as JSON.\n",
"with open(\"data/interesting_variables.json\", \"w\") as file:\n",
"    json.dump(interesting_variables, file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the Data"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [],
"source": [
"# Re-order the columns for convenience: the remaining known and newly created\n",
"# columns in sorted order, followed by the target variables.\n",
"kept_columns = set(ALL_COLUMNS.keys()).union(new_variables).intersection(df.columns)\n",
"final_columns = sorted(kept_columns) + TARGET_VARIABLES\n",
"df = df[final_columns]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Discarding useless predictors and adding new ones changed the final dataset significantly."
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2883, 106)"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dimensions (rows, columns) of the final dataset.\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Screen Porch</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.0)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>abnormal_sale</th>\n",
" <th>air_cond</th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" <th>major_street</th>\n",
" <th>new_home</th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" <th>park</th>\n",
" <th>partial_sale</th>\n",
" <th>railway</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0.0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1656.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>112.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>7.914252</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>896.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>120.0</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>7.483244</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>1329.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>108.0</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>7.885329</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>2110.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>8.347590</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1629.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>7.846590</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 2nd Flr SF 3Ssn Porch Bedroom AbvGr Bsmt Cond \\\n",
"Order PID \n",
"1 526301100 1656.0 0.0 0.0 3 Gd \n",
"2 526350040 896.0 0.0 0.0 2 TA \n",
"3 526351010 1329.0 0.0 0.0 3 TA \n",
"4 526353030 2110.0 0.0 0.0 3 TA \n",
"5 527105010 928.0 701.0 0.0 3 TA \n",
"\n",
" Bsmt Exposure Bsmt Full Bath Bsmt Half Bath Bsmt Qual \\\n",
"Order PID \n",
"1 526301100 Gd 1 0 TA \n",
"2 526350040 No 0 0 TA \n",
"3 526351010 No 0 0 TA \n",
"4 526353030 No 1 0 TA \n",
"5 527105010 No 0 0 Gd \n",
"\n",
" Bsmt Unf SF BsmtFin SF 1 BsmtFin SF 2 BsmtFin Type 1 \\\n",
"Order PID \n",
"1 526301100 441.0 639.0 0.0 BLQ \n",
"2 526350040 270.0 468.0 144.0 Rec \n",
"3 526351010 406.0 923.0 0.0 ALQ \n",
"4 526353030 1045.0 1065.0 0.0 ALQ \n",
"5 527105010 137.0 791.0 0.0 GLQ \n",
"\n",
" BsmtFin Type 2 Electrical Enclosed Porch Fence Fireplace Qu \\\n",
"Order PID \n",
"1 526301100 Unf SBrkr 0.0 NA Gd \n",
"2 526350040 LwQ SBrkr 0.0 MnPrv NA \n",
"3 526351010 Unf SBrkr 0.0 NA NA \n",
"4 526353030 Unf SBrkr 0.0 NA TA \n",
"5 527105010 Unf SBrkr 0.0 MnPrv TA \n",
"\n",
" Fireplaces Full Bath Functional Garage Area Garage Cars \\\n",
"Order PID \n",
"1 526301100 2 1 Typ 528.0 2 \n",
"2 526350040 0 1 Typ 730.0 1 \n",
"3 526351010 0 1 Typ 312.0 1 \n",
"4 526353030 2 2 Typ 522.0 2 \n",
"5 527105010 1 2 Typ 482.0 2 \n",
"\n",
" Garage Cond Garage Finish Garage Qual Gr Liv Area Half Bath \\\n",
"Order PID \n",
"1 526301100 TA Fin TA 1656.0 0 \n",
"2 526350040 TA Unf TA 896.0 0 \n",
"3 526351010 TA Unf TA 1329.0 1 \n",
"4 526353030 TA Fin TA 2110.0 1 \n",
"5 527105010 TA Fin TA 1629.0 1 \n",
"\n",
" Kitchen AbvGr Kitchen Qual Land Slope Lot Area Lot Shape \\\n",
"Order PID \n",
"1 526301100 1 TA Gtl 31770.0 IR1 \n",
"2 526350040 1 TA Gtl 11622.0 Reg \n",
"3 526351010 1 Gd Gtl 14267.0 IR1 \n",
"4 526353030 1 Ex Gtl 11160.0 Reg \n",
"5 527105010 1 TA Gtl 13830.0 IR1 \n",
"\n",
" Low Qual Fin SF Mas Vnr Area Misc Val Mo Sold \\\n",
"Order PID \n",
"1 526301100 0.0 112.0 0.0 5 \n",
"2 526350040 0.0 0.0 0.0 6 \n",
"3 526351010 0.0 108.0 12500.0 6 \n",
"4 526353030 0.0 0.0 0.0 4 \n",
"5 527105010 0.0 0.0 0.0 3 \n",
"\n",
" Open Porch SF Overall Cond Overall Qual Paved Drive \\\n",
"Order PID \n",
"1 526301100 62.0 5 6 P \n",
"2 526350040 0.0 6 5 Y \n",
"3 526351010 36.0 6 6 Y \n",
"4 526353030 0.0 5 7 Y \n",
"5 527105010 34.0 5 5 Y \n",
"\n",
" Pool Area Pool QC Screen Porch TotRms AbvGrd Total Bath \\\n",
"Order PID \n",
"1 526301100 0.0 NA 0.0 7 2.0 \n",
"2 526350040 0.0 NA 120.0 5 1.0 \n",
"3 526351010 0.0 NA 0.0 6 1.5 \n",
"4 526353030 0.0 NA 0.0 8 3.5 \n",
"5 527105010 0.0 NA 0.0 6 2.5 \n",
"\n",
" Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"1 526301100 1080.0 272.0 2736.0 \n",
"2 526350040 882.0 260.0 1778.0 \n",
"3 526351010 1329.0 429.0 2658.0 \n",
"4 526353030 2110.0 0.0 4220.0 \n",
"5 527105010 928.0 246.0 2557.0 \n",
"\n",
" Total SF (box-cox-0.0) Utilities Wood Deck SF \\\n",
"Order PID \n",
"1 526301100 7.914252 AllPub 210.0 \n",
"2 526350040 7.483244 AllPub 140.0 \n",
"3 526351010 7.885329 AllPub 393.0 \n",
"4 526353030 8.347590 AllPub 0.0 \n",
"5 527105010 7.846590 AllPub 212.0 \n",
"\n",
" abnormal_sale air_cond build_type_1Fam build_type_2Fam \\\n",
"Order PID \n",
"1 526301100 0 1 1 0 \n",
"2 526350040 0 1 1 0 \n",
"3 526351010 0 1 1 0 \n",
"4 526353030 0 1 1 0 \n",
"5 527105010 0 1 1 0 \n",
"\n",
" build_type_Twnhs found_BrkTil found_CBlock found_PConc \\\n",
"Order PID \n",
"1 526301100 0 0 1 0 \n",
"2 526350040 0 0 1 0 \n",
"3 526351010 0 0 1 0 \n",
"4 526353030 0 0 1 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch major_street new_home nhood_Blmngtn \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 1 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Blueste nhood_BrDale nhood_BrkSide nhood_ClearCr \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_CollgCr nhood_Crawfor nhood_Edwards nhood_Gilbert \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" nhood_Greens nhood_GrnHill nhood_IDOTRR nhood_Landmrk \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_MeadowV nhood_Mitchel nhood_NPkVill nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Names nhood_NoRidge nhood_NridgHt nhood_OldTown \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 0 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 1 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_SWISU nhood_Sawyer nhood_SawyerW nhood_Somerst \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_StoneBr nhood_Timber nhood_Veenker park \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" partial_sale railway recently_built recently_remodeled \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" SalePrice SalePrice (box-cox-0.0) \n",
"Order PID \n",
"1 526301100 215000.0 12.278393 \n",
"2 526350040 105000.0 11.561716 \n",
"3 526351010 172000.0 12.055250 \n",
"4 526353030 244000.0 12.404924 \n",
"5 527105010 189900.0 12.154253 "
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"data/data_clean_with_transformations_and_factors.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}