ames-housing/02_descriptive_visualizations.ipynb

4991 lines
3.6 MiB
Text
Raw Permalink Normal View History

2021-05-25 08:22:14 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Descriptive Visualizations\n",
"\n",
"The purpose of this notebook is to visually examine the nominal features, discard the useless ones among them, and create new factor variables.\n",
"\n",
"The \"main\" plot used in this notebook is *Gr Liv Area* vs. *SalePrice* as the overall living area is the most correlated predictor (which is also very intuitive). Many of the nominal variables change the slopes of the regression lines for sub-groups of data points significantly."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from sklearn.ensemble import IsolationForest\n",
"\n",
"from utils import (\n",
" ALL_COLUMNS,\n",
" NOMINAL_VARIABLES,\n",
" TARGET_VARIABLES,\n",
" load_clean_data,\n",
" encode_ordinals,\n",
" print_column_list,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
2024-07-10 01:31:28 +02:00
"random_state = np.random.RandomState(42)"
2021-05-25 08:22:14 +02:00
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
2024-07-10 01:31:28 +02:00
"source": [
"pd.set_option(\"display.max_columns\", 120)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
2021-05-25 08:22:14 +02:00
"source": [
"sns.set_style(\"white\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data\n"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 5,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df = load_clean_data(\"data/data_clean_with_transformations.csv\")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 6,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 86)"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 6,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 7,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>1st Flr SF (box-cox-0)</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Alley</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bldg Type</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Central Air</th>\n",
" <th>Condition 1</th>\n",
" <th>Condition 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Exter Cond</th>\n",
" <th>Exter Qual</th>\n",
" <th>Exterior 1st</th>\n",
" <th>Exterior 2nd</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Foundation</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Garage Type</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Gr Liv Area (box-cox-0)</th>\n",
" <th>Half Bath</th>\n",
" <th>Heating</th>\n",
" <th>Heating QC</th>\n",
" <th>House Style</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Contour</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Area (box-cox-0.1)</th>\n",
" <th>Lot Config</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>MS SubClass</th>\n",
" <th>MS Zoning</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Mas Vnr Type</th>\n",
" <th>Misc Feature</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Neighborhood</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Roof Matl</th>\n",
" <th>Roof Style</th>\n",
" <th>Sale Condition</th>\n",
" <th>Sale Type</th>\n",
" <th>Screen Porch</th>\n",
" <th>Street</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.2)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>Year Built</th>\n",
" <th>Year Remod/Add</th>\n",
" <th>Yr Sold</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>BrkFace</td>\n",
" <td>Plywood</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>Fa</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>18.196923</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>112.0</td>\n",
" <td>Stone</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>Names</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>19.344072</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>1960</td>\n",
" <td>1960</td>\n",
" <td>2010</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>2</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>Y</td>\n",
" <td>Feedr</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>15.499290</td>\n",
" <td>Inside</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RH</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>120.0</td>\n",
" <td>Pave</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>17.333478</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>1961</td>\n",
" <td>1961</td>\n",
" <td>2010</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>Wd Sdng</td>\n",
" <td>Wd Sdng</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>16.027549</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>108.0</td>\n",
" <td>BrkFace</td>\n",
" <td>Gar2</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>19.203658</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>1958</td>\n",
" <td>1958</td>\n",
" <td>2010</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>BrkFace</td>\n",
" <td>BrkFace</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Ex</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>15.396064</td>\n",
" <td>Corner</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>21.548042</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>1968</td>\n",
" <td>1968</td>\n",
" <td>2010</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>6.833032</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>PConc</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1629.0</td>\n",
" <td>7.395722</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Gd</td>\n",
" <td>2Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>15.946705</td>\n",
" <td>Inside</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>060</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gilbert</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>19.016856</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>1997</td>\n",
" <td>1998</td>\n",
" <td>2010</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 1st Flr SF (box-cox-0) 2nd Flr SF 3Ssn Porch \\\n",
"Order PID \n",
"1 526301100 1656.0 7.412160 0.0 0.0 \n",
"2 526350040 896.0 6.797940 0.0 0.0 \n",
"3 526351010 1329.0 7.192182 0.0 0.0 \n",
"4 526353030 2110.0 7.654443 0.0 0.0 \n",
"5 527105010 928.0 6.833032 701.0 0.0 \n",
"\n",
" Alley Bedroom AbvGr Bldg Type Bsmt Cond Bsmt Exposure \\\n",
"Order PID \n",
"1 526301100 NA 3 1Fam Gd Gd \n",
"2 526350040 NA 2 1Fam TA No \n",
"3 526351010 NA 3 1Fam TA No \n",
"4 526353030 NA 3 1Fam TA No \n",
"5 527105010 NA 3 1Fam TA No \n",
"\n",
" Bsmt Full Bath Bsmt Half Bath Bsmt Qual Bsmt Unf SF \\\n",
"Order PID \n",
"1 526301100 1 0 TA 441.0 \n",
"2 526350040 0 0 TA 270.0 \n",
"3 526351010 0 0 TA 406.0 \n",
"4 526353030 1 0 TA 1045.0 \n",
"5 527105010 0 0 Gd 137.0 \n",
"\n",
" BsmtFin SF 1 BsmtFin SF 2 BsmtFin Type 1 BsmtFin Type 2 \\\n",
"Order PID \n",
"1 526301100 639.0 0.0 BLQ Unf \n",
"2 526350040 468.0 144.0 Rec LwQ \n",
"3 526351010 923.0 0.0 ALQ Unf \n",
"4 526353030 1065.0 0.0 ALQ Unf \n",
"5 527105010 791.0 0.0 GLQ Unf \n",
"\n",
" Central Air Condition 1 Condition 2 Electrical \\\n",
"Order PID \n",
"1 526301100 Y Norm Norm SBrkr \n",
"2 526350040 Y Feedr Norm SBrkr \n",
"3 526351010 Y Norm Norm SBrkr \n",
"4 526353030 Y Norm Norm SBrkr \n",
"5 527105010 Y Norm Norm SBrkr \n",
"\n",
" Enclosed Porch Exter Cond Exter Qual Exterior 1st \\\n",
"Order PID \n",
"1 526301100 0.0 TA TA BrkFace \n",
"2 526350040 0.0 TA TA VinylSd \n",
"3 526351010 0.0 TA TA Wd Sdng \n",
"4 526353030 0.0 TA Gd BrkFace \n",
"5 527105010 0.0 TA TA VinylSd \n",
"\n",
" Exterior 2nd Fence Fireplace Qu Fireplaces Foundation \\\n",
"Order PID \n",
"1 526301100 Plywood NA Gd 2 CBlock \n",
"2 526350040 VinylSd MnPrv NA 0 CBlock \n",
"3 526351010 Wd Sdng NA NA 0 CBlock \n",
"4 526353030 BrkFace NA TA 2 CBlock \n",
"5 527105010 VinylSd MnPrv TA 1 PConc \n",
"\n",
" Full Bath Functional Garage Area Garage Cars Garage Cond \\\n",
"Order PID \n",
"1 526301100 1 Typ 528.0 2 TA \n",
"2 526350040 1 Typ 730.0 1 TA \n",
"3 526351010 1 Typ 312.0 1 TA \n",
"4 526353030 2 Typ 522.0 2 TA \n",
"5 527105010 2 Typ 482.0 2 TA \n",
"\n",
" Garage Finish Garage Qual Garage Type Gr Liv Area \\\n",
"Order PID \n",
"1 526301100 Fin TA Attchd 1656.0 \n",
"2 526350040 Unf TA Attchd 896.0 \n",
"3 526351010 Unf TA Attchd 1329.0 \n",
"4 526353030 Fin TA Attchd 2110.0 \n",
"5 527105010 Fin TA Attchd 1629.0 \n",
"\n",
" Gr Liv Area (box-cox-0) Half Bath Heating Heating QC \\\n",
"Order PID \n",
"1 526301100 7.412160 0 GasA Fa \n",
"2 526350040 6.797940 0 GasA TA \n",
"3 526351010 7.192182 1 GasA TA \n",
"4 526353030 7.654443 1 GasA Ex \n",
"5 527105010 7.395722 1 GasA Gd \n",
"\n",
" House Style Kitchen AbvGr Kitchen Qual Land Contour \\\n",
"Order PID \n",
"1 526301100 1Story 1 TA Lvl \n",
"2 526350040 1Story 1 TA Lvl \n",
"3 526351010 1Story 1 Gd Lvl \n",
"4 526353030 1Story 1 Ex Lvl \n",
"5 527105010 2Story 1 TA Lvl \n",
"\n",
" Land Slope Lot Area Lot Area (box-cox-0.1) Lot Config \\\n",
"Order PID \n",
"1 526301100 Gtl 31770.0 18.196923 Corner \n",
"2 526350040 Gtl 11622.0 15.499290 Inside \n",
"3 526351010 Gtl 14267.0 16.027549 Corner \n",
"4 526353030 Gtl 11160.0 15.396064 Corner \n",
"5 527105010 Gtl 13830.0 15.946705 Inside \n",
"\n",
" Lot Shape Low Qual Fin SF MS SubClass MS Zoning \\\n",
"Order PID \n",
"1 526301100 IR1 0.0 020 RL \n",
"2 526350040 Reg 0.0 020 RH \n",
"3 526351010 IR1 0.0 020 RL \n",
"4 526353030 Reg 0.0 020 RL \n",
"5 527105010 IR1 0.0 060 RL \n",
"\n",
" Mas Vnr Area Mas Vnr Type Misc Feature Misc Val Mo Sold \\\n",
"Order PID \n",
"1 526301100 112.0 Stone NA 0.0 5 \n",
"2 526350040 0.0 None NA 0.0 6 \n",
"3 526351010 108.0 BrkFace Gar2 12500.0 6 \n",
"4 526353030 0.0 None NA 0.0 4 \n",
"5 527105010 0.0 None NA 0.0 3 \n",
"\n",
" Neighborhood Open Porch SF Overall Cond Overall Qual \\\n",
"Order PID \n",
"1 526301100 Names 62.0 5 6 \n",
"2 526350040 Names 0.0 6 5 \n",
"3 526351010 Names 36.0 6 6 \n",
"4 526353030 Names 0.0 5 7 \n",
"5 527105010 Gilbert 34.0 5 5 \n",
"\n",
" Paved Drive Pool Area Pool QC Roof Matl Roof Style \\\n",
"Order PID \n",
"1 526301100 P 0.0 NA CompShg Hip \n",
"2 526350040 Y 0.0 NA CompShg Gable \n",
"3 526351010 Y 0.0 NA CompShg Hip \n",
"4 526353030 Y 0.0 NA CompShg Hip \n",
"5 527105010 Y 0.0 NA CompShg Gable \n",
"\n",
" Sale Condition Sale Type Screen Porch Street TotRms AbvGrd \\\n",
"Order PID \n",
"1 526301100 Normal WD 0.0 Pave 7 \n",
"2 526350040 Normal WD 120.0 Pave 5 \n",
"3 526351010 Normal WD 0.0 Pave 6 \n",
"4 526353030 Normal WD 0.0 Pave 8 \n",
"5 527105010 Normal WD 0.0 Pave 6 \n",
"\n",
" Total Bath Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"1 526301100 2.0 1080.0 272.0 2736.0 \n",
"2 526350040 1.0 882.0 260.0 1778.0 \n",
"3 526351010 1.5 1329.0 429.0 2658.0 \n",
"4 526353030 3.5 2110.0 0.0 4220.0 \n",
"5 527105010 2.5 928.0 246.0 2557.0 \n",
"\n",
" Total SF (box-cox-0.2) Utilities Wood Deck SF Year Built \\\n",
"Order PID \n",
"1 526301100 19.344072 AllPub 210.0 1960 \n",
"2 526350040 17.333478 AllPub 140.0 1961 \n",
"3 526351010 19.203658 AllPub 393.0 1958 \n",
"4 526353030 21.548042 AllPub 0.0 1968 \n",
"5 527105010 19.016856 AllPub 212.0 1997 \n",
"\n",
" Year Remod/Add Yr Sold SalePrice SalePrice (box-cox-0) \n",
"Order PID \n",
"1 526301100 1960 2010 215000.0 12.278393 \n",
"2 526350040 1961 2010 105000.0 11.561716 \n",
"3 526351010 1958 2010 172000.0 12.055250 \n",
"4 526353030 1968 2010 244000.0 12.404924 \n",
"5 527105010 1998 2010 189900.0 12.154253 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 7,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Newly created variables are collected in the *new_variables* list."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 8,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Derived Characteristics\n",
"\n",
"Certain characteristics of a house are assumed to have a \"binary\" influence on the sales price. For example, the existence of a pool could be an important predictor while the exact size of the pool can be deemed not so important.\n",
"\n",
"The below cell creates boolean factor variables out of a set of numeric variables."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 9,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"derived_variables = {\n",
" \"has 2nd Flr\": \"2nd Flr SF\",\n",
" \"has Bsmt\": \"Total Bsmt SF\",\n",
" \"has Fireplace\": \"Fireplaces\",\n",
" \"has Garage\": \"Garage Area\",\n",
" \"has Pool\": \"Pool Area\",\n",
" \"has Porch\": \"Total Porch SF\",\n",
"}\n",
"# Factorize numeric columns.\n",
"for factor_column, column in derived_variables.items():\n",
" df[factor_column] = df[column].apply(lambda x: 1 if x > 0 else 0)\n",
"derived_variables = list(derived_variables.keys())"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 10,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(derived_variables)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 11,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch \n",
"Order PID \n",
"1 526301100 1 \n",
"2 526350040 1 \n",
"3 526351010 1 \n",
"4 526353030 0 \n",
"5 527105010 1 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 11,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[derived_variables].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2nd Floors\n",
"\n",
"A second floor may have a positive effect on the sales price. However, having a second floor correlates with overall living space. The individual effect is therefore not as clear as it seems in the plot below."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 12,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbA4d/MZCa990YakAQChA4hFLGgKEoRK6KIYsculquComK7l4vXT1ARUFBEQWxgR6T33gMBUiC918nMfH/sZJJJAgQIhLLe58lzc86cOWcneJOVvddeS2OxWCwIIYQQQoizom3pAQghhBBCXAokqBJCCCGEaAYSVAkhhBBCNAMJqoQQQgghmoEEVUIIIYQQzUCCKiGEEEKIZiBBlRBCCCFEM7Br6QFcTsxmM5mZmTg7O6PRaFp6OEIIIYRoAovFQklJCX5+fmi1J56PkqDqPMrMzKR///4tPQwhhBBCnIHly5cTEBBwwtclqDqPnJ2dAfWP4uLi0sKjEUIIIURTFBcX079/f+vv8RORoOo8qlnyc3FxkaBKCCGEuMicKnVHEtWFEEIIIZqBBFVCCCGEEM1AgiohhBBCiGYgOVVCCCHERcRsNlNZWdnSw7ik6PV6dDrdWd9HgiohhBDiIlFZWUlycjJms7mlh3LJ8fDwICAg4KzqSEpQJYQQQlwELBYLx44dQ6fTERoaetIilKLpLBYLpaWlZGZmAhAYGHjG95KgSgghhLgIVFVVUVpaSlBQEE5OTi09nEuKo6MjoIp0+/n5nfFSoIS5QgghxEXAZDIBYDAYWngkl6aaQNVoNJ7xPSSoEkIIIS4i0jv23GiO76sEVUIIIYQQzUCCKiGEEEKIZiBBlRBCiMub2QRH18L6T2DXd1B0vKVHdNruuusu3njjjZYeRrNYtGgR3bp1O+P3p6amEh0dzZ49e5pxVE0ju/+EEEJc3g6vgC+GgaW69lOHW2HIf8Dg3LLjuoAZjUamTp3KP//8Q0pKCi4uLiQkJPD000/j7+9/zp8fHR3d4FyXLl346quvzvmzT0aCKiGEEJe3rfNqAyqAHV9DwiMQ2KnlxnSBKy8vZ/fu3Tz00EPExMRQWFjIG2+8wUMPPcSiRYvOyxjeeust+vbtaz3W6/VNep/FYsFkMmFn1/whkCz/CSGEuLwZ3GyPNVrQXnxzDhaLhXfeeYcePXrQp08fPvjgA5vXZ82axZAhQ4iPj6d///5MnDiRkpIS6+tpaWk8+OCDdO/enfj4eK6//nqWL1/e6LNcXV2ZNWsWgwcPJjIykvj4eF5++WV27dpFeno6ULsM99tvv3HXXXfRqVMnbrzxRrZs2WJzr0WLFjFgwAA6derEI488Qn5+fpO+Xjc3N3x9fa0fHh4ejV63bt06oqOjWb58OcOHD6dDhw5s2rSpSc84XRJUCSGEuLzF3wb2rrXHfZ8B7zZNe29ZHuQdBdOZ1zZqLt999x1OTk4sWLCAZ599lg8//JBVq1ZZX9doNLz00kv89NNPTJkyhbVr1/Luu+9aX3/ttdeorKxk7ty5/PjjjzzzzDOnVWS0uLgYjUaDm5ttkPqf//yHsWPHsnjxYsLDw3n66aepqqoCYNu2bbz00kvceeedLF68mJ49e/LRRx+d5Xeice+//z5PP/00S5YsaXT5sDlcfKG4EEII0ZxCusN9f0LmHnD2gYBOYNeEApuHV8FPT0LOAYi/E/o9C55h5368JxAdHc2jjz4KQHh4OHPnzmXNmjX06dMHgHvuucd6bUhICE888QSvvvoqEydOBCA9PZ1BgwZZA47Q0NAmP7uiooL33nuP66+/HhcXF5vX7r33XgYMGADA+PHjuf766zly5AhRUVF8/vnn9O3bl/vvvx+AiIgItmzZwooVK075zKeeesqm8vm7777LVVdddcLrx48fb/1enCsSVAkhhBC+0eqjqQrS4JvRUJKtjrd8Ad5RkPjkuRlfE9SfffH19S
UnJ8d6vHr1ambMmMGhQ4coLi7GZDJRUVFBWVkZjo6OjB49mokTJ7Jy5UoSEhK45ppriImJOeVzjUYjjz/+OBaLhUmTJp10XL6+vgDk5uYSFRXFwYMHGwRC8fHxTQqqXnjhBRISEhrc+0Q6dOhwynueLVn+E0IIIU5XcUZtQFUj6c+WGUu1+onXGo0Gi8UCqPymBx54gOjoaD744AMWLVrEK6+8AtS2ZRk5ciR//PEHN910E/v37+fmm2/miy++OOkzjUYjTzzxBOnp6Xz22WcNZqnANoG8pmq52WxucN3p8vX1JSwszPpxqqXKmv5+55IEVUIIIcTpcg8B91a252KHtMxYmmDXrl1YLBaef/554uPjiYiIIDMzs8F1gYGB3H777fzvf/9jzJgxLFiw4IT3rAmojhw5wuzZs/H09DztcUVFRbF9+3abc9u2bTvt+1woJKgSQgghTpeLH9wyB9oMAtdAuOIlaHdTS4/qhMLCwjAajXzxxRekpKSwePFi5s+fb3PNG2+8wYoVK0hJSWHXrl2sW7eOqKioRu9nNBoZP348O3fu5L333sNkMpGVlUVWVhaVlZVNHtddd93FihUrmDlzJocPH2bu3LlNWvq7UElOlRBCCHEmgrvArZ9DRQk4e7f0aE4qJiaGF154gU8++YR///vfdOvWjaeeeooJEyZYrzGbzbz22mscP34cFxcX+vbtywsvvNDo/TIyMvjrr78AuOkm22Dy888/p2fPnk0aV3x8PK+//joffPAB06ZNo3fv3jz00EP83//93xl+pS1LY6lZcBXnXHFxMV27dmXTpk2NrjsLIYQQJ1JeXk5ycjIRERE4ODi09HAuOSf7/jb197cs/wkhhBBCNAMJqoQQQgghmoEEVUIIIYQQzUCCKiGEEEKIZiBBlRBCCCFEM5CgSgghhBCiGUhQJYQQQgjRDCSoEkIIIYRoBhJUCSGEEEI0AwmqhBBCCHFBmzdvHgMHDqRDhw6MHDmyQRPmC4UEVUIIIYS4YC1ZsoS33nqLRx55hO+++46YmBjGjh1LTk5OSw+tAQmqhBBCCNFkFUYTxwvKqTCazsvzZs2axS233MKIESNo3bo1kyZNwsHBgYULF56X55+OFg2qBg4cSHR0dIOPSZMmAVBRUcGkSZPo2bMnnTt35rHHHiM7O9vmHunp6YwbN45OnTrRu3dv3n77baqqqmyuWbduHcOGDSMuLo6rr76aRYsWNRjLqaYWmzIWIYQQ4lK2P6OIJxZsZeD7f/PEgq3szyg6p8+rrKxk165dJCQkWM9ptVoSEhLYsmXLOX32mWjRoOrbb79l5cqV1o9Zs2YBcO211wLw5ptvsmzZMqZOncoXX3xBZmYmjz76qPX9JpOJBx54AKPRyPz585kyZQrfffcd06ZNs16TkpLCAw88QM+ePfn++++5++67+de//sWKFSus1zRlavFUYxFCCCEuZRVGE//5Yz9LdxyntNLE0h3HmfrH/nM6Y5WXl4fJZMLb29vmvLe39wU5sdGiQZWXlxe+vr7Wj2XLltGqVSt69OhBUVERCxcu5Pnnn6d3797ExcXx5ptvsmXLFrZu3QrAypUrSUpK4t133yU2Npb+/fvz+OOPM2/ePCorKwGYP38+ISEhPP/880RFRTFq1CgGDRrE7NmzreM41dRiU8YihBBCXMrySo0s35dlc+7vfVnklRpbaEQXngsmp6qyspIffviBESNGoNFo2LlzJ0aj0WbKLyoqiqCgIGsgs3XrVtq2bYuPj4/1msTERIqLi0lKSrJe07t3b5tnJSYmWu/RlKnFpoxFCCGEuJR5OunpH+1rc25AtC+eTvpz90xPT3Q6XYOk9JycHJvf/ReKCyao+uOPPygqKmLYsGEAZGdno9frcXNzs7nO29ubrKws6zX1v6k1x6e6pri4mPLy8iZNLTZlLEIIIcSlzF6v48mr2jK4QwBOBh2DOwTwxFVtsdfrztkzDQYD7du3Z82aNdZzZrOZNW
vW0Llz53P23DNl19IDqLFw4UL69euHv79/Sw9FCCGEEI1o6+/Kf26JJ6/UiKeT/pwGVDXGjBnDhAkTiIuLo2PHjsy
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has 2nd Flr\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basements\n",
"\n",
"Nearly all houses in Ames, IA, have a basement. Therefore, *has Bsmt* is most likely not an important predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 13,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD4HElEQVR4nOydZ3RUZdeGr5n0HkghhFATeoDQWyCKqIiiAoIoqCAKFsSCCmIDRUDl9UNsYANEFJFioaiggCC9hN5ChwBppPdkvh87k8kkAZIQCJB9rTWLnDPPnPPMRJmbXe5tMJlMJhRFURRFUZQrwljRG1AURVEURbkZUFGlKIqiKIpSDqioUhRFURRFKQdUVCmKoiiKopQDKqoURVEURVHKARVViqIoiqIo5YCKKkVRFEVRlHLAtqI3UJnIzc0lKioKFxcXDAZDRW9HURRFUZQSYDKZSElJwdfXF6Px4vEoFVXXkKioKMLCwip6G4qiKIqilIE1a9bg5+d30edVVF1DXFxcAPmluLq6VvBuFEVRFEUpCcnJyYSFheV/j18MFVXXEHPKz9XVVUWVoiiKotxgXK50RwvVFUVRFEVRygEVVYqiKIqiKOWAiipFURRFUZRyQGuqFEVRFOUmJycnh6ysrIrexnWLnZ0dNjY2V3wdFVWKoiiKcpNiMpk4d+4c8fHxFb2V6x5PT0/8/PyuyEdSRZWiKIqi3KSYBZWvry/Ozs5qPF0MJpOJ1NRUoqKiAKhevXqZr6WiSlEURVFuQnJycvIFlZeXV0Vv57rGyckJEJNuX1/fMqcCtVBdURRFUW5CzDVUzs7OFbyTGwPz53QltWcqqhRFURTlJkZTfiWjPD4nFVWKoiiKoijlgIoqRVEURVGUckBFlaIoilK5yc2Bkxth81ewdzEknavoHV0THnnkEd57772K3sZNhXb/KYqiKJWb42thTm8w5cpxsweh1/+BvUvF7usm5JFHHmHz5s35x15eXrRp04bRo0dTo0aNq3bfMWPGkJiYyOeff37V7gEaqVIURVEqO+FzLYIKYPdPEBtRcfu5yenfvz/r1q1j7dq1fP7555w7d45XXnmlordVLqioUhRFUSo39u7WxwYjGCtHIsdkMvHBBx/Qrl07OnfuzCeffGL1/MyZM+nVqxchISGEhYUxbtw4UlJS8p8/c+YMTz31FG3btiUkJIS7776bNWvWXPKejo6O+Pj44OvrS0hICAMHDmTfvn35zyckJDBq1Cg6dOhA8+bNueOOO1i4cCEAp0+fpmHDhixbtoyHH36Y5s2b07dvX44dO8auXbvo06cPLVu25IknniAuLg6ATz75hMWLF/P333/TsGFDGjZsyKZNm8rrI7SicvxXoyiKoigXI2SARKcykuS4y8vgVb9kr027AOlJ4F4dbOyu3h6vEosXL2bIkCHMnz+f8PBwxowZQ6tWrejcuTMgNgOvv/46AQEBnDp1ivHjx/Phhx8ybtw4AN555x2ysrL4/vvvcXZ2JiIiolS+WPHx8SxfvpzmzZvnn/v44485cuQIX331FVWqVOHkyZOkp6dbve6TTz5h7Nix+Pv7M3bsWEaNGoWLiwuvv/46Tk5OvPDCC3z88ceMHz+exx9/nCNHjpCcnMykSZMA8PDwuMJPrnhUVCmKoiiVm4C28MTfELUfXLzBrwXY2l/+dcf/gyUvQuxhCBkIXV+BKrWv/n7LkYYNGzJixAgA6tSpw/fff8+GDRvyRdXgwYPz1wYEBPDCCy/w9ttv54uqyMhI7rzzTho2bAhAzZo1L3vPH3/8kQULFmAymUhLS6NOnTp88803+c9HRkbSuHFjmjVrln/fwjz++ON06dIFgEcffZSXXnqJWbNm0bp1awAeeOABFi1aBICLiwuOjo5kZmbi4+NTmo+n1KioUhRFURSfhvIoKQln4OdHISVGjnfMAa9ACH3x6uzvKmEWQ2Z8fHyIjY3NP16/fj0zZszg6NGjJCcnk5
OTQ0ZGBmlpaTg5OfHoo48ybtw41q1bR6dOnbjjjjto1KjRJe/Zq1cvnnrqKQBiYmKYMWMGQ4cOZeHChbi6uvLQQw8xcuRI9u3bR+fOnenevTutWrW66L7NI3gKnzOn/64lWlOlKIqiKKUl+bxFUJmJ+Lti9nIF2Npax1YMBgMmkwmQ+qXhw4fTsGFDPvnkExYtWsRbb70FWEa59OvXj5UrV3Lfffdx6NAhHnjgAebMmXPJe7q6ulK7dm1q165N69atee+99zh+/DjLly8HICwsjFWrVjF48GCioqIYPHgw77//vtU17OwsqVazE3rB92IwGMjNzeVao6JKURRFUUqLRwB41LI+17hXxezlKrF3715MJhNjxowhJCSEunXrEhUVVWRd9erVeeihh/j000/z67NKg3l4ccG6qapVq9K7d2+mTJnC2LFj+emnn67ovdjZ2V0TkaXpP0VRFEUpLa6+0H82rJ4M53ZBm8ehyX0VvatypXbt2mRlZTFnzhy6devGtm3bmDdvntWa9957j65du1KnTh0SExPZtGkTgYGBl7xueno60dHRAMTGxvL555/j4OCQX8f18ccf07RpU+rXr09mZiarV6++7DUvR40aNVi3bh1Hjx7F09MTNzc3q2hXeaGiSlEURVHKQo1W8OB3kJECLl4VvZtyp1GjRrz22mt89dVXfPTRR7Rp04aXXnqJ0aNH56/Jzc3lnXfe4dy5c7i6utKlSxdee+21S153/vz5+dEsDw8PGjZsyJdffkm9evUAiSp99NFHnDlzBkdHR1q3bs1HH310Re+lf//+bN68mb59+5Kamsp3331H+/btr+iaxWEwmZOnylUnOTmZ1q1bs23bNlxdXSt6O4qiKMpNTHp6OseOHaNu3bo4OjpW9Hauey71eZX0+1trqhRFURRFUcoBFVWKoiiKoijlgIoqRVEURVGUckBFlaIoiqIoSjmgokpRFEVRFKUcUFGlKIqiKIpSDqioUhRFURRFKQdUVCmKoiiKopQDKqoURVEURVHKARVViqIoiqLckMydO5du3brRrFkz+vXrx65duyp0PyqqFEVRFEW54Vi2bBmTJk3i2WefZfHixTRq1IihQ4cSGxtbYXtSUaUoiqIoyhWTkZXDuYR0MrJyrsn9Zs6cSf/+/enbty9BQUGMHz8eR0dHFi5ceE3uXxwVKqq6detGw4YNizzGjx8PQEZGBuPHj6d9+/a0bNmS5557jpiYGKtrREZGMmzYMFq0aEHHjh15//33yc7OtlqzadMmevfuTXBwMLfffjuLFi0qspfLhRBLshdFURRFqYwcOp/EC/PD6fa/1bwwP5xD55Ou6v0yMzPZu3cvnTp1yj9nNBrp1KkTO3bsuKr3vhQVKqoWLFjAunXr8h8zZ84EoEePHgBMnDiRVatWMXXqVObMmUNUVBQjRozIf31OTg7Dhw8nKyuLefPmMXnyZBYvXsy0adPy15w6dYrhw4fTvn17fv31Vx577DHeeOMN1q5dm7+mJCHEy+1FURRFUSojGVk5/N/KQyzffY7UzByW7z7H1JWHrmrE6sKFC+Tk5ODl5WV13svLq0IDHhUqqqpWrYqPj0/+Y9WqVdSqVYt27dqRlJTEwoULGTNmDB07diQ4OJiJEyeyY8cOwsPDAVi3bh0RERF8+OGHNG7cmLCwMJ5//nnmzp1LZmYmAPPmzSMgIIAxY8YQGBjIoEGDuPPOO5k1a1b+Pi4XQizJXhRFURSlMnIhNYs1B6Otzq0+GM2F1KwK2lHFcd3UVGVmZvLbb7/Rt29fDAYDe/bsISsryyq0FxgYiL+/f76QCQ8Pp0GDBnh7e+evCQ0NJTk5mYiIiPw1HTt2tLpXaGho/jVKEkIsyV4URVEUpTJSxdmOsIY+VuduaehDFWe7q3fPKlWwsbEpUpQeGxtrpQmuNdeNqFq5ciVJSUn07t0bgJiYGOzs7HB3d7da5+XlRXR0dP6awh+e+fhya5KTk0lPTy9RCL
Eke1EURVGUyoiDnQ0vdm9Az2Z+ONvb0LOZHy90b4CDnc1Vu6e9vT1NmzZlw4YN+edyc3PZsGEDLVu2vGr3vRy2FXb
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Bsmt\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fireplaces\n",
"\n",
"Bigger houses are more likely to have a fireplace. Thus, the variable *has Fireplace* might be an interesting predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 14,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydZ3hU5daG75n0HtJIQughtAAJnRCKiIIoKnBAFEQRBSt+VuwComA9iHoEG6CiiDSlKigivffeCRBSSe/JfD8Wk8kkAUIIBMi6r2sus/e8s/eayTnMk7XW+yyDyWQyoSiKoiiKolwRxsoOQFEURVEU5WZARZWiKIqiKEoFoKJKURRFURSlAlBRpSiKoiiKUgGoqFIURVEURakAVFQpiqIoiqJUACqqFEVRFEVRKgDbyg6gKlFQUEBsbCwuLi4YDIbKDkdRFEVRlDJgMplIT0/Hz88Po/HC+SgVVdeQ2NhYunTpUtlhKIqiKIpSDlauXIm/v/8Fn1dRdQ1xcXEB5Jfi6upaydEoiqIoilIW0tLS6NKlS+H3+IVQUXUNMZf8XF1dVVQpiqIoyg3GpVp3tFFdURRFURSlAlBRpSiKoiiKUgGoqFIURVEURakAtKdKURRFUSqBgoICcnJyKjsMBbCzs8PGxuaKr6OiSlEURVGuMTk5ORw7doyCgoLKDkU5j6enJ/7+/lfkI6miSlEURVGuISaTiejoaGxsbKhZs+ZFzSSVq4/JZCIjI4PY2FgAAgICyn0tFVWKoiiKcg3Jy8sjIyODwMBAnJ2dKzscBXBycgLEpNvPz6/cpUCVx4qiKIpyDcnPzwfA3t6+kiNRimIWuLm5ueW+hooqRVEURakEdAbs9UVF/D5UVCmKoiiKolQAKqoURVEURVEqABVViqIoStWmIB9OroeNX8OeeZB6ttJCefDBB3n33Xcr7f7F6datG9OmTbvq9zl16hQNGzZk3759V/1eVxPd/acoiqJUbY6vgh/6gOm8Z1Sz+6D3f8HepXLjukY8+OCDbNy4scT5PXv2MHv27MKdccqlUVGlKIqiVG22z7AIKoBdv0DEUxDQovJiusYMGDCAkSNHWp2ztbXFy8vroq/Lzc3Fzs7uaoZ2Q6HlP0VRFKVqY+9ufWwwgrHycg4mk4kPPviAtm3b0rFjRz777DOr56dOnUrv3r0JCwujS5cujB49mvT09MLnT58+zeOPP06bNm0ICwvjzjvvZOXKlRe9p6OjI76+vlYPKFn+a9iwIT/99BOPP/44YWFhTJ48GYDly5fTp08fmjVrxq233srnn39OXl5eidc9+uijNG/enFtvvZWlS5deMJ78/Hxee+01unXrRvPmzenRowfTp08vsW727NnceeedhIaGEhkZydixYwufS0lJ4fXXX6d9+/a0bNmSIUOGsH///ot+DleKiipFURSlahM2EBzcLMedXgTvBmV7beY5OHcS8svvbVScefPm4ezszKxZs3jppZf44osvWLNmTeHzBoOB119/nYULFzJhwgTWr1/Phx9+WPj82LFjycnJ4ccff2TBggW8+OKLFWoy+vnnn3PbbbexYMEC+vXrx+bNmxk1ahRDhgxh8eLFjB07lrlz5xYKLjOffvopPXr04LfffqN37948//zzHDlypNR7FBQU4O/vz6effsqiRYt46qmn+O9//8vixYsL1/z000+MHTuWAQMGsGDBAv73v/9Rq1atwuefffZZEhIS+Prrr5k7dy5NmzbloYceIikpqcI+ixKYlGtGamqqKSQkxJSamlrZoSiKoihFid1vMu2eZzIdW2UyZaaU7TXHVptMn7UxmUZ7mkzznzKZEo+X6WWZmZmmvXv3mjIzM0s8N3jwYNP9999vda5fv36mDz/88ILXW7Jkialt27aFx3fddZfps88+K9t7OH/Ppk2bmsLCwgof48ePN5lMJtMtt9ximjp1auHakJAQ07vvvmv1+oceesg0efJkq3Pz58
83dezY0ep1b731ltWa/v37m95++22TyWQyRUVFmUJCQkx79+69YJxjxowxPfPMM4XHkZGRpk8++aTUtZs2bTK1bNnSlJ2dbXW+e/fuppkzZ5b6mov9Xsr6/a09VYqiKIri21AeZSX5NPw6BNLj5XjbD+BdHyKfu+JQGja0jsPX15eEhITC47Vr1zJlyhSOHj1KWloa+fn5ZGdnk5mZiZOTE0OGDGH06NGsXr2aiIgIbr/9dho1anTRe/bu3ZvHH3+88NjNze2Ca0NDQ62O9+/fz9atW60yU8VjAggPD7d6XVhY2EV3+82YMYM5c+Zw5swZsrOzyc3NLXwfCQkJxMbG0qFDh1Jfe+DAATIyMmjXrp3V+aysLE6ePHnBe14pKqoURVEU5XJJi7EIKjOH/6oQUWVra/3VbDAYMJlMgFgPjBgxgvvvv5/nnnsODw8PtmzZwuuvv05ubi5OTk7079+fyMhI/vnnH9asWcNXX33FqFGjePDBBy94T1dXV2rXrl2m+IqXEjMyMnjmmWe4/fbbS6x1cHAo0zWLs2jRIt5//31GjRpFeHg4Li4ufPvtt+zYsaNM101PT8fX15cffvihxHMXE4xXiooqRVEURblcPILAoxYkF8l6NO591W+7Z88eTCYTr7zyCkajtEUvWbKkxLqAgADuv/9+7r//fj7++GNmzZp1UVF1JTRp0oRjx45dUpRt376de++9t/B4x44dNG7cuNS1W7duJTw8nEGDBhWeK5phcnV1pUaNGqxbt4727duXeH3Tpk2Jj4/HxsaGoKCgy3xH5Ucb1RVFURTlcnH1gwHToUEPcAuAW16HJvdc9dvWrl2b3NxcfvjhB6Kiopg/fz4zZ860WvPuu++yatUqoqKi2LNnDxs2bKB+/fpXLaannnqK3377jc8//5xDhw5x5MgRFi1axH//+1+rdUuXLmX27NkcO3aMSZMmsXPnTgYPHnzB97l7925WrVrFsWPHmDhxIrt27bJa88wzzzB16lS+//57jh8/zp49ewozUxEREYSFhfHUU0+xevVqTp06xdatW/nvf/9b4joViWaqFEVRFKU81GgJ930P2eng4n1NbtmoUSNeffVVvv76az755BNat27N888/z6hRowrXFBQUMHbsWM6ePYurqyudOnXi1VdfvWoxderUicmTJ/PFF1/w9ddfY2trS7169ejfv7/VumeeeYbFixczZswYfH19+fjjjwkODi71mgMHDmTfvn0899xzGAwG7rzzTh544AH+/fffwjV9+vQhOzubadOm8cEHH+Dp6UnPnj0BKZl+9dVXTJw4kVdffZVz587h4+ND69at8fHxuWqfhcFkLtQqV520tDRatWrFli1bcHV1rexwFEVRlEogKyuLY8eOUbduXRwdHSs7nGtCw4YN+eKLL+jevXtlh3JBLvZ7Kev3t5b/FEVRFEVRKgAVVYqiKIqiKBWA9lQpiqIoinJVOXDgQGWHcE3QTJWiKIqiKEoFoKJKURRFURSlAlBRpSiKoiiKUgGoqFIURVEURakAVFQpiqIoiqJUACqqFEVRFEVRKgAVVYqiKIqiVDozZsygW7duNGvWjP79+7Nz587KDumyUVGlKIqiKEqlsnjxYsaPH89TTz3FvHnzaNSoEcOGDSMhIaGyQ7ssVFQpiqIoimJFdm4+Z5OzyM7Nvyb3mzp1KgMGDKBfv34EBwczZswYHB0dmTNnzjW5f0VRqaKqW7duNGzYsMRjzJgxAGRnZzNmzBjatWtHeHg4zzzzDPHx8VbXOHPmDMOHD6dFixZ06NCB999/n7y8PKs1GzZsoE+fPoSGhnLbbbcxd+7cErFcKu1YllgURVEU5UbnYEwq/zdrO90+/of/m7WdgzGpV/V+OTk57Nmzh4iIiMJzRqORiIgItm3bdlXvXdFUqqiaPXs2q1evLnxMnToVgJ49ewLw3nvvsWLFCiZOnMgPP/xAbGwsTz/9dOHr8/PzGTFiBLm5ucycOZMJEy
Ywb948Jk2aVLgmKiqKESNG0K5dO3777Tceeugh3njjDVatWlW4pixpx0vFoiiKoig3Otm5+fx3+UGW7DpLRk4+S3a
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Fireplace\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garages\n",
"\n",
"Holding the overall living area fixed, adding a garage seems to affect the price positively. Thus, *has Garage* seems like an interesting predictor as well."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 15,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD+uklEQVR4nOydeXiM5/rHPzPZIxtZRAQhEVsQtYaQFi3VquJHKV1USxfdqe6lddBWz3G0PaUbqlrVWlpbF62qXS2xb7GTkI3se+b3x53JZJIgSUOQ+3NdczXvO8+87zOTc8w39/K9DSaTyYSiKIqiKIryjzBW9QYURVEURVFuBlRUKYqiKIqiVAIqqhRFURRFUSoBFVWKoiiKoiiVgIoqRVEURVGUSkBFlaIoiqIoSiWgokpRFEVRFKUSsK3qDVQn8vPziY2NpUaNGhgMhqrejqIoiqIoZcBkMpGWloaPjw9G46XjUSqqriGxsbFERERU9TYURVEURakAa9euxdfX95LPq6i6htSoUQOQX4qLi0sV70ZRFEVRlLKQmppKRERE4ff4pVBRdQ0xp/xcXFxUVCmKoijKDcaVSne0UF1RFEVRFKUSUFGlKIqiKIpSCaioUhRFURRFqQS0pkpRFEVRbgDy8vLIycmp6m3clNjZ2WFjY/OPr6OiSlEURVGuY0wmE+fOnePixYtVvZWbGg8PD3x9ff+Rj6SKKkVRFEW5jjELKh8fH5ydndU8upIxmUykp6cTGxsLQJ06dSp8LRVViqIoinKdkpeXVyioPD09q3o7Ny1OTk6AmHT7+PhUOBWoheqKoiiKcp1irqFydnau4p3c/Jg/439St6aiSlEURVGuczTld/WpjM9YRZWiKIqiKEoloKJKURRFURSlElBRpSiKolRv8vPg1GbY+hnsWwIp56p6R2XigQce4F//+ldVb0Mpgnb/KYqiKNWbE+tgXn8w5ctxy/ug73/AvkbV7us65eTJk8ycOZNNmzYRHx9PzZo1adSoEQMHDqRPnz7Y2lZfaVF937miKIqiAETOtwgqgD3fQeenoE7rqtvTdcru3bt5+OGHady4MW+++SaNGjUCYO/evcyfP5/g4GCaNm1aoWtnZ2djb29fmdu95mj6T1EURane2LtZHxuMYLwxYg4mk4n33nuPDh060KVLFz788EOr52fPnk3fvn0JDQ0lIiKCCRMmkJaWVvj82bNnefzxx2nfvj2hoaHcddddrF279pL3evnllwkICODbb7+le/fuBAQEEBAQwN133823335LkyZNCte///779OrVi9atW9OjRw+mT59uZVfw4Ycf0q9fP77//nu6d+9Oq1atAPjrr78YOnQo7dq1o2PHjowePZpTp05Z7WXHjh3069ePli1bMmDAAFavXk2TJk04cOBA4ZrDhw/z6KOP0qZNGzp37sy4ceNITEys+IddBlRUKYqiKNWb0CHg4Go57joWPBuX7bUZF+DCKcirmpl8S5YswdnZmYULFzJu3Dg+/vhjNmzYUPi8wWDgtddeY/ny5UydOpXNmzfz/vvvFz7/9ttvk52dzddff82yZcsYO3bsJT2xDhw4wNGjRxk5ciRGY+nyoagtQY0aNZgyZQorVqzgtdde4/vvv2fOnDlW60+dOsUvv/zCRx99xNKlSwHIyMhgxIgRLFq0iDlz5mAwGHjqqafIz5doYmpqKk888QTBwcEsWbKEZ5991uo9ASQnJ/PQQw/RvHlzfvjhBz7//HMSEhJ47rnnyvrRVogbQ4oriqIoytXCvz08+jvEHoAaXuDbGmzLkIY6sQGWPw8JRyB0GHQbBzUbXP39FqFJkyaMGTMGgICAAL7++ms2bdpEly5dAHj44YcL1/r7+/Pcc8/x1ltvMWHCBACio6Pp1atXYYSpXr16l7zXiRMnAGjYsGHhuYSEBHr27Fl4PHbsWIYNGwbAk08+aXXv48ePs2LFCh577LHC8zk5Obz33nvUqlWr8F
yvXr2s7jt58mTCwsKIiooiODiYZcuWATBp0iQcHBwICgoiNjaW119/vfA1X3/9Nc2bN+eFF16wuk5ERATHjx+3eg+ViYoqRVEURfFuIo+yknQWvn8Q0uLleOc88AyE8Oevzv4uQdF0G4C3tzcJCQmFxxs3bmTWrFkcO3aM1NRU8vLyyMrKIiMjAycnJx588EEmTJjA+vXr6dy5M3fccUe5aqI8PDwKI0wPPPCAVXpv5cqVfPXVV5w+fZr09HRyc3NxcXGxer2fn5+VoAIRbzNmzGDXrl1cuHABk8kEQExMDMHBwRw/fpwmTZrg4OBQ+JqWLVtaXePgwYNs2bKFNm3alNjzqVOnVFQpiqIoynVD6nmLoDIT9fs1F1XFO+0MBkOhCDlz5gyjR49m6NChPP/887i7u7N9+3Zee+01cnJycHJyYtCgQYSHh/Pnn3+yYcMGPv30U8aPH88DDzxQ4l4NGkgU7vjx4zRv3hwAGxubwvNF97Jz507Gjh3L008/TXh4OK6urqxYsYLZs2dbXdM8c68ojz/+OHXr1mXSpEn4+PiQn5/P3XffXa7xMenp6dx2222MHTu2xHPe3t5lvk550ZoqRVEURSkv7v7gXt/6XLO+VbOXS7Bv377C4vLQ0FAaNmxIbGxsiXV16tRh6NChfPTRR4wYMYKFCxeWer3mzZvTqFEjvvjii8L6pkuxc+dO/Pz8eOKJJ2jZsiUBAQFER0dfcc8XLlzg+PHjPPHEE4SFhREYGEhSUpLVmoYNG3L48GGys7MLz+3Zs8dqTYsWLThy5Ah169alQYMGVo+rOUdRRZWiKIqilBcXHxg8Fxr3Atc6cNtr0LxfVe/KigYNGpCTk8O8efM4ffo0S5cuZcGCBVZr/vWvf7Fu3TpOnz7Nvn372LJlC4GBgaVez2AwMGXKFI4fP87QoUP5/fffOXHiBFFRUXz77bckJiZiY2NTeO+YmBhWrFjBqVOn+Oqrr1i9evUV9+zu7o6HhwffffcdJ0+eZNOmTUydOtVqTd++fTGZTLzxxhscPXqUdevW8eWXXxbuEeD+++8nKSmJF154gd27d3Pq1CnWrVvHK6+8Ql5eXrk/y7KiokpRFEVRKkLdW+C+r+DxDRDxErj6VvWOrGjatCmvvPIKn332GXfffTfLli2zKtwGyM/P5+2336ZPnz48+uijBAQE8NZbb13ymqGhoSxevJiGDRvy9ttvc9dddzFkyBBWrFjBK6+8wtChQwHo0aMHDz30EG+//Tb9+vVj586dPPHEE1fcs9Fo5D//+Q/79u3j7rvvZsqUKbz00ktWa1xcXPjkk084cOAA/fr14z//+Q9PPfUUQKHPVe3atfn222/Jz89n5MiR9O3bl8mTJ+Pq6nrJzsXKwGAyJ1+Vq05qaipt27Zl+/btJYr1FEVRFKU4mZmZhd1qjo6OVb2d65affvqJV199lW3btlX4c7rcZ13W728tVFcURVEU5YZi6dKl+Pv7U7t2bQ4dOsS0adPo3bt3lQtPFVWKoiiKotxQxMXFMWPGDOLi4vD29qZ37948//y17bwsDRVViqIoiqLcUDz22GNWJqLXC1qoriiKoiiKUgmoqFIURVEURakEVFQpiqIoiqJUAiqqFEVRFEVRKgEVVYqiKIqiKJWAiipFURRFUZRKQEWVoiiKoijXJfPnz6d79+60bNmSQYMGsXv37qre0mVRUaUoiqIoynXHypUrmTJlCk899RRLliyhadOmjBw5koSEhKre2iVRUaUoiqIoyhXJysnjXFImWTl51+R+s2fPZvDgwQwcOJCgoCAmTpyIo6MjixYtuib3rwhVKqq6d+9OkyZNSjwmTpwIQFZWFhMnTqRjx460adOGp59+mvj4eKtrREdHM2rUKFq3bk1YWBjvvvsuubm5Vmu2bNlC//79CQkJ4fbbb2fx4sUl9nKlEGNZ9qIoiqIoNyOHz6fw3MJIun/wJ88tjOTw+ZSrer/s7Gz27dtH586dC88ZjU
Y6d+7Mzp07r+q9/wlVKqp++OEH1q9fX/iYPXs2AL179wZg8uTJrFmzhunTpzNv3jxiY2MZM2ZM4evz8vIYPXo0OTk
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Garage\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pools\n",
"\n",
"Unfortunately, almost no one in Ames, IA, has a pool. The predictor *has Pool* seems quite uninteresting."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 16,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3iUVfbA8e/UzEwmvffeIITQIQRQRMEuYhdRRMXeu+uKyiqW3WV1/a1YwIYiimLFjgjSS+gtJCG910mZTPv9MTAwhBIgEsr5PI+PzjvvvO/NIDMn9557jsLhcDgQQgghhBDHRdndAxBCCCGEOB1IUCWEEEII0QUkqBJCCCGE6AISVAkhhBBCdAEJqoQQQgghuoAEVUIIIYQQXUCCKiGEEEKILqDu7gGcSex2O5WVlXh6eqJQKLp7OEIIIYToBIfDQXNzM8HBwSiVh56PkqDqBKqsrGTEiBHdPQwhhBBCHINFixYRGhp6yOclqDqBPD09AecfitFo7ObRCCGEEKIzTCYTI0aMcH2PH4oEVSfQ3iU/o9EoQZUQQghxijlS6o4kqgshhBBCdAEJqoQQQgghuoAEVUIIIYQQXUByqoQQQogzmM1mw2KxdPcwupVGo0GlUh33dSSoEkIIIc5ADoeD8vJy6uvru3soJwVfX19CQ0OPq46kBFVCCCHEGWhvQBUcHIzBYDhji1I7HA5aWlqorKwEICws7JivJUGVEEIIcYax2WyugCogIKC7h9Pt9Ho94CzSHRwcfMxLgZKoLoQQQpxh9uZQGQyGbh7JyWPve3E8+WUSVAkhhBBnqDN1ye9guuK9kKBKCCGEEKILSFAlhBBCCNEFJKgSQghxRrPZHawuqOWDZQV8t6GUysa27h5St7vhhhv4xz/+0d3D6HIpKSn88ssvf9n1ZfefEEKIM9ryvBpueHcFdofz8WWZEbxweToGrXxFnkg33HADK1euBECr1RIVFcX111/P9ddf380j6zyZqRJCCHFG+2xNsSugApifU0JeVXP3DegMdtVVV7FkyRK+//57zj//fJ577jm+/fbb7h5Wp0lQJYQQ4ozm5eFek0ipALVKdsU5HA5efvllBg4cyNChQ3n99dfdnp81axYXX3wxmZmZjBgxgilTptDcvC8YLSkp4fbbb2fAgAFkZmZy4YUXsmjRosPeU6fTERQURFRUFPfccw+xsbH89ttvAJSWlnLHHXfQp08f+vbty3333Ud1dbXb6z/++GNGjRpFeno6o0ePZv78+V3zZnSSBFVCCCHOaJf3jcTosW+p7+6zE4kPNHbuxa11UFcIttOvd96XX36JwWBg7ty5PPLII7zxxhv8+eefrucVCgVPPfUU3377LdOmTWP58uW88sorruefe+452tvb+eijj/jmm294+OGHj7ouloeHBxaLBbvdzp133klDQwMffvghs2bNoqioiAceeMB17s8//8wLL7zAxIkT+eabb7jmmmt48sknWb58+fG/GZ0kC8ZCCCHOaH2i/Zh/VxY7Kprw9/QgPdwHrboTcw4Ff8K3D0DNTsi8HoY/An4xf/2AT5CUlBTuvvtuAGJjY/noo49YtmwZQ4cOBeCmm25ynRsZGcn999/PM888w5QpUwDnzNLo0aNJSUkBICoqqtP3ttlsfPvtt2zfvp2rr76aZcuWsWPHDn799VdXG5mXX36ZCy+8kA0bNpCRkcG7777L2LFjXTlYcXFx5OTkMHPmTAYPHny8b0enSFAlhBDijJcY7EVisFfnX9BQAp9NgOY9y0/rPoSABMh+4PCvO4XsDYb2CgoKoqamxvV46dKlzJgxg7y8PEwmEzabDbPZTGtrK3q9ngkTJjBlyhSWLFlCVlYW5513HqmpqYe95yeffMLnn3+OxWJBqVRy0003ce211/LRRx8RGhrq1pcvMTERb29v8vLyyMjIIC8vj6uvvtrten379uWDDz7ognejc2T5TwghhDhapop9AdVeub
92z1j+Imq1+7yLQqHA4XBm9BcXFzN58mRSUlJ4/fXX+eKLL/j73/8O7GvzcuWVV/LLL79w6aWXsmPHDq644go+/PDDw97z4osvZv78+fz666/k5OTwxBNPoFSeOqHKqTNSIYQQ4mThEwk+0e7H0i7unrF0g82bN+NwOHj88cfJzMwkLi6OysrKDueFhYVx7bXX8t///peJEycyd+7cw17XaDQSExNDSEiIWzCVkJBAeXk5ZWVlrmO5ubk0NjaSkJAAQHx8PGvXrnW73tq1a0lMTDyeH/WoSFAlhBBCHC1jMFz1PiSNBq8wOPsp6HFpd4/qhImJicFisfDhhx9SVFTE/PnzmTNnjts5//jHP1i8eDFFRUVs3ryZFStWuAKgo5WVlUVycjIPP/wwmzdvZsOGDTz66KMMHDiQXr16AXDLLbfw5Zdf8vHHH1NQUMCsWbP4+eefufnmm4/75+0syakSQgghjkVEX7j6AzA3g2dAd4/mhEpNTeWJJ57g7bff5l//+hf9+/fnwQcf5LHHHnOdY7fbee655ygvL8doNDJs2DCeeOKJY7qfQqHg//7v/3j++ecZP348CoWCYcOG8fTTT7vOGTVqFE8++SQzZ87khRdeICIighdeeIFBgwYd98/b6XE69i6Qir+cyWSiX79+rFmzBqOxk9t1hRBCiC7W1tZGfn4+cXFx6HS67h7OSeFw70lnv79l+U8IIYQQogtIUCWEEEII0QUkqBJCCCGE6AISVAkhhBBCdAEJqoQQQgghuoAEVUIIIYQQXUCCKiGEEEKILiBBlRBCCCFEF5CgSgghhBCiC0hQJYQQQohT3uzZsxk5ciS9evXiyiuvZMOGDSd8DBJUCSGEEOKU9v333/Piiy9y11138eWXX5KamsqkSZOoqak5oeOQoEoIIYQQXc5ssVHe0IbZYvvL7zVr1iyuuuoqxo0bR2JiIs8++yw6nY558+b95ffeX7cGVSNHjiQlJaXDP88++ywAZrOZZ599lkGDBtGnTx/uueceqqur3a5RWlrKbbfdRu/evRkyZAgvvfQSVqvV7ZwVK1YwduxY0tPTOffcc/niiy86jOVI04adGYsQQgghYEdFE/fPzWHkP3/n/rk57Kho+svu1d7ezubNm8nKynIdUyqVZGVlsW7dur/svgfTrUHV559/zpIlS1z/zJo1C4AxY8YA8MILL7Bw4UKmT5/Ohx9+SGVlJXfffbfr9TabjcmTJ2OxWJgzZw7Tpk3jyy+/5LXXXnOdU1RUxOTJkxk0aBBfffUVN954I3/7299YvHix65zOTBseaSxCCCGEcM5Q/fuXHSzYWE5Lu40FG8uZ/suOv2zGqq6uDpvNRkBAgNvxgICAEz750a1Blb+/P0FBQa5/Fi5cSHR0NAMHDqSpqYl58+bx+OOPM2TIENLT03nhhRdYt24dOTk5ACxZsoTc3FxeeeUV0tLSGDFiBPfddx+zZ8+mvb0dgDlz5hAZGcnjjz9OQkIC48ePZ/To0bz33nuucRxp2rAzYxFCCCEE1LVYWLS9yu3Y79urqGuxdNOITpyTJqeqvb2dr7/+mnHjxqFQKNi0aRMWi8VtOi8hIYHw8HBXIJOTk0NycjKBgYGuc7KzszGZTOTm5rrOGTJkiNu9srOzXdfozLRhZ8YihBBCCPAzaBiREuR27KyUIPwMmr/mfn5+qFSqDknpNTU1bvHBiXDSBFW//PILTU1NjB07FoDq6mo0Gg3e3t5u5wUEBFBVVeU658A3bO/jI51jMploa2vr1LRhZ8YihBBCCPDQqHhgVDIX9ArFoFVxQa9Q7h+VjIdG9ZfcT6vV0rNnT5YtW+Y6ZrfbWbZsGX369PlL7nko6hN6t8OYN28ew4cPJyQkpLuHIoQQQojjkBzixb+vyqSuxYKfQfOXBVR7TZw4kccee4z09HQyMjJ4//33aW1t5fLLL/9L73ugkyKoKikpYenSpbz++uuuY4GBgVgsFhobG91miGpqaggKCn
Kdc+Auvb2zS/ufc2CiWnV1NUajEZ1Oh1KpPOK0YWfGIoQQQoh9PDQqQn3+2mBqrwsuuIDa2lpee+01qqqqSEtL451
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Pool\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Porch\n",
"\n",
"Most houses have a porch."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 17,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3RU9daGn5n0HtIIIdSEHiCRHgJBREUsiFwQBRVEwYJ4VRS7oFxAxfsh6hVsiIIiSlERUBFEeu+9EyCQRnqfzPfHzmQySYAQAgGyn7VmmXPmlN+Z3Mu82eXdBrPZbEZRFEVRFEW5LIxVvQBFURRFUZQbARVViqIoiqIolYCKKkVRFEVRlEpARZWiKIqiKEoloKJKURRFURSlElBRpSiKoiiKUgmoqFIURVEURakE7Kt6AdWJgoIC4uLicHNzw2AwVPVyFEVRFEUpB2azmYyMDAICAjAazx+PUlF1FYmLiyM6Orqql6EoiqIoSgVYsWIFgYGB531fRdVVxM3NDZBfiru7exWvRlEURVGU8pCenk50dHTR9/j5UFF1FbGk/Nzd3VVUKYqiKMp1xsVKd7RQXVEURVEUpRJQUaUoiqIoilIJqKhSFEVRFEWpBLSmSlEURVFuUEwmE3l5eVW9jGseBwcH7OzsLvs6KqoURVEU5QbDbDZz5swZkpOTq3op1w3e3t4EBgZelo+kiipFURRFucGwCKqAgABcXV3VcPoCmM1mMjMziYuLA6BWrVoVvpaKKkVRFEW5gTCZTEWCytfXt6qXc13g4uICiEl3QEBAhVOBWqiuKIqiKDcQlhoqV1fXKl7J9YXl87qcGjQVVYqiKIpyA6Ipv0ujMj4vFVWKoiiKoiiVgIoqRVEURVGUSkBFlaIoilK9KTDBiXWw4XPYPR/SzlT1iq4YDz30EP/5z3+qehlXnHnz5tG2bdurfl/t/lMURVGqN8dWwrd9wFwg2y3vh7v/DxzdqnZdNwgPPfQQGzZsAMDR0ZE6deowcOBABg4cWMUrq3w0UqUoiqJUb7bNsgoqgJ0/QOKhqlvPDUj//v1ZtWoVixYt4o477uDtt99m4cKFFbpWbm5uJa+u8lBRpSiKolRvHD1ttw1GMN64iRyz2cx7771H+/bt6dy5Mx999JHN+9OnT+fuu+8mPDyc6OhoxowZQ0ZGRtH7p06d4oknnqBdu3aEh4dz5513smLFigve09nZGX9/f+rUqcMzzzxD/fr1WbZsGQCnT5/mySefJCIigptuuolnn32WhISEonM/+ugjevfuzY8//kj37t1p1aoVAKmpqbz55ptERkbSsmVL7rrrLpYvX25z35UrV3LHHXcQERHB0KFDiww+rxQqqhRFUZTqTfgAcPKwbncZBb6Nyndu1jk4dwJM1898vfnz5+Pq6sqcOXN48cUX+eSTT1i9enXR+waDgddee42FCxcyceJE1q1bx/vvv1/0/ttvv01ubi4zZ87k119/ZdSoUZfsieXk5EReXh4FBQU89dRTpKSk8O233zJ9+nRiYmJ47rnnbI4/ceIEv//+Ox9//DELFiygoKCAxx9/nC1btvD++++zaNEiXnjhBYxGq6zJzs7mq6++4r333mPmzJnExsby7rvvVvBTKx83rhRXFEVRlPIQ3A4e+wvi9oKbHwS2BnvHi593bDUsfA4SD0L4QOj6ItSod+XXe5k0adKEESNGAFC/fn1mzpzJ2rVr6dy5MwCDBw8uOjY4OJh///vfvPXWW4wZMwaQyNLtt99OkyZNAKhTp065720ymVi4cCH79+/n/vvvZ+3atRw4cIC//vqraDzMe++9x5133smOHTuKolJ5eXm89957+Pj4ALBq1Sp27NjBokWLaNCgQZnryMvLY+zYsdStWxeAgQMH8r///e9SPqpLRkWVoiiKovg3kVd5STkFPz4MGYVpqq3fgm8IRD134fOuASxiyIK/vz+JiYlF22vWrGHatGkcOXKE9PR0TC
YTOTk5ZGVl4eLiwsMPP8yYMWNYtWoVkZGR3HbbbTRt2vSC9/z+++/56aefyMvLw2g0MnjwYB544AFmzpxJYGCgzby90NBQPD09OXLkSJGoCgoKKhJUAHv37iUwMLBIUJWFi4tLkaACCAgIsHnOK4Gm/xRFURTlUkk/axVUFg79VTVruUTs7W3jKQaDAbPZDMDJkycZPnw4TZo04aOPPmLevHm8+eabgHV8S79+/Vi6dCm9e/fmwIED/Otf/+Lbb7+94D3vvvtuFixYwF9//cW2bdt45ZVXbFJ1F8Mym8+Cs7PzZT3nlUJFlaIoiqJcKl7B4FXXdl+zu6tmLZXI7t27MZvNvPzyy4SHh9OgQYMyi7tr1arFAw88wMcff8yQIUOYM2fOBa/r7u5OvXr1qFmzpo2YCgkJ4cyZM8TGxhbtO3ToEKmpqYSEhJz3ek2aNOHMmTMcPXq0Ak955VBRpSiKoiiXinsA9J8BjW4Hj1pw82vQvHdVr+qyqVevHnl5eXz77bfExMSwYMECZs+ebXPMf/7zH1auXElMTAy7d+9m/fr1FxRAFyIyMpLGjRszatQodu/ezY4dO3jppZdo3749LVu2PO957du3p23btowcOZLVq1cTExPDihUr+Oeffyq0jspCRZWiKIqiVITaN8H938ATqyH6JfAIrOoVXTZNmzbllVde4fPPP+euu+7i119/5fnnn7c5pqCggLfffptevXrx2GOPUb9+fd56660K3c9gMPC///0PT09PBg0axODBg6lTpw7/93//d9FzP/roI8LCwnj++ee58847mTRpEgUFBRc970piMF/pBKNSRHp6Om3atGHz5s24u7tX9XIURVGUG5Ds7GyOHj1KgwYNylV7pAgX+tzK+/2tkSpFURRFUZRKQEWVoiiKoihKJaCiSlEURVEUpRJQUaUoiqIoilIJqKhSFEVRFEWpBFRUKYqiKIqiVAIqqhRFURRFUSoBFVWKoiiKoiiVgIoqRVEURVGUSkBFlaIoiqIo1x2zZs2ie/futGzZkn79+rFjx46qXpKKKkVRFEVRri8WLVrEhAkTePrpp5k/fz5NmzZl6NChJCYmVum6VFQpiqIoinJZ5OSZOJOSTU6e6arcb/r06fTv35++ffsSGhrK2LFjcXZ2Zu7cuVfl/uejSkVV9+7dadKkSanX2LFjAcjJyWHs2LF06NCBiIgInnnmGRISEmyucfr0aYYNG0br1q3p1KkT7777Lvn5+TbHrF+/nj59+hAWFsatt97KvHnzSq3lYmHE8qxFURRFUaobB86m8e852+j+wd/8e842DpxNu6L3y83NZffu3URGRhbtMxqNREZGsnXr1it674tRpaLqp59+YtWqVUWv6dOnA9CzZ08Axo8fz/Lly5k8eTLffvstcXFxjBgxouh8k8nE8OHDycvLY/bs2UycOJH58+czZcqUomNiYmIYPnw4HTp04Oeff+aRRx7h9ddfZ+XKlUXHlCeMeLG1KIqiKEp1IyfPxP8tPcDinWfIzDWxeOcZJi89cEUjVufOncNkMuHr62uz39fXt8qDHVUqqnx8fPD39y96LV++nLp169K+fXvS0tKYO3cuL7/8Mp06dSIsLIzx48ezdetWtm3bBsCqVas4dOgQ77//Ps2aNSM6Oppnn32WWbNmkZubC8Ds2bMJDg7m5ZdfJiQkhEGDBnH77bfz9ddfF63jYmHE8qxFURRFUaob5zLzWLE/3mbf3/vjOZeZV0UrqlqumZqq3NxcfvnlF/r27YvBYGDXrl3k5eXZhPdCQkIICgoqEjLbtm2jcePG+Pn5FR0TFRVFeno6hw4dKjqmU6dONveKiooqukZ5wojlWYuiKIqiVDdquDoQ3cTfZl+3Jv7UcHW4cvesUQM7O7tSRemJiYk2eqAquGZE1dKlS0lLS6NPnz4AJCQk4ODggKenp81xvr6+xMfHFx1T8gO0bF/smPT0dLKzs8sVRizPWhRFURSluuHkYMdzPR
rTq2Ugro529GoZyL97NMbJwe6K3dPR0ZEWLVqwdu3aon0FBQWsXbuWiIiIK3bf8mBfpXcvxty5c+natSs1a9as6qU
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Porch\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Neighborhoods\n",
"\n",
"The instructors' notes say:\n",
"\n",
"> For instructors who cover nominal variables in their class, I would suggest incorporating the neighborhood variable into their models by converting it to a set of dummy (indicator) variables. I have found that the coefficients for the continuous variables tend to have values with more realistic interpretations when used in conjunction with the neighborhood variable.\n",
"\n",
"Indeed, plotting the price distributions by neighborhood reveals significant differences in the price level."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 18,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4EAAAL/CAYAAADY9cLJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVhUZf8G8JsZWYRREEEB7TXQ3EASLRVErGwlzS3UQiv3BNPKXF5FkUKl0jIL1NwX1FxQMrHNFjRGTIUQTFNBs1gEEQSRbWZ+f/ib884IyDAMszD357q8ZM55OOcBBj33eZ7zfSwUCoUCREREREREZBZEhu4AERERERER6Q9DIBERERERkRlhCCQiIiIiIjIjDIFERERERERmhCGQiIiIiIjIjDAEEhERERERmRGGQCIiIiIiIjPCEEhERERERGRGGAKJiIiIiIjMCEMgEVEtFixYgG7duqFbt26Ii4szdHeMHr9fhhcXFyf8DBYsWKDXcyvP261bN50d86mnnhKO+c8//+jsuMagKb5fzRH/XSFqOi0M3QEiovtNmDABp06dqnO/ra0t2rRpg27dusHPzw8jRoxAq1at9NhDau7ufw+6urri+++/h5WVVb2f+/nnn+OLL74AAAQGBuLTTz9tsn4SERFpgyOBRGRyysrK8O+//+Knn35CZGQknnjiCRw6dMjQ3aJmLCcnB3v27DF0N4iIiHSCI4FEZNR69eoFb29v4bVCocDt27eRnp6Oq1evAgBKS0sxf/58VFRUYOzYsQbqKTV369evR1BQEFq2bGnorhARETUKQyARGbXBgwfjrbfeqnXfDz/8gP/+978oKSkBAERGRmLw4MFwcXFp9HmjoqIQFRXV6ONQ81FQUIAdO3Zg2rRphu5KrUaNGoVRo0YZuhtERGQCOB2UiEzWM888g5UrVwqvKysrsWvXLgP2iJqj3r17Cx9v2rQJpaWlhusMERGRDjAEEpFJe+KJJ9C9e3fhdVJSkgF7Q83RSy+9BHd3dwBAUVERNm/ebOAeERERNQ6ngxKRyfPx8cGFCxcAANevX1fbp1qpcebMmXjrrbdQXl6Ow4cP4+jRo8jMzERBQQGqqqpw6NAh9OjRA8C90uQHDx4EAKxYsUKjaXanT5/Gt99+i9OnTyMvLw+3b9+GtbU1OnTogJ49e2LQoEF4+umnYWNj88Dj5OTkIC4uDr/99hv+/vtvFBUVwdbWFm5ubvD19cWYMWOEUPIgVVVVSEhIwA8//IA///wThYWFqKiogLW1NZycnPCf//wH3t7eePLJJ9Weu9SVW7duYd++ffj+++/xzz//oKysDO3atYOvry9eeeUV9OzZs9bPO3/+PEaOHAkAaN26NU6cOAFra+t6z1daWopBgwahrKwMABAfH692g0BbIpEIs2bNwjvvvAMA2Lp1KyZMmIA2bdo0+thKt27dwsGDB3H8+HFkZmaisLAQ1tbWaNeuHfr3749Ro0ahV69eDzxGXFwc/vvf/wIARo4cWe905uLiYsTGxuLHH3/E9evXUV1djXbt2uGxxx7D2LFjhfeE6jIGFy9e1PhryszMxO7du3HixAnk5ubCwsICHTt2xODBgzFx4kQ4OjpqfCylK1euYM+ePfjtt9+Qm5sLAOjQoQOeeuopjB8/Hs7Ozhofq7CwEPv370diYiKuXr2KoqIi2NnZwdXVFb6+vhg9ejS6dOnywGP8888/GDJkiNCPn376CcC9fwsOHjyIM2fOID8/H6WlpXjttdewaNGiOo+ly+/XpUuXEBcXB6lUipycHNy5cwcODg5wd3fHoEGDEBQU1KD3b1VVFb7++mscO3YMf/75J27evIkWLVrA2dkZffv2xYsvvoiBAwdqfDwA+PHHH3HgwAGkp6fj1q1bcHR0ROfOnTF8+HAMHToULVrwEpWoKfE3jIhMnr29vfDxnTt3Htj2ypUrmD17Ni5duqSz8+fm5mLhwoX47bffauyrrq
7GX3/9hb/++guHDh3Co48+ir1799Z6HLlcjs8//xybNm1CRUWF2r7i4mIUFxfjzz//xPbt2zFlyhS8/fbbsLCwqPVYWVlZCA0NxZUrV2rsKysrw99//42///4bJ06cQExMDL7//nt06tRJi6++dikpKZg1axZu3Lihtv369eu4fv06Dhw4gBkzZtT6vGfPnj3h6emJjIwM3L59G9999x1eeumles959OhRIQD26tVLJwFQ6YUXXsD69etx4cIF3LlzBxs2bMC8efN0cuzY2Fh8+umnwrOtSpWVlSgpKcGVK1ewe/dujBo1CkuXLtVomYr6nDx5EnPmzEFBQYHa9qtXr+Lq1auIi4tDaGgoZs6cqdXxd+/ejeXLl6OyslJt+8WLF3Hx4kXs3bsXGzdurDfYqtq7dy8++OCDGsdU/n7t3r0bK1asEELZg+zfvx9RUVE1vudFRUUoKirCn3/+iW3btmH8+PGYP38+xGKxRn2srKxEZGQkvvrqK42/LkB336/q6mpERUVh165dkMlkavvy8/ORn5+PU6dOYcOGDVi4cKFws+VB/vjjD7z33nv4+++/1bZXVFTgzp07uHr1Kg4cOICBAwdi5cqV9YbVO3fu4J133sGvv/6qtj0vLw95eXlISkrC3r178dlnn9XbNyLSHkMgEZm84uJi4WOJRFJnu6KiIkyZMgXZ2dmwtrZG37594ebmhrKyMvzxxx9anfvSpUuYOHEi8vPzhW1t27aFj48PHB0dUVFRgb///ht//vknysvLa4Q7JZlMhnfeeQffffedsK19+/bw9vaGo6Mj7ty5g7S0NPz999+orq7GunXrUFhYiA8++KDGsUpLSzFx4kTk5OQAuDeS1aNHD3Tu3Bm2trYoLy9HXl4eLly4gFu3bmn1dT9IdnY2oqKiUFxcDFtbWwwYMABOTk64ceMGkpOTcffuXchkMnzxxReQy+WYPXt2jWOMHTsWS5YsAXDvgl2TELh//37h46CgIN19QQAsLCwwe/ZszJgxA8C94PbGG2+gXbt2jTrusmXLsH37duF1mzZt0Lt3bzg7O6OiogJ//vkn/vrrLygUChw4cAA3btzAl19+CZFI+6c5UlNT8eabb+Lu3bvC19arVy906dIFVVVVOHfuHK5evYrPP/9cq9HOuLg4LF26FADg7u4OLy8v2NjYIDMzE2fPnoVCoUBRURFmzJiBo0eParTG57Fjx7B8+XIA934v+vbtC1tbW1y9ehVnz56FXC5HcXExZs+ejbVr12LQoEF1HmvTpk346KOPhNdWVlbo168fXF1dcfv2bSQnJ6OoqAgymQzbtm1DTk4O1qxZU+cNF1UrVqwQAmDXrl3RvXt3tGjRAlevXq3zZ6ar75dcLsdbb70ljEYCgIODA/r16wd7e3vk5OQgOTkZVVVVuH37NhYsWIDbt2/j9ddfr/Pr+f333zF16lS194q3tzc6d+6Mqqoq/PHHH0I4/O233/DKK69g9+7ddQbBqqoqTJ8+Hb///ruwzdnZGY899hjs7Ozw999/48yZMzhz5gxmzpyJjh071tk3ImochkAiMnlnz54VPn7QRcOePXtQXV2N5557DkuXLlW7UJHL5TXunNentLQUM2fOFAJgmzZtEBYWhhdffLHGBWNZWRmOHTtW5zOLX3zxhRAAnZ2dsWTJEjzzzDM1jnP06FEsXrwYJSUl2Lt3L3x9fREYGKjW5sCBA0IA7NKlCz7//HN4eHjUOKdCocC5c+cQFxenk9ElpXXr1qGqqgrDhg3D0qVL1YJ5cXExwsLC8P333wttBw0ahD59+qgdY+jQoYiKikJZWRlOnTqF69ev46GHHqrznJcvX0ZqaioAwNbWFi+++KLOvh6lp556Co8++ij++OMPlJeXY926dUJQ1cb+/fuFACiRSLBgwQKMGDEClpaWau1OnjyJefPmIS8vD8ePH8emTZswdepUrc5ZUVGB+fPnCxf1HTt2xKefflpjOnBCQgIWLlyoVY
Xc8PBwODo64sMPP0RAQIDavt9//x1vvvkmSktLkZ+fj23btmk02vjxxx9DJBJh3rx5eP3119UC1eXLl/H222/j0qV
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 1000x800 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(10, 8))\n",
"sns.boxplot(x=\"Neighborhood\", y=\"SalePrice\", data=df, ax=ax)\n",
"ax.set_title(\"Prices by Neighborhood\", fontsize=24)\n",
"ax.set_xlabel(\"Neighborhood\", fontsize=18)\n",
"ax.tick_params(axis='x', labelrotation = 45)\n",
2021-05-25 08:22:14 +02:00
"ax.set_ylabel(\"House Price\", fontsize=18);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The 28 neighborhoods are encoded as factor variables: one 0/1 dummy (indicator) column per neighborhood, each prefixed with *nhood*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 19,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"neighborhood = pd.get_dummies(df[\"Neighborhood\"], prefix=\"nhood\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"df = pd.concat([df, neighborhood], axis=1)\n",
"del df[\"Neighborhood\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 20,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(neighborhood.columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 21,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 28)"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 21,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 22,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nhood_Blmngtn nhood_Blueste nhood_BrDale nhood_BrkSide \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_ClearCr nhood_CollgCr nhood_Crawfor nhood_Edwards \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Gilbert nhood_Greens nhood_GrnHill nhood_IDOTRR \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Landmrk nhood_MeadowV nhood_Mitchel nhood_Names \\\n",
"Order PID \n",
"1 526301100 0 0 0 1 \n",
"2 526350040 0 0 0 1 \n",
"3 526351010 0 0 0 1 \n",
"4 526353030 0 0 0 1 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_NoRidge nhood_NPkVill nhood_NridgHt nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_OldTown nhood_SWISU nhood_Sawyer nhood_SawyerW \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Somerst nhood_StoneBr nhood_Timber nhood_Veenker \n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 22,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nominal Features\n",
"\n",
"This section investigates the rest of the nominal variables with regard to which realizations / encoding might be a useful predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 23,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alley Type of alley access to property\n",
"Bldg Type Type of dwelling\n",
"Central Air Central air conditioning\n",
"Condition 1 Proximity to various conditions\n",
"Condition 2 Proximity to various conditions (if more than one is present)\n",
"Exterior 1st Exterior covering on house\n",
"Exterior 2nd Exterior covering on house (if more than one material)\n",
"Foundation Type of foundation\n",
"Garage Type Garage location\n",
"Heating Type of heating\n",
"House Style Style of dwelling\n",
"Land Contour Flatness of the property\n",
"Lot Config Lot configuration\n",
"MS SubClass Identifies the type of dwelling involved in the sale.\n",
"MS Zoning Identifies the general zoning classification of the sale.\n",
"Mas Vnr Type Masonry veneer type\n",
"Misc Feature Miscellaneous feature not covered in other categories\n",
"Roof Matl Roof material\n",
"Roof Style Type of roof\n",
"Sale Condition Condition of sale\n",
"Sale Type Type of sale\n",
"Street Type of road access to property\n"
]
}
],
"source": [
"print_column_list(set(NOMINAL_VARIABLES) - set([\"Neighborhood\"]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Alleys\n",
"\n",
"Almost no house has access to an alley."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 24,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbA4d/MZFInvTdSSYEQQm8BpAmioIAgCqLIFUQpNgSxgaLgtVzEBjZEQRGpghRFAUE6JPQQQgIEQnrvk8x8f8zHwTHUEAhlvc/Dc5l99tlnTbhm1uyzz9oqo9FoRAghhBBCXBN1fQcghBBCCHE7kKRKCCGEEKIOSFIlhBBCCFEHJKkSQgghhKgDklQJIYQQQtQBSaqEEEIIIeqAJFVCCCGEEHXAor4DuJMYDAYyMzOxs7NDpVLVdzhCCCGEuAJGo5GSkhI8PDxQqy8+HyVJ1Q2UmZlJ586d6zsMIYQQQtTCpk2b8PLyuuhxSapuIDs7O8D0j6LT6eo5GiGEEEJcieLiYjp37qx8jl+MJFU30LlbfjqdTpIqIYQQ4hZzuaU7slBdCCGEEKIOSFIlhBBCCFEHJKkSQgghhKgDsqZKCCGEuA1UV1ej1+vrO4xbklarRaPRXPM4klQJIYQQtzCj0Uh6ejr5+fn1HcotzcnJCS8vr2uqIylJlRBCCHELO5dQeXh4YGtrK8Wlr5LRaKS0tJTMzEwAvL29az2WJFVCCCHELaq6ulpJqFxdXes7nFuWjY0NYCrS7eHhUetbgbJQXQghhLhFnVtDZWtrW8+R3PrO/QyvZV2aJFVCCCHELU5u+V27uvgZSlIlhBBCCFEHJKkSQgghhKgDklQJIYS4o1UbqonLjGNhwkLWnVhHVmlWfYd0S9qxYwfh4eEUFhYCsHTpUlq2bFnPUd1Y8vSfEEKIO9qu9F2MWj8Kg9EAwH1B9/F6+9exsbCp58huTnFxcTzyyCN07NiRL774or7DuanITJUQQog72oqkFUpCBbAqZRUnCk7UX0A3ucWLFzN06FB27dpFRkZGfYdzU5GkSgghxB3NztLO7LVapUajvvYtS25HJSUlrF69mocffpi77rqLZcuWXdX569evp1+/fjRp0oRu3brxySefUFVVBcDLL7/MqFGjzPrr9XratWvHzz//XGfv4XqSpEoIIcQdrU9IH+y05xOrJ5s8SZBD0BWdW1BRwJmiM+gNd8aee2vWrCE4OJjg4GD69u3LkiVLMBqNV3Tu7t27mThxIsOGDWP16tW8+eabLF26lNmzZwMwcOBANm/erFQ2B9i4cSPl5eX07t37uryfuiZJlRBCiDtaU/em/ND7Bz7o/AHf9PyG4VHD0Wq0lz1vd/puhq0eRu9lvZm2bRpnis/cgGjr1+LFi+nbty8AHTt2pKioiJ07d17RuZ988gkjR46kX79++Pv706FDB8aPH8/ChQsBaN68OUFBQaxYsUI5Z8mSJfTq1Qs7O7uLDXtTkYXqQggh7njBTsEEOwVfcf+Mkgxe2PQCueW5ACxNWkoDhwaMaDLieoVY75KTkzlw4ACffvopABYWFvTu3ZvFixfTpk2by56fkJDA3r17lZkpMG2zU1FRQVlZGTY2NgwcOJCffvqJJ598kuzsbDZv3sy8efOu23uqa5JUCSGEEFcpqyxLSajO+Tvt79s6qVq8eDFVVVV07NhRaTMajVhaWvL6669f9vzS0lLGjh3L3XffXeOYlZUVAPfffz/vv/8+cXFxxMXF4efnd0uVZZCkSgghhLhK3rbe+Nj5kFaSprR1b9C9HiO6vqqqqlixYgWTJk2iQ4cOZseeeeYZVq1aRXDwpWf6GjVqREpKCgEBARft4+zsTPfu3Vm6dCnx8fH079+/TuK/USSpEkIIIa6Sq60rH971IZ/Ff0ZCXgIDwwbSPeD2Tao2btxIQUEBDz74IPb29mbH7r77bhYvXsxLL710yTGeeeYZnnrqKXx8fO
jZsydqtZqEhAQSExN57rnnlH4DBw5k1KhRGAwGHnjggevxdq4bSaqEEEKIWmjs1pgP7/qQ0qpSnK2d6zuc62rx4sW0b9++RkIF0LNnT7766iuOHj16yTE6duzI7Nmz+fTTT/nyyy+xsLAgODiYgQMHmvVr3749Hh4ehIaG4unpWafv43pTGa/0WUhxzYqLi2nRogV79uxBp9PVdzhCCCFuceXl5aSkpBAUFIS1tXV9h1MnSkpK6NSpE9OnT7/g+qvr5VI/yyv9/JaZKiGEEELUO4PBQF5eHt988w0ODg507dq1vkO6apJUCSGEEKLepaWl0a1bN7y8vJgxYwYWFrdeinLrRSyEEEKI246fn99l12Xd7KSiuhBCCCFEHZCkSgghhBCiDkhSJYQQQghRBySpEkIIIYSoA5JUCSGEEELUAUmqhBBCCHHbCQ8PZ/369Tf0mlJSQQghhBD1Iisrizlz5rBp0ybS09Oxt7enQYMG9O3bl379+mFjY1PfIV4VSaqEEEIIccOlpqby8MMPY29vz3PPPUd4eDiWlpYcPXqURYsW4enpSbdu3Wqcp9fr0Wq19RDx5UlSJYQQQggq9NXklepxttVipdVc9+tNmTIFjUbDkiVLsLW1Vdr9/f3p3r0757YmDg8P54033uCvv/5i+/btDB8+nCVLlvDUU0/xyCOPKOcdPnyY/v3788cff+Dr63vd47+Qel1T1bVrV8LDw2v8mTp1KgAVFRVMnTqVNm3a0KxZM8aOHUt2drbZGGlpaYwcOZKmTZvSrl073n33Xaqqqsz67Nixg379+hEVFUWPHj1YunRpjVgWLFhA165dadKkCQMHDmT//v1mx68kFiGEEOJWlJhRxLOL4un6wUaeXRRPYkbRdb1eXl4ef//9N0OGDDFLqP5JpVIpf//kk0/o0aMHK1euZODAgdx7772sWrXKrP/KlStp3rx5vSVUUM9J1eLFi9myZYvyZ+7cuQD06tULgHfeeYcNGzYwc+ZMvv/+ezIzMxkzZoxyfnV1NaNGjUKv17Nw4UJmzJjBsmXLmDVrltInNTWVUaNG0aZNG1asWMFjjz3Gq6++yubNm5U+q1evZvr06TzzzDMsW7aMiIgIRowYQU5OjtLncrEIIYQQt6IKfTX/W5/ImgPplFZWs+ZAOjPXJ1Khr75u1zx16hRGo5GgoCCz9nMTF82aNeO9995T2u+77z4GDBiAv78/Pj4+9O3bl71795KWlgaYNmP+9ddf6dOnz3WL+UrUa1Ll4uKCu7u78mfDhg00aNCA1q1bU1RUxJIlS5g0aRLt2rUjKiqKd955h7i4OOLj4wHYsmULSUlJvPfee0RGRtK5c2fGjx/PggULqKysBGDhwoX4+fkxadIkQkJCGDp0KD179uTbb79V4pg7dy6DBg1iwIABhIaGMnXqVKytrVmyZAnAFcUihBBC3IrySvVsOppl1rbxaBZ5pfobHsvixYtZvnw5oaGhyuc4QFRUlFm/yMhIQkJClNmqnTt3kpubq0zK1JebpqRCZWUlv/zyCwMGDEClUnHw4EH0ej3t27dX+oSEhODj46MkMvHx8YSFheHm5qb0iY2Npbi4mKSkJKVPu3btzK4VGxurjFFZWcmhQ4fMrqNWq2nfvj1xcXEAVxSLEEIIcStyttXSOdzdrO2ucHecba/fYvAGDRqgUqlISUkxa/f39ycgIABra2uz9gvdIuzTpw8rV64EYNWqVcTGxuLs7HzdYr4SN01StX79eoqKiujXrx8A2dnZaLVaHBwczPq5urqSlZWl9PlnQgUory/Xp7i4mPLycvLy8qiursbV1bXGdc6tmbqSWIQQQohbkZVWw3Pdw+jdxAtbSw29m3jxbPew67pY3dnZmQ4dOjB//nxKS0trNcZ9993HsWPHOHjwIOvWraNv3751HOXVu2me/luyZAmdOnXC09OzvkMRQggh7ihhnvb8b1DMDX3674033uDhhx9mwIABjB07lvDwcFQqFQ
cOHCA5OZnGjRtf8nw/Pz+aNWvGK6+8QnV1NV27dr3uMV/OTZFUnTlzhq1bt/Lxxx8rbW5ubuj1egoLC81miHJycnB
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Alley\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 25,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Alley\"]"
]
},
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Building Type\n",
    "\n",
    "The type of a building clearly affects the valuation. The two types of townhouses are merged into one category, and the 2-family condo and duplex types are merged into another. This makes sense a) semantically, and b) by looking at the two sub-clusters in the scatter plot."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 26,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hT5dvA8W+SJk3bdO896YBSWkaBsgQHQ1GGA2UoooAyXKi4RVFw/hD1FRQFRARRhhM3IAiUWfYqHXTvla7M94/AgVBGKaVFeD7XxSU55znn3KG2ufuM+5GZzWYzgiAIgiAIwmWRt3YAgiAIgiAI1wKRVAmCIAiCIDQDkVQJgiAIgiA0A5FUCYIgCIIgNAORVAmCIAiCIDQDkVQJgiAIgiA0A5FUCYIgCIIgNAOb1g7gemIymSgsLMTBwQGZTNba4QiCIAiC0Ahms5nq6mq8vLyQy8/fHyWSqhZUWFhInz59WjsMQRAEQRCaYMOGDfj4+Jz3vEiqWpCDgwNg+aJoNJpWjkYQBEEQhMbQarX06dNH+hw/H5FUtaBTQ34ajUYkVYIgCILwH3OxqTtiorogCIIgCEIzEEmVIAiCIAhCMxBJlSAIgiAIQjMQc6oEQRAE4QozmUzodLrWDkM4D6VSiUKhuOz7iKRKEARBEK4gnU5Heno6JpOptUMRLsDFxQUfH5/LqiMpkipBEARBuELMZjN5eXkoFAoCAwMvWDhSaB1ms5mamhoKCwsB8PX1bfK9RFIlCIIgCFeIwWCgpqYGPz8/7O3tWzsc4Tzs7OwAS5FuLy+vJg8FipRZEARBEK4Qo9EIgEqlauVIhIs5lfTq9fom30MkVYIgCIJwhYn9Xq9+zfE1EkmVIAiCIAhCMxBJlSAIgiAIQjMQSZUgCIJwXTOazOzIKOXLLRn8vDeXwsq61g6pWWRnZxMVFcWhQ4fO2yY5OZmoqCgqKytbMLJrl1j9JwiCIFzXtqaVMPrzZExmy+sh8f68OSwWe9XV+xE5ffp0Vq9eLb12cXEhNjaWp59+mujo6BaLY9WqVTz33HMXbPPXX38REBDQQhG1rqv3/xhBEARBaAHf7syWEiqANSk5PNQrlFh/59YLqhF69erFrFmzACguLmbOnDlMnDiR9evXt1gMgwYNolevXtLrKVOm0KZNG6ZOnSodc3Nza7F4WpsY/hMEQRCua4621jWJ5DKwUVz9q/VUKhWenp54enoSExPDww8/TF5eHqWlpee9ZsOGDfTv35+4uDhGjx5NTk5OgzYrVqygT58+dOjQgUmTJrFw4UI6d+58zvup1WopBk9PT5RKpXTs33//5bbbbpPKSpzy6KOP8vTTTwPw4Ycfcscdd7B8+XLpmY899hhVVVVW13z77bcMHDiQ9u3bM2DAAJYuXXqp/1wtQiRVgiAIwnVtWMcANLanB24m940gzEPTqGvLa3RkldagN7buFjTV1dX88MMPBAcH4+Lics42eXl5TJ48mb59+7JmzRruuusu3nvvPas2O3fu5JVXXmHMmDGsWbOGpKQk5s2b16SYBgwYgNFo5K+//pKOlZSUsGHDBoYPHy4dO3HiBGvXrmXevHksWLCAQ4cO8eqrr0rnf/jhBz744AOeeOIJfvnlF5588knmzp1rNfx5tRDDf4IgCMJ1LSHIlTWTkjhaUIWbgy2xfs6obC7e55CcVsILq/eTVqzlzk4BTOnXhkC3lquavn79ehISEgCoqanB09OT+fPnn3crnGXLlhEUFMT06dMBCAsL4+jRo3z22WdSm6+++orevXszbtw4AEJDQ9m9e3eThhTVajW33XYbq1atYuDAgYAlQfL19aVr165Su/r6et5++228vb0BePHFF5kwYQLTp0/H09OTDz/8kOnTp3PLLbcAEBgYSGpqKt988w1Dhw695LiuJJFUCYIgCNe9CC9HIrwcG90+r6KWR5fuoqRaB8CKHdmEemh45IbwKxViA127dpV6dC
oqKli2bBkPP/ww3377Lf7+/g3aHz9+nLi4OKtj8fHxVq/T09O56aabrI7FxcU1eZ7W3XffzZ133klBQQHe3t6sWrWKoUOHWhXa9PX1lRIqgISEBEwmE+np6Tg4OHDixAleeOEFXnrpJamNwWDA0bHxX6+WIpIqQRAEQbhEhZX1UkJ1yj9HC1s0qbKzsyM4OFh63a5dOzp37syKFSt44oknWiyOC2nbti3R0dGsWbOGHj16kJqayrBhwxp9fU1NDQCvv/46HTp0sDp3NW5OLZIqQRAEQbhE/i5qAlztyC6rlY71b+fbihFZtlmRyWTU19ef83x4eDh///231bE9e/ZYvQ4NDWX//v1Wx/bt23dZcd15550sXryYgoICkpKS8PW1/nfKy8uTerIAUlJSkMvlhIaG4uHhgZeXF1lZWdx+++2XFUdLuPrSPEEQBEG4ynk4qvm/kR3pF+WFt5MtT94cycD2Pi0ag06no6ioiKKiIo4fP87rr79OTU0Nffv2PWf7ESNGkJGRwVtvvUVaWho//vhjg8neo0aNYsOGDSxcuJCMjAyWL1/OP//8c1n74g0ePJiCggJWrFhhNUH9FFtbW6ZPn87hw4fZsWMHM2fOZODAgXh6egIwdepUPv30U7788kvS09M5cuQIK1euZOHChU2O6UoRPVWCIAiC0ARxAS58Mroj1fUG3BxsW/z5GzdupGfPngA4ODgQFhbGBx98YDUJ/Ex+fn58+OGHzJo1i6+++oq4uDieeOIJnn/+ealNp06dmDFjBh999BFz5syhZ8+ePPDAA5dVwsDR0ZFbbrmFDRs2NJivBRAUFMTNN9/Mww8/TEVFBTfccAOvvPKKdP6uu+5CrVbz+eef8/bbb2Nvb09kZCT3339/k2O6UmRms9l88WZCc9BqtXTq1ImdO3ei0TRuua4gCILw31VXV0d6ejqhoaGo1erWDqdJXnzxRdLS0vj666+bfI/777+fNm3a8OKLL1od//DDD/nzzz/5/vvvLzfMy3ahr1VjP7/F8J8gCIIgCJLPP/+cw4cPk5mZyZIlS1izZk2TSxdUVFTwxx9/sG3bNu67775mjvTqI4b/BEEQBEGQ7N27lwULFlBdXU1gYCAvvPACd911V5PuNXToUCoqKpg2bRphYWHNHOnVRyRVgiAIgiBIPvjgg2a719mrDc82ZcoUpkyZ0mzPa21i+E8QBEEQBKEZiKRKEARBEAShGYikShAEQRAEoRmIpEoQBEEQBKEZiKRKEARBEAShGYikShAEQRAEoRmIkgqCIAiCIDSwfft2Pv/8c/bv309RUREff/yx1TYzo0ePZtu2bQ2uO3DgADY212d6cX2+a0EQBEEQLqimpoaoqCiGDx/O5MmTz9nm7rvvZurUqVbHrteECkRSJQiCIAj/CfV6I2U1elztldgqFVf8eX369KFPnz4XbKNWq/H09Gxw/J133uHPP/8kPz8fDw8PBg8ezKRJk1AqlcDpPf9Gjx7NRx99REVFBXfccQcvvfQSX3zxBYsWLcJkMjFmzBgeeeSRK/L+roRWnVPVr18/oqKiGvyZMWMGAPX19cyYMYOuXbuSkJDAlClTKC4utrpHbm4u48ePp0OHDnTv3p233noLg8Fg1SY5OZmhQ4cSGxvLzTffzKpVqxrEsnTpUvr160f79u2566672Lt3r9X5xsQiCIIgCFfC0YIqHl+RQr/31vP4ihSOFlS1dkgX5ODgwKxZs/j555954YUX+Pbbb1m0aJFVmxMnTvDPP/+wYMEC3nvvPb777jvGjx9PQUEBS5YsYdq0acyZM4c9e/a0zptoglZNqr777js2bdok/Vm4cCEAAwYMAODNN99k3bp1zJkzhyVLllBYWGjVBWk0GpkwYQJ6vZ7ly5cze/ZsVq9ezdy5c6U2WVlZTJgwga5du/L9999z//338+KLL7Jx40apzS+//MKsWbOYNGkSq1evJjo6mnHjxlFSUiK1uVgsgiAIgnAl1OuN/O
/Po6zdl0+NzsjaffnM+fMo9Xpja4fGsmXLSEhIkP7Mnj0bgEcffZSOHTsSEBBAv379ePDBB1m7dq3VtWazmTfffJO
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Bldg Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 27,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Unify the two townhouse types into one.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"Twnhs\" if x in (\"TwnhsE\", \"TwnhsI\") else x\n",
")\n",
"# Unify the two kinds of 2-family homes.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"2Fam\" if x in (\"2FmCon\", \"Duplx\") else x\n",
")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 28,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"build_type = pd.get_dummies(df[\"Bldg Type\"], prefix=\"build_type\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"df = pd.concat([df, build_type], axis=1)\n",
"del df[\"Bldg Type\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 29,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(build_type.columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 30,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" build_type_1Fam build_type_2Fam build_type_Twnhs\n",
"Order PID \n",
"1 526301100 1 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 1 0 0\n",
"4 526353030 1 0 0\n",
"5 527105010 1 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 30,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[build_type.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Air Conditioning\n",
"\n",
"Air conditioning clearly increases the valuation (\"steeper\" regression line with respect to the overall living area)."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 31,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydZ3hU5daG75n0kEYaIYSaEFqA0EkIoBRFVKQIomBBjmBBLKBYj6AcQMXzcVCPYDmAiCLSlKaCAoL0EnrvPY30nsz3Y2UymSRAEgIBsu7rmovsPe/s/c5EmYdVnmUwmUwmFEVRFEVRlOvCWNEbUBRFURRFuRNQUaUoiqIoilIOqKhSFEVRFEUpB1RUKYqiKIqilAMqqhRFURRFUcoBFVWKoiiKoijlgIoqRVEURVGUcsC2ojdQmcjNzSUqKooqVapgMBgqejuKoiiKopQAk8lESkoKvr6+GI1XjkepqLqJREVF0blz54rehqIoiqIoZWDt2rX4+fld8XkVVTeRKlWqAPJLcXFxqeDdKIqiKIpSEpKTk+ncuXP+9/iVUFF1EzGn/FxcXFRUKYqiKMptxrVKd7RQXVEURVEUpRxQUaUoiqIoilIOqKhSFEVRFEUpB7SmSlEURVFuU3Jzc8nMzKzobdz22NnZYWNjc93XUVGlKIqiKLchmZmZnDhxgtzc3Ireyh2Bh4cHfn5+1+UjqaJKURRFUW4zTCYTFy5cwMbGhpo1a17VkFK5OiaTidTUVKKiogCoXr16ma+lokpRFEVRbjOys7NJTU3F398fZ2fnit7ObY+TkxMgJt2+vr5lTgWqtFUURVGU24ycnBwA7O3tK3gndw5mcZqVlVXma6ioUhRFUZTbFJ0jW36Ux2epokpRFEVRFKUcUFGlKIqiKIpSDqioUhRFUSo3uTlwehNs+Qr2LYKkixW9o0pLly5dmDlz5nVd4+zZszRo0IADBw6Uz6ZKgXb/KYqiKJWbk+tgdh8w5fk9NX0EHvw/sK9Ssfu6DqKjo5k2bRpr1qzh0qVLeHl50ahRI5588knCwsLK7T6PP/44DRs25O233y63a5aEixcv0q1bN+rUqcPSpUutnqtevTrr16+natWqN3VPoJEqRVEUpbITOcciqAD2/AixRytuP9fJ2bNn6du3L5s2beL1119nyZIlfP3117Rr145x48bd9P2YTCays7PL9ZoLFy6kR48eJCcns2vXLqvnbGxs8PHxwda2+LjRjdiPGRVViqIoSuXG3s362GAE4+2byBk3bhwGg4GffvqJe++9l7p161K/fn2GDBnCvHnz8tclJiby9ttv0759e1q2bMkTTzzBwYMH85//9NNPeeihh1i8eDFdunShVatWvPLKKyQnJwPwxhtvsGXLFr799lsaNGhAgwYNOHv2LJs3b6ZBgwasXbuWvn370rRpU7Zv387p06d57rnnCA8Pp0WLFvTr148NGzaU+v2ZTCYWLlzIQw89xAMPPMD8+fOtni+c/rvSfm4EKqoURVGUyk3oQHBwtRx3HA1e9Uv22rTLcPk05JTd26g8iY+PZ926dQwaNKhYU1A3N4uAfOmll4iNjeWrr75i4cKFNGnShCeffJL4+Pj8NadPn+aPP/5g2rRpTJ8+na1bt/LVV18B8Pbbb9OiRQsGDBjA+vXrWb9+vZUb+SeffMKoUaNYvnw5DRo0IDU1lc6dOzNz5kwWLVpEx44defbZZzl//nyp3uOmTZtIT08nPDycXr16sWzZMlJTU6/5usL7uRHcvlJcURRFUcqDgDbwjz8g6gBU8Qa/5mBbAlPNk3/D0lcg9giEDoJOr0HV2jd+v1fh9OnTmEwm6tWrd9V127ZtY/fu3WzcuDHfQHTMmDGsWrWK3377jUceeQSQqNDEiRNxcXEBoFevXmzcuJFXXnkFV1dX7OzscHR0xMfHp8g9Ro4cSYcOHfKPPTw8aNiwYf7xyy+/zKpVq/jzzz8ZPHhwid/j/P
nz6dmzJzY2NgQHB1OzZk1+/fVX+vbte9XXFd7PjUBFlaIoiqL4NJBHSUk4Bz89ASkxcrxzNngFQsQrN2Z/JcRkMpVo3aFDh0hNTaVdu3ZW59PT0zl9+nT+cY0aNfIFFYCvry+xsbElukfTpk2tjlNSUvjss89Ys2YN0dHR5OTkkJ6eXqpIVWJiIitXruT777/PP9erVy/mz59/TVFVeD83AhVViqIoilJaki9ZBJWZo39UuKiqXbs2BoOB48ePX3VdSkoKPj4+zJ49u8hzrq6WVGhxxd4lFW7meXpmPvzwQzZs2MCYMWOoVasWjo6OjBw5slRjYZYsWUJGRgYDBgyw2k9ubi4nTpygbt26Jd7PjUBFlaIoiqKUFvcAcK8FCZaoDo0erLj95OHh4UFERARz5szh8ccfL1JXlZiYiJubG02aNCEmJgYbGxsCAgLKfD87Oztyc3OvvRDYuXMnffr0oXv37oAIu3PnzpXqfgsWLODpp5+mT58+VufHjRvHggULGD16dKmuV95oobqiKIqilBYXXxgwC+rfC67V4e63ofFDFb0rAN577z1yc3Pp378/v/32GydPnuTYsWN8++23+bVS4eHhhIaG8sILL7B+/XrOnj3Ljh07+L//+z/27NlT4nvVqFGDXbt2cfbsWeLi4q4qsGrXrs3KlSs5cOAABw8eZNSoUSUWZAAHDhxg3759PPzwwwQHB1s97r//fhYvXnzDrBJKiooqRVEURSkLNVrCI9/Cs39D59fB1a+idwRAzZo1WbhwIe3atePDDz/kgQceYMiQIWzcuJGxY8cCMjz4yy+/pE2bNrz55pv06NGDV199lXPnzuHt7V3iez399NPY2Nhw//33ExYWdtX6qDfeeAM3NzcGDhzIs88+S8eOHWnSpEmJ7zV//nyCgoIIDAws8lz37t2JjY1l7dq1Jb7ejcBgKmlyVLlukpOTadWqFdu3b7cq/FMURVGU0pCenp5fQ+To6FjR27kjuNpnWtLvb41UKYqiKIqilAMqqhRFURRFUcoBFVWKoiiKoijlgIoqRVEURVGUckBFlaIoiqIoSjmgokpRFEVRFKUcUFGlKIqiKIpSDqioUhRFURRFKQdUVCmKoiiKopQDKqoURVEURblleeONN2jQoAFffvml1flVq1bRoEGDCtpV8aioUhRFURTllsbBwYGvvvqKhISEit7KVVFRpSiKoihKqcjIyuFiQjoZWTk35X7h4eF4e3szffr0m3K/slKhoqpLly40aNCgyGPcuHEAZGRkMG7cONq1a0eLFi148cUXiYmJsbrG+fPnGTZsGM2bNycsLIwPP/yQ7OxsqzWbN2+mT58+hISE0L17dxYuXFhkL3PmzKFLly40bdqU/v37s3v3bqvnS7IXRVEURbnTOXwpiZfnRdLlkzW8PC+Sw5eSbvg9jUYjr776Kt999x0XL1684fcrKxUqqubPn8/69evzHzNmzACgR48eAEyYMIHVq1czZcoUZs+eTVRUFCNGjMh/fU5ODsOHDycrK4u5c+cyadIkFi1axNSpU/PXnDlzhuHDh9OuXTt+/vlnnnzySd555x3WrVuXv2b58uVMnDiRF154gUWLFtGwYUOGDh1KbGxs/ppr7UVRFEVR7nQysnL4v1WHWbHnIqmZOazYc5Epqw7flIhV9+7dadSokdV3/K1GhYoqT09PfHx88h+rV6+mVq1atG3blqSkJBYsWMAbb7xBWFgYISEhTJgwgZ07dxIZGQnA+vXrOXr0KB9//DGNGjWic+fOvPTSS8yZM4fMzEwA5s6dS0BAAG+88QaBgYEMHjyYe++9l5kzZ+bvY8aMGQwYMIB+/foRFBTEuHHjcHR0ZMGCBQAl2ouiKIqi3OlcTs1i7aFoq3NrDkVzOTXrptx/9OjRLF68mGPHjt2U+5WWW6amKjMzk19++YV+/fphMBjYu3cvWVlZhIeH568JDAzE398/X8hERkYSHByMt7d3/pqIiAiSk5M5ev
Ro/pqwsDCre0VERORfIzMzk3379lndx2g0Eh4ezs6dOwFKtBdFURRFudOp6mxH5wY+VufuauBDVWe7m3L/Nm3aEBE
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Central Air\", s=15, data=df);"
]
},
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use a new variable name to clearly show that the variable's *dtype* is changed from *str* to *int*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 32,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"air_cond\"] = df[\"Central Air\"].apply(lambda x: 1 if x == \"Y\" else 0).astype(int)\n",
"del df[\"Central Air\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 33,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.append(\"air_cond\")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 34,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>air_cond</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" air_cond\n",
"Order PID \n",
"1 526301100 1\n",
"2 526350040 1\n",
"3 526351010 1\n",
"4 526353030 1\n",
"5 527105010 1"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 34,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"air_cond\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \"Proximity to various Conditions\"\n",
"\n",
"The columns *Condition 1* and *Condition 2* have the same realizations and can be regarded as \"tags\" given to a house indicating the nearby presence of a) a major street, b) a railroad, or c) a park.\n",
"\n",
"The default tag \"Norm\" (implying no \"condition\") is given to 86% of the houses (this realization should therefore not be regarded as a tag!).\n",
"\n",
"From the comparison of the grouped scatter plots below, it can be assumed that the proximity of a major street decreases the valuation (lower regression slope through the cloud of blue and orange dots). Therefore, a factor variable *major_street* is extracted indicating the proximity of an \"artery\" or \"feeder\" street.\n",
"\n",
"Further, a factor variable *railway* is extracted as a relatively high proportion of the houses has such a tag. From the plots, a railway seems to not affect the valuations strongly.\n",
"\n",
"Lastly, a factor variable *park* is extracted. From the plots, this does not seem to affect the valuation much."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the \"raw\" realizations:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 35,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Feedr 174\n",
"Artery 97\n",
"RRAn 48\n",
"PosN 43\n",
"RRAe 29\n",
"PosA 24\n",
"RRNn 11\n",
"RRNe 6\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 35,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" (\n",
" df[\"Condition 1\"].value_counts() + df[\"Condition 2\"].value_counts()\n",
" )\n",
" .sort_values(ascending=False)[1:]\n",
")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 36,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Condition 2 is only filled with anything other than \"Norm\"\n",
"# if Condition 1 already has such a tag.\n",
"assert not ((df[\"Condition 1\"] == \"Norm\") & (df[\"Condition 2\"] != \"Norm\")).any()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 37,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"86"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 37,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 86% of the houses actually have no tag.\n",
"round(100* (df[\"Condition 1\"] == \"Norm\").sum() / df.shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From a simple scatter plot it is hard to see any significant impact by a predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 38,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvA4d/uZlM3vTdSIQRCIKFJTGjSpApIk6KIgkhTUJqiICXYEUGaijSlN5HiS1N6DZ0AKRAgpPe6u9n9/tiPxTUBQggJwrmvi+tlzpyZeWZ9SZ49c+Y5Eq1Wq0UQBEEQBEF4LNLqDkAQBEEQBOFZIJIqQRAEQRCESiCSKkEQBEEQhEogkipBEARBEIRKIJIqQRAEQRCESiCSKkEQBEEQhEogkipBEARBEIRKYFTdATxPNBoNKSkpWFhYIJFIqjscQRAEQRDKQavVkp+fj5OTE1Lp/cejRFJVhVJSUmjRokV1hyEIgiAIQgX89ddfuLi43He/SKqqkIWFBaD7j6JQKKo5GkEQBEEQyiMvL48WLVrof4/fj0iqqtDdR34KhUIkVYIgCILwH/OwqTtiorogCIIgCEIlEEmVIAiCIAhCJRBJlSAIgiAIQiUQc6oEQRAE4Sml0WhQKpXVHcYzTy6XI5PJHvs8IqkSBEEQhKeQUqkkPj4ejUZT3aE8F2xsbHBxcXmsOpIiqRIEQRCEp4xWq+XOnTvIZDI8PT0fWHBSeDxarZaCggJSUlIAcHV1rfC5RFIlCIIgCE8ZtVpNQUEBbm5umJubV3c4zzwzMzNAV6Tbycmpwo8CReorCIIgCE+ZkpISAIyNjas5kufH3eRVpVJV+BwiqRIEQRCEp5RYJ7bqVMZnLZIqQRAEQRCESiCSKkEQBEEQhEogkipBEAThuVaiKSEqJYrV0avZdX0XqQWp1R3Sf8LAgQOZOXOmfrt169b88ssvDzzm+++/p1u3bk84suoj3v4TBEEQnmsnkk4wbPcwNFpdPajOPp35JOwTzIzMqjmyh0tNTWXhwoXs37+f5ORk7O3tCQwM5PXXX6dZs2ZVGsv69ev1b9EBBAQEMH/+fNq0aaNve/PNNxkwYMATj2XNmjVs27aNixcvkp+fz4kTJ7Cysnri1xUjVYIgCMJzbUvMFn1CBbAtfhvXs69XX0DldOvWLXr06MHRo0cZP348v//+Oz/++CNNmzZl2rRpVR6PnZ2dQVJVFgsLC2xtbZ94LIWFhURERPDOO+888Wv9k0iqBEEQhOeahbGFwbZUIkUmffwlS560adOmIZFIWLduHe3bt8fHx4eaNWsyePBg1q5dq++XmJjI8OHDCQkJITQ0lDFjxpCWlqbff/eR3ObNm2ndujUNGzbk/fffJy8vT9+noKCA8ePHExISQnh4OD///HOpeP75+K9169YAjBgxgoCAAP32vx//aTQa5s2bR/PmzQkKCqJbt278/fff+v23bt0iICCAP//8k4EDB1K/fn26du1KVFTUAz+bN954g6FDh1K/fv1H+EQfn0iqBEEQhOdaF78uWMjvJVZv13sbHyufch2bXZzN7dzbqDQVr21UEVlZWRw4cID+/fuXWRz07qMujUbDu+++S3Z2NitWrGDp0qXcvHmT999/36B/QkICe/bsYeHChSxatIgTJ06wZMkS/f4vvviCEydO8MMPP/DTTz9x/PhxLl68eN/41q9fD0BkZCQHDx7Ub//b8uXLWbp0KRMmTGDr1q2Eh4fz7rvvcv36dYN+3377LUOGDGHz5s14e3szbtw41Gp1uT6rqiTmVAmCIAjPtfqO9fm146/EZMVga2pLHfs6yGXyhx53Mukk049M53rudV7xe4Wh9YfirnCvgoh1SZBWq8XX1/eB/Y4cOcLVq1fZs2ePfvmVL774gk6dOnHu3DmCg4MB3VItkZGRKBQKALp27cqRI0d4//33yc/PZ/369Xz55Zf6eVqzZ8+mRYsW972unZ0doE
vuHB0d79vvp59+4u2336ZTp04AfPjhhxw7doxly5bx6aef6vu9+eabtGzZEoDRo0fTqVMnbty4gZ+f3wPvv6qJkSpBEAThuedr40s773Y0dmlsMGp1P8n5yYz7axxxOXFotBo2xmxkZ/zOKohUR6vVlqtfbGwsLi4uBuvZ+fv7Y2VlRVxcnL7N3d1dn1ABODk5kZ6eDsDNmzdRqVQGj9JsbGzw8SnfaN795OXlkZKSQmhoqEF7aGgosbGxBm0BAQH6v99N0jIyMh7r+k+CSKoEQRAE4RGlFqaSUWT4S/1Q4qEqu76XlxcSicQgMXocRkalH1yVN3GrCnL5vZHDu5XPNRrN/bpXG5FUCYIgCMIjcjV3xc3CzaCtTY029+ld+WxsbAgPD2fVqlUUFBSU2p+TkwOAn58fSUlJ3LlzR78vJiaGnJyccj868/T0RC6Xc/bsWX1bdnZ2qXlP/yaXy/VrGJZFoVDg5OTE6dOnDdpPnz6Nv79/uWJ72oikShAEQRAekb25Pd+0/Ibm7s1xMndiRIMRtPGquqQK4NNPP0Wj0dCrVy927drF9evXiY2NZfny5fTp0weAsLAwatWqxQcffMDFixc5d+4c48ePp0mTJtSrV69c17GwsKBnz558+eWX+jlaEydOfOhaee7u7hw5coTU1FSys7PL7DNkyBCWLFnC9u3biYuL46uvviI6OppBgwY92ofxL6mpqVy+fJmEhAQArl69yuXLl8nKynqs8z6MmKguCIIgCBVQ16Eu37T8hgJ1AbamT7720r95enqyceNGFi5cyOeff05KSgp2dnbUrVuXqVOnArpHZT/88APTp09nwIABSCQSIiIimDJlyiNda/z48RQUFDB8+HAsLCwYPHiwQcmFskyYMIHZs2ezbt06nJ2d2bt3b6k+gwYNIi8vj9mzZ5ORkYGfnx8//PAD3t7ejxTfv61evZp58+bpt/v37w/o3kbs0aPHY537QSTap+mh6TMuLy+Phg0bcurUKYMJgYIgCILwT0VFRcTHx+Pj44OpqWl1h/NceNBnXt7f3+LxnyAIgiAIQiUQSZUgCIIgCEIlEEmVIAiCIAhCJRBJlSAIgiAIQiUQSZUgCIIgCEIlEEmVIAiCIAhCJRBJlSAIgiAIQiUQSZUgCIIgCEIlEEmVIAiCIAhCJRBJlSAIgiAIlSoqKorAwECGDh1arv7ff/893bp1e8JRPXkiqRIEQRAEoVKtX7+eAQMGcOLECZKTk+/bT6vVolarK+26SqWy0s5VESKpEgRBEIRnWLGqhKTsIopVJVVyvfz8fLZv306/fv1o2bIlmzZt0u87duwYAQEB/PXXX/To0YN69eqxdetW5s2bR3R0NAEBAQQEBLBx40YAcnJy+Oijj3jhhRcIDQ1l0KBBREdH6893d4Rr3bp1tG7dmuDgYDZv3kzTpk1LJVjvvvsuH3744RO992pNqlq3bq3/AP/5Z9q0aQAUFxczbdo0mjZtSkhICKNGjSItLc3gHImJiQwdOpT69evTrFkzPv/881JZ77Fjx+jevTtBQUG0bdtW/x/rn1atWkXr1q2pV68evXr14ty5cwb7yxOLIAiCIDxNribn8t7aM7T+ej/vrT3D1eTcJ37NHTt24Ovri6+vL127dmXDhg1otVqDPl9//TXjxo1j+/btvPjii7z55pvUrFmTgwcPcvDgQTp27AjAmDFjSE9PZ8mSJWzcuJG6devy+uuvk5WVpT9XQkICu3btYt68eWzevJkOHTpQUlLCnj179H3S09P566+/6Nmz5xO992pNqtavX6//AA8ePMjSpUsB6NChAwCzZs1i3759zJkzhxUrVpCSksLIkSP1x5eUlDBs2DBUKhWrV69m9uzZbNq0iblz5+r73Lx5k2HDhtG0aVO2bNnC66+/zscff8yBAwf0fbZv305kZCQjRoxg06ZN1K5dmyFDhpCenq7v87BYBEEQBOFpUqwq4dvdV9lxPokCZQk7zicxZ/fVJz
5itX79erp27QpAREQEubm5HD9+3KDP6NGjefHFF6lRowbOzs6Ym5sjk8lwdHTE0dERU1NTTp48yblz55g7dy716tX
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Condition 1\", s=15, data=df);"
]
},
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "However, plotting the groups separately reveals different slopes."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 39,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB8wAAAXiCAYAAAB5hxKQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVyVZf7/8fc5cNhEQUBxrUQFNUXNyiVc0spqakZjxhaz1bKmsr6almZZ5qhppelUVppmYzkV2thU2uI+KrmmpoKG+4IIHGTnwDm/P/hxB8p6AA/L69nDR3Kd+77uz2HxOpz3fV2XyeFwOAQAAAAAAAAAAAAAQD1jdnUBAAAAAAAAAAAAAAC4AoE5AAAAAAAAAAAAAKBeIjAHAAAAAAAAAAAAANRLBOYAAAAAAAAAAAAAgHqJwBwAAAAAAAAAAAAAUC8RmAMAAAAAAAAAAAAA6iUCcwAAAAAAAAAAAABAvURgDgAAAAAAAAAAAAColwjMAQAAAAAAAAAAAAD1EoE5UI/NmzdPYWFhCgsLK/bxgQMHKiwsTC+++GKlrhMdHW1cJzo6ulJ9AQBQkzG2AgBw+TH+Avmq6nsdAIrDeAugLnN3dQFATZKTk6MffvhBGzZs0J49e5ScnKy0tDT5+vqqZcuW6tKliwYPHqxevXrJbOZ+k/oqPT1d+/fv1549e7Rnzx7t3btXp06dkiS1bNlSa9ascXGFVSM6OloPPPCA8fFtt92mOXPmlHrOiy++qBUrVkiSYmJiqrM8ALUEYyvKo76MrcX55ZdfNGLECOPjzz//XNdcc40LKwJQFzD+ojzqy/h78e+2ZZk+fbruuuuuaqwIQF3BeIvyYLyVvLy85O/vrw4dOujmm2/Wn//8Z3l4eJTaX0k3ZlgsFjVq1EghISG64YYbNGzYMAUGBpba14gRI/TLL79Iksxms1auXKn27duXePzJkyc1aNAgSdLTTz+tZ555ptT+UTcQmAP/3w8//KAZM2YYg1VhVqtVVqtVv/32m5YtW6arrrpKEyZM0IABAy5/oTVE4UGjvv0y+cQTTxgDbH2yatUqxcTElPhiBQAuxthaMYyt9W9slWTcaFbg66+/rpbAfODAgTp16pSGDh2qGTNmVHn/AGoOxt+KYfytn+MvAFQW423FMN7W7/E2KytLZ8+e1dmzZ7Vu3TotXrxY8+fPV6tWrSrcl81mU2JiohITE7Vt2zYtXrxYb731liIiIsp1vt1u17x58zR37twKXxt1G4E5IOndd98t8g/kDTfcoIEDB6pt27Zq1KiRUlJSdOTIEa1Zs0abN2/W0aNHNXv27Dr/Iqeq7m7r2bNnnZ1t7O/vr86dO2vnzp3KyMhwdTnVyuFwaN68efrnP//p6lIA1AKMrcVjbC1bfRpbs7KytHr1akmSj4+PMjIytGrVKk2aNKnMu+0BoDiMv8Vj/C1bfRl/7733Xt13332lHtOsWbPLVA2A2orxtniMt2Wrr+NtYmKiDh06pIULF+rs2bM6dOiQnnzySX399ddyc3Mrta/OnTtr+vTpxsfp6ek6duyYPv/8c+3evVtWq1XPPPOMVq5cqdatW5ervh9++EEHDx5Uhw4dnHuCqJMIzFHvRUVFGS9wAgMDNWfOHF1//fWXHNenTx8NHz5csbGxmj59upKSki53qagh7rjjDt19993q0qWLrrzySkn5s7bq8oucxo0bKzk5WT/++KP279+vTp06ubokADUYYysqqj6OrZL0448/Kj09XZI0adIkTZw4USkpKVqzZo1uvfVWF1cHoLZh/EVF1cfxNzAwUKGhoa4uA0AtxniLimK8zde7d2/ddddd+vOf/6xTp04pNjZWP/74Y5m/+/r4+FzSV/fu3fWXv/xFzz77rFavXq2MjAwtWrRIr7zySql9NWjQQDk5ObLZbHrnnXf0/vvvO/cEUSexcQbqtfj4eL3++uuS8v/h/fTTT4t9gVNYaGioFi5cqEceee
RylIga6O6779Ydd9xhvMCpD0aMGGHMdGO5GgClYWyFM+rj2CrlL78u5e/NFhkZqTZt2hRpB4DyYvyFM+rr+AsAzmK8hTMYb//g6+urJ5980vh48+bNTvdlMpn0/PPPV6gvf39//e1vf5OUvyLC3r17nb4+6h4Cc9RrixcvVmZmpiRp9OjRatu2bbnOM5vN+stf/lLi49u3b9e4ceM0cOBAdenSRddee62GDBmi2bNnl3o3YXR0tMLCwhQWFqbo6GhJ0nfffacHH3xQvXr1Unh4uAYPHqyZM2fKarWWWefZs2f12muvadCgQerSpYsiIiL0xBNPlHsgGjhwoMLCwvTiiy8WaQ8LCzP2nJGkCRMmGHUX/Jk3b16pz6s46enp+vDDD3X33Xfr+uuvV+fOndWvXz+NHj1aa9euLbXWESNGKCwsTCNGjJCU/wJ2+vTpuvnmmxUeHq6ePXvq0Ucf1fr168v13FFU8+bNdffdd0uS1q5dqz179lSqv6SkJM2ePVtDhgzRtddeqy5dumjgwIEaN26ctm/fXuq5F39f7tu3Ty+++KIGDhyozp07F9lj/eJjf/vtN40dO1b9+/dXeHi4br755mLv8t25c6dGjx6tAQMGqEuXLrrppps0a9YspaWlVep5A/UBY2vpGFtR4Ny5c9qyZYsk6c9//nOR/2/atKnMGSjz5s0zvgckKTU1Ve+++64xtoaFhWn58uXG17Fgb8UVK1Zc8r1V8DW+WEJCgmbPnq277rrL+P7p37+/nn322VK/50+ePGn0vXz5ckn5S9499thjioiIUKdOnTRixAglJSUZY3dZMwGk/Dc0Cvr97rvvyjweqE8Yf0vH+IvKSk1N1QcffKB77rlHvXr1UufOnY3vw1WrVsnhcFyWPtavX6/HHntMvXr1UteuXTV48GBNnz5d8fHxZZ67fPly4/v35MmTysnJ0eLFizVs2DD17Nnzku93AJdivC0d4y3Ko/B7t2fPnq1UX61bt5aPj0+F+nriiSfk6ekpyfmJYVX9s4eagSXZUW85HA6tWLFCUv4dgQV3FlWG3W7X1KlTtXTp0iLtOTk5OnDggA4cOKClS5fqnXfe0Q033FBmX+PGjdPKlSuLtB89elQLFy7UTz/9pKVLl6pJkybFnr99+3aNGjWqSMCXkJCgtWvXau3atXrmmWecfJbVY//+/Ro1apTOnTtXpD0+Pl6rV6/W6tWrdcstt+jNN980BrSS7NixQ0899ZSSk5ONtuzsbG3atEmbNm3S+PHj9eijj1bL86jLHn/8cX355ZfKysrSO++8o4ULFzrVz6ZNm/Tss89eEj6fOnVKp06d0sqVKzV8+HBNmjRJZnPp93V9/vnnmjp1qnJzc8u87tdff61JkybJZrMZbcePH9fixYu1bt06/etf/1KTJk20cOFCzZo1q8ibFSdOnNCCBQu0ZcsWffrpp2rQoEEFnzVQPzC2Mrai/L755hvl5eXJbDbrzjvvlCTdeeedmjt3rmw2m/773//qgQceKFdfR48e1SOPPGKE4lVh5cqVmjx58iXLBJ49e1arVq3SqlWr9Ne//lWvvfaa3N1L/rXS4XBo/Pjx+s9//nPJYwEBARo0aJBWrVql77//Xi+99FKp34sF4bu/v79uuukmJ58ZUPcw/jL+onpt2bJFzz333CVveBf+Puzfv79mz55d4u+KVdHH9OnTtXjx4iJtR48e1eLFi7Vy5Up9+OGH5X5OycnJevrpp3XgwIFynwPUd4y3jLeoGhaLxfh7ab9LlofJZDL2QC9vX8HBwbr77ru1ZMkSbdiwQbt27VL37t2drqGyP3uoOQjMUW8dOnTIGAR79OghX1/fSvf55ptvGi9wWrVqpccee0ydOnVSZmam1qxZo6VLlyo1NVWjRo3SV199pQ4dOpTY1zvvvKNdu3bppptu0pAhQ9SiRQudP39en332mdatW6djx45p+vTpevvtty859/Tp08YLHLPZrGHDhunWW2+Vr6
+vYmJi9NFHH2nevHnq3LmzU8/zm2++0blz54wXCs8991yRuwSl/H1Kyis+Pl4PPfSQUlJSZDKZNHToUP3pT3+Sv7+
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 2000x1500 with 9 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"street = [\"Artery\", \"Feedr\"]\n",
"railway = [\"RRNn\", \"RRAn\", \"RRNe\", \"RRAe\"]\n",
"park = [\"PosA\", \"PosN\"]\n",
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Condition 1\", hue=\"Condition 1\",\n",
" col_order=[\"Norm\"] + street + railway + park,\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract factor variables *major_street*, *railway*, and *park*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 40,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"major_street\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(street) | df[\"Condition 2\"].isin(street),\n",
" \"major_street\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 41,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"railway\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(railway) | df[\"Condition 2\"].isin(railway),\n",
" \"railway\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 42,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"park\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(park) | df[\"Condition 2\"].isin(park),\n",
" \"park\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 43,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Condition 1\"]\n",
"del df[\"Condition 2\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 44,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"major_street\", \"railway\", \"park\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 45,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"major_street 264\n",
"railway 94\n",
"park 60\n",
"dtype: int64"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 45,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].sum()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 46,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>major_street</th>\n",
" <th>railway</th>\n",
" <th>park</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" major_street railway park\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 46,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exterior\n",
"\n",
"This dimension describes the main material of which the houses are made. The category is too diverse and the various grouped scatter plots did not reveal differing slopes. For simplicity, this variable is dropped.\n",
"\n",
"This variable actually also represents tags that could be associated with a house (possibly up to two different tags)."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 47,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeZzN1f/A8dfnc9fZ99UwZjFjGIylZIwUSotkaVG2pND6DZGkIrKlvkrfX5ZK0UJZS1RUikjZJesYy1hm32fu+vn8/ri5XDMYzJjBeT4e88g9n/M559w7zb3ve1ZJVVUVQRAEQRAE4YrINd0AQRAEQRCE64EIqgRBEARBEKqACKoEQRAEQRCqgAiqBEEQBEEQqoAIqgRBEARBEKqACKoEQRAEQRCqgAiqBEEQBEEQqoC2phtwI1EUhczMTDw8PJAkqaabIwiCIAhCJaiqSklJCcHBwcjy+fujRFB1FWVmZtK+ffuaboYgCIIgCJfh119/JTQ09LzXRVB1FXl4eACOX4qnp2cNt0YQBEEQhMooLi6mffv2zs/x8xFB1VV0esjP09NTBFWCIAiCcI252NQdMVFdEARBEAShCoigShAEQRAEoQqIoEoQBEEQBKEKiDlVgiAIgnAZ7HY7Vqu1ppshVAGdTodGo7nickRQJQiCIAiXQFVVTp06RX5+fk03RahCvr6+hIaGXtE+kiKoEgRBEIRLcDqgCg4Oxt3dXWzmfI1TVZXS0lIyMzMBCAsLu+yyRFAlCIIgCJVkt9udAVVAQEBNN0eoIm5uboBjk+7g4ODLHgoUE9UFQRAEoZJOz6Fyd3ev4ZYIVe307/RK5smJoEoQBEEQLpEY8rv+VMXvVARVgiAIgiAIVUAEVYIgCIIgCFVABFWCIAjCDc2u2tmeu5uv0r5h9YlfyTLl1HSTalx8fDxr1qyp6WZcc8TqP0EQBOGGtiV7B09vGo2CAsA9dTrySpP/4KY1XpX6R40axdKlS8ulp6Sk8NFHH1WqjL59+9KwYUNeeeWVKmnT+vXr8fHxqZKyzpaZmcmUKVP4+++/OXLkCH379r2kNs+YMYM1a9awfPnyKm9bVRBBlSAIgnBD+yZ9tTOgAlh5/Cf6RPekoU/sVWtDu3btmDRpkkuaXq+/avWfZrFY0Ov1BAUFVUk5FaX7+fnx1FNP8cknn1xRHbWRGP4TBEEQbmieWtftEWRkNNKVH1lyKU4HMmf/nO4p2rRpE4mJiWzevNmZf86cObRp04bs7GxGjRrFn3/+ybx584iPjyc+Pp709HQA9u/fzxNPPEHz5s1JTk5mxIgR5ObmOsvp27cvb7zxBm+++SatW7dm4MCBQPnhv3379tGvXz+aNm1K69atefXVVykpKXFeHzVqFE8//TQffPABKSkp3HXXXRU+z4iICMaMGUO3bt3w8vKqMM+mTZt44IEHSEpKolWrVvTq1Yvjx4+zZMkS3n//ffbu3et8nkuWLLnMV7x6iJ4qQRAE4YZ2b0Qnvjv+EyW2UgAGNniE+p4RlbpXtaugqKCVqm2bhdatW9OvXz9GjhzJ8uXLOXbsGO+++y7vvvsugYGBvPLKKxw+fJgGDRrw/PPPA+Dv709hYSH9+/fnwQcf5OWXX8ZsNjNt2jReeOEF5s2b5yx/6dKlPPLII3z55ZcV1l9aWsrAgQNp3rw5ixYtIicnhzFjxjB+/HgmT57szLdx40Y8PT2ZO3fuZT9Xm83GM888w4MPPsg777yD1Wpl586dSJLEPffcw4EDB1i3bp2zjvMFZjVFBFWCIAjCDa2JXwKftn2PQ0WH8TX4kODTAJ2su+h9qsmOmmNDtapInjL4aJF0lzcAtHbtWpo3b+6SNnjwYIYMGQLACy+8wIYNG3j11Vc5cOAA3bt3p2PHjoAjsNDpdBiNRpdhu88++4xGjRoxbNgwZ9rEiRNp3749aWlpREVFAVC/fn1Gjhx53ratWLECi8XClClTnBtkvvbaaw
wZMoQXX3yRwMBAwLF55oQJE65o2LK4uJiioiJuv/126tWrB0BMTIzzuru7OxqN5oqHJ6uLCKoEQRCEG160Vz2ivepVOr9qU1DyrGD/93GxAjoFyefygqrWrVszduxYl7SzJ4rr9XqmTZtG165dCQ8P5+WXX75omXv37mXTpk3lgjWAo0ePOoOqxo0bX7Cc1NRU4uPjXXaRb9GiBYqikJaW5gyq4uLirngemK+vLz169GDgwIG0bduWNm3acPfddxMcHHxF5V4tIqgSBEEQhEul4AyonMrs4HN5H6tubm5ERkZeMM+2bdsAKCgooKCg4KJH5ZSWlnL77bfz4osvlrt2dk/P6XPvrlRVlTNp0iT69u3LunXrWLVqFdOnT2fu3LkkJSVVSfnVSUxUFwRBEIRLJUtI58ZP7tU3uf3o0aNMnDiR8ePH07RpU1566SUU5cyKRZ1O5/IYHD1QBw4coE6dOkRGRrr8XMrZhTExMezbt4/S0lJn2tatW5Fl2dnbVdUaNWrE4MGDWbBgAXFxcaxYsQKo+HnWJiKoEgRBEIRLJGklpCA9kpuMpAHJV4PkfvkfqRaLhaysLJef06v07HY7I0aMoF27dvTs2ZNJkyaxb98+Pv74Y+f9derUYceOHaSnp5Obm4uiKDz66KMUFBQwbNgwdu7cydGjR1m3bh0vv/wydvu53Wznd99996HX6xk1ahT79+/njz/+YPz48dx///3Oob9LsWfPHvbs2UNJSQm5ubns2bOHgwcPAnDs2DHefvtttm3bxvHjx1m/fj2HDx8mOjra+TzT09PZs2cPubm5WCyWS66/OonhP0EQBEG4DJJBhiAdqCBprmzl37p160hJSXFJi4qK4vvvv+eDDz7g+PHjzJw5E4Dg4GDGjx/PsGHDSElJoWHDhjz++OOMGjWKe++9F5PJxE8//URERARffvkl06ZNY+DAgVgsFsLDw2nXrh2yXPkA0M3NjY8++og333yTBx54ADc3N+68805GjRp1Wc+1W7duzn/v3r2bFStWUKdOHX7++Wfc3Nw4dOgQS5cuJT8/n+DgYHr37k2vXr0A6Ny5M6tXr6Zfv34UFhYyadIkevTocVntqA6SqqpqTTfiRlFcXEzLli3ZsmULnp6eNd0cQRAE4RKZTCbnyjmj8ersuC5cHRf63Vb281sM/wmCIAiCIFQBEVQJgiAIgiBUARFUCYIgCIIgVAERVAmCIAiCIFQBEVQJgiAIgiBUARFUCYIgCIIgVAERVAmCIAiCIFQBEVQJgiAIgiBUARFUCYIgCIJwQenp6cTHx7Nnz54rKmfGjBncf//9VdSq2kcEVYIgCIJwA9m2bRsJCQkMGjSoystevXo1Dz30EC1btqR58+bce++9vPnmm1VeT20lzv4TBEEQhBvIokWL6NOnD4sWLSIjI4OQkJAqKXfjxo0MHTqUF154gQ4dOiBJEqmpqfz+++9VUv61QPRUCYIgCEINUa02lPwiVKvtqtRXUlLCypUreeSRR7jttttYunSp81pBQQHDhw/nlltuoWnTptx5550sXrzY5f5Dhw7Rq1cvmjRpQpcuXfjzzz+d137++WeaN2/OE088QXR0NFFRUXTq1InXX3+9XDuWLVtGhw4daNmyJUOHDqW4uNh5rW/fvkyYMIGpU6dy880307ZtW2bMmOFyf2pqKo888ghNmjThnnvuYcOGDcTHx7NmzZqqeqkuS40GVR06dCA+Pr7cz7hx4wAwm82MGzeO1q1b07x5c5577jmys7Ndyjhx4gSDBg2iWbNmtGnThilTpmCzuf7PuWnTJrp3705iYiJ33HEHS5YsKdeWzz//nA4dOtCkSRMefPBBdu7c6XK9Mm0RBEEQhMpSTmVj/XwFlskfYv18Bcqp6v9MWbVqFdHR0URHR9O1a1cWL16MqqoAvPvuu6SmpjJnzhxWrlzJ2LFj8fPzc7l/6tSpDBgwgGXLlpGUlMSQIUPIy8sDICgoiIMHD7J///4Ltu
Ho0aP89NNPzJw5k1mzZvHXX38xZ84clzxLly7F3d2dr776ihEjRvC///3P2eNlt9t55plncHNz4+uvv+aNN97gv//
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 1st\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 48,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUxf/A8ffu1fReCYQkkBB6EyQEUUCxIFIsKAgiUqxfESmCBQQBERFFf1JUFESKNBtYUFGaIB2RGkIJJb0nV3d/f5wcHAmQQEICzOt58sjNzs7OXczd52ZnPiOpqqoiCIIgCIIgXBW5qjsgCIIgCIJwIxBBlSAIgiAIQgUQQZUgCIIgCEIFEEGVIAiCIAhCBRBBlSAIgiAIQgUQQZUgCIIgCEIFEEGVIAiCIAhCBdBWdQduJoqikJaWhoeHB5IkVXV3BEEQBEEoA1VVKSwsJDg4GFm++HiUCKquobS0NNq3b1/V3RAEQRAE4Qr88ccfhIaGXvS4CKquIQ8PD8DxS/H09Kzi3giCIAiCUBYFBQW0b9/e+Tl+MSKouobO3vLz9PQUQZUgCIIgXGcuN3VHTFQXBEEQBEGoACKoEgRBEARBqAAiqBIEQRAEQagAYk6VIAiCIFwBu92O1Wqt6m4IFUCn06HRaK66HRFUCYIgCEI5qKrKmTNnyMnJqequCBXI19eX0NDQq8ojKYIqQRAEQSiHswFVcHAw7u7uIpnzdU5VVYqKikhLSwMgLCzsitsSQZUgCIIglJHdbncGVAEBAVXdHaGCuLm5AY4k3cHBwVd8K1BMVBcEQRCEMjo7h8rd3b2KeyJUtLO/06uZJyeCKkEQBEEoJ3HL78ZTEb9TEVQJgiAIgiBUABFUCYIgCIIgVAARVAmCIAg3NbtqZ2fWXpYkf8svp/4g3ZRZ1V2qMnFxcaxZs6aqu1Eu1anPYvWfIAiCcFPblrGLZzaPRkEB4N4aHRnT6H+4aY2Vet1Ro0axYsWKEuWJiYl8+umnZWrj8ccfp169eowZM6ZC+rR+/Xp8fHwqpK3z/fzzzyxcuJB9+/ZhsVioW7cuzz33HO3atavwa1UlEVQJgiAIN7VvU35xBlQAq07+Sp/ontTzqVPp127Xrh2TJk1yKdPr9ZV+3QtZLBb0ej1BQUEV0s6F/v77bxISEhg6dCje3t4sX76cp59+miVLllC/fv2rumZ1IoIqQRAE4abmqXVNjyAjo5GufsuSsrhUILN582YGDBjA559/TsuWLQGYM2cOn332Gd999x1Tp05ly5YtbNmyhXnz5gHw66+/EhERwcGDB5kyZQrbtm3Dzc2Ntm3b8sorr+Dv7w84Rrjq1q2LRqPh22+/JTY2lvnz5xMXF8dHH31Ep06dADhw4ABvvfUWO3fuxM3NjbvuuotRo0bh4eEBOEbb8vLyaNSoEQsWLECv1/Pbb7+VeC4XjqS99NJL/Prrr/z222/OoOrxxx8nLi4OvV7P0qVL0el09OrVi+eff9553tGjRxkzZgy7d++mZs2aFTZCV1HEnCpBEAThpnZfRCc8zgusBtR9lNqeEWU6164oWOw2VFWt8H61bt2avn37MmLECPLz8/n33395//33mTBhAoGBgYwZM4ZmzZrx8MMPs379etavX09YWBh5eXn069eP+vXrs3TpUj755BMyMzN58cUXXdpfsWIFOp2OhQsXMm7cuBLXLyoqYsCAAfj4+LB06VKmT5/Oxo0bGT9+vEu9TZs2kZyczNy5c5k1a1aZnpuiKBQWFuLr61uiT+7u7ixZsoThw4fz0UcfsWHDBuc5zz//PDqdjq+//ppx48YxderUsr+g14AYqRIEQRBuao384vmi7QccyT+Kr8GHeJ+66GTdZc8rtJo5U5yLRbHho3cn0OCJXlO+j9W1a9fSrFkzl7LBgwczZMgQAF588UU2btzIa6+9xqFDh+jevTsdO3YEwMvLC51Oh9FodBnt+vLLL6lfvz4vvf
SSs2zixIm0b9+e5ORkoqKiAKhduzYjRoy4aN++//57LBYLb7/9tjMx5uuvv86QIUN4+eWXCQwMBBxJMydMmFCu25affvopRUVF3HPPPS7lcXFxPPfcc87+ffnll2zatIm2bduyceNGjhw5wieffEJISAgAQ4cOZeDAgWW+bmUTQZUgCIJw04v2qkW0V60y17cqdk6b87GpjrlYOZYi9LKGQI1Xua7bunVrxo4d61J2/kRxvV7P1KlT6dq1K+Hh4bzyyiuXbXP//v1s3ry5RLAGcPz4cWdQ1aBBg0u2k5SURFxcnEv2+ObNm6MoCsnJyc6gKjY2tlwB1XfffcdHH33E//3f/5XY6icuLs7lcVBQEJmZmc7+hIaGOgMqoNTnWJVEUCUIgiAI5WRX7M6A6qxCq5lAY/mCKjc3NyIjIy9ZZ8eOHQDk5uaSm5t72S1yioqKuOOOO3j55ZdLHDt/ROvsfndXqzzt/PDDD7z66qu8//77JCQklDiu1bqGJZIkVcqt1coi5lQJgiAIQjlpZQ26Cyaze+krJkg53/Hjx5k4cSLjx4+ncePGjBw5EkU5F8zpdDqXx+AYgTp06BA1atQgMjLS5ac8exbGxMRw4MABioqKnGXbt29HlmXnaFd5fP/997zyyiu8++673H777eU+PyYmhjNnzpCWluYs27lzZ7nbqUwiqBIEQRCEctLKGiI8/PDUGtBJGoKMXnjpyp/XymKxkJ6e7vKTlZUFgN1uZ/jw4bRr146ePXsyadIkDhw4wGeffeY8v0aNGuzatYuUlBSysrJQFIXHHnuM3NxcXnrpJXbv3s3x48dZt24dr7zyCna7vcx9u//++9Hr9YwaNYqDBw/y119/MX78eB544AHnrb+y+u677xg5ciQjR46kSZMmzuean59f5jYSEhKoXbs2o0aNYv/+/WzdupX33nuvXP2obCKoEgRBEIQr4KbVE+HhT5RXIEFGL3Ry+dMwrFu3jsTERJefxx57DICPP/6YkydPOlfmBQcHM378eKZPn87+/fsBePLJJ9FoNNx33320adOGU6dOERISwsKFC1EUhQEDBnD//fczceJEvLy8kOWyf+y7ubnx6aefkpOTw4MPPsj//vc/2rRpw2uvvVbu57lkyRJsNhtvvvmmy3N96623ytyGLMt8+OGHmEwmHnzwQcaMGcPQoUPL3ZfKJKnX083K61xBQQEtWrRg27ZteHp6VnV3BEEQhHIymUzOFXRGY+VmXBeurUv9bsv6+S1GqgRBEARBECqACKoEQRAEQRAqgAiqBEEQBEEQKoAIqgRBEARBECqACKoEQRAEQRAqgAiqBEEQBEEQKoAIqgRBEARBECqACKoEQRAEQRAqgAiqBEEQBEG4pJSUFOLi4ti3b99VtTNjxgweeOCBCupV9SOCKkEQBEG4iezYsYP4+HgGDRpU4W3/8ssvPPzww7Ro0YJmzZpx3333lWsrmuudtqo7IAiCIAjCtbN06VL69OnD0qVLSU1NJSQkpELa3bRpE0OHDuXFF1+kQ4cOSJJEUlISGzZsqJD2rwdipEoQBEEQqohqtaHk5KNabdfkeoWFhaxatYpHH32U22+/nRUrVjiP5ebmMmzYMG699VYaN27MXXfdxbJly1zOP3LkCL169aJRo0Z06dKFLVu2OI/99ttvNGvWjKeeeoro6GiioqLo1KkTb7zxRol+rFy5kg4dOtCiRQuGDh1KQUGB89jjjz/OhAkTmDJlCq1ataJt27bMmDHD5fykpCQeffRRGjVqxL333svGjRuJi4tjzZo1FfVSXZEqDao6dOhAXFxciZ+zO3KbzWbGjRtH69atadasGc8//zwZGRkubZw6dYpBgwbRpEkT2rRpw9tvv43N5vo/5+bNm+nevTsNGzbkzjvvZPny5SX6smDBAjp06ECjRo146KGH2L17t8vxsvRFEARBEMpKOZOBdcH3WCZ/gnXB9yhnKv8zZfXq1URHRxMdHU3Xrl1ZtmwZqq
oC8P7775OUlMScOXNYtWoVY8eOxc/Pz+X8KVOm0L9/f1auXEnTpk0ZMmQI2dnZAAQFBXH48GEOHjx4yT4cP36cX3/
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 2nd\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 49,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Exterior 1st\"]\n",
"del df[\"Exterior 2nd\"]\n",
"# Also discard the associated ordinal variables.\n",
"del df[\"Exter Cond\"]\n",
"del df[\"Exter Qual\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Foundation\n",
"\n",
"The type of foundation appears to have an effect. However, only three of the six realizations occur in large numbers. Factor variables *found_BrkTil*, *found_CBlock*, and *found_PConc* are extracted but not regarded as \"interesting\"."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 50,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxM1/vA8c9MMlkm+x5ZZJVFIrIgRVBbddNaSrWUoqVqa6mlulGKqqqiRWuptlRV7S0ttdS+RGIPspGI7Ps+k5nfH/NzmW8sESHKeb9eXl/33HPvfe7kW/Pk3HOfI9NqtVoEQRAEQRCEeyKv7wAEQRAEQRAeBSKpEgRBEARBqAMiqRIEQRAEQagDIqkSBEEQBEGoAyKpEgRBEARBqAMiqRIEQRAEQagDIqkSBEEQBEGoA4b1HcDjRKPRkJmZiZmZGTKZrL7DEQRBEAShBrRaLSUlJTg6OiKX33o8SiRVD1BmZibt2rWr7zAEQRAEQaiFPXv24OzsfMv9Iql6gMzMzADdD8Xc3LyeoxEEQRAEoSaKi4tp166d9D1+KyKpeoCuPfIzNzcXSZUgCIIg/MfcaeqOmKguCIIgCIJQB0RSJQiCIAiCUAdEUiUIgiAIglAHxJwqQRAEQXhIVFVVoVKp6juMx45CocDAwOCezyOSKkEQBEGoZ1qtlvT0dPLz8+s7lMeWtbU1zs7O91RHUiRVgiAIglDPriVUjo6OKJVKUSD6AdJqtZSWlpKZmQlAgwYNan0ukVQJgiAIQj2qqqqSEio7O7v6DuexZGpqCuiKdDs6Otb6UaCYqC4IgiAI9ejaHCqlUlnPkTzern3+9zKnTSRVgiAIgvAQEI/86lddfP4iqRIEQRAEQagDIqkSBEEQBEGoAyKpEgRBEB5rVZoqYjJjWB23mr+S/yKrNKu+Q/pPOnz4MP7+/hQWFj4U56kP4u0/QRAE4bF2NP0oQ3cMRaPVAPC81/N83OpjTA1N6zmyW5s4cSLr16+v1v7333/j4eFRDxHVzmuvvUZAQAAffPCB1BYWFsa+ffuwsLCox8hqRyRVgiAIwmNtY/xGKaEC2JK0hf5B/Qm0C6zHqO6sTZs2zJgxQ6/N1ta2nqKpO0ZGRjg4ONR3GLUiHv8JgiAIjzUzIzO9bblMjoH83pcsud+uJR83/jEwMODIkSO89NJLBAcHExUVxezZs1Gr1dJxHTp04IcfftA714svvsj8+fOlbX9/f3777TeGDx9O06ZNeeqpp/jnn3/0jtmzZw9dunQhJCSE1157jStXrujtz8vLY8yYMbRp04amTZvStWtXtmzZIu2fOHEiR44c4ccff8Tf3x9/f39SU1Nv+vjvr7/+4rnnniM4OJgOHTqwbNkyvWt16NCBRYsW8f777xMWFsaTTz7Jr7/+WuvPtrZEUiUIgiA81rr6dMVMcT2xerPJm3hZetXo2IKKAq4UXUGleTjW68vIyGDIkCE0adKEjRs3MnnyZNauXcvChQvv+lwLFizgmWeeYdOmTbRt25b33ntPWkbn6tWrjBgxgvbt27NhwwZ69erFl19+qXd8ZWUlQUFBfPfdd2zZsoXevXszfvx4Tp48CcAHH3xAWFgYvXv3Zt++fezbt++m1cxPnz7NO++8w7PPPsvmzZsZMWIEX3/9NevWrdPrt3z5coKDg9mwYQOvvvoqkydPJjEx8a7v+16Ix3+CIAjCY62pQ1NWPbuK+Px4bExsaGzXGIWB4o7HHUs/xtSDU0kuSqabTzeGNB2Cq7nrA4hYZ/fu3YSFhUnbbdq0wcvLC2dnZz7++GNkMhk+Pj5kZGQwe/Zshg8fjlxe87GU7t278/zzzwMwZswYfvrpJ06ePEnbtm355ZdfaNiwIRMnTgTA29ubCxcu8P3330vHOzk5MXjwYGn7tddeY9++fWzdupWQkBAsLCxQKBSYmJjc9nHf8uXLadmyJcOHDwfAy8uL+Ph4li5dSo8ePaR+bdu2pW
/fvgC8+eab/PDDDxw+fBhvb+8a3/O9EkmVIAiC8NjztvbG27rmX74ZJRmM3TOW3PJcANbFr6OhZUMGNxl8hyPrTmRkJJMnT5a2TU1N+fTTTwkLC9MrZBkREUFpaSnp6em4uLjU+Pz+/v7S35VKJebm5uTm6u43ISGBkJAQvf6hoaF621VVVSxatIht27aRkZGBSqWisrISExOTu7hLSExMpGPHjnpt4eHh/Pjjj1RVVUlLytwYr0wmw97enpycnLu61r0SSZUgCIIg3KWssiwpobpmf9r+B5pUmZqa1upNv5tVDr9xztU1CoX+aJ1MJkOj0VTrdytLly7lxx9/ZNKkSfj7+2Nqasr06dPvaRmY2zE01E9pZDIZWq32vlzrVsScKkEQBEG4Sw2UDXAx0x/16dSwUz1Fc52Pjw8xMTF6yUR0dDRmZmY4OzsDujcEMzMzpf3FxcWkpqbe9XVOnTql13bixAm97ePHj9OxY0defPFFAgICcHd3Jzk5Wa+PQqG4Y6Lm7e3N8ePHq53b09Oz1gsf3y8iqRIEQRCEu2SntGPOk3No69oWR6Ujw0OH08mj/pOqV199lfT0dKZOnUpCQgI7duxg/vz5DBw4UJpP9cQTT7Bp0yaOHTvG+fPnmTBhwl3NtQLo06cPycnJfP755yQmJrJ58+ZqdbM8PDw4cOAAx48fJyEhgY8//pjs7Gy9Pq6urpw4cYLU1FRyc3NvmmANGjSIgwcP8s0335CUlMT69etZuXIlgwYNustP5/4Tj/8EQRAEoRaC7IOY8+QcStWl2JjY1Hc4gG5y+HfffcesWbNYs2YN1tbWvPTSSwwbNkzqM3ToUFJTUxk6dCgWFhaMHj36rkeqXFxcmD9/PjNmzODnn38mJCSEd999l0mTJkl9hg0bRkpKCoMHD8bU1JTevXvTqVMnioqKpD6DBg1i4sSJPPfcc5SXl1cr2wAQFBTE3LlzmTdvHgsXLsTBwYFRo0bpTVJ/WMi0D/qB42OsuLiYiIgIoqOjMTc3r+9wBEEQhIdAeXk5SUlJeHl53fUkbqHu3O7nUNPvb/H4TxAEQRAEoQ6IpEoQBEEQBKEOiKRKEARBEAShDoikShAEQRAEoQ6IpEoQBEEQBKEOiKRKEARBEAShDoikShAEQRAEoQ6IpEoQBEEQBKEOiKRKEARBEIQHat26dTRr1qzWx6empuLv78+5c+cAOHz4MP7+/hQWFtZViLUilqkRBEEQBKFWJk6cqLfmn7W1NcHBwYwbN46AgIBanXP+/PksWLDgtn3Onj3Lvn37sLF5OJYHukYkVYIgCIIg1FqbNm2YMWMGANnZ2cydO5e33nqL3bt337S/SqW67fkGDRpEnz59pO2XXnqJ3r1707t3b6nNwMAABweHew++jonHf4IgCILwCKlQVZFeUE6FquqBXM/IyAgHBwccHBwIDAzkzTff5OrVq+Tm5kqP6f7880/69etHkyZN2Lx5c7Vz5Obm0qNHD4YPH45CoZDO5+DggIGBAWZmZnpt//v472FRr0lVhw4d8Pf3r/ZnypQpAFRUVDBlyhQiIyMJCwtj5MiRZGdn650jLS2NIUOG0LRpU1q2bMnnn3+OWq3W63P48GG6d+9OcHAwnTt3Zt26ddViWblyJR06dKBJkyb06tWLkydP6u2vSSyCIAiCUJ8uZBTxzppYOny5m3fWxHIho+iBXr+kpIRNmzbh4eGBtbW11D579mz69+/Pn3/+SVRUlN4xV69e5dVXX8XPz4958+ZhZGT0QGOuS/WaVK1du5Z9+/ZJf5YvXw7A008/DcD06dPZtWsXc+fO5aeffiIzM5MRI0ZIx1dVVTF06FBUKhWrV69m5syZrF+/nnnz5kl9UlJSGDp0KJGRkWzcuJEBAwbw4YcfsnfvXqnPn3/+yYwZMxg+fDjr168nICCAwYMHk5OTI/W5UyyCIAiCUJ8qVFV8teMCW0+lU1pZxdZT6czdceG+j1jt3r2bsLAwwsLCCA
8PZ+fOnXz11VfI5ddTjAEDBvDUU0/h7u6Oo6Oj1J6YmMgrr7xCVFQUM2bMwMDA4L7Ger/Va1Jla2urN5y3a9cuGjZ
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Foundation\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 51,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Foundation\n",
2021-05-25 08:22:14 +02:00
"PConc 1282\n",
"CBlock 1242\n",
"BrkTil 310\n",
"Slab 48\n",
"Stone 11\n",
"Wood 5\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 51,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Foundation\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 52,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABfAAAAH6CAYAAABI7m0CAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dYG8PdMSe+F0EuABKSDUkPXi6BcgaCCoIAgXBURpCgIgogBASGiF0FBQC6IJWBDjIpUgQAhdEkh1ATSJ3Uy9Xx/5JtjJnXSmEny/u7DY7LnzD7rTDJ3T9beZ21BFEURRERERERERERERERkU2TWDoCIiIiIiIiIiIiIiIpjAp+IiIiIiIiIiIiIyAYxgU9EREREREREREREZIOYwCciIiIiIiIiIiIiskFM4BMRERERERERERER2SAm8ImIiIiIiIiIiIiIbBAT+ERERERERERERERENogJfCIiIiIiIiIiIiIiG8QEPhERERERERERERGRDWICn4jKFRERgcDAQAQGBiIiIsLa4eD5559HYGAgnn/+eWuHQkREVGkcX8kahgwZgsDAQLz11lvWDqWYu3fvSu+JvXv3WjscIqonOB5TTbD2eFve75Hpd/7jjz9+wJFRZSisHQDRgxYREYEXXnjB4uNXrlyJMWPG1GBERNZT1vvBwcEBXl5eaN++PYYPH47hw4dDoSh/2EhPT8f+/fvx119/ITY2FhkZGdBqtXB3d4e/vz+6d++OJ554AgEBAdV9OURkRRxfiUqn1Wrx22+/4ejRo7h48SIyMjKQk5MDFxcXNGnSBJ06dcKwYcPQu3dvyGTF11gNGTIECQkJJfatVCrh4eGBtm3b4tFHH8WYMWPg6OhY05dERDaK4zHRP8p7Pzg5OaFBgwbo3LkzxowZgz59+jzA6Ir7+OOP8cknn1Spj9GjR2PVqlXVFBHZCibwicgmvPXWW9i3bx+aNGmCP//809rhEID8/HwkJiYiMTERBw8exI4dO/Dpp5/C19e3xOONRiM2bdqELVu2IDc3t9jjqampSE1NxenTp7Fp0yb069cPixYtQps2bWr6UoiI6i2Or9b322+/YdWqVSUm4FUqFVQqFa5cuYI9e/agZcuWWLhwIQYNGmRx/zqdDikpKUhJScGJEyewbds2fPbZZ/D396/GqyAioqrgeGyb8vLycPPmTdy8eRM//vgjRo0ahZCQEMjlcmuHRmSGCXyq18aPH4/nnnuuzGMaNmz4gKIhS+3cudPaIdRJRd8PeXl5uHz5Mr744gskJCTg0qVLeOWVV/DNN99AEASz5+bn52P27Nk4dOgQgILVgCNGjEDfvn3RtGlTODg4IC0tDZcuXcIff/yBv//+G3/99Re+/vprvP322w/0Oomo5nF8rZ04vla///73v9iwYYP0fb9+/TBkyBC0bt0abm5uyMzMxI0bN/Dnn3/ixIkTuHnzJtavX19qAr9BgwbYunWrWVt+fj5u3LiBb7/9FmfOnMGdO3cwY8YM7N+/H3Z2djV5eURk4zge104cj2tG0feDKIrIzMzE+fPnsX37dqSlpeH7779Hw4YNMWfOHKvE+Nxzz2HYsGElPnbw4EGEhoYCAGbPno2hQ4eWeJy7uzsA/h7VNUzgU73m7e3NMh5E/6+k90PXrl0xcuRIPP3007h16xYuXryIQ4cOYciQIWbHLVu2TEred+nSBevWrUPTpk2LnWPgwIGYOXMmDh06hJCQkJq7GCKyKo6vREBYWJiUvPf29kZoaCh69uxZ7Li+fftiwoQJiImJwcqVK5Genl5qn0qlssT3VufOnTFy5EhMmjQJp0+fxu3bt/Hbb7/hySefrL4LIqJah+Mx0T9Kez/07NkTQ4YMwZgxY6DRaLBz5068+uqrVpkE9/b2hre3d4mPXb58Wfraz8+P7+16hpvYEhFRmdzd3TF9+nTp+2PHjpk9/vvvv2Pfvn0ACjbC2b59e4nJ+8IGDx
6MsLAw9O7du/oDJiIisrKkpCS89957AArq6+7cubPE5H1hAQEB2Lp1K1588cVKnVMmk5k999KlS5Xqh4iIqL5p06aNdPdbbm4u4uPjrRsQURFcgU9UCVqtFt9++y1+/fVXxMbGIicnB+7u7njooYfw5JNPYuTIkSVuQAYU7AR++vRp9OzZs8xbmgpvXhIdHV3s8cDAQADAzJkz8dprr+HixYvYvn07zp49i/T0dHh6eqJ37974z3/+g9atW5d5Pfn5+di+fTt++eUX3L59G/b29mjVqhXGjBmDsWPHlvt6GI1GRERE4OjRo4iKisKNGzeQk5MDR0dHNGnSBH379sXzzz+Pxo0bl3mdAJCQkCBdW2GFXwNLX8OzZ8/i66+/RmRkJFJSUmBvb4+mTZti4MCBmDRpEry8vEp8XuGNbr788kv06tULv/zyC77++mtER0cjLy8PjRo1wtChQzF9+nR4eHiU+xrVdp07d5a+TkxMNHts06ZN0tcrV66Ek5OTRX26ubmVetuf0WjETz/9hJ9//hlXr15FZmYmXFxc0LZtWzz++ON4+umnS10RUfS9Y1pFsX//fty8eRMA0Lp1a4waNQrjxo0rd2NerVaLffv24eDBg/j777+RkZEBhUKBJk2aoGvXrnj88ccRFBRUrKwQEVUcx1dzHF89yn2NbNX27duhVqsBALNmzSr3d8VEJpPhqaeeqvR5C0+ga7XaSvcDAH/++Se+//57nD9/Hunp6XB2dkbLli0xdOhQTJgwAc7OzuX2ERMTg6+//hqnT5/G/fv3kZeXJ224269fPzz11FNo0KBBheLSaDSYPXu2VEN67ty5ZgsNiKjqOB6b43jsUe5rVBc0adJE+rqkMXTv3r1YuHAhgIJyNg0aNMDu3bvxyy+/4NatW1CpVNLvq6U2bdqE9evXAwAee+wxrFu3rtpW/lv6e0S1AxP4RBV09+5dvPTSS8VmZFNTU3H06FEcPXoUX3/9NTZu3PjABrpdu3YhJCQEer1eaktOTsaPP/6I33//HZ9//jkeeeSREp+bkpKCSZMm4fr161KbWq1GVFQUoqKiEB4ejilTppR5/v/+978l7pSenZ2Na9eu4dq1a/jqq6+wZs0aPPbYY5W8SssZjUasWLECu3btMmvXarX4+++/8ffff2PXrl346KOP0K9fv3L7mj9/Pn788Uez9ps3b2Lr1q34448/sGvXrlI3dq0rCie5DQaD9HV0dLR0K9/DDz+MDh06VPlcKpUKL7/8Ms6dO2fWnpGRgdOnT+P06dPYtWsXPv/8c7MPWSVJTU3FtGnT8Pfff5u1X7p0CZcuXcLx48excePGUv8A+fvvvzFz5kzcvXvXrF2n0yEuLg5xcXH47rvvcPDgwXLvOiCisnF8LY7ja+0cX0VRlO5Mc3JywtNPP/3Azl14kr1Ro0aV6kOj0WDu3Ln4/fffzdpVKhXOnz+P8+fPY+fOnfjss8/Qvn37EvswGAxYvXo1duzYAVEUzR4zbWp/8uRJXL9+HatWrbI4tpycHLz88ss4ffo0ZDIZ3n33XTzzzDMVv0giKhXH4+I4HtfO8biiCo+hJU3GFJaRkYGZM2cW+zvTUqIoYvXq1fjiiy8AAGPGjMGKFSu4eS6Vigl8ogrIzc3F5MmTcefOHQDAo48+iuDgYDRo0AB3797Frl27cPr0aURGRuI///kPdu3aVeP/B3z8+HFcvHgRAQEBeOGFFxAQEACNRoPff/8dX375JdRqNRYsWIDw8PBiM7l6vR4zZsyQPswEBQVh/PjxaNiwIe7du4fdu3fj+PHjyMzMLDMGvV4PX19fPPbYY+jatSuaNWsGe3t73Lt3D1FRUdi9ezfy8vIwd+5c7Nu3z2yFhGmTltDQUGkWu+jmbBW1du1a6cNM06ZN8dJLL+Ghhx6CWq3Gn3/+iV27diE7OxszZszAd999h3bt2pXa10cffYSoqCg8+uijGDVqFBo3bozU1FTs3r0bhw8fxq1bt7
By5UqsW7euSjHbupiYGOnrwivlzpw5I309cODAKp/HYDDgP//5D6KiogAU1COcMGECmjZtiuTkZISFheGPP/7A9ev
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 2000x500 with 3 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Foundation\", hue=\"Foundation\",\n",
" col_order=[\"PConc\", \"CBlock\", \"BrkTil\"],\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 53,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"foundation = pd.get_dummies(df[\"Foundation\"], prefix=\"found\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"# Only keep the top 3 realizations.\n",
"del foundation[\"found_Slab\"]\n",
"del foundation[\"found_Stone\"]\n",
"del foundation[\"found_Wood\"]\n",
"df = pd.concat([df, foundation], axis=1)\n",
"del df[\"Foundation\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 54,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"found_BrkTil\", \"found_CBlock\", \"found_PConc\"])"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 55,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" found_BrkTil found_CBlock found_PConc\n",
"Order PID \n",
"1 526301100 0 1 0\n",
"2 526350040 0 1 0\n",
"3 526351010 0 1 0\n",
"4 526353030 0 1 0\n",
"5 527105010 0 0 1"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 55,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[foundation.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garage Type\n",
"\n",
"As can be expected, the *Garage Type* looks very similar to the above *has Garage* variable. Therefore, it is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 56,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hURRfA4d/uZje9kN5DSEIoCST0QKgiTUSKoAiigBQFUQQBFRUQAVE/UVBBVLoiQgBFUEGKRJo06TW0EEoK6WU3m/3+GLLJkgBJSKHM+zx5yL07997ZjZLDzJkzCoPBYECSJEmSJEm6J8qq7oAkSZIkSdLDQAZVkiRJkiRJ5UAGVZIkSZIkSeVABlWSJEmSJEnlQAZVkiRJkiRJ5UAGVZIkSZIkSeVABlWSJEmSJEnlwKyqO/AoycvL4/r161hbW6NQKKq6O5IkSZIklYDBYCAjIwNXV1eUytuPR8mgqhJdv36d1q1bV3U3JEmSJEkqg23btuHu7n7b12VQVYmsra0B8UOxsbGp4t5IkiRJklQS6enptG7d2vh7/HZkUFWJ8qf8bGxsZFAlSZIkSQ+Yu6XuyER1SZIkSZKkciCDKkmSJEmSpHIggypJkiRJkqRyIHOqJEmSJKmc6fV6dDpdVXdDKiG1Wo1Kpbrn+8igSpIkSZLKicFg4OrVqyQnJ1d1V6RScnBwwN3d/Z7qSMqgSpIkSZLKSX5A5erqipWVlSz0/AAwGAxkZmZy/fp1ADw8PMp8LxlUSZIkSVI50Ov1xoDKycmpqrsjlYKlpSUginS7urqWeSpQJqpLkiRJUjnIz6GysrKq4p5IZZH/c7uXXDgZVEmSJElSOZJTfg+m8vi5yaBKkiRJkiSpHMigSpIkSZIkqRzIRHVJkiTp0Zanh9h/4ephsHYG3wiwda/qXkkPIBlUSZIkSY+289thSQ8w5Inj0Gfgyc9AY11pXYiPj2fevHls27aNq1evYmtri6+vL926daNHjx7G1WkPktmzZzNnzpw7tjl58mQl9aZyyKBKkiRJerQdXFYQUAEc/gmajwCP+pXy+EuXLtG3b19sbW0ZPXo0wcHBaDQaTp48yYoVK3Bzc+Oxxx4r0731ej0KhQKlsvKzfQYNGsSzzz5rPH766afp06cPffr0qfS+VBaZUyVJkiQ92jR2pscKJSgrb8xh0qRJqFQqVq1aRZcuXQgICMDHx4f27dvzzTff0K5dO2PbBQsW8OSTTxIWFkbr1q2ZNGkSGRkZxtejoqJo1KgRf/31F126dCE0NJS4uDgOHTrEwIEDadq0KQ0bNqR///4cPXrUpB9nz56lb9++hIaG0qVLF3bs2EFwcDCbNm0ytrly5QqvvfYajRo1okmTJrz88svExsYW+76sra1xcXExfqlUKuO5n376iYEDBxa55qmnnmLWrFkATJgwgVdeeYU5c+bQrFkzGjRowHvvvYdWqzW2z8vLY968ebRr14569erRrVs3fv/99zL9HMqDDKokSZKkR1vYs2BuW3Dcciw4BZXs2qwbcOMi6MtW2+jGjRv8888/9OvX77b1rQov9VcoFLzzzjusW7eOGTNmsGvXLj7++GOT9tnZ2cyfP5+pU6eybt06nJycyMjIoHv37vzwww+sWLECPz8/hg4dSnp6OiBGtEaMGIGlpSU///wzU6ZM4bPPPjO5r06nY/DgwVhbW7Ns2TJ+/PFHrKyseOmll0wCnZJ4+umnOXv2LIcOHTKeO3bsGCdPnqRXr17Gczt37uTs2bMsWbKE//3vf2zcuJEvv/zS+Pq8efNYs2YNkydP5rfffuPFF1/kzTffZM+ePaXqT7kxSJUmLS3NULNmTUNaWlpVd0WSJEkq7PoJg+HIaoPh3HaDISu1ZNecizYYZjc2GCY5GAxrRhiyrp4xHDt2zJCVlVXixx48eNBQs2ZNw59//mlyvkmTJoawsDBDWFiYYebMmbe9fsOGDYYmTZoYj1etWm
WoWbOm4fjx43d8rl6vN4SHhxs2b95sMBgMhm3bthnq1KljuH79urHNP//8Y6hZs6Zh48aNBoPBYFizZo2hY8eOhry8PGObnJwcQ7169Qzbt2+/63tt27atYcGCBcbjl156yfD+++8bjz/44AND//79jcfjx483NGnSxJCZmWk898MPPxjCwsIMer3ekJOTY6hfv75h//79Js95++23DW+88cZd+3OrrKys2/78Svr7W+ZUSZIkSZJLsPgqqZTL8PMAyEgQxweWgHMIOLYul+6sXLmSvLw8xo4dazIKtGPHDubNm0dMTAzp6eno9XpycnLIysoyJrOr1WqCg03fS0JCArNmzWLPnj0kJiaSl5dHVlYWcXFxAJw7dw53d3dcXFyM19SrV8/kHidOnODixYs0aNDA5HxOTg4XL14s9Xvs06cPb7/9Nm+99RYKhYJff/2Vt956y6RNcHCwSZJ+eHg4mZmZXLlyhczMTLKyshg0aJDJNTqdjtq1a5e6P+VBBlWSJEmSVFrp1woCqnwXd5U6qPL19UWhUHDu3DmT8z4+PgBYWFgYz8XGxjJs2DD69u3L6NGjsbe3Z9++fbzzzjvodDpj8GFhYVGkOvj48eNJTk7mnXfewdPTE41GwzPPPFOqLVkyMzOpW7cun3zySZHXHB0dS3yffG3btkWj0bBx40bUajW5ubl06tSpVP0BMQXo5uZm8ppGoyl1f8qDDKokSZIkqbTsvcHeF1IKjdDUaFvq21SrVo0WLVqwdOlS+vfvf8d9A48ePYrBYGDChAnG1XwbNmwo0XP279/P+++/T+vWIui7cuUKN27cML7u7+/P1atXSUhIwNnZGYDDhw+b3KNu3bps2LABJycnbGxsSvU+i2NmZkb37t2JiopCrVbzxBNPmASRIEouZGdnG88fPHgQKysrPDw8sLe3R6PREBcXR5MmTe65P+VBJqpLkiRJUmnZuEKfRRDUEWw9oO07ENju7tcV4/3330ev19OrVy/Wr1/P2bNniYmJYe3atcTExKBSqQDw8/NDp9OxZMkSLl26xJo1a1i+fHmJnlG9enV++eUXzp49y3///cfYsWNNApgWLVrg4+PD+PHjOXHiBPv27TOuwsv35JNPUq1aNV5++WX27t3LpUuX2L17N1OnTuXq1atleu+9e/dm165dbN++3SRBPZ9Wq+Wdd97hzJkzbNu2jdmzZ9O/f3+USiU2NjYMGjSI6dOns3r1ai5evMjRo0dZsmQJq1evLlN/7pUcqZIkSZKksvBqAM8shpwMsHaC7GwgvdS38fX1ZfXq1cybN49PP/2Ua9euoVarCQwMZNCgQTz33HMA1KpVi7feeov58+fzv//9j0aNGvHGG28wfvz4uz7jww8/5N1336VHjx54eHgwevRoZs6caXxdpVLx5ZdfMnHiRJ5++ml8fHwYN24cw4cPx9zcHABLS0uWLl3KJ598wsiRI8nIyMDNzY2IiIgyj1xVr16d8PBwUlJSqF+/aF2wiIgI/Pz86NevH1qtlq5du/Lqq68aX3/99ddxdHRk3rx5xMbGYmtrS506dRg+fHiZ+nOvFAaDwVAlT34Epaen07BhQ/bt21cuQ6eSJEnS/SM7O5tz587h7+9fZBrrQbRv3z6ee+45Nm7ciK+vb4U8w2Aw0KFDB5577rkidasmTJhAamoqX331VYU8+1Z3+vmV9Pe3HKmSJEmSJImNGzdiZWWFn58fFy9e5MMPP6RBgwYVFlAlJSXx22+/kZCQQM+ePSvkGZVNBlWSJEmSJJGRkcEnn3xCXFwc1apVo3nz5iWaWiyriIgIqlWrxpQpU7C3t6+w51QmGVRJkiRJkkT37t3p3r17pT3vbpspz5gxo5J6Un7k6j9JkiRJkqRyIIMqSZIkSZKkciCDKkmSJEmSpHIggypJkiRJkqRyIIMqSZIkSZKkciCDKkmSJEmSpHIgSypIkiRJ0iNu3rx5/Pnnn8TExGBhYUF4eDhjx46lRo0a7N
69mwEDBtzx+sWLF9O0adNK6u39SwZVkiRJkvSI27NnD/369SM0NBS9Xs///vc/Bg8ezG+//UZ4eDjR0dHGth9++CH
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Garage Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 57,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Garage Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Heating\n",
"\n",
"Most of the houses have gas heating. Thus, the variable is not helpful and is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 58,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3yNd/vH3yc7kYEkMmQgIUaQWCFia7VaVXRYbanuQQctndp6Slt9Ho+2v9KFGlWPVRRttWqLPWsLgiAJInue3x+Xc05OEkSEGNf79Tovub/ne9/39z5a55Prur6fy2A0Go0oiqIoiqIo14RNRS9AURRFURTldkBFlaIoiqIoSjmgokpRFEVRFKUcUFGlKIqiKIpSDqioUhRFURRFKQdUVCmKoiiKopQDKqoURVEURVHKAbuKXsCdREFBAWfOnKFSpUoYDIaKXo6iKIqiKKXAaDSSnp5OtWrVsLG5dDxKRdUN5MyZM7Rr166il6EoiqIoShlYsWIFvr6+l3xfRdUNpFKlSoD8pbi6ulbwahRFURRFKQ1paWm0a9fO/D1+KVRU3UBMKT9XV1cVVYqiKIpyi3Gl0h0tVFcURVEURSkHVFQpiqIoiqKUAyqqFEVRFEVRygGtqVIURVGUW4CCggJycnIqehm3Jfb29tja2l7zdVRUKYqiKMpNTk5ODnFxcRQUFFT0Um5bKleujK+v7zX5SKqoUhRFUZSbGKPRSEJCAra2tgQGBl7WfFK5eoxGIxkZGZw5cwYAPz+/Ml9LRZWiKIqi3MTk5eWRkZGBv78/Li4uFb2c2xJnZ2dATLqrVatW5lSgyl1FURRFuYnJz88HwMHBoYJXcntjEqy5ubllvoaKKkVRFEW5BdCesdeX8vh8VVQpiqIoiqKUAyqqFEVRFEVRygEVVYqiKMqdTUE+HFsPG76F3fMg9VRFr+i25rHHHuNf//pXRS/juqCiSlEURbmzObIKJt0Li4fC/wbA7+9BTnpFr+q6MXz4cF544YVi47GxsYSFhXHhwoVyuc+lrvfFF18wZMiQcrnHzYaKKkVRFOXOZtt0MBYy1dz5MyQfrLj13OZUrlwZV1fXil7GdUFFlaIoinJn4+BufWywARu1cdy0aRN9+/alUaNGtGvXjlGjRpGRkWF+f/78+fTs2ZPIyEhat27N66+/TnJyMgDHjx/n8ccfB6B58+aEhYUxfPhwoHj6r2PHjkyYMIERI0YQGRlJ+/bt+fnnn63WsmXLFrp3707Dhg3p2bMny5YtIywsjD179lzvj+GqUFGlKIqi3NlE9AZHN8txm6HgWbt052aeg3PHIL/s3kY3I8eOHePpp5/m7rvvZsGCBfznP/9h8+bNfPTRR+Y5eXl5DBkyhAULFvDVV19x4sQJs3Dy8/Pjiy++AGDp0qWsXr2at99++5L3mzRpEuHh4cyfP5++ffsycuRIDh8+DEBaWhrPP/88derUYd68eQwZMoTPPvvsOj592VEpriiKotzZBDSHp/6EM3ugkhf4Nga7UhhtHlkDi16F5AMQ0Q/aDoMqwdd/veXA33//TWRkpNWYyWQUYOLEiXTr1o0BAwYAUKNGDd5++20ee+wxRo4ciaOjIw899JB5fmBgIG+//TYPPfQQ6enpVKpUCQ8PDwA8PT1xdy8SDSxC27Zt6devHwBPP/00kydPJjY2llq1arFw4UIARo0ahaOjI6GhoZw5c4Z33nnnmj+H8kZFlaIoiqJ4h8mrtKScgP89DulJcrx1KniGQMyr12d95UxUVBQjR460Gtu+fTvDhg0DYO/evezbt88saEB65BUUFHD8+HFCQkLYtWsXX375JXv37iUlJQWj0QhAQkICoaGhV7WesDDLZ28wGPDy8jKnEuPi4ggLC8PR0dE8p2HDhld1/RuFiipFURRFuVrSTlsElYmDf94yosrZ2ZngYOuo2qlTFiuJjIwMevfuzWOPPVbsXD8/PzIyMhg0aBAxMTGMHT
uWKlWqkJCQwKBBg8rU5sXOzlqOGAwGs0i7lVBRpSiKoihXi0cAeARByjHLWL1uFbeecqZ+/focPHiwmPAysX//fs6fP8/QoUPx8/MDYNeuXVZz7O3tAeu0YlmoWbMmCxYsICcnx9z/cOfOndd0zeuFFqoriqIoytXiWg0emQK1u4CbH3R4G+p3r+hVlRtPP/00W7du5cMPP2TPnj0cOXKEZcuW8eGHHwLg7++Pvb09U6dOJT4+nj///JP/+7//s7pG9erVMRgM/P3335w9e5b09LJ5f3Xr1g2j0ci7777LoUOHWLVqFT/88ANw8/VDVFGlKIqiKGWhehN49Ed4bg20ewPcfCt6ReVG3bp1mTp1KkeOHKFv37706NGD8ePHU61aNQCqVq3KmDFjWLp0KV27duXbb7/lzTfftLqGj48PL7/8Mp9//jnR0dFWOwevBldXV77++mv27NlD9+7d+c9//sOLL74IYI5c3SwYjLdi0vIWJS0tjaZNm7J58+bb1vhMURRFKV+ysrKIi4ujZs2aODk5VfRybgoWLFjAW2+9xaZNm8rtM7nc51za72+tqVIURVEU5aZm/vz5BAQE4OPjw759+xg7diz33HPPTScyVVQpiqIoinJTk5iYyPjx40lMTMTb25t77rmHV1+9+XZaqqhSFEVRFOWm5umnn+bpp5+u6GVcES1UVxRFURRFKQdUVCmKoiiKopQDKqoURVEURVHKARVViqIoiqIo5YCKKkVRFEVRlHJARZWiKIqiKDecxx57jH/9618VvYxyRS0VFEVRFEW5LgwfPpx58+YVG//9998rYDXXHxVViqIoiqJcN9q0acPo0aOtxqpWrXpD7p2Tk3ND+wNq+k9RFEVR7hCyc/M5lZJFdm7+Dbung4MD3t7eVi9bW9ti81JSUnjjjTdo3rw5jRs35qmnnuLIkSNWc3777Tfuu+8+wsPD6dixIz/88IPV+x07duSrr77ijTfeoEmTJrz33nvX89GKUaGiqmPHjoSFhRV7ffDBBwBkZ2fzwQcfEBUVRWRkJC+//DJJSUlW1zh58iTPPPMMjRs3plWrVnzyySfk5eVZzYmNjaVHjx6Eh4dz1113MXfu3GJrmT59Oh07dqRhw4Y8/PDD7Nixw+r90qxFURRFUW5W9p9O5ZVZ2+j4+d+8Mmsb+0+nVvSSrBg+fDi7du3i66+/5ueff8ZoNPLMM8+Qm5sLwK5du3jllVfo2rUrCxcu5KWXXuK///1vse/0H374gbp16zJ//nxeeOGFG/oMFSqqZs+ezerVq82vSZMmAXDPPfcA8PHHH7N8+XLGjRvH1KlTOXPmDC+99JL5/Pz8fJ599llyc3OZOXMmY8aMYd68eYwfP948Jz4+nmeffZaoqCh++eUXnnjiCd555x1WrVplnrN48WJGjx7Niy++yLx586hbty6DBg0iOTnZPOdKa1EURVGUm5Xs3Hz+s2w/S3aeIiMnnyU7TzFu2f4bErH6+++/iYyMNL8GDx5cbM6RI0f466+/GDVqFM2aNaNu3bqMHTuW06dPs2zZMgAmTZpEq1atePHFF6lZsyY9e/akX79+fP/991bXatmyJU8++SRBQUEEBQVd9+crTIWKqqpVq1qFA5cvX05QUBAtWrQgNTWVOXPmMHz4cFq1akV4eDgff/wxW7duZdu2bQCsXr2agwcP8tlnn1GvXj3atWvHkCFDmD59Ojk5OQDMnDmTgIAAhg8fTkhICP3796dLly5MnjzZvI5JkybxyCOP0KtXL0JDQ/nggw9wcnJizpw5AKVai6IoiqLcrJzLyGXFvkSrsb/3JXIuI/e63zsqKor58+ebX++8806xOYcOHcLOzo7GjRubx6pUqULNmjU5dOgQAIcPH6ZJkyZW5zVp0oSjR4+Sn28Rh+Hh4dfpSa7MTVNTlZOTw4IFC+jVqxcGg4Fdu3aRm5tLdHS0eU5ISAj+/v5mIbNt2zbq1KmDl5eXeU5MTAxpaWkcPHjQPK
dVq1ZW94qJiTFfIycnh927d1vdx8bGhujoaLZu3QpQqrUoiqIoys1KFRd72oV5W421D/Omiov9db+3s7MzwcHB5le
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Heating\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 59,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Heating\"]\n",
"# Also discard the associated ordinal variable.\n",
"del df[\"Heating QC\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### House Style\n",
"\n",
"In summary, this variable is very similar to the above derived variable *has 2nd Flr*. Therefore, it is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 60,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvA4d/W7G567z2QAAFCQieAIk1sIIK+UhSxAAKiiCjqKwgfoKIvUhSUKqCINBEBBRUF6b2XQEghIb1vkq3fH4GFJQESSKGc+7q4ZM6cmXl2g5tnz5x5jsRsNpsRBEEQBEEQ7oi0rgMQBEEQBEG4H4ikShAEQRAEoRqIpEoQBEEQBKEaiKRKEARBEAShGoikShAEQRAEoRqIpEoQBEEQBKEaiKRKEARBEAShGsjrOoAHiclkIj09HVtbWyQSSV2HIwiCIAhCJZjNZoqKivDw8EAqvfF4lEiqalF6ejodO3as6zAEQRAEQbgNf//9N15eXjfcL5KqWmRrawuU/VDs7OzqOBpBEARBECqjsLCQjh07Wn6P34hIqmrRlVt+dnZ2IqkSBEEQhHvMrabuiInqgiAIgiAI1UAkVYIgCIIgCNVAJFWCIAiCIAjVQMypEgRBEIS7hMlkQqfT1XUYDxyFQoFMJrvj84ikShAEQRDuAjqdjvj4eEwmU12H8kBycnLCy8vrjupIiqRKEARBEOqY2WwmNTUVmUyGv7//TQtMCtXLbDaj1WpJT08HwNvb+7bPJZIqQRAEQahjBoMBrVaLj48PGo2mrsN54KjVaqCsSLeHh8dt3woUqbAgCIIg1DGj0QiAUqms40geXFeSWb1ef9vnEEmVIAiCINwlxLqwdac63nuRVAmCIAiCIFQDkVQJgiAIgiBUAzFRXRAEQXigmY1Gig8fpuTUKWTOzmhiYlB4eNR1WEIlvfvuu+Tn5/PVV1/VdShipEoQBEF4sGn37CGh/wDSPp5IyptvkT7tc4za4roOq5x3332XYcOGlWvfvXs34eHh5Ofn10FUlbdnzx4GDhxIy5Ytadq0KV27dmXs2LGWYqerV6+mefPmdRzlnRFJlSAIgvBAy129Bq4puJm/bh26+Pg6jOj+ExcXx8svv0xkZCRLly7ll19+4YMPPkChUNxXxU5FUiUIgiA80KT2dtc1SJEo7u3ZMb/99huPPfYYkZGRdOrUiQULFljtDw8PZ8uWLVZtzZs3Z/Xq1UBZdfePP/6Y2NhYGjduzMMPP8zcuXMtffPz83n//fdp3bo10dHRDBw4kFOnTt0wnu3bt+Pm5sY777xD/fr1CQgIoEOHDkyaNAmVSsXu3bt57733KCgoIDw8nPDwcGbOnMmsWbN4/PHHy53vqaeeYvr06RVey2QyMXfuXDp16kSTJk148skn2bRpU2Xfujtyb/+rEQRBEIQ75PjkU+T/vA5TUREAbkNewyYoqFLHGnJzMRUVofDwQKJQ1GCUlXfs2DFGjRrF8OHD6dGjBwcPHmTChAk4OTnx9NNPV+ocS5Ys4c8//2T69Ol4e3uTmprKpUuXLPvfeOMNbGxs+Pbbb7G3t+fHH3/khRde4LfffsPJyanc+dzd3cnIyGDv3r20aNGi3P5mzZoxbtw4ZsyYYUmANBoNBQUFzJ49myNHjtCkSRMATpw4wenTp5k1a1aFsc+dO5d169YxYcIEgoKC2Lt3L2PGjMHFxYWWLVtW6vXfLpFUCYIgCA80TVRTglb8SGlcHDJnZ1SNGiGpRBHOor17uTR+Arr4eBx79cJt6FCUfr41GuvWrVtp1qyZVduVwqFXLFy4kDZt2vD6668DEBwcTFxcHPPnz690UpWamkpgYCAxMTFIJBJ8fa++rn379nHkyBF27txpKVY6duxYtmzZwm+//cazzz5b7nzdu3dn+/bt9O/fH3d3d5o2bUqbNm3o2bMndnZ2KJVK7O3tkUgkuLu7W46ztbUlNjaW1atXW5Kq1atX06
JFC/z9/ctdR6fTMXfuXBYuXGh5n/z9/dm/fz8//vijSKoEQRAEoabZhIZiExpa6f76S5e4+MYojNnZAOStWoUyKBC3V16pqRABaNWqFePHj7dqO3z4MGPGjLFsnz9/nkceecSqT3R0NN999x1Go7FSS7D06tWLl156ie7du9O+fXseeughYmNjATh9+jRarZZWrVpZHVNSUkJiYmKF55PJZEyZMoVRo0axc+dOjhw5wpw5c/j222/56aef8LjJ05Z9+/Zl3LhxvPfee0gkEn755Rfee++9CvsmJCRQXFzMSy+9ZNWu1+tp0KDBLV/3nRJJlSAIgiBUkSEjw5JQXVG4/d8aT6rUajWBgYFWbdfelqssiUSC2Wy2ajMYDJa/N2rUiD/++IN//vmHHTt2MGrUKNq2bcuMGTMoKirC3d2dJUuWlDuvvb39Ta/r6elJz5496dmzJ2+88QbdunVj+fLljBw58obHPPzwwyiVSjZv3oxCocBgMNC9e/cK+2q1WqDsFqCnp6fVvtpYAkgkVYIgCIJQRQpvb+S+vhguXrS0OXTpXIcRXRUSEsKBAwes2g4cOEBQUJBllMrFxYX09HTL/gsXLlBcbF1Gws7Ojh49etCjRw+6devGyy+/TG5uLo0aNSIzMxOZTIafn99tx+no6Ii7u7vlugqFotytTAC5XE7Pnj1ZvXo1CoWCxx57DJVKVeE5Q0NDUSqVpKSk1PitvoqIpEoQBEEQqkju5obfl9PJmDmL0pMncXruWey7dq3rsAB46aWXeOaZZ5g9ezY9evTg0KFDLFu2jI8++sjSp3Xr1ixbtoxmzZphNBqZNm0aimsm2i9cuBB3d3caNGiAVCpl06ZNuLu74+DgQNu2bYmKiuL1119nzJgxBAUFkZ6ezt9//03nzp1p3LhxuZiWL1/OyZMn6dKlCwEBAZSWlrJ27Vri4uL48MMPAfD19UWr1bJz507Cw8NRq9Wo1WoA+vTpQ48ePQD44Ycfbvja7ezseOmll5gyZQpms5mYmBgKCgo4cOAAdnZ29OrVq1re4xsRSZUgCIIg3AZ1ZCR+M77EpNUid3au63AsGjVqxPTp05kxYwZff/017u7ujBw50mqS+tixYxk3bhz9+vXDw8ODcePGcfz4cct+W1tb5s2bR0JCAlKplMaNG/PNN98glZZVYvrmm2+YPn067733Hjk5Obi5udG8eXPc3NwqjKlJkybs37+fjz76iPT0dDQaDfXq1WP27NmWEaXo6Giee+45Ro0aRW5uLsOHD2fEiBEABAUF0axZM/Ly8mjatOlNX/+oUaNwcXFh7ty5JCcnY29vT8OGDRkyZMgdva+VITFff1NVqDGFhYXExMSwf/9+7Ozsbn2AIAiC8EAoKSkhPj6e4ODgG97aepCZzWa6du3K888/z6BBg2rkGjf7GVT297cYqRIEQRAE4a6VnZ3Nr7/+SmZmZqVLQtQVkVQJgiAIgnDXatOmDc7Oznz88cc4OjrWdTg3JZIqQRAEQRDuWqdPn67rECpNrP0nCIIgCIJQDURSJQiCIAiCUA1EUiUIgiAIglANRFIlCIIgCIJQDURSJQiCIAiCUA1EUiUIgiAIglANRFIlCIIgCMJt27t3L0OGDCE2Npbw8HC2bNlitT8pKYnRo0cTGxtL48aN6dChA0OHDuXcuXMAJCcnEx4ezsmTJ+si/Gol6lQJgiAIgnDbtFot4eHh9O7dm+HDh1vt0+v1vPTSSwQHBzNr1izc3d25dOkS//zzDwUFBdUei16vt1oYuraJkSpBEARBuI+U6o1cyiuhVG+slet17NiRN998ky5dupTbFxcXR2JiIh999BFRUVH4+voSExPDm2++SVRUFACPPPIIAD179iQ8PJwBAwYAYDKZmDVrFh06dCAyMpKnnnqKf/75x3LuKyNcGzZsoH///jRu3JgVK1YQHR3Npk2brOLYsmULUVFRFBYW1tC7UKZOk6pOnToRHh5e7s+ECRMAKC0tZc
KECbRq1YpmzZoxYsQIMjMzrc6RkpLCq6++StOmTWnTpg2ffPIJBoPBqs/u3bvp1asXkZGRdOnShdWrV5eLZdmyZXT
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"House Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 61,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"House Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Land Contour\n",
"\n",
"This variable is assumed to contain the same information as the ordinal variable *Land Slope* and is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 62,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbA4d/UZJJJ7z0hIQVCCDWUIIig2FHsIooo2EA/G9iuoCio6OXi9Qo2QEURBbGBBUUF6b2FkoRAeu+ZZOr3x8jAGEqAQCjrfR4eM/vsc84KkZmVffZeW2Gz2WwIIYQQQojTomzrAIQQQgghLgSSVAkhhBBCtAJJqoQQQgghWoEkVUIIIYQQrUCSKiGEEEKIViBJlRBCCCFEK5CkSgghhBCiFajbOoCLidVqpaSkBHd3dxQKRVuHI4QQQogWsNls1NfXExgYiFJ57PEoSarOopKSEvr379/WYQghhBDiFPzxxx8EBwcf87gkVWeRu7s7YP+h6PX6No5GCCGEEC1RV1dH//79HZ/jxyJJ1Vl06JGfXq+XpEoIIYQ4z5xo6o5MVBdCCCGEaAWSVAkhhBBCtAJJqoQQQgghWoHMqRJCCCHOIovFgslkauswxBE0Gg0qleq0ryNJlRBCCHEW2Gw2ioqKqKqqautQxFF4e3sTHBx8WnUkJakSQgghzoJDCVVgYCBubm5SBPocYbPZaGhooKSkBICQkJBTvpYkVUIIIcQZZrFYHAmVn59fW4cj/kGn0wH2It2BgYGn/ChQJqoLIYQQZ9ihOVRubm5tHIk4lkM/m9OZ7yZJlRBCCHGWyCO/c1dr/GwkqRJCCCGEaAWSVAkhhBBCtAKZqC6EEOKiZrHa2Hywkl2FNfi5a+kR7Uugp2tbh3VWTJgwgZqaGv73v/+1dSgXBEmqhBBCXNTWZJdz14drsdrsr4emhvHqjcm4ac/eR+S5nNzYbDYWLFjAV199RWZmJiqVisjISK677jpuvfVWx8q507Vo0SJeffVVNmzY0CrXawvy+E8IIcRF7cuNeY6ECmDxlnyyS+vbLqBzzFNPPcWrr77KZZddxty5c1m8eDEPPfQQv/76K3/99Vdbh3dKjEbjGbmuJFVCCCEuah4uzjWJlApQq86tVXqzZ8/m2muvJTU1lf79+zNx4kTq6w8nfosWLaJ79+6sWLGCK6+8ki5dujBq1ChHQUuw18qaMmUK3bt3Jy0tjddffx2bzXa02zksWbKE7777jjfffJMHHniAlJQUwsPDGTRoEB9//DFpaWkAWK1W/vvf/3LJJZeQnJzM9ddfz59//um4Tl5eHgkJCfz888/cdddddO7cmeuuu47NmzcDsHbtWp555hlqa2tJSEggISGBt99+G4Dq6mqefvppevToQefOnbnvvvvIyclxXPvtt9/m+uuvd4p7zpw5DBw40PF6woQJPPTQQ7z77rukp6czZMiQk/wJtIwkVUIIIS5qN3YNR+9y+FHfI5fG0c5f36JzqxqM5FY0YLJYz1R4gH25/3PPPcf333/P1KlTWbNmDW+88YZTn8bGRj766CNef/11Pv30UwoLC3nttdccxz/66CO+/vprXn31VT777DOqq6v55Zdfjnvf7777jpiYGAYNGnTUmDw8PAD4+OOPmT17NuPHj+fbb78lPT2dhx56yCn5Afj3v//NqFGjWLx4MdHR0TzxxBOYzWa6dOnCs88+i16vZ+XKlaxcuZJ7770XsCdEO3bs4N133+WLL77AZrMxevTok64ntXr1avbv38/s2bOZNWvWSZ3bUjKnSgghxEWtS6QPix/uw97iWnzdXUgO9UKrPvGYw9rscp77egfZZXXc1C2csQPbE+F7Zop73nPPPY6vw8PDeeyxx3jxxReZOHGio91kMjFp0iQiIyMBuPPOO53maM2dO5fRo0dz+eWXAzBp0iRWrlx53PseOHCAmJiYE8b34Ycfcv/993P11VcD9keGa9euZe
7cubz44ouOfvfeey8DBgwAYNy4cVx99dUcOHCA2NhYPDw8UCgUBAQEOPrn5OTw22+/8fnnn9O1a1cApk2bxoABA1i2bBlXXnnlCWM7xM3NjcmTJ6PValt8zsmSpEoIIcRFLy7Qg7hAjxb3L6w28NC8TZTX2+fmLNiQR4y/ngcHxJ6R+FatWsWsWbPIzs6mrq4Oi8VCU1MTBoPBMVFcp9M5EiqAwMBAysvLAaitraW0tJTOnTs7jqvVapKTk4/7CPBEjwcB6urqKCkpcSQ9h3Tt2pXdu3c7tSUkJDi+PpQ8VVRUEBt79L+3rKws1Gq1U9w+Pj7ExMSQlZV1wtiOFB8ff0YTKpDHf0IIIcRJK6lpciRUh/y5t+QYvU9PXl4eY8aMccwzWrRoEf/6178A5y1V1GrncRKFQtGipOh4oqOjyc7OPq1rHEmj0Ti+PlTB3Go9vUenR/s+zWZzs36ttUrxeCSpEkIIIU5SmLcr4T7OH9JXdAw5I/fauXMnNpuNCRMmkJqaSkxMjNME9Jbw8PAgICCArVu3OtrMZjM7d+487nnXXnstOTk5LFu2rNkxm81GbW0ter2ewMBANm3a5HR806ZNxMXFtThGjUaDxWJxaouNjcVsNjvFXVlZyf79+x3X9vX1payszCmxysjIaPF9W5MkVUIIIcRJ8vdw5X93dmVgQiBBni48PjieKzsFn9Y1a2trycjIcPpTWFhIVFQUJpOJTz75hNzcXBYvXsz8+fNP+vojRozg/fffZ9myZWRlZTFp0iRqamqOe86VV17JVVddxRNPPMHMmTPZvn07+fn5LF++nHvuuYe1a9cCMGrUKN5//32WLFlCdnY206ZNY/fu3YwYMaLF8YWFhdHQ0MDq1aupqKjAYDAQHR3NZZddxgsvvMCGDRvYvXs3Tz31FEFBQVx22WUApKWlUVFRwfvvv8/BgweZN28eK1asOOm/n9Ygc6qEEEKIU5AS7s27d3WlvsmMr7vLaV9v3bp1DB061Kntpptu4pVXXuGZZ57h/fff56233qJ79+48/vjjjB8//qSuf++991JaWsr48eNRKpUMGzaMwYMHU1tbe8xzFAoFb775Jl988QULFy5k5syZqFQqoqKiGDp0KOnp6YA9Yaurq2Pq1KmOOVL/+9//iI6ObnF8Xbt25bbbbuOxxx6jqqqKRx55hLFjxzJlyhReeeUVHnjgAUwmE927d+e9995zPEqMjY3lxRdfZNasWbz77rtcfvnl3HvvvSxYsOCk/n5ag8J2ug9cRYvV1dXRrVs3Nm7ciF7fsuW6Qgghzn+NjY3s37+fmJgYXF0vji1wzjfH+xm19PNbHv8JIYQQQrQCSaqEEEIIIVqBJFVCCCGEEK1AkiohhBBCiFYgSZUQQgghRCuQpEoIIYQQohVIUiWEEEII0QokqRJCCCGEaAWSVAkhhBDinHLXXXfxyiuvtHUYJ02SKiGEEEK0ugkTJvDQQw+1dRhnlSRVQgghhBCtQJIqIYQQ4iLQZLJQVN1Ik8nSpnE88cQTPPbYY05tJpOJtLQ0Fi9e3CYxtZY2TaoGDhxIQkJCsz+TJk0CoKmpiUmTJpGWlkaXLl0YO3YsZWVlTtcoKChg9OjRdO7cmd69e/Paa69hNpud+qxdu5YbbriB5ORkBg8ezKJFi5rFMm/ePAYOHEinTp24+eab2bZtm9PxlsQihBBCnIv2Ftfy2IItDHzzdx5bsIW9xbVtFsu1117L8uXLqa+vd7StXLmSxsZGBg0a1GZxtYY2Taq++uorVq5c6fgze/ZsAIYMGQLAq6++yvLly5k+fTqffPIJJSUlPPLII47zLRYLY8aMwWQyMX/+fKZOncrXX3/NjBkzHH1yc3MZM2YMaWlpfPPNN9x99908//zzrFixwtFnyZIlTJkyhYcffpivv/6axMRERo0aRXl5uaPPiWIRQgghzkVNJgv/XraXpduLaDBaWLq9iOnL9rbZiFV6ejo6nY5ffvnF0fb999
8zcOBA9Hp9m8TUWto0qfL19SUgIMDxZ/ny5URGRtKzZ09qa2tZuHAhEyZMoHfv3iQnJ/Pqq6+yefNmtmzZAtgz28z
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Land Contour\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 63,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Land Contour\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Lot Configuration\n",
"\n",
"This variable shows no good pattern and is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 64,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvA4d+2ZJNseu+9kRAIoYYAiryCKEoTG6KIggqKhWZBQVFA8f14rWCjKIJIUxFQUUCQ0EuogZCQQnrPpm39/lhYWEINgQQ493XlMnPmzMyziew+OXPmORKj0WhEEARBEARBuCbS5g5AEARBEAThViCSKkEQBEEQhCYgkipBEARBEIQmIJIqQRAEQRCEJiCSKkEQBEEQhCYgkipBEARBEIQmIJIqQRAEQRCEJiBv7gBuJwaDgcLCQuzs7JBIJM0djiAIgiAIV8BoNFJdXY2HhwdS6cXHo0RSdQMVFhbSo0eP5g5DEARBEIRG2LRpE15eXhfdL5KqG8jOzg4w/VJUKlUzRyMIgiAIwpVQq9X06NHD/Dl+MSKpuoHO3PJTqVQiqRIEQRCEm8zlpu6IieqCIAiCIAhNQCRVgiAIgiAITUAkVYIgCIIgCE1AzKkSBEEQhOtMr9ej1WqbOwzhIhQKBTKZ7JrPI5IqQRAEQbhOjEYj+fn5lJeXN3cowmU4OTnh5eV1TXUkRVIlCIIgCNfJmYTKw8MDW1tbUfi5BTIajdTU1FBYWAiAt7d3o88lkipBEARBuA70er05oXJ1dW3ucIRLsLGxAUxFuj08PBp9K1BMVBcEQRCE6+DMHCpbW9tmjkS4Emd+T9cy900kVYIgCIJwHYlbfjeHpvg9iaRKEARBEAShCYikShAEQRAEoQmIpEoQBEG4vRn0kLUNdnwFh1ZCVX5zR3Rbqa2t5YUXXqBdu3ZERkZSWVlJz549mT9/fnOHdtXE03+CIAjC7e3kZvhuABgNpu3WD0G//wMru+aN6wImTZpEZWUln3/+eaOOf/zxx4mKiuKNN964bN/MzEzmzJnDv//+S2lpKR4eHrRt25bhw4fTunXrRl3/QlauXMmuXbtYsmQJzs7O2Nvbs2zZMvMTeTcTMVIlCIIg3N72LTqbUAEc+BFK0povnhbgwIEDDBw4kIyMDN555x3WrFnDZ599RkhICDNnzmzSa2VnZxMaGkpERATu7u5IJBJcXFxEUiUIgiAINx0rB8ttiRSkN+eNnB07djB48GBiY2NJSkpi1qxZ6HQ6wDTKtWPHDhYuXEhkZCSRkZHk5OQ0OIfRaOS1114jMDCQH374gTvuuIOAgACio6MZM2aMxShZamoqw4YNIy4ujk6dOjF58mSqq6vN+ydNmsTzzz/PN998Q1JSEp06dWLq1KnmsgWPP/443377LTt37iQyMpLHH38coMHtvxMnTvDII4/QunVr+vbty9atW4mMjGT9+vXX48fYaDfn/zWCIAiC0FTaPmwanaqvMm13Gweu4Vd2bG0Z1FWBgzfIFNcvxitQUFDAyJEjGTBgADNnziQjI4M333wTa2trXnjhBd544w1OnjxJeHg4L774IgAuLi4NznPkyBGOHz/ORx99hFTacOzFwcGUhNbU1DBixAji4+NZtmwZJSUlvPnmm7z77rvMmDHD3H/79u24u7uzYMECsrKyePnll4mOjmbIkCF88sknfPTRRxw/fpxPPvkEhaLhz1Cv1zN69Gh8fHz46aefUKvVTT5a1lREUiUIgiDc3vw6wNN/QeERsHMDrzYgt7r8cSf/hdUvQ8lxaPsYdB8PzoHXP96L+OGHH/Dy8uKtt95CIpEQGhpKQUEBs2bNYvTo0djb26NQKFAqlbi7u1/0PCdPngQgJCTkktdbvXo1Go2GmTNnmgtnvvXWWzz77LOMGzcONzc3ABwdHXnrrbeQyWSEhobSo0cPkpOTGTJkCE5OTiiVShQKxUVj+vfff8nOzua7774z93n55Z
cZPnz41f6Irjtx+08QBEEQ3CMhpj8EJYHS/vL9K07BT8OgONU0H2vvd3BoxXUP81JOnDhBfHy8RRHLhIQEampqyM9v+icaT5w4QWRkpEXF+Hbt2mEwGMjIyDC3hYWFWSz74u7uTklJyRVfJyMjAy8vL4ukKy4u7hqjvz5EUiUIgiAIV0tdANXFlm1pfzVPLE0sKCgIgPT09CY5n1xueVNMIpFgNBqb5NwtjUiqBEEQBOFqOfqBY4BlW3S/5onltNDQUPbu3WuRsOzevRs7Ozu8vLwAUCgUGAyGi50CgOjoaMLCwvj2228v2LeystJ8vdTUVGpqasz79uzZg1QqJTg4uCleEgDBwcHk5+dTXHw2iT1w4ECTnb8piaRKEARBEK6WygOGLIDw3mDvDXe+Aa0euCGXrqqq4siRIxZfeXl5PProo+Tn5/Puu+9y4sQJ1q9fzyeffMLw4cPNE859fX3Zv38/OTk5lJaWXjBpkkgkTJ8+nZMnT/Loo4+yadMmsrOzOXr0KF988QXPP/88AP369cPKyopJkyZx7Ngxtm3bxrvvvssDDzxgnk/VFLp27Yq/vz8TJ07k6NGj7N69m9mzZzfZ+ZuSmKguCIIgCI3h2w4eWgj11WDnesMuu2PHDvr372/RNnjwYN577z2+/PJLPvjgA5YuXYqTkxODBw/mueeeM/d76qmnmDRpEvfeey91dXX89ddf+Pn5NbhGXFwcy5cvZ86cObz55puUlZXh4eFBfHw8r7/+OgA2NjZ88803vPfeewwePBgbGxvuvvtuJk2a1KSvVyaT8dlnn/Hmm28yePBg/P39mTBhAs8++yzW1tZNeq1rJTHeqjc2WyC1Wk1CQgK7d+9GpVI1dziCIAjCdVRXV0dGRgbBwcEolcrmDueWsnv3bh599FH+/PNPAgICLn/AFbjU7+tKP7/FSJUgCIIgCC3an3/+ia2tLYGBgWRlZfHee+/Rrl27JkuomopIqgRBEARBaNGqq6uZNWsWubm5ODs7k5iYyMSJE5s7rAZEUiUIgiAIQovWv3//BvPIWiLx9J8gCIIgCEITEEmVIAiCIAhCExBJlSAIgiAIQhMQSZUgCIIgCEITEEmVIAiCIAhCExBJlSAIgiAI103Pnj2ZP3/+JftERkayfv36GxPQdSRKKgiCIAiCYGHSpElUVlby+eefX/O5li1bho2NTRNE1fKJpEoQBEEQhOvGxcWluUO4YcTtP0EQBEFo4eq1evIr6qjX6m/4tR9//HGmTZvGBx98QMeOHenatSuffPKJeb/RaOSTTz7hjjvuIDY2lqSkJKZNm2bef/7tv5MnT/LYY4/RunVr+vbty7///tvgmnl5eYwdO5b27dvTsWNHnnvuOXJycq7r62wKzZpU9ezZk8jIyAZfU6dOBaC+vp6pU6fSqVMn4uPjeeGFFyguLrY4R25uLiNHjqRNmzZ06dKFmTNnotPpLPps376dAQMGEBsby3/+8x9WrFjRIJZFixbRs2dPWrduzYMPPkhKSorF/iuJRRAEQRCa2rGCKl5auo+eH23kpaX7OFZQdcNjWLlyJba2tixdupTx48fz2WefmZOh33//nfnz5zN16lT++OMPPv/8cyIiIi54HoPBwAsvvIBCoeCnn35i6tSpzJo1y6KPVqtlxIgR2NnZsWjRIhYvXoytrS1PP/00Go3mur/Wa9GsSdWyZcvYsmWL+WvevHkA9OnTB4D333+fDRs2MHv2bL777jsKCwsZM2aM+Xi9Xs+oUaPQarUsWbKEGTNmsHLlSj7++GNzn+zsbEaNGkWnTp34+eefeeKJJ3jzzTfZvHmzuc+aNWuYPn06o0ePZuXKlURFRTFixAhKSkrMfS4XiyAIgiA0tXqtnv9bf4y1B/Kp0ehZeyCf2euP3fARq8jISMaMGUNQUBD9+/cnNjaW5ORkwDSq5ObmRmJiIj4+PsTFxTFkyJALnmfr1q2kp6czc+ZMoqKi6NChAy+//LJFnzVr1m
AwGHjvvfeIjIwkNDSU6dOnk5eXx44dO677a70WzZpUubi44O7ubv7asGEDAQEBdOzYkaqqKpYvX86kSZPo0qULsbG
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Lot Config\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 65,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Lot Config\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS SubClass\n",
"\n",
"By looking at this variable's realizations, one can see that several distinct features are lumped together into one. In particular, the above variables *has 2nd Flr* and *build_type_\\** and the future age-related features at the bottom of this notebook together should capture the same patterns in a more advantageous way. Thus, the column is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 66,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1-STORY 1946 & NEWER ALL STYLES',\n",
" '1-STORY 1945 & OLDER',\n",
" '1-STORY W/FINISHED ATTIC ALL AGES',\n",
" '1-1/2 STORY - UNFINISHED ALL AGES',\n",
" '1-1/2 STORY FINISHED ALL AGES',\n",
" '2-STORY 1946 & NEWER',\n",
" '2-STORY 1945 & OLDER',\n",
" '2-1/2 STORY ALL AGES',\n",
" 'SPLIT OR MULTI-LEVEL',\n",
" 'SPLIT FOYER',\n",
" 'DUPLEX - ALL STYLES AND AGES',\n",
" '1-STORY PUD (Planned Unit Development) - 1946 & NEWER',\n",
" '1-1/2 STORY PUD - ALL AGES',\n",
" '2-STORY PUD - 1946 & NEWER',\n",
" 'PUD - MULTILEVEL - INCL SPLIT LEV/FOYER',\n",
" '2 FAMILY CONVERSION - ALL STYLES AND AGES']"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 66,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(ALL_COLUMNS[\"MS SubClass\"][\"lookups\"].values())"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 67,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS SubClass\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS Zoning\n",
"\n",
"This variable is dropped as most houses are located in a \"residential\" zone."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 68,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MS Zoning\n",
2021-05-25 08:22:14 +02:00
"RL 2252\n",
"RM 459\n",
"FV 131\n",
"RH 27\n",
"C 25\n",
"A 2\n",
"I 2\n",
"RP 0\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 68,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"MS Zoning\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 69,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVfrA8e/MZNJ7771CCAktEEIREBCkhqqsLj92wRXLroogioIgYF10dUXFFVQUkaoIiFRBmkACJIFASEJ678kkmZnM74+BC0NCC4FQzud58sice+697wRJ3jn3nPfIdDqdDkEQBEEQBOGWyNs6AEEQBEEQhPuBSKoEQRAEQRBagUiqBEEQBEEQWoFIqgRBEARBEFqBSKoEQRAEQRBagUiqBEEQBEEQWoFIqgRBEARBEFqBUVsH8CBpbGyksLAQCwsLZDJZW4cjCIIgCMIN0Ol01NTU4OzsjFx+9fEokVTdQYWFhfTp06etwxAEQRAEoQX27NmDq6vrVY+LpOoOsrCwAPR/KZaWlm0cjSAIgiAIN6K6upo+ffpIv8evRiRVd9DFR36WlpYiqRIEQRCEe8z1pu6IieqCIAiCIAitQCRVgiAIgiAIrUAkVYIgCIIgCK1AzKkSBEEQhHuYVqtFrVa3dRj3NKVSiUKhuOXriKRKEARBEO5BOp2O/Px8ysvL2zqU+4KtrS2urq63VEdSJFWCIAiCcA+6mFA5Oztjbm4uikq3kE6no7a2lsLCQgDc3NxafC2RVAmCIAjCPUar1UoJlYODQ1uHc88zMzMD9EW6nZ2dW/woUExUFwRBEIR7zMU5VObm5m0cyf3j4vfyVuaniaRKEARBEO5R4pFf62mN76VIqgRBEARBEFqBSKoEQRAEQRBagUiqBEEQhAdaY6OWnNPJJPy6iZQDe6kuK23rkIQr9OvXj+XLl7d1GNclVv8JgiAID7SspJOsfet1dLpGAMJi+/Lw359BaWraxpG13KxZs1i/fj3jx4/nzTffNDg2b948vvvuO0aNGsXixYsBKC0t5cMPP2TPnj0UFxdjY2NDaGgoTz/9NJ07d25y/ezsbPr373/V+3t4eLBz585Wez9r1qyRVujdzURSJQiCIDzQknZvlxIqgFP7dtP50VG4+AW0YVS3zs3Njc2bNzN79mxMLySI9fX1bNq0CXd3d4O+zz77LGq1msWLF+Pl5UVJSQkHDhy4amFRNzc39u3b16Q9MTGR6dOn8/jjj7fqe7G3t2/V690u4vGfIAiC8EAzNrcweC2TyZG3wpYlba1du3a4ubmxbds2qW3btm24ubkRFhYmtVVWVnLkyBFeeuklunfvjoeHBxEREUybNu2qo1EKhQInJyeDL5lMxty5cxk6dChTpkyR+qakpPDEE08QERFBdHQ0c+bMoaamRjo+a9Ysnn76ab788ktiY2OJjo5m3rx5BqUNrnz8FxISwo8//sj06dPp2LEjAwcOZMeOHQYx7tixg4EDB9KhQwf+8pe/sH79ekJCQqisrGzx9/R6RFIlCIIgPNDa9X4I48seLUWPGoe9u8cNnauqrqKisACtRnO7wrslcXFxrFu3Tnq9du1aRo8ebdDH3Nwcc3Nztm/fTkNDQ4vuo1arefbZZ3F0dGTBggVSe21tLVOmTMHGxoY1a9awZMkS9u/fz/z58w3OP3ToEJmZmaxYsYLFixezfv161q9ff817fvzxxzzyyCP89NNP9O7dm5deekkaWcvKyuL555+nf//+bNy4kQkTJvDvf/+7Re/tZoikShAEQXiguQeF8thbHzDsX7MY98YiugwfjcJIed3zspITWfX6y3z53N/Zvuy/VBQW3IFob87w4cM5evQoOTk55OTkcOzYMYYPH27Qx8jIiMWLF7Nhwwa6dOnChAkT+OCDDzh9+vQN32f+/PlkZWXxySefYGJiIrVv2rSJhoYG3n77bYKDg+nRowevv/46GzdupLi4WOpnY2PD66
+/TkBAAA899BB9+vThwIED17znqFGjePTRR/Hx8eGFF16gtraWEydOAPDDDz/g5+fHzJkz8ff3Z+jQoYwaNeqG309LiTlVgiAIwgPPwcMLBw+vG+5fVVLMz/9ehKqyAoDEXduwc3Oj24ixtyvEFrG3t6dv376sX78enU5H3759m52fNGjQIPr27cuRI0dISEhg7969LFu2jAULFjQZ2brS999/z7p16/j6669xdXU1OHbu3DlCQkIMKr936tSJxsZG0tPTcXR0BCAwMNBgaxgnJyfOnDlzzfuGhIRIfzY3N8fS0pLSUv3KzfT0dMLDww36R0REXPN6rUGMVAmCIAjCTaopK5USqosyEuLbKJpru/gIcP369cTFxV21n4mJCT179mT69OmsWrWKUaNG8Z///Oea1z5y5AhvvfUWb7zxBp06dWpxjEZGhmM8MpkMnU53zXOUSsPRRJlMRmNj41V63xkiqRIEQRCEm2Tl6ISVk7NBW1B0jzaK5tp69eqFWq1Go9EQGxt7w+cFBgZSW1t71eN5eXk899xzjBs3jrFjmx+hCwgIICUlxeA6x44dQy6X4+fnd+Nv4ib5+fmRmJho0Hby5Mnbdr+LRFIlCIIgCDfJwtaO4f96Bb+orljaORAz7nGComPaOqxmKRQKtmzZwubNmw0esV1UVlbGE088wcaNGzl9+jRZWVls2bKFZcuWXXX1X319PdOnT8fFxYWpU6dSVFTU5Atg2LBhGBsbM2vWLM6cOcPBgweZP38+I0aMkB793Q7jx48nPT2dd999l/T0dDZv3ixNfL+d+yWKOVWCIAiC0AKuAUEMf/EVGlQqzK1t2jqca7K0tLzqMQsLCzp27MiKFSvIzMxEo9Hg6urK2LFjeeqpp5o95/jx4yQlJQHQp0+fZvukpKRgZmbGl19+yVtvvcWYMWMwMzNj4MCBzJo169bf1DV4eXnx4Ycf8vbbb/P1118TGRnJU089xdy5czE2Nr5t95XprvfQUmg11dXVdO7cmaNHj17zf3BBEARBuJa6ujrS09Px8/OTCnsK1/bpp5+yatUq9uzZ0+zxa31Pb/T3txipEgRBEAThvrNy5Uo6dOiAnZ0dR48e5csvv2z1Su9XEkmVIAiCIAj3nfPnz/Ppp59SUVGBu7s7kydPZtq0abf1niKpEgRBEAThvjN79mxmz559R+8pVv8JgiAIgiC0ApFUCYIgCIIgtAKRVAmCIAiCILQCkVQJgiAIgiC0ApFUCYIgCIIgtAKRVAmCIAiCILQCkVQJgiAIgnDXiI+PJywsjKlTp7Z1KDdNJFWCIAiCINw11qxZw6RJk/jzzz8pKCho63BuikiqBEEQBEFoVr1aS35FHfVq7R25X01NDZs3b2bixIn07duX9evX35H7tpY2Tar69etHSEhIk6958+YBUF9fz7x584iOjiYqKopnn32W4uJig2vk5uYydepUOnbsSI8ePXj77bfRaDQGfQ4dOsSoUaMIDw/n4YcfZt26dU1iWblyJf369aNDhw6MHTuWEydOGBy/kVgEQRAE4X5xpqCKf65OoN/7u/nn6gTOFFTd9ntu2bIFf39//P39GT58OGvXrkWn0932+7aWNk2q1qxZw759+6Svr776CoDBgwcDsHDhQnbt2sWSJUv45ptvKCws5JlnnpHO12q1TJs2DbVazapVq1i8eDHr16/no48+kvpkZWUxbdo0oqOj2bhxI08++SSvvfYae/fulfps3ryZRYsWMX36dNavX09oaChTpkyhpKRE6nO9WARBEAThflGv1vLv7WfYcjKf2gYtW07ms2T7mds+YrVmzRqGDx8OQK9evaiqquLw4cO39Z6tqU2TKnt7e5ycnKSvXbt24e3tTbdu3aiqqmLt2rXMmjWLHj16EB4ezsKFC4mPjychIQGAffv2kZqayrvvvktYWBh9+vTh+eefZ+XKlTQ0NACwatUqPD09mTVrFgEBAUyaNIlBgwaxfPlyKY6vvvqKce
PGERcXR2BgIPPmzcPU1JS1a9cC3FAsgiAIgnC/KKtVsyelyKBtd0oRZbXq23bPtLQ0Tp48yaOPPgqAkZERQ4YMYc2
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"MS Zoning\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 70,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS Zoning\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Masonry Veneer Type\n",
"\n",
"None of the groups have a slope differing from the overall one."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 71,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3QU1dvA8e/WbDab3hshJKSQUEIVCFUEQVCKBQFBwI6iAgpiQ+UFVFQE/QmKggXEQlEUUBRRkCa9l0AgjfS6abvZnfePgQ1LAoQQCOV+ztkjc+fOzN3EbJ7c8lyFJEkSgiAIgiAIwhVR1ncDBEEQBEEQbgYiqBIEQRAEQagDIqgSBEEQBEGoAyKoEgRBEARBqAMiqBIEQRAEQagDIqgSBEEQBEGoAyKoEgRBEARBqAPq+m7ArcRqtZKZmYmTkxMKhaK+myMIgiAIQg1IkkRxcTE+Pj4olRfujxJB1TWUmZlJly5d6rsZgiAIgiDUwt9//42fn98Fz4ug6hpycnIC5G+KwWCo59YIgiAIglATRqORLl262H6PX4gIqq6hs0N+BoNBBFWCIAiCcIO51NQdMVFdEARBEAShDoigShAEQRAEoQ6IoEoQBEEQBKEOiDlVgiAIgnCNWCwWzGZzfTdDOI9Go0GlUl3xfURQJQiCIAhXmSRJpKenk5+fX99NES7Azc0NPz+/K8ojKYIqQRAEQbjKzgZUPj4+6PV6kQD6OiJJEiUlJWRmZgLg7+9f63uJoEoQBEEQriKLxWILqDw9Peu7OUI1HB0dATlJt4+PT62HAsVEdUEQBEG4is7OodLr9fXcEuFizn5/rmTOmwiqBEEQBOEaEEN+17e6+P6IoEoQBEEQBKEOiKBKEARBEAShDoiJ6oIgCMKtzWqBlP8gfR84eUGD9uDsV9+tEm5AoqdKEARBuLWd3AALesOqCfDDw/D7a2AqvqZNmDRpEpGRkbz22mtVzr3xxhtERkYyadKkq/Lst956i969e1d7Li0tjejoaP788886e96cOXOIjIy86OtGJYIqQRAE4da2exFI1srjfd9BTsI1b4a/vz+rVq2irKzMVlZeXs4vv/xCQEDAVXvuvffey4kTJ9i5c2eVc8uWLcPT05MuXbrU6t4WiwWr1WpXNmrUKDZu3Gh7+fn5MXbsWLuyG5UIqgRBEIRbm9bF/lihBOW1nx3TpEkT/P39+f33321lv//+O/7+/kRHR9vV/eeff3jwwQdp3bo17dq14/HHHycpKcl23mQy8eabbxIfH0/Tpk3p1q0b8+bNq/a50dHRxMTEsHTpUrtySZJYvnw5/fv3R61Wk5KSQmRkJL///jsPPfQQzZs35+6772bXrl22a5YtW0br1q35888/6dOnD02bNiUtLc3uvk5OTnh7e9teKpXKVvbdd98xcuTIKm285557mDVrFiD36j311FN89NFH3HbbbbRs2ZLXXnsNk8lkq2+1Wpk3bx7du3enWbNm3H333axZs+YS34ErJ4IqQRAE4dbWYjA4OFced5oAno1rdGlFfj6m1FSkOtrPb9CgQSxbtsx2vHTpUgYOHFilXmlpKSNHjmTp0qUsXLgQhULBmDFjbL1CX3/9NevWrWPWrFmsWbOGd999l8DAwIs+d/Xq1ZSUlNjKtm7dSkpKCoMGDbKr+8EHHzB69GhWrFhBw4YNGT9+PBUVFbbzZWVlfPbZZ0ydOpVffvnlshKe3nvvvRw/fpy9e/fayg4ePMiRI0fs2rF582aOHz/O119/zfvvv8/atWv5+OOPbefnzZvHihUreOONN/j11195+OGHeeGFF9i2bVuN21IbIqgSBEEQbm1BbeCRP+G+L+HhX6HDWFBrL3lZ8X//cWroMI7f0ZPTU97AlJJ6xU25++672bFjB6mpqaSmprJz507uvvvuKvV69epFz549CQkJITo6mmnTpnH06FESEuRhy9OnTxMSEkKrVq0IDAykdevW9O3b94
LP7devHxUVFXa9OcuWLaNVq1aEhoba1R01ahRdu3YlNDSUsWPHkpqayqlTp2znzWYzU6ZMoWXLljRq1MiWrbwm/Pz8iI+Ptwssly1bRps2bQgODraVabVapk2bRuPGjenatStjx47lq6++wmq1YjKZmDdvHtOmTaNTp04EBwczcOBA7r77br777rsat6U2xOo/QRAEQfCOlF81ZE5PJ/XZ57Dk5gJQsHQp2oYheD366BU1w8PDg65du7J8+XIkSaJr1654eHhUqXfy5Elmz57Nnj17yMvLQ5IkQA6mIiIiGDBgAKNGjeLOO++kU6dOdO3alfj4+As+18XFhTvuuMPWM2Y0Gvn999+rnTh/7kRyb29vAHJzcwkLCwNAo9Fc0WTz+++/n8mTJ/PSSy+hUChYuXIlL730UpU2nBusxcXFUVJSwunTpykpKaG0tJRRo0bZXWM2m6sMo9Y1EVQJgiAIwmWqyMqyBVRnGTf+e8VBFchDcW+++SYAr7/+erV1nnjiCQIDA5k6dSo+Pj5YrVb69u1r22IlJiaGP//8k3/++YdNmzbx3HPP0aFDB2bPnn3B59577708/PDDnDp1iq1bt6JUKrnzzjur1NNoNLZ/n81Cfu5kdJ1Od0XZybt164ZWq2Xt2rVoNBoqKiqqbceFnB3CnDdvHr6+vnbntNpL90BeCRFUCYIgCMJl0vj7ow4MpCK1csjP5Y4edXLvTp06YTabUSgU1fYu5eXlkZiYyNSpU2ndujUA27dvr1LPYDDQp08f+vTpQ69evXjkkUfIz8/Hzc2t2ufedtttBAUFsWzZMrZu3cpdd91VL/sVqtVq+vfvz7Jly9BoNNx1113odDq7OkeOHKGsrMxWvnv3bvR6Pf7+/ri6uqLVaklLS6Nt27bXtu3X9GmCIAiCcBNQe3kR9OEssuZ8RPmhQ7gNfgDnnj3r5N4qlYrVq1fb/n0+V1dX3Nzc+O677/D29iYtLY333nvPrs6CBQvw9vYmOjoapVLJmjVr8Pb2xsXFpcr9zlIoFAwaNIiFCxdSUFBQZcjtWrrvvvvo06cPAN9++22V8yaTiZdffpknn3yS1NRU5syZw7Bhw1AqlRgMBkaNGsX06dORJIlWrVpRVFTEzp07MRgMDBgw4Kq1WwRVgiAIglALjrGxBM3+EGtJCWp39zq9t8FguOA5pVLJBx98wNSpU+nbty+hoaG88sorPPTQQ7Y6Tk5OzJ8/n1OnTqFUKmnatCmffvopSuXF16cNHDiQOXPm0LhxY5o3b15n7+dyNWzYkLi4OAoKCqptR/v27QkJCWHo0KGYTCb69u3LM888Yzv/3HPP4eHhwbx580hJScHZ2ZkmTZrwxBNPXNV2K6Szs9uEq85oNNKqVSt27Nhx0R8YQRAE4eZRVlZGYmIioaGhVYaxhOpJkkTPnj0ZMmRIlbxVkyZNorCwkP/97391+syLfZ9q+vtb9FQJgiAIgnDdyM3N5ddffyU7O7vaHF3XMxFUCYIgCIJw3Wjfvj3u7u68+eabuLq61ndzLosIqgRBEARBuG4cOXLkoudnzJhxjVpy+URGdUEQBEEQhDoggipBEARBEIQ6IIIqQRAEQRCEOiCCKkEQBEEQhDoggipBEARBEIQ6IIIqQRAEQRDq1LJly2z7Et5KREoFQRAEQRCqNWnSJJYvX247dnNzIzY2lhdeeIGoqKgrvv9vv/3GN998w8GDB7FarQQFBdGrVy+GDRt2wY2fr2eip0oQBEEQhAvq1KkTGzduZOPGjSxcuBC1Wn3RPfTMZnON7vvBBx/w/PPPExsby2effcbKlSuZNGkSR44c4aeffqqr5l9TIqgSBEEQhBtIudlCekEZ5WbLNXmeVqvF29sbb29voqOjefTRRzl9+jS5ubmkpKQQGRnJqlWrGDZsGE2bNmXlypVV7pGbm8vAgQMZM2YMJpOJvXv3MnfuXCZOnMjEiRNp2bIlQUFBdOzYkTlz5jBgwAAA5syZwz333M
OPP/5I165diYuLY8qUKVgsFj777DM6duxI+/bt+eSTT+yeFxkZyQ8//MCYMWNo3rw5PXv25M8//7zqX6t6Daq6d+9
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Mas Vnr Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 72,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Mas Vnr Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Miscellaneous Features\n",
"\n",
"This variable is basically an \"other\" field with no pattern."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 73,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hURdvA4d/uZtN7770nQEKoIfQqCFKk2EXfz4pYUCkKgiIgggX0fcGGqCgiTUG6FEECSAmQAIGQkN77pm42+/2xcGQJJYSEIMx9XblkZ+ec85xE2CdzZp6RabVaLYIgCIIgCMItkbd2AIIgCIIgCHcDkVQJgiAIgiA0A5FUCYIgCIIgNAORVAmCIAiCIDQDkVQJgiAIgiA0A5FUCYIgCIIgNAORVAmCIAiCIDQDg9YO4F5SX19PXl4eZmZmyGSy1g5HEARBEIRG0Gq1VFRU4OjoiFx+7fEokVTdRnl5efTs2bO1wxAEQRAEoQn27NmDs7PzNd8XSdVtZGZmBuh+KObm5q0cjSAIgiAIjaFSqejZs6f0OX4tIqm6jS498jM3NxdJlSAIgiD8y9xo6o6YqC4IgiAIgtAMRFIlCIIgCILQDERSJQiCIAiC0AzEnCpBEARBuANoNBrUanVrh3FPUiqVKBSKWz6PSKoEQRAEoRVptVpycnIoKSlp7VDuadbW1jg7O99SHUmRVAmCIAhCK7qUUDk6OmJqaiqKQ99mWq2WyspK8vLyAHBxcWnyuURSJQiCIAitRKPRSAmVnZ1da4dzzzIxMQF0RbodHR2b/ChQTFQXBEEQhFZyaQ6VqalpK0ciXPoZ3Mq8NpFUCYIgCEIrE4/8Wl9z/AxEUiUIgiAIgtAMRFIlCIIgCILQDMREdUEQBOGeVl+vIftsIvmpyZhYWuEWHIa5jW1rh9Ukjz32GMHBwbz11lutHco9SYxUCYIgCPe09IST/DxzCn98s4SNn3zAnz98g7q6urXDAmDKlCkEBQUxY8aMBu/NmjWLoKAgpkyZIrUtXryYl19+uUVjufIrNTW1Wc6/du1aOnTo0Cznai0iqRIEQRDuaQm7d6DV1kuvT+/bTVF2ZitGpM/FxYVNmzZRfVmiV1NTw8aNG3F1ddXra21tjbm5eYvF0r17d/bt26f35e7u3mLXa6rWqkwvkipBEAThnmZoaqb3WiaTI2+GLUuaS2hoKC4uLmzbtk1q27ZtGy4uLoSEhOj1feyxx3j//fel1ytWrGDAgAG0adOG6OhoJk6cKL1XX1/Pl19+Sf/+/QkPD6dXr17873//u24shoaGODg46H1dqum0Y8cORowYQZs2bejbty+fffYZdXV10rHLli1j6NChRERE0LNnT2bOnElFRQUABw8eZOrUqZSXl0sjYIsXLwYgKCiIHTt26MXRoUMH1q5dC0BGRgZBQUFs2rSJRx99lDZt2rBhwwYAfvnlF+677z7atGnDoEGDWLFiReO+6U0k5lQJgiAI97TQHr05vXcntVVVAHQeMQZbV7dGHVulKqe2shJzWzsUBi33kTpq1CjWrl3LsGHDAFizZg0jR47k0KFD1zzm5MmTvP/++8yfP5/IyEhKS0s5fPiw9P7ChQv55ZdfmDp1KlFRUeTl5ZGSktKk+A4fPszkyZN5++236dChA2lpaUyfPh2ACRMmALqSBW+99Rbu7u6kp6cza9YsPvzwQ2bOnElkZCTTpk1j0aJFbNmyBbj52l0LFixgypQphISEYGRkxG+//cann37KjBkzCAkJ4fTp00yfPh1TU1NGjBjRpPu8EZFUCYIgCPc014BgHn7/IwrTUzGxtMLRxw+FgfKGx6WfimfHV59TnJVJWK9+dBk5FitHpxaJcdiwYSxcuJDMTN1jyaNHj/LRRx9dN6nKzs7GxMSEXr16YW5ujpubG6GhoQCoVCq+++47ZsyYISUYnp6eN5zTtHv3biIjI6XX3bt3Z9GiRXz22Wc888wz0rk8PD
x4+eWX+fDDD6Wk6sknn5SOc3d355VXXuGdd95h5syZGBoaYmFhgUwmw8HB4ea/QcATTzzBgAEDpNeLFy9mypQpUpuHhwdJSUn8/PPPIqkSBEEQhJZi5+aBnZtHo/uXFxaw4eO5VJWVAhC/axs2Li50emB0i8Rna2tLr169WLduHVqtll69emFre/0VitHR0bi6utKvXz+6d+9O9+7d6d+/PyYmJiQnJ1NbW0uXLl1uKo7OnTszc+ZM6fWl7V3OnDnD0aNHWbJkifSeRqOhpqaGqqoqTExM2L9/P0uXLiU5ORmVStXg/VsVHh4u/bmyspK0tDTeeustacQMoK6uDgsLi1u+1rWIpEoQBEEQblJFcZGUUF1yIe5YiyVVoHsE+O677wLwzjvv3LC/ubk569at49ChQ+zbt08aUVq9ejVGRkZNisHExAQvL68G7ZWVlbz00kt6I0WXGBkZkZGRwbPPPstDDz3Eq6++ipWVFUeOHOGtt95CrVZfN6mSyWRotVq9tsvnal1y+ePCyspKAN577z3atWun108ub7np5CKpEgRBEISbZGHvgIWDI+X5eVJbQOeuLXrN7t27o1arkclkxMTENOoYAwMDoqOjiY6OZsKECXTs2JEDBw7Qs2dPjI2NOXDgAB4ejR+hu5bQ0FBSUlKumnABJCQkoNVqmTJlipTUbN68Wa+PUqlEo9E0ONbW1pa8vH++zxcuXKDq4vy3a7G3t8fR0ZH09HRpHtrtIJIqQRAEQbhJZtY2DHt1Kvt/+ZH8C8m07T+IgM7RLXpNhUIhJSKKRqxO3LVrF+np6XTs2BFLS0v27NlDfX09Pj4+GBkZ8X//9398+OGHKJVK2rdvT1FREefOnWP06JsfbXvxxRd57rnncHV1ZeDAgcjlcs6cOcPZs2d59dVX8fLyQq1W8/3339OnTx+OHDnCypUr9c7h5uZGZWUlsbGxBAUFYWJigomJCV26dGHFihVERkai0WhYsGABSuWN57xNnDiR2bNnY2FhQffu3amtrSU+Pp6ysjLGjx9/0/fYGCKpEgRBEIQmcPYLYNikqdRWVWFqaXVbrnkzNagsLCzYvn07n332GTU1NXh5ebFw4UICAgIAeOGFF1AoFCxatIi8vDwcHBwYN25ck+Lq3r07S5Ys4fPPP+fLL7/EwMAAX19fKUELDg5m6tSpfPnll3z00Ud06NCB1157jcmTJ0vnaN++PePGjeOVV16hpKSECRMm8NJLLzF58mSmTZvGI488gqOjI9OmTSMhIeGGMY0ePRpjY2O+/vpr5s+fj6mpKYGBgTzxxBNNusfGkGmvfFAptBiVSkVUVBRHjhxp0eJsgiAIwr9DdXU1KSkp+Pj4YGxs3Nrh3NOu97No7Oe3KP4pCIIgCILQDERSJQiCIAiC0AxEUiUIgiAIgtAMRFIlCIIgCILQDERSJQiCIAiC0AxEUiUIgiAIgtAMRFIlCIIgCILQDERSJQiCIAiC0AxEUiUIgiAIwm0VFBTEjh07WjuMZie2qREEQRAEoVlNmTKFdevWNWiPiYnh66+/boWIbg+RVAmCIAiC0Oy6d+/O3Llz9doMDQ1bKZrbQzz+EwRBEIR7QI1aQ05pNTVqzW25nqGhIQ4ODnpfVlZX33g6Ozubl19+mQ4dOtCpUyeef/55MjIyANi3bx9t2rShrKxM75jZs2fz+OOPt/h93IxWTar69OlDUFBQg69Zs2YBUFNTw6xZs+jcuTORkZG89NJLFBQU6J0jKyuLZ555hnbt2tG1a1c++OAD6urq9PocPHiQESNGEB4eTv/+/Vm7dm2DWFasWEGfPn1o06YNo0eP5sSJE3rvNyYWQRAEQbgTnc0t55VVcfRZuJtXVsVxNre8tUOSqNVqnn76aczMzFixYgU//fQTpqam/Oc//6G2tpauXbtiaWnJ1q1bpWM0Gg2bN29m6NChrRh5Q62aVK1evZp9+/ZJX8uWLQNg0KBBAMyZM4ddu3bxySef8P3335
OXl8eECROk4zUaDc8++yxqtZqVK1cyb9481q1bx6JFi6Q+6enpPPvss3Tu3Jlff/2VJ554grfffpu9e/dKfTZt2sT
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Misc Feature\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 74,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Misc Feature\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Roof\n",
"\n",
"Roofs in Ames, IA, are not special enough to make a difference in the price. Even \"hip\" roofs seem to be already priced in with the bigger houses."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 75,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3yNZxvHvycnWyKRnRhBQoKYQdAYNUoHpUoppUqpKh0UtVetLlV9S6ulRqt2axYtJQ1JzdjE3hlIZJ8k5/3jknNyJEgiEuP+fj7n8+a5n/t5nuuc9HV+ue7r/l0avV6vR6FQKBQKhULxQJgVdwAKhUKhUCgUTwJKVCkUCoVCoVAUAkpUKRQKhUKhUBQCSlQpFAqFQqFQFAJKVCkUCoVCoVAUAkpUKRQKhUKhUBQCSlQpFAqFQqFQFALmxR3A00RmZiZRUVGUKFECjUZT3OEoFAqFQqHIA3q9nsTERNzc3DAzu3s+SomqIiQqKoqmTZsWdxgKhUKhUCgKwD///IOHh8ddzytRVYSUKFECkF+KnZ1dMUejUCgUCoUiLyQkJNC0aVPD9/jdUKKqCMla8rOzs1OiSqFQKBSKx4z7le6oQnWFQqFQKBSKQkCJKoVCoVAoFIpCQIkqhUKhUCgUikJA1VQpFAqFQvEAZGRkoNPpijsMxQNgYWGBVqt94PsoUaVQKBQKRQHQ6/VcvXqVmzdvFncoikLA0dERDw+PB/KRVKJKoVAoFIoCkCWo3NzcsLW1VabOjyl6vZ6kpCSioqIA8PT0LPC9lKhSKBQKhSKfZGRkGASVs7NzcYejeEBsbGwAMel2c3Mr8FKgKlRXKBQKhSKfZNVQ2draFnMkisIi63f5IPVxSlQpFAqFQlFA1JLfk0Nh/C6VqFIoFAqFQqEoBJSoUigUCoVCoSgElKhSKBQKxVNNZmYm58+fJzw8nMOHD3Pr1q3iDqnY0Ov1jB49mvr16+Pn58fRo0eLLZawsDD8/PyIj48vthjyi9r9p1AoFIqnmrNnz7Jw4UL0ej0A1atXp23btlhaWhZzZEaGDx/OqlWrADA3N8fd3Z02bdrw/vvvY2VlVWjP2b59O6tWrWLBggWULVuWUqVK5ZgTFhZGjx49KFmyJCEhISbPj4iIoFOnTgAcP348z89944038Pf3Z+TIkQ/+JooRlalSKBQKxVPN/v37DYIK4ODBg8TGxhZjRLnTuHFjQkJC2LJlCyNGjOC3335j5syZhfqMCxcu4OrqSp06dXB1dcXc/O65lxIlSrB582aTseXLl+Pl5VWoMT1OKFGlUCgUiqeaOzNSGo0GM7NH7+vR0tISV1dXPD09admyJY0aNSI0NNRwPi0tjUmTJtGwYUOqV69O165diYiIMLlHeHg4r776KgEBAQQHB/P555+Tnp4OSDZs4sSJXL58GT8/P5o3b37PeNq3b8+KFSsMxykpKaxfv5727dubzLtx4wYfffQRjRs3pmbNmrRt25a1a9cazg8fPpzw8HAWLFiAn58ffn5+XLx4saAfU7Hy6P1Xo1AoFApFEVKrVi2TJazGjRvn3dAzMx3SU0Gf+ZCiy50TJ06wb98+LCwsDGPTp0/nzz//ZOrUqaxatQpvb2/69OljaKNz7do1+vbtS/Xq1fn9998ZN24cy5cv57vvvgNg5MiRDBo0CA8PD0JCQli+fPk9Y3j55ZfZvXs3ly9fBuDPP/+kdOnSVKtWzWReWloa1apV4/vvv2ft2rV07tyZoUOHGgTfyJEjqV27Np07dyYkJISQkJAHcjUvTlRNlUKhUCieasqUKUOfPn2IioqiRIkSeHh43HPZy0B6Ktw4B5o0sHUGO3cwL7z6pjvZtm0btWvXJj09nbS0NMzMzBg9ejQASUlJLFmyhClTptC0aVMAJk6cyL///svy5cvp06cPv/zyCx4eHowZMwaNRoOPjw/Xrl3j888/Z8CAAdjb21OiRAm0Wi2urq73jcfZ2ZkmTZ
qwcuVK3nvvPVasWEHHjh1zzHN3d6d3796G4zfeeIOQkBA2bNhAjRo1sLe3x8LCAmtr6zw991FGiSqFQqFQPPW4urrm7wv91jVIjAEHJzDXQFIsaK3A3v2hxRgUFMS4ceNITk5m/vz5aLVaWrduDcD58+fR6XTUqVPHMN/CwoIaNWpw6tQpAE6dOkXt2rVNTC4DAwNJSkri6tWrBaqF6tixI59++ikvv/wy+/fv5+uvv2bPnj0mczIyMpg9ezYbN27k2rVr6HQ60tLSsLa2LsjH8Eijlv8UCoVCocgvSTGgzzAdS324W/9tbGzw9vbG39+fyZMnExERwbJlyx7qM+9HkyZNSE1NZcSIETz77LO57hb88ccfWbBgAX369GHBggWsXr2a4ODgB2oH86iiRJVCoVAoFPnFzgPM7ljssXYsssebmZnRr18/vv76a1JSUihXrhwWFhbs3bvXMEen03Hw4EF8fX0B8PHxYd++fSY7Hffs2WNY8iwI5ubmvPzyy4SHh+e69Aewd+9eWrRowcsvv4y/vz9ly5bl7NmzJnMsLCzIzCzaurSHgRJVCoVCoVDklxLOYOsCFnZgZgH2nmDjWKQhtGnTBjMzMxYvXoytrS1du3Zl+vTpbN++ncjISEaPHk1KSgqvvvoqAK+//jpXr15l4sSJnDp1ii1btvDNN9/Qq1evB9rt+P7777Nz504aN26c63lvb29CQ0PZu3cvp06dYsyYMcTExJjMKV26NAcOHODixYtcv379sRVYqqZKoVAoFIqCYG4JjmXA0hK0Rf91am5uTvfu3Zk7dy5du3ZlyJAh6PV6hg4dSmJiIgEBAcydOxcHBwdACsa///57pk+fztKlS3F0dOTVV1+lf//+DxSHpaUlTk5Odz3fv39/Lly4QO/evbGxsaFz5860bNnSxLn+rbfeYvjw4bz44oukpKTw119/PVBMxYVGnz0PqHioJCQkEBgYyJ49e7CzsyvucBQKhUJRQFJSUjhz5gwVKlR4Iguun0bu9TvN6/e3Wv5TKBQKhUKhKASUqFIoFAqFQqEoBJSoUigUCoVCoSgElKhSKBQKhUKhKASUqFIoFAqFQqEoBJSoUigUCoVCoSgElKhSKBQKhUKhKASUqFIoFAqFQqEoBJSoUigUCoVCYYKfnx9btmwptPu98cYbfPrpp4bj5s2bM3/+/EK7/6OCalOjUCgUCsVTRnR0NLNnz2bbtm1cu3YNZ2dnqlSpQs+ePWnYsGGe73Px4kVatGhxzzlTpkzhm2++wdz8yZccT/47VCgUCoVCYeDixYt07dqVkiVLMnToUCpXrkx6ejohISGMHz+ejRs35vlenp6ehISEGI5/+uknduzYwbx58wxj9vb2T00rHyWqFAqFQqEoRlJ1GdxI0lHK1gIrC+1Df9748ePRaDQsW7YMW1tbw3ilSpXo2LFjjvk9evTA19eXMWPGGMauX79OkyZN+OGHH0wyW7a2tmi1WlxdXU3u8cYbb+Dv78/IkSNzjSk+Pp5p06bx119/kZaWRkBAACNGjMDf3/9B326RUqw1Vc2bN8fPzy/Ha/z48QCkpqYyfvx4goKCqF27NgMHDiQmJsbkHpcvX6Zv377UrFmThg0bMm3aNNLT003mhIWF0aFDBwICAmjVqhUrV67MEcvixYtp3rw51atXp1OnTkRERJicz0ssCoVCoVDkhxPXbvHB0v00/2IbHyzdz4lrtx7q827evMmOHTvo1q2biaDKomTJkjnGOnXqxNq1a0lLSzOM/fHHH7i5udGgQYNCiev9998nNjaWH374gZUrV1KtWjV69uzJzZs3C+X+RUWxiqrly5cTEhJieGWlC9u0aQPA5MmT2bp1KzNmzGDhwoVERUXx3nvvGa7PyMigX79+6HQ6lixZwtSpU1m1ahUzZ840zLlw4QL9+vUjKCiI33//nZ49ezJq1Ch27NhhmLN+/XqmTJnCgAEDWLVqFf7+/vTu3ZvY2FjDnPvFolAoFA
pFfkjVZfDVlhNsOHiVpLQMNhy8yowtJ0jVZTy0Z54/fx69Xk/FihXzfM1zzz0HYFK4vnLlSl555RU0Gs0Dx7R7924
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Matl\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 76,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1xV9f/A8dfdcNl7g4gKbtwLR2ZaWubIylFqllquyllmapma1TfT+qWmucvMUVbaMEeaKwfuLcpS2Xvc+fvjyIUrqIggqJ/n48FDzrmfc877QsGbz3h/ZGaz2YwgCIIgCIJwT+SVHYAgCIIgCMLDQCRVgiAIgiAI5UAkVYIgCIIgCOVAJFWCIAiCIAjlQCRVgiAIgiAI5UAkVYIgCIIgCOVAJFWCIAiCIAjlQFnZATxKTCYTCQkJ2NnZIZPJKjscQRAEQRBKwWw2k52djaenJ3L5rfujRFJ1HyUkJNC+ffvKDkMQBEEQhDLYuXMn3t7et3xdJFX3kZ2dHSB9U+zt7Ss5GkEQBEEQSiMrK4v27dtbfo/fikiq7qOCIT97e3uRVAmCIAjCA+ZOU3fERHVBEARBEIRyIJIqQRAEQRCEciCSKkEQBEEQhHIg5lQJgiAIQgUxGo3o9frKDkO4A5VKhUKhuOf7iKRKEARBEMqZ2Wzm2rVrpKWlVXYoQik5Ozvj7e19T3UkRVIlCIIgCOWsIKHy9PREq9WKgs9VmNlsJicnh4SEBAB8fHzKfC+RVAmCIAhCOTIajZaEys3NrbLDEUrB1tYWkIp0e3p6lnkoUExUFwRBEIRyVDCHSqvVVnIkwt0o+H7dyxw4kVQJgiAIQgUQQ34PlvL4fomkShAEQRAEoRyIpEoQBEEQBKEciKRKEARBeLSZjBC9Dw58Ayc3Qua1yo6oUpnNZqZMmULz5s0JDQ3l9OnT9/X5HTt2ZNmyZff1meVFrP4TBEEQHm2Xd8HKnmA2Scf1X4BnPge1XeXGdZNJkyaxceNGAJRKJV5eXjz55JOMGTMGjUZTbs/5559/2LhxIytWrCAgIAAXF5cS261du5ZVq1YRExODQqHA39+fp556imHDhlnizcjI4P/+7//KLbaqTiRVgiAIwqMtcnVhQgVw/AdoPQJ8GlZeTLfQtm1bZs2ahcFg4OTJk0ycOBGZTMb48ePL7RkxMTF4eHjQuHHjW7ZZt24dM2fOZPLkyTRv3hydTsfZs2c5f/58ucXxIBJJlSAIgvBoUztaH8vkIK+avx7VajUeHh6AVKSydevW7Nmzx/K6Tqdjzpw5/Pbbb2RlZVGvXj3eeecdGjRoYGlz4MAB5syZw5kzZ3B2dqZHjx68+eabKJVKq96w0NBQ/Pz82LZtW7E4tm3bxlNPPUWfPn0s52rWrGn5fP78+Vb3AVixYgVfffUVNWrU4P3337e0TUlJoV27dnzzzTe0atWq2LMyMjL4+OOP+fvvv9HpdNSrV493332XsLCwMn0NK5KYUyUIgiA82sJfBI1D4XHbceBW89btizCkpaGLi8NcCfv7nTt3jiNHjqBSqSzn5syZwx9//MHs2bPZuHEjQUFBvPrqq5btcq5fv87QoUOpX78+P//8M9OmTWPdunV8/fXXAEyePJnRo0fj7e3N7t27WbduXYnPdnd3JzIykri4uBJff+WVV3jqqado27Ytu3fvZvfu3TRq1Ig+ffrw66+/otPpLG03bdqEp6cnLVu2LPFeY8aMITk5mW+++YYNGzZQt25dBg4cWCW3ABJJlSAIgvBo828Gr/4NfZbDoN+g9WhQqu94WfZ//3Gl/wAuPtGZq9Omo4stOcEoTzt27KBRo0bUr1+fZ555huTkZIYMGQJATk4Oa9asYcKECbRv354aNWrw4YcfotFoLMnRd999h7e3N++//z4hISF06tSJUaNG8e2332IymXBwcMDOzg6FQoGHhweurq4lxjFy5EgcHR3p2LEjXbp0YdKkSWzevBmTSR
pGtbOzw8bGxtKz5uHhgVqtpnPnzgBs3brVcq8NGzbQq1evEutEHTx4kGPHjjFv3jzq169PtWrVmDhxIo6Ojvzxxx/l+rUtD1Wzf1MQBEEQ7iePUOmjlPTXrhE35k2MKSkApK9fj7paEO6vvVZREQLQokULpk2bRm5uLsuWLUOhUNClSxcAoqOj0ev1VnOhVCoVDRo04OLFiwBcvHiRRo0aWSUwTZo0IScnh2vXruHr61uqODw9Pfnhhx84d+4c//33H0eOHGHSpEmsW7eOxYsXI5eX3Gej0Wjo3r0769evp2vXrpw8eZLz589bespudvbsWXJycmjRooXV+by8PKKjo0sV6/0kkipBEARBuEuGxERLQlUga/e/FZ5U2draEhQUBMDMmTN59tln+fHHH63mNt1PtWrVolatWvTv35+DBw/Sv39/Dhw4cMuhPIA+ffrQo0cPrl27xoYNG2jZsiV+fn4lts3OzsbDw4OVK1cWe83BwaGEKyqXGP4TBEEQhLuk8vFBeVMi4PhEp/sag1wuZ9iwYXzxxRfk5eURGBiISqXi8OHDljZ6vZ7jx49To0YNAEJCQjhy5Ahms9nS5tChQ9jZ2eHt7X1P8RQ8Izc3F5B6yQqGA4sKDQ2lXr16rF27ll9//ZXevXvf8p5169YlKSkJhUJBUFCQ1cethiYrk0iqBEEQBOEuKd3d8f9iLnbt26P09MR99CgcbswXup+efPJJ5HI5q1evRqvV0rdvX+bMmcM///zDhQsXmDJlCnl5eTz33HMA9OvXj2vXrvHhhx9y8eJFtm7dyvz58xk8ePAth+xKMnXqVL766isOHTpEXFwckZGRTJw4EVdXV8LDwwHw8/Pj7NmzXLp0iZSUFKuNivv06cOiRYswm8088cQTt3xO69atCQ8PZ8SIEezevZvY2FgOHz7M559/zvHjx8v2RatAYvhPEARBEMrAtl49/Od9gSknB+UtCmRWNKVSyYABA1i8eDF9+/Zl3LhxmM1mJkyYQHZ2NvXq1WPx4sU4OTkB4OXlxaJFi5gzZw5r167F2dmZ5557jtdff/2untu6dWvWr1/P999/T1paGi4uLjRq1Ihly5ZZioU+//zzHDhwgN69e5OTk8OKFSssc6O6devGzJkz6dat220Ll8pkMhYtWsTcuXN55513SE1Nxd3dnaZNm+Lu7l7Gr1rFkZmL9gEKFSorK4smTZpw6NAh7O3tKzscQRAEoQLk5eURFRVFcHAwNjY2lR1OlRQbG8sTTzzBunXrqFu3bmWHA9z++1ba39+ip0oQBEEQhPtCr9eTlpbG3LlzadiwYZVJqMqLmFMlCIIgCMJ9cfjwYSIiIjh+/DjTp0+v7HDKneipEgRBEAThvmjRogVnz56t7DAqjOipEgRBEARBKAciqRIEQRAEQSgHIqkSBEEQBEEoByKpEgRBEARBKAciqRIEQRAEQSgHIqkSBEEQBOGOXnrpJT766KPKDqNKE0mVIAiCIAgATJo0idDQ0GIfV65cuet77d+/n9DQUDIyMiog0qpJ1KkSBEEQBMGibdu2zJo1y+qcq6trJUXzYBFJlSAIgiBUYfl6I6k5ely0KjQqRYU/T61W4+Hhccd2P/30EytWrCAqKgqtVkvLli159913cXNzIzY2lpdffhmAZs2aAdCzZ09mz55dobFXtkod/uvYsWOJ3YwFpevz8/OZPn06LVq0oFGjRowaNYqkpCSre8THxzN06FAaNmxIq1at+PjjjzEYDFZt9u/fT8+ePalXrx5PPPEEGzZsKBbL6tWr6dixI/Xr16dPnz4cO3bM6vXSxCIIgiAI5enc9UzeXBtJx8928ObaSM5dz6zskCwMBgNjxoxh06ZNfPXVV8TFxTFp0iQAfHx8mD9/PgC///47u3fvZvLkyZUZ7n1RqUnVunXr2L17t+Vj6dKlADz55JMAzJw5k+3btzN37lxWrlxJQkICI0eOtFxvNBoZNmwYer2eNWvWMHv2bDZu3M
i8efMsbWJiYhg2bBgtWrTg559/ZuDAgbz33nvs2rXL0mbz5s3MmjWLESNGsHHjRsLCwhgyZAjJycmWNneKRRAEQRD
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 77,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Roof Matl\"]\n",
"del df[\"Roof Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sale Info\n",
"\n",
"Partial and abnormal (= foreclosure) sales seem to make a difference, with higher and lower prices respectively. These two types are encoded in the factor variables *partial_sale* and *abnormal_sale*. The impact does not seem to be big, though."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 78,
2021-05-25 08:22:14 +02:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Sale Condition\n",
2021-05-25 08:22:14 +02:00
"Normal 2396\n",
"Partial 233\n",
"Abnorml 189\n",
"Family 46\n",
"Alloca 22\n",
"AdjLand 12\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 78,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Sale Condition\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 79,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUxdfA8e9udpNseu89gQQIpBBaCL0pTboFRBEFFMGGEFERFAERyw/0lSIgIIgYmiCgoIAgndA7KaSRXjdtS/b9Y2VxDSWEQEDm8zx5ZOfOvfcskezJzNwzEp1Op0MQBEEQBEG4K9L6DkAQBEEQBOG/QCRVgiAIgiAIdUAkVYIgCIIgCHVAJFWCIAiCIAh1QCRVgiAIgiAIdUAkVYIgCIIgCHVAJFWCIAiCIAh1QFbfATxKqqqqyM7OxtLSEolEUt/hCIIgCIJQAzqdjtLSUlxcXJBKbz4eJZKq+yg7O5sOHTrUdxiCIAiCINTC7t27cXNzu+lxkVTdR5aWloD+m2JlZVXP0QiCIAiCUBNKpZIOHToYPsdvRiRV99G1KT8rKyuRVAmCIAjCQ+Z2S3fEQnVBEARBEIQ6IJIqQRAEQRCEOiCSKkEQBEEQhDog1lQJgiAIQh3TarWo1er6DkOoIblcjomJyV1fRyRVgiAIglBHdDodmZmZFBYW1ncowh2ys7PDzc3trupIiqRKEARBEOrItYTKxcUFCwsLUej5IaDT6SgrKyM7OxsAd3f3Wl9LJFWCIAiCUAe0Wq0hoXJ0dKzvcIQ7oFAoAH2RbhcXl1pPBYqF6oIgCIJQB66tobKwsKjnSITauPZ9u5u1cCKpEgRBEIQ6JKb8Hk518X0TSZUgCIIgCEIdEEmVIAiCIAhCHRBJlSAIgvBI01bpOJKcz/L9yfxyMoPs4or6Dumm1q1bR1RUVH2HcVdiY2N55ZVXDK+fffZZPv7441ue87C8b/H0nyAIgvBIO5CYx7OLD1Kl07/uF+7JjAGhWJjW7Udkfn4+//vf/9i9eze5ubnY2toSEhLCK6+8QvPmzev0XrejVCpZtGgRv/76K+np6djY2NCgQQOeeeYZunXrdl/Xhc2bNw+Z7PrfdefOnRk+fDjPP/+8oa1nz5506NDhvsVUWyKpEgRBEB5pPx1NMyRUABuOp/NiO39CPW3r9D7jxo1DrVYza9YsvL29ycvLY//+/fe9UGhxcTHPPPMMJSUlvP766zRt2hQTExMOHz7Mp59+SuvWrbGxsblv8djZ2d22j7m5Oebm5vc+mLskpv8EQRCER5q1mXFNIqkEZCZ1O1JTXFzMkSNHmDBhAq1bt8bT05NmzZoxevRounTpYui3dOlS+vTpQ3h4OB06dGDq1KmUlpbe8to7duygf//+NG3alC5duvDVV1+h0Whu2v/zzz8nPT2dNWvW0L9/f4KCgvD392fIkCFs2LDBUFqgqKiIiRMn0qJFC8LCwnjxxRdJTk42XOfalNyePXt4/PHHiYiIYOTIkYYimqCv3TVz5kyioqJo1aoVs2fPRqfTGcXzz+m/Z599lvT0dGbOnElwcDDBwcFG9/qnVatW0bVrV0JDQ+nRowcbNmwwOh4cHMxPP/3E2LFjCQsLo3v37vz++++3/Lu8WyKpEgRBEB5pAyK9sDK7PnHzaqcgApysanZyeQEUpID21rWNLCwssLCwYMeOHahUqpv2k0gkvPvuu2zevJlZs2Zx4MABPv3005v2P3LkCJMmTWL48OFs2bKFDz/8kHXr1jF//vwb9q+qqmLLli306dMHV1fXasctLS0NU3GxsbGcPn2ab775hh9//BGdTseoUaOM6jhVVFSwZMkSZs+ezffff8/Vq1f55JNPDMeXLFnC+vXrmTFjBqtWraKoqIjt27ff9P3MmzcPNzc3xo8fz969e9m7d+8N+23fvp0ZM2YwYsQINm3axFNPPcXkyZM5cOCAUb+vvvqKxx9/nJ9//pn27dszYc
KEezoyKJIqQRAE4ZEW4WPPhrHR/N/QCFaPas2o9oGYymrw8Zj8FyzuAXPDYPMbUJR+064ymYxZs2axYcMGoqKieOqpp/j88885f/68Ub/nn3+e1q1b4+XlRZs2bXj99dfZunXrTa/71VdfMWrUKPr374+3tzdt27bltddeY/Xq1TfsX1BQQFFREQEBAbd+a8nJ/PHHH0yfPp2oqChCQkKYM2cOWVlZ7Nixw9BPrVYzbdo0mjZtSpMmTRg6dKhRYrNs2TJGjRpF9+7dCQwMZNq0aVhbW9/0vnZ2dpiYmGBpaYmzszPOzs437Ld48WL69+/P0KFD8ff3Z8SIEXTr1o0lS5YY9evfvz+9e/fG19eXN998k7KyMk6ePHnL9343xJoqQRAE4ZEX5GJNkMvNP+yrKUqHn4ZDaa7+9bEV4BQKDjdfTN2jRw86duzIkSNHOH78OHv27OHbb79l+vTpDBgwAIB9+/axYMECEhMTUSqVaLVaKisrKS8vN2yl8k/nz58nPj7eaGTqVuf8e+rtZhISEpDJZISFhRna7O3t8ff3JyEhwdCmUCjw8fExvHZxcSEvLw+AkpIScnJyjK4hk8kIDQ2tcRw3k5iYyJNPPmnUFhkZyfLly43ark0fgn600MrKivz8/Lu6962IpEoQBEEQ7pQy63pCdU3KgVsmVQBmZma0bduWtm3bMnbsWN59913mzZvHgAEDSEtLY/To0Tz99NO88cYb2NracvToUd59913UavUNk6qysjLGjRtH9+7db3ivf3NwcMDGxobExMQ7e7838c+n9kA/fXm3CVNdksvlRq8lEglVVVX37H5i+k8QBEEQ7pStF9j6GLcFdLrjywQFBVFWVgbAmTNn0Ol0xMbGEh4ejr+/v9Gi7xtp3LgxSUlJ+Pr6VvuSSqt/xEulUnr27MmmTZvIysqqdry0tBSNRkNgYCAajYYTJ04YjhUUFJCUlERQUFCN3pu1tTXOzs5G19BoNJw5c+aW58nl8tsmPgEBAcTHxxu1xcfH1zi2e0UkVYIgCIJwp6xcYMgyaNADrN2h07sQ1Pmm3QsKChg+fDgbN27k/PnzpKamsnXrVr799lvD03++vr6o1WpWrFhBamoqGzZsuOnaqGvGjh3Lxo0b+eqrr7h06RIJCQn88ssvfPHFFzc954033sDNzc3wtN/ly5dJTk4mLi6O/v37U1ZWhp+fH126dOH999/nyJEjnD9/nrfffhtXV1ejpxVvZ/jw4SxatIgdO3aQkJDAtGnTKC4uvuU5np6eHD58mKysrJtO1b344ousX7+eVatWkZyczNKlS9m+fTsvvPBCjWO7F8T0nyAIgiDUhmckPLkcKkvB0hEqKgDlDbtaWloSFhbGsmXLSElJQaPR4ObmxuDBgxkzZgwAISEhvPPOOyxatIjPP/+cqKgo3nzzTSZNmnTTENq1a8f8+fP5+uuvWbRoETKZjICAAAYPHnzTc+zs7FizZg0LFy7km2++IT09HVtbWxo2bMjEiRMNC8lnzpzJxx9/zJgxY1Cr1URFRbFw4cJqU2q38sILL5CTk8OkSZOQSqUMHDiQbt26UVJSctNzxo8fz5QpU+jatSsqlYoLFy5U69O1a1cmT57MkiVLmDFjBp6ensyYMYNWrVrVOLZ7QaJ7kCY//+OUSiXNmzfn6NGjWFnV8HFdQRAE4aFQUVFBUlIS/v7+D0WhSsHYrb5/Nf38FtN/giAIgiAIdUAkVYIgCIIgCHVAJFWCIAiCIAh1QCRVgiAIgiAIdUAkVYIgCIIgCHVAJFWCIAiCIAh1QCRVgiAIgiAIdUAkVYIgCIIgCHVAJFWCIAiCIDy0Dh48SHBw8G23v7kfRFIlCIIgCAIAsbGxBAcHs3DhQqP2HTt2EBwcXE9RPTxEUiUIgiAIgoGZmRmLFi2iqKiozq6pUqnq7FoPMpFUCYIgCMIDqlKtJbOogkq19r7dMzo6GicnJxYsWHDTPr/++iu9evUiNDSUzp07s2
TJEqPjnTt35uuvv2bixIlERkYyZcoU1q1bR1RUFDt37qRHjx6EhYUxfvx4ysvLWb9+PZ07d6ZFixZMnz4drfb6+92
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Condition\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 80,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAB8wAAAPvCAYAAACmyTR0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wUdf7H8fdukk2AkFBCIAEiNfQuKAKCgFiwYgtFwK6nYMeOiJ6oP71T8BRBDwGRiEhTUKSIoqEbmqGFIiXB9IT0svP7I5c1S9qmsQn7ej4ed5KZ78x8dnZ2P7v7me/3azIMwxAAAAAAAAAAAAAAAC7G7OwAAAAAAAAAAAAAAABwBgrmAAAAAAAAAAAAAACXRMEcAAAAAAAAAAAAAOCSKJgDAAAAAAAAAAAAAFwSBXMAAAAAAAAAAAAAgEuiYA4AAAAAAAAAAAAAcEkUzAEAAAAAAAAAAAAALomCOQAAAAAAAAAAAADAJVEwBwAAAAAAAAAAAAC4JArmgKS7775bHTp00N133+3sUFzWtm3b1KFDB3Xo0EHbtm0rsv75559Xhw4dNHTo0Eofq+A4s2bNqvS+gAtt6NCh6tChg55//nlnhwIUQT51PvIpeB1eGMuWLbO9Bk6fPu3scOCieL07H3kXjjh9+rTt+Vu2bFmR9bNmzbKtR9W+bgBnIk87H3kaZT3H/M6KwtydHQCQnp6ulStXauPGjTp48KCSkpJkGIa8vb3VvHlzBQcHq1evXho0aJACAgKcHe4FtXXrVm3YsEE7d+5UTEyMkpOT5eXlJT8/P3Xp0kVXXnmlRowYoTp16jg7VLioWbNm6cMPP7T9/eqrr2rMmDGlbjN06FCdOXNG/fr108KFC6s7RMBlkE9LRj5FbfThhx/afmzx9vZWWFiYPD09nRwVgALk3ZKRd1Eb1eS8+/zzz2v58uWSpA0bNqhFixZOjgio+cjTJSNPozaqyXkaFw8K5nCq8PBwPfXUU4qKiiqyLjExUYmJidq/f7+WLVsmPz8//fbbb06I8sI7ePCgpk+frl27dhVZl5OTo3Pnzun48eP67rvv9M9//lMPP/ywJk6cKLPZdQeNKCjC3nrrrXrrrbecHY7L+uSTT3T77bfLYrE4OxTApZBPi0c+LT/yac2xYsUK279TU1O1fv16jRw50nkBAbAh7xaPvFt+5N2ag7wLXDzI08UjT5cfebrmIE/jQqBgDqc5fvy47rvvPqWlpUnKT0DXXHONWrduLQ8PDyUmJurgwYMKCwsrdsiUi9XPP/+sJ554Qunp6ZKk9u3b67rrrlO3bt3UqFEjZWRk6MyZM9q8ebM2btyo5ORkvf3227r99tvl4+Pj5Oirz1tvvVVlH0wOHTpUJftBUWfPnlVoaKjGjx/v7FAAl0E+LR75tHjk09ph586dOnXqlCSpbt26Sk9P14oVK/hBAKgByLvFI+8Wj7xbOzg7706aNEmTJk26IMcCLnbk6eKRp4tHnq4dnJ2n4ToomMNp/v3vf9s+vMyYMUOjRo0q0mbAgAG67777lJCQoO+///5Ch3jBHTlyRI8//rgyMjLk7u6uF198UaNHjy5yJ1/fvn11yy23KCEhQR9++KEWLVrkpIiBvzVs2FCJiYmaM2eO7rzzTnl5eTk7JMAlkE+LIp+itlu5cqUkyc/PTxMmTNB7772n3377TXFxcfLz83NydIBrI+8WRd5FbUfeBS4e5OmiyNOo7cjTuFBcdzwNOFVeXp5+/vlnSVLXrl2L/fBSWKNGjTR27NgLEZrTGIahZ555RhkZGZKkN998U2PHji112JtGjRpp6tSpmjlzptzduf8FznX//fdLkmJjY/Xll186ORrANZBPiyKforbLysrSDz/8IEm67rrrdPPNN8tsNisvL0/ffvutk6MDXBt5tyjyLmo78i5w8SBPF0WeRm1HnsaFxDsenCIhIUGZmZmSpEsuuaRS+8rOztavv/6qX3/9VXv27N
HJkyeVnp4ub29vBQUF6corr9TYsWPVqFGjSscdGxurL774Qps3b9bp06eVnp6uxo0bq2fPnrrrrrt0xRVXVHjfmzZt0sGDByVJV111lW6++WaHt73mmmtKXHf69GktWLBAv/32m6KiomS1WtW0aVNdfvnlGjt2rDp06FDitgXrHnvsMU2aNEl79+7V559/rp07dyohIUENGzbU5Zdfrocfflht27YtNcbMzEx9/vnnWrNmjU6ePClPT0+1bt1ao0aN0u23317mY3z++ee1fPlyNW/eXBs3brQtv/vuu7V9+3bb38uXL9fy5cvttu3Xr58WLlxY4uMqjtVq1bfffqvvvvtOERERSk5Olre3t9q3b69rr71Wd9xxR4nzdM+aNUsffvihpPzheLKysrRw4UKtXr1aJ06ckCS1bdtWt9xyi0JCQi6aD5/XXHONVq1apUOHDmnu3LkKCQlR3bp1K7y/Q4cO6YsvvtC2bdv0119/yWw2KzAwUAMGDND48ePVokWLYrc7ffq0hg0bJunvu4l//PFHff311zpw4IASEhLUp08fLVy4sMS2oaGhOnDggDIyMhQUFKQ77rhDISEh8vDwkJT/heO7777TkiVLFBkZqfT0dLVp00Z33nmnQkJCZDKZio0tPT1dmzZt0m+//ab9+/fr9OnTyszMVP369dWuXTtdddVVCgkJUb169Sp83uBayKdFkU9LRz6t+TZs2KCUlBRJ0k033aSmTZvqsssu05YtW7RixQrdc8895drfsWPHNG/ePP3222+KjY2Vr6+v+vTpo3vuuUc9e/Ysdpvi8uNvv/2mhQsXat++fUpOTpa/v78GDRqkRx55RM2aNSs1huzsbH399df64YcfdOTIEaWmpsrX11edO3fWDTfcoBtvvLHEH+3Ov2ZjYmI0f/58bdq0SVFRUUpPT9eCBQt02WWXFWkbGxur//73v9q4caPOnj0rHx8f9e7dW4899pjat29v93g///xzbd68WdHR0fL29lb//v31+OOPKygoqFznGxc38m5R5N3SkXdrvqrIu3l5eQoNDdWKFSt09OhRmUwmBQUF6YYbbtDdd99d5vbnn/uqdurUKa1bt07bt2/X4cOHFRcXJ0lq3LixevTooVGjRunKK68scftly5bphRdekJR/vgIDA/X1119r+fLlOnr0qHJyctSyZUtdf/31mjhxourUqVNqPEePHtXcuXO1ZcsWJSQkqFGjRrr00ks1YcIEde/eveoeOFwOeboo8nTpyNM1X1V/P66ojRs3asWKFdq9e7cSEhJUr149tWrVSsOGDdPYsWMd+i338OHD+uqrr7R9+3adPXtW6enpatCggdq3b68BAwbo5ptvlr+/v902ycnJWr9+vbZs2aKIiAhFR0crJydHvr6+6tixo0aMGKFbb721xOsG5XNxvGpQ6xQUnaT8D8qVMXXq1CIJS5KSkpKUlJSkvXv36osvvtBHH32kPn36VPg4q1at0quvvmqb66XA2bNn9cMPP+iHH37Q7bffrtdee61CCWnZsmW2f0+YMKHCcRa2YsUKvfLKK8rOzrZb/ueff+rPP//U0qVL9fjjj+uhhx4qc1+LFi3Sm2++qdzcXNuymJgYrVq1SuvWrdPcuXPVt2/fYreNjY3VhAkT7J7rjIwMhYeHKzw8XGvXrr1gyc0RSUlJeuSRR/T777/bLU9MTNT27du1fft2LVq0SHPnzlXz5s1L3VdcXJzuv/9+HThwwG75vn37tG/fPv3666/66KOPSr2zs7YwmUyaPHmyHn30USUkJGjhwoUOXVvF+eSTT/T+++/LarXaLY+MjFRkZKQWL16s119/Xbfcckup+zEMQ1OmTLEN3VOWadOmafHixXbLDh06pDfeeEPbt2/X+++/r7y8PD3zzDNau3atXbuIiAhNmzZNERERev3114vd/0MPPWT3gbtAYmKiduzYoR07dujLL7/UnDlzyvxSAEjk0+KQT8mntV1BzmrdurXtR+
ObbrpJW7Zs0cGDB3Xo0KFSf4Aq7Py5CqX86+iHH37Qjz/+qOeee04TJ04scz/vvfee5syZY7fszJkzCg0N1Y8//qg
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 2000x1000 with 6 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Sale Condition\", hue=\"Sale Condition\",\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 81,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"partial_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Partial\" else 0)\n",
"df[\"abnormal_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Abnorml\" else 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Homes that are sold for the first time are clearly priced higher. A factor variable *new_home* is introduced."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 82,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hURRfA4d/uZje990YqKRBCS6GEIqDYUIqoSLc3QBQRKyB8gl3BEmw0EQRBVARUEJQSEloIHdJII7237d8fgYU1lCSkUOZ9Hh/ZuXPvPRsle3buzBmJXq/XIwiCIAiCIFwTaVsHIAiCIAiCcDMQSZUgCIIgCEIzEEmVIAiCIAhCMxBJlSAIgiAIQjMQSZUgCIIgCEIzEEmVIAiCIAhCMxBJlSAIgiAIQjMwaesAbiU6nY78/HwsLS2RSCRtHY4gCIIgCA2g1+upqqrCxcUFqfTy41EiqWpF+fn59OvXr63DEARBEAShCf755x/c3Nwue1wkVa3I0tISqPuPYmVl1cbRCIIgCILQEJWVlfTr18/wOX45IqlqRecf+VlZWYmkShAEQRBuMFebuiMmqguCIAiCIDQDkVQJgiAIgiA0A5FUCYIgCIIgNAMxp0oQBEEQrjNarRa1Wt3WYdwy5HI5Mpnsmq8jkipBEARBuE7o9Xpyc3MpLS1t61BuOXZ2dri5uV1THUmRVAmCIAjCdeJ8QuXi4oKFhYUoFN0K9Ho91dXV5OfnA+Du7t7ka4mkShAEQRCuA1qt1pBQOTo6tnU4txRzc3Ogrki3i4tLkx8FionqgiAIgnAdOD+HysLCoo0juTWd/7lfy1w2kVQJgiAIwnVEPPJrG83xcxdJlSAIgiAIQjMQSZUgCIIgCEIzEEmVIAiCcEvT6vTsSy9mWVw6vyflkF9e29Yhtal169YRERHR1mHckMTqP0EQBOGWtie1iLHfxqPT170e2sWTd4aHYaG48T4ii4uL+fTTT/nnn38oLCzE1taWkJAQnn32Wbp3794qMQwYMIDs7OzLHh82bBjz589vlVha2433f4wgCIIgNKM1+7MMCRXA+sRsHu/jR5inbdsF1USTJk1CrVYzf/58vL29KSoqIi4urlWLif70009otVoADh48yKRJk9i8eTNWVlYAmJmZtVosrU08/hMEQRBuadamxjWJpBIwkd14K/DKy8vZt28f06ZNo0ePHnh6ehIeHs5TTz3FwIEDDf0WL17MkCFD6NKlC/369WPWrFlUVVVd8dpbtmxh2LBhdOrUiYEDB/LZZ5+h0Wgu2dfBwQFnZ2ecnZ2xta1LTB0dHXFycuKRRx5h9erVRv2PHz9OcHAwZ86cASA4OJgffviBxx9/nPDwcAYOHMjmzZuNzjl79ixTpkwhIiKCqKgonnnmGbKyshr9M2tuIqkSBEEQbmnDu3lhZXrhwc3ztwXi72TVoHNLq1VkFlej1upaKrwGs7CwwMLCgi1btqBSqS7bTyKR8Prrr7Nhwwbmz5/Pnj17eP/99y/bf9++fbzyyiuMGzeOjRs38vbbb7Nu3TpiY2MbFZ9EImHEiBGsW7fOqH3t2rVERkbi4+NjaPv0008ZPHgwv/zyC0OGDOHFF18kJSUFqKsj9dhjj2FpacmKFStYuXIlFhYWPP7441d8361BJFWCIAjCLa1rO3vWP9eLL0Z3ZdWTPXiybwAKk6t/PManFvHAl3H0e38br/98mMzi6laI9vJMTEyYP38+69evJyIigocffpiPPvqIEydOGPWbMGECPXr0wMvLi549e/LCCy+wadOmy173s88+48knn2TYsGF4e3vTu3dvpkyZwqpVqxod47Bhw0hLSyMpKQmoS5A2bNjAiBEjjPrdeeedjBw5Ej8/P1544QXCwsJYvnw5ABs3bkSn0/G///2P4OBgAgICmDdvHmfPniUhIaHRMTUnMadKEARBuOUFulgT6GLd4P5ny2p4dsUBiqrqRkZW78vCz8mKZ/
oHtFSIDTJ48GD69+/Pvn37SExMZMeOHXzzzTfMnTuX4cOHA7B7924WLVpEamoqlZWVaLValEolNTU1hu1aLnbixAkOHDhgNDJ1tXMux9XVlX79+vHTTz8RHh7Otm3bUKlU3HnnnUb9unbtavS6S5cuHD9+3BBPRkYG3bp1M+qjVCrJyMhocCwtQSRVgiAIgtBI+eVKQ0J13r+n8ts8qQIwNTWld+/e9O7dm+eee47XX3+dhQsXMnz4cLKysnjqqacYNWoUU6dOxdbWlv379/P666+jVqsvmSBVV1czadIk7rjjjkveq7FGjhzJ9OnTee2111i3bh133313oxKz6upqOnbsyAcffFDvmIODQ6PjaU4iqRIEQRCERvK0M8PL3pyskhpD2+CO7m0Y0eUFBgayZcsWAI4ePYper2fGjBlIpXWPOK/06A+gQ4cOpKWlGc15uhb9+vXD3NyclStXsmPHDr7//vt6fRITExk6dKjh9aFDhwgNDQWgY8eObNq0CUdHR8OKwuuFmFMlCIIgCI3kZG3GF6O7MSDYBVcbU168PYi7Orm1aUwlJSWMGzeOX375hRMnTpCZmcmmTZv45ptvDKv/fHx8UKvVLF++nMzMTNavX3/VuVHPPfccv/zyC5999hmnT58mJSWF33//nY8//rhJccpkMoYPH86HH36Ij49PvUd9AJs3b+ann34iLS2NBQsWkJSUxJgxYwAYMmQI9vb2PPPMM+zbt4/MzEzi4+OZO3cuubm5TYqpuYiRKkEQBEFognAvO74c240qpQYHy8Y/BmtulpaWdO7cmaVLl5KRkYFGo8HNzY2RI0fy9NNPAxASEsKrr77K119/zUcffURERAQvvvgir7zyymWv26dPH2JjY/n888/5+uuvMTExwd/fn5EjRzY51gceeIDY2FjDPK//mjRpEhs3bmT27Nk4Ozvz4YcfEhgYCIC5uTnff/89H3zwAc8//zxVVVW4urrSs2fPNh+5kuj1ev3VuwnNobKyku7du7N///42/w8vCIIgXF9qa2tJS0vDz8/vpi6QCXVlGiZMmMD27dtxcnIyOhYcHMznn3/OoEGDWjWmK/38G/r5LUaqBEEQBEFoFSqViuLiYhYuXMjgwYPrJVQ3OjGnShAEQRCEVrFhwwZuu+02KioqmD59eluH0+zESJUgCIIgCK1i+PDhl51Hdd7JkydbKZrmJ0aqBEEQBEEQmoFIqgRBEARBEJqBSKoEQRAEQRCagUiqBEEQBEEQmoFIqgRBEARBEJqBSKoEQRAEQRCagUiqBEEQBEFospUrV9K1a1c0Go2hraqqio4dOzJ27FijvvHx8QQHB5ORkcGAAQMIDg4mODiY8PBwBgwYwJQpU4iLi2vtt9BsRFIlCIIgCEKTRUdHU11dzZEjRwxt+/btw8nJiUOHDqFUKg3t8fHxeHh40K5dOwAmT57Mzp072bx5M++++y42NjZMnDiRL7/8stXfR3MQxT8FQRAE4SajVGspqVZjbyHHVC5r0Xv5+/vj7OxMQkICXbp0ASAhIYGBAweyZ88eEhMTiY6ONrSf/zPUbQLt7OwMgIeHB5GRkTg7O7NgwQIGDx6Mv79/i8be3Np0pOriob+L/5k9ezYASqWS2bNnEx0dTdeuXZk0aRKFhYVG18jJyeHJJ5+kc+fO9OzZk3fffddoCBLqMuNhw4YRFhbG7bffzrp16+rFsmLFCgYMGECnTp0YOXIkSUlJRscbEosgCIIgtLVTeRW8sDqRAR9u54XViZzKq2jxe0ZHRxMfH294HR8fT1RUFJGRkYb22tpaDh06ZJRUXcq4cePQ6/Vs3bq1RWNuCW2aVP3000/s3LnT8M/ixYsBuPPOOwF455132LZtG5988gnLly8nPz+f559/3nC+VqvlqaeeQq1Ws2rVKubPn8/PP//MggULDH0yMzN56qmniI6O5pdffmH8+PG88cYb7Nixw9Bn48aNzJs3j+eee46ff/6ZkJAQHnvsMY
qKigx9rhaLIAiCILQ1pVrLx1tOselwLtUqLZsO5/LJllMo1doWvW+PHj04cOAAGo2GyspKjh8/bkiqEhISADh48CA
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 83,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"new_home\"] = df[\"Sale Type\"].apply(lambda x: 1 if x == \"New\" else 0)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 84,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"partial_sale\", \"abnormal_sale\", \"new_home\"])"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 85,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Sale Condition\"]\n",
"del df[\"Sale Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 86,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"partial_sale 233\n",
"abnormal_sale 189\n",
"new_home 227\n",
"dtype: int64"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 86,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].sum()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 87,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>partial_sale</th>\n",
" <th>abnormal_sale</th>\n",
" <th>new_home</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" partial_sale abnormal_sale new_home\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 0 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 87,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Street Name\n",
"\n",
"Looking at the value counts, this variable is pretty useless as almost all observations share the same value."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 88,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Street\n",
2021-05-25 08:22:14 +02:00
"Pave 2886\n",
"Grvl 12\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 88,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Street\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 89,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Street\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Age & Remodeling\n",
"\n",
"The dataset was put together over several years. Therefore, the variables with year numbers need to be expressed relative to the year of sale to indicate the right ages."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 90,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# For one house the year of being remodeled is one year\n",
"# before it was built. That input error is corrected.\n",
"input_error = (df[\"Year Remod/Add\"] < df[\"Year Built\"])\n",
"assert input_error.sum() == 1\n",
"df.loc[input_error, \"Year Remod/Add\"] = df.loc[input_error, \"Year Built\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Introduce a factor variable *remodeled*. Almost half the houses were remodeled at some point in time."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 91,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"46"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 91,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A house counts as remodeled when its remodel year is later than its build year.\n",
"remodeled = df[\"Year Remod/Add\"].gt(df[\"Year Built\"])\n",
"df[\"remodeled\"] = remodeled.astype(\"int64\")\n",
"# Share of remodeled houses in percent, rounded to a whole number.\n",
"round(100 * remodeled.sum() / len(df))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create discrete variables *years_since_built* and *years_since_remodeled*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 92,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"years_since_built\"] = df[\"Yr Sold\"] - df[\"Year Built\"]\n",
"df[\"years_since_remodeled\"] = df[\"Yr Sold\"] - df[\"Year Remod/Add\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 93,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGzCAYAAAAFROyYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQdElEQVR4nO3de1xUdeI//hczDggOYNxUvAcyiNwGKwXH+NiaZWqFtJlLWi0fL5lC5QXXTEANMLXES+qaud5ZP5luqFm6mauJl90vBJiLknlbKmBMucZl5vz+8MdZRwVmdGDOzLyej0ePnHPe8z7v92Hm8OKc93kfB0EQBBARERFJiMzSDSAiIiK6EwMKERERSQ4DChEREUkOAwoRERFJDgMKERERSQ4DChEREUkOAwoRERFJDgMKERERSQ4DChEREUkOAwqZxWeffQaVSoVr165Zuimtmjt3LtRqtVnrXLVqFVQqlcGyJ554AnPnzjXrdoikxJq+97bgXscZY82dOxdPPPGEWdvT1sc4BhSidlJcXIxVq1bxYE5EZAQGFDKL5557Dvn5+ejevbulm2IRr7/+OvLz81ssU1xcjNWrV+M///lPO7WKqG3Z+/ee2hYDio2qq6uDXq9vt+3J5XI4OTnBwcGh3bYpJR06dICTk5Olm0F2jt9709TU1Fi6CdQCBhQzOnnyJFQqFQ4dOnTXuuzsbKhUKuTm5gIAfvjhByQkJOCxxx5DSEgIxo4di7///e8G77lx4waWLFmCMWPGQK1WIyIiAv/7v/+Lf//73wblTp06BZVKhf379+PDDz/E0KFDERYWhqqqKjQ0NGD16tUYMWIEQkJCMGjQIIwfPx7ffvutSX3bunUrRo0ahbCwMDz66KMYO3YssrOzxfX3uhb9xBNPYMqUKfjnP/+JF154ASEhIfjd736HvXv33lV/RUUF0tLS8MQTTyA4OBiPP/445syZg+vXr4tl6uvrsXLlSjz55JMIDg5GdHQ03n//fdTX15vUlyZXr15FfHw8wsPDodFosHr1atz+cO+m/Xrq1CmD9127dg0qlQqfffaZuKy1a8OfffYZEhMTAQATJ06ESqW6Z91kffi9t47vfdPYsytXrmDSpElQq9WYNWsWAECv1+Mvf/kLRo0ahZCQEERFRWHBggW4efOmQR1NfTt16hTGjh2L0NBQjBkzRvwef/XVVxgzZoz4s/3+++/vakdOTg7+8Ic/IDw8HI888ghef/11/PDDD3eV++c//4nY2FiEhIRg+PDhyMrKarZvf/vb38T2PPbYY3jrrbfw008/tbpPjO23IAj46KOP8PjjjyMsLAwTJkzAhQsXWq3/QXVo8y3YkUGDBqFbt27Izs7Gk08+abAuOzsbvXr1glqtxoULFzB+/Hh06dIFkyZNgouLC7744gu88cYbWLVqlfjeq1ev4vDhw3j66afRo0cPlJeX469//Stefvll7N+/H126dDHYxkcffQSFQoH4+HjU19dDoVBg9erVWL9+PX7/+98jNDQUVVVVKCwsxNmzZzFkyBCj+rVr1y4sXrwYTz31FCZOnIi6ujoUFRXhu+++w5gxY1p87+XLl5GYmIgXXngBMTEx2L17N+bOnYsBAwagX79+AIDq6mrExcXhhx9+QGxsLIKCgvDrr7/i66+/xi+//AIPDw/o9Xq8/vrr+Ne//oUXX3wRfn5+OH/+PDZv3oxLly7ho48+MvbHBADQ6XT43//9X4SFhWH27Nk4duwYVq1aBZ1OJwYJc3r00UcxYcIEbN26FVOnTsXDDz8MAPDz8zP7tqh98Xt/N6l+7xsbGxEfH4+BAwciKSkJHTt2BAAsWLAAe/bswdixYzFhwgRcu3YN27dvx/fff4+dO3dCoVAY9G3mzJl46aWX8Oyzz+KTTz7B1KlTkZqaig8//BDjx48HAPz5z3/Gm2++iYMHD0Imu3Uu4MSJE5g0aRJ69OiB6dOn47fffsO2bdswfvx4fP
bZZ+jRowcAoKioCPHx8fDw8MCMGTPQ2NiIVatWwdPT864+rV27FpmZmRg5ciReeOEFXL9+Hdu2bUNcXBz27t0LNze3ZveHsf3OzMzE2rVrER0djejoaJw9exZ//OMf0dDQYNL+N5lAZrV8+XIhODhYqKioEJdptVohKChIWLlypSAIgvDKK68Io0ePFurq6sQyer1eGDdunDBixAhxWV1dnaDT6Qzqv3r1qhAcHCysXr1aXHby5EkhICBA+N3vfifU1tYalH/22WeFyZMnP1CfXn/9dWHUqFEtltm9e7cQEBAgXL16VVw2bNgwISAgQDhz5oy4TKvVCsHBwUJGRoa4LDMzUwgICBC++uqru+rV6/WCIAjC3r17hcDAQIO6BEEQdu7cKQQEBAj/+te/jO5PUlKSEBAQICxatMhgO5MnTxYGDBggaLVaQRD+u19Pnjxp8P6rV68KAQEBwu7du8VlK1euFAICAgzKDRs2TEhKShJff/HFF/esj6wfv/fW871ftmyZwfIzZ84IAQEBwueff26w/B//+Mddy5v69v/+3/8Tlx07dkwICAgQQkNDhf/85z/i8qysrLu+788995wQGRkp/Prrr+Kyc+fOCYGBgcKcOXPEZdOmTRNCQkIM6isuLhb69+9vcJy5du2a0L9/f2Ht2rUGbS8qKhKCgoIMliclJQnDhg0zud9arVYYMGCAMHnyZPHnIgiC8MEHHwgBAQEGxzhz4yUeM3vuuedQX1+PgwcPissOHDiAxsZGPPvss7hx4wZOnjyJkSNHoqqqCtevX8f169fx66+/QqPR4NKlS/jll18AAI6OjmLy1ul0+PXXX+Hi4oK+ffve89Th888/L/5F0MTNzQ0XLlzApUuX7rtPbm5u+Pnnn1sdBHov/v7+eOSRR8TXHh4e6Nu3L65evSou++qrrxAYGHjXX58AxGvbBw8ehJ+fHx5++GFxn12/fh2DBw8GgPu6VBIXF2ewnbi4ODQ0NCAnJ8fkusi+8XtvSMrf+6YzHE0OHjwIV1dXDBkyxGAbAwYMgIuLy13b8Pf3N5imICwsDAAwePBg+Pr63rW8qc+lpaU4d+4cYmJi0LlzZ7FcYGAgoqKicPToUQC3fubHjx/H8OHDDerz8/ODRqMxaMuhQ4eg1+sxcuRIg7Z7eXmhd+/eLe4fY/t94sQJNDQ04OWXXzYYa/TKK680W7e58BKPmfn5+SEkJATZ2dn4/e9/D+DWad7w8HD07t0b+fn5EAQBmZmZyMzMvGcdWq0WXbp0gV6vx5YtW7Bjxw5cu3YNOp1OLHP7B7xJ0+nB2yUkJGDatGl46qmnEBAQAI1Gg+eeew6BgYFG92nSpEk4ceIEfv/736N3794YMmQIRo8ejYEDB7b63m7dut21zN3d3eAa55UrVzBixIgW67l8+TJ++OEHREZG3nO9VqtttS23k8lk6Nmzp8Gyvn37AgDvsiGT8XtvSKrf+w4dOqBr1653baOystLobdzZN1dXVwC4q16lUgng1jgbACgpKQHw3+PM7fz8/HD8+HHU1NSguroav/32G3r37n1Xub59+4pBBgAuXboEQRCa3Y8dOjT/K97Yfje1u0+fPgbrPTw84O7u3mz95sCA0gaef/55vPfee/j5559RX1+PvLw8LFiwAADEEfZ//OMfMXTo0Hu+v1evXgCAdevWITMzE7GxsUhMTIS7uztkMhnS0tIMBnM2ufOvKODW2IdDhw7h73//O7799lt8+umn2Lx5M1JTU8UDaWv8/Pxw8OBBfPPNNzh27Bi++uor7NixA2+88QYSEhJafK9cLjdqG63R6/UICAjAn/70p3uuv/PgYA7N3ZnQnndJkPXg9/6/pPq9v/3s1O3b8PT0xLJly+75Hg8PD4PXzfWtueX3+pmZi16vh4ODAzZs2HDP7bu4uLT4XlP6bQkMKG3gmWeeQUZGBvbt24fffvsNCoUCI0eOBADxr3aFQoGoqKgW6/nyyy8xaNAgpKWlGS
yvqKjAQw89ZHR7OnfujNjYWMTGxqK6uhovv/wyVq1aZfSBCrj1QX/mmWfwzDPPoL6+HjNmzMC6deswZcqUB769tle
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 2 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[[\"years_since_built\", \"years_since_remodeled\"]].hist(bins=20);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two factor variables *recently_built* and *recently_remodeled* are created, indicating that the corresponding action took place within the 10 years before the sale. The two scatter plots below suggest that these groups of \"recent vs. old\" affect the price."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 94,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Both flags share one cutoff: at most 10 years between the event and the sale.\n",
"df[\"recently_built\"] = df[\"years_since_built\"].le(10).astype(\"int64\")\n",
"df[\"recently_remodeled\"] = df[\"years_since_remodeled\"].le(10).astype(\"int64\")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 95,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3RUVdfA4d/MZNJ77wkkhAAJvRNAEBBRVEAUFQuiYkF8VewNFQUV348XGygqoCiiFEXBgiKC9BJ6D4GEhFTS+8x8fxwyk0kCJCEQyn7WmmXunXPvPZMsyc45++yjMZlMJoQQQgghxHnRNnUHhBBCCCGuBBJUCSGEEEI0AgmqhBBCCCEagQRVQgghhBCNQIIqIYQQQohGIEGVEEIIIUQjkKBKCCGEEKIR2DR1B64mRqOR9PR0nJyc0Gg0Td0dIYQQQtSByWSisLAQX19ftNozj0dJUHURpaen07dv36buhhBCCCEaYPXq1fj7+5/xfQmqLiInJydA/VCcnZ2buDdCCCGEqIuCggL69u1r/j1+JhJUXUSVU37Ozs4SVAkhhBCXmXOl7kiiuhBCCCFEI5CgSgghhBCiEUhQJYQQQgjRCCSnSgghhLhIDAYD5eXlTd0NUY1er0en0533fSSoEkIIIS4wk8nEyZMnycnJaequiDNwd3fH39//vOpISlAlhBBCXGCVAZWvry+Ojo5SAPoSYjKZKCoqIj09HYCAgIAG30uCKiGEEOICMhgM5oDKy8urqbsjauHg4ACoIt2+vr4NngqURHUhhBDiAqrMoXJ0dGzinoizqfz5nE/OmwRVQgghxEUgU36Xtsb4+UhQJYQQQgjRCCSoEkIIIYRoBBJUCSGEuKoZjCa2JGYzb30iv+xMIT2vpKm7dMlavHgxnTt3bpR7bdy4kZYtW5KXl3de93n++ed59NFHzcd33303b7311vl2r0Fk9Z8QQoir2oaELO7+fCNGkzq+pX0Qbw+PwdH26v4V2b9/f+655x7uu+++pu7KWb300kuYTKYzvn8xP4eMVAkhhLiqfb812RxQASyNP0FCRmGT9aesrKzJnn05cnFxwdXVtam7AUhQJYQQ4irnYmddk0irARvdxVupd/fdd/PGG2/w1ltv0a1bN8aOHcvBgwd54IEH6NChAz179uSZZ54hOzvbfI3RaOSzzz5j4MCBxMTEcM011/DJJ5+Y309NTeWJJ56gc+fOdO3alUceeYTk5GTz+5VTZp9//jlxcXF069aN119/3VxO4O677+bEiRNMmTKFli1b0rJlyxr9Tk5OJjo6ml27dlmdnzNnDv369cNoNNbp82/bto2hQ4cSGxvLbbfdxsGDB83vffDBB9x888017t+/f/8an6U2dfkcjUmCKiGEEFe14R2DcbazTPWN7xdJc2/nOl2bU1RGUnYR5Ya6BRBnsmTJEvR6Pd9++y0TJ07k3nvvpXXr1vzwww/Mnj2brKws/vOf/5jbv//++3z22Wc8+uijLF++nGnTpuHt7Q2oOktjx47FycmJ+fPn8+233+Lo6MgDDzxgNQq2ceNGjh8/zty5c5k6dSpLlixhyZIlgApm/P39mTBhAmvXrmXt2rU1+hwcHEzPnj1ZvHix1fnFixczbNgwtNq6hRjvvvsuzz//PD/88AOenp48/PDDjbY/Yl0+R2O6uieMhRBCXPU6hHqw9LGeHEzLx9PJjphAN2xtzh0QbEzI4qUlu0nILODWTsE83r8FIZ4NK/AZHh7Os88+C8DHH39M69ateeqpp8zvv/322/Tt25ejR4/i4+PDvHnzePXVVxk2bBgAoaGh5gTy5cuXYzQaeeutt8y1l6ZMmUKXLl3YtGkTcXFxALi5ufHqq6+i0+mIiIigb9++rF+/nttuuw13d3d0Oh1OTk74+Picsd+33norkyZN4oUXXsDW1pY9e/Zw8OBBPv744zp/9vHjx9OrVy8Apk6dSt++ffnjjz8YMmRIPb6Dta
vr52gsElQJIYS46kX6uhDp61Ln9qm5xTw6fxtZhWrkZ+GWZJp5O/PINRENen6bNm3MX+/fv5+NGzfSoUOHGu2OHz9Ofn4+ZWVldO/evdZ77d+/n+PHj9OxY0er86WlpRw/ftx8HBkZabUdi4+Pj9XUW10MGDCAN954gz/++IMbbriBJUuW0K1bN4KDg+t8j/bt25u/dnd3p1mzZiQkJNSrH5cKCaqEEEKIekrPKzUHVJX+OZje4KCqcu85gKKiIvr168fEiRNrtPPx8SEpKems9yoqKqJNmzZMmzatxnuenp7mr21srEMAjUZz1lV0tbG1teWWW25h8eLFDBw4kGXLlvHSSy/V6x5nU1ufKioqGu3+jU2CKiGEEKKegtztCfZwIPlUsfncdW0CGuXebdq04bfffiMoKKhG4ANqqtDe3p4NGzYQEhJS6/UrVqzAy8sLZ+e65YbVRq/X1ynZfOTIkdx444188803GAwGBg0aVK/nxMfHExgYCEBubi6JiYk0b94cUEFgZmYmJpPJPJW5b9++C/I5GoMkqgshhBD15O1iz8d3daR/S1/8XO14amAU18f6N8q977zzTnJzc3nqqafYuXMnx48fZ82aNbzwwgsYDAbs7Ox48MEHee+991i6dCnHjx8nPj6e77//HoChQ4fi4eHBI488wpYtW0hKSmLjxo1MnjyZkydP1rkfQUFBbN68mbS0NKuVh9VFRETQrl07pk2bxg033IC9vX29Pu/HH3/M+vXrOXjwIM8//zweHh4MGDAAgG7dupGdnc1nn33G8ePHmT9/PmvWrKnX/ev6ORqDjFQJIYQQDdA22J1P7u5IYWkFnk52jXZfPz8/vv32W6ZNm8bYsWMpKysjMDCQ3r17m1fUPfroo+h0OmbMmEF6ejo+Pj6MGjUKUFOJX3/9NdOmTWP8+PEUFhbi5+dHjx496jVyNWHCBF599VUGDBhAWVkZBw4cOGPbW2+9le3btzNixIh6f96nn36at956i8TERFq1asUnn3yCra0toAK21157jVmzZvHJJ58waNAg7r//fhYuXHhBPsf50pjqO4EqGqygoIBOnTqxdevW8xqSFUIIcfkoKSnh6NGjNGvWrN6jOJeLjz76iF9//ZVly5Y1dVca7Gw/p7r+/pbpPyGEEEI0SGFhIQcPHmT+/PncfffdTd2dJifTf0IIIYRokDfffJOff/6ZAQMG1Jj6e/XVV884cjV06FDeeOONi9HFi0qCKiGEEEI0yNSpU5k6dWqt7z3xxBOMHTu21veu1BQYCaqEEEII0ei8vLzw8vJq6m5cVJJTJYQQQgjRCCSoEkIIIYRoBBJUCSGEEEI0AgmqhBBCCCEagQRVQgghhBCNQIIqIYQQQohGIEGVEEIIIZrM/Pnz6d+/P7GxsYwcOZKdO3c2dZcaTIIqIYQQQgBQWm7gZG4JpeWGi/K85cuXM2XKFB577DGWLFlCdHQ0Y8eOJSsr66I8v7E1aVDVv39/WrZsWeP1+uuvA1BaWsrrr79Ot27d6NChA48//jiZmZlW90hJSeGhhx6iXbt29OjRg3feeYeKigqrNhs3bmTYsGHExMQwcOBAFi9eXKMv54qU69IXIYQQ4nJ1MC2f/yyMp//7f/OfhfEcTMu/4M/88ssvue222xgxYgSRkZG8/vrr2Nvbs2jRogv+7AuhSYOqH374gbVr15pfX375JQCDBw8G4O2332bVqlVMnz6dr776ivT0dMaPH2++3mAwMG7cOMrLy1mwYAFTp05lyZIlzJgxw9wmKSmJcePG0a1bN3788UfuvfdeXn75ZdasWWNuU5dI+Vx9EUIIIS5XpeUG/m/lQVbsOklRmYEVu04yfeXBCzpiVVZWxp49e+jZs6f5nFarpWfPnmzfvv2CPfdCatKgytPTEx8fH/Nr1apVhIaG0rVrV/Lz81m0aBHPP/88PXr0ICYmhrfffpvt27cTHx8PwNq1azl8+DDvvfcerVq1om/fvjzxxB
PMnz+fsrIyABYsWEBwcDDPP/88ERERjB49muuuu445c+aY+3GuSLkufRFCCCEuV6eKyll9IMPq3N8HMjhVVH7hnnn
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_built\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 96,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeViUVfvA8e8MDPu+gyAqCKiAuAvhmktZmkuWlpVmZYvZptmemaWVva+v1S8tKy0ts1zK1BbLXHLfV1RUBATZ932Z3x9HZhhBRSRxuT/XNVc8z5znec6gyc0597mPRq/X6xFCCCGEEFdE29gdEEIIIYS4EUhQJYQQQgjRACSoEkIIIYRoABJUCSGEEEI0AAmqhBBCCCEagARVQgghhBANQIIqIYQQQogGYN7YHbiZVFZWkpqaiq2tLRqNprG7I4QQQog60Ov1FBQU4OHhgVZ74fEoCaquotTUVHr06NHY3RBCCCFEPaxfvx4vL68Lvi9B1VVka2sLqD8UOzu7Ru6NEEIIIeoiPz+fHj16GH6OX4gEVVdR1ZSfnZ2dBFVCCCHEdeZSqTuSqC6EEEII0QAkqBJCCCGEaAASVAkhhBBCNADJqboGVVRUUFZW1tjdENcxnU6HmZlZY3dDCCFuKhJUXUP0ej1nz54lOzu7sbsibgBOTk54eXlJTTQhhLhKJKi6hlQFVB4eHtjY2MgPQ1Ever2ewsJCUlNTAfD29m7kHgkhxM1BgqprREVFhSGgcnV1bezuiOuctbU1oArOenh4yFSgEEJcBZKofo2oyqGysbFp5J6IG0XV3yXJzxNCiKtDgqprjEz5iYYif5eEEOLqkqBKCCGEEKIBSFAlhBBCCNEAJKgSDW7ZsmV07Nixsbtx1Wzbto3g4GByc3PrfE3v3r2ZP3/+FT33o48+4q677rqiewghoKJSz864TL7eEseq/Umk5hY3dpfEdUpW/4kr0rt3bx588EFGjx7d2F0RQoh62Xoygwe+2EalXh0PjmjCu0NDsbGQH5Hi8shI1XWqtLS0sbvQKPR6PeXl5Y3dDSHEDeSHXYmGgApgxd4znEwraLwOieuWBFXXiQceeICpU6fyzjvv0KVLF8aOHcuxY8d45JFHaNeuHVFRUUyaNInMzEzDNZWVlXz++ef07duX0NBQevbsyaeffmp4Pzk5mWeeeYaOHTvSuXNnnnjiCRITEw3vv/TSSzz55JN88cUXREdH06VLF9566y3DEv0HHniAM2fOMH36dIKDgwkODq7R78TEREJCQjhw4IDJ+fnz59OrVy8qKysv+rmrptbWr1/P0KFDCQsLY9euXVRWVjJ37lx69+5NeHg4gwYN4tdff61x3caNGxk8eDDh4eE8+OCDZGRksH79em6//Xbat2/PCy+8QFFRkeG60tJSpk2bRmRkJGFhYYwcOZL9+/eb9Gn9+vX079+f8PBww/fgfDt37uS+++4jPDycHj16MG3aNAoLCy/4OXNzc3n11Vfp2rUr7du358EHHyQmJsakzWeffUZUVBTt2rXjlVdeoaSk5KLfOyFE3dhbmtZx02rA3ExWz4rLJ0HVdWT58uXodDq+++47Jk6cyEMPPUTr1q358ccfmTdvHhkZGTz77LOG9h9++CGff/45Tz75JKtXr2bmzJm4ubkBqnbR2LFjsbW1ZdGiRXz33XfY2NjwyCOPmIyCbdu2jfj4eBYsWMCMGTNYvnw5y5cvB1ROj5eXFxMmTGDTpk1s2rSpRp99fX2Jiopi2bJlJueXLVvGkCFD0Grr9lfwww8/5IUXXmD16tUEBwczd+5cVqxYwVtvvcWqVasYPXo0kyZNYvv27SbXffzxx7z++ussXryYs2fP8uyzz/L111/z4Ycf8tlnn7Fp0ya++eYbQ/v333+f3377zfBZ/f39eeSRRwxbByUnJzN+/Hh69erFihUrGD58OB9++KHJM+Pj43n00Ufp168fP//8M//973/ZtWsXb7
/99gU/3zPPPENGRgaff/45y5Yto02bNjz00EOG565evZqPPvqI5557jqVLl+Lu7s63335bp++dEOLihrb3xc7SONU3vlcgLdzs6nRtdmEpCZmFlFVc/BdEcZPQi6smLy9PHxQUpM/Ly6vxXlFRkf7w4cP6oqKiWq8dNWqUfvDgwYbjTz75RP/www+btElOTtYHBQXpT548qc/Ly9OHhobqlyxZUuv9VqxYoe/fv7++srLScK6kpEQfHh6u37hxo16v1+snT56s79Wrl768vNzQZsKECfpnn33WcNyrVy/9V199ZXLvpUuX6jt06GA4XrVqlb5Tp076kpISvV6v1x88eFAfHBysT0hIqLVv1W3dulUfFBSk/+OPP0z62bZtW/3u3btN2r7yyiv6559/3uS6zZs3G96fO3euPigoSB8fH2849/rrrxu+jwUFBfo2bdrof/75Z8P7paWl+ujoaP3nn3+u1+v1+g8//FA/YMAAk+d+8MEH+qCgIH1OTo6hH6+//rpJmx07duhDQkL0xcXFer3e9Pu2Y8cOffv27Q3fnyp9+vTRL168WK/X6/X33nuvfsqUKSbvDx8+XD9o0KALfu8u9XdKCGF0PCVXv2r/Gf2WE+n6vKKyOl2z9US6/taZf+ubv/SLftIPe/XxGQX/ci9FY7nYz+/qJAvvOtKmTRvD1zExMWzbto127drVaBcfH09eXh6lpaV07dq11nvFxMQQHx9P+/btTc6XlJQQHx9vOA4MDDTZ4sTd3Z1jx45dVr/79OnD1KlT+eOPP7jjjjtYvnw5Xbp0wdfXt873CAsLM3x9+vRpioqKePjhh03alJWV0apVK5Nz1ackXV1dsba2xs/Pz3DOzc3NMDUZHx9PWVmZyfdEp9MRHh7OiRMnADhx4gTh4eEmz4iIiDA5jomJ4ejRo6xcudJwTq/XU1lZSWJiIgEBASbtjx49SmFhIV26dDE5X1xcbPizOHHiBCNGjKjx3G3btiGEuHKBHvYEetjXuX1yThFPLtpNRoEa2V+yM5HmbnY80TPgEleKG5kEVdeRqv3cAAoLC+nVqxcTJ06s0c7d3Z2EhISL3quwsJA2bdowc+bMGu+5uLgYvjY3N/0rotFo0Ov1519yURYWFgwePJhly5bRt29fVq5cyauvvnpZ9zj/swPMnTsXT0/PGs+qrnr/NRpNrZ/nUnldl6uwsJARI0bwwAMP1Hivts2NCwoKcHd3N5mGrGJvX/d/5IUQV09qbokhoKqy4ViqBFU3OQmqrlNt2rTht99+o0mTJjUCBYBmzZphZWXF1q1bTUZmql+/Zs0aXF1dsbOrW+5AbXQ6XZ2CkuHDh3PnnXfy7bffUlFRQb9+/er9zICAACwsLEhKSqJz5871vs/5mjZtik6nY/fu3TRp0gRQo18HDhzgoYceMjz7r7/+Mrlu3759JsetW7cmNjYWf3//Oj23TZs2pKenY2ZmdsHRu4CAAPbt28fgwYMv+FwhxNXTxMkKX2drErOMC136t6n5S5O4uUii+nXqvvvuIycnh+eff579+/cTHx/Pxo0befnll6moqMDS0pJHH32UDz74gBUrVhAfH8/evXv54YcfABg4cCDOzs488cQT7Ny5k4SEBLZt28a0adM4e/ZsnfvRpEkTduzYQUpKisnKw/MFBATQtm1bZs6cyR133IGVlVW9P7udnR0PP/ww06dPZ/ny5cTHx3Po0CG++eYbQxJ9fdjY2DBy5Ejef/99NmzYQGxsLK+//jrFxcXcfffdAIwYMYK4uDjee+89Tp48ycqVK2s889FHH2XPnj1MnTqVI0eOEBcXx9q1a5k6dWqtz42KiiIiIoKnnnqKTZs2kZiYyO7du/nvf/9rmJp88MEHWbp0KUuXLuXUqVPMnj2b48eP1/uzCiGujJu9Ff93f3t6B3vg6WDJ832DuD3Mq7G7JRqZjFRdpzw9Pfnuu++YOXMmY8eOpbS0FB8fH7p162ZYUffkk09iZmbG7N
mzSU1Nxd3d3ZCXY21tzcKFC5k5cybjx4+noKAAT09PIiMjL2vkasKECbzxxhv06dOH0tJSjh49esG2d999N3v27GH
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_remodeled\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 97,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Yr Sold\"]\n",
"del df[\"Year Built\"]\n",
"del df[\"Year Remod/Add\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 98,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"age_columns = [\n",
" \"remodeled\", \"years_since_built\", \"years_since_remodeled\",\n",
" \"recently_built\", \"recently_remodeled\",\n",
"]\n",
"new_variables.extend(age_columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 99,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" recently_built recently_remodeled \n",
"Order PID \n",
"1 526301100 0 0 \n",
"2 526350040 0 0 \n",
"3 526351010 0 0 \n",
"4 526353030 0 0 \n",
"5 527105010 0 0 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 99,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[age_columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Outliers\n",
"\n",
"The instructors' notes state:\n",
"\n",
"> **Five observations** that an instructor may wish to remove from the data set before giving it to students (a plot of SALE PRICE versus GR LIV AREA will quickly indicate these\n",
"points). Three of them are true **outliers** (Partial Sales that likely don’t represent actual market values) and two of them are simply unusual sales (very large houses priced\n",
"relatively appropriately). I would **recommend removing any houses with more than\n",
"4000 square feet** from the data set (which eliminates these five unusual observations)\n",
"before assigning it to students.\n",
"\n",
"To apply a more \"rigorous\" approach, outlier detection is conducted with a so-called Isolation Forest."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 100,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Use only numeric columns that are strongly correlated with the target.\n",
"# This mitigates the risk that a \"not so good\" chosen factor variable introduced\n",
"# in this notebook causes an observation to be removed as an outlier.\n",
"with open(\"data/correlated_variables.json\", \"r\") as file:\n",
"    content = json.load(file)\n",
"strongly_correlated = content[\"strongly_correlated\"]\n",
"# Sort the selected columns: the iteration order of a set of strings changes\n",
"# with the per-process hash seed, and the seeded Isolation Forest picks its\n",
"# split features by position, so an unordered selection is not reproducible.\n",
"df_encoded = encode_ordinals(df[sorted(set(strongly_correlated) & set(df.columns))])\n",
2024-07-10 01:31:28 +02:00
"iso = IsolationForest(n_estimators=100, bootstrap=True, contamination=0.005, random_state=random_state)\n",
2021-05-25 08:22:14 +02:00
"outliers = pd.DataFrame(\n",
" iso.fit_predict(df_encoded), columns=[\"outlier\"], index=df.index\n",
")\n",
"# Negative predictions become 1 (outlier), everything else becomes 0.\n",
"outliers[\"outlier\"] = (outliers[\"outlier\"] < 0).astype(\"int64\")\n",
"df = pd.concat([df, outliers], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The five aforementioned outliers are among the ones detected."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 101,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2024-07-10 01:48:08 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAGwCAYAAACAZ5AeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUdfb48ffUzEwmvffeIIReEgIoFhArYi8oYu+97bo2VrHsflldfyv2hiAKdrEjgvQSeoCQhPReJ2X674+BgSGUgIEgnNfz8MjcuXPvGZCZk085R+F0Op0IIYQQQog/RdnbAQghhBBCnAwkqRJCCCGE6AGSVAkhhBBC9ABJqoQQQggheoAkVUIIIYQQPUCSKiGEEEKIHiBJlRBCCCFED1D3dgCnEofDQU1NDd7e3igUit4ORwghhBDd4HQ6aWtrIzQ0FKXy4ONRklQdRzU1NYwZM6a3wxBCCCHEUVi0aBHh4eEHfV6SquPI29sbcP2lGI3GXo5GCCGEEN1hMpkYM2aM+3v8YCSpOo72TPkZjUZJqoQQQoi/mMMt3ZGF6kIIIYQQPUCSKiGEEEKIHiBJlRBCCCFED5A1VUIIIYQ4LLvdjtVq7e0wjgmNRoNKpfrT15GkSgghhBAH5XQ6qaqqoqmpqbdDOab8/f0JDw//U3UkJakSQgghxEHtSahCQ0MxGAwnXfFqp9NJe3s7NTU1AERERBz1tSSpEkIIIcQB2e12d0IVFBTU2+EcM3q9HnAV6Q4NDT3qqUBZqC6EEEKIA9qzhspgMPRyJMfenvf4Z9aNSVIlhBBCiEM62ab8DqQn3qMkVUIIIYQQPUCSKiGEEEKIHiBJlRBCiFObww4ly2Hlm7D5c2it6u2IxG5paWn8/PPPAJSVlZGWlsbWrVt7OaqDk91/QgghTm3Fi+HDieB0uB73uxzO/z/QevduXKeQV199lZ9//pkvv/zyoOdERESwZMkSAgICjmNkR0ZGqoQQQpza8mbtTagANn4C9QW9F484IJVKRUhICGr10Y8HWSyWHoyoK0mqhBBCnNq0vp6PFUpQykTOkbBYLEybNo3s7Gz69evHlVdeyYYNGwCYP38+Q4YM8Tj/559/Ji0tzf38f//7X/Lz80lLSyMtLY358+d3uceBpv+2b9/OjTfeyMCBA8nJyeGhhx6ioaHB/fy1117LM888wz//+U+GDx/O1KlTj8Xbd5OkSgghxKltwBXg5bP38agHISile6/taITGErCfnD3xuuvFF1/khx9+YPr06Xz++efExcVx4403dqu1zYQJE7jhhhtISUlhyZIlLFmyhAkTJhz2dS0tLVx33XX06dOHzz77jLfeeov6+nruvfdej/M+//xzNBoNs2fP5umnnz7Kd9g9kooLIYQ4tUUPhRt/gZqt4B0M4f1BrT3864r/gG/ug/odMOBqGP0QBMQd+3hPMO3t7cyZM4fnn3+eMWPGAPDss8/yxx9/8NlnnxEYGHjI1+t0OgwGg3t6r7s++ugj+vTpw/333+8+9txzzzFmzBiKiopISEgAID4+nocffvgo3tmRk6RKCCGECElz/equ5nL4dDK01bker/sQgpIg975jE98JrKSkBKvVyqBBg9zHNBoNWVlZ7Ny587BJ1dHKz89nxYoVDBw48IAx7Umq+vbte0zufyCSVAkhhBBHylS9N6Hao+CXUzKpOhylUonT6fQ49mdawezR3t7O6aefzoMPPtjluX1HvPb09TseZE2VEEIIcaT8osEv1vNYxvm9E0svi42NRaPRsHbtWvcxq9XKxo0bSU5OJiAggLa2Ntrb293P5+fne1xDo9HgcDg4En379mXHjh1ERUURFxfn8au3ehVKUiWEEEIcKWMoXPY+pIwDnwg4/W/Q58LejqpXGAwGrrzySl588UV+//13CgoKeOKJJ+js7OSSSy6hf//+6PV6/v3vf1NSUsLXX3/dZXdfVFQUZWVlbN26lYaGhm6VPr
jqqqtobm7m/vvvZ8OGDZSUlLB48WIee+wx7Hb7sXq7hyRJlRBCCHE0ogbB5R/ArX/AmIfBJ7y3I+o1Dz74IOPGjePhhx9m4sSJ7Nq1i7feegs/Pz/8/f156aWX+P333zn//PP59ttvueuuuzxeP27cOEaNGsXkyZPJzs7mm2++Oew9w8LCmD17Ng6Hg6lTp3L++efz3HPP4ePjg1LZO+mNwrn/RKc4ZkwmE4MHD2bNmjUYjcbeDkcIIYQ4pM7OTvdOOp1O19vhHFOHeq/d/f6WkSohhBBCiB4gSZUQQgghRA+QpEoIIYQQogdIUiWEEEII0QMkqRJCCCGE6AGSVAkhhBBC9ABJqoQQQggheoAkVUIIIYQQPUCSKiGEEEKIHiBJlRBCCCFOSrNmzWLs2LH069ePSy+9lA0bNhzT+0lSJYQQQoiTznfffcfzzz/PHXfcweeff056ejpTp06lvr7+mN1TkiohhBBCHBdmq52q5k7MVvsxv9e7777LZZddxqRJk0hOTubpp59Gp9Mxb968Y3bPXk2qxo4dS1paWpdfTz/9NABms5mnn36a4cOHM3DgQO666y7q6uo8rlFRUcHNN99M//79yc7O5oUXXsBms3mcs2LFCiZOnEhmZiZnnXUW8+fP7xLL4YYIuxOLEEIIIQ5se3Ur987NY+y/fuPeuXlsr249ZveyWCxs3ryZnJwc9zGlUklOTg7r1q07Zvft1aTqs88+Y8mSJe5f7777LgDjx48H4LnnnmPhwoXMmDGDDz/8kJqaGu6880736+12O7fccgtWq5U5c+Ywffp0Pv/8c1555RX3OaWlpdxyyy0MHz6cL7/8kuuuu46///3vLF682H1Od4YIDxeLEEIIIQ7MbLXzfz9vZ8HGKtotdhZsrGLGz9uP2YhVY2MjdrudoKAgj+NBQUHHdECkV5OqwMBAQkJC3L8WLlxIbGwsw4YNo7W1lXnz5vHoo4+SnZ1NZmYmzz33HOvWrSMvLw+AJUuWUFBQwEsvvURGRgZjxozhnnvuYdasWVgsFgDmzJlDdHQ0jz76KElJSVxzzTWMGzeO9957zx3H4YYIuxOLEEIIIQ6ssd3Kom21Hsd+21ZLY7u1lyI6Nk6YNVUWi4WvvvqKSZMmoVAo2LRpE1ar1WPoLikpicjISHcik5eXR2pqKsHBwe5zcnNzMZlMFBQUuM/Jzs72uFdubq77Gt0ZIuxOLEIIIYQ4sACDhjFpIR7HTksLIcCgOTb3CwhApVJ1WZReX1/vkTP0tBMmqfr5559pbW1l4sSJANTV1aHRaPD19fU4LygoiNraWvc5+//h7Hl8uHNMJhOdnZ3dGiLsTixCCCGEODAvjYr7zkxlQr9wDFoVE/qFc++ZqXhpVMfkflqtlr59+7Js2TL3MYfDwbJlyxg4cOAxuSeA+phd+QjNmzeP0aNHExYW1tuhCCGEEKKHpYb58H+XDaCx3UqAQXPMEqo9pkyZwiOPPEJmZiZZWVm8//77dHR0cPHFFx+ze54QSVV5eTlLly7l1VdfdR8LDg7GarXS0tLiMUJUX19PSEiI+5z9d+ntGV3a95z9F6XV1dVhNBrR6XQolcrDDhF2JxYhhBBCHJqXRkW437FNpvaYMGECDQ0NvPLKK9TW1pKRkcFbb7118k//zZ8/n6CgIE477TT3sczMTDQajcfQXWFhIRUVFQwYMACAAQMGsH37do+EaOnSpRiNRpKTk93nLF++3ON+S5cudV+jO0OE3YlFCCGEECeWa665hoULF7Jp0yY+/fRT+vfvf0zv1+sjVQ6Hg/nz53PRRRehVu8Nx8fHh0mTJjF9+nT8/PwwGo1MmzaNgQMHuhOZ3NxckpOTefjhh3nooYeora1lxowZXH311Wi1WgCuuOIKZs2axYsvvsikSZNYvnw5CxYsYObMme57HW6IsDuxCCGEEOLU1utJ1dKlS6moqGDSpEldnnv88cdRKpXcfffdWCwWcnNzefLJJ93Pq1QqXn
/9dZ566ikuv/xy9Ho9EydO5O6773afExMTw8yZM3n++ef54IMPCA8PZ9q0aYwaNcp9TneGCA8XixBCCCFObQqn0+n
2021-05-25 08:22:14 +02:00
"text/plain": [
2024-07-10 01:48:08 +02:00
"<Figure size 640x480 with 1 Axes>"
2021-05-25 08:22:14 +02:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"outlier\", s=15, data=df);"
]
},
2024-07-10 01:31:28 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We concur with the notes from the paper and remove only the sales of houses with more than 4000 square feet of living area (*Gr Liv Area*)."
]
},
2021-05-25 08:22:14 +02:00
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 102,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Remove the outliers.\n",
2024-07-10 01:31:28 +02:00
"df = df[df[\"Gr Liv Area\"] <= 4000]"
2021-05-25 08:22:14 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the Data"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 103,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Keep the columns listed in ALL_COLUMNS or created in this notebook that are\n",
"# still present, in alphabetical order, followed by the target variables.\n",
"candidate_columns = set(ALL_COLUMNS) | set(new_variables)\n",
"final_columns = sorted(candidate_columns & set(df.columns)) + TARGET_VARIABLES\n",
"df = df[final_columns]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Discarding useless predictors and adding new ones changed the dataset significantly."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 104,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-07-10 01:31:28 +02:00
"(2893, 109)"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 104,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 105,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>1st Flr SF (box-cox-0)</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Gr Liv Area (box-cox-0)</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Area (box-cox-0.1)</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Screen Porch</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.2)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>abnormal_sale</th>\n",
" <th>air_cond</th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" <th>major_street</th>\n",
" <th>new_home</th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" <th>park</th>\n",
" <th>partial_sale</th>\n",
" <th>railway</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>18.196923</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>112.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>19.344072</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>15.499290</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>120.0</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>17.333478</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>16.027549</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>108.0</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>19.203658</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>15.396064</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>21.548042</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>6.833032</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1629.0</td>\n",
" <td>7.395722</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>15.946705</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>19.016856</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 1st Flr SF (box-cox-0) 2nd Flr SF 3Ssn Porch \\\n",
"Order PID \n",
"1 526301100 1656.0 7.412160 0.0 0.0 \n",
"2 526350040 896.0 6.797940 0.0 0.0 \n",
"3 526351010 1329.0 7.192182 0.0 0.0 \n",
"4 526353030 2110.0 7.654443 0.0 0.0 \n",
"5 527105010 928.0 6.833032 701.0 0.0 \n",
"\n",
" Bedroom AbvGr Bsmt Cond Bsmt Exposure Bsmt Full Bath \\\n",
"Order PID \n",
"1 526301100 3 Gd Gd 1 \n",
"2 526350040 2 TA No 0 \n",
"3 526351010 3 TA No 0 \n",
"4 526353030 3 TA No 1 \n",
"5 527105010 3 TA No 0 \n",
"\n",
" Bsmt Half Bath Bsmt Qual Bsmt Unf SF BsmtFin SF 1 \\\n",
"Order PID \n",
"1 526301100 0 TA 441.0 639.0 \n",
"2 526350040 0 TA 270.0 468.0 \n",
"3 526351010 0 TA 406.0 923.0 \n",
"4 526353030 0 TA 1045.0 1065.0 \n",
"5 527105010 0 Gd 137.0 791.0 \n",
"\n",
" BsmtFin SF 2 BsmtFin Type 1 BsmtFin Type 2 Electrical \\\n",
"Order PID \n",
"1 526301100 0.0 BLQ Unf SBrkr \n",
"2 526350040 144.0 Rec LwQ SBrkr \n",
"3 526351010 0.0 ALQ Unf SBrkr \n",
"4 526353030 0.0 ALQ Unf SBrkr \n",
"5 527105010 0.0 GLQ Unf SBrkr \n",
"\n",
" Enclosed Porch Fence Fireplace Qu Fireplaces Full Bath \\\n",
"Order PID \n",
"1 526301100 0.0 NA Gd 2 1 \n",
"2 526350040 0.0 MnPrv NA 0 1 \n",
"3 526351010 0.0 NA NA 0 1 \n",
"4 526353030 0.0 NA TA 2 2 \n",
"5 527105010 0.0 MnPrv TA 1 2 \n",
"\n",
" Functional Garage Area Garage Cars Garage Cond \\\n",
"Order PID \n",
"1 526301100 Typ 528.0 2 TA \n",
"2 526350040 Typ 730.0 1 TA \n",
"3 526351010 Typ 312.0 1 TA \n",
"4 526353030 Typ 522.0 2 TA \n",
"5 527105010 Typ 482.0 2 TA \n",
"\n",
" Garage Finish Garage Qual Gr Liv Area \\\n",
"Order PID \n",
"1 526301100 Fin TA 1656.0 \n",
"2 526350040 Unf TA 896.0 \n",
"3 526351010 Unf TA 1329.0 \n",
"4 526353030 Fin TA 2110.0 \n",
"5 527105010 Fin TA 1629.0 \n",
"\n",
" Gr Liv Area (box-cox-0) Half Bath Kitchen AbvGr \\\n",
"Order PID \n",
"1 526301100 7.412160 0 1 \n",
"2 526350040 6.797940 0 1 \n",
"3 526351010 7.192182 1 1 \n",
"4 526353030 7.654443 1 1 \n",
"5 527105010 7.395722 1 1 \n",
"\n",
" Kitchen Qual Land Slope Lot Area Lot Area (box-cox-0.1) \\\n",
"Order PID \n",
"1 526301100 TA Gtl 31770.0 18.196923 \n",
"2 526350040 TA Gtl 11622.0 15.499290 \n",
"3 526351010 Gd Gtl 14267.0 16.027549 \n",
"4 526353030 Ex Gtl 11160.0 15.396064 \n",
"5 527105010 TA Gtl 13830.0 15.946705 \n",
"\n",
" Lot Shape Low Qual Fin SF Mas Vnr Area Misc Val Mo Sold \\\n",
"Order PID \n",
"1 526301100 IR1 0.0 112.0 0.0 5 \n",
"2 526350040 Reg 0.0 0.0 0.0 6 \n",
"3 526351010 IR1 0.0 108.0 12500.0 6 \n",
"4 526353030 Reg 0.0 0.0 0.0 4 \n",
"5 527105010 IR1 0.0 0.0 0.0 3 \n",
"\n",
" Open Porch SF Overall Cond Overall Qual Paved Drive \\\n",
"Order PID \n",
"1 526301100 62.0 5 6 P \n",
"2 526350040 0.0 6 5 Y \n",
"3 526351010 36.0 6 6 Y \n",
"4 526353030 0.0 5 7 Y \n",
"5 527105010 34.0 5 5 Y \n",
"\n",
" Pool Area Pool QC Screen Porch TotRms AbvGrd Total Bath \\\n",
"Order PID \n",
"1 526301100 0.0 NA 0.0 7 2.0 \n",
"2 526350040 0.0 NA 120.0 5 1.0 \n",
"3 526351010 0.0 NA 0.0 6 1.5 \n",
"4 526353030 0.0 NA 0.0 8 3.5 \n",
"5 527105010 0.0 NA 0.0 6 2.5 \n",
"\n",
" Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"1 526301100 1080.0 272.0 2736.0 \n",
"2 526350040 882.0 260.0 1778.0 \n",
"3 526351010 1329.0 429.0 2658.0 \n",
"4 526353030 2110.0 0.0 4220.0 \n",
"5 527105010 928.0 246.0 2557.0 \n",
"\n",
" Total SF (box-cox-0.2) Utilities Wood Deck SF \\\n",
"Order PID \n",
"1 526301100 19.344072 AllPub 210.0 \n",
"2 526350040 17.333478 AllPub 140.0 \n",
"3 526351010 19.203658 AllPub 393.0 \n",
"4 526353030 21.548042 AllPub 0.0 \n",
"5 527105010 19.016856 AllPub 212.0 \n",
"\n",
" abnormal_sale air_cond build_type_1Fam build_type_2Fam \\\n",
"Order PID \n",
"1 526301100 0 1 1 0 \n",
"2 526350040 0 1 1 0 \n",
"3 526351010 0 1 1 0 \n",
"4 526353030 0 1 1 0 \n",
"5 527105010 0 1 1 0 \n",
"\n",
" build_type_Twnhs found_BrkTil found_CBlock found_PConc \\\n",
"Order PID \n",
"1 526301100 0 0 1 0 \n",
"2 526350040 0 0 1 0 \n",
"3 526351010 0 0 1 0 \n",
"4 526353030 0 0 1 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch major_street new_home nhood_Blmngtn \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 1 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Blueste nhood_BrDale nhood_BrkSide nhood_ClearCr \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_CollgCr nhood_Crawfor nhood_Edwards nhood_Gilbert \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" nhood_Greens nhood_GrnHill nhood_IDOTRR nhood_Landmrk \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_MeadowV nhood_Mitchel nhood_NPkVill nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Names nhood_NoRidge nhood_NridgHt nhood_OldTown \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 0 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 1 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_SWISU nhood_Sawyer nhood_SawyerW nhood_Somerst \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_StoneBr nhood_Timber nhood_Veenker park \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" partial_sale railway recently_built recently_remodeled \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" SalePrice SalePrice (box-cox-0) \n",
"Order PID \n",
"1 526301100 215000.0 12.278393 \n",
"2 526350040 105000.0 11.561716 \n",
"3 526351010 172000.0 12.055250 \n",
"4 526353030 244000.0 12.404924 \n",
"5 527105010 189900.0 12.154253 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 105,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 106,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"data/data_clean_with_transformations_and_factors.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ames-housing",
2021-05-25 08:22:14 +02:00
"language": "python",
"name": "ames-housing"
2021-05-25 08:22:14 +02:00
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
2021-05-25 08:22:14 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 4
}