ames-housing/02_descriptive_visualizations.ipynb

4991 lines
2.1 MiB
Text
Raw Normal View History

2021-05-25 08:22:14 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Descriptive Visualizations\n",
"\n",
"The purpose of this notebook is to visually examine the nominal features, discard the useless ones among them, and create new factor variables.\n",
"\n",
"The \"main\" plot used in this notebook is *Gr Liv Area* vs. *SalePrice*, as the overall living area is the predictor most strongly correlated with the sales price (which is also very intuitive). Many of the nominal variables significantly change the slopes of the regression lines for sub-groups of data points."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"\n",
"from sklearn.ensemble import IsolationForest\n",
"\n",
"from utils import (\n",
" ALL_COLUMNS,\n",
" NOMINAL_VARIABLES,\n",
" TARGET_VARIABLES,\n",
" load_clean_data,\n",
" encode_ordinals,\n",
" print_column_list,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
2024-07-10 01:31:28 +02:00
"random_state = np.random.RandomState(42)"
2021-05-25 08:22:14 +02:00
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
2024-07-10 01:31:28 +02:00
"source": [
"pd.set_option(\"display.max_columns\", 120)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
2021-05-25 08:22:14 +02:00
"source": [
"sns.set_style(\"white\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data\n"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 5,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df = load_clean_data(\"data/data_clean_with_transformations.csv\")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 6,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 86)"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 6,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 7,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>1st Flr SF (box-cox-0)</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Alley</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bldg Type</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Central Air</th>\n",
" <th>Condition 1</th>\n",
" <th>Condition 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Exter Cond</th>\n",
" <th>Exter Qual</th>\n",
" <th>Exterior 1st</th>\n",
" <th>Exterior 2nd</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Foundation</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Garage Type</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Gr Liv Area (box-cox-0)</th>\n",
" <th>Half Bath</th>\n",
" <th>Heating</th>\n",
" <th>Heating QC</th>\n",
" <th>House Style</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Contour</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Area (box-cox-0.1)</th>\n",
" <th>Lot Config</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>MS SubClass</th>\n",
" <th>MS Zoning</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Mas Vnr Type</th>\n",
" <th>Misc Feature</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Neighborhood</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Roof Matl</th>\n",
" <th>Roof Style</th>\n",
" <th>Sale Condition</th>\n",
" <th>Sale Type</th>\n",
" <th>Screen Porch</th>\n",
" <th>Street</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.2)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>Year Built</th>\n",
" <th>Year Remod/Add</th>\n",
" <th>Yr Sold</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>BrkFace</td>\n",
" <td>Plywood</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>Fa</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>18.196923</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>112.0</td>\n",
" <td>Stone</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>Names</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>19.344072</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>1960</td>\n",
" <td>1960</td>\n",
" <td>2010</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>2</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>Y</td>\n",
" <td>Feedr</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>15.499290</td>\n",
" <td>Inside</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RH</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>120.0</td>\n",
" <td>Pave</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>17.333478</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>1961</td>\n",
" <td>1961</td>\n",
" <td>2010</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>Wd Sdng</td>\n",
" <td>Wd Sdng</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>CBlock</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>TA</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>16.027549</td>\n",
" <td>Corner</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>108.0</td>\n",
" <td>BrkFace</td>\n",
" <td>Gar2</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>Names</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>19.203658</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>1958</td>\n",
" <td>1958</td>\n",
" <td>2010</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>Gd</td>\n",
" <td>BrkFace</td>\n",
" <td>BrkFace</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>CBlock</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Ex</td>\n",
" <td>1Story</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>15.396064</td>\n",
" <td>Corner</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>020</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>Names</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Hip</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>21.548042</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>1968</td>\n",
" <td>1968</td>\n",
" <td>2010</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>6.833032</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>3</td>\n",
" <td>1Fam</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>Y</td>\n",
" <td>Norm</td>\n",
" <td>Norm</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>TA</td>\n",
" <td>TA</td>\n",
" <td>VinylSd</td>\n",
" <td>VinylSd</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>PConc</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>Attchd</td>\n",
" <td>1629.0</td>\n",
" <td>7.395722</td>\n",
" <td>1</td>\n",
" <td>GasA</td>\n",
" <td>Gd</td>\n",
" <td>2Story</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Lvl</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>15.946705</td>\n",
" <td>Inside</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>060</td>\n",
" <td>RL</td>\n",
" <td>0.0</td>\n",
" <td>None</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gilbert</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>CompShg</td>\n",
" <td>Gable</td>\n",
" <td>Normal</td>\n",
" <td>WD</td>\n",
" <td>0.0</td>\n",
" <td>Pave</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>19.016856</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>1997</td>\n",
" <td>1998</td>\n",
" <td>2010</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 1st Flr SF (box-cox-0) 2nd Flr SF 3Ssn Porch \\\n",
"Order PID \n",
"1 526301100 1656.0 7.412160 0.0 0.0 \n",
"2 526350040 896.0 6.797940 0.0 0.0 \n",
"3 526351010 1329.0 7.192182 0.0 0.0 \n",
"4 526353030 2110.0 7.654443 0.0 0.0 \n",
"5 527105010 928.0 6.833032 701.0 0.0 \n",
"\n",
" Alley Bedroom AbvGr Bldg Type Bsmt Cond Bsmt Exposure \\\n",
"Order PID \n",
"1 526301100 NA 3 1Fam Gd Gd \n",
"2 526350040 NA 2 1Fam TA No \n",
"3 526351010 NA 3 1Fam TA No \n",
"4 526353030 NA 3 1Fam TA No \n",
"5 527105010 NA 3 1Fam TA No \n",
"\n",
" Bsmt Full Bath Bsmt Half Bath Bsmt Qual Bsmt Unf SF \\\n",
"Order PID \n",
"1 526301100 1 0 TA 441.0 \n",
"2 526350040 0 0 TA 270.0 \n",
"3 526351010 0 0 TA 406.0 \n",
"4 526353030 1 0 TA 1045.0 \n",
"5 527105010 0 0 Gd 137.0 \n",
"\n",
" BsmtFin SF 1 BsmtFin SF 2 BsmtFin Type 1 BsmtFin Type 2 \\\n",
"Order PID \n",
"1 526301100 639.0 0.0 BLQ Unf \n",
"2 526350040 468.0 144.0 Rec LwQ \n",
"3 526351010 923.0 0.0 ALQ Unf \n",
"4 526353030 1065.0 0.0 ALQ Unf \n",
"5 527105010 791.0 0.0 GLQ Unf \n",
"\n",
" Central Air Condition 1 Condition 2 Electrical \\\n",
"Order PID \n",
"1 526301100 Y Norm Norm SBrkr \n",
"2 526350040 Y Feedr Norm SBrkr \n",
"3 526351010 Y Norm Norm SBrkr \n",
"4 526353030 Y Norm Norm SBrkr \n",
"5 527105010 Y Norm Norm SBrkr \n",
"\n",
" Enclosed Porch Exter Cond Exter Qual Exterior 1st \\\n",
"Order PID \n",
"1 526301100 0.0 TA TA BrkFace \n",
"2 526350040 0.0 TA TA VinylSd \n",
"3 526351010 0.0 TA TA Wd Sdng \n",
"4 526353030 0.0 TA Gd BrkFace \n",
"5 527105010 0.0 TA TA VinylSd \n",
"\n",
" Exterior 2nd Fence Fireplace Qu Fireplaces Foundation \\\n",
"Order PID \n",
"1 526301100 Plywood NA Gd 2 CBlock \n",
"2 526350040 VinylSd MnPrv NA 0 CBlock \n",
"3 526351010 Wd Sdng NA NA 0 CBlock \n",
"4 526353030 BrkFace NA TA 2 CBlock \n",
"5 527105010 VinylSd MnPrv TA 1 PConc \n",
"\n",
" Full Bath Functional Garage Area Garage Cars Garage Cond \\\n",
"Order PID \n",
"1 526301100 1 Typ 528.0 2 TA \n",
"2 526350040 1 Typ 730.0 1 TA \n",
"3 526351010 1 Typ 312.0 1 TA \n",
"4 526353030 2 Typ 522.0 2 TA \n",
"5 527105010 2 Typ 482.0 2 TA \n",
"\n",
" Garage Finish Garage Qual Garage Type Gr Liv Area \\\n",
"Order PID \n",
"1 526301100 Fin TA Attchd 1656.0 \n",
"2 526350040 Unf TA Attchd 896.0 \n",
"3 526351010 Unf TA Attchd 1329.0 \n",
"4 526353030 Fin TA Attchd 2110.0 \n",
"5 527105010 Fin TA Attchd 1629.0 \n",
"\n",
" Gr Liv Area (box-cox-0) Half Bath Heating Heating QC \\\n",
"Order PID \n",
"1 526301100 7.412160 0 GasA Fa \n",
"2 526350040 6.797940 0 GasA TA \n",
"3 526351010 7.192182 1 GasA TA \n",
"4 526353030 7.654443 1 GasA Ex \n",
"5 527105010 7.395722 1 GasA Gd \n",
"\n",
" House Style Kitchen AbvGr Kitchen Qual Land Contour \\\n",
"Order PID \n",
"1 526301100 1Story 1 TA Lvl \n",
"2 526350040 1Story 1 TA Lvl \n",
"3 526351010 1Story 1 Gd Lvl \n",
"4 526353030 1Story 1 Ex Lvl \n",
"5 527105010 2Story 1 TA Lvl \n",
"\n",
" Land Slope Lot Area Lot Area (box-cox-0.1) Lot Config \\\n",
"Order PID \n",
"1 526301100 Gtl 31770.0 18.196923 Corner \n",
"2 526350040 Gtl 11622.0 15.499290 Inside \n",
"3 526351010 Gtl 14267.0 16.027549 Corner \n",
"4 526353030 Gtl 11160.0 15.396064 Corner \n",
"5 527105010 Gtl 13830.0 15.946705 Inside \n",
"\n",
" Lot Shape Low Qual Fin SF MS SubClass MS Zoning \\\n",
"Order PID \n",
"1 526301100 IR1 0.0 020 RL \n",
"2 526350040 Reg 0.0 020 RH \n",
"3 526351010 IR1 0.0 020 RL \n",
"4 526353030 Reg 0.0 020 RL \n",
"5 527105010 IR1 0.0 060 RL \n",
"\n",
" Mas Vnr Area Mas Vnr Type Misc Feature Misc Val Mo Sold \\\n",
"Order PID \n",
"1 526301100 112.0 Stone NA 0.0 5 \n",
"2 526350040 0.0 None NA 0.0 6 \n",
"3 526351010 108.0 BrkFace Gar2 12500.0 6 \n",
"4 526353030 0.0 None NA 0.0 4 \n",
"5 527105010 0.0 None NA 0.0 3 \n",
"\n",
" Neighborhood Open Porch SF Overall Cond Overall Qual \\\n",
"Order PID \n",
"1 526301100 Names 62.0 5 6 \n",
"2 526350040 Names 0.0 6 5 \n",
"3 526351010 Names 36.0 6 6 \n",
"4 526353030 Names 0.0 5 7 \n",
"5 527105010 Gilbert 34.0 5 5 \n",
"\n",
" Paved Drive Pool Area Pool QC Roof Matl Roof Style \\\n",
"Order PID \n",
"1 526301100 P 0.0 NA CompShg Hip \n",
"2 526350040 Y 0.0 NA CompShg Gable \n",
"3 526351010 Y 0.0 NA CompShg Hip \n",
"4 526353030 Y 0.0 NA CompShg Hip \n",
"5 527105010 Y 0.0 NA CompShg Gable \n",
"\n",
" Sale Condition Sale Type Screen Porch Street TotRms AbvGrd \\\n",
"Order PID \n",
"1 526301100 Normal WD 0.0 Pave 7 \n",
"2 526350040 Normal WD 120.0 Pave 5 \n",
"3 526351010 Normal WD 0.0 Pave 6 \n",
"4 526353030 Normal WD 0.0 Pave 8 \n",
"5 527105010 Normal WD 0.0 Pave 6 \n",
"\n",
" Total Bath Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"1 526301100 2.0 1080.0 272.0 2736.0 \n",
"2 526350040 1.0 882.0 260.0 1778.0 \n",
"3 526351010 1.5 1329.0 429.0 2658.0 \n",
"4 526353030 3.5 2110.0 0.0 4220.0 \n",
"5 527105010 2.5 928.0 246.0 2557.0 \n",
"\n",
" Total SF (box-cox-0.2) Utilities Wood Deck SF Year Built \\\n",
"Order PID \n",
"1 526301100 19.344072 AllPub 210.0 1960 \n",
"2 526350040 17.333478 AllPub 140.0 1961 \n",
"3 526351010 19.203658 AllPub 393.0 1958 \n",
"4 526353030 21.548042 AllPub 0.0 1968 \n",
"5 527105010 19.016856 AllPub 212.0 1997 \n",
"\n",
" Year Remod/Add Yr Sold SalePrice SalePrice (box-cox-0) \n",
"Order PID \n",
"1 526301100 1960 2010 215000.0 12.278393 \n",
"2 526350040 1961 2010 105000.0 11.561716 \n",
"3 526351010 1958 2010 172000.0 12.055250 \n",
"4 526353030 1968 2010 244000.0 12.404924 \n",
"5 527105010 1998 2010 189900.0 12.154253 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 7,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Newly created variables are collected in the *new_variables* list."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 8,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Derived Characteristics\n",
"\n",
"Certain characteristics of a house are assumed to have a \"binary\" influence on the sales price. For example, the existence of a pool could be an important predictor while the exact size of the pool can be deemed not so important.\n",
"\n",
"The cell below creates 0/1 indicator variables (named \"has ...\") from a set of numeric variables."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 9,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Map each new 0/1 indicator column to the numeric column it is derived from.\n",
"derived_sources = {\n",
"    \"has 2nd Flr\": \"2nd Flr SF\",\n",
"    \"has Bsmt\": \"Total Bsmt SF\",\n",
"    \"has Fireplace\": \"Fireplaces\",\n",
"    \"has Garage\": \"Garage Area\",\n",
"    \"has Pool\": \"Pool Area\",\n",
"    \"has Porch\": \"Total Porch SF\",\n",
"}\n",
"# Factorize numeric columns: 1 where the source value is strictly positive,\n",
"# 0 otherwise (missing values compare as False and therefore map to 0).\n",
"# The comparison is vectorized instead of calling a Python lambda per row.\n",
"for factor_column, column in derived_sources.items():\n",
"    df[factor_column] = (df[column] > 0).astype(\"int64\")\n",
"# Only the names of the new indicator columns are needed from here on.\n",
"derived_variables = list(derived_sources)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 10,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Append only names that are not collected yet, so that re-running this\n",
"# cell does not leave duplicate entries in new_variables.\n",
"new_variables.extend(\n",
"    [name for name in derived_variables if name not in new_variables]\n",
")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 11,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch \n",
"Order PID \n",
"1 526301100 1 \n",
"2 526350040 1 \n",
"3 526351010 1 \n",
"4 526353030 0 \n",
"5 527105010 1 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 11,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[derived_variables].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2nd Floors\n",
"\n",
"A second floor may have a positive effect on the sales price. However, having a second floor correlates with overall living space. The individual effect is therefore not as clear as it seems in the plot below."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 12,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACXvUlEQVR4nOydd3iUVfbHP+/MZDKTOqmTSnogJJDQCaEGQhVBwF6xYNu1u2uXFXV3raxrWVHXH64NQYqKSgSE0DuEEkoI6cmk90ym/v64kIACoiYCej/PMw/v3Hnfe+/MhPfMveec71GcTqcTiUQikUg6EdX5noBEIpFIfn9I4yKRSCSSTkcaF4lEIpF0OtK4SCQSiaTTkcZFIpFIJJ2O5nxP4EJh0KBBhIaGnu9pSCQSyUVFSUkJW7Zs+VG7NC7HCQ0NZfHixed7GhKJRHJRMW3atNO2y20xiUQikXQ60rhIJBKJpNORxkUikUgknY70uUgkEsk5YrVaKS4uxmw2n++p/ObodDrCwsJwcXE5p/OlcZFIJJJzpLi4GE9PTyIjI1EU5XxP5zfD6XRSXV1NcXExUVFR53SN3BaTSCSSc8RsNuPn5/eHMiwAiqLg5+f3s1ZscuUikUh+HzidcCQTKg9D5FAI7dMlw/zRDMsJfu77lsZFIpH8PshfB59cKYyMRxDcthq8ZWL0+UJui0kkkt8HjWXCsAA0lUNrzZnP7aQyVsXFxVxyySWd0tcJcnJyuPLKK5k0aRKTJ0/m66+//tl9pKenU1Pz4/efnp7O5MmTmTJlClOmTGHnzp1d8h5ArlwkEsnvhcihEDsW8lbD0AfAP/7H59itsO1d2PF/kDgNUv8Eru6/+VTPhk6n45///CeRkZGYTCamT5/O0KFD8fLy6pT+58+fj6+vb/vz4uLi055ns9nQaH65iZDGRSKR/D7wCoUr5oO5HjwCQaX+8Tmlu+DbR8TxmuchtC/EZfyqYe12O0888QS7du3CaDTy5ptvotPp+Oyzz1iwYAFWq5WIiAheeOEF9Ho933zzDW+88QYqlQpPT08++uijU/o7ORrLaDTi6+tLTU0NXl5epKenM3XqVL7//ntsNhtz584lJiaG2tpaHnzwQUwmEykpKfzSAsOLFy8mMzOTlpYWHA4HH3744S/+XOS2mEQi+f2gdQOv4NMbFgCNDlQn/aZ20f/qIQsKCrj22mtZvnw5np6erFixAoCMjAw+//xzvvjiC6Kjo1m0aBEAb775Ju+99x5ffPEFb7311ln7zs7Oxmq10q1bt/Y2Hx8flixZwlVXXcV///tfAN544w369u3L8uXLycjIoLS09Ix93njjjUyZMoXLL7/8tK8fOHCA11577VcZFpArF4lE8kciuDdc8QHkr4eQvtBtyK/uMiwsjISEBAASExMpKSkB4MiRI8ydO5fGxkaam5sZOnQoAH369OGRRx5hwoQJZGScedVUUVHBww8/zD//+U9Uqo51wNixYwFISkriu+++A2Dbtm28/vrrAIwcORJvb+8z9vvDbbEfkpaWhsFgOId3fnakcZFIJH8sekwSj05Cq9W2H6vVatra2gB45JFHePPNN+nRoweLFy9m69atADzzzDPs2bOHNWvWMH36dD7//HN8fHxO6bOpqYnbb7+d+++/n5SUlFNeO5Ehr1KpsNvtnfY+TqDX//rVHMhtMYlEIukSmpubCQgIwGq18uWXX7a3FxYWkpyczL333ouPjw/l5eWnXGexWLj77ruZMmUK48ePP6exBgwY0D7G2rVrqa+v77w38guRKxeJRCLpAu69914uv/xyfH19SU5Oprm5GYAXXniBgoICnE4ngwcPpkePHqdc980337B9+3bq6upYsmQJAP/4xz/at95Ox913382DDz7IpEmT6NOnDyEhIV33xs4RxflLwwp+Z0ybNk0WC5NIJGclJyfnrDf53zune/9nunfKbTGJRCKRdDrSuEgkEomk05HGRS
KRSCSdTpcZl7y8vHb9milTptC3b1/+7//+j7q6OmbOnMnYsWOZOXNme1SD0+nk2WefJSMjg8mTJ7N///72vpYsWcLYsWMZO3Zsu4MLYN++fUyePJmMjAyeffbZ9qzUM40hkUgkkt+GLjMu0dHRLFu2jGXLlrF48WL0ej0ZGRnMmzeP1NRUMjMzSU1NZd68eQBkZWWRn59PZmYmc+bMYfbs2YAwFK+//jqfffYZCxcu5PXXX283FrNnz2bOnDlkZmaSn59PVlYWwBnHkEgkEslvw2+yLbZp0ybCw8MJDQ1l1apVTJ06FYCpU6eycuVKgPZ2RVFISUmhoaGBiooK1q9f354x6u3tTVpaGuvWraOiooKmpiZSUlJQFIWpU6eyatWqU/r64RgSiUQi+W34TYzL8uXL2yWdq6urCQwMBCAgIIDq6moATCYTQUFB7dcEBQVhMpl+1G40Gk/bfuL8s40hkUgkfySysrIYN25c+67Rb0mXGxeLxcLq1atPm2mqKEqXV3X7LcaQSCSSCw273c4zzzzDu+++y/Lly/nqq6/Izc39zcbvcuOSlZVFYmIi/v7+APj5+VFRUQEIYbYTAmpGo/EUGYTy8nKMRuOP2k0m02nbT5x/tjEkEonkQmXprhLS/rGaqEeWk/aP1SzdVfKr+svOziYiIoLw8HC0Wi2TJk1qdx38FnS5cVm+fDmTJnWIxKWnp7N06VIAli5dyujRo09pdzqd7N69G09PTwIDAxk6dCjr16+nvr6e+vp61q9fz9ChQwkMDMTDw4Pdu3fjdDpP29cPx5BIJJILkaW7Snh08V5K6lpxAiV1rTy6eO+vMjBncin8VnSptlhLSwsbN27kmWeeaW+bNWsW9913H4sWLSIkJIS5c+cCMGLECNauXUtGRgZ6vZ7nn38eAIPBwF133cWMGTMAoaFzQg766aef5tFHH8VsNjN8+HCGDx9+1jEkEonkQuTFFYdotZ6qcNxqtfPiikNM7RN6nmb16+hS4+Lm5saWLVtOafPx8WH+/Pk/OldRFJ5++unT9jNjxox243IyvXr14quvvvpR+5nGkEgkkguR0rrWn9V+LpzJpfBbITP0JRKJ5DwTYjh9DZUztZ8LvXr1Ij8/n6KiIiwWC8uXLyc9Pf0X9/dzkcZFIpFIzjMPj+uO3uXU0sx6FzUPj+v+i/vUaDQ89dRT3HrrrUycOJEJEyYQFxf3a6d67uP/ZiNJJBKJ5LSc8Ku8uOIQpXWthBj0PDyu+6/2t4wYMYIRI0Z0xhR/NtK4SCQSyQXA1D6hF63z/nTIbTGJRCKRdDrSuEgkEomk05HGRSKRSCSdjjQuEolEIul0pHGRSCQSSacjjYtEIpH8Tnn00UdJTU1tL3nyWyKNi0QikfxOmTZtGu++++55GVsaF4lE8vOwWcHhON+z+P2R/Rm8mgSzDeLf7M9+dZcDBgzA29v718/tFyCNi0QiOXdyV8FbqfDhdDDlnO/Z/H7I/gy+vAfqiwCn+PfLezrFwJwvpHGRSCTnhs0K3/wVqo9A3mo4sOR8z+j3w6pnwPoDBWRrq2i/SJHGRSI5B8rrW9mYW0VpXcv5nsr5Q6UG7/CO57rzs93yu6S++Oe1XwRIbTGJ5CQOljVwtLKJxBBvIv3dAWFYZn2wg+ySeuIDPXjvpgGE+7qdtR+Hw4nV7sD1B0q3FzUqFYx7HvYvBp0Bel91vmf0+8E77PiW2GnaL1LkykUiOc7h8kaufmczd3+8i5v/bxslxws15VY2k11SL86paOJIRdNZ+ymobuaOD3cw+pW1fLO3rMvn/ZtiTID0x2HI3eDud75n8/th9FPg8oPaLS560f4reOCBB7jqqqs4duwYw4cPZ+HChb+qv59Dl65cGhoaeOKJJzh8+DCKovD8888TFRXF/fffT0lJCaGhocydOxdvb2+cTifPPfcca9euRafT8Y9//IPExEQAlixZwltvvQXAnXfeyWWXXQbAvn372sscjxgxgscffx
xFUairqzvtGBLJ2ThW1URtixWAvKpmSmpbCTXoifJzJzbQndyKZsJ93IgOcD9rP5kHTGQeELXK//p5NoOj/fBx13b
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Living area vs. sale price, colored by the \"has 2nd Flr\" indicator.\n",
"sns.scatterplot(\n",
"    data=df,\n",
"    x=\"Gr Liv Area\",\n",
"    y=\"SalePrice\",\n",
"    hue=\"has 2nd Flr\",\n",
"    s=15,\n",
");"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Basements\n",
"\n",
"Nearly all houses in Ames, IA, have a basement. Therefore, *has Bsmt* is most likely not an important predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 13,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACF20lEQVR4nO2dd3gVddqG75PeKySBEAKh94D0UDQYUHoV+8qquIoitl0roii7a19ldWV1/dy1AgooqJRQQ++RXkNCSQIhhUB65vvjSXISCEhJaP7u68rFnDlzppyEeedtz2uzLMvCYDAYDIYqxOFKn4DBYDAYrj+McTEYDAZDlWOMi8FgMBiqHGNcDAaDwVDlGONiMBgMhirH6UqfwNVCp06dCA0NvdKnYTAYDNcUhw4dYvXq1WesN8alhNDQUL7//vsrfRoGg8FwTTF06NBK15uwmMFgMBiqHGNcDAaDwVDlGONiMBgMhirH5FwMBoOhCikoKODgwYPk5uZe6VOpUtzc3KhTpw7Ozs7ntb0xLgaDwVCFHDx4EG9vb+rVq4fNZrvSp1MlWJZFWloaBw8epH79+uf1GRMWMxgMhiokNzeXwMDA68awANhsNgIDAy/IGzOei8FguD6wLNg9D47ugnrdILTtFTuV68mwlHKh12SMi8FguD5IWAZfj5SR8QqBBxeCr2mMvlKYsJjBYLg+OHFEhgUgOxlyjp9928s4xurgwYP079+/yvfZunVrBg0axMCBA7n99tvZt2/fJe939erVbNiwoQrO0BgXg8FwvVCvGzTsDQ5O0OPPUKPxmdsUFcCqj+DDzrD475B38vKfZxVRt25dZs2axQ8//MDgwYP5+OOPL3mfa9asYePGjVVwdiYsZjAYrhd8QuG2zyE3E7yCwMHxzG0Ob4RfntXy4kkQ2g4axVT7qRUVFfHiiy+yceNGgoOD+fDDD3Fzc2Pq1Kl8++23FBQUEB4ezhtvvIG7uzs///wz//znP3FwcMDb25svv/zynPvPzs7Gx8cHgN27d/Pcc89RUFBAcXExH3zwAU5OTjzwwANERkayceNGWrZsybBhw3j//fc5fvw4b731FgEBAXzzzTc4ODjwww8/8NJLL9G+ffuLvmZjXAwGw/WDi4d+zoaTmzyb4kK9dna/LKd14MAB3nnnHV577TUef/xx5s6dy6BBg4iJieG2224D4N1332X69Oncc889fPjhh3z66acEBweTlZVV6T4TExMZNGgQJ0+eJDc3l6lTpwLwzTffcO+99zJw4EDy8/MpLi7m2LFjJCYm8o9//INJkyYxfPhwfvzxR77++mtiY2P517/+xYcffsjtt9+Oh4cH999//yVfszEuBoPh90Ot1nDbfyEhDmq3g7pdL8th69SpQ7NmzQBo0aIFhw4dAuRlvPfee5w4cYKTJ0/SrVs3ANq2bcuzzz7LrbfeSkxM5Z5VaVgM4KeffuKll17i008/JTIykn/9618kJyfTu3dv6tWrV3YOTZo0AaBhw4Z06dIFm81GkyZNys6nKjE5F4PB8PuiaT+45a/QegQ4XJ5boIuLS9myo6MjRUVFADz77LOMHz+eH3/8kUcffZT8/HwAXn31VcaNG8eRI0cYNmwY6enp59x/dHQ069atA2DAgAF89NFHuLm5MXr0aFauXHnGOTg4OJS9ttlsZedTlRjjYjAYDFeIkydPUrNmTQoKCvjxxx/L1icmJtKmTRsef/xx/P39SU5OPud+1q9fT926dQFISkoiLCyMe++9l169erFz587zPh9PT09OnqyaIgcTFjMYDIYrxOOPP86IESMICAigTZs2ZTf2N954gwMHDmBZFp07d6Zp06ZnfLY052JZFs7Ozrz22msA/Pzzz8yaNQsnJydq1KjBQw89RHZ29nmdz0033cTYsWOJjY295IS+zbIuY8H3VczQoUPNsDCDwXDJbN++vSy/cr1R2bWd7d5pwmIGg8FgqHKMcTEYDAZDlW
OMi8FgMBiqnGozLvv27WPQoEFlP+3ateP//u//yMjIYNSoUfTu3ZtRo0aRmZkJaF7Aa6+9RkxMDAMGDGDr1q1l+5oxYwa9e/emd+/ezJgxo2z9li1bGDBgADExMbz22muUpo/OdgyDwWAwXB6qzbhEREQwa9YsZs2axffff4+7uzsxMTFMmTKFLl26MG/ePLp06cKUKVMAWLp0KQkJCcybN4+JEycyYcIEQIZi8uTJTJ06lWnTpjF58uQyYzFhwgQmTpzIvHnzSEhIYOnSpQBnPYbBYDAYLg+XJSy2cuVKwsLCCA0NJTY2lsGDBwMwePBgFixYAFC23mazERkZSVZWFqmpqcTFxREVFYWfnx++vr5ERUWxbNkyUlNTyc7OJjIyEpvNxuDBg4mNja2wr9OPYTAYDIbLw2UxLnPmzCmTnE5LSyMoKAiAmjVrkpaWBkBKSgohISFlnwkJCSElJeWM9cHBwZWuL93+XMcwGAyG3ztLly6lT58+ZZGk6qLajUt+fj4LFy7klltuOeM9m81W7RPbLscxDAaD4VqgqKiIV199lU8++YQ5c+Ywe/Zs9uzZUy3HqnbjsnTpUlq0aEGNGjUACAwMJDU1FYDU1FQCAgIAeSTlJQ6Sk5MJDg4+Y31KSkql60u3P9cxDAaD4Vph5sZDRP1tIfWfnUPU3xYyc+Oli0vGx8cTHh5OWFgYLi4u9OvXryydUNVUu3GZM2cO/fr1K3sdHR3NzJkzAZg5cya9evWqsN6yLDZt2oS3tzdBQUF069aNuLg4MjMzyczMJC4ujm7duhEUFISXlxebNm3CsqxK93X6MQwGg+FaYObGQzz3/a8cysjBAg5l5PDc979esoE5W5qhOqhWbbFTp06xYsUKXn311bJ1o0ePZty4cUyfPp3atWvz3nvvAdCzZ0+WLFlCTEwM7u7uTJo0CQA/Pz8eeeQRhg8fDsCYMWPw8/MD4OWXX+a5554jNzeXHj160KNHj3Mew2AwGK4F3py7k5yCikrFOQVFvDl3J4Pbhl6hs7owqtW4eHh4sHr16grr/P39+fzzz8/Y1maz8fLLL1e6n+HDh5cZl/K0atWK2bNnn7H+bMcwGAyGa4HDGTkXtP58OVuaoTowHfoGg8FwlVHbr/IJmWdbf760atWKhIQEkpKSyM/PZ86cOURHR1/SPs+GMS4Gg8FwlfFMnya4OztWWOfu7MgzfZpc0n6dnJwYP348DzzwAH379uXWW2+lUaNGl7TPsx6rWvZqMBgMhoumNK/y5tydHM7IobafO8/0aVIl+ZaePXvSs2fPS97Pb2GMi8FgMFyFDG4bes0k7yvDhMUMBoPBUOUY42IwGAyGKscYF4PBYDBUOca4GAwGg6HKMcbFYDAYDFWOMS4Gg8HwO+G5556jS5cuZSNQqhNjXAwGg+F3wtChQ/nkk08uy7GMcTEYDBdGYQEUF1/ps7j+iZ8K77aECX76N37qJe+yQ4cO+Pr6Xvq5nQfGuBgMhvNnTyx81AW+GAYp26/02Vy/xE+FH8dCZhJg6d8fx1aJgblcGONiMBjOj8IC+PkvkLYb9i2EbTOu9Bldv8S+CgWnKSAX5Gj9NYIxLgbD+ZB1CPYvhYyDV/pMrhwOjuAbZn/tdnnCK79LMs/yd3a29VchRlvMYChPylY4tgtCWkNgA63LOgTf3A2HN0DNZnDnt+Affu79FBdDUQE4u1b/OV8uHBygzyTY+j24+UHr26/0GV2/+NYpCYlVsv4awXguBkMpqdvh//rDtPvgq5H2p8Rju2VYAI5uh6M7zr2f4/th6t3wzw6w7YdqPeXLTnAziH4Buo4Bz8ArfTbXL73Gg/Nps1uc3bX+EnjyySe5/fbb2b9/Pz169GDatGmXtL9zUa2eS1ZWFi+++CK7du3CZrMxadIk6tevzxNPPMGhQ4cIDQ3lvffew9fXF8uyeP3111myZAlubm787W9/o0WLFgDMmDGDjz76CICHH36YIUOGALBly5ayMc
c9e/bkhRdewGazkZGRUekxDIZzkrYHco6XLO+GjEQ9KQY0gBpN4NhO8AuHwN+Yf7Fjjn4AfngUwqPMjdhwYbS+Tf/
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Bsmt\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fireplaces\n",
"\n",
"Bigger houses are more likely to have a fireplace. Thus, the variable *has Fireplace* might be an interesting predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 14,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACV4klEQVR4nOydd3hUZfbHP1PSe530hIQeAqETAgECoYpS7Y1V+CnYy64VWFHUtbHKriur6+ra6CAi0iGE3kOHQHrvfZIp9/fHIQkgTUkE9H6eZx7u3Ln3vm9mhnvmvOec79EoiqKgoqKioqLSjGiv9wRUVFRUVH5/qMZFRUVFRaXZUY2LioqKikqzoxoXFRUVFZVmRzUuKioqKirNjv56T+BGoXfv3gQGBl7vaaioqKjcVGRnZ7Nz586f7VeNy1kCAwNZsmTJ9Z6GioqKyk3FuHHjLrpfXRZTUVFRUWl2VOOioqKiotLsqMZFRUVFRaXZUWMuKioqf3hMJhNZWVkYjcbrPZUbFnt7e4KCgrCxsbmq41XjoqKi8ocnKysLFxcXwsLC0Gg013s6NxyKolBcXExWVhatWrW6qnPUZTEVFZU/PEajES8vL9WwXAKNRoOXl9cv8uxUz0VFReX3gaLAqTVQeBLC+kFg1190umpYLs8vfX9U46KiovL7IG0LfHuHGBlnP5i8AdzUwujrhbospqKi8vugMlcMC0BVHtSWXPrYK7SxysrK4pZbbmnGyck1O3fuzG233db4yMnJ4Yknnmi2Mbp2/WXeWkuiei4qKiq/D8L6QeuhcGYD9HsGvNv+/BiLCXZ/Cnv/C5HjIOYxsHP6zaYYEhLC8uXLz9v34Ycf/uw4s9mMXn9z355v7tmrqKioNOAaCLd/AcZycPYFre7nx+Tsh59ekO1NsyGwG7RJuOjlLBYLr7zyCvv378dgMPDPf/4Te3t7FixYwPz58zGZTISGhvK3v/0NBwcHVq1axT/+8Q+0Wi0uLi58/fXXV5xyVlYWjzzyCD/88ANLlixhzZo11NTUYLVamTdvHrNmzeLUqVOYzWYee+wxhgwZwpIlS1i7di1VVVXk5+dz66238thjj5133erqaqZOnUpFRQVms5knn3ySIUOGALBs2TI+++wzNBoN7dq145133qGkpIQZM2aQk5MDwEsvvUT37t1/wZt/ERQVRVEUZezYsdd7CioqKi1NzkFF+aunosxwlUfqFkVRFOXo0aPnHZaZmal06NChcf8TTzyhLFu2TFEURSkpKWk87v3331e+/PJLRVEU5ZZbblHy8vIURVGU8vLynw2dmZmpREVFKbfeeqty6623KjNnzlQyMzOVUaNGKYqiKIsXL1b69++vlJaWKoqiKO+9917jmOXl5crQoUOV6upqZfHixUpsbKxSUlKi1NbWKqNGjVKSk5MVRVGU6OhoRVEUxWQyKZWVlYqiKEpxcbEyZMgQxWq1KidPnlSGDh2qFBcXK4qiNI71zDPPKLt371YURVGys7OV4cOHX/Ttu/B9UpRL3ztVz0VFReWPg39nuP1LSEuCgG4Q0veShwYFBdGhQwcAIiMjyc7OBuDUqVPMmTOHyspKqqur6devHyDxjhdeeIERI0aQkHBxb+jCZbGsrKzzXo+NjcXd3R2ApKQkNmzYwH/+8x8A6urqyM3NBaBv3754eHgAkJCQwN69e4mKimq8jqIovP/+++zevRutVkt+fj5FRUXs2LGD4cOH4+npCdA41rZt20hJSWk8v6qqiurqapycfv2SoWpcVFRU/li0HyWPK2Bra9u4rdPpqKurA+CFF17gn//8J+3bt2fJkiXs2rULgNdee42DBw+yadMmxo8fz+LFixsNwNXi4OBw3vMPP/yQ8PDw8/YdPHjwZ2nBFz5fsWIFJSUlLFmyBBsbG+Lj4xvnfzGsVisLFizAzs7uF833cqjZYioqKiq/gOrqanx8fDCZTKxYsaJxf0ZGBl26dOHJJ5/Ew8
ODvLy8axqnX79+fPXVVyhnM9uOHj3a+NrWrVspKyvDaDSybt06unXrdt65lZWVeHl5YWNjw44dOxq9rj59+vDTTz9RWloKQFlZWeNY//vf/xrPP3bs2DXNHVTPRUVFReUX8eSTTzJx4kQ8PT3p0qUL1dXVAPztb38jPT0dRVHo06cP7du3v6Zxpk6dyuzZs7n11luxWq0EBQXxySefANC5c2cef/zxxoD+uUtiAKNHj+bRRx9l9OjRdOrUqdH7adOmDY888gj33XcfWq2Wjh078tZbb/Hyyy/z2muvMXr0aCwWCz169OC11167pvlrFOUKCd9/EMaNG6c2C1NR+YNy7NixxvjKjc6SJUs4fPgw06dP/83Hvtj7dKl7p7ospqKioqLS7KjLYioqKio3EePGjbtka+EbCdVzUVFRUVFpdlrMuJw5c+Y8DZ1u3brx3//+l7KyMiZNmsTQoUOZNGkS5eXlgORlv/766yQkJDB69GiOHDnSeK2lS5cydOhQhg4dytKlSxv3Hz58mNGjR5OQkMDrr7/emFVxqTFUVFRUVH4bWsy4hIeHs3z5cpYvX86SJUtwcHAgISGBefPmERMTw5o1a4iJiWHevHkAJCYmkpaWxpo1a5g1axYzZ84ExFDMnTuXBQsWsHDhQubOndtoLGbOnMmsWbNYs2YNaWlpJCYmAlxyDBUVFRWV34bfZFls+/btBAcHExgYyPr16xkzZgwAY8aMYd26dQCN+zUaDdHR0VRUVFBQUEBSUlJj1aqbmxuxsbFs2bKFgoICqqqqiI6ORqPRMGbMGNavX3/etS4cQ0VFRUXlt+E3MS4rV65slK8uLi7G19cXAB8fH4qLiwHIz8/Hz8+v8Rw/Pz/y8/N/tt9gMFx0f8PxlxtDRUVF5fdKYmIiw4YNa1whut60uHGpr69nw4YNDB8+/GevaTSaFu/+9luMoaKionI9sVgsvPbaa3z66aesXLmSH3744TytsOtBixuXxMREIiMj8fb2BsDLy4uCggIACgoKGgXUDAbDeXIJeXl5GAyGn+3Pz8+/6P6G4y83hoqKisqNwLL92cS+tYFWL6wk9q0NLNuffU3XS05OJjQ0lODgYGxtbRk1alRjmOB60eLGZeXKlYwa1SQSFx8fz7JlywDpKzB48ODz9iuKwoEDB3BxccHX15d+/fqRlJREeXk55eXlJCUl0a9fP3x9fXF2dubAgQMoinLRa104hoqKisr1Ztn+bF5ccojssloUILuslheXHLomA3Op8MH1pEWLKGtqati2bdt5GjVTpkzhqaeeYtGiRQQEBDBnzhwABgwYwObNm0lISMDBwYHZs2cDIgk9depUJkyYAMC0adMaZaJnzJjBiy++iNFoJC4ujri4uMuOoaKionK9eWf1CWpNlvP21ZosvLP6BGO6Bl6nWTU/LWpcHB0d2blz53n7PDw8+OKLL352rEajYcaMGRe9zoQJExqNy7lERUXxww8//Gz/pcZQUVFRud7klNX+ov1Xw6XCB9cTtUJfRUVF5TckwN3hF+2/GqKiokhLSyMzM5P6+npWrlxJfHz8r75ec6AaFxUVFZXfkOeHtcPBRnfePgcbHc8Pa/err6nX65k+fToPP/wwI0eOZMSIEbRp0+Zap3pNqMKVKioqKr8hDXGVd1afIKeslgB3B54f1u6a4y0DBgxgwIABzTHFZkE1LioqKiq/MWO6Bv6ugvcXQ10WU1FRUVFpdlTjoqKioqLS7KjGRUVFRUWl2VGNi4qKiopKs6MaFxUVFRWVZkc1LioqKiq/A1588UViYmIa25tcb1TjoqKiovI7YNy4cXz66afXexqNqMZFRUXll2E2gdV6vWdxc5O8AD7oBDPd5d/kBdd8yZ49e+Lm5nbtc2smVOOioqJy9aSsh49j4KvxkH/ses/m5iR5Aax4AsozAUX+XfFEsxiYGwnVuKioqFwdZhOs+gsUn4IzG+Do0us9o5uT9a+B6QIFZFOt7P8doRoXFZ
WroSIbUhOhLOt6z+T6odWBW3DTc/sbZwnmpqL8Et+hS+2/SVG1xVRUziX/CBSdBL/O4BUh+yqy4bt7IWcf+HSAu+e
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Fireplace\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garages\n",
"\n",
"Holding the overall living area fixed adding a garage seems to affect the price positively. Thus, *has Garage* seems like an interesting predictor as well."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 15,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACJgElEQVR4nO2dd3hUZdqH70nvlWQSQkijhxJ6CQQJhB46YlcsWFDEtnZEUda174q68um6ulY6CiIl1NB76BAghUAmvfeZ8/3xJJkEQk+o731duZg5c+oknN95uk7TNA2FQqFQKOoRi+t9AgqFQqG49VDiolAoFIp6R4mLQqFQKOodJS4KhUKhqHeUuCgUCoWi3rG63idwo9C9e3f8/Pyu92koFArFTUVKSgpbt249Z7kSl0r8/PxYsGDB9T4NhUKhuKkYM2ZMncuVW0yhUCgU9Y4SF4VCoVDUO0pcFAqFQlHvqJiLQqFQXITy8nJOnTpFSUnJ9T6V64adnR1NmjTB2tr6ktZX4qJQKBQX4dSpUzg7OxMYGIhOp7vep3PN0TSNzMxMTp06RVBQ0CVto9xiCoVCcRFKSkrw9PS8LYUFQKfT4enpeVmWm7JcFArFrYGmwbEVkH4UAnuDX8d63f3tKixVXO71K3FRKBS3Bgkb4JcJIjJOPvDYanBVhdHXC+UWUygUtwb5Z0RYAApSoTjr/Ote5RirU6dOMXz48KvaR10kJCTw+OOPM2DAAMaMGcP999/P9u3b6/041wJluSgUiluDwN7QbCCcWA29n4dGLc5dx1gO27+Bnf+F0DHQ82mwdbzmp1oXpaWlPP744/ztb3+jf//+ABw9epT9+/fTtWvXS9pHRUUFVlY3xm39xjgLhUKhuFpc/ODO76EkF5y8wcLy3HVO74a/XpHXa2eCXydoHnVFhzMajbzxxhvs3r0bvV7Pl19+iZ2dHXPmzOG3336jvLycgIAAPvjgA+zt7Vm2bBlffPEFFhYWODs789NPP9Xa3++//05YWFi1sAC0aNGCFi1EJOPi4njvvfcoLS3Fzs6OmTNnEhwczIIFC1ixYgVFRUWYTCa+/vprnnrqKfLy8qioqODZZ59lwIABAHzxxRf8/vvveHh44OvrS2hoKI888ghJSUm8/fbbZGdnY2dnx4wZMwgJCbmi76UaTaFpmqaNHj36ep+CQqFoaE7v1bS3PTTtLRf5ObnhkjY7ePBgrffJycla69atq5dPmTJFW7RokaZpmpaVlVW93ieffKL98MMPmqZp2vDhw7XU1FRN0zQtNzf3nGPMnDlT++9//3vec8jPz9fKy8s1TdO0jRs3ak8//bSmaZo2f/58rU+fPlp2dramaZpWXl6u5efna5qmaZmZmdqAAQM0k8mk7d27VxsxYoRWUlKi5efna1FRUdo333yjaZqmPfDAA9rJkyc1TdO0PXv2aPfff/8lfQ+adv57p7JcFArF7YNve7jzB0iIhcadoGmvK95VkyZNaN26NQChoaGkpKQAcOzYMT777DPy8/MpLCykd+/eAHTs2JFXXnmFIUOGEBV1cWtp8uTJJCYmEhgYyKxZs8jPz+fll18mMTERnU5HeXl59brh4eG4ubkBUpPyySefsH37diwsLDAYDGRkZLBr1y769++Pra0ttra29OvXD4DCwkJ2797Ns88+W72/srKyK/5eqlDiolAobi9aDZOfq8TGxqb6taWlJaWlpQC88sorfPnll7Rq1YoFCxawbds2AN555x327t3L2rVrGTt2LPPnz8fd3b16H82aNWPHjh3V77/44gv27dvHBx98AMA///lPunfvzhdffMGpU6d44IEHqte1t7evfv3HH3+QlZXFggULsLa2JjIysvrc6kLTNFxcXFi8ePFVfiO1UdliCoVCUY8UFhbi5eVFeXk5f/zxR/XypKQkOnTowLPPPou7uzupqam1touOjmbXrl3ExMRUL6tZtJifn49erwdg4cKF5z1+fn
4+np6eWFtbs2XLlmqLqlOnTqxZs4bS0lIKCwtZu3YtAE5OTjRp0oRly5YBIjaHDx++ui8BZbkoFApFvfLss88yfvx4PDw86NChA4WFhQB88MEHJCYmomkaPXr0oFWrVrW2s7Oz49///jfvv/8+M2fOpFGjRjg6OvLkk08C8Oijj/LKK6/w1Vdf0bdv3/MePzo6mieffJLo6Gjatm1LcHAwAO3btycyMpIRI0bg6elJixYtcHZ2BuDDDz9k+vTpfPXVV1RUVDB06NBzzu9y0WnaVSZ83yKMGTNGDQtTKBR1cujQoer4ys1MYWEhjo6OFBcXc++99zJjxgxCQ0Mvefu6vofz3TuV5aJQKBS3CdOmTSM+Pp7S0lJGjx59WcJyuShxUSgUituEjz/++JodSwX0FQqFQlHvNJi4nDhxgpEjR1b/dOrUif/+97/k5OQwceJEBg4cyMSJE8nNzQUkQ+Hdd98lKiqK6OhoDhw4UL2vhQsXMnDgQAYOHFgrS2L//v1ER0cTFRXFu+++S1X46HzHUCgUCsW1ocHEJTg4mMWLF7N48WIWLFiAvb09UVFRzJ49m549e7JixQp69uzJ7NmzAVi/fj0JCQmsWLGCGTNmMH36dECEYtasWcyZM4e5c+cya9asarGYPn06M2bMYMWKFSQkJLB+/XqA8x5DoVAoFNeGa+IW27x5M/7+/vj5+RETE8OoUaMAGDVqFKtWrQKoXq7T6QgLCyMvL4+0tDRiY2Orq09dXV0JDw9nw4YNpKWlUVBQQFhYGDqdjlGjRlXnh5/vGAqFQqG4NlwTcVm6dGl1e+rMzEy8vb0B8PLyIjMzEwCDwYCPj0/1Nj4+PhgMhnOW6/X6OpdXrX+hYygUCsXtwvr16xk0aFC1x+ha0+DiUlZWxurVqxk8ePA5n+l0ugaf7nYtjqFQKBQ3EkajkXfeeYdvvvmGpUuXsmTJEuLj46/pOTS4uKxfv57Q0FAaNWoEgKenJ2lpaQCkpaXh4eEBiEVSsx1Camoqer3+nOUGg6HO5VXrX+gYCoVCcSOyaHcK4e+vJuiVpYS/v5pFu1Ouan9xcXEEBATg7++PjY0Nw4YNq9VW5lrQ4OKydOlShg0zN4mLjIxk0aJFACxatKh6dkHVck3T2LNnD87Oznh7e9O7d29iY2PJzc0lNzeX2NhYevfujbe3N05OTuzZswdN0+rc19nHUCgUihuNRbtTeHXBPlJyitGAlJxiXl2w76oE5nzhhGtJgxZRFhUVsWnTJt55553qZZMmTWLq1KnMmzePxo0b89lnnwHQt29f1q1bR1RUFPb29sycORMANzc3nnrqKcaNGwdIG+qq1tJvvfUWr776KiUlJURERBAREXHBYygUCsWNxofLj1Bcbqy1rLjcyIfLjzCqo991Oqurp0HFxcHBga1bt9Za5u7uzvfff3/OujqdjrfeeqvO/YwbN65aXGrSrl07lixZcs7y8x1DoVAobjRO5xRf1vJL4XzhhGuJqtBXKBSK60hjN/vLWn4ptGvXjoSEBJKTkykrK2Pp0qVERkZe8f6uBCUuCoVCcR15aVBL7K0tay2zt7bkpUEtr3ifVlZWTJs2jUcffZShQ4cyZMgQmjdvfrWnennncE2PplAoFIpaVMVVPlx+hNM5xTR2s+elQS2vOt7St2/fC859aWiUuCgUCsV1ZlRHv5s6eF8Xyi2mUCgUinpHiYtCoVAo6h0lLgqFQqGod5S4KBQKhaLeUeKiUCgUinpHiYtCoVDcgrz66qv07NmzetzJtUaJi0KhUNyCjBkzhm+++ea6HV+Ji0KhuDwqysFkut5ncWsRNwc+bQvT3eTfuDlXvcuuXbvi6up69ed2hShxUSgUl058DHzVE34cC4ZD1/tsbg3i5sAfUyA3GdDk3z+m1IvAXE+UuCgUikujohyWvQyZx+DEaji48Hqf0a1BzDtQflYH5PJiWX4To8RFobgU8lLg5HrIOXW9z+T6YWEJrv7m93bXz+
VyS5F7nr+p8y2/SVC9xRSKmhgOQMZR8GkPniGyLC8Ffr0PTu8Cr9Zwz2/gHnDh/ZhMYCwHa9uGP+drhYUFDJoJBxa
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Garage\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pools\n",
"\n",
"Unfortunately, almost no one in Ames, IA, has a pool. The predictor *has Pool* seems quite uninteresting."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 16,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACf00lEQVR4nOzdd3hUddrw8e/0kkxLmUnvlRAIndClCoggYNddcZVddR+7u5a1PNbn3XXV3XWLrK6ra1kVKSooKAgBpbfQIb3PJJPMTCaZPvP+MWEAKaImIno+18VlPDNzziEJc8+v3PctCoVCIQQCgUAg6EXi830DAoFAIPjxEYKLQCAQCHqdEFwEAoFA0OuE4CIQCASCXicEF4FAIBD0Oun5voEfihEjRpCcnHy+b0MgEAguKI2NjWzZsuWU40Jw6ZGcnMySJUvO920IBALBBWXu3LmnPS5MiwkEAoGg1wnBRSAQCAS9TgguAoFAIOh1wpqLQCAQ9DGfz0dDQwNut/t838q3plQqSUlJQSaTndPzheAiEAgEfayhoQGNRkNGRgYikeh83843FgqFsFqtNDQ0kJmZeU6vEabFBAKBoI+53W5iY2MvyMACIBKJiI2N/UYjL2HkIhAIfhRCoRCfH7ZQYXEyMjOWAan6831LJ7lQA8sx3/T+heAiEAh+FDZVWfnFa9sJhcCoUbD8ttEk6lXn+7Z+soRpMYFA8KNgcbg51p3K0unB1u0785PPcxurhoYGLrnkkl4/54ABA5g9ezYzZszgkUceIRgMfuPz/OUvf+GVV175zvcjBBeBQPCjMDIrlovy45GKRdw+MYcsY9SpTwr4YPPf4W8jYd3/A0/X93+jfSgtLY3ly5fzwQcfUFlZyWeffXbe7kWYFhMIBD8KCToVf7t2MA6XnziNAon4NGsETbvgk/vDX697GpIHQ+6U7/dGewQCAX73u9+xa9cuTCYTf/vb31Aqlbz77ru88847+Hw+0tPT+f3vf49KpeLjjz/mr3/9K2KxGI1Gw5tvvnnGc0ulUgYNGkRtbS0NDQ08+OCDdHR0EBMTwzPPPENSUtIZj/cWYeQiEAh+NFRyKSad8vSBBUCqBPEJn6ll529Npra2lmuvvZYVK1ag0WhYtWoVAFOmTOH999/ngw8+ICsri8WLFwPwt7/9jVdeeYUPPviAv//972c9t8vlYtOmTeTl5fHkk09y2WWX8eGHHzJr1iyefPJJgDMe7y1CcBEIBD8diQPgitdh5K0w92VIG3XebiUlJYXCwkIAioqKaGxsBODo0aNcc801zJo1iw8//JCjR48CMGjQIO6//37effddAoHAac9ZV1fH7Nmzufrqq5kwYQLjx49n165dkfWd2bNns2PHDoAzHu8twrSYQCD4aSmYGf5znsnl8sjXEokEj8cDwP3338/f/vY3CgoKWLJkCVu3bgXg8ccfZ8+ePaxbt4558+bx/vvvYzAYTjrnsTWXHwJh5CIQCAQ/IF1dXcTHx+Pz+fjwww8jx+vq6hg4cCB33HEHBoOBlpaWczrfoEGDWLFiBQAffvghQ4cOPevx3iKMXAQCgeAH5I477uDyyy8nJiaGgQMH0tUV3tH2+9//ntraWkKhECNHjqSgoOCczvfwww/zwAMP8Morr0QW7s92vLeIQqHzvOH7B2Lu3LlCszCBQNAnDh48GFlfuZCd7u9xpvdOYVpMIBAIBL1OCC4CgUAg6HVCcBEIBAJBr+uz4FJVVcXs2bMjfwYPHsy///1vbDYbCxYsYOrUqSxYsAC73Q6EK5o++eSTTJkyhVmzZrF///7IuZYuXcrUqVOZOnUqS5cujRzft28fs2bNYsqUKTz55JMcWz460zUEAoFA8P3os+CSlZXF8uXLWb58OUuWLEGlUjFlyhQWLVpEaWkpq1evprS0lEWLFgFQVlZGTU0Nq1ev5oknnuCxxx4DwoHixRdf5N133+W9997jxRdfjASLxx57jCeeeI
LVq1dTU1NDWVkZwBmvIRAIBILvx/cyLbZp0yZSU1NJTk5mzZo1zJkzB4A5c+ZECqsdOy4SiSgpKcHhcGCxWNi4cSOjR49Gr9ej0+kYPXo0GzZswGKx4HQ6KSkpQSQSMWfOHNasWXPSub56DYFAIBB8P76X4LJixYpImQGr1YrRaAQgPj4eq9UKgNlsJiEhIfKahIQEzGbzKcdNJtNpjx97/tmuIRAIBIKwsrIypk2bFplR6m19Hly8Xi9r167l4osvPuUxkUjU593Zvo9rCAQCwYUkEAjw+OOP8/LLL7NixQo++ugjKioqevUafR5cysrKKCoqIi4uDoDY2FgsFgsAFouFmJgYIDwiObGcQUtLCyaT6ZTjZrP5tMePPf9s1xAIBIIL0bJdjYz+v7Vk3r+C0f+3lmW7Gr/T+crLy0lPTyc1NRW5XM7MmTMjywq9pc+Dy4oVK5g583iRuIkTJ7Js2TIAli1bxqRJk046HgqF2L17NxqNBqPRyJgxY9i4cSN2ux273c7GjRsZM2YMRqOR6Ohodu/eTSgUOu25vnoNgUAguNAs29XIA0v20mhzEQIabS4eWLL3OwWYMy039KY+rS3W3d3Nl19+yeOPPx45tnDhQu68804WL15MUlISL7zwAgDjx49n/fr1TJkyBZVKxdNPPw2AXq/n1ltvZf78+QDcdttt6PV6AB599FEeeOAB3G4348aNY9y4cWe9hkAgEFxo/rDqMC7fySX2Xb4Af1h1mDmDks/TXX29Pg0uarWaLVu2nHTMYDDw2muvnfJckUjEo48+etrzzJ8/PxJcTlRcXMxHH310yvEzXUMgEAguNE021zc6fi7OtNzQm4QMfYFAIPgBS9KfvlvmmY6fi+LiYmpqaqivr8fr9bJixQomTpz4rc93OkJwEQgEgh+w+6blo5JJTjqmkkm4b1r+tz6nVCrlkUce4aabbmLGjBlMnz6d3Nzc73qrJ1+jV88mEAgEgl51bF3lD6sO02RzkaRXcd+0/O+83jJ+/HjGjx/fG7d4WkJwEQgEgh+4OYOSf9CL96cjTIsJBAKBoNcJwUUgEAgEvU4ILgKBQCDodUJwEQgEAkGvE4KLQCAQCHqdEFwEAoHgJ+iBBx6gtLQ00g6ltwnBRSAQCH6C5s6dy8svv9xn5xeCi0Ag+EZ8/iDBYOh838ZPS/m78Hx/eEwf/m/5u9/5lMOGDUOn0333ezsDIbgIBIJzVnaklYv/VMbPX93KEXPn+b6dn4byd+HD28FeD4TC//3w9l4JMH1JCC4CgeCc+PxB/vfD/VS2drHhaBsrypvP9y39NKx5HHxfqYDsc4WP/4AJwUUgOBeORqguA1vD+b6T80YiFp1UiVerEqpHfS/sZ/idO9PxHwjht0MgOMGhZgeVrU6KknRkxEWFDzoa4b/XQdNOiC+Ea94BQ/pZzxMMhvAFgii+Us32QiYWi3j4kn58uKcJvUrGZRdYrasLli6lZ0rsNMd/wISRi0DQ40hLJ1f/czO3vbWLG/+9jcZjzZjajoYDC0DrQWg9dNbz1Fq7+NUbO5j03Ho+3vvjmjrKM2m4Z2o+vxibRUyU4nzfzk/DpEdA9pXeLTJV+Ph3cPfdd3PVVVdRXV3NuHHjeO+9977T+b6qT0cuDoeD3/3udxw5cgSRSMTTTz9NZmYmd911F42NjSQnJ/PCCy+g0+kIhUI89dRTrF+/HqVSyf/93/9RVFQEwNKlS/n73/8OwC233MJll10GwL59+yJtjsePH89DDz2ESCTCZrOd9hoCwdlUtznp6PYBUNXWRWOHi2S9CmKyIS4f2g6DPh1iz973YvUBM6sPhPuR//b9ckZmxWKIkvf5/Qt+pAZcEf7vmsfDU2G6lHBgOXb8W3ruued64ebOrE9HLk899RRjx47lk08+Yfny5WRnZ7No0SJKS0tZvXo1paWlLFq0CICysjJqampYvXo1TzzxBI899hgANpuNF198kXfffZf33nuPF198EbvdDs
Bjjz3GE088werVq6mpqaGsrAzgjNcQCM6mMElHv0QNAGNz48iO75kW06fCte/C1e/A9csgNuus59Eqj39mS9IrkUl
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Pool\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Porch\n",
"\n",
"Most houses have a porch."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 17,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACNJElEQVR4nOydd3hUZdqH7ynpvU56QqghQEIvoUgwICDSxa5YsKDYV7Ega1vX7urqyurnumsHaYIKEkoA6S10AiQhCWTSe5853x9PkgkYECSh+d7XlYszZ86c9z2TcJ7zPuX36DRN01AoFAqFogXRX+wJKBQKheLKQxkXhUKhULQ4yrgoFAqFosVRxkWhUCgULY4yLgqFQqFocYwXewKXCn379iU4OPhiT0OhUCguK7Kysti0adNv9ivjUk9wcDDz58+/2NNQKBSKy4oJEyY0u1+5xRQKhULR4ijjolAoFIoWRxkXhUKhULQ4KuaiUCgULUBtbS2ZmZlUVVVd7Km0Co6OjoSEhGBnZ3dWxyvjolAoFC1AZmYmbm5uREREoNPpLvZ0WhRN08jPzyczM5M2bdqc1WeUW0yhUChagKqqKnx8fK44wwKg0+nw8fE5p1WZWrkoFIorA02DlOWQewgiBkJw9ws+hSvRsDRwrtemjItCobgySFsLX08RI+MaAPesBA9VGH2xUG4xhUJxZVB6QgwLQFk2VBac/tgL0MYqMzOTa6+9tsXP2a1bN8aOHcuoUaOYNWsWVqv1vM65adMm7r333haaoQ1lXBQKxZVBxEBoNxz0Rhj8F/Dt8NtjLLWw8SP4sB+s/jtUl1/4eZ4nYWFhLFq0iMWLF3PkyBFWrFhxVp+rq6tr5ZmdjHKLKRSKKwP3YLj+c6gqBld/0Bt+e8zxHfDz07K9+lUI7gHtE1ptShaLheeee44dO3ZgMpn48MMPcXR05LvvvuPbb7+ltraW8PBwXn/9dZycnPjpp5/45z//iV6vx83NjS+//PK05zYajXTv3p309HQyMzN55plnKCwsxNvbm7/97W8EBQXx9NNPY29vz/79++nRowc33XQTL7zwAgUFBRgMBt577z0AKioqmDFjBocOHSI6Opo333zzvONHauWiUCiuHOydwT2wecMCYHSUlU0Ddk6tOp309HRuvvlmli5dipubG8uWLQMgISGB77//nsWLFxMZGcm8efMA+PDDD/n0009ZvHgxH3300RnPXVlZyYYNG+jQoQMvv/wy48eP54cffmDMmDG8/PLLjceZzWa++eYbZs6cyRNPPMHNN9/M4sWL+eabb/Dz8wNg3759PPPMM/z4449kZmaybdu28752ZVwUCsWfh8BucP1/od8DMOETCBvQqsOFhIQQFRUFQHR0NFlZWQCkpKRw0003MWbMGH744QdSUlIA6N69O08//TTfffcdFoul2XMeO3aMsWPHcuONN3LVVVcxZMgQduzY0RjfGTt27EnG4ZprrsFgMFBWVobZbCYhQVZqDg4OODmJce3WrRsBAQHo9Xo6derUOM/zQbnFFArFn4tOo+XnAmBvb9+4bTAYqK6uBuDpp5/mww8/pFOnTsyfP5/NmzcD8OKLL7Jr1y5Wr17NxIkT+f777/Hy8jrpnA0xl7OlwYCcyzxPZ9jOBbVyUSgUigtMeXk5fn5+1NbW8sMPPzTuP3bsGDExMTz88MN4eXmRnZ19Vufr3r07S5cuBeCHH36gV69evznG1dWVgICAxgSAmpoaKisrW+BqmketXBQKheIC8/DDDzN58mS8vb2JiYmhvFyy1l5//XXS09PRNI1+/frRqVOnszrf888/z8yZM/n0008bA/rN8frrrzNr1izee+897OzsGgP6rYFO0y5AwvdlwIQJE1SzMIVC8YfZv39/Y3zlSqW5azzdvVO5xRQKhULR4ijjolAoFIoWRxkXhUKhULQ4rWZcjh49ytixYxt/evTowX/+8x+KioqYOnUqw4cPZ+rUqR
QXFwPSL+Dll18mISGBMWPGsHfv3sZzLViwgOHDhzN8+HAWLFjQuH/Pnj2MGTOGhIQEXn75ZRrCR6cbQ6FQKBQXhlYzLpGRkSxatIhFixYxf/58nJycSEhIYM6cOfTv35/ly5fTv39/5syZA0BSUhJpaWksX76cl156idmzZwNiKD744AO+++475s6dywcffNBoLGbPns1LL73E8uXLSUtLIykpCeC0YygUCoXiwnBB3GIbNmwgNDSU4OBgEhMTGTduHADjxo1rzLlu2K/T6YiNjaWkpIScnBzWrVtHXFwcnp6eeHh4EBcXx9q1a8nJyaGsrIzY2Fh0Oh3jxo0jMTHxpHOdOoZCoVAoLgwXxLgsXbq0UZogPz8ff39/APz8/MjPzwdE/yYgIKDxMwEBAZjN5t/sN5lMze5vOP5MYygUCsWfmaSkJEaMGNHoRWpNWt241NTUsHLlSq655prfvKfT6Vq9c9uFGEOhUCgudSwWCy+++CKffPIJS5cuZcmSJRw+fLjVxmt145KUlER0dDS+vr4A+Pj4kJOTA0BOTg7e3t6ArEiaSh1kZ2djMpl+s99sNje7v+H4M42hUCgUlwMLd2QR99pK2jy9lLjXVrJwx/kLSSYnJxMeHk5oaCj29vaMHj26MZTQGrS6cVm6dCmjR9tE4uLj41m4cCEACxcuZNiwYSft1zSNnTt34ubmhr+/PwMHDmTdunUUFxdTXFzMunXrGDhwIP7+/ri6urJz5040TWv2XKeOoVAoFJc6C3dkMXP+brKKKtGArKJKZs7ffd4G5nQhhtaiVbXFKioq+PXXX3nxxRcb902bNo1HHnmEefPmERQUxLvvvgvAkCFDWLNmDQkJCTg5OfHqq68C4OnpyQMPPMCkSZMAmD59Op6engC88MILzJw5k6qqKgYPHszgwYPPOIZCoVBc6ryx7CCVtSerElfWWnhj2UHGdQ++SLM6d1rVuDg7O7Np06aT9nl5efH555//5lidTscLL7zQ7HkmTZrUaFya0rVrV5YsWfKb/acbQ6FQKC51jhc1r1R8uv1ny+lCDK2FqtBXKBSKS4ggz+b7r5xu/9nStWtX0tLSyMjIoKamhqVLlxIfH39e5zwTyrgoFArFJcSTIzriZHdym2YnOwNPjuh4Xuc1Go3MmjWLu+++m1GjRjFy5Ejat29/Xuc843itdmaFQqFQnDMNcZU3lh3keFElQZ5OPDmiY4vEW4YMGcKQIUPO+zxngzIuCoVCcYkxrnvwZRW8bw7lFlMoFApFi6OMi0KhUChaHGVcFAqFQtHiKOOiUCgUihZHGReFQqFQtDjKuCgUCsWfgJkzZ9K/f//G9ietjTIuCoVC8SdgwoQJfPLJJxdsPGVcFArFuVFXC1brxZ7FlU3yd/BOF5jtKf8mf3fep+zduzceHh7nP7ezRBkXhUJx9hxOhI/6wxcTwbz/Ys/myiT5O/hhBhRnAJr8+8OMFjEwFxJlXBQKxdlRVws/PQX5KXB0JexbcLFndGWS+CLUnqKAXFsp+y8jlHFRKM6GkixITYKizIs9k4uH3gAeobbXjhfOxfKnovg0f2On23+JorTFFIqmmPdC3iEI6AY+bWVfSRZ8cwsc3w5+UXDTt+AVfubzWK1gqQU7h9af84VCr4cRr8Le+eDoCd1uuNgzujLxCKl3iTWz/zJCrVwUigZy9sN/roW5d8BXU2xPinkpYlgAcvdD7oEzn6cgFb67Bf7ZG/YtbtUpX3BMURD/LAyYDi4+F3s2VybDZoHdKb1b7Jxk/3nw2GOPccMNN5CamsrgwYOZO3fueZ3v92jVlUtJSQnPPfcchw4dQqfT8eqrr9KmTRseffRRsrKyCA4O5t1338XDwwNN03jllVdYs2YNjo6OvPbaa0RHRwOwYMECPvroIwDuv/9+xo8fD8CePXsa2xwPGTKEZ599Fp1OR1FRUbNjKBRnJP8wVBbUb6dA0TF5WvRuC74dIe8geIaDz+/0wDiwVH4AFj
8I4XHqRqw4e7pdL/8mvigPOB4hYlga9v9B3n777RaY3NnTqiuXV155hUGDBvHzzz+zaNEi2rZty5w5c+jfvz/Lly+
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"has Porch\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Neighborhoods\n",
"\n",
"The instructors' notes say:\n",
"\n",
"> For instructors who cover nominal variables in their class, I would suggest incorporating the neighborhood variable into their models by converting it to a set of dummy (indicator) variables. I have found that the coefficients for the continuous variables tend to have values with more realistic interpretations when used in conjunction with the neighborhood variable.\n",
"\n",
"Indeed, plotting the price distributions by neighborhood reveals significant differences in the price level."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 18,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoYAAAIgCAYAAAAYz6iKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAADSkElEQVR4nOzdeVhUZfsH8O8giyuIBJhm9rr3qmmrqYWvGKipRW7ZqJlkbmi5Ye5L7luZWuZSairmCi4ooJAi7ruhaCqKuAA6DJvszPP7A+f8GBlgmBmYAb6f6+rKc86cc+4ZZrnPfZ5FJoQQICIiIqIKz8LUARARERGReWBiSEREREQAmBgSERER0XNMDImIiIgIABNDIiIiInqOiSERERERAWBiSFSmDBw4EE2bNsWePXtMHYpRPXjwAE2bNkXTpk1NHYpZKanXZc+ePWjatCkGDhxY7H3PnDmDpk2bwtXV1agxGUt5/IyY+2tO5YulqQMgKk8mTZoEX1/ffOurVauGevXqoV27dhg0aBBq165tguiouAYOHIizZ88CANzc3LBq1aoCH/vFF1/g4sWLGDVqFEaPHl1aIRIRGRUrhkQlwMrKCi+99BJeeuklODg4IDU1FTdu3MAff/yBHj164Pz583od9+WXX8Z//vMf1KhRw8gRU1EOHz6M8PDwUj2nlZUV/vOf/+A///lPqZ6XiCouVgyJSsCbb76JzZs3S8tpaWkIDAzEvHnzkJSUhDFjxuDIkSOoXLlysY67ePFiY4dKxbB8+XKsX7++1M7n7OyMgICAUjsfERErhkSloEqVKvDw8MDUqVMBAE+ePMGRI0dMHBXp6sMPP4RMJsPx48f1rvYSEZUFTAyJStHHH38MC4vcj921a9ek9XkbzCclJWHJkiXo0qULWrVqhXfeeUfr47QRQuDgwYMYOnQo2rdvjxYtWuDDDz9E//79sXHjRiiVSq37nT9/HmPHjoWLiwtatGiBNm3a4KuvvsKBAwdQ0HTq0dHRmDlzJjp37ow33ngDrVq1QseOHTFw4ECsWbMG8fHx+r5MuHDhAoYNG4b3338frVq1wqeffootW7ZApVJpPO7Bgwdo1qwZmjZtin///bfA4z179gxvvvkmmjZtirCwsGLH06xZM3Tp0gUA8PPPPxd7f7Xivs66dD75+++/MXDgQLz99tt466230LdvX6mdq64dMUJCQjBw4EC88847ePPNN9G3b18cOHBAp+ek3vfdd9/Fm2++ic8//xz79+8vdB+VSoWdO3diwIABeO+999CyZUu4urpi+vTpiIqK0rrPix0wjh07hiFDhqBt27Zo1qwZNm7cmG+f9PR0rFy5UnqPtm3bFmPHjsW9e/cKje/69euYMGECOnToIP2dvv76awQGBhb5egQFBeHrr7/G+++/jxYtWsDFxQXjx4/X+Lxrk5ycjEWLFsHV1RUtW7ZEhw4dMG3aNMTExBR5TiJj4q1kolJkbW0Ne3t7KBQKpKSk5NseHx+Pnj17Ijo6GtbW1rCystL52MnJyfj2229x8uRJAIBMJoOtrS0SExNx/vx5nD9/Hra2tujZs6fGfkuWLNG4PVq9enUkJibi1KlTOHXqFEJCQrB06VIpoQVyk9qBAwfi2bNnAHLbwlWpUgWPHj3Co0ePcPbsWbz++utwcXEp1usDAIGBgRg3bhyys7Nha2uL7Oxs3LhxA3PmzMGpU6fw888/w9Iy96vrlVdeQbt27XDixAns2bMHkyZN0nrMgwcPIjU1FXXq1EG7du2KHRMAjB49GkFBQTh79ixOnDiB9u3bF2t/fV7novz6669SoiqTyVCjRg38888/uHLlCiIiInQ6xi+//IIVK1bAwsIC1apVQ2pqKq5cuYLx48fj6dOn+Oqrrwrcd+PGjViwYIF07vT0dFy+fBmXL1/GpUuXMGPGjHz7pKWlYdSoUVKCbmVlhcqVK+Phw4fYsWMH9u7dix9//BEfffRRgef944
8/sGjRIum82l6zlJQUfPHFF7h+/Tqsra1hYWGB+Ph4HDx4ECdPnsTOnTvx6quv5ttv+/btmDVrlnQRYmtri+TkZISFhSEsLAyffPIJFi5ciEqVKmnsp1KpMHnyZPj5+QEAKlWqhGrVqiE2NhYHDhzAwYMHMX36dMjl8nznjIuLw4ABA6Sk2MbGBklJSdi5cyeCg4Mxbty4Al8LImNjxZCoFKWnp0uVNG0dSH755RdkZ2dj3bp1uHLlCi5evIjdu3frdOwJEybg5MmTqFy5MqZOnYqzZ8/i7NmzuHLlCg4ePAgvLy/Y2tpq7LNp0yasX78eL730EubMmYPz58/jwoULuHz5Mn766Sc4OjrC398f69at09hv0aJFePbsGVq1agVfX1+Eh4fj3LlzuHz5Mnbt2oVBgwbp3UFm6tSpaNu2LY4cOYJz587h3Llz8Pb2hoWFBY4cOZKvjV/v3r0BAPv27UN2drbWY6orZh4eHsVKvPJq2LAhPvnkEwC5bQ2LQ9/XuTDqJBkAevbsiRMnTuDcuXM4e/YsRowYgU2bNhWZHEZEROCXX37Bd999hzNnzuD8+fM4ceIEOnfuDAD48ccfkZCQoHXf+Ph4LF26FB4eHggLC8O5c+dw+vRpeHp6AgC2bt2qtXK4YMEChIWFwdraGrNnz8bFixdx/vx5BAQE4L333kNGRgYmTJiAu3fvaj3v06dPsXTpUsjlcum8ly5dkiq6aitXrkRiYiLWr18vJapbt25F7dq1kZCQgGXLluU79sWLF6WksHPnzjh27Jj0HhwzZgxkMhn27duHNWvW5Nt3/fr18PPzg0wmw3fffYezZ8/i3LlzCA0NRZcuXaBSqTBnzhycO3cu376TJk1CVFQU7O3t8euvv2rEW716dSxatEjra0FUIgQRGc33338vmjRpIgYMGKB1++bNm0WTJk1EkyZNREBAgLR+wIABokmTJqJ58+bi5s2bBR5f/bjdu3drrD969Kho0qSJaNq0qTh27JhOsSYmJorWrVuLli1bioiICK2PuXjxomjatKl49913RUZGhrT+jTfeEE2aNBGXL1/W6VxFiY6Oll6Xbt26aZxLbcWKFaJJkybirbfeEqmpqdL6jIwM0aZNG9GkSRNx+PDhfPtFRkZKr839+/eLFZf69V6yZIkQQoj79++L5s2baz1Xv379RJMmTcSKFSs01hvyOud9XV7Uv39/0aRJE+Hp6SlUKlW+7TNnzpT2ffH9snv3bmnbr7/+mm/ftLQ08f7774smTZoIX19fjW2nT5+W9h08eLDWc6s/B25ubhrbo6OjRbNmzUSTJk3Etm3b8u2XmpoqPvroI9GkSRPh7e1d4HnHjRuXb1819d/sjTfeEPfu3cu3PSAgQDRp0kS0aNEi3/vsyy+/FE2aNBH9+vUT2dnZ+fZdtmyZaNKkiWjdurVITk6W1qekpIi33npLNGnSRCxdujTfftnZ2eKLL74QTZo0EXK5XGPbuXPnpOd16tSpfPveu3dPtGjRQjRp0kR07NixwOdNZCysGBKVMCEEHjx4gN9//x1LliwBANStWxcdO3bM99gPP/wQTZo0KfY51LevPvjgA51v3wYGBiI1NRXt2rVDs2bNtD7mzTffxCuvvILExESNNlLVq1cHkNuJxtgGDx4Ma2trrettbGyQkpKCEydOSOutra3h4eEBAFqrq+pq4XvvvYd69eoZFFu9evXQq1cvALltDUUB7S/zMuR1Lkh8fLxUeRoyZAhkMlm+x3zzzTdFHsfGxgaDBg3Kt75y5cr44IMPAKDQtptDhw7Veu7hw4cDAKKionDjxg1p/ZEjR6BSqeDo6Ig+ffrk269KlSoYMmQIgNzhgXJycrSe9+uvvy7kWeXq3Lkz6tevn2+9q6srZDIZMjMzcf/+fWl9QkICzpw5AwAYNmxYvlvFQO5ramNjg9TUVBw7dkxaf/LkSaSkpMDKykqKP69KlSph5MiRAHLbmeb93Kh7nbdu3Rrvv/
9+vn3r16+Pjz/+uMjnS2QsTAyJSsDZs2elTgPNmjVDp06dsHjxYqSnp8PR0RG//PKL1uTnzTff1Ot8V65cAQB06NB
"text/plain": [
"<Figure size 720x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(figsize=(10, 8))\n",
"sns.boxplot(x=\"Neighborhood\", y=\"SalePrice\", data=df, ax=ax)\n",
"ax.set_title(\"Prices by Neighborhood\", fontsize=24)\n",
"ax.set_xlabel(\"Neighborhood\", fontsize=18)\n",
"ax.tick_params(axis='x', labelrotation = 45)\n",
2021-05-25 08:22:14 +02:00
"ax.set_ylabel(\"House Price\", fontsize=18);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The 28 neighborhoods are encoded as factor variables."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 19,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"neighborhood = pd.get_dummies(df[\"Neighborhood\"], prefix=\"nhood\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"df = pd.concat([df, neighborhood], axis=1)\n",
"del df[\"Neighborhood\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 20,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(neighborhood.columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 21,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2898, 28)"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 21,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 22,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nhood_Blmngtn nhood_Blueste nhood_BrDale nhood_BrkSide \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_ClearCr nhood_CollgCr nhood_Crawfor nhood_Edwards \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Gilbert nhood_Greens nhood_GrnHill nhood_IDOTRR \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Landmrk nhood_MeadowV nhood_Mitchel nhood_Names \\\n",
"Order PID \n",
"1 526301100 0 0 0 1 \n",
"2 526350040 0 0 0 1 \n",
"3 526351010 0 0 0 1 \n",
"4 526353030 0 0 0 1 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_NoRidge nhood_NPkVill nhood_NridgHt nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_OldTown nhood_SWISU nhood_Sawyer nhood_SawyerW \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Somerst nhood_StoneBr nhood_Timber nhood_Veenker \n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 22,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[neighborhood.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Nominal Features\n",
"\n",
"This section investigates the rest of the nominal variables with regard to which realizations / encoding might be a useful predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 23,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Alley Type of alley access to property\n",
"Bldg Type Type of dwelling\n",
"Central Air Central air conditioning\n",
"Condition 1 Proximity to various conditions\n",
"Condition 2 Proximity to various conditions (if more than one is present)\n",
"Exterior 1st Exterior covering on house\n",
"Exterior 2nd Exterior covering on house (if more than one material)\n",
"Foundation Type of foundation\n",
"Garage Type Garage location\n",
"Heating Type of heating\n",
"House Style Style of dwelling\n",
"Land Contour Flatness of the property\n",
"Lot Config Lot configuration\n",
"MS SubClass Identifies the type of dwelling involved in the sale.\n",
"MS Zoning Identifies the general zoning classification of the sale.\n",
"Mas Vnr Type Masonry veneer type\n",
"Misc Feature Miscellaneous feature not covered in other categories\n",
"Roof Matl Roof material\n",
"Roof Style Type of roof\n",
"Sale Condition Condition of sale\n",
"Sale Type Type of sale\n",
"Street Type of road access to property\n"
]
}
],
"source": [
"print_column_list(set(NOMINAL_VARIABLES) - set([\"Neighborhood\"]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Alleys\n",
"\n",
"Almost no house has access to an alley."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 24,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACl2klEQVR4nOzdd3iT5frA8W/SJG26d7p3yyiFUmahUCi07I2KGzyKR3ArjiOOo6jnuEX0KA6cqGwZMltW2aOslrZ073SmaTqyf3/kR7CypQXR93NdXrZv3rzP06T0zvuM+xaZzWYzAoFAIBB0IPGN7oBAIBAI/nqE4CIQCASCDicEF4FAIBB0OCG4CAQCgaDDCcFFIBAIBB1OcqM78GcxYMAA/P39b3Q3BAKB4KZSXl7OgQMHzjsuBJf/5+/vz6pVq250NwQCgeCmMnXq1AseF4bFBAKBQNDhhOAiEAgEgg4nBBeBQCAQdDhhzkUgEAj+IL1eT1lZGW1tbTe6K53Ozs6OgIAApFLpFZ0vBBeBQCD4g8rKynByciIkJASRSHSju9NpzGYzdXV1lJWVERoaekXPEYbFBAKB4A9qa2vDw8PjLx1YAEQiER4eHld1hybcuQgEgr8Es9nM7rLdFDYW0tenL9Ge0del3b96YDnran9OIbgIBIK/hENVh3g47WHMmPGSe7F03FJ8HHxudLf+toRhMYFA8JdQ3VKNGUt5qprWGhq1jRc9989axmrbtm106dKF/Px8wDKnM378eAAOHDjAgw8+eCO7d1WE4CIQCP4S+vn0Y4j/ECQiCQ/2fJBQl/MnnvUmPT9k/cCUtVP43/H/0apvvQE9vbj169fTp08fNmzYcKO7cs2E4CIQCP4SFA4K3k18l83TN/NQr4eQ2cjOOyerLov/HPoP+ap8Pjn2CYerD9+Anl5Yc3MzR44c4fXXX79scGlpaeH5559n+vTpTJ48mW3btgFw5513cvr0aet5t99+O9nZ2Z3a74sRgotAIPjLkEvleNt7YyO2ueDjtmJbJKJzU81yG/n16tplpaamMmTIEEJDQ3Fzc+PUqVMXPffTTz9l4MCBrFixgm+//Za3336blpYWpk+fbs2RWFhYiFarpWvXrtfrR2hHCC4CgeBvo6tHV94d9i53d7+b/wz5D3GKuBvdJasNGzYwbtw4AMaOHXvJu5f09HQ+//xzJk2axN13341Wq6WyspLRo0ezY8cO9Ho9K1euvGhSyetBWC0mEAj+VpKCkkgKSrrR3WhHpVKxf/9+cnNzEYlEGI1GRCIRd9xxx0Wfs3DhQsLCws47PmjQIFJTU9m4ceMNzfQu3LkIBALBDbZ582YmTZrE9u3bSUtLY+fOnQQEBFBVVXXB8xMSEvj++++tq96ysrKsj91yyy0sWLCAmJgYXFxcrkv/L0QILgKBQHCDrV+/npEjR7Y7lpKSwmeffXbB8+fMmYPBYGDixImMGzeODz/80PpYjx49cHR0vKFDYiAMiwkEAsEN991335137J577uGee+6xfj9gwAAGDBgAWJJIvvrqqxe8llKpxGw2k5CQ0DmdvULCnYtAIBD8RaxZs4Zbb72Vxx9/HLH4xv55F+5cBAKB4C9i8uTJTJ48+UZ3AxDuXAQCgUDQCTotuBQUFDBp0iTrf3FxcXz99deoVCpmzZpFSkoKs2bNorHRkv/HbDazYMECkpOTmTBhApmZmdZrrV69mpSUFFJSUli9erX1+KlTp5gwYQLJycksWLDAunLiYm0IBAKB4ProtOASFhbGL7/8wi+//MKqVauQy+UkJyezePFi4uPj2bJlC/Hx8SxevBiAXbt2UVRUxJYtW3jttdd45ZVXAEugWLRoEcuWLWP58uUsWrTIGixeeeUVXnvtNbZs2UJRURG7du0CuGgbAoFAILg+rsuw2L59+wgMDMTf35/U1FTrmOBvc+KcPS4SiYiNjUWtVlNdXU16ejqDBw/G1dUVFxcXBg8ezO
7du6murkaj0RAbG4tIJGLy5Mmkpqa2u9bv2xAIBALB9XFdgsuGDRusaaPr6urw9vYGwMvLi7q6OsCyfM7H51ztBR8fH5RK5XnHFQrFBY+fPf9SbQgEAsFfTW1tLU899RQjRoxg6tSp3HbbbWzduvWKn99Zqfw7fbWYTqcjLS2Np5566rzHRCJRp1dxux5tCAQCwY1gNpuZO3cukydP5t133wWgvLyctLS0ducZDAYkkuu7OLjTW9u1axfR0dF4enoC4OHhQXV1Nd7e3lRXV+Pu7g5Y7kh+m+qgqqoKhUKBQqHg4MGD1uNKpZL+/ftf9PxLtSEQCAQ30pqMct7enEOFqhU/VznzRnVhcm//P3y9/fv3I5VKuf32263H/P39ufvuu1m1ahVbtmyhpaUFk8mEl5cXkyZNYtiwYQA899xzDBs2DDc3t2v9sS6o04fFfpvpEyApKYk1a9YAlg0/I0aMaHfcbDZz7NgxnJyc8Pb2JiEhgfT0dBobG2lsbCQ9PZ2EhAS8vb1xdHTk2LFjmM3mC17r920IBALBjbImo5znV52kXNWKGShXtfL8qpOsySj/w9c8c+YM3bt3v+jjWVlZLFy4kO+//56xY8eyceNGwDKitG/fPmug6QydGlxaWlrYu3cvKSkp1mOzZ89mz549pKSksHfvXmbPng1AYmIigYGBJCcn8+KLL/Lyyy8D4Orqypw5c5g+fTrTp09n7ty5uLq6AvDyyy8zf/58kpOTCQoKYujQoZdsQyAQCG6Utzfn0Ko3tjvWqjfy9uacDmvj3//+NxMnTmTatGkA1sVQAEOHDuXAgQPodDp27dpF3759sbOz67C2f69Th8Xs7e05cOBAu2Nubm588803550rEomsAeX3zgaW34uJiWH9+vXnHb9YGwKBQHCjVKguXFL5YsevRGRkJFu2bLF+//LLL1NfX2/9eymXnyuGZmtrS//+/dm9ezcbN25k7Nixf7jdKyHs0BcIBILrwM/1wlUvL3b8SgwcOBCtVsvSpUutx9ra2i56/tixY1m1ahWHDx9myJAhf7jdKyEEF4FAILgO5o3qglzavvyyXGrDvFFd/vA1RSIRH3/8MYcOHSIpKYnp06fz7LPP8vTTT1/w/MGDB3Po0CEGDRqETCb7w+1eCSFxpUAgEFwHZ1eFdeRqMQBvb2/ef//9Cz72+5ouUqm03epbaJ/KvyMJwUUgEAiuk8m9/a85mNwshGExgUAgEHQ4IbgIBAKBoMMJwUUgEAgEHU4ILgKBQCDocEJwEQgEAkGHE1aLCQQCwU2sW7duREVFYTQaCQsL47///W+7nfk3inDnIhAIBDcxOzs7fvnlF9avX49UKuWnn3660V0ChOAiEAiukt6ox2Q23ehu3JxOLIP3e8Arrpb/n1jWoZfv27cvxcXFpKWlccsttzB58mRmzpxJbW0tJpOJpKQk1Gq19fyUlBRqa2upr6/nkUceYdq0aUybNo0jR45cc1+E4CIQCK7YnvI9TFs7jX9u/Sd5DXk3ujs3lxPLYN2j0FgKmC3/X/dohwUYg8HArl27iIqKok+fPixbtow1a9Ywbtw4vvjiC8RiMUlJSdYqlcePH8fPzw9PT09ef/117r33XlauXMlHH33E/Pnzr7k/wpyLQCC4Inqjnv8e/C+F6kIK1YVsKd5ChFvEje7WzSP1VdD/LgOyvtVyvOetf/iybW1tTJo0CbDcuUyfPp3CwkKeeOIJampq0Ol0BAQEAJbElR9//DHTpk1jw4YN1szIe/fuJS/v3IcFjUZDc3MzDg4Of7hfQnARCK6AsllJkbqIIKcgfB19b3R3bggbsQ0+Dj4UqgsBcJQ53uAe3WQay67u+BU6O+fyWwsWLGDmzJmMGDGCAwcOsGjRIgB69+5NSUkJ9fX1bNu2jYceeggAk8nEsmXLsLW1vaa+/JYwLCYQ/MaZhjNsLtpMsbrYekzZrOSx7Y9x/5b7mZM6h/Kmy1cONJlN6Iy6zuzqdScWiX
mm3zM82PNB5vWbx4SwCTe6SzcXl4CrO34NmpqarGXfz1blBUsW5ZEjR/Lmm28SHh5uLXGckJDAd999Zz3v9OnT19w
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Alley\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 25,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Alley\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Building Type\n",
"\n",
"The type of a building clearly affects the valuation. The two types of townhouses as well as the 2-family condo and duplex type are summarized into a single category. This makes sense a) semantically, and b) by looking at the two sub-clusters in the scatter plot."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 26,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACr3ElEQVR4nOydd3iUVfbHP9NnkkySSZlJJb2REHoJXXoRQYq9wNrL2ntB17ara1t115+sva6CFBEVFJCA9BoCoaT3TOpMptffHwMDSEBKAqLv53l8nNz3vvfemZA5773nnO8Reb1eLwICAgICAp2I+HwvQEBAQEDgj4dgXAQEBAQEOh3BuAgICAgIdDqCcREQEBAQ6HQE4yIgICAg0OlIz/cCfi8MHDiQ2NjY870MAQEBgQuKmpoaNm3adFy7YFwOERsby8KFC8/3MgQEBAQuKKZPn95hu3AsJiAgICDQ6QjGRUBAQECg0xGMi4CAgIBApyP4XAQEBP6UOJ1Oqqursdls53spFwRKpZK4uDhkMtkp9ReMi4CAwJ+S6upq1Go1iYmJiESi872c3zVer5fm5maqq6tJSko6pXuEYzEBAYE/JTabjfDwcMGwnAIikYjw8PDT2uUJOxcBAYE/BF6vl9X79RTrTQxKCic3PvQ37xEMy6lzup+VYFwEBAT+EGwobeaGj7bi9YJWrWDJHUOIDlWd72X9aRGOxQQEBP4Q6I02Dlen0rfbabM4T9j3bMtYZWVlMXXqVC655BIuvfRStm/fDvj8OBdffHGH91x77bXs3r37lMb/+uuvmTp1KlOnTiUnJ4cpU6YwdepUXn755bNa97lE2LkICAj8IRiUHM5FGZGsPdjE7SNTSNYGHtfH6fbw6cYKPt9UyXMXaXB7vEjEp380plQqWbJkCQBr167l1Vdf5dNPPz3r93CYGTNmMGPGDABGjRrFRx99RFhYWKeNfy4QjIuAgMAfgqgQFf+5ug9Gq4sItaJDo7G72sDflu4FwGhVY3G4UCtPLbT2RJhMJoKDg49rt9lsPProo+zbt4/k5ORjnOHz58/n3XffRa1Wk5mZiVwuZ+7cuSedZ8GCBezfv5/HH38cgK+++ori4mKuu+46brzxRrKzs9m7dy9paWm8+OKLqFQqCgsL+cc//oHFYkGj0fD3v/8drVZ7Vu/3VBGOxQQEBP4wqORSdCHKE+5GlDIx0qOunalD32azMXXqVCZMmMATTzzB7bffflyfL774AqVSyffff89f//pX9uzZA0BDQwNvv/02X375JV988QWlpaWnNOfEiRNZvXo1TqfvuG/hwoX+3U1ZWRlXXXUV33//PYGBgXz++ec4nU6ee+453njjDX/f11577Yze75kg7FwEBAT+NHSPCeE/V/dhU2kLYYFuAuWSMxrn6GOxHTt28PDDD/Ptt98e02fLli1ce+21AGRmZpKRkQHA7t276d+/P6GhoQBMmDCB8vLy35wzMDCQQYMG8fPPP5OcnIzT6SQjI4Pq6mqio6Pp27cvAJdccgmffPIJw4YN48CBA8yZMwcAj8dDZGTkGb3fM0EwLgICAn8qxmVHMS47iqKiok4JRe7duzetra20tLR0wupOzqxZs/i///s/kpOTj1Ej/vX7EIlEeL1e0tLS+PLLL7t8XR0hHIsJCAgInAUlJSW43W7/TuQw/fv39+9mDhw4wP79+wHo0aMHW7ZswWAw4HK5WLFixSnP1bNnT+rr6/n222+PiUqrra1lx44dAHz77bf07duXpKQkWlpa/O1Op5ODBw+ezVs9LYSdi4CAgMBpctjnAr6w5hdffBGJ5NgjtiuvvJJHH32UiRMnkpKSQnZ2NgA6nY5bbrmFWbNmERISQnJyMmq1+pTnnjhxIkVFRYSEhPjbkpKS+Oyzz3jsscdITU3lyiuvRC6X88Ybb/Dcc8/R3t6O2+3m+uuvJy0trRM+gd9GMC4CAgICp0lRUVGH7XFxcf7dilKpPK
ED/eKLL+byyy/H5XJx5513MmbMmBPOtWrVqmN+3rZtG7Nnzz6mTSqVdpgDk5WVxWeffXayt9JlCMZFQEBA4Bzz1ltvsX79eux2O0OHDj2pcTmM0Whk1qxZZGRkkJeXdw5WeXYIxkVAQEDgHPPwww+f9j3BwcEsX778uPajd0u/JwSHvoCAgIBAp9NlxqW0tNSvjTN16lT69OnDhx9+SFtbG3PmzGHcuHHMmTMHg8EA+Jxizz33HGPHjmXKlCn+hCOARYsWMW7cOMaNG8eiRYv87YWFhUyZMoWxY8fy3HPP+fWCTjSHgICAgMC5ocuMS3JyMkuWLGHJkiUsXLgQlUrF2LFjmTdvHnl5eaxYsYK8vDzmzZsHQH5+PuXl5axYsYJnn32Wp59+GvAZirfeeouvvvqK+fPn89Zbb/mNxdNPP82zzz7LihUrKC8vJz8/H+CEcwgICAgInBvOybHYhg0biI+PJzY2lpUrVzJt2jQApk2bxk8//QTgbxeJRPTq1Quj0Yher2fdunUMGTKE0NBQQkJCGDJkCGvXrkWv12MymejVqxcikYhp06axcuXKY8b69RwCAgICAueGc+LQX7ZsmT/hp7m52S+cFhkZSXNzM+DT24mKivLfExUVRUNDw3HtOp2uw/bD/U82h4CAgMDviUcffZSff/6Z8PBwv1P+kUceYfPmzf7clxkzZnDdddedz2WeEV1uXBwOB6tWreL+++8/7ppIJOrySnDnYg4BAQGBM2H69Olcc801x0WPPfTQQ0yYMOE8rapz6HLjkp+fT3Z2NhEREQCEh4ej1+vRarXo9Xp/jQKdTkd9fb3/vvr6enQ6HTqdjs2bN/vbGxoaGDBgwAn7n2wOAQEBgTNl8Y4a/rl8P7VtVmJCVTw4PoNpvWPPasz+/ftTXV39m/3eeustVq9ejd1up3fv3jzzzDOIRCKuvfZasrKy2Lp1K1arlRdffJF58+Zx4MABJk6cyL333ntW6zsbutznsmzZMiZPnuz/edSoUSxevBiAxYsXM3r06GPavV4vO3fuRK1Wo9VqGTp0KOvWrcNgMGAwGFi3bh1Dhw5Fq9USFBTEzp078Xq9HY716zkEBAQEzoTFO2p4dOFuatqseIGaNiuPLtzN4h01XTLfSy+95I+03b9/P9dccw1ff/013377LTabjdWrV/v7ymQyFi5cyBVXXMHtt9/O3Llz+fbbb1m0aBGtra1dsr5ToUt3LhaLhfXr1/PMM8/4226++WbuueceFixYQExMDK+//joAI0aMYM2aNYwdOxaVSsULL7wAQGhoKLfffjszZ84E4I477vALxD311FM8+uij2Gw2hg8fzvDhw086h4CAgMCZ8M/l+7E63ce0WZ1u/rl8/1nvXjri18diy5cv591338Vms9HW1kZaWhqjRo0C8P8/PT2dtLQ0v785Pj6e+vp6NBpNp6/vVOhS4xIQEMCmTZuOadNoNHz00UfH9RWJRDz11FMdjjNz5ky/cTmaHj16dJiZeqI5BAQEBM6E2jbrabV3Jna7nb/97W98/fXXREdH8+abb2K32/3X5XI5AGKx2P/68M8ul6vL13cihAx9AQEBgd8gJlR1Wu2dyWFDotFoMJvNHUrA/B4RtMUEBAQEfoMHx2fw6MLdxxyNqWQSHhyfcVbj3nfffWzevJnW1laGDx/OX//61+P6BAcHM2vWLC6++GIiIiLo0aPHWc15rhB5D2um/MmZPn06CxcuPN/LEBAQOEcUFRWRlZV1yv27IlrsQqOjz+xE353CzkVAQEDgFJjWO/ZPZ0zOBsHnIiAgICDQ6QjGRUBAQECg0xGMi4CAgIBApyMYFwEBAQGBTkcwLgICAgICnY4QLSYgICBwHqirq+Ohhx6iubkZkUjEZZddxvXXX3/WkvsFBQW8+OKLNDU1oVKpyM7O5oknnkCl6vqEz6MRjIuAgIDAeUAikfDII4+QnZ2NyWRixowZDBkyBDhzyf2mpibuvvtuXn31VX
r37g3ADz/8gNlsFoyLgIDA7xuny4NELEIs/pPVSSr4ClY+A4ZqCImD0XMh97IzHk6r1fpFJoOCgkhOTvYXPOyI3r1
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Bldg Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 27,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Unify the two townhouse types into one.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"Twnhs\" if x in (\"TwnhsE\", \"TwnhsI\") else x\n",
")\n",
"# Unify the two kinds of 2-family homes.\n",
"df[\"Bldg Type\"] = df[\"Bldg Type\"].apply(\n",
" lambda x: \"2Fam\" if x in (\"2FmCon\", \"Duplx\") else x\n",
")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 28,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"build_type = pd.get_dummies(df[\"Bldg Type\"], prefix=\"build_type\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"df = pd.concat([df, build_type], axis=1)\n",
"del df[\"Bldg Type\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 29,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend(build_type.columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 30,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" build_type_1Fam build_type_2Fam build_type_Twnhs\n",
"Order PID \n",
"1 526301100 1 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 1 0 0\n",
"4 526353030 1 0 0\n",
"5 527105010 1 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 30,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[build_type.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Air Conditioning\n",
"\n",
"Air conditioning clearly increases the valuation (\"steeper\" regression line with respect to the overall living area)."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 31,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACKh0lEQVR4nO2dd1hUZ9qH76H3KgxVEMSGBXvBkqBg7xpjupvETWJiTNtNVVN3N724yRfTNrtpxp5ojAUr9o5dUVFAGHrvM+f74wEGFbGB9b2vi8szZ04d8Pzm6TpN0zQUCoVCoWhALK73BSgUCoXi1kOJi0KhUCgaHCUuCoVCoWhwlLgoFAqFosFR4qJQKBSKBsfqel/AjUL37t3x9/e/3pehUCgUNxUpKSls3br1vPVKXKrw9/dnwYIF1/syFAqF4qZizJgxda5XbjGFQqFQNDhKXBQKhULR4ChxUSgUCkWDo2IuCoVCcZlUVFSQnJxMaWnp9b6Ua4adnR0BAQFYW1tf0vZKXBQKheIySU5OxtnZmeDgYHQ63fW+nEZH0zSysrJITk6mWbNml7SPcospFArFZVJaWoqnp+dtISwAOp0OT0/Py7LUlOWiUChuDTQNjq2AjKMQ3Bv8Ozbq6W4XYanmcu9XiYtCobg1SNwAP08QkXHygUdXg6sqjL5eKLeYQqG4NShIFWEBKEyDkuwLb9sIY6wyMjJ45plnGDBgAGPGjOHRRx/l5MmTV3SsBQsWYDAYLnu/zz77jG+++eaC748cOZJnnnnmrHWffPIJmzZtuuxzXQwlLgqF4tYguDc0jwELK+j7N2jS4vxtjBWw5Qv4vAes/ReUFTXIqTVN48knn6Rbt26sWrWKBQsW8Nxzz5GVlXVFx1u4cCHp6el1vmc0Gq/omMePH8dkMrFjxw6Ki4tr1j/99NP06tWrwc5TjXKLKRSKWwMXf7jreyjNAydvsLA8f5szu+HPF2V57Tvg3wnCoq/61Fu2bMHKyoqJEyfWrGvVqlXN8tdff82yZcsoLy8nOjqaqVOnkpyczKOPPkrnzp3ZvXs3er2ezz//nLVr17J//36ef/557OzsmDNnDkOGDGHw4MFs2rSJRx55hKKiIubMmUNFRQVBQUG8++672Nvb13uNS5YsYcSIEZw4cYLY2FiGDx8OwIsvvsgdd9zBoEGDiIqKOus8Q4cOveLPRFkuCoXi1sHGAVx86xYWACs7sWyqsa7/gXypHDt2jPDw8Drfi4uL49SpU8ybN4/Fixdz4MABtm/fDsCpU6e49957Wbp0Kc7OzixfvpxBgwbRtm1b3n//fRYvXoydnR0Abm5uLFy4kKFDhxIdHc38+fP57bffCAkJYd68eRe9xj/++IOhQ4cydOhQli5desHtap/nalCWi0KhuH3wbQ93/RcS48CvEzQ93x3U0GzcuJGNGzcyatQoAIqLi0lMTMTX15eAgABat24NQHh4OCkpKRc8zpAhQ2qWjx07xscff0xBQQFFRUX07t273mvYt28f7u7u+Pn5odfrefnll8nNzcXNza3e81wNSlwUCsXtRauh8tOAhIWFsXz58jrf0zSNyZMnc/fdd5+1Pjk5GRsbm5rXlpaWlJWVXfActd1eL774Ip9//jmtWrViwYIFbNu2rd7rW7p0KSdPniQqKgqAwsJCVqxYwV133VXvea4G5RZTKBSKq6RHjx6Ul5czZ86cmnWHDx9mx44d9O7dm/nz51NUJMkDBoPhooF+R0fHmu3roqioCC8vLyoqKvj999/rPZbJZGLZsmX89ttvrF69mtWrV/P555+zZMmSy7jDy0dZLgqFQnGV6HQ6Zs2axTvvvMNXX32Fra0t/v7+vPzyywQHB3P8+PEay8XBwYH33nsPC4sLf7cfPXo0M2bMqAnon8vTTz/N+PHj8fDwoEOHDvUK0Y4dO9Dr9ej1+pp1Xbt25fjx4xfMSGsIdJrWCAnfNyFjxoxRw8IUCsUlcejQoZ
pYye1EXfd9oWencospFAqFosFR4qJQKBSKBkeJi0KhUCganEYTlxMnTjBy5Mian06dOvGf//yH3NxcJk2aRExMDJMmTSIvLw+QdL233nqL6Ohohg8fzoEDB2qOtXDhQmJiYoiJiWHhwoU16/fv38/w4cOJjo7mrbfeojp8dKFzKBQKheLa0GjiEhISwuLFi1m8eDELFizA3t6e6OhoZs+eTc+ePVmxYgU9e/Zk9uzZAKxfv57ExERWrFjBm2++ycyZMwERilmzZvHrr78yd+5cZs2aVSMWM2fO5M0332TFihUkJiayfv16gAueQ6FQKBTXhmviFtu8eTOBgYH4+/sTGxtbU6k6atQoVq1aBVCzXqfTERERQX5+Punp6cTFxREZGYmbmxuurq5ERkayYcMG0tPTKSwsJCIiAp1Ox6hRo4iNjT3rWOeeQ6FQKBTXhmsiLkuXLmXYsGEAZGVl4e3tDYCXl1dNMZHBYMDHx6dmHx8fHwwGw3nr9Xp9neurt6/vHAqFQnE70bJlS/75z3/WvP7mm2/47LPPrsm5G11cysvLWb16NYMGDTrvPZ1O1+jT3K7FORQKheJGxMbGhhUrVpCdXc9sm0ai0cVl/fr1hIeH06RJEwA8PT1rqkLT09Px8PAAxCJJS0ur2S8tLa2mqrT2eoPBUOf66u3rO4dCoVDcqCzanULkP1fT7MWlRP5zNYt2X7iJ5aViZWXFhAkT+P777xvgCi+PRheXpUuXntW6OSoqikWLFgGwaNEi+vfvf9Z6TdPYs2cPzs7OeHt707t3b+Li4sjLyyMvL4+4uDh69+6Nt7c3Tk5O7NmzB03T6jzWuedQKBSKG5FFu1N4acE+UnJL0ICU3BJeWrCvQQTm3nvv5ffff6egoODqL/QyaFRxKS4uZtOmTcTExNSsmzx5Mhs3biQmJoZNmzYxefJkAPr160dgYCDR0dG89tprzJgxA5DZAk888QTjxo1j3LhxTJkypaZN9IwZM3j11VeJjo6madOm9O3bt95zKBQKxY3Ie8uPUFJx9uTHkgoj7y0/ctXHdnJyYuTIkfz3v/+96mNdDo3auNLBwYGtW7eetc7d3b1OE02n09UIyrlUC8u5tGvXrs7Onhc6h0KhUNyInMktuaz1l8uDDz7ImDFjGDNmTIMc71JQFfoKhUJxnfFzq3uGyoXWXy5ubm4MGjTokiZWNhRKXBQKheI688LAlthbnz2a2d7akhcGtmywc/zlL38hJyenwY53MdQ8F4VCobjOjOroD0js5UxuCX5u9rwwsGXN+itl9+7dNctNmjRh7969V3W8y0GJi0KhUNwAjOrof9ViciOh3GIKhUKhaHCUuCgUCoWiwVHiolAoFIoGR4mLQqFQKBocJS4KhUKhaHCUuCgUCsUtiKZpTJw4kXXr1tWsW7ZsGQ8//PA1Ob8SF4VCobgF0el0vP766/zzn/+krKyMoqIiPvroowu22WpoVJ2LQqG4PCorwMISLNR30wYl/leIfQPyksE1APpPh/Z3XdUhW7RowZ133slXX31FcXExI0eOpGnTpg10wfWjxEWhUFw6CbGw7O/gGggD3wF96+t9RbcG8b/C71OhoqpRZV6SvIarFpgnn3yS0aNHY2Njw/z586/yQi8dJS4KheLSqKwQYck6Jj8HFypxaShi3zALSzUVJbL+KsXFwcGBIUOG4ODggI2NzVUd63JQdq1CcSnkp8DJ9ZCbfL2v5PphYSkWSzV2rtfvWm418i7wd3Wh9ZeJhYUFFtfYjaksF4WiNoYDkHkUfNqDZ6isy0+BX+6DM7vAqzXcMwfcg+o/jskExgqwtm38a75WWFiIK+zAArBzg/Z3X+8runVwDRBXWF3rb1KU5aJQVJN+CP4zDOY+BD9NMH9rzDwmwgKQcQgyDtd/nOyT8Ot98O+ucPC3Rr3ka46+NUS9Ar2mgKPn9b6aW4f+08H6nNkt1vay/ialUcUlPz+fqVOnMmjQIAYPHszu3bvJzc
1l0qRJxMTEMGnSJPLy8gDJyX7rrbeIjo5m+PDhHDhwoOY4CxcuJCYmhpiYGBYuXFizfv/+/QwfPpzo6GjeeustNE0
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Central Air\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use a new variable name to cleary show that the variable's *dtype* is changed from *str* to *int*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 32,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"air_cond\"] = df[\"Central Air\"].apply(lambda x: 1 if x == \"Y\" else 0).astype(int)\n",
"del df[\"Central Air\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 33,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.append(\"air_cond\")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 34,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>air_cond</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" air_cond\n",
"Order PID \n",
"1 526301100 1\n",
"2 526350040 1\n",
"3 526351010 1\n",
"4 526353030 1\n",
"5 527105010 1"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 34,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"air_cond\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \"Proximity to various Conditions\"\n",
"\n",
"The columns *Condition 1* and *Condition 2* have the same realizations and can be regarded as \"tags\" given to a house indicating the nearby presence of a) a major street, b) a railroad, or c) a park.\n",
"\n",
"The default tag \"Norm\" (implying no \"condition\") is given to 86% of the houses (this realization should therefore not be regarded as a tag!).\n",
"\n",
"From the comparison of the grouped scatter plots below, it can be assumed that the proximity of a major street decreases the valuation (lower regression slope through the cloud of blue and orange dots). Therefore, a factor variable *major_street* is extracted indicating the proximity of an \"artery\" or \"feeder\" street.\n",
"\n",
"Further, a factor variable *railway* is extracted as a relatively high proportion of the houses has such a tag. From the plots, a railway seems to not affect the valuations strongly.\n",
"\n",
"Lastly, a factor variable *park* is extracted. From the plots, this does not seem to affect the valuation much."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the \"raw\" realizations:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 35,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Feedr 174\n",
"Artery 97\n",
"RRAn 48\n",
"PosN 43\n",
"RRAe 29\n",
"PosA 24\n",
"RRNn 11\n",
"RRNe 6\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 35,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" (\n",
" df[\"Condition 1\"].value_counts() + df[\"Condition 2\"].value_counts()\n",
" )\n",
" .sort_values(ascending=False)[1:]\n",
")"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 36,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Condition 2 is only filled with anything other than \"Norm\"\n",
"# if Condition 1 already has such a tag.\n",
"assert not ((df[\"Condition 1\"] == \"Norm\") & (df[\"Condition 2\"] != \"Norm\")).any()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 37,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"86"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 37,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 86% of the houses actually have no tag.\n",
"round(100* (df[\"Condition 1\"] == \"Norm\").sum() / df.shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From a simple scatter plot it is hard to see any significant impact by a predictor."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 38,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAC0jklEQVR4nOydd3hU1daH35nJJJnUSe+9QAiBEEIvgUACAqGrqFcFCwooFsSGHRQ7KNi4Koid3iI19BZaKGkkpJA+qZOeybTvj/kYjAQIkIhcz/s8Ps7sc87ea86EWWfvvdZvifR6vR4BAQEBAYF2RHy7DRAQEBAQ+N9DcC4CAgICAu2O4FwEBAQEBNodwbkICAgICLQ7gnMREBAQEGh3TG63Af8U+vTpg4eHx+02Q0BAQOCOorCwkMTExCvaBefy/3h4eLBu3brbbYaAgIDAHcXEiRNbbReWxQQEBAQE2h3BuQgICAgItDuCcxEQEBAQaHeEPRcBAQGBa6BWqykoKKCpqel2m3JbMTc3x9PTE6lU2qbzBeciICAgcA0KCgqwtrbG19cXkUh0u825Lej1eioqKigoKMDPz69N1wjLYgICAgLXoKmpCQcHh3+tYwEQiUQ4ODjc0OxNmLkICAj8T6DX6zlQcICc6hwiXSMJdQxtt77/zY7lEjd6DwTnIiAg8D/B8ZLjPLX7KfTocZI58cvoX3C1dL3dZv1rEZbFBAQE/icobShFj6E8VVljGdWq6que2x5lrMrKynjuuecYPnw4EydO5PHHHycnJ+eW+01MTOSJJ54AICEhgWXLlgGwa9cuLly4YDzvs88+4/Dhw7c8XlVVFQ8++CA9evTgnXfeueX+LiHMXAQEBP4n6OXai0EegzhSdIRHwx7Fz/bKjWe1Ts2q9FWszlzNCN8RTO0yFZlUdsNj6fV6nnrqKcaPH8+iRYsASE9Pp6Kios0b3m1h2LBhDBs2DDA4lyFDhhAYGAjAM8880y5jmJmZ8cwzz5CZmUlmZma79AmCcxEQEPgfwcXShU+iPqFWXYuDuQMSseSKc1IrUnn/+PsAfHn6S7o6dmWQx6AbHuvo0aOYmJhw3333Gds6d+4MGBzPhx9+yIEDBxCJRMyYMYNRo0aRmJjI0qVLsbOzIyMjg9DQUD7++GNEIhH79+/nvffeQyaT0bNnT2Of69atIzk5mTFjxrB7926OHTvGV199xZIlS/jyyy8ZMmQII0eO5MiRI3zwwQdotVq6du3K22+/jampKdHR0YwfP549e/ag0WhYvHgxAQEBLT6LhYUFkZGR5OXl3fB9uBbCspiAgMD/DDKpDGcL51YdC4CZ2AwT0eVnapnkxmctAJmZmYSGth4wsGPHDtLT09m4cSPLly/nww8/pLS0FIDU1FReffVV/vjjDwoKCjh58iQqlYrXX3+dr7/+mnXr1lFWVnZFnxEREURHR/Piiy+yceNGvL29jcdUKhUvv/wyixYtYvPmzWi1Wn755RfjcTs7O9avX8+UKVP4/vvvb+rz3gyCcxEQEPjX0NmhM58M+YQHuzzI+4PeJ8Ilot3HOHnyJKNHj0YikeDo6EivXr04d+4cAN26dcPV1RWxWEznzp0pLCwkOzsbT09PYx7N2LFjb2i8nJwcPD09jctxEyZM4MSJE8bjsbGxAHTt2pXCwsJ2+pTXR1gWExAQ+FcR7R1NtHf0LfURFBTE9u3bb/g6U1NT42uJRIJWq70lO9rCpYx6sVj8t4x3CWHmIiAgIHCD9O3bl+bmZn7//XdjW3p6OidOnCAyMpKtW7ei1WqprKzkxIkTdOvW7ap9+fv7U1hYaNzziI+Pb/U8S0tL6uvrr2j38/OjsLCQixcvArBx40Z69ep1Kx+vXRBmLgICAgI3iEgkYunSpbz33nv897//xczMDA8PD1599VV69uxJUlIS48aNQyQSMXfuXJycnMjOzm61LzMzM9555x2mT59u3NBvzYmMGjWK119/nR
9//JHPP/+8xfULFy7kmWeeMW7o/znQoC1ER0dTV1eHWq1m165dfP/998aotJtFpG+PgO//ASZOnCgUCxMQELiCtLQ0QkJCbrcZ/whauxdX++0UlsUEBAQEBNodwbkICAgICLQ7gnMREBAQEGh3Osy5ZGdnM27cOON/ERERrFixAqVSybRp04iNjWXatGlUVxv0f/R6PQsWLCAmJoa4uDhSUlKMfa1fv57Y2FhiY2NZv369sT05OZm4uDhiYmJYsGCBUS/oamMICAgICPw9dJhz8ff3Z+PGjWzcuJF169Yhk8mIiYlh2bJl9OvXjx07dtCvXz+jKNv+/fvJzc1lx44dzJ8/n7feegswOIqlS5eyatUqVq9ezdKlS43O4q233mL+/Pns2LGD3Nxc9u/fD3DVMQQEBAQE/h7+lmWxI0eO4OXlhYeHBwkJCYwfPx6A8ePHs2vXLgBju0gkIjw8nJqaGkpLSzl48CADBgxALpdja2vLgAEDOHDgAKWlpdTV1REeHo5IJGL8+PEkJCS06OuvYwgICAgI/D38Lc4lPj6eMWPGAFBRUYGzszMATk5OVFRUAKBQKHB1vVx7wdXVFYVCcUW7i4tLq+2Xzr/WGAICAgJ3Krt27aJTp05kZWW1erympoaff/75b7bq6nS4c2lubmb37t2MHDnyimMikajDK7z9HWMICAgIdDRbtmyhZ8+erWbwazQaampq+PXXX2+oT71ej06nay8TW9DhGfr79+8nNDQUR0dHABwcHCgtLcXZ2ZnS0lLs7e0Bw4ykpKTEeF1JSQkuLi64uLhw7NgxY7tCoaB3795XPf9aYwgICAh0NBuSCvlo+3mKlI24y2XMHdGJ8T08bqnP+vp6Tp48ycqVK3nyySeZPXs2iYmJfPbZZ9jY2JCTk0OXLl3Iy8tj3Lhx9O/fn5deeolvv/2WrVu30tzcTExMDLNnz6agoIBHH32U7t27k5KSwl133UV1dTXz5s0DYNWqVVy4cIFXX331lmzu8JlLfHw8o0ePNr6Pjo5mw4YNAGzYsMFYCOdSu16v5/Tp01hbW+Ps7MzAgQM5ePAg1dXVVFdXc/DgQQYOHIizszNWVlacPn0avV7fal9/HUNAQECgI9mQVMgr685RqGxEDxQqG3ll3Tk2JN2aGnFCQgKDBg3Cz88POzs7kpOTAYOE/7x589i+fTtz5szB29ubjRs38tJLL3Hw4EEuXrzImjVr2LhxIykpKRw/fhyAixcvcv/99xMfH8+0adPYs2cParUaMNSQmTRp0i3ZCx08c2loaODw4cMtSmdOnz6dZ599ljVr1uDu7s7ixYsBiIqKYt++fcTExCCTyXjvvfcAkMvlzJw5k8mTJwMwa9Ys5HI5AG+++SavvPIKTU1NDB48mMGDB19zDAEBAYGO5KPt52lUt1QeblRr+Wj7+VuavcTHx/PQQw8BBo2x+Ph4hgwZQlhYGF5eXq1ec+jQIQ4dOmQMbmpoaCA3Nxc3Nzfc3d0JDw8HDIKYffv2Ze/evfj7+6NWq+nUqdNN23qJDnUuFhYWJCYmtmizs7Pjhx9+uOJckUjEm2++2Wo/kydPNjqXPxMWFsaWLVuuaL/aGAICAgIdSZGy8Yba24JSqeTo0aNkZGQgEonQarWIRCKioqKwsLC46nV6vZ7p06czZcqUFu0FBQVXXHf33Xfz9ddf4+/vz8SJE2/a1j8jZOgLCAgItBPu8tYrW16tvS1s376dcePGsWfPHnbv3s2+ffvw9PRsURAMrpTkHzhwIGvXrjW2KRSKq0bOdu/enZKSErZs2WKM7L1VBOciICAg0E7MHdEJmbRliWWZVMLcETe/zLRlyxaGDx/eoi02NvaKqDE7OzsiIiIYM2YMH3zwAQMHDmTMmDFMmTKFuLg4Zs+e3aqU/yXuuusuIiIisLW1vWlb/4wguf//CJL7AgICrXGjkvsdES32d/DEE08wdepU+vXrd9VzbkRyXygWJiAgINCOjO/hcUc4k0
vU1NRw991306lTp2s6lhtFcC4CAgIC/2JsbGzYvn17u/cr7LkICAgICLQ7gnMREBAQEGh3BOciICAgINDuCM5FQEB
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Condition 1\", s=15, data=df);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"However, plotting the groups seperately reveals different slopes."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 39,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABasAAARKCAYAAABB6CxvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3yUZbrG8d87M5n0QgkJgQCCSkTBgiwgiIpGLKAIKurqrm1ZFXUta0SDHRSjru6qR2Qta1sbTRQVkGpAqiIIZhVpgTRa2qRMe88f4wwJKSSQZCjX93z8cDJvu5OwT8KVO/djmKZpIiIiIiIiIiIiIiISRJZgFyAiIiIiIiIiIiIiorBaRERERERERERERIJOYbWIiIiIiIiIiIiIBJ3CahEREREREREREREJOoXVIiIiIiIiIiIiIhJ0CqtFREREREREREREJOgUVssxY/DgwQwePLjaa9OmTaN79+5MmzatUffq3r07N9xwQ1OWJyJyxNG6KiLSsrTuSmO8/PLLdO/eneXLlwe7FJEjhtZZkeCzBbsAOXL99ttv/Pe//2X58uXk5uZSWVlJXFwcPXr0IDU1lcsvvxy73R7sMg+K/4vT/Pnzg1xJw3399desXLmSn3/+maysLBwOB8OGDeP5558PdmnV3HDDDaxYsQKLxcKMGTPo3r17jXPGjh3L9OnTefvttznrrLOCUKVIcGhdPbwcKetqVatXr+a6664D4Mknn2TUqFEHfa8j8XMm0lhadw8vR8q66/9+ti5XXHEFEydObMGKRA5fWmcPL0fyOhsREUGnTp1ITU3lpptuIjIyssZ1gwcPZseOHYG3DcMgMjKSbt26cckll/DHP/6RkJCQGtf5c4mkpCS+/vprQkND67z3+vXrsdkUpzYnfXTloLzyyiu8+uqreL1eTj/9dK644goiIiLYtWsXK1asYNy4cXz44YeN/sljS0tNTeXUU0+lXbt2jbruyy+/JDw8vJmqOjivvfYaWVlZREREkJiYyKZNm4JdUr28Xi8ZGRm8+eabwS5F5LCgdVXralP45JNPAN835h9//PEhhdUiRzutu1p3D9UVV1xBhw4darx+0kknBaEakcOP1lmts4fKv86apklBQQHffPMNL7/8MvPnz+ejjz6q8wcdf/rTn4iJicHj8ZCbm8ucOXN45plnWLZsGZMmTarzeTk5ObzzzjuMHj26ud4laQCF1dJokyZN4uWXX6Z9+/b885//5NRTT61xzoIFC3jrrbeCUF3jREdHEx0d3ejrunXr1gzVHJqHHnqIxMREOnfuzIoVK/jTn/4U7JLq1blzZzIzM1myZAkDBgwIdjkiQaV1VetqUyguLubrr7+mS5cudO/endmzZ7NhwwZ69OgR7NJEDjtad7XuNoUrrriCvn37BrsMkcOS1lmts01h/3X2/vvv57LLLmP9+vXMmjWLK664otbr/vznP9OxY8fA23fccQfDhw9nwYIFrFixgj/84Q81romNjcUwDCZPnsyVV15J69atm/4dkgbRzGpplO3bt/PKK68QEhLC5MmTa/2CA3DeeefV2jH75Zdf8sc//pHevXvTq1cvhg0bxuuvv47T6axxrn9WVFlZGc8++yznnnsup5xyCqmpqUyePBnTNGtcY5om77//Ppdeeik9e/bk7LPP5sknn6SkpKTWOvefPbV8+XK6d+/Ojh072LFjB927dw/8N3bs2MB1dc2eKikp4YUXXmDIkCH07NmTPn36cMstt7B06dIa5/qf9fLLL/Pzzz8zevRozjzzTE499VSuv/56vv/++1prrku/fv3o0qULhmE06rpguffeezEMg4yMDLxeb4Ov++mnn7jrrrvo378/p5xyCueddx6PP/44BQUFNc4dO3Ys3bt3Jzs7m/fee49hw4bRq1evwOeu6vH333+fSy65hJ49ezJ48GAmTZoU+Dv21VdfceWVV3LaaafRv39/nnzySSoqKprmAyHHPK
2rPlpXD93MmTOpqKjgiiuuCHzj/vHHH9d6btWP1dq1axk9ejR/+MMfAp+7hnzOwPervWPHjuWcc87hlFNO4ayzzuL++++vtUunvjX522+/pXv37jz00EO11ut0Ounbty99+/at9e+2SGNo3fXRutsy3G43H3zwAVdffTVnnHEGp556KsOHD+f999+v83vgH3/8kbvvvpsBAwZwyimncM455/Doo4+Sn59f6/k//fQTt9xyC6effjpnnHEGN954Iz/88EOdNfk/9zt37iQ9PZ2zzz6bk0466bDvbpUjh9ZZH62zTa9Vq1ZccMEFAKxbt67B13Xu3Jk+ffrUe11YWBi33347JSUlvPrqqw267/bt2wOf9+3bt3PvvffSt29fevbsyYgRI1iwYEGDa5R91FktjTJt2jRcLheXXnopJ554Yr3n7v/rGP/4xz94/fXXadWqFUOHDiUiIoJvv/2Wf/zjH2RmZvLmm2/WuMblcnHLLbdQUFDAoEGDsFqtfPPNN7zwwgs4nU7uvPPOaudPmDCB9957j/j4eEaNGoXNZmPevHn8+OOPOJ3OA87C6tChA3feeSfvvPMO4PtpnN+Bfp2vuLiYa6+9lo0bN9KzZ0/+/Oc/s3fvXr766ituvvlmHn/8ca655poa1/3000+88cYbnHbaaVx11VXk5OQwZ84cbrzxRmbMmEHXrl3rfe6RqkePHlx22WV89tlnTJ8+nZEjRx7wmgULFnDXXXcBMGTIEJKSkli/fj0ffvgh8+bN47///S/Jyck1rpswYQKrVq3inHPO4ZxzzsFqtVY7npGRwYoVKzjvvPMYMGAA8+fP58UXX8TlchEbG8sLL7zABRdcwJlnnsmSJUv44IMP8Hg8PPHEE03zwZBjmtbVumldbZxPPvkEi8XC8OHDadu2LfHx8XzxxRc8+OCDRERE1HrNmjVreP311+nduzcjR45k7969dOnSpUGfs8WLF3PXXXfhdrs577zz6NSpE/n5+cyZM4eFCxfy7rvvcvLJJ9d4Zm1r8sCBA+nUqRNfffUVDz/8cI3updmzZ1NYWMjNN998xM61lMOH1t26ad1tWi6Xi9tuu43MzEyOO+44hg4dSmhoKMuXL+epp57ixx9/5Lnnnqt2zZQpU3j00Uex2+0MHjyYxMREtm7dyqeffsr8+fP55JNPSEpKCpz//fffc9NNN+FyuUhNTaVz5878/PPP3HDDDfTr16/O2goLCxk1ahQRERFceOGFGIZBmzZtmu1jIccWrbN10zrbdA52bnR91/3xj3/kgw8+4OOPP+aGG26gS5cuDbrnjh07uOqqq0hOTubyyy+nqKiIL7/8kjvuuIO333673vVYalJYLY2yevVqAPr379+o63744Qdef/112rdvz6effkp8fDzg+xWOO++8M/DrP7fddlu16woKCkhJSeHtt98mLCwMgDvvvJMhQ4bwn//8h7/+9a+B4fjff/897733Hp06deLTTz8lLi4O8HXw/ulPf2Lnzp21zpSrqmPHjtx1111Mnz4dIBCMNsTzzz/Pxo0bGTVqFE888UTgJ5V/+ctfGDlyJOPHj2fgwIHVfhUFYOHChTzzzDOMGDEi8NpHH33EY489xrvvvsvjjz/e4BqayrRp06ptSnAgHTp0qFZ/Q9177718/fXX/POf/+TSSy8NfI5r43A4GDt2LB6Ph/fee48zzzwzcGzy5Mm88MILPPbYY7X+Gtn69euZPn16rUG2//jMmTNJSEgAfJ/31NRU3nzzTcLCwpg2bVrgV7icTifDhw9n6tSp3H333fqmXg6Z1tW6aV1t+Lq6Zs0a/ve//zFw4EASExMBGDZsGG+99RazZs3iqquuqvW6zMxMnnjiiRr/KDrjjDPq/ZwVFRVx//33ExYWxgcffMDxxx8fOPbLL78watQoxo0bF7hHVXWtyddccw0ZGRl89tlnXH/99dWO+WdxX3311Qf6UIgckNbdumndbdz3s9OnT691o0X/x3zSpE
lkZmZy/fXX8/DDDwcaJjweD4888ghTp05lyJAhgS7BzZs38/jjj9OhQwfef//9wPemAN999x0333wzEyZMCHT8mab
"text/plain": [
"<Figure size 1440x1080 with 9 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"street = [\"Artery\", \"Feedr\"]\n",
"railway = [\"RRNn\", \"RRAn\", \"RRNe\", \"RRAe\"]\n",
"park = [\"PosA\", \"PosN\"]\n",
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Condition 1\", hue=\"Condition 1\",\n",
" col_order=[\"Norm\"] + street + railway + park,\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract factor variables *major_street*, *railway*, and *park*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 40,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"major_street\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(street) | df[\"Condition 2\"].isin(street),\n",
" \"major_street\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 41,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"railway\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(railway) | df[\"Condition 2\"].isin(railway),\n",
" \"railway\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 42,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"park\"] = 0\n",
"df.loc[\n",
" df[\"Condition 1\"].isin(park) | df[\"Condition 2\"].isin(park),\n",
" \"park\",\n",
"] = 1"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 43,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Condition 1\"]\n",
"del df[\"Condition 2\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 44,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"major_street\", \"railway\", \"park\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 45,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"major_street 264\n",
"railway 94\n",
"park 60\n",
"dtype: int64"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 45,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].sum()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 46,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>major_street</th>\n",
" <th>railway</th>\n",
" <th>park</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" major_street railway park\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 1 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 46,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"major_street\", \"railway\", \"park\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Exterior\n",
"\n",
"This dimension describes the main material the houses are made of. The categories are too diverse, and the various grouped scatter plots did not reveal differing slopes. For simplicity, this variable is dropped.\n",
"\n",
"Strictly speaking, this variable also represents tags that can be associated with a house: the two columns *Exterior 1st* and *Exterior 2nd* allow for up to two different tags per house."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 47,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEkCAYAAADjOHzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAADNlUlEQVR4nOydd3hUVfrHP7dMTa+TCiQQei9CqIoC0gQEdO29+2Mta2+sKJa1iwVXXXUtawWkCYJKB+m9k0DqpGeSTL/3/v4YGIgkMUCC7X6eh+eZOTntDjP3vee87/m+gqZpGjo6Ojo6Ok2I+FtPQEdHR0fnz4duXHR0dHR0mhzduOjo6OjoNDm6cdHR0dHRaXJ046Kjo6Oj0+ToxkVHR0dHp8mRf+sJ/F7o27cvycnJv/U0dHR0dP5Q5OXlsW7dupPKdeNylOTkZL755pvfeho6Ojo6fyguvvjiOsv1bTEdHR0dnSZHNy46Ojo6Ok2Oblx0dHR0dJoc3eeio6Pzp8Tn85Gbm4vb7f6tp/KnwGw2k5KSgsFgaFR93bjo6Oj8KcnNzSUsLIxWrVohCMJvPZ0/NJqmUVpaSm5uLmlpaY1qo2+L6ejo/GlweKvIrs7BrXhwu93ExMTohqUJEASBmJiYU1oF6isXHR2dPwW5NQU8svlZtlfs5m+txjFGPE83LE3IqX6W+spFR0fnT8HW8p1sr9gNwP+y5+DX/KfVT4cOHRg3blzw3zvvvNNg/bfffvu0xnnkkUc4cODAabUFWLhwIaNHj6Z9+/Zs3769wboOh4NPPvnktMc6HfSVi46Ozp+CJIsNWZDwawop1kREoe5nZ82ronlUBKOIYDq5jtlsZs6cOY0ed+bMmdx6662nNFdFUXj66adPuY0kScH3bdu25fXXX+eJJ5741bYOh4PPPvuMK6644pTGPBP0lYuOjs6fgh4xXXij7zM83OXvvNR7KpIgnVRH86modi9aqR+10IvmVRvVd1VVFSNGjODQoUMA3HPPPXzxxRe88MILuN1uxo0bx7333gvAnDlzmDRpEuPGjePxxx9HUZTA/Hr04Nlnn+Wiiy5i8+bNXHXVVcEVx7x58xg7dixjxozhX//61/Fr+kWbE2ndujXp6eknzXX//v3B8ceOHUt2djYvvvgiR44cYdy4cTz33HONuuYzRV+56Ojo/GnoE9udPrHdAdidt/vkCooGytHXGuDXwFi7yjFjcYxbbrmFUaNG8fjjj/PQQw9x9dVXU1lZySWXXALAJ598ElzpHDx4kIULF/LZZ59hMBiYOnUqc+fOZfz48TidTrp27cqDDz5Yazy73c4LL7zAN998Q3h4ONdffz1LlizhggsuqLdNQ/zvf//j6quv5qKLLsLr9aKqKvfeey/79+8/pRXZmaIbFx0dnb8OBhHBKqI5VQSzAMaTndT1bYsNGDCA7777jieffLLem/SaNWvYsWMHkyZNAghGrAFIksSIESNOarN9+3bOOeccoqOjARg7dizr16/nggsuqLdNQ3Tv3p23336bwsJChg8fTqtWrU6pfVOhGxcdHZ2/DIIkQKwBwa+BJATeNxJVVTl48CBms5nKykoSEhJOqqNpGhMmTAhukZ2IyWSq5TNpDKfTZuzYsXTr1o2ffvqJm2++mX/+85+kpqaeUh9Nge5z0dHR+UshiELAmX8KhgXggw8+oHXr1rz44os89NBD+Hw+AGRZDr7OzMxk0aJFlJaWAlBRUUFeXl6D/Xbt2pX169dTVlaGoijMnz+fPn36nMaVBcjJySE1NZWrr76a888/n7179xISEkJNTc1p93k66CsXHR0dnRP4pc9l0KBBXHzxxXz55Zd8+eWXhIaG0qdPH9566y2mTJnCJZdcwkUXXUTHjh158cUXueuuu7j++utRVRWDwcDjjz/eYK6o+Ph47r33Xq655ho0TWPIkCFccMEFvzrP77//nmnTplFWVsYtt9xChw4deO+991i4cCFz5s
xBlmViY2O55ZZbiIyMpGfPnowZM4ZBgwbxwAMPNMln1RCCpmlas4/yB+Diiy/W87no6PyJ2L17Nx06dPitp/Gnoq7PtL57p74tpqOjo6PT5OjGRUdHR0enydGNi46Ojo5Ok9NsxuXQoUO19Hl69uzJBx98QEVFBddddx3Dhw/nuuuuo7KyEgiE8D311FMMGzaMsWPHsnPnzmBfs2bNYvjw4QwfPpxZs2YFy3fs2MHYsWMZNmwYTz31FMfcR/WNoaOjo6Nzdmg245Kens6cOXOYM2cO33zzDRaLhWHDhvHOO++QmZnJ4sWLyczMDIrCLV++nOzsbBYvXsy0adOYOnUqEDAUM2bM4IsvvuDLL79kxowZQWMxdepUpk2bxuLFi8nOzmb58uUA9Y6ho6Ojo3N2OCvbYmvWrCE1NZXk5GSWLl3K+PHjARg/fjxLliwBCJYLgkD37t1xOBwUFRWxcuVKBgwYQGRkJBEREQwYMIAVK1ZQVFREdXU13bt3RxAExo8fz9KlS2v19csxdHR0dHTODmfFuMyfP58xY8YAUFpaSnx8PABxcXHBw0Z2u73WideEhATsdvtJ5Tabrc7yY/UbGkNHR0fnbLNkyRLatWvHwYMHG6zXo0ePOsu3bNnC5MmTGTduHCNHjuT1118H4PXXX+e9995r8vk2Fc1+iNLr9fLDDz/UKYcgCEKzJ/M5G2Po6Ojo1Me8efPo1asX8+fPZ8qUKafc/oEHHuDVV1+lffv2KIpCVlZWM8yy6Wn2lcvy5cvp1KkTsbGxAMTExFBUVARAUVFRUKzNZrNRWFgYbFdYWIjNZjup3G6311l+rH5DY+jo6OjUh3/jLtzT3sZ9z/O4p72Nf+OuM+6zpqaGjRs38vTTTzN//nwgcE+64oorGDduHGPGjGHDhg3B+tOnT2f06NFcc801lJWVAVBWVkZcXBwQEL9s06ZNsP6BAwe46qqrOP/88/noo48AyM3NZeTIkTz66KOMHj2a66+/PpieeNu2bYwdOzYovX9sR6k5aHbjMn/+fEaPHh18P3ToUGbPng3A7NmzOf/882uVa5rGli1bCAsLIz4+noEDB7Jy5UoqKyuprKxk5cqVDBw4kPj4eEJDQ9myZQuaptXZ1y/H0NHR0akL/8Zd+L/4DsodgYJyB/4vvjtjA7N06VIGDRpEWloaUVFR7Nixg3nz5jFw4MBgwFP79u0BcDqddO7cOagtNmPGDACuueYaLrzwQu644w7+97//4fF4gv1nZWXx3nvv8eWXX/LGG28ENc4OHz7MFVdcwfz58wkLC2PRokUAPPzww0FV51MVxDxVmtW4OJ1OVq9ezfDhw4NlN998M6tWrWL48OGsXr2am2++GYAhQ4aQmprKsGHDeOyxx4LZ1SIjI7n99tuZNGkSkyZN4o477iAyMhKAJ554gkcffZRhw4bRokULBg8e3OAYOjo6OnXhX7AcfL9Ii+zzB8rPgBMfrkeNGsX8+fPp0qUL33zzDa+//jr79u0jNDQUAFEUGTVqFADjxo1j48aNANx55518/fXXDBgwgHnz5nHjjTcG+x8yZAhGo5Ho6Giio6OD/uWUlJSgTEunTp3Iy8vD4XBQU1MT9O0056oFmtnnYrVaWbduXa2yqKgoPvzww5PqCoJQb7rOY4bll3Tp0oV58+adVF7fGDo6Ojp1cmzF0tjyRlBRUcHatWvZt28fgiCgKAqCIHD//ffz8ccfs2zZMh588EGuu+66YHTriZzoK27RogWXX345l1xyCZmZmZSXlwNgNB7PdCZJEn6/v87yE1c7Zwv9hL6Ojo5OVPiplTeCRYsWMW7cOH788Ud++OEHli1bRkpKCuvXryc2NpZLLrmEyZMnBw+Mq6oa3L6aO3cuvXr1AuCnn34KHhA/fPgwoigSHn7q8woPDyckJIStW7cCsGDBgtO+tsagS+7r6Oj85ZFHDQ74XE7cGjPIyKMGn3af8+bN46abbqpVNnz4cB588EGsViuyLG
O1WoM57a1WK9u2beOtt94iOjqaV155BYA5c+bwzDPPYDabkSSJF1544bT9JU8//TSPPvoooijSp0+f4JZcc6BL7h9
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 1st\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 48,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEkCAYAAADjOHzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAADNvUlEQVR4nOydd3gU1frHP1O2JpuebDok9N6F0EXpIKDYK/Z2+V312htXFMu1iwWv/V71KiogRUFQaQLSIXQIgfSebJLNtpn5/bGwIZJAgAQs83kenid7duacM8vuvPOe9z3fV9A0TUNHR0dHR6cJEc/1BHR0dHR0/nzoxkVHR0dHp8nRjYuOjo6OTpOjGxcdHR0dnSZHNy46Ojo6Ok2Oblx0dHR0dJoc+VxP4PdC3759SUhIONfT0NHR0flDkZOTw7p1645r143LERISEvjmm2/O9TR0dHR0/lBcfPHF9bbry2I6Ojo6Ok2Oblx0dHR0dJoc3bjo6Ojo6DQ5esxFR0fnT4nX6yU7OxuXy3Wup/KnwGw2k5iYiMFgaNTxunHR0dH5U5KdnY3NZqNly5YIgnCup/OHRtM0SkpKyM7OJiUlpVHn6MtiOjo6fxocnkoyq7JwKW5cLheRkZG6YWkCBEEgMjLylLxA3XPR0dH5U5Bdncejm59je/kurmg5gXHi+bphaUJO9bPUPRcdHZ0/BVvLdrC9fBcA/8uch0/zndL5HTp0YMKECYF/77777gmPf+edd05rno8++ij79+8/rXMBnn/+eUaNGsX48eO56667cDgcp3T+N998w1NPPXXa4zcW3XPR0dH5UxBvsSMLEj5NIdEahyjU/+zs8nlxKV5MkoxFNgbazWYz8+bNa/R4s2bN4vbbbz+lOSqKwjPPPHPK50iSFHg9YMAA7rvvPmRZ5l//+hezZs3i/vvvP6U+zwa656Kjo/OnoEdkF97s+yyPdPk/Xu49DUmQjjvGo/g4XF1Cbk05h6pKcPm8J+yzsrKSkSNHkpGRAcC9997Ll19+yYsvvojL5WLChAncd999AMybN4/JkyczYcIEnnjiCRRF8c+rRw+ee+45LrroIjZv3sy1117L9u3bAViwYAHjx49n3Lhx/Otf/6q9lt+ccywDBw5Elv1+Qffu3cnPzwf8Hsndd9/NTTfdxIgRI3jhhRcC53z99deMHDmSyZMns2nTplP6XE8X3XPR0dH509Anqjt9oroDsCtn13Hve1UFn6YCoKLhVX2Y8afWHjUWR7ntttsYM2YMTzzxBA8//DDXXXcdFRUVXHbZZQB8+umnAU/nwIEDfPfdd3z++ecYDAamTZvG/PnzmThxIk6nk65du/LQQw/VmUtBQQEvvvgi33zzDSEhIdx4440sXbqUCy+8sMFzfsvXX3/N6NGjA6937drF3LlzMRqNjBo1imuvvRZJknjjjTf45ptvCA4O5rrrrqNjx46n+MmeOrpx0dHR+ctglmRCDGYcXhdBshFzI5bFBgwYwPfff89TTz3V4LLZmjVrSE9PZ/LkyQCBTDUASZIYOXLkceds376d8847j4iICADGjx/P+vXrufDCCxs851jefvttJEnioosuCrSlpaVhs9kAaNWqFTk5OZSXl9cZZ8yYMWRmZp6w76ZANy46Ojp/GSRRIs4SRpRZwSBISOLJIwOqqnLgwAHMZjMVFRXExsYed4ymaUyaNCmwRHYsJpOpTsykMZzsnG+++Yaff/6Zjz76qE4Wl9FYaywlSQoszZ0L9JiLjo7OXwpJFDFLhkYZFoCPPvqIVq1a8dJLL/Hwww/j9frjNLIsB/5OS0tj8eLFlJSUAFBeXk5OTs4J++3atSvr16+ntLQURVFYuHAhffr0Oel8VqxYwXvvvcfbb7+NxWI56fFHxykrK8Pr9fL999+f9JymQPdcdHR0dDg+5jJo0CAuvvhiZs+ezezZswkODqZPnz68/fbbTJ06lcsuu4yLLrqIjh078tJLL/
H3v/+dG2+8EVVVMRgMPPHEEyesERUTE8N9993H9ddfj6ZpDBkyhAsvvPCk85w+fToej4cpU6YA0K1btxOmFsfExHD33XdzxRVXYLPZ6NChwyl8KqePoGmadlZG+p1z8cUX6/VcdHT+ROzateus3Uj/KtT3mTZ079SXxXR0dHR0mhzduOjo6OjoNDm6cdHR0dHRaXKazbhkZGTU0enp2bMnH330EeXl5UyZMoURI0YwZcoUKioqAH8q39NPP83w4cMZP348O3bsCPQ1Z84cRowYwYgRI5gzZ06gPT09nfHjxzN8+HCefvppjoaPGhpDR0dHR+fs0GzGJTU1lXnz5jFv3jy++eYbLBYLw4cP59133yUtLY0lS5aQlpYWEIdbsWIFmZmZLFmyhOnTpzNt2jTAbyhmzpzJl19+yezZs5k5c2bAWEybNo3p06ezZMkSMjMzWbFiBUCDY+jo6OjonB3OyrLYmjVrSEpKIiEhgWXLljFx4kQAJk6cyNKlSwEC7YIg0L17dxwOB4WFhaxatYoBAwYQFhZGaGgoAwYMYOXKlRQWFlJVVUX37t0RBIGJEyeybNmyOn39dgwdHR0dnbPDWTEuCxcuZNy4cQCUlJQQExMDQHR0dGDTUUFBQZ2dr7GxsRQUFBzXbrfb620/evyJxtDR0dE52yxdupR27dpx4MCBEx7Xo0ePetu3bNnCpZdeyoQJExg9ejRvvPEGAG+88Qbvv/9+k8+3qWj2TZQej4cff/yxXlkEQRCavZjP2RhDR0dHpyEWLFhAr169WLhwIVOnTj3l8x988EFee+012rdvj6IoHDx4sBlm2fQ0u+eyYsUKOnXqRFRUFACRkZEUFhYCUFhYGBBTs9vtAelogPz8fOx2+3HtBQUF9bYfPf5EY+jo6Og0hG/jTlzT38F17wu4pr+Db+POM+6zurqajRs38swzz7Bw4ULAf0+6+uqrmTBhAuPGjWPDhg2B42fMmMHYsWO5/vrrKS0tBaC0tJTo6GjArxfWunXrwPH79+/n2muv5YILLuCTTz4BIDs7m9GjR/PYY48xduxYbrzxxkB54m3btjF+/HgmTJjA888/H1hRag6a3bgsXLiQsWPHBl4PGzaMuXPnAjB37lwuuOCCOu2aprFlyxZsNhsxMTEMHDiQVatWUVFRQUVFBatWrWLgwIHExMQQHBzMli1b0DSt3r5+O4aOjo5Offg27sT35fdQdqSqY5kD35ffn7GBWbZsGYMGDSIlJYXw8HDS09NZsGABAwcODCQ8tW/fHgCn00nnzp0DGmMzZ84E4Prrr2fUqFHcdddd/O9//8Ptdgf6P3jwIO+//z6zZ8/mzTffDGidHTp0iKuvvpqFCxdis9lYvHgxAI888khA3flUxTRPlWY1Lk6nk19++YURI0YE2m699VZWr17NiBEj+OWXX7j11lsBGDJkCElJSQwfPpzHH3+cJ598EoCwsDDuvPNOJk+ezOTJk7nrrrsICwsD4Mknn+Sxxx5j+PDhJCcnM3jw4BOOoaOjo1MfvkUrwPubsshen7/9DDj24XrMmDEsXLiQLl268M033/DGG2+wd+9egoODARBFkTFjxgAwYcIENm7cCMDdd9/N119/zYABA1iwYAE333xzoP8hQ4ZgNBqJiIggIiIiEF9OTEwMyLR06tSJnJwcHA4H1dXVgdhOc3ot0MwxF6vVyrp16+q0hYeH8/HHHx93rCAIAYPyW44alt/SpUsXFixYcFx7Q2Po6Ojo1EtZA3XoG2pvBOXl5axdu5a9e/ciCAKKoiAIAg888AD//e9/Wb58OQ899BBTpkwJZLcey7Gx4uTkZK666iouu+wy0tLSKCsrA46X2Pf5fPW2H+vtnC30Hfo6Ojo64SGn1t4IFi9ezIQJE/jpp5/48ccfWb58OYmJiaxfv56oqCguu+wyLr300sCGcVVVA8tX8+fPp1evXgD8/PPPgQ3ihw4dQhRFQkJOfV4hISEEBQWxdetWABYtWn
Ta19YYdMl9HR2dvzzymMH+mMuxS2MGGXnM4NPuc8GCBdxyyy112kaMGMFDDz2E1WpFlmWsVivPP/884F/p2bZtG2+
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Exterior 2nd\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 49,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Exterior 1st\"]\n",
"del df[\"Exterior 2nd\"]\n",
"# Also discard the associated ordinal variables.\n",
"del df[\"Exter Cond\"]\n",
"del df[\"Exter Qual\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Foundation\n",
"\n",
"The type of foundation appears to have an effect. However, only three of the six realizations occur in large numbers. Factor variables *found_BrkTil*, *found_CBlock*, and *found_PConc* are extracted but not regarded as \"interesting\"."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 50,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACuF0lEQVR4nOzdd3iTVfvA8W+SJk0605nu3UIplBbKKKtQppSyVdzgfEVEUUFREV9QHC8qKi5+KuIAZE8ZUmbZo1BoKZQuOtO9R+bvj0CgsqEVx/O5Li/bJ0/OOU1L7jzPOee+RUaj0YhAIBAIBC1IfLcHIBAIBIJ/HiG4CAQCgaDFCcFFIBAIBC1OCC4CgUAgaHFCcBEIBAJBi7O42wP4q+jWrRuenp53exgCgUDwt5Kfn8/BgwevOC4Elws8PT1ZtWrV3R6GQCAQ/K2MHj36qseF22ICgUAgaHFCcBEIBAJBixOCi0AgEAhanDDnIhAIBH+g1WrJy8ujsbHxbg/lL0Mul+Pl5YVUKr2p84XgIhAIBH+Ql5eHra0tfn5+iESiuz2cu85oNFJWVkZeXh7+/v439RzhtphAIBD8QWNjI05OTkJguUAkEuHk5HRLV3LClYtAIPhHMBqN7MnbQ1ZVFlFuUYQ5h91Re0Jgae5WXw8huAgEgn+Ew0WHmbR9EkaMuChcWBy3GDdrt7s9rH8tIbgIBIJ/hOL6YoyYylOVNJRQ1VR1zeBiNBrv6MokNDSUkJAQ8/dffPEFXl5et93e9axatYpTp07x1ltvXfOcgwcPIpVK6dSpEwBLlixBoVAwcuTIVhnTzRCCi0Ag+Efo4taF3p692V+wnyc6PIG//ZUTz1qDlmVpy1ievpzBfoMZ3248CqnilvuSy+WsXbu2JYbdIg4dOoSVlZU5uDzwwAN3eURCcBEIBP8QKmsVH8V8RI22Bie5ExKx5IpzUstSef/w+wB8efxL2ju3p7dn7xbp//Tp08ycOZOGhgZ8fHyYM2cO9vb2PPLII0ybNo0OHTpQXl7O2LFj2b59O6tWrWL79u00NDSQm5vLgAEDmDZtGgArV65kwYIF2Nra0rZtW2QyGQDbt2/nq6++QqvVolQqmTt3Lo2NjSxduhSxWMy6deuYMWMG+/fvx8rKiieeeOK64woPD+fgwYPU1NTw7rvvEhUV1SKvBQirxQQCwT+IQqrA1cr1qoEFwFJsiYXo0mdqheTWr1rAtJpsxIgRjBgxgueeew6AadOm8corr7B+/XpCQkKYP3/+Dds5ffo08+bNY/369WzatInCwkKKi4v5/PPPWbJkCYsXL+bcuXPm8zt37syyZctYs2YNcXFxfPvtt3h5eTFu3DjGjx/P2rVrrwgQ1xuXXq9nxYoVvP766zc13lshXLkIBIJ/jbZObfmo70ccUR8hzCmMTqpOt9XOH2+L1dTUUFNTQ9euXQEYNWoUL7zwwg3biY6OxtbWFoDAwEDy8/OprKyka9euODo6AjB06FCys7MBKCoqYsqUKZSUlKDRaG44z3OjcQ0cOBCAsLAw8vPzb/KnvzlCcBEIBP8qsT6xxPrE/mn9SSQSjEbTQgONRtPssYu3uy6ep9frr9vWO++8w/jx4+nfvz8HDx6846uNi/2LxeIb9n2rhNtiAoFAcIdsbW2xs7PjyJEjAKxdu5YuXboApnIep06dAmDz5s03bCs8PJzDhw9TUVGBVqtt9pyamhpUKhUAa9asMR+3tramrq7ulsbV2oQrF4FAIGgBH3zwgXni3Nvbm/feew+Axx9/nBdffJFly5YRExNzw3ZcXV2ZNGkS48aNw9bWltDQUPNjkyZN4oUXXsDe3p5u3bqRl5cHQL9+/Zg8eTIJCQnMmDHjpsbV2kTGi9dr/3KjR48WioUJBALANNF++Zu6wORqr8u13juF22ICgUAgaHFCcBEIBAJBixOCi0AgEAhaXKsFl8zMTPMmoxEjRtCpUyd++OEHKisrmTBhAo
MGDWLChAlUVVUBplw/77zzDgMHDiQ+Pp6UlBRzW6tXr2bQoEEMGjSI1atXm4+fOnWK+Ph4Bg4cyDvvvGNe7netPgQCgUDw52i14BIQEMDatWtZu3Ytq1atQqFQMHDgQBYsWEB0dDRbt24lOjqaBQsWALB7926ys7PZunUrs2fP5u233wZMgWL+/PksW7aM5cuXM3/+fHOwePvtt5k9ezZbt24lOzub3bt3A1yzD4FAIBD8Of6U22L79+/H29sbT09PEhISzJk6R44cybZt2wDMx0UiEREREVRXV1NcXExiYiI9e/ZEqVRib29Pz5492bNnD8XFxdTW1hIREYFIJGLkyJEkJCQ0a+uPfQgEAoHgz/Gn7HPZuHEjw4YNA6CsrAxXV1cAXFxcKCsrA0CtVuPmdik9tpubG2q1+orjKpXqqscvnn+9PgQCgeDv4mJaf6PRiEQiYcaMGeasx5fLy8vjP//5Dxs2bDAf27NnD3PnzgXg/PnzuLq6IpfLadOmDZGRkeZ0/K+99hp9+/ZlyJAhLT7+Vg8uGo2G7du38/LLL1/xmEgkavVqb39GHwKBQNDSLs9ftmfPHj7++GN+/vnnZufodLqrPrd379707m3K9nx5VuY/U6vfFtu9ezdhYWE4OzsD4OTkRHFxMQDFxcXm5GwqlYqioiLz84qKilCpVFccV6vVVz1+8fzr9SEQCAStYU1SPj3f347/axvp+f521iS1bBLI2tpa7OzsAFNhsAcffJD//Oc/xMXFNTsvNzeXkSNHkpycfM22Pv/8c7777rsWHd/VtHpw2bhxY7MXIDY21pwTZ82aNfTv37/ZcaPRyPHjx7G1tcXV1ZVevXqRmJhIVVUVVVVVJCYm0qtXL1xdXbGxseH48eMYjcartvXHPgQCgaClrUnKZ/qqk+RXNmAE8isbmL7q5B0HmItp/YcMGcKbb77JxIkTzY+lpqbyxhtvsGXLFvOxzMxMnn/+ed5//33Cw8PvqO+W0Kq3xerr69m3bx+zZs0yH3v66ad58cUXWbFiBR4eHsybNw+AmJgYdu3axcCBA1EoFMyZMwcApVLJxIkTGTt2LADPPfccSqUSgJkzZzJ9+nQaGxvp06cPffr0uW4fAoFA0NL+t+UMDdrmGYUbtHr+t+UMIyM9b7vdy2+LJSUl8eqrr5rnVTp06IC3t7f53PLyciZOnMj8+fMJCgq67T5bUqsGFysrKw4ePNjsmIODA4sWLbriXJFIxMyZM6/aztixY83B5XIdOnRoNol1oz4EAoGgpRVUNtzS8dsRGRlJRUUF5eXlgOm99XK2trZ4eHhw9OjRv0xwEXboCwQCwR3wUF69muW1jt+OjIwM9Hq9+a7NH0mlUubPn8+aNWtYv359i/V7J4SU+wKBQHAHpg5uw/RVJ5vdGlNIJUwd3OaO2r045wKmDCYffPABEsnVyzeD6Wrmm2++YcKECVhZWd31uWYh5f4FQsp9gUBw0a2m3F+TlM//tpyhoLIBD6WCqYPb3NF8y1/VraTcF65cBAKB4A6NjPT8RwaTOyHMuQgEAoGgxQnBRSAQCAQtTgguAoFAIGhxQnARCAQCQYsTgotAIBAIWpwQXAQCgeAvqKSkhClTpjBgwABGjx7NU089RVZWFuHh4YwYMYLhw4czbtw4MjMzAVNCy2eeeea2+oqMjGzJoQPCUmSBQCD4yzEajUyaNImRI0fyySefAJCWlkZZWRk+Pj7mnGNLly7lm2++4YMPPribw70q4cpFIBDcEq1ei8FouNvD+GtJXgaftIe3lab/Jy+7o+YOHDiAhYUFDzzwgPlY27ZtmxVIhOap+C9XWVnJxIkTiY+P57777iMtLQ2Auro6pk+fTnx8PPHx8c2yKoMpAeb999/Pzp0772j8IFy5CASCW7A3fy8fHPoAN2s3pnWZRpDDXyNJ4l2VvAzWTwbthUSVVbmm7wHC77utJtPT0wkLC7vqY+fPn2fEiBHU1dXR2NjIsmVXBrLPP/
+cdu3a8eWXX7J//35effVV1q5dy5dffomNjY05/1hVVZX5OaWlpTz77LO8+OKL9OzZ87bGfTnhykUgENwUrV7LB4c
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Foundation\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 51,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Foundation\n",
2021-05-25 08:22:14 +02:00
"PConc 1282\n",
"CBlock 1242\n",
"BrkTil 310\n",
"Slab 48\n",
"Stone 11\n",
"Wood 5\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 51,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Foundation\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 52,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABFEAAAF6CAYAAADRSsf1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAADe1ElEQVR4nOzdd3xV9f3H8de5K5tAgCREhuIgoiiKKAoCYhEFtAju0brrrqNSK1qxAq1x1lIHP2e1bgQtWFGRjQxRBKFxQTBABhCybpI7z++P470kZN2EJDfA++nDxpx7z/l+7yU9ST58hmGapomIiIiIiIiIiDTIFu0NiIiIiIiIiIjsDxREERERERERERGJgIIoIiIiIiIiIiIRUBBFRERERERERCQCCqKIiIiIiIiIiERAQRQRERERERERkQgoiCICvP/++/Tp04f333+/Vde599576dOnD1u3bm3VdURE2gPdWw8MI0aMYMSIEW223tatW+nTpw/33ntvm60pcqDR/Xf/8o9//IM+ffqwcuXKFr92ffdU/dk1nyPaG5D9R58+fRp8/K9//Svjx49vo920T//4xz+YPn06//rXvzjllFOivZ2oC70f1cXExNCtWzdOO+00fve735Genl7rPK/XywcffMCnn37Kxo0bKS4uxul00rNnT04++WQmTJhAZmZmW70MkVale2vjdG+tW1PvlXXdkw3DICEhgSOPPJJzzz2Xiy++GIdDPx7KwUH338bp/ltTtO+jV155JatWrYr4+eeffz633nprq+zlYKbvktJk9f0f8eijj27jnex/7rrrLq6//nrS0tKivZU2dfLJJ3PyyScDsHv3bpYtW8Ybb7zBf//7X9555x169uwZfu7mzZu55ZZb+Omnn+jUqRODBw+mW7du+Hw+fvzxR9566y1ee+01/vnPf3LmmWdG6yWJtDjdW5vvYLy37su9svo92e/3k5+fz+eff85f/vIXvvrqKx5//PFovCSRqNH9t/kOxvsvRO8+ev7554fXDfnss8/Izs7mzDPPrPU1e/TRR5OWlsZHH31EUlJSq+zpYKQgijTZbbfdFu0t7LdSU1NJTU2N9jba3Mknn1zj68bn83H99dfzxRdf8Oyzz/LXv/4VgJ07d3LVVVeRn5/Pb3/7W+666y5iY2NrXGvXrl1Mnz6d0tLSNn0NIq1N99bmO9jurft6r9z7ngywfft2xowZw5w5c7jzzjvp3r17q74GkfZE99/mO9juvyHRuo/WlRm1bds2srOz+dWvflVv5tThhx/e4ns5mKknirQKr9fLjBkzOPfcczn++OM58cQTueyyy/joo49qPXflypX06dOHf/zjH3Veq65a7Op1nitWrODKK6/khBNO4MQTT+SGG27gp59+qvNaW7Zs4fbbb2fgwIH079+fSy65hIULF9b7OlasWMEDDzzA6NGjOfHEEznuuOMYO3Ys06dPx+Px1NpnKL3vN7/5DX369An/G9JQ7eFHH33E5ZdfzoABAzjuuOM499xzef755/F6vfW+JxUVFTzyyCMMHz6cY489lpEjRzJjxgxM06z3NbUHTqeTiy66CIB169aFjz/11FPk5+czduxY7rvvvlq/FAB07tyZBx98kDFjxtQ4XlhYyEMPPcSIESM49thjGTRoELfeeivffvttrWs09+unsrKSGTNmMH78eE444QROOOEEzjnnHKZMmcLOnTv35S0RiYjurbq3wr7dK+uTkZHBYYcdBkBRUVFE5zTl6zFk3bp13HHHHZx++ukce+yxDBkyhGuuuabBc0KCwSBTpkyhT58+3HrrrVRVVUW0T5GWoPuv7r+Naeg+Gnp95eXl/PWvf2XEiBEcc8wx9X6NhIQCM8ceeyyzZ89u1r7UZ6rlKRNFWpzX6+Xaa69l1apV9O7dm8suu4yqqirmzZvHnXfeSXZ2NnfddVeLrLVw4ULmz5/P6aefziWXXMJPP/3EokWLWL9+PXPnzi
UlJSX83JycHC6++GKKi4sZOnQoRx99NFu2bOGWW27h9NNPr/P6//d//8fmzZs54YQTGDZsGF6vl6+++op//OMfrFy5kldeeQW73Q5Y31zmz5/PqlWrOP/88znkkEMifh1PPPEEzz//PJ06dWLs2LHEx8ezZMkSnnjiCZYuXcqLL76Iy+WqcY7P5+Paa6+lsLCQoUOHYrfb+eyzz3j88cfxer37Tf2jYRgAVFVV8cEHHwBwyy23NHpe9fcjNzeXyy67jMLCQgYNGsSYMWPIy8vj448/ZuHChfzjH//gjDPOqHWNpnz9lJSU8Jvf/Ibs7GwOO+wwJkyYgNPpJDc3l5kzZzJy5Ei6dOmyr2+HSL10b9W9FfbtXtmQvLw8Nm/eTEJCAr179270+c35enznnXeYPHkyNpuNESNGcOihh7Jr1y6+/fZb3nzzTUaPHl3veh6Phz/84Q988sknXH755dx///3YbPq7QGkbuv/q/huJxu6jXq+X3/zmN5SUlDB48GASExMbzFbJzs7m+uuvx+12M2PGDE477bTW3L40gYIo0mR1RUwPOeSQcPrYyy+/zKpVqxg6dCjPPvtsuLHSrbfeyoUXXsjzzz/P8OHDOfHEE/d5L5999hkvvvgip556avjY448/zowZM5g5cybXX399+Phf/vIXiouLue+++/jtb39b4xr1/SA6efJkunfvHv5FP+Spp57i2WefZd68eeEf+q666irKysrC32gibb719ddf8/zzz9OtWzfeffddunbtCsDdd9/NrbfeyoIFC3jppZe48cYba5xXWFhIZmYmL7/8cvhvIW+99VZGjRrFK6+8wu9+9zucTmej669cubJJDapg39Ne/X4/b7/9NgDHHXccAN9++y1er5e0tLSIfoCvbvLkyRQWFnLHHXdw0003hY9fdtllXHHFFdx77718/vnnJCQk1DivqV8/2dnZXHLJJTz44IM1fnh3u90Eg8Em7Vlkb7q36t4ayb11X+6VIatWrQp/vfn9fgoLC/n8889xuVw8/PDDJCYmNnqNpn49/vjjjzz00EMkJiby73//myOPPLLG9fLz8+tdq7i4mJtuuomvv/6au+++mxtuuKFZr1ukPrr/6v7b1J9tm3Mf3bFjB0cccQSvv/468fHxDV5/+fLl3HbbbcTFxfHGG29ooEI7oyCKNNneHanBqgsMfaOZOXMmhmFw77331uhM3blzZ2666Sbuv/9+3n333Rb5RjN69Oga32QALrroImbMmMH69evDx/Lz81m2bBndu3fniiuuqPH8X/3qV5x88sl13mx79OhR57pXXXUVzz77LEuWLGnwb84iMXPmTABuuumm8DcZAIfDwR//+EcWLVrEu+++W+sbDcD9999fI427c+fOnHnmmcyePZvNmzdz1FFHNbr+qlWr6vwzbci+fKMpLi5m6dKl5OTk0KlTp3DQo7CwEKDOaT0Nyc/PZ+nSpWRkZHDdddfVeOzEE09kzJgxfPjhh3z66aeMGzeuxuORfv3s2rWLjz76iK5du/LHP/6x1t9+7h2cEWkO3Vt1b43k3trce2V1q1atqvXn4nA4uOCCC8KB7cY09evxzTffxO/3c/PNN9cKoDT0erZt28Z1111Hbm4uWVlZnHfeeZG+TJGI6f6r+29zfrZtzn303nvvbTSA8sEHHzBp0iR69uzJCy+8QEZGRpP2Jq1PQRRpsu+++67ex8rLy9myZQtpaWl1NjAaNGgQAP/73/9aZC/HHntsrWPdunUDrPKLkI0bNwIwYMCAcIpidfV9o6moqOBf//oXn376KTk5Objd7ho1maEfZvdFaG+h96a6ww47jPT0dLZu3UpZWVmNrtpJSUn06tWr1jmhH0Qjbbx62223tXpDterfaJxOJ926deOSSy7hxhtvDP95NVf1P9u6/nZi0KBBfPjhh2zcuLFWECXSr5/169cTDAYZOHBgo9/4RJpL91bdW9vKrbfeGt5bMBhkx44dfPbZZ/ztb3
9j/vz5vPvuuw3em5vz9bh27VqAeksM6rJ582YuvvhiKisr+b//+79av1iKtBTdf3X/barm3EdjYmIaHav9r3/9i/n
"text/plain": [
"<Figure size 1440x360 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Foundation\", hue=\"Foundation\",\n",
" col_order=[\"PConc\", \"CBlock\", \"BrkTil\"],\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 53,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"foundation = pd.get_dummies(df[\"Foundation\"], prefix=\"found\", dtype=int)\n",
2021-05-25 08:22:14 +02:00
"# Only keep the top 3 realizations.\n",
"del foundation[\"found_Slab\"]\n",
"del foundation[\"found_Stone\"]\n",
"del foundation[\"found_Wood\"]\n",
"df = pd.concat([df, foundation], axis=1)\n",
"del df[\"Foundation\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 54,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"found_BrkTil\", \"found_CBlock\", \"found_PConc\"])"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 55,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" found_BrkTil found_CBlock found_PConc\n",
"Order PID \n",
"1 526301100 0 1 0\n",
"2 526350040 0 1 0\n",
"3 526351010 0 1 0\n",
"4 526353030 0 1 0\n",
"5 527105010 0 0 1"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 55,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[foundation.columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Garage Type\n",
"\n",
"As can be expected, the plot for *Garage Type* looks very similar to the one for the *has Garage* variable above. Therefore, *Garage Type* is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 56,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACpXklEQVR4nOyddXhU19aH3zMSd1cSElyDEwhQgpYAQYu0tKVChZZC3ZC6t7SXtrf96kaRAkFKkeDuxYKFhBCbuGf8fH/sZIITIIFLe97nmYeZPefsvedMOGv2Xmv9liTLsoyCgoKCgkIdorrVE1BQUFBQ+OehGBcFBQUFhTpHMS4KCgoKCnWOYlwUFBQUFOocxbgoKCgoKNQ5mls9gf8VunTpQnBw8K2ehoKCgsJtRUZGBjt37ryoXTEuVQQHB7No0aJbPQ0FBQWF24oRI0Zcsl3ZFlNQUFBQqHMU46KgoKCgUOcoxkVBQUFBoc5RfC4KCgr/CkwmE+np6ej1+ls9ldsSBwcHQkJC0Gq1tTpeMS4KCgr/CtLT03F1dSU8PBxJkm71dG4rZFkmPz+f9PR0GjZsWKtzlG0xBQWFfwV6vR5vb2/FsFwHkiTh7e19Tas+ZeWioKDwz0CW4eRqyD0B4TEQ3O6iQxTDcv1c67VTjIuCgsI/g9TNMHeMMDIuAfDwOnBXEqNvFYpxUVBQ+GdQmiUMC0BZNlQWXN64yDJc5pd4Xl4e77zzDgcOHMDd3R2tVstDDz1Ev3796mnitWPz5s18+OGHAKSlpeHn54eDgwNNmzbl/fffv6VzuxSKcVFQUPhnEB4DjfrD6XUQ8zT4NLn4GNkK5XlQkQ+OHuDsByp1zduyzOTJkxk2bBgfffQRIORN1q1bV+tpmM1mNJq6v7X26NGDHj16ADBhwgSef/55WrduXefj1BWKcVFQUPhn4BYMd/0I+mJwOd9o2DBVQkmGeF6aDVpncHCzvb1jxw60Wi3jxo2ztQUHBzNhwgRARJw9//zzVFZWAjB9+nTat2/Pzp07+fTTT3FzcyMlJYVVq1bx+OOPk52djcFg4N5772XMmDEALFiwgG+++QZXV1eaNWuGnZ0dM2bMoKCggJkzZ5KZmQnAyy+/TIcOHa74kbdv387PP//MF198AcDWrVv57bff+Pzzz2nXrh2jR49m69at+Pj48Mknn+Dl5UVaWhqvvfYahYWFODg48MYbbxAZGXl91/xKyAqyLMvy8OHDb/UUFBQU6pGjR4/KsqFcljP2y3LGPvHQl5x3zI8//ii/9dZbl+2joqJC1uv1sizLckpKiu2+sWPHDrlt27ZyWlqa7djCwkJZlmW5srJSjouLkwsKCuTs7Gy5d+/ecmFhoWw0GuVx48bJr732mizLsvz000/Lu3fvlmVZljMyMuSBAwdedh733HOPfPDgQdlqtcoDBgyQ8/PzbX0kJibKsizLTZo0kRMSEmRZluX//Oc/tnHuvfdeOSUlRZZlWT5w4IA8YcKEq167ao4ePXpR2+XuncrKRUFB4d+DnRN4hoOxDLROYOdyxcNfe+019u7di1ar5Y8//sBsNvP6669z7NgxVCoVqamptmNbt25NaGio7fXPP//MmjVrAMjKyuLMmTPk5eXRqVMnPDw8ABg4cKCtj23btnHq1Cnb+WVlZZSXl+Ps7HzZ+UmSRHx8PEuXLmXEiBHs37+f9957DwCVSsWgQYMAiI+P54knnqC8vJz9+/fz1FNP2fowGo1XvWzXg2JcFBQU/l04eojHJWjcuDGrV6+2vZ45cyYFBQWMGjUKgB9++AEfHx8SEhKwWq20adPGdqyTk5Pt+c6dO9m2bRvz5s3D0dGRCRMmYDAYrjgtq9XK/Pnzsbe3v6aPM2LECB577DHs7OwYOHDgZf09kiQhyzJubm4kJCRc0xjXg5JEqaCgoFBF165dMRgM/Pbbb7a2cxMHS0tL8fX1RaVSkZCQgMViuWQ/paWluLu74+joSH
JyMgcOHADE6mb37t0UFxdjNpvPM2QxMTH8/PPPttdJSUm1mrO/vz9+fn58+eWXjBw50tZutVpZtWoVAMuWLaNDhw64uLgQEhLCypUrARHAcOzYsVqNc60oxkVBQUGhCkmS+Pzzz9m9ezexsbGMGjWKF154gWeffRaA8ePHs3jxYoYOHcrp06fPW62cS8+ePTGbzdx555189NFHREVFAcIQPPLII4wePZpx48YRHByMq6srAK+88gqHDx9myJAhDBo0iLlz59Z63kOGDCEwMPA8x7yTkxMHDx5k8ODB7Nixg8mTJwPwwQcfsHDhQoYOHUpcXBxr1669nkt1VSRZrg4M/3czYsQIpViYgsI/mKSkJJo3b36rp2Hzo5jNZp544glGjhx5wzk0r7/+Os2bN2f06NG2tnbt2rF///4bne55XOoaXu7eqfhcFBQUFG4ic+bMYdu2bRgMBmJiYujbt+8N9TdixAgcHR158cUX62iGdYNiXBQUFBRuIi+88EKd9ne5HZe6XrVcK4rPRUFBQUGhzqk343L69Gni4+Ntj/bt2/PDDz9QVFTExIkT6d+/PxMnTqS4uBgQUQtvvvkm/fr1Y8iQIRw5csTW1+LFi+nfvz/9+/dn8eLFtvZq51e/fv148803qXYfXW4MBQUFBYWbQ70Zl4iICBISEkhISGDRokU4OjrSr18/vv76a6Kjo1m9ejXR0dF8/fXXAGzatInU1FRWr17NG2+8waxZswBhKObMmcP8+fNZsGABc+bMsRmLWbNm8cYbb7B69WpSU1PZtGkTwGXHUFBQUFC4OdyUbbHt27cTGhpKcHAwiYmJDBs2DIBhw4bZwuCq2yVJIioqipKSEnJyctiyZQvdu3fHw8MDd3d3unfvzubNm8nJyaGsrIyoqCgkSWLYsGEkJiae19eFYygoKCgo3BxuikN/xYoVDB48GID8/Hz8/PwA8PX1JT8/HwCdTkdAQIDtnICAAHQ63UXt/v7+l2yvPv5KYygoKCjcSrKysnj++efJz89HkiTuuusuUlNT2bdvHyaT6bwywo899hgDBw68xTO+furduBiNRtatW8czzzxz0XuSJNV7ZbibMYaCgoJCbVCr1bz44ou0bNmSsrIyRo4cyeeff87MmTNJT0/n0UcfvSnSLDeDejcumzZtomXLlvj4+ADg7e1NTk4Ofn5+5OTk4OXlBYgVSXZ2tu287Oxs/P398ff3Z9euXbZ2nU5H586dL3v8lcZQUFBQqC1L9mfwwarjZBZVEuThyHMDmjKs3Y1VtvTz87Ptqri4uBAREYFOp6NRo0bnHff888/Tv39/Ww7MM888w5133klJSQlr1qyhrKwMnU7H0KFDeeKJJwBISEjg559/xmQy0bZtW2bOnAnUZP5LksTIkSO5//77b+gz1JZ697msWLGCuLg42+vY2FiWLFkCwJIlS+jTp8957bIsc+DAAVxdXfHz8yMmJoYtW7ZQXFxMcXExW7ZsISYmBj8/P1xcXDhw4ACyLF+yrwvHUFBQUKgNS/Zn8NKiQ2QUVSIDGUWVvLToEEv2Z9TZGOnp6SQlJdG2bduL3hs1apQtf6W0tJT9+/dzxx13AHDo0CE+++wzli5dyl9//cWhQ4dITk5m5cqVzJ07l4SEBFQqFcuWLSMpKQmdTsfy5ctZtmwZI0aMqLP5X416XblUVFSwbds2Xn/9dVvbpEmTmDp1KgsXLiQoKIjZs2cD0KtXLzZu3Ei/fv1wdHTk7bffBsDDw4PHH3/cpko6efJkm1z1zJkzeemll9Dr9fTs2ZOePXtecQwFBQWF2vDBquNUms4Xpaw0Wfhg1fEbXr2AkICZMmUKL7/8Mi4uF8v+d+7cmddee42CggJWrVrFgAEDbGrH3bp1w9PTE4B+/fqxd+9eNBoNhw8ftt0n9Xo93t7e9O7dm7Nnz/LGG2/Qq1cvYmJibnjutaVejYuTkxM7d+48r83T05Mff/zxomMlSbIt4y5k1KhRtot2Lq
1bt2b58uUXtV9uDAUFBYXakFlUeU3t14LJZGLKlCkMGTKE/v37X/a46jotK1as4J133rG1X+hDrpbSHz58+CV92wk
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Garage Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 57,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Garage Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Heating\n",
"\n",
"Most of the houses have gas heating, so there is hardly any variation. The variable is not helpful."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 58,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACOoElEQVR4nO2dd1gUZ9eH76X3KiyI2LFhQWLHFhS7EVtMNTHFFBMTk5geNTExedPfaJpf6ptiYjexolixF5DYKwooIL3Xne+Pw7KgoKggap77uvbK7uzszDMrmbOn/Y5O0zQNhUKhUChqELO6XoBCoVAobj+UcVEoFApFjaOMi0KhUChqHGVcFAqFQlHjKOOiUCgUihrHoq4XcLPQtWtXfHx86noZCoVCcUsRHx/Pzp07L9mujEspPj4+LF68uK6XoVAoFLcUo0aNqnS7CospFAqFosZRxkWhUCgUNY4yLgqFQqGocVTORaFQKKpBUVERcXFx5Ofn1/VS6gQbGxsaNGiApaVltfZXxkWhUCiqQVxcHI6OjjRu3BidTlfXy7mhaJpGSkoKcXFxNGnSpFqfUWExhUKhqAb5+fm4u7v/6wwLgE6nw93d/aq8NuW5KBSK2wNNg+NhcOEYNO4JPh1r/BT/RsNi5GqvXRkXhUJxexCzBeaNEyPj4AWPrwdn1RhdV6iwmEKhuD3IOi+GBSA7AfJSq973Bo2x6tixove0ePFi3nnnnWs61uHDh9m0aVPZ6/DwcObOnXtd66tNlHFRKBS3B417QvMBYGYBvV+Gei0u3aekCHZ8DV91g43/gYKcG7/Oa+Ri49KvXz8mTpxYhyu6PCosplAobg+cfODunyE/Axw8wcz80n3ORcLqV+X5xlngEwh+ITd2naWkpqYyffp0zp07B8Drr7/OHXfcQXR0NO+99x4FBQXY2Ngwa9YsGjRowBdffEF+fj579+7liSeeID8/nwMHDjBt2jReffVVHBwcOHDgABcuXGDq1KkMGjQIg8HAO++8w44dO/D29sbCwoLRo0czaNCgWr8+ZVwUCsXtg5WdPKrCwkY8G0OxvLa0rdXl5OfnM2LEiLLXGRkZBAcHA/Dee+/x0EMP0alTJ86dO8ejjz7KqlWraNq0Kb/99hsWFhZs27aNzz77jNmzZzN58uQyYwJcooWYlJTE77//zqlTp3jqqacYNGgQYWFhxMfHs3LlSlJSUhgyZAijR4+u1Ws2ooyLQqH49+DdHu7+H8REQP1AaNijVk9nY2PDsmXLyl4vXryYAwcOALBt2zZOnDhR9l52djY5OTlkZWXxyiuvcObMGXQ6HUVFRdU6V//+/TEzM6N58+YkJycDsHfvXgYNGoSZmRkeHh507dq1Bq/u8ijjolAo/l20GiqPOsZgMDB//nysra0rbJ85cyZdu3blyy+/JC4ujvHjx1freFZWVrWxzGtGJfQVCoWiDujZsye//PJL2evDhw8DkJWVhV6vB2DJkiVl79vb25OTc3UFCIGBgYSFhWEwGEhOTmbXrl01sPLqoYyLQqFQ1AFvvPEGBw4cYPjw4QwZMoR58+YB8Nhjj/Hpp58SGhpKcXFx2f5du3blxIkTjBgxgpUrV1brHAMHDkSv1zNkyBCmTp1KmzZtcHR0rJXruRidpt2ggu+bnFGjRqlhYQqFokoOHz5M69at63oZV01OTg729vakpaUxduxY5s2bh4eHxzUdq7LvoKp7p8q5KBQKxW3Mk08+SWZmJkVFRTz99NPXbFiuFmVcFAqF4jamfF7nRqJyLgqFQqGocWrNuJw6dYoRI0aUPQIDA/npp59IT09nwoQJDBgwgAkTJpCRkQHIvIB3332XkJAQhg8fzsGDB8uOtWTJEgYMGMCAAQMqVE8Yk2EhISG8++67GNNHVZ1DoVAoFDeGWjMuTZs2ZdmyZSxbtozFixdja2tLSEgIc+fOpXv37oSFhdG9e/cy4bXNmzcTExNDWFgYM2fOZMaMGY
AYijlz5jB//nwWLFjAnDlzyozFjBkzmDlzJmFhYcTExLB582aAKs+hUCgUihvDDQmLbd++HV9fX3x8fAgPDyc0NBSA0NBQ1q1bB1C2XafTERAQQGZmJklJSURERBAUFISLiwvOzs4EBQWxZcsWkpKSyM7OJiAgAJ1OR2hoKOHh4RWOdfE5FAqFQnFjuCEJ/RUrVjBs2DAAUlJS8PT0BMDDw4OUlBQAEhMT8fLyKvuMl5cXiYmJl2zX6/WVbjfuf7lzKBQKxa1M69atadHCpPb85ZdfEh8fzw8//MC3335bhyu7lFo3LoWFhaxfv54XX3zxkvd0Ol2tT3a7EedQKBSKG8HFWmUA8fHxNXLs4uJiLCxqziTUunHZvHkz/v7+1KtXDwB3d3eSkpLw9PQkKSkJNzc3QDyShISEss8lJCSg1+vR6/UVJAsSExPp0qVLlftf7hwKhUJxo1gaGc9Ha45yLj2P+i62TB3YktCOtTsZMz09nddff53Y2FhsbW155513aNWqVZXbZ8+ezdmzZ4mNjaV+/fp8+umnNbaWWs+5rFixgqFDTSJxwcHBLF26FIClS5fSr1+/Cts1TSMqKgpHR0c8PT3p2bMnERERZGRkkJGRQUREBD179sTT0xMHBweioqLQNK3SY118DoVCobgRLI2M57XF/xCfnocGxKfn8drif1gaeX1ehlHCf8SIEUyaNOmS92fPnk2bNm34+++/mTJlCq+88spltwOcPHmSn376qUYNC9Sy55Kbm8u2bdsqjPWcOHEizz//PAsXLqR+/fp8/vnnAPTp04dNmzYREhKCra0ts2bNAsDFxYWnn36aMWPGADBp0iRcXFwAmD59Oq+99hr5+fn07t2b3r17X/YcCoVCcSP4aM1R8opKKmzLKyrhozVHr8t7qSwsVp69e/cye/ZsALp37056ejrZ2dlVbgf5MW5jY3PNa6qKWjUudnZ27Ny5s8I2V1dXfv7550v21el0TJ8+vdLjjBkzpsy4lKddu3YsX778ku1VnUOhUChuBOfS865qe11ia1s7A9NUh75CoVDUMPVdKr9hV7W9pujUqRN//fUXADt37sTV1RUHB4cqt9cmyrgoFApFDTN1YEtsLc0rbLO1NGfqwJa1et5nnnmGgwcPMnz4cD755BM++OCDy26vTZTkfilKcl+hUFyOq5Xcr4tqsdpGSe4rFApFHRPa0eeWNybXgwqLKRQKhaLGUcZFoVAoFDWOMi4KhUKhqHGUcVEoFApFjaOMi0KhUChqHGVcFAqF4hYhOTmZF198kX79+jFq1CjGjRvH2rVrr+lYhw8fpmXLlmVDFmsaZVwUCoXiFkDTNCZNmkSnTp0IDw9n8eLFfPrppxXU4a+G5cuXc8cdd7BixYoaXqmgjItCobg6iovAYKjrVdz8RM+Hz9rCDBf5b/T86zrcjh07sLS05N577y3b5uPjw4MPPkhcXBz33XcfI0eOZOTIkezbtw+ApKQk7r//fkaMGMGwYcPYs2cPIIZq9erVfPDBB2zdupWCgoLrWltlqCZKhUJRfU6Ew6pXwNkXBs4CffU71v9VRM+HvydDUalQZUasvAZof/c1HfL48eO0adOm0vfc3d358ccfsba2JiYmhhdeeIHFixezfPlyevbsyVNPPUVJSQl5ebKeffv20aBBAxo2bEjXrl3ZuHEjAwcOvKZ1VYUyLgqFonoUF4lhSTkuj0NLlHGpivB3TIbFSFGebL9G43Ixb7/9Nnv37sXS0pKffvqJd955hyNHjmBmZkZMTAwgyvGvv/46xcXF9O/fv0y6pfycrSFDhrBs2TJlXBSKOiEzHlJOgmtTcGlQ16upG8zMxWNJOS6vbZzrdj03MxlxV7e9Gvj5+REWFlb2evr06aSmpjJmzBh++ukn6tWrx7JlyzAYDLRv3x6Azp078+uvv7Jp0yZeffVVJkyYwPDhwwkLCyM8PJxvvvkGTdPK5rvUpFKyyrkoFOVJPAgHl4ghMZIZD3
88AD8Ph9/GQNqZKx/HYICimo9j1ylmZhIK6/0yDJgF7e+p6xXdvDhX8QOkqu3VoFu3bhQUFPD777+XbcvPzwcgKys
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Heating\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 59,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Heating\"]\n",
"# Also discard the associated ordinal variable.\n",
"del df[\"Heating QC\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### House Style\n",
"\n",
"In summary, this variable is very similar to the above derived variable *has 2nd Flr*. Therefore, it is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 60,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACyaklEQVR4nOydd3hU1daH3+k1yUzapJJCqKGEDobeixCqYkHBgiKKihc7ioLlU+xcvXJFFL2KgnRQSuiCoRN6D6T3zKRMn/n+mDCAJBhIApbzPo+PmX322XvnZJg1e6+1fkvkdrvdCAgICAgI1CHiW70AAQEBAYG/H4JxERAQEBCocwTjIiAgICBQ5wjGRUBAQECgzhGMi4CAgIBAnSO91Qv4s9CpUyfCw8Nv9TIEBAQE/lJkZmaSkpJyVbtgXCoJDw9nyZIlt3oZAgICAn8pRo4cWWW7cCwmICAgIFDnCMZFQEBAQKDOEYyLgICAgECdI/hcBAQEBH6H3W4nIyMDi8Vyq5fyp0GpVBIREYFMJqtRf8G4CAgICPyOjIwMfHx8iI6ORiQS3erl3HLcbjeFhYVkZGQQExNTo3uEYzEBAQGB32GxWAgICBAMSyUikYiAgIDr2skJOxcBAYG/BW63m7ItW7GdO4u6fQdULVvUajzBsFzJ9T4PwbgICAj8LahISSFj0iRwu5EEBxP9w0LkoaG3eln/WIRjMQEBgb8F9rx8qCxP5czLw2U0Vtv3RstYtWnT5orXS5Ys4fXXX7+hsWrDgQMHGDNmDElJSQwaNIhPPvkEgJSUFPbt2/eH9z///PP88ssv9bpGYeciICDwt0DTqSOaHt0p/3UHARMfRh4be1Uft91O8cKFFP/wI76DBhIwYQJitfoWrLZ2PPfcc3z00Uc0bdoUp9PJuXPnANi1axdqtZq2bdve4hUKOxcBAYG/CTKDgYgPPyRuYzJBkycjlsuv6mM+coTcN97Edvo0BZ/MoXzPnjqbPyMjg/vuu4+hQ4dy//33k5WVBVy9S7i4+8nLy+Oee+4hKSmJ22+/nT2Va9m+fTt33nknI0aMYMqUKZSXl181V1FREUFBQQBIJBLi4uLIyMhg4cKFfPXVVyQlJbFnzx569+6N3W4HoKys7IrXFzl8+DD33nsvI0eO5MEHHyQvL69OnoewcxEQEPjbIFapEKtU1V4XKZQglYLD4emvVF7X+BaLhaSkJO9ro9FI7969AZg1axYjRoxgxIgRLF68mFmzZvHpp59WO9aqVavo2rUrkyZNwul0YjabKSoq4rPPPmP+/Pmo1Wrmzp3L/Pnzefzxx6+49/7772fgwIF07NiRbt26MWLECCIiIhg7dixqtZoHH3wQ8Ajybtmyhb59+7J69Wr69+9/RZ6K3W73rtPf3581a9bwwQcf8NZbb13Xc6kKwbgICAj8Y1A1a0r4Rx9SsWs3qhbxqNu3v677lUoly5cv975esmQJhw8fBmD//v1e30dSUhLvvvvuNcdq2bIlL774Ig6Hg759+9KsWTM2bdrE6dOnueuuuwDPh39CQsJV9z7++OMMGzaM7du3s2rVKlavXs0333xzVb/Ro0fzxRdf0LdvX5YsWcLMmTOvuH7u3DlOnjzJhAkTAHC5XN4dUW0RjIuAgMA/Ct8+ffDt0+emzSeRSHC5XIDnw/visVSHDh349ttv2bJlC88//zwTJkzA19eXxMRE3n///T8ct0GDBtx9993ccccddOnSheLi4qv6tGvXjtdee42UlBScTieNGze+4rrb7aZRo0b88MMPdfCbXongcxEQEBCoA9q0acPq1asBWLlyJe0rd0Xh4eEcOXIEgI0bN3qNS2ZmJoGBgdxxxx2MGTOGI0eOkJCQwL59+zh//jwAFRUVXmf95WzevNkb8Xb+/HnEYjG+vr5oNJqrfDTDhw/nmWeeqVIaPyYmhqKiIvbv3w94dkqnTp2qi8ch7FwEBAQE6oLp06fzwgsvMG/ePP
z9/b1+izvuuIPHHnuMYcOG0a1bN9SV0Wm7du1i3rx5SKVS1Go1//d//+e9b+rUqdhsNgCeeuqpqyRXli9fzltvvYVSqUQikTB79mwkEgm9evViypQpJCcnM336dNq3b8/QoUP58MMPuf32269as1wu5+OPP2bWrFmUlpbidDq5//77adSoUa2fh8h9owHffzNGjhwpFAsTEBAA4NixYzRr1uxWL6NO+OWXX0hOTv5DH1BNqOq5VPfZKexcBAQEBP6mzJw5k61btzJ37tybPrdgXAQEBAT+pkyfPv2WzS049AUEBAQE6px6My5nz54lKSnJ+1/btm356quvKCkpYcKECfTv358JEyZgrNT/cbvdzJo1i379+jF06FBvdAXA0qVL6d+/P/3792fp0qXe9sOHDzN06FD69evHrFmzvNET1c0hICAgIHBzqDfjEhsby/Lly1m+fDlLlixBpVLRr18/5s6dS5cuXVi3bh1dunTxngVu3bqVtLQ01q1bx8yZM5kxYwbgMRRz5szhxx9/ZNGiRcyZM8drLGbMmMHMmTNZt24daWlpbN26FaDaOQQEBAQEbg435Vhs586dREZGEh4eTnJyMsOHDwc88dcbNmwA8LaLRCISEhIwmUzk5eWxfft2EhMT0el0+Pn5kZiYyLZt28jLy6OsrIyEhAREIhHDhw8nOTn5irF+P4eAgICAwM3hphiX1atXe2OsCwsLCQ4OBiAoKIjCwkIAcnNzCQkJ8d4TEhJCbm7uVe0Gg6HK9ov9rzWHgICAwF+FF154gS5dulyRn1Jbqf2bSb0bF5vNxsaNGxk4cOBV10QiUb1Xe7sZcwgICAjUNSNHjuSLL764ou25555j5syZLF++nFWrVjFo0CDAk5B5Mcu+pjgqxTvri3oPRd66dSvx8fEEBgYCEBAQQF5eHsHBweTl5eHv7w94diQ5OTne+3JycjAYDBgMBnbt2uVtz83NpWPHjtX2v9YcAgICAvXBsv2ZvLv2BFklZsJ0KqYNaMLwNuG1GrNDhw5kZGRc0XYtqX2xWMyKFSuYPn06ISEhvPjiixQXF3uz/sPCwnj++eeRy+UcO3aMtm3bsmnTJhYuXIi/vz8ul4sBAwbwww8/1MlnZr3vXFavXs2QIUO8r3v37s2yZcsAWLZsGX0qBeQutrvdbg4cOICPjw/BwcF07dqV7du3YzQaMRqNbN++na5duxIcHIxWq+XAgQO43e4qx/r9HAICAgJ1zbL9mbyw5BCZJWbcQGaJmReWHGLZ/sw6n+ui1P7kyZNZuHAhVqvVK7U/fvx4li9fTvv27b3y/ytXrmTo0KHMmjXLO0Zubi4LFy7khRdeYNiwYaxYsQKAHTt20LRp0zr7Ml6vxqWiooIdO3bQv39/b9vEiRP59ddf6d+/Pzt27GDixIkA9OjRg8jISPr168f06dN59dVXAdDpdDz22GOMHj2a0aNHM3nyZHQ6HQCvvvoqL7/8Mv369aNBgwZ07979mnMICAgI1DXvrj2B2e68os1sd/Lu2hN1Ptfjjz/OTz/9RGJiIqtWreKhhx6qst/+/fu9vpqkpCT27t3rvTZw4EAkEgkAo0aN8pYQ+Omnn6oUt7xR6vVYTK1Wk5KSckWbXq/n66+/vqqvSCTyGpTfc9Gw/J6WLVuyatWqq9qrm0NAQECgrskqMV9Xe22pidT+tVBdVkwtNDSUgIAAdu7cSWpqKrNnz66zdQoZ+gICAgK1IExXdeXL6tprQ02l9quT/6+KMWPGMG3atCt2NHWBYFwEBAQEasG0AU1Qya78UFbJJEwb0KRW406dOpWxY8dy7tw5unfvzqJFi1i+fDkDBw4kKSmJadOmXSG1v379epKSktizZw/Tp09nyZIlDB06lOXLl/PSSy9VO0/v3r2pqKio0yMxEIQrBQQEBGrFxaiwuo4Wq6oa5ZgxY6rsGxMTw8qVK69oW7BgwVX93n777avajh8/TtOmTWnYsOENrrRqBOMiICAgUE
uGtwmvtTG5FcydO5fvv/++Tmq9/B7BuAgICAj8Q5k4cWK9RdMKPhcBAQEBgTpHMC4CAgICAnWOYFwEBAQEBOocwbg
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"House Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 61,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"House Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Land Contour\n",
"\n",
"This variable is assumed to contain the same information as the ordinal variable *Land Slope* and is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 62,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACnZUlEQVR4nOzdd3hUVfrA8e/UzGQymUmbSe8JgRRC78VAQCmCgL1ib6ti77rW/alrWdl1ZXUtu64KSBFBQTpIryEhlJCE9Emdlkyf+f0xMIAUQRMRvZ/n8TG5c+89h5R5c+45531FPp/Ph0AgEAgEnUh8vjsgEAgEgt8fIbgIBAKBoNMJwUUgEAgEnU4ILgKBQCDodEJwEQgEAkGnk57vDvxWDBgwgLi4uPPdDYFAILig1NbWsnnz5pOOC8HliLi4OObNm3e+uyEQCAQXlClTppzyuPBYTCAQCASdTgguAoFAIOh0QnARCAQCQacT5lwEAsEfmsvloqamBrvdfr678pumUCiIj49HJpOd1flCcBEIBH9oNTU1qNVqkpOTEYlE57s7v0k+n4+WlhZqampISUk5q2uEx2ICgeAPzW63ExERIQSWMxCJRERERJzT6E4YuQgEgt8Fn8/Hqv2NlDVaGZgSQV6C9qyvFQLLTzvXr5EQXAQCwe/CxvIWbvlkGz4f6NRBLLxnCDFa5fnu1h+W8FhMIBD8LjSa7RytTtVocWDscJ323DOVserVq1dnd413332XDz/88JSvLViwgAkTJjBx4kQmT5582vN+SmlpKWvWrPkl3exUQnARCAS/CwNTI7ioWxRSsYj7CtJJ1alOOsfl8fLRDxWMeWst7yw/QIfDfR56esyaNWv45JNP+PDDD1m0aBGzZ89GrVb/rHt1ZnDx+Xx4vd5fdA/hsZhAIPhdiNYo+ce1vTHb3ESqg5CIT54j2FNj4s+L9gLw1vKD9EzQoj+Le69cuZL33nsPl8uFVqvljTfeIDIyknfffZe6ujpqamqoq6vjxhtv5IYbbgDgvffeY8GCBYSHhxMTE0N2dvZJ9501axaPPvooer2/F3K5nCuuuALwB4vnnnsOm81GYmIir7zyChqNhuuvv568vDw2b96MxWLh5ZdfJi8vj7/97W/Y7Xa2b9/OHXfcweDBg3nyySeprq5GqVTywgsvkJWVxbvvvktwcDC33HILABMmTOCf//wnALfccgs9e/akpKSEWbNm/aJ8i8LIRSAQ/G4o5VL0GsUpAwuAQiZGetxrCpnkrO7bp08fZs+ezYIFCxg/fjwffPBB4LWKigo+/PBD5syZw9///ndcLhfFxcUsWbKEBQsW8K9//Ys9e/ac8r4HDx4kJyfnlK89+uijPPzwwyxatIjMzExmzpwZeM3j8TB37lyefPJJZs6ciVwu57777mPcuHEsXLiQcePG8e6779KjRw8WLVrEjBkzeOyxx37y33n48GGuueYaFi9e/IsT+QojF4FA8IfRI1bDP67tzebyVnLjNfRPDmf//safvK6hoYEZM2bQ1NSE0+kkPj4+8NqIESOQy+WEh4cTHh5OS0sL27ZtY/To0SiV/gUFBQUF59RPi8WCxWKhf//+AFx22WXcf//9gdcLCwsByM7Opra29pT32L59O++++y4AgwYNwmg0YrVaz9hubGws+fn559TX0xGCi0Ag+EMZkx3NmOzoc7rmpZde4qabbmLUqFFs3rz5hFGEXC4PfCyRSHC7z34eJz09neLiYgYNGnRO/TnaplgsxuPxnNO1EonkhPkUh8MR+Dg4OPic7nUmwmMxgUAg+AkWiyUwL7JgwYKfPL9fv34sX74cu92O1Wpl1apVpzzvjjvu4PXXX6epqQkAp9PJnDlzUKvVhIaGsm3bNgAWLlxIv379ztimSqWivb098Hnfvn35+uuvAdi8eTNhYWGEhIQQFxfH3r3+eaeSkhJqamp+8t/zcwgjF4FAIDiOzWZj+PDhgc+nT5/Ovf
fey/33349Go2HAgAE/+YacnZ3NuHHjmDRpEuHh4eTm5p7yvBEjRtDc3Mz06dPx+XyIRCKmTp0KwP/93/8FJvQTEhJ49dVXz9jmgAEDmDVrFpMmTeKOO+7g3nvv5cknn2TixIkolUr+8pe/ADB27FgWLlzI+PHjycvLIzk5+Ry+OmdP5DvTgu8/kClTpgjFwgSCP6DS0lK6d+9+vrtxQTjV1+p0753CYzGBQCAQdDohuAgEAoGg0wnBRSAQCASdrsuCS3l5OZMmTQr817t3bz7++GOMRiPTp09nzJgxTJ8+HZPJBPjTDbz00ksUFhYyceJESkpKAveaP38+Y8aMYcyYMcyfPz9wvLi4mIkTJ1JYWMhLL70UyBd0ujYEAoFA8OvosuCSmprKwoULWbhwIfPmzUOpVFJYWMisWbMYNGgQy5YtY9CgQcyaNQuAtWvXUllZybJly3jxxRd5/vnnAX+gmDlzJrNnz2bOnDnMnDkzECyef/55XnzxRZYtW0ZlZSVr164FOG0bAoFAIPh1/CqPxTZu3EhCQgJxcXGsWLGCyZMnAzB58mSWL18OEDguEonIz8/HbDbT2NjI+vXrGTJkCFqtFo1Gw5AhQ1i3bh2NjY1YrVby8/MRiURMnjyZFStWnHCvH7chEAgEgl/HrxJcFi9ezIQJEwBoaWlBp9MBEBUVRUtLCwAGg4Ho6GO7ZqOjozEYDCcd1+v1pzx+9PwztSEQCAQXqnMpBfD444/z3XffdWFvflqXBxen08nKlSu5+OKLT3pNJBJ1eQW4X6MNgUAgEJyoy4PL2rVryc7OJjIyEoCIiAgaG/2J4hobGwkPDwf8I5KGhobAdQ0NDej1+pOOGwyGUx4/ev6Z2hAIBIJfw4KdtQz5y0pSHl/MkL+sZMHOUyeX/CUsFgsXXXRRIE9YR0cHI0aMwOU6fZG0X1OXB5fFixczfvz4wOcFBQWB3DwLFixg1KhRJxz3+Xzs2rULtVqNTqdj6NChrF+/HpPJhMlkYv369QwdOhSdTkdISAi7du3C5/Od8l4/bkMgEAi62oKdtTwxbw+1Rhs+oNZo44l5ezo9wKjVarKystiyZQsAq1evZujQochksk5t5+fq0uDS0dHBhg0bGDNmTODY7bffzg8//MCYMWPYsGEDt99+O+DPsZOQkEBhYSHPPPMMzz33HABarZa7776badOmMW3aNO655x60Wi0Azz33HE8//TSFhYUkJiYG8gGdrg2BQCDoaq8v3Y/NdWKmYpvLw+tL93d6W+PGjWPJkiWA/w/5cePGdXobP1eXJq4MDg5m8+bNJxwLCwvjk08+OelckUgUCCg/djSw/Fhubi7ffPPNScdP14ZAIBB0tTqj7ZyO/xIFBQW89dZbGI1GSkpKGDhwYKe38XMJO/QFAoGgE8Vqled0/JdQqVTk5OTw8ssvM3LkSCSSs6us+WsQgotAIBB0okfGdkP5o/LJSpmER8Z2+0X3PVoK4Oh/H330EeB/NPb111//ph6JgVDPRSAQCDrV5F7+2vOvL91PndFGrFbJI2O7BY7/XPv27Tvl8Ysvvpj9+0+czzlau+V8EoKLQCAQdLLJveJ+cTC50AmPxQQCgUDQ6YTgIhAIBIJOJwQXgUAgEHQ6IbgIBAKBoNMJwUUgEAgEnU4ILgKBQHAB6N69O5MmTeLSSy/lsssuY8eOHT95TUFBAa2trb9C704mLEUWCASCC4BCoWDhwoUArFu3jjfffJP//ve/57lXpyeMXAQCwTlxub14vb7z3Y3ftqLZ8FYOPK/1/79odqfe3mq1EhoaCsDmzZu5/vrrue+++7j44ot56KGH8PlO/P7Y7XZuvfVWZs/u3H6ciTByEQgEZ23tgSb+vKiEWK2SZyb0IFOvPt9d+u0pmg2L7gPXkUSVpmr/5wB5V/zs29rtdiZNmoTD4aCpqemE5Lx79+5l8eLF6HQ6rr76arZv307fvn0Bf3b6Bx98kMmTJwfKv/8ahJGLQCA4Ky
63lz8vKuFQUzvrDjazuKj+fHfpt2nFC8cCy1Eum//4L3D0sdh3333HBx98wGOPPRYYoeTl5REdHY1YLCYrK4va2mO
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Land Contour\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 63,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Land Contour\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Lot Configuration\n",
"\n",
"This variable shows no good pattern and is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 64,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACq0ElEQVR4nOzdd3yT1f7A8U9mk440neneLS2lUDZlr7KngOIWB17HdV4cqICK+vNevKCiV1GvFxcONoKCguw9S2kZ3TvdTZNmJ78/AgFkiNoK6PN+vXzZPEmec5rQfHOec873K3I6nU4EAoFAIGhF4qvdAYFAIBD8+QjBRSAQCAStTgguAoFAIGh1QnARCAQCQasTgotAIBAIWp30anfgWtGzZ0/Cw8OvdjcEAoHgulJeXs6ePXsuOC4El9PCw8NZvnz51e6GQCAQXFduuOGGix4XLosJBAKBoNUJwUUgEAgErU4ILgKBQCBodcKcy2VYrVbKysowmUxXuyvXDYVCQUREBDKZ7Gp3RSAQXEVCcLmMsrIyfHx8iImJQSQSXe3uXPOcTid1dXWUlZURGxt7tbsjEAiuIuGy2GWYTCYCAgKEwHKFRCIRAQEBwkhPIBAII5dfIgSWX0d4vQRXjdMJpzZAzUmI6Qvhna92j/7ShOAiEAj+HIq2wZKbXEHGOwTu2wS+wsboq0W4LNYGOne+8m9My5cvR6vVXvL+jz76iBEjRjB+/HgmTZrEypUrf1OfLBYLd911F+PHj2fdunU899xz5OXl/aZzCQTXpOZKV2AB0FeBsf7SjxXKWLU5YeRyla1YsYLExEQ0Gs0F9y1ZsoSdO3eydOlSvL290ev1/PDDD7+pnZycHABWrVoFwKhRo357pwWCa1FMX0gYBgWboO8TEJh04WPsVtj3IRz4H6TeABkPg4fXH97VvwIhuPxBcnNzmT17NkajkaioKF599VV27dpFdnY2//jHP1AoFHz11VcoFAr3c95//30+/fRTvL29AfD29mbixIkA7Nq1i9dffx273U6HDh148cUXkcvlDB48mAkTJvDTTz9hs9lYsGABarWaGTNmUF9fz/jx43n77bd57rnneOqpp0hLS+Obb77hww8/xMfHh+TkZORyObNmzboqr5NA8JupwuHGxWBqAu9gEEsufEzFIfj+GdfPm1+F8C6QmPnH9vMvQrgs9gd56qmn+Mc//sGaNWtISkpi4cKFjBgxgg4dOjBv3jxWrVp1XmDR6/UYDAYiIyMvOJfZbOaZZ55h/vz5rFmzBrvdzhdffOG+38/PjxUrVjB16lT++9//EhAQwNy5c+nWrRurVq0iKirK/VitVst//vMfvvrqK5YsWUJBQUHbvhACQVuSe4Iq9OKBBUCqAPE536llyj+mX39BQnD5AzQ3N9Pc3EyPHj0AmDhxIvv37//N5yssLCQiIsK9l+Tn5xs2bBgAHTp0oLy8/LLnOnr0KN27d0etViOTyRgxYsRv7pdAcM0L7Qg3fgK9HoQbPoSo3le7R39awmWxa5S3tzeenp6UlpZedPRyOWd2x4vFYux2e1t0TyC4fiWPdv0naFPCyOUP4OPjg0qlco8uVq1aRffu3QHw8vLCYDBc9HnTp0/nxRdfRK/XA2AwGFi5ciWxsbGUl5dTXFx8wfl+rbS0NPbt20dTUxM2m40NGzb8pvMIBALBuYSRSxswGo3079/ffXvatGm8/vrr7gn9yMhIXnvtNcB1SWv27NkXndC/5ZZbaGlpYdKkSchkMqRSKdOmTcPDw4PXXnuNRx991D2hf/PNN/+mvmo0Gu6//36mTJmCr68vcXFx+Pj4/L4XQCAQ/OWJnE5hwTe4Ct78vFhYbm4uKSkpV6lHfxyDwYCXlxc2m42HH36YSZMmkZn521fQ/FVeN4FAcPHPThBGLgJg4cKF7Ny5E7PZTN++fRk6dOjV7pJAILjOCcFFwNNPP321uyAQCP5khAl9gUAgELS6NgsuBQ
UFjB8/3v1fly5d+N///kdjYyPTpk1j2LBhTJs2jaamJsBVC2Tu3LlkZmYyduxYjh075j7XihUrGDZsGMOGDWPFihXu49nZ2YwdO5bMzEzmzp3LmemjS7UhEAgEgj9GmwWXuLg4Vq1axapVq1i+fDlKpZLMzEwWLVpERkYGGzZsICMjg0WLFgGwdetWioqK2LBhAy+//DJz5swBXIFi4cKFfP3113zzzTcsXLjQHSzmzJnDyy+/zIYNGygqKmLr1q0Al2xDIBAIBH+MP+Sy2K5du4iMjCQ8PJyNGzcyYcIEACZMmMCPP/4I4D4uEolIT09Hp9NRXV3N9u3b6dOnD2q1Gl9fX/r06cO2bduorq5Gr9eTnp6OSCRiwoQJbNy48bxz/bwNgUAgEPwx/pDgsnbtWsaMGQNAXV0dwcHBAAQFBVFXVwe4clyFhIS4nxMSEoJWq73guEajuejxM4+/XBvXo1+Tvv9cS5YsuWh6/rKyMvd7IRAIBG2lzVeLWSwWNm3axJNPPnnBfSKRqM0rF/4RbVyLfuumSoFAIGgNbR5ctm7dSmpqKoGBgQAEBARQXV1NcHAw1dXV+Pv7A64RSVVVlft5VVVVaDQaNBoNe/fudR/XarX06NHjko+/XBttbeWhcv61/gQVjUbC1EpmDG/HhM6tUwlvz549LFy4ED8/P06ePElqairz5s1DJBIxb948Nm3ahEQioW/fvjz99NO8/fbbeHp6cs8995Cdnc3MmTMB6NOnj/ucdrudefPmsXfvXiwWC7feeitTp05tlf4KBIK/tja/LLZ27VpGjz6bJG7w4MHuyzUrV65kyJAh5x13Op0cPnwYHx8fgoOD6du3L9u3b6epqYmmpia2b99O3759CQ4Oxtvbm8OHD+N0Oi96rp+30ZZWHirn2eVHKW804gTKG408u/woKw9dPivxr5GTk8PMmTNZt24dZWVlHDhwgIaGBn744QfWrl3LmjVreOCBBy543rPPPssLL7zA6tWrzzu+dOlSfHx8WLZsGcuWLePrr7+mtLS01forEAj+uto0uLS0tLBz5053CnhwJWPcsWMHw4YNY+fOnUyfPh2AAQMGEBkZSWZmJi+88AKzZ88GQK1W8+CDDzJ58mQmT57MQw89hFqtBmD27Nk8//zzZGZmEhUV5c7ndak22tK/1p/AaD0/A7HRaudf60+0WhsdO3YkJCQEsVhMcnIy5eXl+Pj44OHhwcyZM9mwYcN5uckAdDodzc3N7sSW48ePd9+3Y8cOVq1axfjx45kyZQqNjY3uZJgCgUDwe7TpZTFPT0/27Nlz3jE/Pz8WL158wWNFIpE7oPzcmcDyc2lpaXz77bcXHL9UG22potH4q47/FnK53P2zRCLBbrcjlUpZunQpu3bt4vvvv+ezzz7jk08+uaLzOZ1Onn/+efr169dqfRQIBAIQdui3mjD1xSvaXep4azEYDDQ3NzNgwABmzpzJiRPnj5RUKhU+Pj7udP9r1qxx39e3b1+WLFmC1WoFXEXIWlpa2rS/AoHgr0HILdZKZgxvx7PLj553aUwpkzBjeLs2bddgMPDggw9iNpsBeOaZZy54zGuvvcbMmTMRiUTnTehPmTKF8vJybrjhBpxOJ35+frz77rtt2l+BQPDXIKTcP601Uu635Wqx64mQcl8g+OsQUu7/ASZ0Dv9LBhOBQCD4OWHORSAQCAStTgguAoFAIGh1QnARCAQCQasTgotAIBAIWp0QXAQCgUDQ6oTVYte4mpoaXn31VY4ePYpKpSIgIICZM2cSGxt7tbsmEAgElyQEl2uY0+nk4YcfZsKECcyfPx+A48ePU1dX94vBxWazIZX+vre3Nc4hEAj+moRPjtaU9TVsfAmaysA3AobMgo43/ubT7d69G6lUel5tluTkZJxOJ6+//jrbtm1DJBLxwAMPMGrUKPbs2cObb76JSqWisLCQl1566ZJp+rOzs/m///s/Wlpa8PPz47XXXiM4OJjbb7+d5ORkDh
w4wJgxY7j77rtb45UR/IlY7VYkYglikXBVXXBpQnBpLVlfw5pHwHo6UWVTqes2/OYAc+rUKVJTUy84vmHDBo4fP86
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Lot Config\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 65,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Lot Config\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS SubClass\n",
"\n",
"By looking at this variable's realizations, one can see that several distinct features are lumped together in one. In particular, the above variables *has 2nd Flr* and *build_type_\\** and the future age-related features at the bottom of this notebook together should capture the same patterns in a more advantageous way. Thus, the column is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 66,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1-STORY 1946 & NEWER ALL STYLES',\n",
" '1-STORY 1945 & OLDER',\n",
" '1-STORY W/FINISHED ATTIC ALL AGES',\n",
" '1-1/2 STORY - UNFINISHED ALL AGES',\n",
" '1-1/2 STORY FINISHED ALL AGES',\n",
" '2-STORY 1946 & NEWER',\n",
" '2-STORY 1945 & OLDER',\n",
" '2-1/2 STORY ALL AGES',\n",
" 'SPLIT OR MULTI-LEVEL',\n",
" 'SPLIT FOYER',\n",
" 'DUPLEX - ALL STYLES AND AGES',\n",
" '1-STORY PUD (Planned Unit Development) - 1946 & NEWER',\n",
" '1-1/2 STORY PUD - ALL AGES',\n",
" '2-STORY PUD - 1946 & NEWER',\n",
" 'PUD - MULTILEVEL - INCL SPLIT LEV/FOYER',\n",
" '2 FAMILY CONVERSION - ALL STYLES AND AGES']"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 66,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(ALL_COLUMNS[\"MS SubClass\"][\"lookups\"].values())"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 67,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS SubClass\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MS Zoning\n",
"\n",
"This variable is dropped as most houses are located in a \"residential\" zone."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 68,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MS Zoning\n",
2021-05-25 08:22:14 +02:00
"RL 2252\n",
"RM 459\n",
"FV 131\n",
"RH 27\n",
"C 25\n",
"A 2\n",
"I 2\n",
"RP 0\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 68,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"MS Zoning\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 69,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACpKklEQVR4nOzdd3hUVfrA8e+UTDLpfSa990ICgSQEEggGkBApoq4orth21f2566q7a3dtu+vqqrtusfddC1KkSAs1dAIhnfSeTHqfSab9/pgwEAkIkoDo/TyPj8mdO+ecTIa8c+85531FRqPRiEAgEAgE40h8pQcgEAgEgh8fIbgIBAKBYNwJwUUgEAgE404ILgKBQCAYd0JwEQgEAsG4k17pAfxQJCYm4uXldaWHIRAIBFeVxsZGDh06dNZxIbiM8PLyYvXq1Vd6GAKBQHBVWbp06ZjHhdtiAoFAIBh3QnARCAQCwbgTgotAIBAIxp0w5yIQCATfg1arpaGhAY1Gc6WHcllYWVnh7e2NhYXFBZ0vBBeBQCD4HhoaGrCzs8Pf3x+RSHSlhzOhjEYjHR0dNDQ0EBAQcEHPEW6LCQQCwfeg0WhwcXH50QcWAJFIhIuLy0VdpQlXLgKB4EfBaDRSffwonY31eEfGoAwKmfA+fwqB5ZSL/VmF4CIQCH4U6osKWPPSs2A0YuPkzPIX/oa9i+uVHtZPlnBbTCAQ/CgMdHXASHmqga5ONH195zx3ospYhYWF8fDDD5u/1+l0JCUl8Ytf/AKA9vZ2fvGLX3DdddexYMEC7r777rPa2LZtG4sWLRr1X3h4OLt37/5eY3r88cepqKj4fj/QJRCuXAQCwY+Cd2QMAfEJ1OYfZ9qiG3D28j7rHL1Ox4mtm8jfvpmw6TOYkrUUmaXVuI3B2tqa8vJyNBoNVlZW7Nu3D4VCYX7873//O9OnT+fnP/85AKWlpWe1kZGRQUZGhvn7zz//nPXr1zNz5szvNaYXXnjhez3vUglXLgKB4EfBzsWVrAcf5e5/vkfyDTcjHWPJrKqqgp0fvkVHYx37v/wvDSWF4z6OtLQ0du3aBcDGjRvJzMw0P9ba2opSqTR/Hx4eft62qqur+ec//8lLL72EWCzGaDTyl7/8hYULF5KVlcWmTZsAOHToECtWrOCBBx5g/vz5PPTQQ+arsxUrVlBQUABAfHw8r776Ktdddx033ngj7e3tANTV1XHjjTeSlZXFq6++Snx8/CW/DkJwEQgEPxoWlpbYOrkgFkvGfFwqkyGWnH7MQmY57mNYsGABmzZtYmhoiJMnTzJp0iTzY7fccguPP/44K1as4N///jcqleqc7Wi1Wh566CH+8Ic/4OnpCcDWrVspLS1l3bp1vP/++7z00ku0trYCUFxczGOPPcamTZtoaGggNzf3rDYHBweZNGkSX3/9NQkJCXzxxReA6ermtttuY/369aOC36UQgotAIPjJcPcPJOvBR5m8YBEL/u9hvMOjxr2P8PBwGhoa2LBhA2lpaaMemzlzJtu3b+fGG2+kqqqKJUuW0NnZOWY7r7/+OiEhISxYsMB8LDc3l8zMTCQSCa6urkydOtV8VRIbG4tSqUQsFhMeHk5jY+NZbVpYWDB79mwAoqOjzefk5eUxf/58ALKysi79RUCYcxEIBD8xwVOTCJ6aNKF9pKen89JLL/HRRx/R3d096jFHR0eysrLIysriF7/4BUeOHGHevHmjzjl06BBbt269qEztMpnM/LVEIkGv1591joWFhXlJsVgsHvOc8SJcuQgEAsE4W7ZsGffffz9hYWGjjh84cAC1Wg1Af38/dXV1eHh4jDqnp6eHRx99lL/85S/Y2tqOeiwhIYFvvvkGvV5PZ2cnR48eJTY29pLHO2nSJLZu3QqY5onGg3DlIhAIBONMqVRy2223nXW8qKiI5557DolEgtFo5IYbbjgrOHz22Wd0dnbyzDPPjDr+i1/8gmuvvZbjx4+zaNEiRC
IRjzzyCG5ublRVVV3SeB977DEeeeQR/v3vfzNz5syzgtr3ITJO1ILvq8zSpUuFYmECgeCClZSUEBERcaWHMS7UajVWVlaIRCI2btzIhg0b+Pe//33WeWP9zOf62ylcuQgEAsFPXFFREc8++yxGoxF7e3tefPHFS25TCC4CgUDwE5eQkMDXX389rm0KE/oCgUAgGHcTFlyqqqpG5caZPHkyH3zwAd3d3axcuZK5c+eycuVKenp6AFOun+eff56MjAyysrIoKioyt7VmzRrmzp3L3LlzWbNmjfl4YWEhWVlZZGRk8Pzzz5t3pJ6rD4FAIBBcHhMWXAIDA1m3bh3r1q1j9erVyOVyMjIyeOutt0hOTmbr1q0kJyfz1ltvAbBnzx5qamrYunUrzz33nHmlRHd3N2+88QZffPEFX375JW+88YY5WDzzzDM899xzbN26lZqaGvbs2QNwzj4EAoFAcHlclttiBw4cwMfHBy8vL7Kzs1m8eDEAixcvZvv27QDm4yKRiLi4OHp7e2ltbSUnJ4eUlBQcHR1xcHAgJSWFvXv30traSn9/P3FxcYhEIhYvXkx2dvaotr7dh0AgEAguj8sSXDZu3MjChQsB6OjowN3dHQA3Nzc6OjoAUKlUo3LaKJVKVCrVWccVCsWYx0+df74+BAKB4Kdg+/bthIWFUVlZecXGMOHBZXh4mB07dpjz1pxJJBJNeCW3y9GHQCAQ/JBs2LCBKVOmjNtu++9jwoPLnj17iIqKwtXVVBHOxcXFnMWztbUVZ2dnwHRF0tLSYn5eS0sLCoXirOMqlWrM46fOP18fAoFA8EOx9ngjKX/eQcAfNpLy5x2sPX52osnvY2BggNzcXF544YUfd3D5dj2D9PR01q5dC8DatWuZM2fOqONGo5G8vDzs7Oxwd3dnxowZ5OTk0NPTQ09PDzk5OcyYMQN3d3dsbW3Jy8vDaDSO2da3+xAIBIIfgrXHG3l0dQGN3WqMQGO3mkdXF4xLgMnOzmbmzJkEBATg5OREYeH416y5EBMaXAYHB9m/fz9z5841H7vnnnvYt28fc+fOZf/+/dxzzz2AqcCOj48PGRkZPPnkkzz99NOAKYPofffdx7Jly8zJ4BwdHQF4+umneeKJJ8jIyMDX15fU1NTz9iEQCAQ/BH/dchK1dnRGYrVWz1+3nLzkts/8QL9gwYIrdvUi5BYbIeQWEwgEF+NScosF/GEjY/3hFQHVf84c45EL093dTVpaGs7OzohEIvR6PSKRiJ07d47L3PPF5BYTdugLBALBZebpKL+o4xdqy5YtLFq0iJ07d7Jjxw52796Nt7c3R48evaR2vw8huAgEAsFl9si8MOQWo0sxyy0kPDIv7BzPuDAbNmzgmmuuGXVs7ty5bNiw4ZLa/T6ExJUCgUBwmS2O9wJMcy9N3Wo8HeU8Mi/MfPz7+vjjj886NlZdmctBCC4CgUBwBSyO97rkYPJDJtwWEwgEAsG4E4KLQCAQCMadEFwEAoFAMO6E4CIQCASCcScEF4FAIBCMO2G1mEAgEPyItLW18eKLL1JQUIC9vT0uLi489thjBAQEXNZxCMFFIBAIfiSMRiO/+tWvWLx4Ma+++ioApaWldHR0CMFFIBD8sOl1WsRiCSKxcFf9kuR/AdnPQk8DOHjDnKcg9sZLavLgwYNIpVJuvvlm87Hw8PBLHen3IgQXgUBwwWpOHGPHB29h7+rGrNvuwtXH70oP6eqU/wWsfwC0atP3PfWm7+GSAkx5eTlRUVHjMMBLJ3z0EAgEF0Sv07Ljg7foamqgNv84Jw/kXOkhXb2ynz0dWE7Rqk3HfySE4CIQXIC+jnbqCk/Q2952pYdyxYjFEuxd3czfW9nYXMHRXOV6Gi7u+AUKCQmhqKjoktoYL0JwEQjO0FZXw8kDOXQ1N5mP9XW0s+7lF/jyucdZ/een6WltOU8LJkaDAZ1WO5FDvexEYjGzbruLpKU/I+22u4icmX6lh3T1cvC+uOMXKCkpie
HhYT7//HPzsdLS0iuScl+YcxEIRrTX1/LlHx9D3d+Lk4cXy554HntXNzqbGlBVlQPQUV9HR0M9Du7Kc7bTrWpm98f
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"MS Zoning\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 70,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"MS Zoning\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Masonry Veneer Type\n",
"\n",
"None of the groups have a slope differing from the overall one. Thus, the variable is dropped."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 71,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACqGklEQVR4nOydd3gU1frHP7N9N9lkUzc9kAqE3nsJvQRCEbAiFryWa/fahavIvfpTxHJt91qwIiJFRKX33kvoJZDe2ybZPr8/JmyItACJiM7nefJk9syZc87OJvvOOed9v68giqKIjIyMjIxMA6K43gOQkZGRkfnzIRsXGRkZGZkGRzYuMjIyMjINjmxcZGRkZGQaHNm4yMjIyMg0OKrrPYA/Cl26dCE8PPx6D0NGRkbmhiIrK4utW7eeVy4blxrCw8OZP3/+9R6GjIyMzA3FmDFjLlguL4vJyMjIyDQ4snGRkZGRkWlwZOMiIyMjI9PgyHsuMjIyf1kcDgeZmZlYrdbrPZQ/PDqdjoiICNRqdb3qy8ZFRkbmL0tmZiZGo5EmTZogCML1Hs4fFlEUKSoqIjMzk6ZNm9brGnlZTEZG5i+L1WolICBANiyXQRAEAgICrmiGJ89cZGRk/hyIIhxbBgVHoUlPCG9Xr8tkw1I/rvQ+ycZFRkbmz0H6evh2gmRkvEPg3lXgKwdGXy/kZTEZGZk/BxU5kmEBsORCdfFFq14ujVViYiJPPvmk57XT6aRr167cd9991zTEZ599ljlz5tQpW7FiBffcc89VtffDDz8watQoRo0aRcuWLUlJSWHUqFG88cYb1zTOhkCeucjIyPw5aNIT4gbByVXQ83EITDiviuhwUDJnDiXfzcVn6BDEvn0v2JTBYODYsWNYrVZ0Oh0bN27EbDZf8xCHDx/ORx99xMSJEz1lS5YsYcSIEfVuw+l0olJJX91jx45l7NixACQnJzN79mz8/f2veZwNgTxzkZGR+XPgEw7jZ8OjB6DvM6DSnlelOi2NvFdnYD9+nMJ330O02y/aXJ8+fVizZg0gGYDhw4d7zu3bt48JEyaQmprKxIkTOXnyJADHjh1j3LhxjBo1ipSUFNLT0+u02a1bN06dOkV+fj4AVVVVbNq0iQEDBpCZmcnQoUN54YUXGD58OHfddZdnA/3222/n1VdfZcyYMXzxxReXvA3z5s3j1Vdf9byeO3cuM2bMIDMzkyFDhvDEE08wdOhQHn74YaqrqwE4cOAAt912G2PGjOHuu+/2jO9akI2LjIzMnweNAXxCQaG84GlBqwPVOQs2l9ikHjZsGD///DM2m40jR47Qpk0bz7mYmBi+/vprFi5cyMMPP8xbb70FwJw5c7jjjjtYtGgRP/zwAyEhIXXaVCqVDBo0iF9++QWA1atX06VLF7y9vQE4ffo0t956K0uWLMFoNLJ06VLPtQ6Hg/nz53PXXXdd8hYMHTqU1atX43A4AJg/f75ndnPq1CluueUWfvnlF7y8vPjmm29wOBxMnz6dd955x1P37Pu5FuRlMRkZmb8M+ubNCH97FlXbtqNvmUS2RnPRus2aNSMzM5OffvqJPn361DlXUVHB008/zenTpxEEwfNF3rZtWz788ENyc3MZNGgQTZo0Oa/d4cOH8/rrrzNp0iSWLFnCqFGjPOciIiJo3rw5AElJSWRlZXnODRs2rF7v0cvLi65du7JmzRpiYmJwOBwkJiaSmZlJaGgoHTp0AGDkyJF8+eWX9OrVi6NHjzJ58mQA3G43QUFB9errUsjGRUZG5i+FT//++PTvD0D2oUOXrJucnMzrr7/OF198QWlpqaf87bffpkuXLvznP/8hMzOTO+64A4CUlBTatGnDmjVrmDJlCv/85z/p1q1bnTbbt29PQUEBhw8fZvfu3XVmCZpzjJ1SqcRms3le6/X6er/Hm266iQ8//JCYmJg6qsW/dScWBAFRFImPj+e7776rd/v1QV4Wk5GRkbkI48aN48
EHHyQxMbFOeUVFhWeDf8GCBZ7yjIwMIiMjueOOO+jfvz9Hjhw5r01BEBg6dChPP/00vXv3Rqs9f2/oWmnTpg25ubn89NNPdZwFsrOz2b17NwA//fQTHTp0oGnTphQXF3vKHQ4Hx44du+YxyMZFRkZG5iKEhIR4ZiXncs899zBz5kxSU1NxOp2e8l9++YURI0YwatQojh49Smpq6gXbHTFiBIcPH67jJNDQDB06lPbt2+Pr6+spa9q0KV9//TVDhw6lvLycm2++GY1GwzvvvMMbb7zByJEjSU1N9Riaa0EQL+fw/RdhzJgxcrIwGZm/GIcOHfLscfzZuO+++7jzzjs9y3KZmZn87W9/46effrrqNi90vy723SnPXGRkZGT+RJSXlzN48GC0Wu15+z2/J/KGvoyMjMyfCB8fnzouzGeJiIi4plnLlSLPXGRkZGRkGpxGMy4nT570aN6MGjWK9u3b8/nnn1NaWsrkyZMZNGgQkydPpqysDJC0fqZPn87AgQNJSUkhLS3N09aCBQsYNGgQgwYNquOZceDAAVJSUhg4cCDTp0/36AVdrA8ZGRkZmd+HRjMuMTExLFq0iEWLFjF//nz0ej0DBw7k448/plu3bixbtoxu3brx8ccfA7Bu3TrS09NZtmwZr7zyCtOmTQMkQ/Hee+8xd+5cvv/+e9577z2PsZg2bRqvvPIKy5YtIz09nXXr1gFctA8ZGRkZmd+H32VZbPPmzURGRhIeHs7KlSs97nmpqamsWLECwFMuCAJt27alvLyc/Px8NmzYQI8ePTCZTPj6+tKjRw/Wr19Pfn4+FouFtm3bIggCqamprFy5sk5bv+1DRkZGRub34XfZ0D9X9bOoqIjg4GAAgoKCKCoqAiAvL6+ODk9ISAh5eXnnlZvN5guWn61/qT5kZGRk/mg0b96chIQERFFEqVTy4osv0r59+/PqXcqVeN++fbz22msUFhai1+tJSkrihRdeuKKo/oam0Y2L3W5n1apVPPHEE+edEwSh0bPA/R59yMjIyFwtOp2ORYsWAbB+/XpmzpzJV199VafOuYGav6WwsJBHHnmEmTNn0q6dlH3z119/pbKy8s9tXNatW0dSUhKBgYEABAQEkJ+fT3BwMPn5+Z7cA2azmdzcXM91ubm5mM1mzGYz27Zt85Tn5eXRuXPni9a/VB8yMjIy18LC3Vn839IjZJdWE2bS89TgRFLbNVy2S4vFgo+PDwBbt27l7bffxsfHh1OnTvHJJ5946mVkZPD3v/+dl19+mdWrV5OamuoxLABDhgwB4N133yUzM5OMjAxycnJ49tln2bNnD+vXryc4OJgPP/wQtVpNcnIyqamprF69GqfTyaxZs4iNjb2m99Loey6/zYOQnJzMwoULAVi4cCH9awTkzpaLosiePXswGo0EBwfTs2dPNmzYQFlZGWVlZWzYsIGePXsSHByMt7c3e/bsQRTFC7b12z5kZGRkrpaFu7N4dv5+skqrEYGs0mqenb+fhbuzLnvtpbBarYwaNYohQ4bwwgsv8MADD3jOHTx4kOeff75O3MrJkyf5+9//zr///W9at27NsWPHSEpKumj7Z86cYfbs2XzwwQc89dRTdOnShcWLF6PT6Vi7dq2nnp+fHwsWLGDixIl8+umn1/SeoJFnLmcT4bz88suesilTpvDoo48yb948wsLCmDVrFiAl5lm7di0DBw5Er9czY8YMAEwmEw888ADjxo0D4MEHH8RkMgEwdepUnn32WaxWK71796Z3796X7ENGRkbmavm/pUeodrjqlFU7XPzf0iPXNHs5d1ls9+7dPP300559lVatWhEZGempW1xczAMPPMB7771HXFxcvdrv3bs3arWahIQEXC6X53syISGBzMxMT71BgwYB0LJlS5YvX37V7+csjWpcDAYDW7durVPm5+fH7Nmzz6srCAJTp069YDvjxo3zGJdzadWq1QU3ty7Wh4yMjMzVkl1afUXlV0O7du0oKSmhuLgYkL
5Dz8VoNBIWFsbOnTs9xiUuLo60tDQGDBhwwTbPyvgrFArUarVnD1qhUOBy1RpLtVp9wfKrRY7Ql5GRkakHYaYLb45
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Mas Vnr Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 72,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Mas Vnr Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Miscellaneous Features\n",
"\n",
"This variable is basically an \"other\" field with no pattern."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 73,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACtHElEQVR4nOzdZ2BUVdrA8f+UTHovk957DxCSEEggVEGkiw0Vdd21rGtZfXV3Rde+rgXbWtZeVlGkSJMSSuglkIT03pNJz6RMkmnvhwkDkWKARETv7wvJnTv3nEzCPHNPeR6RXq/XIxAIBALBCBJf6Q4IBAKB4LdHCC4CgUAgGHFCcBEIBALBiBOCi0AgEAhGnBBcBAKBQDDipFe6A78WCQkJeHh4XOluCAQCwVWlrq6Ow4cPn3VcCC6DPDw8WLNmzZXuhkAgEFxVFi5ceM7jwrCYQCAQCEacEFwEAoFAMOKE4CIQCASCESfMuQgEAsEZ1Go1tbW19PX1Xemu/KqYmZnh6emJiYnJsM4XgotAIBCcoba2Fmtra3x9fRGJRFe6O78Ker2e1tZWamtr8fPzG9ZzhGExgUAgOENfXx+Ojo5CYDmDSCTC0dHxou7mhDsXgUDwm6DX66k4cYy2uho8w6NwDQi65GsJgeVsF/uaCMFFIBD8JtTknWTty8+AXo+lvQM3Pf8aNo5OV7pbv1vCsJhAIPhN6GlvhcHyVD3tbfR1dZ333IspYxUSEsJf//pX4/cajYbExET++Mc/ApCens4HH3xwib02ePzxx0lLS2PevHnMmzePzz///KKvUVtby4YNGy6rHyNJuHMRCAS/CZ7hUfjFjaMq5wTj5y3BwcPzrHO0Gg3Z2zaTs+NHQiZMZOzchchMzS54XQsLC0pKSujr68PMzIz9+/cjl8uNj0+dOpWpU6dedv8fe+wxZs2adcnPr6urY+PGjcydO/einqfVapFIJJfc7vkIwUUgEPwmWDs6MfehJ+jv7cbC1g6x+Ow3TEV5Kbs+M9xlHPjuf8gDg/GPHfez105NTWX37t3MmjWLTZs2MWfOHDIzMwFYs2YNubm5rFixgi1btvDOO+8gFouxtrbmq6++QqvV8sorr7B3715EIhHXX389y5Yt+9k2e3t7efbZZykpKUGj0XD//fczbdo0amtreeyxx1CpVAA8+eSTjBkzhldffZWysjLmzZvHggULsLGxMfYL4I9//CN33HEHCQkJxMXFsXTpUg4cOMCKFSuoq6vjiy++QK1WExMTw1NPPXXZAUcYFhMIBL8ZJqamWNk7njOwAEhlMsRnvGmayEyHdd3Zs2ezefNm+vv7KSoqIiYm5pzn/ec//+Gjjz7ihx9+4N133wVg1apV1NXVsW7dOjZs2HDeO4uXX37ZOCxWVFTEe++9R2JiIqtXr+bzzz/n3//+N729vTg6OvLJJ5+wdu1aXn/9dZ577jkAHnnkEcaNG8f69eu5/fbbL/jz9Pb2Eh0dzQ8//IC9vT1btmzh66+/Zv369YjF4hEZXhPuXAQCwe+Gi68/cx96gpr8k7gGBOEZGjGs54WGhlJbW8vGjRtJTU0973lxcXE8/vjjXHPNNUyfPh2AgwcPcsMNNyCVGt5u7ezszvncnw6LPfHEE+zcuZOPP/4YgP7+fhoaGnBxceGZZ56hsLAQsVhMZWXlsH6GM0kkEmbOnGnsX25uLosXLwZOL8W+XEJwEQgEvyuB8YkExide9PPS0tJ4+eWX+fzzz+no6DjnOc888wzZ2dns3r2bRYsW8f33319WX9988038/f2HHHvrrbdwcnJi/fr16HQ6oqOjz/lciUSCTqczft/f32/82tTU1DjspdfrWbBgAY888shl9fWnhGExgUAgGIbFixdz3333ERISct5zqquriYmJ4S9/+Qv29vY0NjYyYcIEVq1ahUajAThvYPqpiRMn8uWXXxpXtuXn5wPQ1dWFs7MzYrGY9evXo9VqAbC0tKSnp8
f4fA8PDwoLC9HpdDQ0NJCTk3POdpKSkti6dSutra3G/tXV1Q2rjxci3LkIBALBMLi6unLrrbde8JyXX36Zqqoq9Ho9iYmJhIaGEhQURGVlJddddx1SqZTrr7+eW2655Wfbu/fee3nhhRe47rrr0Ol0eHp68v7773PTTTfx5z//mXXr1jFp0iQsLCwAw5JpsVjMddddx8KFC7ntttvw8PBg9uzZBAQEEBFx7iHAwMBAHnzwQe644w50Oh0mJiasWLHisosnivQXs+D7N2zhwoVCsTCBQEBBQQFhYWFXuhu/Sud6bc733ikMiwkEAoFgxAnBRSAQCAQjTgguAoFAIBhxoxZcysvLjRuC5s2bx5gxY/j000/p6Ohg+fLlzJgxg+XLl9PZ2QkYlsM999xzTJ8+nblz55KXl2e81tq1a5kxYwYzZsxg7dq1xuO5ubnMnTuX6dOn89xzzxlXVZyvDYFAIBD8MkYtuPj7+7N+/XrWr1/PmjVrMDc3Z/r06XzwwQckJSWxbds2kpKSjAnfMjIyqKysZNu2bTz77LM8/fTTgCFQvP3223z77bd89913vP3228Zg8fTTT/Pss8+ybds2KisrycjIADhvGwKBQCD4Zfwiw2IHDx7Ey8sLDw8P0tPTmT9/PgDz589nx44dAMbjIpGI2NhYlEolTU1N7Nu3j+TkZOzs7LC1tSU5OZm9e/fS1NREd3c3sbGxiEQi5s+fT3p6+pBr/bQNgUAgEPwyfpF9Lps2beLaa68FoLW1FRcXFwCcnZ2NG3cUCgWurq7G57i6uqJQKM46LpfLz3n81PkXakMgEAiuRmFhYQQHBxu/nzNnDnfffTfLli3jscceIyoq6gr27txGPbgMDAywc+fOc6YWEIlEo17x7ZdoQyAQCEaTmZkZ69evv9LduCijPiyWkZFBREQETk6GinCOjo40NTUB0NTUhIODA2C4I2lsbDQ+r7GxEblcftZxhUJxzuOnzr9QGwKBQPBLWHeijuSXduL3+CaSX9rJuhOXn07l5+zbt4+lS5eyYMECHnjgAXp6esjIyOCBBx4wnnP48GFjkbPRNurB5VTtg1PS0tJYt24dAOvWrTMW2Tl1XK/Xk5WVhbW1NS4uLkycOJF9+/bR2dlJZ2cn+/btY+LEibi4uGBlZUVWVhZ6vf6c1/ppGwKBQDDa1p2o44k1J6nrUKEH6jpUPLHm5GUFmL6+viGrbzdv3jzk8ba2Nt59911jKv7IyEg++eQTJkyYQE5ODr29vQBs3ryZ2bNnX86PN2yjOizW29vLgQMHeOaZZ4zH7r77bh588EFWr16Nu7s7K1euBAzFePbs2cP06dMxNzfnhRdeAAzpqe+9915jOuj77rvPmLL6qaee4oknnqCvr4+UlBRSUlIu2IZAIBCMtn9vLUKl1g45plJr+ffWIubHXVq+rp8bFsvOzqa0tJQbb7wRALVaTWxsLFKplEmTJrFr1y5mzpzJnj17ePTRRy+pDxdrVIOLhYUFhw8fHnLM3t6ezz777KxzRSIRTz311Dmvs3jxYmNwOVNUVBQbN2486/j52hAIBILRVt+huqjjI0Gv15OcnMxrr7121mOzZ8/mq6++wtbWlsjISKysrEatH2cSdugLBALBCHK3M7+o4yMhNjaW48ePU1VVBRhGjSoqKgAYP348+fn5fPvtt7/YkBgIwUUgEAhG1KMzQzA3GVpm2dxEwqMzz18H5uf8dM7llVdeGfK4g4MDL774Ig8//DBz585l6dKllJeXA4aiYZMnT2bv3r1MmTLlkvtwsYSU+4OElPsCgQBGJuX+uhN1/HtrEfUdKtztzHl0Zsglz7f8mlxMyn2hWJhAIBCMsPlxHr+JYHI5hGExgUAgEIw4IbgIBAKBYMQJwUUgEAgEI04ILgKBQCAYcUJwEQgEAsGIE1aLCQQCwVWgpaWFF198kaysLGxtbTExMeGuu+5i+vTpw3p+Q0MDjz32GK2trYhEIq6//npuu+22Ue
uvEFwEAoHgV06v13Pfffcxf/58Xn31VQDq6urYuXPnsJ6v0WiQSCQ8/vjjRERE0N3dzaJFi0hOTiYwMHBU+iwEF4F
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Misc Feature\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 74,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Misc Feature\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Roof\n",
"\n",
"Roofs in Ames, IA, are not special enough to make a difference in the price. Even \"hip\" roofs seem to be priced in already, as they come with bigger houses."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 75,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACXtklEQVR4nOydd1gUZ9eH7116b1IUUcQuFjR2rBiwILHH9GiKiZqYnphiiSYmb5opfkk0xfSmUYjdiL13sXcUkKL03na+Pw7LgoKigi3PfV17ZXd2dp6ZjczZ035Hp2mahkKhUCgU1Yj+Zp+AQqFQKO48lHFRKBQKRbWjjItCoVAoqh1lXBQKhUJR7SjjolAoFIpqx/xmn8CtQqdOnfD29r7Zp6FQKBS3FXFxcWzbtu2S7cq4lODt7c2CBQtu9mkoFArFbcXQoUMr3K7CYgqFQqGodpRxUSgUCkW1o4yLQqFQKKodlXNRKBR3LIWFhcTGxpKXl3ezT+W2x9ramrp162JhYVGl/ZVxUSgUdyyxsbE4ODjg6+uLTqe72adz26JpGsnJycTGxtKgQYMqfUaFxRQKxR1LXl4ebm5uyrBcJzqdDjc3t6vyAJXnolAo7gg0TeP48eOcP38eX1/f0r41ZViqh6v9HpVxUSgUdwTR0dH8/vvvaJqGvb09Tz755M0+pf80KiymUCjuCDIzMzGOp8rKyiI3N7fyna9hjFXz5s0ZNGgQAwcO5OmnnyYjI+OazvPkyZMMGjSIwYMHc/bs2XLvBQUF8cADD5TbZlzzcsTGxrJo0aLS19u2beOpp566pvOrLpRxUSgUdwS+vr40atQIvV5Pjx49qFWr1qU7aQbISoLzRyAzHgzFVT6+tbU1ERERLF68GCcnJ3799ddrOs/IyEj69u1LeHg49erVu+T97Oxs4uPjATFEVSEuLo7Fixdf0/nUFMq4KBSKOwJHR0fuvfdenn/+eXr16oW5eQVR/8JcyIiDojzITICC7GtaKyAggMTERAAOHz7MvffeS1hYGOPHjyc9Pb3S7evWrePHH3/k999/5+GHH67w2P3792fp0qUALF68mNDQ0NL3YmNjeeCBBxgyZAhDhgxh9+7dAHz88cfs3LmTQYMG8cMPP1zTNVU3yrgoFIo7BktLSxwdHdHrK7u16UoexpdXn+wvLi5my5YtBAUFAfDqq6/y8ssvs2jRIpo0acKsWbMq3d6zZ0/uu+8+Ro0axc8//1zh8UNCQvj3338BWLNmTek6AG5ubsydO5eFCxcyc+ZM3nnnHQBeeukl2rdvT0REBKNGjbrqa6oJVEJfoVD8d7C0BRdfKMgCC1uwtK/yR/Py8hg0aBCJiYk0bNiQwMBAMjMzyczMpGPHjgAMGTKE5557rtLtVcHZ2RlHR0eWLFlCw4YNsba2Ln2vqKiIadOmceTIEfR6PdHR0VU+/xuN8lwUCsV/CxtncKoLtq5X5bkYcy5r1qxB07RrzrlUhQEDBjBt2rRyITGAH374gVq1ahEREcHff/9NYWFhjZ3D9aKMi0KhUFwFNjY2vPXWW8ydOxcbGxscHR3ZuXMnABEREXTo0AEHB4cKt1eVu+++m8cff5xu3bqV256ZmYm7uzt6vZ6IiAiKi6Ugwc7Ojuzsa8sf1RQqLKZQKBRXSYsWLWjatCmLFy/mf//7H1OmTCE3NxcfHx/ee+89gEq3VwV7e3vGjBlzyfYHHniAZ599lvDwcLp3746trS0ATZs2Ra/Xc8899zB06FCaN29ePRd6Heg07RoKvu9Ahg4dqoaFKRR3GIcPH74lbrR3ChV9n5XdO1VYTKFQKBTVjjIuCoVCoah2lHFRKBQKRbVTY8bl1KlTDBo0qPTRrl07fvjhB9LS0hg9ejQhISGMHj26tJtV0zTeeecdgoODCQsL4+DBg6XHWrhwISEhIYSEhLBw4cLS7QcOHCAsLIzg4GDeeeedUl2hytZQKBQKxY2hxoyLn58fERERRE
REsGDBAmxsbAgODmbOnDl06dKFlStX0qVLF+bMmQPA+vXriY6OZuXKlUyfPp2pU6cCYihmzZrFX3/9xbx585g1a1apsZg6dSrTp09n5cqVREdHs379eoBK11AoFArFjeGGhMW2bNmCj48P3t7eREZGMnjwYAAGDx7MqlWrAEq363Q6AgICyMjIICkpiY0bNxIYGIizszNOTk4EBgayYcMGkpKSyMrKIiAgAJ1Ox+DBg4mMjCx3rIvXUCgUCsWN4Yb0uSxZsqRUMjo5ORkPDw8A3N3dSU5OBiAxMREvL6/Sz3h5eZGYmHjJdk9Pzwq3G/e/3BoKhUJxozl//jwzZsxg//79ODo64ubmxhtvvMGzzz5bqZLx33//zU8//QSIMnKDBg3Q6/V0794dCwsLOnToQNeuXXn44Yd59dVXadWq1Y28pCpR48aloKCA1atX89JLL13ynk6nq/EpcTdiDYVCoagITdN45plnGDx4MDNnzgTgyJEjV/zBO2zYMIYNGwbIjJcff/wRV1fXGj/f6qTGw2Lr16/H39+/dLaCm5sbSUlJACQlJZV+YZ6eniQkJJR+LiEhAU9Pz0u2JyYmVrjduP/l1lAoFIrLEb4njsD3V9Ng4hIC319N+J646zre1q1bMTc35/777y/d1qxZs3JRlwcffJDDhw+Xvr7//vs5cuRIpcecOHEiy5cvv2T7xo0bGTlyJEOGDGHChAk3XQ6mxo3LkiVLyomvBQUFER4eDkB4eDh9+vQpt13TNPbu3YuDgwMeHh5069aNjRs3kp6eTnp6Ohs3bqRbt254eHhgb2/P3r170TStwmNdvIZCoVBURvieOF5fsJ+4tFw0IC4tl9cX7L8uA3P8+HH8/f0vu8/w4cNLO9xPnz5Nfn4+zZo1u6p1UlJS+Oqrr0rl+Fu2bMncuXOv+byrgxo1Ljk5OWzevJmQkJDSbWPGjGHTpk2EhISwefPmUv2cnj174uPjQ3BwMJMmTWLKlCmAyE+PGzeO4cOHM3z4cMaPH4+zszMAU6ZM4a233iI4OJh69erRo0ePy66hUCgUlfHhiqPkFpafTJlbWMyHK47W6Lr9+vVj7dq1FBYW8vfffzN06NCrPsa+ffs4ceIE999/P4MGDSI8PJxz587VwNlWnRrNudja2rJt27Zy21xcXPjxxx8v2Ven05UalIsxGpaLadWqVYUJscrWUCgUiso4l5Z7VdurQuPGjVmxYsVl97GxsaFr165ERkaybNmya9I41DSNwMBAPvnkk2s91WpHdegrFAoFUMfZ5qq2V4XOnTtTUFDAn3/+WbrtyJEj5fLFACNGjOCdd96hVatWODk5XfU6AQEB7N69mzNnzgASNTp9+vQ1n3d1oIyLQqFQAK/0bYqNhVm5bTYWZrzSt+k1H1On0zFr1iw2b97M3XffTWhoKJ988klpgZORli1bYm9vf00hMQBXV1fee+89XnzxRcLCwhg5ciSnTp265vOuDpTkfglKcl+huPO4Wsn98D1xfLjiKOfScqnjbMMrfZsyuK13DZ6hkJiYyCOPPMKyZcvQ62/d3/xXI7mvhoUpFApFCYPbet8QY1KW8PBwZs6cycSJE29pw3K1KOOiUCgUN5HBgweXylXdSdw5ZlKhUCgUtwzKuCgUCoWi2lHGRaFQKBTVjjIuCoVCoah2VEJfoVAoapDKJPcbNGhQ7Wvl5uby1ltvcezYMTRNw8HBgW+//ZbU1FSefvrpSiX+awJlXBQKhaKGuJzkfk0Yl59++olatWrx8ccfAzJu3sLCotrXqQrKuCgUiqujqBD0ZnAH9WSUEvUXRE6D9Fhwqgt9JkPre6/5cJVJ7muaxv/+9z82bNiATqdj7NixDBgwgG3btvHFF1/g4ODAsWPH6N+/P02aNOGnn34iPz+f//u//6NevXpMnDgRS0tLDhw4QHZ2NhMnTqR3796cP3+eOnXqlK7l5+dX+ry4uJi33nqLPXv24OnpyZdffom1tTVRUVG8+e
ab6PV6unbtyoYNG6rFw7kD/3UoFIoa40QkfNUFfhkGiYevvP/tRNRfsGgCpMcAmvx30QTZfo1UJrm/cuVKjhw5QkR
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Matl\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 76,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACh+ElEQVR4nOydd3hURffHP3dbdrPpvQcSei9SQigChg6h2lFRbKAIVhQBFcv72kBfG/zEjgUQiBQFpXep0iGUkN572X5/f0zYECEYIAHR+3mePGxm587M7oZ7duac8z2SLMsyCgoKCgoKdYjqei9AQUFBQeGfh2JcFBQUFBTqHMW4KCgoKCjUOYpxUVBQUFCocxTjoqCgoKBQ52iu9wL+LnTp0oXQ0NDrvQwFBQWFG4q0tDR27tx5QbtiXCoJDQ1lyZIl13sZCgoKCjcUI0eOvGi7ciymoKCgoFDnKMZFQUFBQaHOUYyLgoKCgkKdo/hcFBQU/lVYrVZSU1MxmUzXeyk3FHq9nrCwMLRaba36K8ZFQUHhX0Vqairu7u40aNAASZKu93JuCGRZJi8vj9TUVBo2bFira5RjMQUFhX8VJpMJX19fxbBcBpIk4evre1m7PWXnoqCg8M9AliFxDeScgAbdIbR9jV0Vw3L5XO57phgXBQWFfwZJm+G724SRcQuCB9eBp5IYfb1QjsUUFBT+GZRkCMMCUJoJFfk1dr3SMlbNmzcnPj6eIUOG8Mgjj1BcXHxF45w6dYr4+HiGDx9OcnJytecWL17M0KFDGTp0KEOGDOG3334DYMmSJWRlZf3l2H369CE/v+bXfq1QjIuCgsI/gwbdoVE/UGmg57Pg1+SCLrLViqO0FPPJk1izs5Ht9suaQq/Xk5CQwIoVK/D09GTBggVXtNS1a9fSv39/li1bRkREhLM9MzOTTz75hG+//Zbly5fzww8/0LRpUwCWLl1Kdnb2Fc13PVCMi4KCwj8Dj1C49UuYfAhungoalwu6VBw+jL24GNlsxpadjaO8/Iqna9eunXMncfToUW699VaGDh3KxIkTKSoqqrF948aNfPnll3z33XeMHTu22ph5eXkYjUZcXV0BMBqNhIeH88svv3Do0CGefvpp4uPj2bBhAxMmTHBet3XrViZOnHjBGhMSEhg9ejTx8fHMmDED+2Ua06tBMS4KCgr/HHSu4BEMKvVFn5Zc9NUbVFd2C7Tb7Wzfvp0+ffoA8Oyzz/L000+zfPlymjRpwgcffFBje69evbj99tu57777+Prrr6uN26xZM/z8/Ojbty/PP/8869atA2DAgAG0atWKt99+m4SEBHr16sXp06edx19Llixh1KhR1cY6deoUP//8M9999x0JCQmoVCqWL19+Ra/3SlCMi4KCwr8GQ/NmqH180Pj6og0LQ1W5Q6gtJpOJ+Ph4YmNjycvLIzY2lpKSEkpKSujcuTMAI0aMYPfu3TW2Xwq1Ws2nn37K+++/T4MGDXjjjTf43//+d0E/SZKIj4/np59+ori4mH379tGzZ89qfbZv386hQ4ecO5ft27eTkpJyWa/3alCixRQUFP5VqPR6tMHBV3TtOZ9LRUUFDzzwAAsWLGDEiBF1uj5JkmjTpg1t2rShW7duvPDCCzz++OMX9Bs5ciSPPvooOp2OAQMGoNFUv53LssyIESN46qmn6nR9tUXZuSgoKChcJgaDgRdffJHPP/8cg8GAh4eHc1eSkJBAp06dcHd3v2j7pcjKyuLw4cPO348dO0ZISAgg/C9lZWXO5wIDAwkICODjjz++4EgMICYmhtWrV5OXlwdAYWEhaWlpV/fCLwNl56KgoKBwBbRo0YKmTZuyYsUK/vvf/zJz5kwqKioIDw/njTfeAKixvSZsNhv//e9/yc7OxsXFBR8fH15++WVAHKvNnDkTvV7PDz/8gF6vZ+jQoeTn5xMdHX3BWI0aNWLy5Mncf//9OBwOtFotM2bMuGZFESX5SgO+/2GMHD
lSKRamoPAv4OjRozRv3vx6L6NOeOWVV2jevDljxoy5JvNd7L2r6d6pHIspKCgo3ICMHDmS48ePEx8ff72XclGUYzEFBQWFG5C/+0mLsnNRUFBQUKhz6s24nD59mvj4eOdPhw4d+OKLLygsLGTcuHH069ePcePGOTNZZVnm1VdfJS4ujqFDh1aLmFi6dCn9+vWjX79+LF261Nl+6NAhhg4dSlxcHK+++qpTL6imORQUFBQUrg31ZlyioqJISEggISGBJUuWYDAYiIuLY968ecTExLBmzRpiYmKYN28eAJs2bSIpKYk1a9Ywa9YsXnrpJUAYig8++ICFCxeyaNEiPvjgA6exeOmll5g1axZr1qwhKSmJTZs2AdQ4h4KCgoLCteGaHItt376d8PBwQkNDWbt2LcOHDwdg+PDhTsXPc+2SJNGuXTuKi4vJzs5my5YtxMbG4uXlhaenJ7GxsWzevJns7GxKS0tp164dkiQxfPhw1q5dW22sP8+hoKCgoHBtuCYO/ZUrVzJkyBBACLMFBAQA4O/v70zwycrKIigoyHlNUFAQWVlZF7QHBgZetP1c/0vNoaCgoPB3oHnz5jRpUqXa/OGHH5KWlsZnn33G3Llza7zu6NGjZGdn06tXr2uxzKui3o2LxWJh3bp1F5UgkCSp3ivCXYs5FBQUFC6HczIy51Ob7PmjR49y6NChG8K41Pux2KZNm2jZsiV+fn4A+Pr6OmsSZGdn4+PjA4gdSWZmpvO6zMxMAgMDL2jPysq6aPu5/peaQ0FBQeFyWbYvjdj/rKPh1JXE/mcdy/bVv4TKgQMHuO222xg+fDi33347p0+fxmKx8P7777Nq1Sri4+NZtWpVva/jaqh347Jy5UoGDx7s/L1Pnz4sW7YMgGXLltG3b99q7bIss3//ftzd3QkICKB79+5s2bKFoqIiioqK2LJlC927dycgIAA3Nzf279+PLMsXHevPcygoKChcDsv2pfH8koOkFVYgA2mFFTy/5OBVG5hz6srx8fEXrcMSFRXFggULWLZsGZMmTWL27NnodDomTZrEoEGDSEhIYNCgQVe1hvqmXo/FysvL2bZtG6+88oqz7aGHHmLy5MksXryYkJAQ5syZA0CvXr3YuHEjcXFxGAwGXn/9dQC8vLyYMGECo0ePBmDixIl4eXkBMHPmTJ5//nlMJhM9e/Z0Sk7XNIeCgoLC5fDW6uNUWKsX2Kqw2nlr9XGGt79yja6LHYudT0lJCc899xxnz55FkiSsVusVz3W9qFfj4urqys6dO6u1eXt78+WXX17QV5IkZs6cedFxRo8e7TQu59O6dWtWrFhxQXtNcygoKChcDumFFZfVXle89957dOnShQ8//JDU1FTuueeeep2vPlAy9BUUFBRqIMTLcFntdUVJSYnTh3x+4vifZff/zijGRUFBQaEGnunfFIO2eslkg1bNM/2b1uu848eP591332X48OHYbDZne5cuXTh58uQN4dBXhCsVFBQUauCcX+Wt1cdJL6wgxMvAM/2bXpW/BWDfvn0XtHXp0oUuXboA0L59e1avXu18bsqUKYDwQf/4449XNfe1QjEuCgoKCpdgePvQqzYm/0aUYzEFBQUFhTpHMS4KCgoKCnWOYlwUFBQUFOocxbgoKCgoKNQ5inFRUFBQUKhzFOOioKCgcI3Jzc3lqaeeom/fvowcOZLbbruNX3/9tcb+O3fu5OGHH77oc3369CE/P7++lnrFKKHICgoKCtcQWZaZOHEiw4cP55133gGE3P66deuu88rqFmXnoqCgcFnIViuyw3G9l3HtOLAQZreCl7zEvwcWXtVwO3bsQKvVcscddzjbQkNDGTt2LKmpqdx5552MGDGCESNGsHfvXmef0tJSHnroIfr378+MGTNwXOQzSEhIYPTo0cTHxzNjxgzsdvsFfa4VinFRUFCoNaVbtnJ6WDzJDz6EKTHxei+n/jmwEJZPgqIUQBb/Lp90VQYmMTGRFi1aXPQ5X19fPv
/8c5YuXcrs2bN59dVXq5Zy4ADTp09n1apVpKSksGbNmmrXnjp1ip9//pnvvvuOhIQEVCoVy5cvv+J1Xi3KsZiCgkK
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Roof Style\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 77,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Roof Matl\"]\n",
"del df[\"Roof Style\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sale Info\n",
"\n",
"Partial and abnormal (= foreclosure) sales seem to make a difference, with higher and lower prices respectively. These two types are encoded in the factor variables *partial_sale* and *abnormal_sale*. The impact does not seem to be big, though."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 78,
2021-05-25 08:22:14 +02:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Sale Condition\n",
2021-05-25 08:22:14 +02:00
"Normal 2396\n",
"Partial 233\n",
"Abnorml 189\n",
"Family 46\n",
"Alloca 22\n",
"AdjLand 12\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 78,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Sale Condition\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 79,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACvtElEQVR4nOydd3iUVfbHP1OSmdSZ1EnvlUAINYQqoTfp9gKrYv2p2FZ0UVax7NoVdcXurmsBKVIUpEgn9BIIEFJIn0mdkkyf+f0xMIAEDJCArO/neXiYue99772ZTObMveec7xE5nU4nAgICAgIC7Yj4ai9AQEBAQOB/D8G4CAgICAi0O4JxERAQEBBodwTjIiAgICDQ7gjGRUBAQECg3ZFe7QX8UcjOziYyMvJqL0NAQEDgmqKyspK8vLxz2gXjcpLIyEgWLVp0tZchICAgcE0xadKkVtuFYzEBAQEBgXZHMC4CAgICAu2OYFwEBAQEBNodweciICDwp8FqtVJRUYHJZLraS7nmkMvlREVF4eHh0ab+gnEREBD401BRUYGfnx9xcXGIRKKrvZxrBqfTSX19PRUVFcTHx7fpHuFYTEBA4E+DyWQiKChIMCwXiUgkIigo6KJ2fMLORUBA4H8Cp9PJ+qMajmsM9IkPIjNa2Wo/wbBcGhf7ugnGRUBA4H+CbcX13PXlLpxOCPWTsfTBfoQrva72sv60CMdiAgIC/xNodCZOVafS6M00tVjP3/mMMlYffvghY8aMYdy4cYwfP579+/dfcJ6nn36an3/++aLWtmTJEsaOHcu4ceOYMGECn3766UXdfz5uv/12Dh48CMA999yDTqdDp9Px9ddfu/uo1WoefvjhdpnvYhB2LgICAv8T9EkIYnBqCJsK63jgukQSQn3O7eR0gkEDLfXgpWTvsSp+/fVXFi9ejKenJw0NDVitFzBKl8CGDRv48ssv+fTTT1GpVFgsFpYsWdKucwB8/PHHgCto4ZtvvuHWW28FQKVS8e6777b7fL+HYFwEBAT+JwhTePHBrd3RGW0E+8mQiFvxEdgtoKt0PdbXUFtdTkBAAJ6engAEBga6u86bN4/169djNpvp1q0bL7zwwjl+h/z8fF599VVaWloICAjglVdeITQ09Kw+8+fP56mnnkKlUgHg6enJDTfcAEBBQQHPP/88RqORmJgYXn75ZRQKBbfffjuZmZnk5eWh1+t56aWX6NmzJyaTiVmzZnHkyBESEhLOcrDn5uaycOFC3njjDcrKyhg/fjx9+/bl1ltv5b777mP58uWYzWbmzJlDfn4+EomEp59+mj59+rBo0SLWrVuH0WikvLycoUOH8tRTT13W70M4FhMQEPifwctTikohb92wAIhEwOlr/frlUF1dzYgRI5gzZw47duxwX7vtttv44YcfWL58OSaTifXr1581lNVqZe7cubz77rssWrSIyZMn89Zbb50zZWFhIZ07d251OU899RRPPPEEy5YtIyUlhXnz5rmv2e12Fi5cyDPPPONu/+abb5DL5fz000/83//9H4cOHTpnzMcff5yYmBiWLl3KX//617OunTouW7ZsGW+88QZPP/00ZrMZcBm6t99+m2XLlvHTTz9RXV3d+mvYRoSdi4CAwJ8HiScEhIPFAB7e+HgFsGjRInbt2kVeXh4zZ87k8ccfZ9KkSeTl5fHJJ59gMploamoiOTmZ3Nxc91AlJSUcO3aM6dOnA+BwOAgJCWnzUvR6PXq9nt69ewMwceJEHnnkEff1YcOGAZCRkUFlpWu3tXPnTm6//XYA0tLSSE1Nvagff/fu3dx2220AJCYmEhERQUlJCQA5OTn4+fm5r1VWVhIeHn5R45+JYFwEBAT+XHgpXf9OIpFIyM7OJjs7m5SUFJYsWcKYMWP4+9//zg8//EB4eDjvvfee+xv+KZxOJ8nJyXz33XcXnC4pKYn8/HxycnIuapmnjurEYjF2u/2i7r0UTs0HrtfkcucUjs
UEBAT+tBQXF1NaWup+XlBQQEREhNuQBAQE0NzczKpVq865Nz4+noaGBvbu3Qu4jskKCwvP6Xfvvffy2muvUVtbC4DFYmHBggX4+fnh7+/Prl27AFi6dCm9evW64Hp79erF8uXLATh27BhHjx49p4+Pjw/Nzc2t3t+zZ0+WLVsGuHZe1dXVJCQkXHDOS0XYuQgICPxpaWlpYe7cueh0OiQSCbGxsbzwwgv4+/szdepUxo4dS3BwMF26dDnnXk9PT959913mzp2LXq/Hbrdz5513kpycfFa/QYMGUVdXx/Tp03E6nYhEIiZPngzAP/7xD7dDPzo6mldeeeWC67355puZNWsWo0aNIjExkYyMjHP6BAQE0L17d8aOHcuAAQPcUWMAt9xyC3PmzGHcuHFIJBJeeeWVs3Ys7YnI6Twj4PtPzKRJk4RiYQIC/+MUFBSQnp5+tZdxzdLa63e+z07hWExAQEBAoN0RjIuAgICAQLsjGBcBAQEBgXanw4xLcXEx48ePd//r3r07X3zxBU1NTUyfPp3hw4czffp0tFot4Arrmzt3LsOGDWPcuHFnJQctXryY4cOHM3z4cBYvXuxuz8/PZ9y4cQwbNoy5c+dyyn10vjkEBAQEBK4MHWZcEhISWLp0KUuXLmXRokV4eXkxbNgw5s+fT05ODqtXryYnJ4f58+cDsHHjRkpLS1m9ejUvvvgic+bMAVyGYt68eXz//fcsWLCAefPmuY3FnDlzePHFF1m9ejWlpaVs3LgR4LxzCAgICAhcGa7Isdi2bduIjo4mMjKStWvXMmHCBAAmTJjAmjVrANztIpGIrKwsdDodGo2GzZs3069fP5RKJQqFgn79+rFp0yY0Gg0Gg4GsrCxEIhETJkxg7dq1Z4312zkEBAQEBK4MV8S4rFixgrFjxwJQX1/vFnYLCQmhvr4ecMlCh4WFue8JCwtDrVaf065SqVptP9X/QnMICAgI/BFITU3l1VdfdT//9NNPee+9967oGs6U6+8IOty4WCwW1q1bx8iRI8+5JhKJOrwq3JWYQ0BAQOBi8PT0ZPXq1TQ0NFzS/TabrZ1X1P50eIb+xo0bycjIIDg4GICgoCA0Gg2hoaFoNBq3xLVKpaKmpsZ9X01NDSqVCpVKdZZSqVqtpnfv3uftf6E5BAQEBC6WJXsreW3VUaqajEQovXhyRCoTukVe1phSqZQbb7yRL7/8kpkzZ551raKigmeeeYbGxkYCAwN55ZVXiIiI4Omnn8bT05OCggK6d++OVqtFJpNRUFBAfX09L7/8MkuWLGHfvn107drVvTN6/vnnOXjwIGazmREjRlyxwmEdvnNZsWIFY8aMcT/Pzc11F8pZsmQJQ4YMOavd6XSyb98+/Pz8CA0NpX///mzevBmtVotWq2Xz5s3079+f0NBQfH192bdvH06ns9WxfjuHgICAwMWwZG8lsxYdpLLJiBOobDIya9FBluytvOyxb731VpYtW4Zerz+rfe7cuUycOJFly5Yxbtw45s6d676mVqv59ttvmTVrFgA6nY7vvvuOWbNmcf/99zNt2jRWrFjBsWPHKCgoAGDmzJksWrSIH3/8kZ07d3LkyJHLXntb6FDj0tLSwtatWxk+fLi7bcaMGWzZsoXhw4ezdetWZsyYAbj0d6Kjoxk2bBizZ8/m+eefB0CpVPLAAw8wZcoUpkyZwoMPPohSqQRcFvlvf/sbw4YNIyYmhoEDB15wDgEBAYGL4bVVRzFaz1YHNlrtvLbqXMHIi8XX15fx48fz1VdfndW+d+9et496/Pjx7N69231t5MiRSCQS9/PBgwcjEolITU0lODiY1NRUxGIxSUlJbpn+n376iYkTJzJhwgQKCwspKiq67LW3hQ49FvP29iYvL++stoCAAL788stz+opEIrdB+S2nDMtv6dKli1shtC1zCAgICFwMVU3Gi2q/WO68804mTZrEpEmT2tTfy8vrrOenRCdFItFZApRisRibzUZ5eTmfffYZCx
cuRKFQnFUcrKMRMvQFBAQEzkOE0uui2i8WpVLJyJEjWbhwobutW7durFixAnBVjOzZs+clj9/c3IyXlxd+fn7U1dW
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Condition\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 80,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABasAAALiCAYAAADXd4rWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3hUddrG8e+ZmUwKaQTSBERAAgSRLqCIiiIWVFCx4yKo61pQRF11l3VX1HVX7O5awK6ri1L0pYiCBRBFRRHYgEgVMCQIpGfqOe8fQyYJ6ZBkQnJ/rssr5szMOc9Mwi9w55nnZ1iWZSEiIiIiIiIiIiIiEkK2UBcgIiIiIiIiIiIiIqKwWkRERERERERERERCTmG1iIiIiIiIiIiIiIScwmoRERERERERERERCTmF1SIiIiIiIiIiIiIScgqrRURERERERERERCTkFFbLYenWrRvjxo0LdRlNzrPPPku3bt1YtWpVueOH83rde++9dOvWjV27dtVniXIUmDNnDt26dWPOnDmhLkUagdbTymk9bXz6XtT3SnOn7/HKab1tOnbt2kW3bt249957yx1vDq9rc3gOUr+0JldOa3Ljq+y1rerrII3DEeoCpP75/X5mz57Nhx9+yKZNmygsLCQ2Npa2bdty4oknMnz4cM4888xQl1krRUVFzJo1i08//ZSff/6Z/Px8IiIiOO644zjllFO49NJL6dChQ6jLPCzPPvsszz33HG+88QaDBg0KdTkhV/J6APzlL3/h6quvrnCfOXPmcN9993HTTTcxefLkxi5RWiCtp0cHradVKy4u5tRTTyU/P59Ro0bx+OOPh7okkUppvT06aL2tWijW227dugHw008/Nfi1pGXRmnx00JpcNf0d+OimsLqZ8fv9/P73v2f58uXExsZy2mmnkZKSgtfrZfPmzcyfP5+tW7ceFT9Y1qxZw6RJk8jKyiIlJYXTTjuNpKQkioqK2LBhAzNmzODll1/mv//9Lz179gx1udVauHAhkZGRdXrMnXfeyQ033EBycnIDVdV0/etf/+Kiiy4iOjo61KVIC6b1tGnSelo3CxcuJD8/H8Mw+Pjjjzlw4ACtW7cOdVki5Wi9bZq03tZNQ663Lfl1lcanNblp0ppcN/o78NFNYXUzM3/+fJYvX0737t156623iImJKXd7cXExP/74Y4iqq70tW7YwceJEioqKmDJlChMmTMDhKP/tunPnTqZPn05BQUGIqqy9Ll261PkxSUlJJCUlNUA1TVvHjh3ZsWMHM2bMUPe0hJTW06ZJ62ndzJo1C5vNxoQJE5g5cybz5s3juuuuC3VZIuVovW2atN7WTUOuty35dZXGpzW5adKaXDf6O/DRTTOrm5kffvgBgDFjxlT4oQIQGRnJ4MGDyx3Lz89n5syZXHvttQwbNowTTjiBwYMHc9NNNwXPV1s+n4+3336byy67jH79+tG7d29Gjx7NW2+9hWmatT7PQw89REFBATfccAM33nhjhR8qAB06dODpp5+mb9++5Y5v376de+65h1NPPZUTTjiBoUOHcs8997B9+/YK5yg7h+ijjz7i0ksvpXfv3px00klMnjyZrKysSutbv349EydOpG/fvvTr14/x48dX+1odOgNp+PDhwZEX1157Ld26dQv+V6K6+VILFy7k6quvpn///px44olccMEFvPjii3g8ngr3HT58OMOHD6eoqIh//OMfnH766ZxwwgmMGDGCl156Ccuyqqw7FK655hqSkpJ47bXX2LNnT60fl52dzd/+9jeGDx8e/B6+9dZbWb9+fYX7lp0JvWzZMsaNG0f//v2Dr3/Z27/88kuuuuoq+vbty+DBg7nvvvvIy8sDICMjg9///vcMHDiQvn37ctNNN1X69Vq/fj0PPfQQF154ISeddBK9evXi7LPP5tFHHyU3N/cwXylpaFpPtZ4e6mhbTzdt2sSaNWsYMmQIN9xwA2FhYbz33ns1Pi4rK4u777
6bIUOGcOKJJ3LxxRfzf//3fxXut2rVKrp168azzz7Lhg0buPHGGxkwYAC9e/fmmmuu4fvvv6/0/Pn5+Tz++OOMHDmSXr16MXDgQCZOnMjKlSurvcbatWu58cYbOemkk4Jfz7K3r1u3jokTJ9K/f38GDhzIbbfdRmZmJhD4x+jkyZMZPHgwJ554IuPGjWPjxo11fEWloWi91Xp7qOa+3hYUFPD3v/+dYcOG0atXL8455xxeffXVKp9XfcydXbJkCXfddRcjR46kT58+9OnTh4svvpg33nij0u/zstd89913ueCCC+jVqxcnn3wyU6dOJT8/v9LrrFy5kquuuoo+ffpw0kkncfPNN7Nly5bDrlsan9ZkrcmHau5rcl199dVXTJw4kZNOOokTTjiBkSNHMn369CrXxZycHJ588klGjRpF79696d+/PxdeeCHTp0+nqKgoeD/lBqXUWd3MxMfHA1S6iFZly5YtPPXUUwwYMIDTTz+d2NhYMjMz+fTTT1m+fDnPP/88w4YNq/E8Xq+Xm266iRUrVtCpUydGjRpFeHg4q1atYtq0afz444889thjNZ5n586drFy5kvDwcK6//voa7+90OoP/v3btWq677joKCwsZPnw4xx9/PFu3buXDDz9k6dKlvPrqq5x44okVzvGf//yHTz/9lOHDhzNw4EDWrl3LwoUL2bhxIx988EG5a3z//fdcd911eL1eRowYQceOHdmwYQPjxo2r8EO7Ktdeey1Lly7lm2++YcyYMbRr165WjwN44oknePHFF2ndujWjRo0iKiqK5cuX88QTT7BixQpefvnlcvVC4GszceJEsrOzGTZsGHa7nSVLlvD444/j8Xi49dZba339hhYZGcntt9/On/70J5588kn+8Y9/1PiYnTt3ctVVV5Gdnc3gwYM5//zzyczM5KOPPuLzzz/n2Wef5YwzzqjwuMWLF7N8+XKGDRvGFVdcwa+//lru9k8//ZTPP/+c008/nSuuuIIffviBOXPmsGvXLqZMmcL48ePp378/l156KZs2beKzzz5j165dfPjhh9hspb8LnDVrFkuWLGHgwIGcfPLJmKbJ//73P1599VWWLVvGrFmzNPKkCdJ6qvX0aF9PZ82aBQT+sRkfH8/w4cNZvHgx3333HQMGDKj0Mbm5uVx55ZXExMRw8cUXk5+fz6JFi7jrrrvIysqq9Pto/fr1zJw5kz59+jB27Fh+/fVXPv74Y8aPH8+8efPo3Llz8L55eXlceeWVbN68mV69evG73/2OAwcOsGjRIiZMmMBf//pXrrjiigrXWLNmDS+++CL9+/fnkksu4cCBA4SFhQVvX7duHTNmzGDgwIFcdtllbNq0iY8//phNmzbx73//m6uuuorOnTszevToYH3XXXcdS5YsoVWrVkf6UssR0nqr9bYlrbcej4fx48ezbt06unfvzgUXXEB+fj7//ve/+eabbxqsxunTp2Oz2TjxxBNJTk4mPz+fr7/+mocffph169ZV+X3+2GOPsWLFCs444wxOOeUUVq1axaxZs9ixYwdvvPFGuft+9NFHTJ48mbCwMM477zwSExNZvXo1V1xxRbkATZo2rclak1vSmlxX7777Ln/961+JjIzknHPOoU2bNnzzzTfMmDGDzz77jHfeeYfY2Njg/Xfu3Mnvfvc7du/eTc+ePbnyyisxTZPt27fz2muvccUVVxAVFRWsW7nBQZY0K//73/+snj17Wt26dbPuuusua/HixdauXbuqfUxeXp61b9++CsczMzOtU045xTrnnHMq3JaWlmZdc8015Y4988wzVlpamvXggw9aPp8veNzn81n33XeflZaWZn3yySc1Poe5c+daaWlp1hVXXFHjfcsyTdM655xzrLS0NOuDDz4od9uCBQustLQ0a+TIkZbf769Qc9++fa2NGzeWe8ydd95ppaWlWQsWLCh3jZEjR1b6XF577TUrLS3NSktLs77++utyt1X3eh163xJ//OMfrbS0NG
vnzp3BY99//72VlpZmnXbaaVZ2dnbwuNfrtX7/+99baWlp1vPPP1/uPGeccYaVlpZmXX/99VZxcXHw+G+//Wb179/
"text/plain": [
"<Figure size 1440x720 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot = sns.lmplot(\n",
" x=\"Gr Liv Area\", y=\"SalePrice\", col=\"Sale Condition\", hue=\"Sale Condition\",\n",
" data=df, robust=True, col_wrap=4, ci=None, truncate=True, scatter_kws={\"s\": 15},\n",
")\n",
"# Adjust font sizes.\n",
"for ax in plot.axes:\n",
" ax.set_title(ax.get_title(), fontsize=20)\n",
" ax.set_xlabel(ax.get_xlabel(), fontsize=16)\n",
" ax.set_ylabel(ax.get_ylabel(), fontsize=16)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 81,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"partial_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Partial\" else 0)\n",
"df[\"abnormal_sale\"] = df[\"Sale Condition\"].apply(lambda x: 1 if x == \"Abnorml\" else 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Homes that are sold for the first time are clearly priced higher. A factor variable *new_home* is introduced."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 82,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACx/0lEQVR4nOydd3iTVfvHPxlN0qZNd9NJ94JSShml7L0RBNTXLQ5UcCsOVERB9HW84FZ+LtwiMkRQKgXZm5YCbaGDQme605Wd/P4IBJCCBVpwPJ/r4iI5z/Occ5ImuZ9z7vv+3iKbzWZDQEBAQECgHRFf7QkICAgICPzzEIyLgICAgEC7IxgXAQEBAYF2RzAuAgICAgLtjmBcBAQEBATaHenVnsBfhZSUFIKCgq72NAQEBAT+VpSWlrJr165z2gXjcpKgoCCWL19+tachICAg8Ldi8uTJrbYL22ICAgICAu2OYFwEBAQEBNodwbgICAgICLQ7gs9FQEBAoBVMJhMlJSXo9fqrPZW/BAqFguDgYJycnNp0vmBcBAQEBFqhpKQENzc3wsLCEIlEV3s6VxWbzUZNTQ0lJSWEh4e36RphW0xAQECgFfR6Pd7e3v96wwIgEonw9va+qFWcsHIREBD4R2Cz2dh4pJL8yib6hHuTGOJx2X0KhuU0F/teCMZFQEDgH8GOwhruWrIXmw383OSsmtmPAA/nqz2tfy2CcREQEPhHUNmg51R1qspGA/UtpvMaF5vN1u6rkg8++ICff/4ZsViMWCzmpZdeolu3buc9/+mnn2bw4MGMHj26TX3/+uuvABw9epSYmBgApkyZwm233dY+L6CdEYyLgIDAP4I+Ed4MifVlS141MwZHEuGnPOcck8XKVzuP882uE4xPDOCeARG4yC//ZzAjI4Pff/+dFStWIJPJqK2txWQyXXa/p7j//vu5//77AejevTurVq1qt747CsG4CAgI/CPwd3fm/ZuTadCZ8XGTIxGfuzI5WKLlxdXZACxcn0e3EA8Gx/pd9thVVVV4enoik8kA8PLychx799132bhxIwaDge7du/PSSy+ds2o6dOgQr776Ki0tLXh6evLKK6/g53fheb311lu4u7tzxx132F/PwoV4eXkRFxfH22+/jVKp5Pjx46SkpDB37lzEYjFbt27lnXfewWg0EhISwiuvvIJSea4Rbg+EaDEBAYF/DM4yKWp3RauGBUDhJEZ6xjGFk6Rdxu3Xrx/l5eWMGjWKuXPnsnv3bsexW265hR9//JGff/4ZvV7Pxo0bz7rWZDIxf/583n77bZYvX86UKVNYuHDhn445ZcoUxwrGarWyZs0arrnmGgCysrJ4/vnnWbt2LcXFxaSlpVFbW8sHH3zAZ599xooVK0hISOCzzz5rl9ffGsLKRUBA4F9D50B33r85mV2FtXQNdqd3mNefX9QGlEoly5cvZ+/evezatYtHH32Uxx9/nMmTJ7Nr1y4+/vhj9Ho99fX1REdHM3ToUMe1x44d4+jRo0ybNg2wGwpfX98/HTM4OBgPDw+ys7Oprq6mc+fOeHp6ApCYmEhISAgA48aNY9++fcjlcvLz87nxxhsBu1FLSkpql9ffGoJxERAQ+Fcxsos/I7v4t3u/EomElJQUUlJSiImJYeXKlYwbN44XX3yRH3/8kYCAAN555x0MBsNZ19lsNqKjo/n+++8veszrrruO5cuXU11dzZQpUxztf9x2E4lE2Gw2+vXrx//+979Le4EXibAtJiAgIHCZFBYWUlRU5Hiek5NDYGCgw5B4enrS3NzMunXrzrk2PDyc2tpaMjIyAPuKIi8vr03jDh8+nC1btnDw4EH69+/vaM/KyqK4uBir1covv/xCjx49SEpKYv/+/Rw/fhyAlpYWjh07dqkv+U8RVi4CAgICl0lLSwvz58+noaEBiURCaGgoL730EiqViuuuu47x48fj4+ND165dz7lWJpPx9ttvM3/+fBobG7FYLNx+++
1ER0f/6bgymYyUlBRUKhUSyWn/UdeuXZk3b57DoT9ixAjEYjGvvPIKjz32GEajEYBHHnmkzXIuF4vIZjsVGf7vZvLkyUKxMAEBAQc5OTnEx8df7WlcEKvVyrXXXstbb71FWFgYALt27eLTTz/lo48+avfxWntPzvfbKWyLCQgICPwNyc/PZ8SIEaSmpjoMy18JYVtMQEBA4G9IVFQU6enp57SfCiq42ggrFwEBAQGBdqfDjEthYSETJ050/EtOTubzzz+nvr6eadOmMXLkSKZNm4ZWqwXs4Xjz589nxIgRTJgwgcOHDzv6WrFiBSNHjmTkyJGsWLHC0X7o0CEmTJjAiBEjmD9/PqfcR+cbQ0BAQEDgytBhxiUiIoJVq1axatUqli9fjrOzMyNGjGDx4sWkpqaSlpZGamoqixcvBmDz5s0UFRWRlpbGvHnzmDt3LmA3FO+++y5Lly7lhx9+4N1333UYi7lz5zJv3jzS0tIoKipi8+bNAOcdQ0BAQEDgynBFtsV27NhBSEgIQUFBpKenM2nSJAAmTZrE+vXrARztIpGIpKQkGhoaqKysZOvWrfTr1w8PDw/c3d3p168fW7ZsobKykqamJpKSkhCJREyaNMmx/3i+MQQEBAQErgxXxKG/Zs0axo8fD0BNTY1DkM3X15eamhoANBoN/v6ns2b9/f3RaDTntKvV6lbbT51/oTEEBAQE/i4sWLCAwMBAhzDlXXfdhb+/Py+//DIAr776Kmq1moULFxIREYHBYECpVHLTTTcxefLkqzhzOx2+cjEajWzYsKHVmgUikajDK71diTEEBAQE2pvk5GRH1r7VaqWuro78/HzH8YyMDLp3706nTp1YuXIlv/zyCwsXLmTJkiX8+OOPV2vaDjrcuGzevJkuXbrg4+MDgLe3N5WVlQBUVlY6pKnVajUVFRWO6yoqKlCr1ee0azSaVttPnX+hMQQEBAQ6ipUZpfR7dQPhT6+h36sbWJlReln9de/enczMTADy8vKIjo5GqVSi1WoxGo0UFBTg7u5+1jUhISE8/fTTfPnll5c1dnvQ4cZlzZo1jBs3zvF86NChrFy5EoCVK1cybNiws9ptNhuZmZm4ubnh5+dH//792bp1K1qtFq1Wy9atW+nfvz9+fn64urqSmZmJzWZrta8/jiEgICDQEazMKOWZ5QcprddhA0rrdTyz/OBlGRi1Wo1EIqGsrIyMjAySkpJITEwkMzOTgwcPEhMTg5OT0znXdenShcLCwst4Ne1Dh/pcWlpa2L59Oy+99JKjbfr06TzyyCMsW7aMwMBAFi1aBMCgQYPYtGkTI0aMwNnZmQULFgDg4eHBjBkzmDp1KgAzZ87Ew8MDgBdeeIFnnnkGvV7PwIEDGThw4AXHEBAQEOgIXl93BJ3JclabzmTh9XVHmNQ96JL77d69OxkZGWRkZDBt2jQ0Gg379+/Hzc2N5OTkVq/5qyh6dahxcXFxYdeuXWe1eXp6smTJknPOFYlEvPDCC632M3XqVIdxOZOuXbvy888/n9N+vjEEBAQEOoKyet1FtbeVU36Xo0ePEh0djb+/P59++imurq7nddpnZ2cTGRl5WeO2B0KGvoCAgMBlEujhfFHtbSU5OZmNGzfi7u6ORCLBw8ODxsZGMjMz6d69+znnl5SU8Nprr3HLLbdc1rjtgaAtJiAgIHCZzBoVyzPLD561NebsJGHWqNjL6jcmJoa6ujpHKseptubmZry8vGhpaeHEiRNMmjTJEYp86623/iVCkQXjIiAgIHCZnPKrvL7uCGX1OgI9nJk1Kvay/C1gr265f//+s9peffVVx+Pg4GCysrIua4yOQjAuAgICAu3ApO5Bl21M/kkIPhcBAQEBgXZHMC4CAgICAu2OYFwEBAQEBNodwbgICAgICLQ7gnEREBAQEGh3hGgxAQEBgb8oVVVVLFiwgIMHD6JSqfD29kYmk3HttdcyfPhwAEaNGsXEiROZMWMGAA8++CATJkzA3d2dGT
NmEBISgk6nw8fHh7vvvpshQ4ZckbkLxkVAQEDgL4jNZuOBBx5g0qRJLFy4EIDc3FzS09PZv38/w4cPp66uDhcXF4d
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"Sale Type\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 83,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"new_home\"] = df[\"Sale Type\"].apply(lambda x: 1 if x == \"New\" else 0)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 84,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"new_variables.extend([\"partial_sale\", \"abnormal_sale\", \"new_home\"])"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 85,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Sale Condition\"]\n",
"del df[\"Sale Type\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Show summary of counts:"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 86,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"partial_sale 233\n",
"abnormal_sale 189\n",
"new_home 227\n",
"dtype: int64"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 86,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].sum()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 87,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>partial_sale</th>\n",
" <th>abnormal_sale</th>\n",
" <th>new_home</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" partial_sale abnormal_sale new_home\n",
"Order PID \n",
"1 526301100 0 0 0\n",
"2 526350040 0 0 0\n",
"3 526351010 0 0 0\n",
"4 526353030 0 0 0\n",
"5 527105010 0 0 0"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 87,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[[\"partial_sale\", \"abnormal_sale\", \"new_home\"]].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Street Name\n",
"\n",
"Looking at the value counts, this variable is pretty useless as almost all streets are paved."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 88,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Street\n",
2021-05-25 08:22:14 +02:00
"Pave 2886\n",
"Grvl 12\n",
"Name: count, dtype: int64"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 88,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Street\"].value_counts()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 89,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Street\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Age & Remodeling\n",
"\n",
"The dataset was put together over several years. Therefore, the variables with year numbers need to be expressed relative to the year of sale to indicate the right ages."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 90,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# For one house the year of being remodeled is one year\n",
"# before it was built. That input error is corrected.\n",
"input_error = (df[\"Year Remod/Add\"] < df[\"Year Built\"])\n",
"assert input_error.sum() == 1\n",
"df.loc[input_error, \"Year Remod/Add\"] = df.loc[input_error, \"Year Built\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Introduce a factor variable *remodeled*. Almost half the houses were remodeled at some point in time."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 91,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"46"
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 91,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"remodeled = (df[\"Year Remod/Add\"] > df[\"Year Built\"])\n",
"df[\"remodeled\"] = 0\n",
"df.loc[remodeled, \"remodeled\"] = 1\n",
"round(100 * remodeled.sum() / df.shape[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create discrete variables *years_since_built* and *years_since_remodeled*."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 92,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"years_since_built\"] = df[\"Yr Sold\"] - df[\"Year Built\"]\n",
"df[\"years_since_remodeled\"] = df[\"Yr Sold\"] - df[\"Year Remod/Add\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 93,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAse0lEQVR4nO3deVhUV5rH8W8BKTEiLkQoNUx6NNrhwQTt1sbdFoOoSINrEhMTabt1jAlRFIM6GictxNhG6eSZRTpPHJLJOBNpxSROxwUjOolLFo1pJatjRIVigiDiwnrnD9oaQTZLlqrr7/M8Po91qu6t99469XLq3HPPsRiGYSAiIqbi0dYBiIhI81NyFxExISV3ERETUnIXETEhJXcRERNSchcRMSEldxdz/vx5BgwYQGVlZau/9+HDhxk5cqTT2w8YMICcnBwAEhMT2bBhQ3OFJi6mLeupqwkLC+Pjjz9u9HVnz57lpz/9KRUVFbf8Hs5sq+TuYnr06MHRo0fx9PRs61Bu2dGjRwkMDLyp/Hb/aIjrced6eqdQcr9NzvwVFmltZq6nZj6222Hq5P7666/z7LPP1ihbvXo1q1ev5tKlSyxbtozhw4czYsQINmzY4PiJeebMGZ588klCQ0MJDQ1l0aJFFBcXO/YRFhZGamoqUVFR9O/fn4qKClJTUxkxYgQDBgwgIiKCgwcPNhjb8ePHmTx5Mj/72c8YOnQoL730EnDzz6+ZM2eSkpLCo48+yoABA/j1r3/NhQsXHPv59NNPefTRRxk4cCCjRo1i69atAJSVlfHyyy/zy1/+kqFDh7Jy5UquXbvWpPP2L//yL4SGhhIWFsa7777rKJ85cyZbtmxxPN66dSuPPfaY4/FPf/pTfvjhhxr7unLlCr/97W/Jz89nwIABDBgwALvd3qQ47hSqp7dWT6//EkxNTWXYsGEsXbqUqqoqUlNTefjhhwkNDeW5556jqKioRqx/+tOfGDVqFIMGDWLz5s0cP36cqKgoBg4cyIsvvujYf1VVFf/0T//E6NGjGTJkCEuWLOHSpUuO5zMyMhg9ejShoaH88z//c43YGoqjtoY+28rKSl5++WVCQ0MZM2YMWVlZDZ6TOhkmZrfbjZCQEOPixYuGYRhGeXm5MXjwYOPLL780nn76aWPFihXG5cuXjR9//NGYMmWKsXnzZsMwDOP06dPGf//3fxulpaVGQUGBMWPGDGP16tWO/Y4ePdr41a9+ZZw/f964evWq8f333xsjR4408vLyDMMwjJycHOOHH35oMLbp06cb27ZtMwzDMEpKSoyjR486tu3bt69RXl5uGIZhPPHEE8aYMWOMU6dOGVevXjWeeOIJ4/e//71hGIZx9uxZo3///sZ7771nlJWVGRcuXDBOnjxpGIZhJCUlGXPnzjUKCwuNS5cuGXPnzjXWrVvXYEyHDh0ygoKCjOTkZKO0tNQ4fPiwERISYnz//feOWN555x3H6//0pz8Zjz76qONx3759jdOnTxuGYRjPP/+8sX79esd+R4wY0eB738lUT52rp2vXrjVKS0uNq1evGv/6r/9qTJs2zcjNzTVKS0uNFStWGAsXLqwR64oVK4xr164ZBw4cMPr162fMmzfP+PHHH428vDxj8ODBxuHDhw3DMIwtW7YYDz/8sHHmzBmjpKTEmD9/vrF48WLDMAzj22+/Nfr3728cOXLEKC0tNZKTk42goCDjo48+MgzDaFIc189ZQ5/tv//7vxsRERHG+fPnjcLCQuOJJ56osW1TmLrl7u/vz8CBA/nggw8AOHDgAF26dMFms5GVlcWyZcu4++678fPzY9asWezYsQOA++67j2HDhmG1WunatSuxsbF88sknNfY9c+ZMunfvjre3N56enpSVlfH9999TXl7Ovffey9/8zd80GJuXlxdnzpzhwoULdOjQgf79+9f72smTJ/O3f/u3eHt7M27cOLKzswF4//33GTp0KBMnTuSuu+6iS5cuBA
UFYRgG77zzDsuWLaNz5874+Pgwd+5cx/E15rnnnsNqtfKLX/yCUaNG8ec//7lJ24lzVE9vvZ56eHgQFxeH1WrF29ub//iP/2DhwoXYbDasVivPPPMMO3furNFlM3/+fNq1a8fw4cO5++67mThxIn5+fgQEBDBw4EBOnjwJwHvvvcesWbMIDAykQ4cOxMfH81//9V9UVFTwwQcf8Mtf/pJBgwZhtVp57rnn8PD4/zTalDgAfvzxxwY/2z//+c889dRTdO/enc6dOzN37txGz0ltXre8hZuZNGkSmzdvZvr06bz77rtER0dz/vx5KioqGD58uON1VVVVdO/eHag+8UlJSXz66adcvnwZwzDw9fWtsd/rr4XqL9myZct47bXX+O677xg+fDiJiYkEBATUG1dSUhKvvvoq48eP59577+WZZ55h9OjRdb62W7dujv+3b9+eK1euAJCbm1vnl/PChQtcvXqVyZMnO8oMw6CqqqqhUwWAr68vd999t+Nxjx49yM/Pb3Q7uT2qp9WaWk+7dOlCu3btHI/Pnz/P/PnzayRaDw8PCgoKHI/9/Pwc/2/Xrt1Nj6/Hm5+fT8+ePR3P9ezZk4qKCgoKCsjPz8dmszmeu/vuu+ncufMtxXH9dQ19tvn5+TU+ux49ejR6TmozfXJ/+OGHWbVqFd988w379u0jISEBLy8vrFYrhw4dwsvr5lOwfv16LBYL7733Hp07d2bPnj01+uQALBZLjcdRUVFERUVRUlLCypUrWbduHb///e/rjesnP/kJ69evp6qqil27dhEXF8fhw4dv6di6d+/O8ePHbyrv0qUL3t7e7Nixo8Evbl2Ki4u5cuWKI8Hn5ubSp08foPoLe/XqVcdrf/zxxybts/a5kpupnt5aPa19XDabjeTkZH7+85/f9NqzZ8/e0r79/f05d+6c4/H58+fx8vLCz88Pf39/vv/+e8dzV69erdGn3tQ4rrfs6/tsu3XrRm5uruPxjf9vKlN3y0D1X+SIiAgWLVrEgw8+SI8ePfD392fYsGGsWbOGkpISqqqqOHPmDEeOHAHg8uXL3H333XTs2BG73c7rr7/e4HucOnWKgwcPUlZWhtVqpV27djX+ctdl+/btXLhwAQ8PD0drq7FtaouKiuLjjz92/GQsLCwkOzsbDw8Ppk2bRnJysqPFYLfbOXDgQJP2+9prr1FWVsann37Kvn37GDduHABBQUHs3r2bq1ev8sMPP5Cent6k/fn5+VFUVFTjopTUpHp66/X0Ro899hgpKSmOpHzhwgX27Nlzy/sBmDhxImlpaeTk5HD58mU2bNjA+PHj8fLyIiIign379vHpp59SVlbGq6++WuOXRlPjaOyzHT9+PG+99RZ5eXlcvHiR1NTUWz4O0yd3gJiYGL755huio6MdZWvXrqW8vJwJEyYwaNAg4uLi+N///V8AnnnmGU6ePMnAgQOZM2cOY8eObXD/ZWVlvPLKK4SGhjJ8+HAuXLhAfHx8g9scOHCAyMhIBgwYQFJSEhs2bMDb2/uWjqtHjx788Y9/ZNOmTfziF78gJiaGr776CoCEhATuu+8+pk+fzs9+9jNmzZrF//zP/zS6z3vuuQdfX19GjBjB4sWLWbVqFb179wbgqaee4q677mLo0KE8//zzREVFNSnO3r17ExkZycMPP8zAgQM1WqYeqqdNr6e1Pfnkk4SFhfHrX/+aAQMGMH369Dp/LTTFlClT+NWvfsUTTzzBmDFjsFqtrFixAoA+ffqwcuVKFi9ezIgRI/D19a3RTXMrcTT02U6fPp3hw4cTHR3NpEmTGv1s62IxDPMv1nH+/HnGjx/PRx99hI+PT1uHI1In1VNpTqZvuVdVVbFp0yYmTJigL4y4LNVTaW6mvqB65coVhg0bRo8ePRrtj2wJv/nNb/jss89uKp87dy5/93d/1+rxQPUNShs3bryp/Oc//3mbnCNRPa2L6untuyO6ZURE7jSm75YREbkTuUS3TGhoaI2bBmq7Pn
TLXSn+1nHu3LlbHoPdVhqr8zdyl/MPirUlNBRnQ3XeJZJ7z549HRMJ1SU7O5ugoKBWjKh5Kf7WceOdjq6usTp/I3c
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[[\"years_since_built\", \"years_since_remodeled\"]].hist(bins=20);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two factor variables *recently_built* and *recently_remodeled* are created indicating that the corresponding action took place in the last 10 years. The two scatter plots below suggest that these groups of \"recent vs. old\" affect the price."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 94,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df[\"recently_built\"] = df[\"years_since_built\"].apply(lambda x: 1 if x <= 10 else 0)\n",
"df[\"recently_remodeled\"] = df[\"years_since_remodeled\"].apply(lambda x: 1 if x <= 10 else 0)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 95,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACcSElEQVR4nOydd3hUVfrHP3dmMplJm/RJ74UQEkIndEOvImAXFQtrW+vae117WdlVWV1/tlXprCKChBJ6hxBqQhLSZ9Inbfr8/jiQgFKFiOj9PE8e7py595wzk3Dfe877vt9XcrlcLmRkZGRkZC4gios9ARkZGRmZPx6ycZGRkZGRueDIxkVGRkZG5oIjGxcZGRkZmQuObFxkZGRkZC44qos9gd8L/fr1Izw8/GJPQ0ZGRuaSory8nM2bN/+iXTYuRwkPD2fBggUXexoyMjIylxRTpkw5abu8LSYjIyMjc8GRjYuMjIyMzAVHNi4yMjIyMhcc2eciIyPzp8Vms1FWVobZbL7YU/ndo9FoiIiIwM3N7azOl42LjIzMn5aysjK8vb2JiYlBkqSLPZ3fLS6Xi9raWsrKyoiNjT2ra+RtMRkZmT8tZrOZgIAA2bCcAUmSCAgIOKcVnrxykZGR+UPgcrlYddBIgbGZ/rEBpEf6ntV1smE5O871e5KNi4yMzB+CjYW13PrZNlwuCPZ2Z/HdAwn11V7saf1pkbfFZGRk/hAYTWaOVacyNlloaLWd8ly5jFXnIxsXGRmZPwT94wK4LDkIlULi3qwE4oI9f3GOzeHk0/VFjHonh/dWHMJ5kY2MyWTiq6++an9dVlbGhAkTzrmf999/n08++eScrnnyyScpKCgAICsri7q6ul/M53yQjYuMjMwfghCdln9d35P1j2Zx34gk3FXKX5yzp6yR57/bR76xmXdW5GO1O0/bp8vlwuk8/Tnng8lk4uuvv+60/k/Hyy+/TEJCQqfNRzYuMjIyfxi0ahV6nQal4uTOZ42bAtVx753MR11WVsbo0aN55JFHmDBhAv/617+YOnUqEydO5B//+Ef7eYsWLWLixIlMmjSJhx9+GIC6ujr++te/MnXqVKZOncr27dsBsbJ4/PHHmT59OsOHD+fzzz8H4K233qKkpITLL7+c11577YR5XH/99ezfv7/99bXXXsuBAwdO+dkPHDjA1VdfzahRo5gzZw4Amzdv5i9/+Uv7OS+88EK7huL06dPZs2fPCX2cbj7niuzQl5GR+dPQNUzHv67vyebCOtIidKiVppOed+TIEV577TWam5tZtmwZ8+bNw+Vyceedd7J161Z8fX354IMP+Prrr/H396ehoQEQq4GbbrqJ3r17U1FRwa233srSpUsBKCoq4vPPP6e5uZmxY8dy7bXX8tBDD5Gfn8/ixYsBYdiOMW3aNBYsWMCTTz5JUVERFouFLl26nPKzHTx4kDlz5tDa2soVV1zB0KFDz/n7+fl8zgfZuMjIyPypGJUawqjUEAD27z+5cQkLCyMjI4PXXnuN9evXM3nyZABaW1spLi7GbDYzZswY/P39AfD19QVgw4YN7X4MgObmZlpaWgAYOnQoarUaf39//P39qa2tPe08x4wZw7/+9S8eeeQR5s+ff0r14WMMHz4cjUaDRqOhX79+7NmzB29v7zN+H52FbFxkZGRkfoaHhwcgfC4zZ87kmmuuOeH9L7744qTXOZ1O5syZg7u7+y/eU6vV7cdKpRK73X7aOWi1WgYMGEB2djZLly49Y0mQk+WhKJXKE3xGFovltH1cSGSfi4yMjMwpGDRoEPPnz29ffRgMBmpra+nfvz8//vgj9fX1AO3bYoMGDTrB8BzvMzkZnp6e7X2fjCuvvJKXXnqJtLQ0dDrdafvKzs7GYrFQX1/Pli1bSEtLIzw8nMOHD2O1WjGZTGzcuPG85nMuyCsXGRkZmVMwaNAgDh8+3L5y8fDw4I033iAxMZE77riD6dOno1Ao6Nq1K6+++i
pPPvkkL7zwAhMnTsThcNC7d29eeOGFU/bv5+dHz549mTBhAoMHD+b6668/4f1u3brh5eV1xi0xgOTkZG688Ubq6+u566670Ov1gNhemzBhAhEREXTt2vW0ffx8Po8++ugZxz0VkkvOJgJENTW5EqWMzJ+L/fv3k5KScrGncUoMBgM33ngjS5cuRaG4+BtNJ/u+TnXvvPizlZGRkZH5BYsWLeKqq67i/vvv/10YlnNF3haTkZGR+R0yefLk9ii1Y8yfP789R+YYPXv25Nlnn/0NZ3Z2dJpxKSws5IEHHmh/XVpayr333svkyZN54IEHKC8vJzw8nHfffRedTofL5eLll19mzZo1aDQaXn31VVJTUwFYuHAhH3zwAQB33nknV1xxBQB5eXk8/vjjmM1mhg4dypNPPokkSTQ0NJx0DBkZGZlLmWPJmZcCnbbWiouLY/HixSxevJgFCxag1WoZOXIks2fPJjMzk+XLl5OZmcns2bMByMnJobi4mOXLl/Piiy/y3HPPASIKY9asWcyZM4e5c+cya9YsGhsbAXjuued48cUXWb58OcXFxeTk5ACccgwZGRkZmd+G32Qjb+PGjURGRhIeHk52dnb7Um/y5MmsWLECoL1dkiQyMjIwmUwYjUbWrVvHwIED8fX1RafTMXDgQNauXYvRaKS5uZmMjAwkSWLy5MlkZ2ef0NfPx5CRkZGR+W34TYzLkiVL2pU+a2trCQ4OBiAoKKg9S9VgMBASEtJ+TUhICAaD4Rfter3+pO3Hzj/dGDIyMjIyvw2dblysVisrV65kzJgxv3hPkqROrwL3W4whIyMjczHJyclh9OjR7a6H3wOdblxycnJITU0lMDAQgICAAIxGIwBGo7Fdm0ev11NVVdV+XVVVFXq9/hftBoPhpO3Hzj/dGDIyMjJ/NBwOBy+88AIff/wxS5Ys4fvvvz9B3+xi0enGZcmSJYwfP779dVZWFosWLQJEHPfw4cNPaHe5XOzatQtvb2+Cg4MZNGgQ69ato7GxkcbGRtatW8egQYMIDg7Gy8uLXbt24XK5TtrXz8eQkZGRudgs2lnOwFdXEvvYEga+upJFO8vPq7/c3Fyio6OJjIxErVYzfvz4dv/zxaRT81xaW1vZsGHDCfIHM2fO5P7772fevHmEhYXx7rvvAkIxdM2aNYwcORKtVssrr7wCCLXRu+66i2nTpgFw9913tyuQPvvss+2hyEOGDGHIkCGnHUNGRkbmYrJoZzmPL9hDm80BQHlDG48vEDVVJvcI/1V9nswvnZube/6TPU861bh4eHiwefPmE9r8/Pz47LPPfnGuJEmnTASaNm1au3E5nrS0NL7//vtftJ9qDBkZGZmLyRvLDrYblmO02Ry8sezgrzYuv1cuPU0BGRkZmUuUioa2c2o/G07ll77YyMZFRkZG5jcizFd7Tu1nQ1paGsXFxZSWlmK1WlmyZAlZWVm/ur8LhWxcZGRkZH4jHh6djNZNeUKb1k3Jw6OTf3WfKpWKZ555httuu41x48YxduxYEhMTz3eq540sXCkjIyPzG3HMr/LGsoNUNLQR5qvl4dHJ5+1vGTp0KEOHDr0QU7xgyMZFRkZG5jdkco/wP5zz/mTI22IyMjIyMhcc2bjIyMjIyFxwZOMiIyMjI3PBkY2LjIyMjMwFRzYuMjIyMjIXHNm4yMjIyFziPP7442RmZrbXzfo9IBsXGRmZc8Jmd+J0ui72NGSOY8qUKXz88ccXexonIBsXGRmZsybnUDVj3svhpk+3cMjQdLGnc2mSOwfe6QbP+Yp/c+ecd5d9+vRBp9Od/9wuILJxkZGROStsdifPf7eXw9UtrM2vYUlu5cWe0qVH7hz47l5oLAVc4t/v7r0gBub3hmxcZGTOBlM5FOVAQ9nFnslFQ6mQThBY9NHKAh/nTPYLYPuZArKtTbT/wZD/OmRkjsewF2oOQUg6BMSLNlM5fHMDVOyAoBS47lvwiz59P04nOGzg5t75c/6NUCgknp
7Qle92V+CrdeOKP4GEyQWn8RQPJ6dqv4SRjYuMzDGM++H/JkBbHQQkwo2LQBcBNfnCsABU74fqA6c3LnVFsPxJqMq
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_built\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 96,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACc7klEQVR4nOydd3hU1daH36mZ9J5J74UQEkInhGboIEhVUVGxYL2oeFGxF/Ta+70q1/JhV/pVVEoAQ+8QSoCE9N4zadPP98eGBKSIQqR43ueZhzN7ztl7z0w4a/Zea/2WQpIkCRkZGRkZmQuI8mJPQEZGRkbmykM2LjIyMjIyFxzZuMjIyMjIXHBk4yIjIyMjc8GRjYuMjIyMzAVHfbEncKnQp08fgoKCLvY0ZGRkZC4rSkpK2Lp16yntsnE5RlBQEIsXL77Y05CRkZG5rJg4ceJp2+VtMRkZGRmZC45sXGRkZGRkLjiycZGRkZGRueDIPpezYLFYKC4uxmg0XuypyJwFnU5HcHAwGo3mYk9FRkbmGLJxOQvFxcW4uroSHh6OQqG42NOROQ2SJFFTU0NxcTEREREXezoyMjLHkLfFzoLRaMTb21s2LJcwCoUCb29veXUpI3OJIa9cfgfZsFz6yN+RDIhV7NrDleRUNtE3wpukEI+LPaW/NbJxkZGRuSLYnFvD7fN3IEng5+rAsvtSCfBwvNjT+tsib4vJyMhcEVQajByvTlXZaKK+xXLGc+UyVh2PbFwuYQwGA1999VXb8+LiYq6++uqLOKNzZ+vWrdx1113nfc5vmTZtGvv27TufqclcofSN9OaqOF/USgUz06KJ9HM+5RyLzc5nG/MY/lYG76w+QovJehFm+vdA3hY7DyRJQpIklMqOsdEGg4FvvvmGG2+88YL1abVaUavlr13mysPf3ZH/3NgdQ6sVH1cHVMpTfXH7iht47oeDALy1OpuuIR4MjvP7q6f6t0C+y/xBiouLuf322+natSsHDhxg1KhRrF27FrPZzLBhw5g5cyYAS5cu5ZNPPkGhUBAXF8drr71GbW0tzzzzDKWlpQA8/vjj9OjRg/fee4/S0lKKi4spLS3llltu4eabb+aNN96gsLCQa665hn79+p1kZG688UaefPJJ4uPjAZg6dSrPPPMMnTp1OmXO7733HoWFhRQVFREYGMiTTz55xnkUFxdTVFREWVkZc+bMYc+ePaxfvx4/Pz8+/PBDNBoNmzdv5pVXXsFms9GlSxeee+45tFotGRkZvPTSSzg6OtKjR4+28VtaWnjhhRfIzs7GarVy//33M3To0JPmeKZzjEYjc+bM4dChQ0RGRspRYTJnxVGrxlF75tuaTqNErVRgtUvHnqv+qqn9/ZBkJEmSpAkTJpzSdvDgwVPaioqKpLi4OGn37t3S+vXrpSeffFKy2+2SzWaTZsyYIW3btk06cuSINHz4cKmmpkaSJEmqq6uTJEmSZs2aJW3fvl2SJEkqKSmRRo4cKUmSJL377rvSddddJ5lMJqmmpkbq3bu3ZDabpaKiImnMmDEnjX38+eLFi6W5c+dKkiRJubm5p53/cd59911pwoQJUmtr6+/O4/rrr5fMZrOUlZUlJSUlSevWrZMkSZLuvfdeadWqVZLRaJQGDhwo5ebmSpIkSbNnz5Y+++yztva8vDzJbrdLM2fOlGbMmCFJkiS98cYb0tKlSyVJkqSGhgZp+PDhUnNzs7Rly5bfPefTTz+VHnvsMUmSJCkrK0uKj4+XMjMzz+m7kpE5HSv2l0nP/++AtGRXsWSz2S/2dC57znTvkVcuf4LAwECSk5N55ZVX2LhxI+PHjwfEr+/8/HyMRiMjR47Ey8sLAA8PDwA2bdpETk5OWz9NTU00NzcDMGjQILRaLV5eXnh5eVFTU3PWOYwcOZL//Oc/PPLIIyxatOiMyqTHSUtLQ6fT/e48Bg4ciEajITY2FpvNxsCBAwGIjY2luLiYvLw8goOD2xIWJ0
yYwFdffUWfPn0IDg4mPDwcgHHjxvH9998DsGHDBtasWcOnn34KgMlkoqys7KT5nemc7du3M23aNAA6depEXFzcWd+njMzvMTzBn+EJ/hd7Glc8snH5Ezg5OQHC5zJjxgyuv/76k17/4osvTnud3W7n+++/x8HB4ZTXtFpt27FKpcJqPbuj0dHRkX79+pGens7PP//8u+UCHB3bQzLPZR5KpRKNRtOWQ6JUKrHZbGcd42y8++67REZGntRWXV39u+fIyMhcnsjRYudB//79WbRoUduv/oqKCmpqaujbty+//PILdXV1ANTX17edf6LhycrKOmv/zs7ObX2fjilTpjB37lwSExNxd3f/Q/P+I/M4kYiICEpKSigoKABg2bJl9OrVi8jISEpKSigsLARg+fLlJ4335ZdftoV/Hjx48LRzOt05vXr14scffwTgyJEjHD58+JznKiMjc/GQjct50L9/f66++mquv/56xo4dy8yZM2lubiYmJoa7776badOmMW7cOF5++WUAnnjiCfbv38/YsWMZPXo033zzzVn79/T0pHv37lx99dW88sorp7zepUsXXFxcfndL7Lf80XmciIODA//617944IEHGDt2LAqFgqlTp+Lg4MDzzz/PjBkzmDBhQtuWIMC9996L1Wpl3LhxjBkzhnfeeeeUfs90ztSpU2lpaWHUqFG8++67JCQk/KH3KiMjc3FQSJKcTQSimtpvt5aysrLaorEuRSoqKrj55pv5+eefOywc+nLhUv+uZGSuVE537wR55XLZsnTpUq699loefPDBv71hkZGRufSQHfqXKePHj2+LUjvOokWL+Pzzz09q6969O88888xfODMZGRmZDjQuubm5PPTQQ23Pi4qKmDlzJuPHj+ehhx6ipKSEoKAg3n77bdzd3ZEkiRdffJFff/0VnU7Hyy+/3La/vmTJEj744AMA7rnnHiZMmADA/v37mTNnDkajkUGDBvHEE0+gUCior68/7RhXOpMmTWLSpEkXexoyMjIyHbctFhkZybJly1i2bBmLFy/G0dGRYcOGMW/ePFJSUli5ciUpKSnMmzcPgIyMDPLz81m5ciUvvPACzz77LCAird5//32+//57FixYwPvvv09DQwMAzz77LC+88AIrV64kPz+fjIwMgDOOISMjIyPz1/CXbNZv3ryZkJAQgoKCSE9Pb9vOGT9+PKtXrwZoa1coFCQnJ2MwGKisrGTDhg2kpqbi4eGBu7s7qamprF+/nsrKSpqamkhOTkahUDB+/HjS09NP6uu3Y8jIyMjI/DX8JcZl+fLlbWq+NTU1+PkJoThfX9+2TPSKigr8/duzZv39/amoqDilXa/Xn7b9+PlnG0NGRkZG5q+hw42L2WxmzZo1jBw58pTXFApFh1cR/CvGuNLJyMhgxIgRbduaMjIyMr9HhxuXjIwMEhIS8PHxAcDb25vKykoAKisr25Lt9Ho95eXlbdeVl5ej1+tPaa+oqDht+/HzzzaGzB/HZrPx/PPP8/HHH7N8+XJ+/PHHk3TJZGRkZE5HhxuX5cuXM2bMmLbnaWlpLF26FBC5GkOGDDmpXZIk9uzZg6urK35+fvTv358NGzbQ0NBAQ0MDGzZsoH///vj5+eHi4sKePXuQJOm0ff12jCudpbtLSH15DRGPLSf15TUs3V1y3n1mZmYSFhZGSEgIWq2WMWPGtPm2ZGRkZM5Eh+a5tLS0sGnTJp5//vm2thkzZvDggw+ycOFCAgMDefvttwGhCvzrr78ybNgwHB0deemllwChKHzvvfcyefJkAO677742leFnnnmmLRR54MCBbQq+ZxrjSmbp7hLmLN5Hq0WIS5bUtzJnsajYOL5b0J/u93Q+r8zMzPObrIyMzBVPhxoXJycntm7delKbp6cn8+fPP+VchUJxxmS/yZMntxmXE0lMTGwTNTyXMa5kXltxuM2wHKfVYuO1FYfPy7jIyMjI/Blk3ZArhNL61j/Ufq6cyeclIyMjczZk43
KFEOjh+Ifaz5XExETy8/MpKirCbDazfPly0tLSzqtPGRmZKx/ZuFwhzB4Rh+Nv6oE7alTMHnF+lRvVajVPP/00d9x
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"recently_remodeled\", s=15, data=df);"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 97,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"del df[\"Yr Sold\"]\n",
"del df[\"Year Built\"]\n",
"del df[\"Year Remod/Add\"]"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 98,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"age_columns = [\n",
" \"remodeled\", \"years_since_built\", \"years_since_remodeled\",\n",
" \"recently_built\", \"recently_remodeled\",\n",
"]\n",
"new_variables.extend(age_columns)"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 99,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" recently_built recently_remodeled \n",
"Order PID \n",
"1 526301100 0 0 \n",
"2 526350040 0 0 \n",
"3 526351010 0 0 \n",
"4 526353030 0 0 \n",
"5 527105010 0 0 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 99,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[age_columns].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Outliers\n",
"\n",
"The instructors' notes state:\n",
"\n",
"> **Five observations** that an instructor may wish to remove from the data set before giving it to students (a plot of SALE PRICE versus GR LIV AREA will quickly indicate these\n",
"points). Three of them are true **outliers** (Partial Sales that likely don’t represent actual market values) and two of them are simply unusual sales (very large houses priced\n",
"relatively appropriately). I would **recommend removing any houses with more than\n",
"4000 square feet** from the data set (which eliminates these five unusual observations)\n",
"before assigning it to students.\n",
"\n",
"To apply a more \"rigorous\" approach, outlier detection is conducted with a so-called Isolation Forest."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 100,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Use only numeric columns that are strongly correlated with the target.\n",
"# This mitigates the risk that a \"not so good\" chosen factor variable introduced\n",
"# in this notebook causes an observation to be removed as an outlier.\n",
"with open(\"data/correlated_variables.json\", \"r\") as file:\n",
" content = json.loads(file.read())\n",
"strongly_correlated = content[\"strongly_correlated\"]\n",
"df_encoded = encode_ordinals(df[list(set(strongly_correlated) & set(df.columns))])\n",
2024-07-10 01:31:28 +02:00
"iso = IsolationForest(n_estimators=100, bootstrap=True, contamination=0.005, random_state=random_state)\n",
2021-05-25 08:22:14 +02:00
"outliers = pd.DataFrame(\n",
" iso.fit_predict(df_encoded), columns=[\"outlier\"], index=df.index\n",
")\n",
"outliers[\"outlier\"] = outliers[\"outlier\"].apply(lambda x: 1 if x < 0 else 0)\n",
"df = pd.concat([df, outliers], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The five aforementioned outliers are among the ones detected."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 101,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAEGCAYAAACpXNjrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAACfGklEQVR4nOzdd3xV9f348dfdI8kdGfdmkr3IIGzClikgioir1VasUke/1t1aW/Xr6vfXWrWt/fYr1bq1CjJUUBAQArJnCDuL7HuTm9x7c3P3+P1xwwVkiEJE9DwfDx6Ec+8955DAfd/PeL/folAoFEIgEAgEggtIfLFvQCAQCAQ/PEJwEQgEAsEFJwQXgUAgEFxwQnARCAQCwQUnBBeBQCAQXHDSi30D3xfDhw8nJSXlYt+GQCAQXFKam5vZsmXLKceF4NIrJSWFRYsWXezbEAgEgkvK7NmzT3tcmBYTCAQCwQUnBBeBQCAQXHBCcBEIBALBBSesuQgEAsFF5PP5aGpqwu12X+xbOSulUklqaioymeycni8EF4FAILiImpqaiImJISMjA5FIdLFv57RCoRAWi4WmpiYyMzPP6TXCtJhAIBBcRG63m7i4uO9tYAEQiUTExcV9o9GVMHIRCAQ/DKEQHFkJ7YchYzSkDLzYd3TOvs+B5Zhveo9CcBEIBD8M9evhvevDQSY6EW5fA1ohMfpiEabFBALBD0N3aziwADjawNV55uf+ANtYLVq0CJPJFPnzzTffzN69ewG4/fbbsdvt3+n9CMFFIBD8MGSMhpwpIJbC2IchPu/U5wR8sPmf8L8jYO3/A0/Pd3+ffWTx4sWYzebTPvavf/0LjUZzzucKBALnfT/CtJhAIPhh0KTAdW+A2wbRBhBLTn1Oyy747Lfhr9c+CymDIHfyd3uf38Brr73Ghx9+CMCcOXOYNGkSd9xxB5988gkAr776Kk6nk9zcXKqqqnjwwQdRKpW8//77J51nwoQJLFy4kNjYWJYuXcpbb72Fz+djwIABPP7440gkEgYOHMj111/Pxo0beeyxxxgyZMh53bswchEIBD8ccjVokk4fWACkyvDI5hiZ6ru5r2+hqqqKRYsW8cEHH/D++++zYMGCM05tXX755RQXF/Pcc8+xdOlSlErlaZ9XU1PDp59+ynvvvcfSpUsRi8V8/PHHADidTkpLS/noo4/OO7CAMHIRCAQ/JkmlcN2bUL8BkgdBv5EX+47OaMeOHUyaNAm1Wg3A5MmT2b59+3mdc9OmTVRVVTFnzhzg+DZoAIlEwtSpU8/vpk8gBBeBQPDjUjAj/OsSZLfbCQaDkT97PJ5v9PpQKMTVV1/NAw88cMpjCoUCieQMI75vQZgWEwgEgu+hIUOGsGrVKlwuF06nk1WrVjF27FgsFgtdXV14vV7Wrl0beX5UVBQ9PWffoFBeXs6KFSuwWCwAWK1Wmpub++T+hZGLQCAQfA8VFRUxe/Zsrr32WiC8oF9aWsrdd9/Ntddei9FoJCsrK/L8q6++mscff/y0C/rH5OTkcO+993LrrbcSDAaRyWQ89thjfdIoURQK/QA3fH8Ls2fPFpqFCQSC79yBAwcoLCy82LdxTk53r2d67xSmxQQCgUBwwQnBRSAQCAQXnBBcBAKBQHDB9Vlwqa2t5aqrror8GjRoEK+//jpWq5W5c+cyZcoU5s6di81mA8Jb5J5++mkmT57MzJkz2bdvX+RcixcvZsqUKUyZMoXFixdHjldVVTFz5kwmT57M008/zbHlozNdQyAQCATfjT4LLllZWSxdupSlS5eyaNEiVCoVkydPZv78+ZSXl7Ny5UrKy8uZP38+ABUVFdTX17Ny5UqeeuopnnjiCSAcKF566SU++OADFixYwEsvvRQJFk888QRPPfUUK1eupL6+noqKCoAzXkMgEAgE343vZFps06ZNpKWlkZKSwurVq5k1axYAs2bNYtWqVQCR4yKRiLKyMux2O2azmQ0bNjBq1C
h0Oh1arZZRo0axfv16zGYzDoeDsrIyRCIRs2bNYvXq1Sed66vXEAgEAsF34zsJLsuWLeOKK64AwGKxYDAYAEhISIgk85hMJhITEyOvSUxMxGQynXLcaDSe9vix55/tGgKBQCA4vYqKCqZOnRqZYTpffR5cvF4va9as4fLLLz/lMZFI1Ocd2L6LawgEAsGlLBAI8OSTT/LKK6+wbNkyPvnkE6qrq8/rnH0eXCoqKigqKiI+Ph6AuLi4SM8Bs9lMbGwsEB6RtLW1RV7X1taG0Wg85bjJZDrt8WPPP9s1BAKB4FK3ZFczo/5nDZm/Xcao/1nDkl3nX76lsrKS9PR00tLSkMvlzJgxI7LM8G31eXBZtmwZM2YcLxI3YcIElixZAsCSJUuYOHHiScdDoRC7d+8mJiYGg8HA6NGj2bBhAzabDZvNxoYNGxg9ejQGg4Ho6Gh2795NKBQ67bm+eg2BQCC4lC3Z1cwji/bSbHURApqtLh5ZtPe8A8yZlh/OR5/WFnM6nWzcuJEnn3wycmzevHnce++9LFy4kOTkZF588UUAxo0bx7p165g8eTIqlYpnn30WAJ1Ox1133RUpEX333Xej0+kAePzxx3nkkUdwu92MHTuWsWPHnvUaAoFAcCn784pDuHwnd4l0+QL8ecUhZg288PXBzkefBhe1Ws2WLVtOOqbX63njjTdOea5IJOLxxx8/7XnmzJkTCS4nKikpiXRkO5drCAQCwaWsxer6RsfP1ZmWH86HkKEvEAgEl4hk3ek7Z57p+LkqKSmhvr6exsZGvF4vy5YtY8KECed1TiG4CAQCwSXioan5qGQnN/RSySQ8NDX/vM4rlUp57LHHuO2225g+fTrTpk0jNzf3/M55Xq8WCAQCwXfm2LrKn1ccosXqIlmn4qGp+RdkvWXcuHGMGzfuvM9zjBBcBAKB4BIya2DK927x/nSEaTGBQCAQXHBCcBEIBALBBScEF4FAIBBccEJwEQgEAsEFJwQXgUAgEFxwQnARCASCH7lHHnmE8vLySGuUC0EILgKBQPAjN3v2bF555ZULek4huAgEgm/G74Ng8GLfxY9X5QfwQjE8oQv/XvnBeZ9y6NChaLXa87+3EwjBRSAQnLvq1fDPcnj7GjAduNh38+NT+QF8fA/YGoFQ+PeP77kgAeZCE4KLQCA4N34ffPobsByB2jWwf/HFvqMfn9VPgu8rFZB9rvDx7xkhuAgE56DN5mJjdQctVufFvpWLRywBbdrxPysv7DSK4BzYmr7Z8YtIqC0mEJzgYKudmnYHRclaMuKjgHBgmffmDiqbbeQZonn1lqGkxarPfqJgEAI+kCm+g7v+jojFMPVZ2LcIlDooveFi39GPjza1d0rsNMe/Z4SRi0DQ63BbNzf+azN3v7uLW1/fRnNvA6bq9h4qm23h55gdHDE7zn6izjr44Cb4x1DY/1Ff3/Z3y1gIEx6FkXdDVNzFvpsfn4mPgewrvVtkqvDx83D//fdzww03UFdXx9ixY1mwYMF5nQ/6eORit9v5/e9/z+HDhxGJRDz77LNkZmZy33330dzcTEpKCi+++CJarZZQKMQzzzzDunXrUCqV/M///A9FRUUALF68mH/+858A3HnnnVx99dUAVFVVRdocjxs3jkcffRSRSITVaj3tNQSCs6nrcNDl9AFQ29FDc5eLFJ2KzLgocgxRVJt7SNOryUqIOvuJDi4L/wL46FeQPkp4IxZcGKXXhX9f/WR4KkybGg4sx45/S88///wFuLmT9enI5ZlnnmHMmDF89tlnLF26lOzsbObPn095eTkrV66kvLyc+fPnA1BRUUF9fT0rV67kqaee4oknngDAarXy0ksv8cEHH7BgwQJeeuklbLbwp8gnnniCp556ipUrV1JfX09FRQXAGa8hEJxNYbKW/kkxAIzJjSe7N4ik6FX8+5ahvPrzIbz1i2FkxH1NcFFqjn+tSQWpvK9uWfBjVHod3FcFT1jDv59nYOkrfRZcuru72b
ZtG3PmzAFALpej0WhYvXo1s2bNAmDWrFmsWrUKIHJcJBJRVlaG3W7HbDazYcMGRo0ahU6nQ6vVMmrUKNavX4/ZbMb
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.scatterplot(x=\"Gr Liv Area\", y=\"SalePrice\", hue=\"outlier\", s=15, data=df);"
]
},
2024-07-10 01:31:28 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We concur with the notes from the paper and remove only the sales of houses with more than 4000 square feet."
]
},
2021-05-25 08:22:14 +02:00
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 102,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Remove the outliers.\n",
2024-07-10 01:31:28 +02:00
"df = df[df[\"Gr Liv Area\"] <= 4000]"
2021-05-25 08:22:14 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the Data"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 103,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Re-order the columns for convenience.\n",
"final_columns = (\n",
" sorted(set(list(ALL_COLUMNS.keys()) + new_variables) & set(df.columns))\n",
" + TARGET_VARIABLES\n",
")\n",
"df = df[final_columns]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Discarding useless and adding new predictors changed the final dataset significantly."
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 104,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-07-10 01:31:28 +02:00
"(2893, 109)"
2021-05-25 08:22:14 +02:00
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 104,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 105,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>1st Flr SF (box-cox-0)</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Gr Liv Area (box-cox-0)</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Area (box-cox-0.1)</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Screen Porch</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.2)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>abnormal_sale</th>\n",
" <th>air_cond</th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" <th>major_street</th>\n",
" <th>new_home</th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" <th>park</th>\n",
" <th>partial_sale</th>\n",
" <th>railway</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>526301100</th>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>Gd</td>\n",
" <td>Gd</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>441.0</td>\n",
" <td>639.0</td>\n",
" <td>0.0</td>\n",
" <td>BLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>Gd</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>528.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1656.0</td>\n",
" <td>7.412160</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>31770.0</td>\n",
" <td>18.196923</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>112.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>62.0</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>P</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>2.0</td>\n",
" <td>1080.0</td>\n",
" <td>272.0</td>\n",
" <td>2736.0</td>\n",
" <td>19.344072</td>\n",
" <td>AllPub</td>\n",
" <td>210.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>215000.0</td>\n",
" <td>12.278393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <th>526350040</th>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>270.0</td>\n",
" <td>468.0</td>\n",
" <td>144.0</td>\n",
" <td>Rec</td>\n",
" <td>LwQ</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>730.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>896.0</td>\n",
" <td>6.797940</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>11622.0</td>\n",
" <td>15.499290</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>120.0</td>\n",
" <td>5</td>\n",
" <td>1.0</td>\n",
" <td>882.0</td>\n",
" <td>260.0</td>\n",
" <td>1778.0</td>\n",
" <td>17.333478</td>\n",
" <td>AllPub</td>\n",
" <td>140.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>49</td>\n",
" <td>49</td>\n",
" <td>105000.0</td>\n",
" <td>11.561716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <th>526351010</th>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>406.0</td>\n",
" <td>923.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>NA</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Typ</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Unf</td>\n",
" <td>TA</td>\n",
" <td>1329.0</td>\n",
" <td>7.192182</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Gd</td>\n",
" <td>Gtl</td>\n",
" <td>14267.0</td>\n",
" <td>16.027549</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>108.0</td>\n",
" <td>12500.0</td>\n",
" <td>6</td>\n",
" <td>36.0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1.5</td>\n",
" <td>1329.0</td>\n",
" <td>429.0</td>\n",
" <td>2658.0</td>\n",
" <td>19.203658</td>\n",
" <td>AllPub</td>\n",
" <td>393.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>52</td>\n",
" <td>52</td>\n",
" <td>172000.0</td>\n",
" <td>12.055250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <th>526353030</th>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>TA</td>\n",
" <td>1045.0</td>\n",
" <td>1065.0</td>\n",
" <td>0.0</td>\n",
" <td>ALQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>TA</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>522.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>2110.0</td>\n",
" <td>7.654443</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Ex</td>\n",
" <td>Gtl</td>\n",
" <td>11160.0</td>\n",
" <td>15.396064</td>\n",
" <td>Reg</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>8</td>\n",
" <td>3.5</td>\n",
" <td>2110.0</td>\n",
" <td>0.0</td>\n",
" <td>4220.0</td>\n",
" <td>21.548042</td>\n",
" <td>AllPub</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>42</td>\n",
" <td>244000.0</td>\n",
" <td>12.404924</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>527105010</th>\n",
" <td>928.0</td>\n",
" <td>6.833032</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>TA</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Gd</td>\n",
" <td>137.0</td>\n",
" <td>791.0</td>\n",
" <td>0.0</td>\n",
" <td>GLQ</td>\n",
" <td>Unf</td>\n",
" <td>SBrkr</td>\n",
" <td>0.0</td>\n",
" <td>MnPrv</td>\n",
" <td>TA</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Typ</td>\n",
" <td>482.0</td>\n",
" <td>2</td>\n",
" <td>TA</td>\n",
" <td>Fin</td>\n",
" <td>TA</td>\n",
" <td>1629.0</td>\n",
" <td>7.395722</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>TA</td>\n",
" <td>Gtl</td>\n",
" <td>13830.0</td>\n",
" <td>15.946705</td>\n",
" <td>IR1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>34.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>Y</td>\n",
" <td>0.0</td>\n",
" <td>NA</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>2.5</td>\n",
" <td>928.0</td>\n",
" <td>246.0</td>\n",
" <td>2557.0</td>\n",
" <td>19.016856</td>\n",
" <td>AllPub</td>\n",
" <td>212.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>189900.0</td>\n",
" <td>12.154253</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 1st Flr SF (box-cox-0) 2nd Flr SF 3Ssn Porch \\\n",
"Order PID \n",
"1 526301100 1656.0 7.412160 0.0 0.0 \n",
"2 526350040 896.0 6.797940 0.0 0.0 \n",
"3 526351010 1329.0 7.192182 0.0 0.0 \n",
"4 526353030 2110.0 7.654443 0.0 0.0 \n",
"5 527105010 928.0 6.833032 701.0 0.0 \n",
"\n",
" Bedroom AbvGr Bsmt Cond Bsmt Exposure Bsmt Full Bath \\\n",
"Order PID \n",
"1 526301100 3 Gd Gd 1 \n",
"2 526350040 2 TA No 0 \n",
"3 526351010 3 TA No 0 \n",
"4 526353030 3 TA No 1 \n",
"5 527105010 3 TA No 0 \n",
"\n",
" Bsmt Half Bath Bsmt Qual Bsmt Unf SF BsmtFin SF 1 \\\n",
"Order PID \n",
"1 526301100 0 TA 441.0 639.0 \n",
"2 526350040 0 TA 270.0 468.0 \n",
"3 526351010 0 TA 406.0 923.0 \n",
"4 526353030 0 TA 1045.0 1065.0 \n",
"5 527105010 0 Gd 137.0 791.0 \n",
"\n",
" BsmtFin SF 2 BsmtFin Type 1 BsmtFin Type 2 Electrical \\\n",
"Order PID \n",
"1 526301100 0.0 BLQ Unf SBrkr \n",
"2 526350040 144.0 Rec LwQ SBrkr \n",
"3 526351010 0.0 ALQ Unf SBrkr \n",
"4 526353030 0.0 ALQ Unf SBrkr \n",
"5 527105010 0.0 GLQ Unf SBrkr \n",
"\n",
" Enclosed Porch Fence Fireplace Qu Fireplaces Full Bath \\\n",
"Order PID \n",
"1 526301100 0.0 NA Gd 2 1 \n",
"2 526350040 0.0 MnPrv NA 0 1 \n",
"3 526351010 0.0 NA NA 0 1 \n",
"4 526353030 0.0 NA TA 2 2 \n",
"5 527105010 0.0 MnPrv TA 1 2 \n",
"\n",
" Functional Garage Area Garage Cars Garage Cond \\\n",
"Order PID \n",
"1 526301100 Typ 528.0 2 TA \n",
"2 526350040 Typ 730.0 1 TA \n",
"3 526351010 Typ 312.0 1 TA \n",
"4 526353030 Typ 522.0 2 TA \n",
"5 527105010 Typ 482.0 2 TA \n",
"\n",
" Garage Finish Garage Qual Gr Liv Area \\\n",
"Order PID \n",
"1 526301100 Fin TA 1656.0 \n",
"2 526350040 Unf TA 896.0 \n",
"3 526351010 Unf TA 1329.0 \n",
"4 526353030 Fin TA 2110.0 \n",
"5 527105010 Fin TA 1629.0 \n",
"\n",
" Gr Liv Area (box-cox-0) Half Bath Kitchen AbvGr \\\n",
"Order PID \n",
"1 526301100 7.412160 0 1 \n",
"2 526350040 6.797940 0 1 \n",
"3 526351010 7.192182 1 1 \n",
"4 526353030 7.654443 1 1 \n",
"5 527105010 7.395722 1 1 \n",
"\n",
" Kitchen Qual Land Slope Lot Area Lot Area (box-cox-0.1) \\\n",
"Order PID \n",
"1 526301100 TA Gtl 31770.0 18.196923 \n",
"2 526350040 TA Gtl 11622.0 15.499290 \n",
"3 526351010 Gd Gtl 14267.0 16.027549 \n",
"4 526353030 Ex Gtl 11160.0 15.396064 \n",
"5 527105010 TA Gtl 13830.0 15.946705 \n",
"\n",
" Lot Shape Low Qual Fin SF Mas Vnr Area Misc Val Mo Sold \\\n",
"Order PID \n",
"1 526301100 IR1 0.0 112.0 0.0 5 \n",
"2 526350040 Reg 0.0 0.0 0.0 6 \n",
"3 526351010 IR1 0.0 108.0 12500.0 6 \n",
"4 526353030 Reg 0.0 0.0 0.0 4 \n",
"5 527105010 IR1 0.0 0.0 0.0 3 \n",
"\n",
" Open Porch SF Overall Cond Overall Qual Paved Drive \\\n",
"Order PID \n",
"1 526301100 62.0 5 6 P \n",
"2 526350040 0.0 6 5 Y \n",
"3 526351010 36.0 6 6 Y \n",
"4 526353030 0.0 5 7 Y \n",
"5 527105010 34.0 5 5 Y \n",
"\n",
" Pool Area Pool QC Screen Porch TotRms AbvGrd Total Bath \\\n",
"Order PID \n",
"1 526301100 0.0 NA 0.0 7 2.0 \n",
"2 526350040 0.0 NA 120.0 5 1.0 \n",
"3 526351010 0.0 NA 0.0 6 1.5 \n",
"4 526353030 0.0 NA 0.0 8 3.5 \n",
"5 527105010 0.0 NA 0.0 6 2.5 \n",
"\n",
" Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"1 526301100 1080.0 272.0 2736.0 \n",
"2 526350040 882.0 260.0 1778.0 \n",
"3 526351010 1329.0 429.0 2658.0 \n",
"4 526353030 2110.0 0.0 4220.0 \n",
"5 527105010 928.0 246.0 2557.0 \n",
"\n",
" Total SF (box-cox-0.2) Utilities Wood Deck SF \\\n",
"Order PID \n",
"1 526301100 19.344072 AllPub 210.0 \n",
"2 526350040 17.333478 AllPub 140.0 \n",
"3 526351010 19.203658 AllPub 393.0 \n",
"4 526353030 21.548042 AllPub 0.0 \n",
"5 527105010 19.016856 AllPub 212.0 \n",
"\n",
" abnormal_sale air_cond build_type_1Fam build_type_2Fam \\\n",
"Order PID \n",
"1 526301100 0 1 1 0 \n",
"2 526350040 0 1 1 0 \n",
"3 526351010 0 1 1 0 \n",
"4 526353030 0 1 1 0 \n",
"5 527105010 0 1 1 0 \n",
"\n",
" build_type_Twnhs found_BrkTil found_CBlock found_PConc \\\n",
"Order PID \n",
"1 526301100 0 0 1 0 \n",
"2 526350040 0 0 1 0 \n",
"3 526351010 0 0 1 0 \n",
"4 526353030 0 0 1 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"1 526301100 0 1 1 1 0 \n",
"2 526350040 0 1 0 1 0 \n",
"3 526351010 0 1 0 1 0 \n",
"4 526353030 0 1 1 1 0 \n",
"5 527105010 1 1 1 1 0 \n",
"\n",
" has Porch major_street new_home nhood_Blmngtn \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 1 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 1 0 0 0 \n",
"\n",
" nhood_Blueste nhood_BrDale nhood_BrkSide nhood_ClearCr \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_CollgCr nhood_Crawfor nhood_Edwards nhood_Gilbert \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 1 \n",
"\n",
" nhood_Greens nhood_GrnHill nhood_IDOTRR nhood_Landmrk \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_MeadowV nhood_Mitchel nhood_NPkVill nhood_NWAmes \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_Names nhood_NoRidge nhood_NridgHt nhood_OldTown \\\n",
"Order PID \n",
"1 526301100 1 0 0 0 \n",
"2 526350040 1 0 0 0 \n",
"3 526351010 1 0 0 0 \n",
"4 526353030 1 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_SWISU nhood_Sawyer nhood_SawyerW nhood_Somerst \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" nhood_StoneBr nhood_Timber nhood_Veenker park \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" partial_sale railway recently_built recently_remodeled \\\n",
"Order PID \n",
"1 526301100 0 0 0 0 \n",
"2 526350040 0 0 0 0 \n",
"3 526351010 0 0 0 0 \n",
"4 526353030 0 0 0 0 \n",
"5 527105010 0 0 0 0 \n",
"\n",
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"1 526301100 0 50 50 \n",
"2 526350040 0 49 49 \n",
"3 526351010 0 52 52 \n",
"4 526353030 0 42 42 \n",
"5 527105010 1 13 12 \n",
"\n",
" SalePrice SalePrice (box-cox-0) \n",
"Order PID \n",
"1 526301100 215000.0 12.278393 \n",
"2 526350040 105000.0 11.561716 \n",
"3 526351010 172000.0 12.055250 \n",
"4 526353030 244000.0 12.404924 \n",
"5 527105010 189900.0 12.154253 "
]
},
2024-07-10 01:31:28 +02:00
"execution_count": 105,
2021-05-25 08:22:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
2024-07-10 01:31:28 +02:00
"execution_count": 106,
2021-05-25 08:22:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"data/data_clean_with_transformations_and_factors.csv\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ames-housing",
2021-05-25 08:22:14 +02:00
"language": "python",
"name": "ames-housing"
2021-05-25 08:22:14 +02:00
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
2021-05-25 08:22:14 +02:00
}
},
"nbformat": 4,
"nbformat_minor": 4
}