ames-housing/4_predictive_models.ipynb

6368 lines
216 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Predictive Models"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## \"Housekeeping\""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-09-05 22:14:29 CEST\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.5.0\n",
"\n",
"numpy 1.15.1\n",
"pandas 0.23.4\n",
"sklearn 0.20rc1\n"
]
}
],
"source": [
"# Print the run timestamp and the Python / package versions used for this notebook.\n",
"%load_ext watermark\n",
"%watermark -d -t -v -z -p numpy,pandas,sklearn"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.linear_model import Lasso, LinearRegression, Ridge\n",
"from sklearn.metrics import (\n",
" make_scorer,\n",
" mean_absolute_error,\n",
" mean_squared_error,\n",
" r2_score,\n",
")\n",
"from sklearn.model_selection import GridSearchCV, KFold\n",
"from sklearn.svm import SVR\n",
"from sklearn.utils import shuffle\n",
"from tqdm import tqdm_notebook as progress_bar\n",
"\n",
"from utils import (\n",
" CONTINUOUS_VARIABLES,\n",
" DISCRETE_VARIABLES,\n",
" NOMINAL_VARIABLES,\n",
" ORDINAL_VARIABLES,\n",
" TARGET_VARIABLES,\n",
" bias_score,\n",
" encode_ordinals,\n",
" load_clean_data,\n",
" max_deviation,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"random_state = np.random.RandomState(42)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"pd.set_option(\"display.max_columns\", 250)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"%load_ext blackcellmagic"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load the Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Original Data\n",
"\n",
"The DataFrame `df1` holds the cleaned data from notebook 1 with all the nominal and ordinal features automatically translated to factor variables and ordered integer values."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df1 = load_clean_data(\"data/data_clean.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The next cell basically **replaces** all the manual work that went into generating new and identifying \"interesting\" features in notebooks 2 and 3."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Keep the continuous, discrete, ordinal, and target columns and\n",
"# append dummy (0/1) columns for the nominal ones.\n",
"df1 = pd.concat(\n",
"    [\n",
"        df1[CONTINUOUS_VARIABLES + DISCRETE_VARIABLES + ORDINAL_VARIABLES + TARGET_VARIABLES],\n",
"        pd.get_dummies(df1[NOMINAL_VARIABLES], dtype=int),\n",
"    ],\n",
"    axis=1,\n",
")\n",
"# Feature columns in sorted order first, target columns last.\n",
"df1 = df1[sorted(set(df1.columns).difference(TARGET_VARIABLES)) + TARGET_VARIABLES]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df1 = encode_ordinals(df1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"df1 = shuffle(df1, random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"MultiIndex: 2898 entries, (144, 535153070) to (867, 907253130)\n",
"Columns: 248 entries, 1st Flr SF to SalePrice\n",
"dtypes: float64(19), int64(229)\n",
"memory usage: 5.5 MB\n"
]
}
],
"source": [
"df1.info()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Alley_Grvl</th>\n",
" <th>Alley_NA</th>\n",
" <th>Alley_Pave</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bldg Type_1Fam</th>\n",
" <th>Bldg Type_2FmCon</th>\n",
" <th>Bldg Type_Duplx</th>\n",
" <th>Bldg Type_TwnhsE</th>\n",
" <th>Bldg Type_TwnhsI</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Central Air_N</th>\n",
" <th>Central Air_Y</th>\n",
" <th>Condition 1_Artery</th>\n",
" <th>Condition 1_Feedr</th>\n",
" <th>Condition 1_Norm</th>\n",
" <th>Condition 1_PosA</th>\n",
" <th>Condition 1_PosN</th>\n",
" <th>Condition 1_RRAe</th>\n",
" <th>Condition 1_RRAn</th>\n",
" <th>Condition 1_RRNe</th>\n",
" <th>Condition 1_RRNn</th>\n",
" <th>Condition 2_Artery</th>\n",
" <th>Condition 2_Feedr</th>\n",
" <th>Condition 2_Norm</th>\n",
" <th>Condition 2_PosA</th>\n",
" <th>Condition 2_PosN</th>\n",
" <th>Condition 2_RRAe</th>\n",
" <th>Condition 2_RRAn</th>\n",
" <th>Condition 2_RRNe</th>\n",
" <th>Condition 2_RRNn</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Exter Cond</th>\n",
" <th>Exter Qual</th>\n",
" <th>Exterior 1st_AsbShng</th>\n",
" <th>Exterior 1st_AsphShn</th>\n",
" <th>Exterior 1st_BrkComm</th>\n",
" <th>Exterior 1st_BrkFace</th>\n",
" <th>Exterior 1st_CBlock</th>\n",
" <th>Exterior 1st_CemntBd</th>\n",
" <th>Exterior 1st_HdBoard</th>\n",
" <th>Exterior 1st_ImStucc</th>\n",
" <th>Exterior 1st_MetalSd</th>\n",
" <th>Exterior 1st_Other</th>\n",
" <th>Exterior 1st_Plywood</th>\n",
" <th>Exterior 1st_PreCast</th>\n",
" <th>Exterior 1st_Stone</th>\n",
" <th>Exterior 1st_Stucco</th>\n",
" <th>Exterior 1st_VinylSd</th>\n",
" <th>Exterior 1st_Wd Sdng</th>\n",
" <th>Exterior 1st_WdShing</th>\n",
" <th>Exterior 2nd_AsbShng</th>\n",
" <th>Exterior 2nd_AsphShn</th>\n",
" <th>Exterior 2nd_BrkComm</th>\n",
" <th>Exterior 2nd_BrkFace</th>\n",
" <th>Exterior 2nd_CBlock</th>\n",
" <th>Exterior 2nd_CemntBd</th>\n",
" <th>Exterior 2nd_HdBoard</th>\n",
" <th>Exterior 2nd_ImStucc</th>\n",
" <th>Exterior 2nd_MetalSd</th>\n",
" <th>Exterior 2nd_Other</th>\n",
" <th>Exterior 2nd_Plywood</th>\n",
" <th>Exterior 2nd_PreCast</th>\n",
" <th>Exterior 2nd_Stone</th>\n",
" <th>Exterior 2nd_Stucco</th>\n",
" <th>Exterior 2nd_VinylSd</th>\n",
" <th>Exterior 2nd_Wd Sdng</th>\n",
" <th>Exterior 2nd_WdShing</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Foundation_BrkTil</th>\n",
" <th>Foundation_CBlock</th>\n",
" <th>Foundation_PConc</th>\n",
" <th>Foundation_Slab</th>\n",
" <th>Foundation_Stone</th>\n",
" <th>Foundation_Wood</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Garage Type_2Types</th>\n",
" <th>Garage Type_Attchd</th>\n",
" <th>Garage Type_Basment</th>\n",
" <th>Garage Type_BuiltIn</th>\n",
" <th>Garage Type_CarPort</th>\n",
" <th>Garage Type_Detchd</th>\n",
" <th>Garage Type_NA</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Heating QC</th>\n",
" <th>Heating_Floor</th>\n",
" <th>Heating_GasA</th>\n",
" <th>Heating_GasW</th>\n",
" <th>Heating_Grav</th>\n",
" <th>Heating_OthW</th>\n",
" <th>Heating_Wall</th>\n",
" <th>House Style_1.5Fin</th>\n",
" <th>House Style_1.5Unf</th>\n",
" <th>House Style_1Story</th>\n",
" <th>House Style_2.5Fin</th>\n",
" <th>House Style_2.5Unf</th>\n",
" <th>House Style_2Story</th>\n",
" <th>House Style_SFoyer</th>\n",
" <th>House Style_SLvl</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Contour_Bnk</th>\n",
" <th>Land Contour_HLS</th>\n",
" <th>Land Contour_Low</th>\n",
" <th>Land Contour_Lvl</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Config_Corner</th>\n",
" <th>Lot Config_CulDSac</th>\n",
" <th>Lot Config_FR2</th>\n",
" <th>Lot Config_FR3</th>\n",
" <th>Lot Config_Inside</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>MS SubClass_020</th>\n",
" <th>MS SubClass_030</th>\n",
" <th>MS SubClass_040</th>\n",
" <th>MS SubClass_045</th>\n",
" <th>MS SubClass_050</th>\n",
" <th>MS SubClass_060</th>\n",
" <th>MS SubClass_070</th>\n",
" <th>MS SubClass_075</th>\n",
" <th>MS SubClass_080</th>\n",
" <th>MS SubClass_085</th>\n",
" <th>MS SubClass_090</th>\n",
" <th>MS SubClass_120</th>\n",
" <th>MS SubClass_150</th>\n",
" <th>MS SubClass_160</th>\n",
" <th>MS SubClass_180</th>\n",
" <th>MS SubClass_190</th>\n",
" <th>MS Zoning_A</th>\n",
" <th>MS Zoning_C</th>\n",
" <th>MS Zoning_FV</th>\n",
" <th>MS Zoning_I</th>\n",
" <th>MS Zoning_RH</th>\n",
" <th>MS Zoning_RL</th>\n",
" <th>MS Zoning_RM</th>\n",
" <th>MS Zoning_RP</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Mas Vnr Type_BrkCmn</th>\n",
" <th>Mas Vnr Type_BrkFace</th>\n",
" <th>Mas Vnr Type_CBlock</th>\n",
" <th>Mas Vnr Type_None</th>\n",
" <th>Mas Vnr Type_Stone</th>\n",
" <th>Misc Feature_Elev</th>\n",
" <th>Misc Feature_Gar2</th>\n",
" <th>Misc Feature_NA</th>\n",
" <th>Misc Feature_Othr</th>\n",
" <th>Misc Feature_Shed</th>\n",
" <th>Misc Feature_TenC</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Neighborhood_Blmngtn</th>\n",
" <th>Neighborhood_Blueste</th>\n",
" <th>Neighborhood_BrDale</th>\n",
" <th>Neighborhood_BrkSide</th>\n",
" <th>Neighborhood_ClearCr</th>\n",
" <th>Neighborhood_CollgCr</th>\n",
" <th>Neighborhood_Crawfor</th>\n",
" <th>Neighborhood_Edwards</th>\n",
" <th>Neighborhood_Gilbert</th>\n",
" <th>Neighborhood_Greens</th>\n",
" <th>Neighborhood_GrnHill</th>\n",
" <th>Neighborhood_IDOTRR</th>\n",
" <th>Neighborhood_Landmrk</th>\n",
" <th>Neighborhood_MeadowV</th>\n",
" <th>Neighborhood_Mitchel</th>\n",
" <th>Neighborhood_NPkVill</th>\n",
" <th>Neighborhood_NWAmes</th>\n",
" <th>Neighborhood_Names</th>\n",
" <th>Neighborhood_NoRidge</th>\n",
" <th>Neighborhood_NridgHt</th>\n",
" <th>Neighborhood_OldTown</th>\n",
" <th>Neighborhood_SWISU</th>\n",
" <th>Neighborhood_Sawyer</th>\n",
" <th>Neighborhood_SawyerW</th>\n",
" <th>Neighborhood_Somerst</th>\n",
" <th>Neighborhood_StoneBr</th>\n",
" <th>Neighborhood_Timber</th>\n",
" <th>Neighborhood_Veenker</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Roof Matl_ClyTile</th>\n",
" <th>Roof Matl_CompShg</th>\n",
" <th>Roof Matl_Membran</th>\n",
" <th>Roof Matl_Metal</th>\n",
" <th>Roof Matl_Roll</th>\n",
" <th>Roof Matl_Tar&amp;Grv</th>\n",
" <th>Roof Matl_WdShake</th>\n",
" <th>Roof Matl_WdShngl</th>\n",
" <th>Roof Style_Flat</th>\n",
" <th>Roof Style_Gable</th>\n",
" <th>Roof Style_Gambrel</th>\n",
" <th>Roof Style_Hip</th>\n",
" <th>Roof Style_Mansard</th>\n",
" <th>Roof Style_Shed</th>\n",
" <th>Sale Condition_Abnorml</th>\n",
" <th>Sale Condition_AdjLand</th>\n",
" <th>Sale Condition_Alloca</th>\n",
" <th>Sale Condition_Family</th>\n",
" <th>Sale Condition_Normal</th>\n",
" <th>Sale Condition_Partial</th>\n",
" <th>Sale Type_COD</th>\n",
" <th>Sale Type_CWD</th>\n",
" <th>Sale Type_Con</th>\n",
" <th>Sale Type_ConLD</th>\n",
" <th>Sale Type_ConLI</th>\n",
" <th>Sale Type_ConLw</th>\n",
" <th>Sale Type_New</th>\n",
" <th>Sale Type_Oth</th>\n",
" <th>Sale Type_VWD</th>\n",
" <th>Sale Type_WD</th>\n",
" <th>Screen Porch</th>\n",
" <th>Street_Grvl</th>\n",
" <th>Street_Pave</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>Year Built</th>\n",
" <th>Year Remod/Add</th>\n",
" <th>Yr Sold</th>\n",
" <th>SalePrice</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>144</th>\n",
" <th>535153070</th>\n",
" <td>1194.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1194.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>120.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>312.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1194.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>8760.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>220.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1194.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1959</td>\n",
" <td>1959</td>\n",
" <td>2010</td>\n",
" <td>148000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1574</th>\n",
" <th>916380060</th>\n",
" <td>1537.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>482.0</td>\n",
" <td>1036.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>788.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1537.0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>11563.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>258.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>26.0</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>1518.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>2006</td>\n",
" <td>2007</td>\n",
" <td>2008</td>\n",
" <td>294000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>490</th>\n",
" <th>528290190</th>\n",
" <td>774.0</td>\n",
" <td>656.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>384.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>400.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1430.0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>7750.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>384.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1999</td>\n",
" <td>2000</td>\n",
" <td>2009</td>\n",
" <td>156000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1730</th>\n",
" <th>528218050</th>\n",
" <td>783.0</td>\n",
" <td>701.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>783.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>393.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1484.0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>10237.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72.0</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>783.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>2005</td>\n",
" <td>2007</td>\n",
" <td>2007</td>\n",
" <td>178900.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2276</th>\n",
" <th>921128030</th>\n",
" <td>1824.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>1824.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>932.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1824.0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>12633.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>242.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>36.0</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>108.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>1824.0</td>\n",
" <td>3</td>\n",
" <td>160.0</td>\n",
" <td>2006</td>\n",
" <td>2007</td>\n",
" <td>2007</td>\n",
" <td>392000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 2nd Flr SF 3Ssn Porch Alley_Grvl Alley_NA \\\n",
"Order PID \n",
"144 535153070 1194.0 0.0 0.0 0 1 \n",
"1574 916380060 1537.0 0.0 0.0 0 1 \n",
"490 528290190 774.0 656.0 0.0 0 1 \n",
"1730 528218050 783.0 701.0 0.0 0 1 \n",
"2276 921128030 1824.0 0.0 0.0 0 1 \n",
"\n",
" Alley_Pave Bedroom AbvGr Bldg Type_1Fam Bldg Type_2FmCon \\\n",
"Order PID \n",
"144 535153070 0 3 1 0 \n",
"1574 916380060 0 3 1 0 \n",
"490 528290190 0 3 1 0 \n",
"1730 528218050 0 3 1 0 \n",
"2276 921128030 0 3 1 0 \n",
"\n",
" Bldg Type_Duplx Bldg Type_TwnhsE Bldg Type_TwnhsI \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Bsmt Cond Bsmt Exposure Bsmt Full Bath Bsmt Half Bath \\\n",
"Order PID \n",
"144 535153070 3 1 1 0 \n",
"1574 916380060 3 4 1 0 \n",
"490 528290190 3 1 0 0 \n",
"1730 528218050 3 1 0 0 \n",
"2276 921128030 3 4 0 0 \n",
"\n",
" Bsmt Qual Bsmt Unf SF BsmtFin SF 1 BsmtFin SF 2 \\\n",
"Order PID \n",
"144 535153070 3 1194.0 0.0 0.0 \n",
"1574 916380060 5 482.0 1036.0 0.0 \n",
"490 528290190 4 384.0 0.0 0.0 \n",
"1730 528218050 4 783.0 0.0 0.0 \n",
"2276 921128030 5 1824.0 0.0 0.0 \n",
"\n",
" BsmtFin Type 1 BsmtFin Type 2 Central Air_N Central Air_Y \\\n",
"Order PID \n",
"144 535153070 1 1 0 1 \n",
"1574 916380060 6 1 0 1 \n",
"490 528290190 1 1 0 1 \n",
"1730 528218050 1 1 0 1 \n",
"2276 921128030 1 1 0 1 \n",
"\n",
" Condition 1_Artery Condition 1_Feedr Condition 1_Norm \\\n",
"Order PID \n",
"144 535153070 0 0 1 \n",
"1574 916380060 0 0 1 \n",
"490 528290190 0 0 1 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Condition 1_PosA Condition 1_PosN Condition 1_RRAe \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 1 0 0 \n",
"\n",
" Condition 1_RRAn Condition 1_RRNe Condition 1_RRNn \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 1 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Condition 2_Artery Condition 2_Feedr Condition 2_Norm \\\n",
"Order PID \n",
"144 535153070 0 0 1 \n",
"1574 916380060 0 0 1 \n",
"490 528290190 0 0 1 \n",
"1730 528218050 0 0 1 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Condition 2_PosA Condition 2_PosN Condition 2_RRAe \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 1 0 0 \n",
"\n",
" Condition 2_RRAn Condition 2_RRNe Condition 2_RRNn \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Electrical Enclosed Porch Exter Cond Exter Qual \\\n",
"Order PID \n",
"144 535153070 4 120.0 2 2 \n",
"1574 916380060 4 0.0 2 3 \n",
"490 528290190 4 0.0 2 2 \n",
"1730 528218050 4 0.0 2 3 \n",
"2276 921128030 4 0.0 2 4 \n",
"\n",
" Exterior 1st_AsbShng Exterior 1st_AsphShn \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_BrkComm Exterior 1st_BrkFace \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_CBlock Exterior 1st_CemntBd \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_HdBoard Exterior 1st_ImStucc \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_MetalSd Exterior 1st_Other \\\n",
"Order PID \n",
"144 535153070 1 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 1 0 \n",
"\n",
" Exterior 1st_Plywood Exterior 1st_PreCast \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_Stone Exterior 1st_Stucco \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_VinylSd Exterior 1st_Wd Sdng \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 1 0 \n",
"490 528290190 1 0 \n",
"1730 528218050 1 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 1st_WdShing Exterior 2nd_AsbShng \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_AsphShn Exterior 2nd_BrkComm \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_BrkFace Exterior 2nd_CBlock \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_CemntBd Exterior 2nd_HdBoard \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_ImStucc Exterior 2nd_MetalSd \\\n",
"Order PID \n",
"144 535153070 0 1 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 1 \n",
"\n",
" Exterior 2nd_Other Exterior 2nd_Plywood \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_PreCast Exterior 2nd_Stone \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_Stucco Exterior 2nd_VinylSd \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 1 \n",
"490 528290190 0 1 \n",
"1730 528218050 0 1 \n",
"2276 921128030 0 0 \n",
"\n",
" Exterior 2nd_Wd Sdng Exterior 2nd_WdShing Fence \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Fireplace Qu Fireplaces Foundation_BrkTil \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 3 1 0 \n",
"1730 528218050 4 1 0 \n",
"2276 921128030 4 1 0 \n",
"\n",
" Foundation_CBlock Foundation_PConc Foundation_Slab \\\n",
"Order PID \n",
"144 535153070 1 0 0 \n",
"1574 916380060 0 1 0 \n",
"490 528290190 0 1 0 \n",
"1730 528218050 0 1 0 \n",
"2276 921128030 0 1 0 \n",
"\n",
" Foundation_Stone Foundation_Wood Full Bath Functional \\\n",
"Order PID \n",
"144 535153070 0 0 1 7 \n",
"1574 916380060 0 0 2 7 \n",
"490 528290190 0 0 2 7 \n",
"1730 528218050 0 0 2 7 \n",
"2276 921128030 0 0 2 7 \n",
"\n",
" Garage Area Garage Cars Garage Cond Garage Finish \\\n",
"Order PID \n",
"144 535153070 312.0 1 3 2 \n",
"1574 916380060 788.0 3 3 3 \n",
"490 528290190 400.0 2 3 2 \n",
"1730 528218050 393.0 2 3 3 \n",
"2276 921128030 932.0 3 3 3 \n",
"\n",
" Garage Qual Garage Type_2Types Garage Type_Attchd \\\n",
"Order PID \n",
"144 535153070 3 0 1 \n",
"1574 916380060 3 0 1 \n",
"490 528290190 3 0 0 \n",
"1730 528218050 3 0 1 \n",
"2276 921128030 3 0 1 \n",
"\n",
" Garage Type_Basment Garage Type_BuiltIn \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 1 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Garage Type_CarPort Garage Type_Detchd Garage Type_NA \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Gr Liv Area Half Bath Heating QC Heating_Floor \\\n",
"Order PID \n",
"144 535153070 1194.0 0 2 0 \n",
"1574 916380060 1537.0 0 4 0 \n",
"490 528290190 1430.0 1 4 0 \n",
"1730 528218050 1484.0 1 4 0 \n",
"2276 921128030 1824.0 0 4 0 \n",
"\n",
" Heating_GasA Heating_GasW Heating_Grav Heating_OthW \\\n",
"Order PID \n",
"144 535153070 1 0 0 0 \n",
"1574 916380060 1 0 0 0 \n",
"490 528290190 1 0 0 0 \n",
"1730 528218050 1 0 0 0 \n",
"2276 921128030 1 0 0 0 \n",
"\n",
" Heating_Wall House Style_1.5Fin House Style_1.5Unf \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" House Style_1Story House Style_2.5Fin House Style_2.5Unf \\\n",
"Order PID \n",
"144 535153070 1 0 0 \n",
"1574 916380060 1 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 1 0 0 \n",
"\n",
" House Style_2Story House Style_SFoyer House Style_SLvl \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 1 \n",
"1730 528218050 1 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Kitchen AbvGr Kitchen Qual Land Contour_Bnk \\\n",
"Order PID \n",
"144 535153070 1 2 0 \n",
"1574 916380060 1 3 0 \n",
"490 528290190 1 2 0 \n",
"1730 528218050 1 3 0 \n",
"2276 921128030 1 4 0 \n",
"\n",
" Land Contour_HLS Land Contour_Low Land Contour_Lvl \\\n",
"Order PID \n",
"144 535153070 0 0 1 \n",
"1574 916380060 1 0 0 \n",
"490 528290190 0 0 1 \n",
"1730 528218050 0 0 1 \n",
"2276 921128030 1 0 0 \n",
"\n",
" Land Slope Lot Area Lot Config_Corner Lot Config_CulDSac \\\n",
"Order PID \n",
"144 535153070 2 8760.0 0 0 \n",
"1574 916380060 2 11563.0 0 0 \n",
"490 528290190 2 7750.0 0 0 \n",
"1730 528218050 2 10237.0 0 0 \n",
"2276 921128030 2 12633.0 0 0 \n",
"\n",
" Lot Config_FR2 Lot Config_FR3 Lot Config_Inside Lot Shape \\\n",
"Order PID \n",
"144 535153070 0 0 1 3 \n",
"1574 916380060 0 0 1 2 \n",
"490 528290190 0 0 1 3 \n",
"1730 528218050 0 0 1 3 \n",
"2276 921128030 0 0 1 2 \n",
"\n",
" Low Qual Fin SF MS SubClass_020 MS SubClass_030 \\\n",
"Order PID \n",
"144 535153070 0.0 1 0 \n",
"1574 916380060 0.0 1 0 \n",
"490 528290190 0.0 0 0 \n",
"1730 528218050 0.0 0 0 \n",
"2276 921128030 0.0 1 0 \n",
"\n",
" MS SubClass_040 MS SubClass_045 MS SubClass_050 \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" MS SubClass_060 MS SubClass_070 MS SubClass_075 \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 1 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" MS SubClass_080 MS SubClass_085 MS SubClass_090 \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 1 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" MS SubClass_120 MS SubClass_150 MS SubClass_160 \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" MS SubClass_180 MS SubClass_190 MS Zoning_A MS Zoning_C \\\n",
"Order PID \n",
"144 535153070 0 0 0 0 \n",
"1574 916380060 0 0 0 0 \n",
"490 528290190 0 0 0 0 \n",
"1730 528218050 0 0 0 0 \n",
"2276 921128030 0 0 0 0 \n",
"\n",
" MS Zoning_FV MS Zoning_I MS Zoning_RH MS Zoning_RL \\\n",
"Order PID \n",
"144 535153070 0 0 0 1 \n",
"1574 916380060 0 0 0 1 \n",
"490 528290190 0 0 0 1 \n",
"1730 528218050 0 0 0 1 \n",
"2276 921128030 0 0 0 1 \n",
"\n",
" MS Zoning_RM MS Zoning_RP Mas Vnr Area \\\n",
"Order PID \n",
"144 535153070 0 0 220.0 \n",
"1574 916380060 0 0 258.0 \n",
"490 528290190 0 0 0.0 \n",
"1730 528218050 0 0 0.0 \n",
"2276 921128030 0 0 242.0 \n",
"\n",
" Mas Vnr Type_BrkCmn Mas Vnr Type_BrkFace \\\n",
"Order PID \n",
"144 535153070 0 1 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 1 \n",
"\n",
" Mas Vnr Type_CBlock Mas Vnr Type_None Mas Vnr Type_Stone \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 1 \n",
"490 528290190 0 1 0 \n",
"1730 528218050 0 1 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Misc Feature_Elev Misc Feature_Gar2 Misc Feature_NA \\\n",
"Order PID \n",
"144 535153070 0 0 1 \n",
"1574 916380060 0 0 1 \n",
"490 528290190 0 0 1 \n",
"1730 528218050 0 0 1 \n",
"2276 921128030 0 0 1 \n",
"\n",
" Misc Feature_Othr Misc Feature_Shed Misc Feature_TenC \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Misc Val Mo Sold Neighborhood_Blmngtn \\\n",
"Order PID \n",
"144 535153070 0.0 4 0 \n",
"1574 916380060 0.0 4 0 \n",
"490 528290190 0.0 3 0 \n",
"1730 528218050 0.0 7 0 \n",
"2276 921128030 0.0 9 0 \n",
"\n",
" Neighborhood_Blueste Neighborhood_BrDale \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_BrkSide Neighborhood_ClearCr \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_CollgCr Neighborhood_Crawfor \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_Edwards Neighborhood_Gilbert \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 1 \n",
"1730 528218050 0 1 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_Greens Neighborhood_GrnHill \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_IDOTRR Neighborhood_Landmrk \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_MeadowV Neighborhood_Mitchel \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_NPkVill Neighborhood_NWAmes \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_Names Neighborhood_NoRidge \\\n",
"Order PID \n",
"144 535153070 1 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_NridgHt Neighborhood_OldTown \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_SWISU Neighborhood_Sawyer \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_SawyerW Neighborhood_Somerst \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Neighborhood_StoneBr Neighborhood_Timber \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 1 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 1 \n",
"\n",
" Neighborhood_Veenker Open Porch SF Overall Cond \\\n",
"Order PID \n",
"144 535153070 0 0.0 5 \n",
"1574 916380060 0 26.0 4 \n",
"490 528290190 0 0.0 4 \n",
"1730 528218050 0 72.0 4 \n",
"2276 921128030 0 36.0 4 \n",
"\n",
" Overall Qual Paved Drive Pool Area Pool QC \\\n",
"Order PID \n",
"144 535153070 5 2 0.0 0 \n",
"1574 916380060 7 2 0.0 0 \n",
"490 528290190 6 2 0.0 0 \n",
"1730 528218050 5 2 0.0 0 \n",
"2276 921128030 9 2 0.0 0 \n",
"\n",
" Roof Matl_ClyTile Roof Matl_CompShg Roof Matl_Membran \\\n",
"Order PID \n",
"144 535153070 0 1 0 \n",
"1574 916380060 0 1 0 \n",
"490 528290190 0 1 0 \n",
"1730 528218050 0 1 0 \n",
"2276 921128030 0 1 0 \n",
"\n",
" Roof Matl_Metal Roof Matl_Roll Roof Matl_Tar&Grv \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Roof Matl_WdShake Roof Matl_WdShngl Roof Style_Flat \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Roof Style_Gable Roof Style_Gambrel Roof Style_Hip \\\n",
"Order PID \n",
"144 535153070 0 0 1 \n",
"1574 916380060 0 0 1 \n",
"490 528290190 1 0 0 \n",
"1730 528218050 1 0 0 \n",
"2276 921128030 0 0 1 \n",
"\n",
" Roof Style_Mansard Roof Style_Shed Sale Condition_Abnorml \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Sale Condition_AdjLand Sale Condition_Alloca \\\n",
"Order PID \n",
"144 535153070 0 0 \n",
"1574 916380060 0 0 \n",
"490 528290190 0 0 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Sale Condition_Family Sale Condition_Normal \\\n",
"Order PID \n",
"144 535153070 0 1 \n",
"1574 916380060 0 1 \n",
"490 528290190 0 1 \n",
"1730 528218050 0 0 \n",
"2276 921128030 0 0 \n",
"\n",
" Sale Condition_Partial Sale Type_COD Sale Type_CWD \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 1 0 0 \n",
"2276 921128030 1 0 0 \n",
"\n",
" Sale Type_Con Sale Type_ConLD Sale Type_ConLI \\\n",
"Order PID \n",
"144 535153070 0 0 0 \n",
"1574 916380060 0 0 0 \n",
"490 528290190 0 0 0 \n",
"1730 528218050 0 0 0 \n",
"2276 921128030 0 0 0 \n",
"\n",
" Sale Type_ConLw Sale Type_New Sale Type_Oth Sale Type_VWD \\\n",
"Order PID \n",
"144 535153070 0 0 0 0 \n",
"1574 916380060 0 0 0 0 \n",
"490 528290190 0 0 0 0 \n",
"1730 528218050 0 1 0 0 \n",
"2276 921128030 0 1 0 0 \n",
"\n",
" Sale Type_WD Screen Porch Street_Grvl Street_Pave \\\n",
"Order PID \n",
"144 535153070 1 0.0 0 1 \n",
"1574 916380060 1 0.0 0 1 \n",
"490 528290190 1 0.0 0 1 \n",
"1730 528218050 0 0.0 0 1 \n",
"2276 921128030 0 108.0 0 1 \n",
"\n",
" TotRms AbvGrd Total Bsmt SF Utilities Wood Deck SF \\\n",
"Order PID \n",
"144 535153070 6 1194.0 3 0.0 \n",
"1574 916380060 8 1518.0 3 0.0 \n",
"490 528290190 7 384.0 3 0.0 \n",
"1730 528218050 8 783.0 3 0.0 \n",
"2276 921128030 8 1824.0 3 160.0 \n",
"\n",
" Year Built Year Remod/Add Yr Sold SalePrice \n",
"Order PID \n",
"144 535153070 1959 1959 2010 148000.0 \n",
"1574 916380060 2006 2007 2008 294000.0 \n",
"490 528290190 1999 2000 2009 156000.0 \n",
"1730 528218050 2005 2007 2007 178900.0 \n",
"2276 921128030 2006 2007 2007 392000.0 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Obtain the raw numpy arrays:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"X1 = df1.drop(columns=\"SalePrice\").values\n",
"y1 = df1[\"SalePrice\"].values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Improved Data\n",
"\n",
"The DataFrame `df2` holds the data as manually processed in notebooks 2 and 3.\n",
"\n",
"New features, like the *years_since_\\** columns or *has 2nd Flr* (derived from the continuous *2nd Flr SF*), were generated or derived from other variables. Further, factor variables were created taking into account patterns in the visualizations. For example, the five categories of *Bldg Type* (from `df1`) were condensed into just three. In summary, `df2` has fewer than half as many dimensions as `df1` to mitigate a potential curse of dimensionality."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"df2 = load_clean_data(\"data/data_clean_with_transformations_and_factors.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df2 = encode_ordinals(df2)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"df2 = shuffle(df2, random_state=random_state)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"MultiIndex: 2883 entries, (2774, 907175030) to (2659, 902305090)\n",
"Columns: 106 entries, 1st Flr SF to SalePrice (box-cox-0.0)\n",
"dtypes: float64(24), int64(82)\n",
"memory usage: 2.4 MB\n"
]
}
],
"source": [
"df2.info()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>2nd Flr SF</th>\n",
" <th>3Ssn Porch</th>\n",
" <th>Bedroom AbvGr</th>\n",
" <th>Bsmt Cond</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Full Bath</th>\n",
" <th>Bsmt Half Bath</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>Bsmt Unf SF</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin SF 2</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>BsmtFin Type 2</th>\n",
" <th>Electrical</th>\n",
" <th>Enclosed Porch</th>\n",
" <th>Fence</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Functional</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen AbvGr</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Land Slope</th>\n",
" <th>Lot Area</th>\n",
" <th>Lot Shape</th>\n",
" <th>Low Qual Fin SF</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Misc Val</th>\n",
" <th>Mo Sold</th>\n",
" <th>Open Porch SF</th>\n",
" <th>Overall Cond</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>Pool Area</th>\n",
" <th>Pool QC</th>\n",
" <th>Screen Porch</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.0)</th>\n",
" <th>Utilities</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>abnormal_sale</th>\n",
" <th>air_cond</th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" <th>found_BrkTil</th>\n",
" <th>found_CBlock</th>\n",
" <th>found_PConc</th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Bsmt</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>has Pool</th>\n",
" <th>has Porch</th>\n",
" <th>major_street</th>\n",
" <th>new_home</th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" <th>park</th>\n",
" <th>partial_sale</th>\n",
" <th>railway</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0.0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2774</th>\n",
" <th>907175030</th>\n",
" <td>1525.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>125.0</td>\n",
" <td>1400.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>541.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1525.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>9100.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>244.0</td>\n",
" <td>0.0</td>\n",
" <td>9</td>\n",
" <td>36.0</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>3.0</td>\n",
" <td>1525.0</td>\n",
" <td>255.0</td>\n",
" <td>3050.0</td>\n",
" <td>8.022897</td>\n",
" <td>3</td>\n",
" <td>219.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>235000.0</td>\n",
" <td>12.367341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2139</th>\n",
" <th>907202100</th>\n",
" <td>907.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>399.0</td>\n",
" <td>60.0</td>\n",
" <td>417.0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>308.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>907.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>16300.0</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>876.0</td>\n",
" <td>0.0</td>\n",
" <td>1783.0</td>\n",
" <td>7.486053</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>130000.0</td>\n",
" <td>11.775290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1044</th>\n",
" <th>527451290</th>\n",
" <td>483.0</td>\n",
" <td>504.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>96.0</td>\n",
" <td>387.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>264.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>987.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1680.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>232.0</td>\n",
" <td>0.0</td>\n",
" <td>7</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>1.5</td>\n",
" <td>483.0</td>\n",
" <td>0.0</td>\n",
" <td>1470.0</td>\n",
" <td>7.293018</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>37</td>\n",
" <td>37</td>\n",
" <td>85400.0</td>\n",
" <td>11.355101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>643</th>\n",
" <th>535303110</th>\n",
" <td>1062.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>572.0</td>\n",
" <td>490.0</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>297.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1062.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>8128.0</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>80.0</td>\n",
" <td>0.0</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>2.0</td>\n",
" <td>1062.0</td>\n",
" <td>0.0</td>\n",
" <td>2124.0</td>\n",
" <td>7.661056</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>55</td>\n",
" <td>55</td>\n",
" <td>131000.0</td>\n",
" <td>11.782953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <th>535377090</th>\n",
" <td>1056.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>1056.0</td>\n",
" <td>0.0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>576.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1056.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>6979.0</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>600.0</td>\n",
" <td>6</td>\n",
" <td>56.0</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>2.0</td>\n",
" <td>1056.0</td>\n",
" <td>320.0</td>\n",
" <td>2112.0</td>\n",
" <td>7.655391</td>\n",
" <td>3</td>\n",
" <td>264.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>144000.0</td>\n",
" <td>11.877569</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF 2nd Flr SF 3Ssn Porch Bedroom AbvGr Bsmt Cond \\\n",
"Order PID \n",
"2774 907175030 1525.0 0.0 0.0 3 3 \n",
"2139 907202100 907.0 0.0 0.0 3 3 \n",
"1044 527451290 483.0 504.0 0.0 2 3 \n",
"643 535303110 1062.0 0.0 0.0 3 3 \n",
"159 535377090 1056.0 0.0 0.0 0 3 \n",
"\n",
" Bsmt Exposure Bsmt Full Bath Bsmt Half Bath Bsmt Qual \\\n",
"Order PID \n",
"2774 907175030 3 1 0 4 \n",
"2139 907202100 3 1 0 4 \n",
"1044 527451290 1 0 0 3 \n",
"643 535303110 1 1 0 3 \n",
"159 535377090 1 2 0 3 \n",
"\n",
" Bsmt Unf SF BsmtFin SF 1 BsmtFin SF 2 BsmtFin Type 1 \\\n",
"Order PID \n",
"2774 907175030 125.0 1400.0 0.0 6 \n",
"2139 907202100 399.0 60.0 417.0 3 \n",
"1044 527451290 96.0 387.0 0.0 5 \n",
"643 535303110 572.0 490.0 0.0 5 \n",
"159 535377090 0.0 1056.0 0.0 6 \n",
"\n",
" BsmtFin Type 2 Electrical Enclosed Porch Fence \\\n",
"Order PID \n",
"2774 907175030 1 4 0.0 0 \n",
"2139 907202100 4 4 0.0 3 \n",
"1044 527451290 1 4 0.0 0 \n",
"643 535303110 1 4 0.0 0 \n",
"159 535377090 1 4 0.0 4 \n",
"\n",
" Fireplace Qu Fireplaces Full Bath Functional Garage Area \\\n",
"Order PID \n",
"2774 907175030 0 0 2 7 541.0 \n",
"2139 907202100 0 0 1 7 308.0 \n",
"1044 527451290 0 0 1 7 264.0 \n",
"643 535303110 0 0 1 7 297.0 \n",
"159 535377090 0 0 0 7 576.0 \n",
"\n",
" Garage Cars Garage Cond Garage Finish Garage Qual \\\n",
"Order PID \n",
"2774 907175030 2 3 2 3 \n",
"2139 907202100 1 3 2 3 \n",
"1044 527451290 1 3 1 3 \n",
"643 535303110 1 3 1 3 \n",
"159 535377090 2 3 1 3 \n",
"\n",
" Gr Liv Area Half Bath Kitchen AbvGr Kitchen Qual \\\n",
"Order PID \n",
"2774 907175030 1525.0 0 1 3 \n",
"2139 907202100 907.0 0 1 2 \n",
"1044 527451290 987.0 1 1 2 \n",
"643 535303110 1062.0 0 1 2 \n",
"159 535377090 1056.0 0 2 2 \n",
"\n",
" Land Slope Lot Area Lot Shape Low Qual Fin SF \\\n",
"Order PID \n",
"2774 907175030 2 9100.0 3 0.0 \n",
"2139 907202100 2 16300.0 2 0.0 \n",
"1044 527451290 2 1680.0 3 0.0 \n",
"643 535303110 2 8128.0 2 0.0 \n",
"159 535377090 2 6979.0 3 0.0 \n",
"\n",
" Mas Vnr Area Misc Val Mo Sold Open Porch SF Overall Cond \\\n",
"Order PID \n",
"2774 907175030 244.0 0.0 9 36.0 4 \n",
"2139 907202100 0.0 0.0 1 0.0 3 \n",
"1044 527451290 232.0 0.0 7 0.0 4 \n",
"643 535303110 80.0 0.0 2 0.0 6 \n",
"159 535377090 0.0 600.0 6 56.0 4 \n",
"\n",
" Overall Qual Paved Drive Pool Area Pool QC Screen Porch \\\n",
"Order PID \n",
"2774 907175030 6 2 0.0 0 0.0 \n",
"2139 907202100 4 2 0.0 0 0.0 \n",
"1044 527451290 5 2 0.0 0 0.0 \n",
"643 535303110 5 2 0.0 0 0.0 \n",
"159 535377090 5 2 0.0 0 0.0 \n",
"\n",
" TotRms AbvGrd Total Bath Total Bsmt SF Total Porch SF \\\n",
"Order PID \n",
"2774 907175030 6 3.0 1525.0 255.0 \n",
"2139 907202100 5 2.0 876.0 0.0 \n",
"1044 527451290 4 1.5 483.0 0.0 \n",
"643 535303110 6 2.0 1062.0 0.0 \n",
"159 535377090 4 2.0 1056.0 320.0 \n",
"\n",
" Total SF Total SF (box-cox-0.0) Utilities Wood Deck SF \\\n",
"Order PID \n",
"2774 907175030 3050.0 8.022897 3 219.0 \n",
"2139 907202100 1783.0 7.486053 3 0.0 \n",
"1044 527451290 1470.0 7.293018 3 0.0 \n",
"643 535303110 2124.0 7.661056 3 0.0 \n",
"159 535377090 2112.0 7.655391 3 264.0 \n",
"\n",
" abnormal_sale air_cond build_type_1Fam build_type_2Fam \\\n",
"Order PID \n",
"2774 907175030 0 1 1 0 \n",
"2139 907202100 0 1 1 0 \n",
"1044 527451290 1 1 0 0 \n",
"643 535303110 0 1 1 0 \n",
"159 535377090 0 1 0 1 \n",
"\n",
" build_type_Twnhs found_BrkTil found_CBlock found_PConc \\\n",
"Order PID \n",
"2774 907175030 0 0 0 1 \n",
"2139 907202100 0 0 1 0 \n",
"1044 527451290 1 0 1 0 \n",
"643 535303110 0 0 1 0 \n",
"159 535377090 0 0 1 0 \n",
"\n",
" has 2nd Flr has Bsmt has Fireplace has Garage has Pool \\\n",
"Order PID \n",
"2774 907175030 0 1 0 1 0 \n",
"2139 907202100 0 1 0 1 0 \n",
"1044 527451290 1 1 0 1 0 \n",
"643 535303110 0 1 0 1 0 \n",
"159 535377090 0 1 0 1 0 \n",
"\n",
" has Porch major_street new_home nhood_Blmngtn \\\n",
"Order PID \n",
"2774 907175030 1 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 1 0 0 0 \n",
"\n",
" nhood_Blueste nhood_BrDale nhood_BrkSide nhood_ClearCr \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 1 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_CollgCr nhood_Crawfor nhood_Edwards nhood_Gilbert \\\n",
"Order PID \n",
"2774 907175030 1 0 0 0 \n",
"2139 907202100 1 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_Greens nhood_GrnHill nhood_IDOTRR nhood_Landmrk \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_MeadowV nhood_Mitchel nhood_NPkVill nhood_NWAmes \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_Names nhood_NoRidge nhood_NridgHt nhood_OldTown \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 1 0 0 0 \n",
"159 535377090 0 0 0 1 \n",
"\n",
" nhood_SWISU nhood_Sawyer nhood_SawyerW nhood_Somerst \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_StoneBr nhood_Timber nhood_Veenker park \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" partial_sale railway recently_built recently_remodeled \\\n",
"Order PID \n",
"2774 907175030 0 0 1 1 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" remodeled years_since_built years_since_remodeled \\\n",
"Order PID \n",
"2774 907175030 0 6 6 \n",
"2139 907202100 0 30 30 \n",
"1044 527451290 0 37 37 \n",
"643 535303110 0 55 55 \n",
"159 535377090 0 30 30 \n",
"\n",
" SalePrice SalePrice (box-cox-0.0) \n",
"Order PID \n",
"2774 907175030 235000.0 12.367341 \n",
"2139 907202100 130000.0 11.775290 \n",
"1044 527451290 85400.0 11.355101 \n",
"643 535303110 131000.0 11.782953 \n",
"159 535377090 144000.0 11.877569 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Obtain the raw numpy arrays:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: every column of df2 except the two target columns.\n",
"X2 = df2.drop([\"SalePrice\", \"SalePrice (box-cox-0.0)\"], axis=1).values\n",
"# Target vectors: the sale price and its \"box-cox-0.0\" column, in that order.\n",
"y2, y2l = (\n",
"    df2[target].values for target in (\"SalePrice\", \"SalePrice (box-cox-0.0)\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Manual Feature Pre-Selection\n",
"\n",
"In addition, notebooks 2 and 3 collect the variables that correlate either weakly ($0.33 < \\vert\\rho\\vert < 0.66$) or strongly ($\\vert\\rho\\vert > 0.66$) with *SalePrice* (or its logarithm), as well as those that reveal \"interesting\" visual patterns. Together, these variables serve as a \"naive\" feature pre-selection."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Load the weakly / strongly correlated variable names from their JSON file.\n",
"# The parsed content gets a descriptive name instead of `_`: IPython uses `_`\n",
"# for the most recent cell output and stops updating it once user code\n",
"# assigns to it.\n",
"with open(\"data/correlated_variables.json\", \"r\") as file:\n",
"    correlated_variables = json.load(file)\n",
"\n",
"# The lookups need only the parsed dict, so they run after the file is closed.\n",
"weakly_correlated = correlated_variables[\"weakly_correlated\"]\n",
"strongly_correlated = correlated_variables[\"strongly_correlated\"]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Parse the \"interesting\" variables straight from the open file handle.\n",
"with open(\"data/interesting_variables.json\", \"r\") as file:\n",
"    interesting_variables = json.load(file)"
]
},
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"pre_selection = sorted(\n",
" set(weakly_correlated + strongly_correlated + interesting_variables)\n",
" & set(df2.columns)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `df3` DataFrame is just a column subset of `df2`: the pre-selected features plus the target variables (71 columns in total)."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Keep the pre-selected feature columns, followed by the target columns.\n",
"df3 = df2[[*pre_selection, *TARGET_VARIABLES]]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"MultiIndex: 2883 entries, (2774, 907175030) to (2659, 902305090)\n",
"Columns: 71 entries, 1st Flr SF to SalePrice (box-cox-0.0)\n",
"dtypes: float64(13), int64(58)\n",
"memory usage: 1.6 MB\n"
]
}
],
"source": [
"df3.info(verbose=False)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>1st Flr SF</th>\n",
" <th>Bsmt Exposure</th>\n",
" <th>Bsmt Qual</th>\n",
" <th>BsmtFin SF 1</th>\n",
" <th>BsmtFin Type 1</th>\n",
" <th>Fireplace Qu</th>\n",
" <th>Fireplaces</th>\n",
" <th>Full Bath</th>\n",
" <th>Garage Area</th>\n",
" <th>Garage Cars</th>\n",
" <th>Garage Cond</th>\n",
" <th>Garage Finish</th>\n",
" <th>Garage Qual</th>\n",
" <th>Gr Liv Area</th>\n",
" <th>Half Bath</th>\n",
" <th>Kitchen Qual</th>\n",
" <th>Lot Shape</th>\n",
" <th>Mas Vnr Area</th>\n",
" <th>Overall Qual</th>\n",
" <th>Paved Drive</th>\n",
" <th>TotRms AbvGrd</th>\n",
" <th>Total Bath</th>\n",
" <th>Total Bsmt SF</th>\n",
" <th>Total Porch SF</th>\n",
" <th>Total SF</th>\n",
" <th>Total SF (box-cox-0.0)</th>\n",
" <th>Wood Deck SF</th>\n",
" <th>air_cond</th>\n",
" <th>build_type_1Fam</th>\n",
" <th>build_type_2Fam</th>\n",
" <th>build_type_Twnhs</th>\n",
" <th>has 2nd Flr</th>\n",
" <th>has Fireplace</th>\n",
" <th>has Garage</th>\n",
" <th>major_street</th>\n",
" <th>new_home</th>\n",
" <th>nhood_Blmngtn</th>\n",
" <th>nhood_Blueste</th>\n",
" <th>nhood_BrDale</th>\n",
" <th>nhood_BrkSide</th>\n",
" <th>nhood_ClearCr</th>\n",
" <th>nhood_CollgCr</th>\n",
" <th>nhood_Crawfor</th>\n",
" <th>nhood_Edwards</th>\n",
" <th>nhood_Gilbert</th>\n",
" <th>nhood_Greens</th>\n",
" <th>nhood_GrnHill</th>\n",
" <th>nhood_IDOTRR</th>\n",
" <th>nhood_Landmrk</th>\n",
" <th>nhood_MeadowV</th>\n",
" <th>nhood_Mitchel</th>\n",
" <th>nhood_NPkVill</th>\n",
" <th>nhood_NWAmes</th>\n",
" <th>nhood_Names</th>\n",
" <th>nhood_NoRidge</th>\n",
" <th>nhood_NridgHt</th>\n",
" <th>nhood_OldTown</th>\n",
" <th>nhood_SWISU</th>\n",
" <th>nhood_Sawyer</th>\n",
" <th>nhood_SawyerW</th>\n",
" <th>nhood_Somerst</th>\n",
" <th>nhood_StoneBr</th>\n",
" <th>nhood_Timber</th>\n",
" <th>nhood_Veenker</th>\n",
" <th>recently_built</th>\n",
" <th>recently_remodeled</th>\n",
" <th>remodeled</th>\n",
" <th>years_since_built</th>\n",
" <th>years_since_remodeled</th>\n",
" <th>SalePrice</th>\n",
" <th>SalePrice (box-cox-0.0)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Order</th>\n",
" <th>PID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2774</th>\n",
" <th>907175030</th>\n",
" <td>1525.0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1400.0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>541.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1525.0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>244.0</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>3.0</td>\n",
" <td>1525.0</td>\n",
" <td>255.0</td>\n",
" <td>3050.0</td>\n",
" <td>8.022897</td>\n",
" <td>219.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>235000.0</td>\n",
" <td>12.367341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2139</th>\n",
" <th>907202100</th>\n",
" <td>907.0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>60.0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>308.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>907.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0.0</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2.0</td>\n",
" <td>876.0</td>\n",
" <td>0.0</td>\n",
" <td>1783.0</td>\n",
" <td>7.486053</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>130000.0</td>\n",
" <td>11.775290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1044</th>\n",
" <th>527451290</th>\n",
" <td>483.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>387.0</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>264.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>987.0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>232.0</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1.5</td>\n",
" <td>483.0</td>\n",
" <td>0.0</td>\n",
" <td>1470.0</td>\n",
" <td>7.293018</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>37</td>\n",
" <td>37</td>\n",
" <td>85400.0</td>\n",
" <td>11.355101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>643</th>\n",
" <th>535303110</th>\n",
" <td>1062.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>490.0</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>297.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1062.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>80.0</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>2.0</td>\n",
" <td>1062.0</td>\n",
" <td>0.0</td>\n",
" <td>2124.0</td>\n",
" <td>7.661056</td>\n",
" <td>0.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>55</td>\n",
" <td>55</td>\n",
" <td>131000.0</td>\n",
" <td>11.782953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <th>535377090</th>\n",
" <td>1056.0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1056.0</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>576.0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1056.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>0.0</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2.0</td>\n",
" <td>1056.0</td>\n",
" <td>320.0</td>\n",
" <td>2112.0</td>\n",
" <td>7.655391</td>\n",
" <td>264.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>144000.0</td>\n",
" <td>11.877569</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1st Flr SF Bsmt Exposure Bsmt Qual BsmtFin SF 1 \\\n",
"Order PID \n",
"2774 907175030 1525.0 3 4 1400.0 \n",
"2139 907202100 907.0 3 4 60.0 \n",
"1044 527451290 483.0 1 3 387.0 \n",
"643 535303110 1062.0 1 3 490.0 \n",
"159 535377090 1056.0 1 3 1056.0 \n",
"\n",
" BsmtFin Type 1 Fireplace Qu Fireplaces Full Bath \\\n",
"Order PID \n",
"2774 907175030 6 0 0 2 \n",
"2139 907202100 3 0 0 1 \n",
"1044 527451290 5 0 0 1 \n",
"643 535303110 5 0 0 1 \n",
"159 535377090 6 0 0 0 \n",
"\n",
" Garage Area Garage Cars Garage Cond Garage Finish \\\n",
"Order PID \n",
"2774 907175030 541.0 2 3 2 \n",
"2139 907202100 308.0 1 3 2 \n",
"1044 527451290 264.0 1 3 1 \n",
"643 535303110 297.0 1 3 1 \n",
"159 535377090 576.0 2 3 1 \n",
"\n",
" Garage Qual Gr Liv Area Half Bath Kitchen Qual Lot Shape \\\n",
"Order PID \n",
"2774 907175030 3 1525.0 0 3 3 \n",
"2139 907202100 3 907.0 0 2 2 \n",
"1044 527451290 3 987.0 1 2 3 \n",
"643 535303110 3 1062.0 0 2 2 \n",
"159 535377090 3 1056.0 0 2 3 \n",
"\n",
" Mas Vnr Area Overall Qual Paved Drive TotRms AbvGrd \\\n",
"Order PID \n",
"2774 907175030 244.0 6 2 6 \n",
"2139 907202100 0.0 4 2 5 \n",
"1044 527451290 232.0 5 2 4 \n",
"643 535303110 80.0 5 2 6 \n",
"159 535377090 0.0 5 2 4 \n",
"\n",
" Total Bath Total Bsmt SF Total Porch SF Total SF \\\n",
"Order PID \n",
"2774 907175030 3.0 1525.0 255.0 3050.0 \n",
"2139 907202100 2.0 876.0 0.0 1783.0 \n",
"1044 527451290 1.5 483.0 0.0 1470.0 \n",
"643 535303110 2.0 1062.0 0.0 2124.0 \n",
"159 535377090 2.0 1056.0 320.0 2112.0 \n",
"\n",
" Total SF (box-cox-0.0) Wood Deck SF air_cond \\\n",
"Order PID \n",
"2774 907175030 8.022897 219.0 1 \n",
"2139 907202100 7.486053 0.0 1 \n",
"1044 527451290 7.293018 0.0 1 \n",
"643 535303110 7.661056 0.0 1 \n",
"159 535377090 7.655391 264.0 1 \n",
"\n",
" build_type_1Fam build_type_2Fam build_type_Twnhs \\\n",
"Order PID \n",
"2774 907175030 1 0 0 \n",
"2139 907202100 1 0 0 \n",
"1044 527451290 0 0 1 \n",
"643 535303110 1 0 0 \n",
"159 535377090 0 1 0 \n",
"\n",
" has 2nd Flr has Fireplace has Garage major_street \\\n",
"Order PID \n",
"2774 907175030 0 0 1 0 \n",
"2139 907202100 0 0 1 0 \n",
"1044 527451290 1 0 1 0 \n",
"643 535303110 0 0 1 0 \n",
"159 535377090 0 0 1 0 \n",
"\n",
" new_home nhood_Blmngtn nhood_Blueste nhood_BrDale \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 1 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_BrkSide nhood_ClearCr nhood_CollgCr nhood_Crawfor \\\n",
"Order PID \n",
"2774 907175030 0 0 1 0 \n",
"2139 907202100 0 0 1 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_Edwards nhood_Gilbert nhood_Greens nhood_GrnHill \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_IDOTRR nhood_Landmrk nhood_MeadowV nhood_Mitchel \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_NPkVill nhood_NWAmes nhood_Names nhood_NoRidge \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 1 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_NridgHt nhood_OldTown nhood_SWISU nhood_Sawyer \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 1 0 0 \n",
"\n",
" nhood_SawyerW nhood_Somerst nhood_StoneBr nhood_Timber \\\n",
"Order PID \n",
"2774 907175030 0 0 0 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" nhood_Veenker recently_built recently_remodeled remodeled \\\n",
"Order PID \n",
"2774 907175030 0 1 1 0 \n",
"2139 907202100 0 0 0 0 \n",
"1044 527451290 0 0 0 0 \n",
"643 535303110 0 0 0 0 \n",
"159 535377090 0 0 0 0 \n",
"\n",
" years_since_built years_since_remodeled SalePrice \\\n",
"Order PID \n",
"2774 907175030 6 6 235000.0 \n",
"2139 907202100 30 30 130000.0 \n",
"1044 527451290 37 37 85400.0 \n",
"643 535303110 55 55 131000.0 \n",
"159 535377090 30 30 144000.0 \n",
"\n",
" SalePrice (box-cox-0.0) \n",
"Order PID \n",
"2774 907175030 12.367341 \n",
"2139 907202100 11.775290 \n",
"1044 527451290 11.355101 \n",
"643 535303110 11.782953 \n",
"159 535377090 11.877569 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Obtain the raw numpy arrays:"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"X3 = df3.drop(columns=[\"SalePrice\", \"SalePrice (box-cox-0.0)\"]).values\n",
"y3 = df3[\"SalePrice\"].values\n",
"y3l = df3[\"SalePrice (box-cox-0.0)\"].values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Re-usable Components\n",
"\n",
"Define a function that runs a k-fold cross validation to obtain unbiased estimates of the following scores / errors:\n",
"- Bias\n",
"- Mean Absolute Error\n",
"- Maximum Deviation (just to see the worst case prediction of a model)\n",
"- R2 (coefficient of determination)\n",
"- Root Mean Squared Error (default for comparison)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def cross_validation(X, y, *, model, k=10, log=False, desc=None):\n",
"    \"\"\"Evaluate a model with a k-fold cross validation.\n",
"\n",
"    The model is re-fitted on the training part of every fold and scored\n",
"    on the held-out part. If log is set, the targets and the predictions\n",
"    are passed through np.exp() before any score is calculated.\n",
"\n",
"    Returns a dictionary with the keys bias, mae, max_dev, r2, and rmse.\n",
"    The first four are means over the k folds; rmse is the square root\n",
"    of the mean of the per-fold mean squared errors.\n",
"    \"\"\"\n",
"    per_fold = {\"bias\": [], \"mae\": [], \"max_dev\": [], \"r2\": [], \"mse\": []}\n",
"    folds = KFold(n_splits=k).split(X)\n",
"    for train_idx, test_idx in progress_bar(folds, desc=desc, total=k):\n",
"        model.fit(X[train_idx], y[train_idx])\n",
"        y_true, y_pred = y[test_idx], model.predict(X[test_idx])\n",
"        # Models trained on the log scale are scored on the original\n",
"        # scale so that they can be compared to the non-logged ones.\n",
"        if log:\n",
"            y_true, y_pred = np.exp(y_true), np.exp(y_pred)\n",
"        per_fold[\"bias\"].append(bias_score(y_true, y_pred))\n",
"        per_fold[\"mae\"].append(mean_absolute_error(y_true, y_pred))\n",
"        per_fold[\"max_dev\"].append(max_deviation(y_true, y_pred))\n",
"        per_fold[\"r2\"].append(r2_score(y_true, y_pred))\n",
"        per_fold[\"mse\"].append(mean_squared_error(y_true, y_pred))\n",
"    averages = {name: np.mean(values) for name, values in per_fold.items()}\n",
"    # Round for convenience.\n",
"    return {\n",
"        \"bias\": np.round(averages[\"bias\"]),\n",
"        \"mae\": np.round(averages[\"mae\"]),\n",
"        \"max_dev\": np.round(averages[\"max_dev\"]),\n",
"        \"r2\": np.round(averages[\"r2\"], 3),\n",
"        \"rmse\": np.round(np.sqrt(averages[\"mse\"])),\n",
"    }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use one dictionary to store all the results in a systematic way."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"results = {}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear Regression\n",
"\n",
"A plain OLS regression model serves as the base case for benchmarking."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"lm = LinearRegression()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Original Data\n",
"\n",
"On the unprocessed data, the linear model is not able to achieve a good fit at all."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "718a516b1cb64ff188ca952e99fc11a0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': 43942720.0,\n",
" 'mae': 50106151.0,\n",
" 'max_dev': 13358158031.0,\n",
" 'r2': -664342652.78,\n",
" 'rmse': 2056998619.0}"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lm\", \"o\")] = cross_validation(X1, y1, model=lm)\n",
"results[(\"lm\", \"o\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Improved Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6bf68bfa85504d30ab2ecd507dccfb6c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -63.0,\n",
" 'mae': 15393.0,\n",
" 'max_dev': 122981.0,\n",
" 'r2': 0.918,\n",
" 'rmse': 21992.0}"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lm\", \"i\")] = cross_validation(X2, y2, model=lm)\n",
"results[(\"lm\", \"i\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "22c04a812ac34a41848040d697db5395",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -887.0,\n",
" 'mae': 13090.0,\n",
" 'max_dev': 115077.0,\n",
" 'r2': 0.935,\n",
" 'rmse': 19526.0}"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lm\", \"il\")] = cross_validation(X2, y2l, model=lm, log=True)\n",
"results[(\"lm\", \"il\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Improved Data with pre-selected Features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6076f36e3abc4e808116eb736ef1c981",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': 140914.0,\n",
" 'mae': 1144261.0,\n",
" 'max_dev': 254073652.0,\n",
" 'r2': -300003.791,\n",
" 'rmse': 38864649.0}"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lm\", \"p\")] = cross_validation(X3, y3, model=lm)\n",
"results[(\"lm\", \"p\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "20342cfdbd1d40ee95e7c2391da5d459",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -1355.0,\n",
" 'mae': 14751.0,\n",
" 'max_dev': 149008.0,\n",
" 'r2': 0.913,\n",
" 'rmse': 22606.0}"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lm\", \"pl\")] = cross_validation(X3, y3l, model=lm, log=True)\n",
"results[(\"lm\", \"pl\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LASSO"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"tol = 0.1\n",
"grid_search = GridSearchCV(\n",
" estimator=Lasso(tol=tol, selection=\"random\", random_state=random_state),\n",
" param_grid={\"alpha\": [2 ** x for x in range(-8, 4)] + list(range(12, 65, 4))},\n",
" cv=KFold(n_splits=4),\n",
" n_jobs=-1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Original Data"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X1, y1)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "275f6d85a1da40e7ae8a123a8e9a2fd7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': 278.0,\n",
" 'mae': 20892.0,\n",
" 'max_dev': 269314.0,\n",
" 'r2': 0.817,\n",
" 'rmse': 33496.0}"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lasso\", \"o\")] = cross_validation(X1, y1, model=Lasso(alpha=alpha, tol=tol))\n",
"results[(\"lasso\", \"o\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Improved Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"32"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X2, y2)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d58394144f554d13a8bf430baad544b7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -41.0,\n",
" 'mae': 17545.0,\n",
" 'max_dev': 135602.0,\n",
" 'r2': 0.897,\n",
" 'rmse': 24586.0}"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lasso\", \"i\")] = cross_validation(X2, y2, model=Lasso(alpha=alpha, tol=tol))\n",
"results[(\"lasso\", \"i\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.00390625"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X2, y2l)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fc8fcc1e4e1b4cc4a46b7cb58a401a24",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -868.0,\n",
" 'mae': 14664.0,\n",
" 'max_dev': 126763.0,\n",
" 'r2': 0.921,\n",
" 'rmse': 21641.0}"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lasso\", \"il\")] = cross_validation(\n",
"    X2, y2l, model=Lasso(alpha=alpha, tol=tol), log=True\n",
")\n",
"results[(\"lasso\", \"il\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Improved Data with pre-selected Features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"52"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X3, y3)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "84cc328896c148a9bc4f2d6fff9d7e66",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -57.0,\n",
" 'mae': 20634.0,\n",
" 'max_dev': 152574.0,\n",
" 'r2': 0.859,\n",
" 'rmse': 28793.0}"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lasso\", \"p\")] = cross_validation(X3, y3, model=Lasso(alpha=alpha, tol=tol))\n",
"results[(\"lasso\", \"p\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.00390625"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X3, y3l)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "98653a7e2a7d493b81431ffe1bb05424",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -1041.0,\n",
" 'mae': 15805.0,\n",
" 'max_dev': 131190.0,\n",
" 'r2': 0.91,\n",
" 'rmse': 23041.0}"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"lasso\", \"pl\")] = cross_validation(\n",
"    X3, y3l, model=Lasso(alpha=alpha, tol=tol), log=True\n",
")\n",
"results[(\"lasso\", \"pl\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ridge Regression"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"grid_search = GridSearchCV(\n",
" estimator=Ridge(),\n",
" param_grid={\"alpha\": [2 ** x for x in range(-8, 4)] + list(range(12, 65, 4))},\n",
" cv=KFold(n_splits=4),\n",
" n_jobs=-1,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Original Data"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.125"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X1, y1)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a88a9298736d4dbc9de2823db4f668a3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': 152.0,\n",
" 'mae': 17064.0,\n",
" 'max_dev': 263561.0,\n",
" 'r2': 0.853,\n",
" 'rmse': 29970.0}"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"ridge\", \"o\")] = cross_validation(X1, y1, model=Ridge(alpha=alpha))\n",
"results[(\"ridge\", \"o\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Improved Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.25"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X2, y2)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d37e164bda314ddb8282044d0976efcd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -49.0,\n",
" 'mae': 15371.0,\n",
" 'max_dev': 123212.0,\n",
" 'r2': 0.918,\n",
" 'rmse': 21971.0}"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"ridge\", \"i\")] = cross_validation(X2, y2, model=Ridge(alpha=alpha))\n",
"results[(\"ridge\", \"i\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X2, y2l)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0faab768c5504fb5997b462817f85ae7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -868.0,\n",
" 'mae': 13065.0,\n",
" 'max_dev': 114246.0,\n",
" 'r2': 0.936,\n",
" 'rmse': 19442.0}"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"ridge\", \"il\")] = cross_validation(X2, y2l, model=Ridge(alpha=alpha), log=True)\n",
"results[(\"ridge\", \"il\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Improved Data with pre-selected Features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X3, y3)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2900ffb411e146dfa03672b755adc7ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -51.0,\n",
" 'mae': 16471.0,\n",
" 'max_dev': 126394.0,\n",
" 'r2': 0.908,\n",
" 'rmse': 23255.0}"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"ridge\", \"p\")] = cross_validation(X3, y3, model=Ridge(alpha=alpha))\n",
"results[(\"ridge\", \"p\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grid_search.fit(X3, y3l)\n",
"alpha = grid_search.best_params_[\"alpha\"]\n",
"alpha"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fcad8f88d4cb404389747b9e185f204f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -1114.0,\n",
" 'mae': 14457.0,\n",
" 'max_dev': 123534.0,\n",
" 'r2': 0.924,\n",
" 'rmse': 21169.0}"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"ridge\", \"pl\")] = cross_validation(X3, y3l, model=Ridge(alpha=alpha), log=True)\n",
"results[(\"ridge\", \"pl\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"rf = RandomForestRegressor(n_estimators=500, n_jobs=-1, random_state=random_state)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Original Data"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5f57ede479174cacb7dd5be49972d446",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -26.0,\n",
" 'mae': 15322.0,\n",
" 'max_dev': 164505.0,\n",
" 'r2': 0.898,\n",
" 'rmse': 25354.0}"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"rf\", \"o\")] = cross_validation(X1, y1, model=rf)\n",
"results[(\"rf\", \"o\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Improved Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9bc68055dd914a26a96ac1882e0a6c90",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -75.0,\n",
" 'mae': 14957.0,\n",
" 'max_dev': 130725.0,\n",
" 'r2': 0.911,\n",
" 'rmse': 22960.0}"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"rf\", \"i\")] = cross_validation(X2, y2, model=rf)\n",
"results[(\"rf\", \"i\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "918c0c34b8e84d46bc2f0007114fd14f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -2061.0,\n",
" 'mae': 15023.0,\n",
" 'max_dev': 134360.0,\n",
" 'r2': 0.908,\n",
" 'rmse': 23260.0}"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"rf\", \"il\")] = cross_validation(X2, y2l, model=rf, log=True)\n",
"results[(\"rf\", \"il\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Improved Data with pre-selected Features"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### a) Normal Scale"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5fe3529d68734889921e35d5c29bf5ee",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -430.0,\n",
" 'mae': 15447.0,\n",
" 'max_dev': 133420.0,\n",
" 'r2': 0.906,\n",
" 'rmse': 23572.0}"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"rf\", \"p\")] = cross_validation(X3, y3, model=rf)\n",
"results[(\"rf\", \"p\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### b) Log Scale"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39aa0c8e2bc44ba294aee4711afeaa4c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"{'bias': -2343.0,\n",
" 'mae': 15626.0,\n",
" 'max_dev': 134375.0,\n",
" 'r2': 0.902,\n",
" 'rmse': 23973.0}"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results[(\"rf\", \"pl\")] = cross_validation(X3, y3l, model=rf, log=True)\n",
"results[(\"rf\", \"pl\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analysis of Results\n",
"\n",
"This notebook did not focus on hyper-parameter optimization. Therefore, the predictions by Lasso, Ridge, and the Random Forest can potentially be improved by tuning their hyper-parameters more thoroughly, for example, with a finer-grained grid search.\n",
"\n",
"In general, the manually \"improved\" data clearly outperform the data that were only cleaned with minimum effort. Also, the results suggest letting the models select their features themselves: The manually pre-selected features perform well but not as well as the full feature set."
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"def scores_by_source(source, score=\"rmse\", *, ascending=True):\n",
"    \"\"\"Rank the models evaluated on one data source by a score.\n",
"\n",
"    Collects the given score from every entry in the global results\n",
"    dictionary whose key is (model, source) and returns a list of\n",
"    (model, value) tuples sorted by value.\n",
"    \"\"\"\n",
"    ranking = []\n",
"    for (model, data_source), scores in results.items():\n",
"        if data_source != source:\n",
"            continue\n",
"        ranking.append((model, scores[score]))\n",
"    # list.sort() is stable just like sorted(), so ties keep their order.\n",
"    ranking.sort(key=lambda pair: pair[1], reverse=not ascending)\n",
"    return ranking"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Root Mean Squared Error"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('rf', 25354.0), ('ridge', 29970.0), ('lasso', 33496.0), ('lm', 2056998619.0)]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"o\", \"rmse\")"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 21971.0), ('lm', 21992.0), ('rf', 22960.0), ('lasso', 24586.0)]"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"i\", \"rmse\")"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 19442.0), ('lm', 19526.0), ('lasso', 21641.0), ('rf', 23260.0)]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"il\", \"rmse\")"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 23255.0), ('rf', 23572.0), ('lasso', 28793.0), ('lm', 38864649.0)]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"p\", \"rmse\")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 21169.0), ('lm', 22606.0), ('lasso', 23041.0), ('rf', 23973.0)]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"pl\", \"rmse\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### R2"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('rf', 0.898), ('ridge', 0.853), ('lasso', 0.817), ('lm', -664342652.78)]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"o\", \"r2\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('lm', 0.918), ('ridge', 0.918), ('rf', 0.911), ('lasso', 0.897)]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"i\", \"r2\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 0.936), ('lm', 0.935), ('lasso', 0.921), ('rf', 0.908)]"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"il\", \"r2\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 0.908), ('rf', 0.906), ('lasso', 0.859), ('lm', -300003.791)]"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"p\", \"r2\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('ridge', 0.924), ('lm', 0.913), ('lasso', 0.91), ('rf', 0.902)]"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores_by_source(\"pl\", \"r2\", ascending=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}