Add utility function to update helper dicts / lists

This commit is contained in:
Alexander Hess 2018-08-29 11:20:47 +02:00
commit 441f121350
2 changed files with 25 additions and 23 deletions

View file

@ -30,7 +30,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2018-08-29 10:31:14 CEST\n",
"2018-08-29 11:19:59 CEST\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.5.0\n",
@ -94,6 +94,7 @@
" ORDINAL_COLUMNS,\n",
" ORDINAL_VARIABLES,\n",
" correct_column_names,\n",
" update_column_descriptions,\n",
")"
]
},
@ -2175,19 +2176,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Also remove the discarded columns from the helper lists.\n",
"ALL_VARIABLES = sorted(set(ALL_VARIABLES) - set(missing_a_lot))\n",
"CONTINUOUS_VARIABLES = sorted(set(CONTINUOUS_VARIABLES) - set(missing_a_lot))\n",
"DISCRETE_VARIABLES = sorted(set(DISCRETE_VARIABLES) - set(missing_a_lot))\n",
"NUMERIC_VARIABLES = sorted(set(NUMERIC_VARIABLES) - set(missing_a_lot))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Remove the discarded columns from the helper dictionaries / lists.\n",
"update_column_descriptions(df.columns)\n",
"# Without any more missing data, convert\n",
"# the discrete columns to the correct data type.\n",
"for column in DISCRETE_VARIABLES:\n",
@ -2210,7 +2200,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 34,
"metadata": {},
"outputs": [
{
@ -2219,7 +2209,7 @@
"(2898, 78)"
]
},
"execution_count": 35,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@ -2230,7 +2220,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@ -2970,7 +2960,7 @@
"5 527105010 1997 1998 2010 189900 "
]
},
"execution_count": 36,
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@ -2981,7 +2971,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [