Re-factor code into function to print a column list

This commit is contained in:
Alexander Hess 2018-09-01 16:52:46 +02:00
parent ffeed18376
commit 488fb69da9
2 changed files with 42 additions and 22 deletions

View file

@ -30,7 +30,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2018-08-29 17:18:17 CEST\n",
"2018-09-01 16:51:42 CEST\n",
"\n",
"CPython 3.6.5\n",
"IPython 6.5.0\n",
@ -60,9 +60,7 @@
"source": [
"import missingno as msno\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from tabulate import tabulate"
"import pandas as pd"
]
},
{
@ -96,6 +94,7 @@
" ORDINAL_COLUMNS,\n",
" ORDINAL_VARIABLES,\n",
" correct_column_names,\n",
" print_column_list,\n",
" update_column_descriptions,\n",
")"
]
@ -383,8 +382,7 @@
}
],
"source": [
"table = ((key, value[\"description\"]) for (key, value) in CONTINUOUS_COLUMNS.items())\n",
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
"print_column_list(CONTINUOUS_COLUMNS)"
]
},
{
@ -749,8 +747,7 @@
}
],
"source": [
"table = ((key, value[\"description\"]) for (key, value) in DISCRETE_COLUMNS.items())\n",
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
"print_column_list(DISCRETE_COLUMNS)"
]
},
{
@ -1081,8 +1078,7 @@
}
],
"source": [
"table = ((key, value[\"description\"]) for (key, value) in NOMINAL_COLUMNS.items())\n",
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
"print_column_list(NOMINAL_COLUMNS)"
]
},
{
@ -1635,8 +1631,7 @@
}
],
"source": [
"table = ((key, value[\"description\"]) for (key, value) in ORDINAL_COLUMNS.items())\n",
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
"print_column_list(ORDINAL_COLUMNS)"
]
},
{
@ -2146,7 +2141,9 @@
"<Figure size 1800x720 with 2 Axes>"
]
},
"metadata": {},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
@ -2166,7 +2163,9 @@
"<Figure size 1800x720 with 2 Axes>"
]
},
"metadata": {},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
@ -2186,7 +2185,9 @@
"<Figure size 1800x720 with 2 Axes>"
]
},
"metadata": {},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
@ -2206,7 +2207,9 @@
"<Figure size 1800x720 with 2 Axes>"
]
},
"metadata": {},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
@ -2262,8 +2265,7 @@
}
],
"source": [
"table = ((col, ALL_COLUMNS[col][\"description\"]) for col in sorted(missing_a_lot))\n",
"print(tabulate(table, tablefmt=\"plain\"))"
"print_column_list(missing_a_lot)"
]
},
{

View file

@ -24,11 +24,12 @@ Implementation Note:
This file defines the "constants" it exports dynamically. This is a bit
advanced but intentional!
"""
# pragma pylint:disable=W0603
# pragma pylint:disable=global-statement
import re
import requests
import tabulate
INDEX_COLUMNS = ["Order", "PID"]
@ -204,7 +205,7 @@ def _rename_column(old_name, new_name):
del ALL_COLUMNS[old_name]
def correct_column_names(data_columns):
def correct_column_names(data_columns, *, repopulate=True):
"""Cross-check the column names between data and description file.
In rare cases, the variable name in the data description file was slightly
@ -235,16 +236,19 @@ def correct_column_names(data_columns):
_rename_column(desc_column, data_column)
break
# Propagate the change to all "secondary" dictionaries and lists.
_populate_dicts_and_lists()
if repopulate:
_populate_dicts_and_lists()
def update_column_descriptions(columns_to_be_kept):
def update_column_descriptions(columns_to_be_kept, *, correct_columns=False):
"""Remove discarded columns for all the module's exported data structures.
After dropping some columns from the DataFrame, these removals must be
propagated to the helper data structures defined in this module.
"""
global ALL_COLUMNS
if correct_columns:
correct_column_names(columns_to_be_kept, repopulate=False)
columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept))
for column in columns_to_be_removed:
del ALL_COLUMNS[column]
@ -252,6 +256,20 @@ def update_column_descriptions(columns_to_be_kept):
_populate_dicts_and_lists()
def print_column_list(subset=None):
    """Print (a subset of) the data's column headers with their descriptions.

    The output is a plain-format table, sorted by column name, with one row
    per column: the name followed by its "description" entry in ALL_COLUMNS.

    Note that this function is built to handle both *_COLUMNS dicts and
    *_VARIABLES lists: only the names obtained by iterating over the
    argument are used (for a dict, these are its keys).

    Args:
        subset: iterable of column names to be printed; defaults to
            ALL_VARIABLES if omitted or None.

    Raises:
        ValueError: if subset contains a name that is not in ALL_VARIABLES.
    """
    if subset is None:
        subset = ALL_VARIABLES
    else:
        # Materialize once so that one-shot iterables (e.g., generators)
        # survive the validation below and can still be printed afterwards.
        subset = list(subset)
        # Validate explicitly instead of with an assert statement, which
        # is skipped entirely when Python runs with the -O flag.
        unknown = set(subset) - set(ALL_VARIABLES)
        if unknown:
            raise ValueError(
                "unknown column(s): " + ", ".join(sorted(map(str, unknown)))
            )
    columns = sorted((c, ALL_COLUMNS[c]["description"]) for c in subset)
    print(tabulate.tabulate(columns, tablefmt="plain"))
# This code is executed once during import time and
# populates all the "constants" directly or indirectly.
_extract_meta_data(_get_lines())