Refactor code into a function to print a column list
This commit is contained in:
parent
ffeed18376
commit
488fb69da9
2 changed files with 42 additions and 22 deletions
|
@ -30,7 +30,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2018-08-29 17:18:17 CEST\n",
|
||||
"2018-09-01 16:51:42 CEST\n",
|
||||
"\n",
|
||||
"CPython 3.6.5\n",
|
||||
"IPython 6.5.0\n",
|
||||
|
@ -60,9 +60,7 @@
|
|||
"source": [
|
||||
"import missingno as msno\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from tabulate import tabulate"
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -96,6 +94,7 @@
|
|||
" ORDINAL_COLUMNS,\n",
|
||||
" ORDINAL_VARIABLES,\n",
|
||||
" correct_column_names,\n",
|
||||
" print_column_list,\n",
|
||||
" update_column_descriptions,\n",
|
||||
")"
|
||||
]
|
||||
|
@ -383,8 +382,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"table = ((key, value[\"description\"]) for (key, value) in CONTINUOUS_COLUMNS.items())\n",
|
||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
||||
"print_column_list(CONTINUOUS_COLUMNS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -749,8 +747,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"table = ((key, value[\"description\"]) for (key, value) in DISCRETE_COLUMNS.items())\n",
|
||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
||||
"print_column_list(DISCRETE_COLUMNS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1081,8 +1078,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"table = ((key, value[\"description\"]) for (key, value) in NOMINAL_COLUMNS.items())\n",
|
||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
||||
"print_column_list(NOMINAL_COLUMNS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -1635,8 +1631,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"table = ((key, value[\"description\"]) for (key, value) in ORDINAL_COLUMNS.items())\n",
|
||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
||||
"print_column_list(ORDINAL_COLUMNS)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -2146,7 +2141,9 @@
|
|||
"<Figure size 1800x720 with 2 Axes>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"needs_background": "light"
|
||||
},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
|
@ -2166,7 +2163,9 @@
|
|||
"<Figure size 1800x720 with 2 Axes>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"needs_background": "light"
|
||||
},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
|
@ -2186,7 +2185,9 @@
|
|||
"<Figure size 1800x720 with 2 Axes>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"needs_background": "light"
|
||||
},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
|
@ -2206,7 +2207,9 @@
|
|||
"<Figure size 1800x720 with 2 Axes>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"needs_background": "light"
|
||||
},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
|
@ -2262,8 +2265,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"table = ((col, ALL_COLUMNS[col][\"description\"]) for col in sorted(missing_a_lot))\n",
|
||||
"print(tabulate(table, tablefmt=\"plain\"))"
|
||||
"print_column_list(missing_a_lot)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
26
utils.py
26
utils.py
|
@ -24,11 +24,12 @@ Implementation Note:
|
|||
This file defines the "constants" it exports dynamically. This is a bit
|
||||
advanced but intentional!
|
||||
"""
|
||||
# pragma pylint:disable=W0603
|
||||
# pragma pylint:disable=global-statement
|
||||
|
||||
import re
|
||||
|
||||
import requests
|
||||
import tabulate
|
||||
|
||||
|
||||
INDEX_COLUMNS = ["Order", "PID"]
|
||||
|
@ -204,7 +205,7 @@ def _rename_column(old_name, new_name):
|
|||
del ALL_COLUMNS[old_name]
|
||||
|
||||
|
||||
def correct_column_names(data_columns):
|
||||
def correct_column_names(data_columns, *, repopulate=True):
|
||||
"""Cross-check the column names between data and description file.
|
||||
|
||||
In rare cases, the variable name in the data description file was slightly
|
||||
|
@ -235,16 +236,19 @@ def correct_column_names(data_columns):
|
|||
_rename_column(desc_column, data_column)
|
||||
break
|
||||
# Propagate the change to all "secondary" dictionaries and lists.
|
||||
_populate_dicts_and_lists()
|
||||
if repopulate:
|
||||
_populate_dicts_and_lists()
|
||||
|
||||
|
||||
def update_column_descriptions(columns_to_be_kept):
|
||||
def update_column_descriptions(columns_to_be_kept, *, correct_columns=False):
|
||||
"""Remove discarded columns for all the module's exported data structures.
|
||||
|
||||
After dropping some columns from the DataFrame, these removals must be
|
||||
propagated to the helper data structures defined in this module.
|
||||
"""
|
||||
global ALL_COLUMNS
|
||||
if correct_columns:
|
||||
correct_column_names(columns_to_be_kept, repopulate=False)
|
||||
columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept))
|
||||
for column in columns_to_be_removed:
|
||||
del ALL_COLUMNS[column]
|
||||
|
@ -252,6 +256,20 @@ def update_column_descriptions(columns_to_be_kept):
|
|||
_populate_dicts_and_lists()
|
||||
|
||||
|
||||
def print_column_list(subset=None):
    """Print (a subset of) the data's column headers.

    Every column is printed as one row of a plain-text table holding the
    column's name and its description; the rows are sorted by column name.

    Note that this function is built to handle both *_COLUMNS dicts and
    *_VARIABLES lists.

    Args:
        subset: iterable of column names (iterating over a *_COLUMNS dict
            yields its keys); defaults to everything in ALL_VARIABLES.

    Raises:
        ValueError: if subset contains a name that is not in ALL_VARIABLES.
    """
    if subset is None:
        subset = ALL_VARIABLES
    else:
        # Materialize once so that a one-shot iterable is not exhausted by
        # the validation below before the table is built.
        subset = list(subset)
        # Validate explicitly; an assert statement would be stripped when
        # Python runs with the -O flag.
        unknown = set(subset) - set(ALL_VARIABLES)
        if unknown:
            names = ", ".join(sorted(map(str, unknown)))
            raise ValueError(f"Unknown column(s): {names}")
    columns = sorted((c, ALL_COLUMNS[c]["description"]) for c in subset)
    print(tabulate.tabulate(columns, tablefmt="plain"))
|
||||
|
||||
|
||||
# This code is executed once during import time and
|
||||
# populates all the "constants" directly or indirectly.
|
||||
_extract_meta_data(_get_lines())
|
||||
|
|
Loading…
Reference in a new issue