Refactor code into a function to print a column list
This commit is contained in:
parent
ffeed18376
commit
488fb69da9
2 changed files with 42 additions and 22 deletions
|
@ -30,7 +30,7 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"2018-08-29 17:18:17 CEST\n",
|
"2018-09-01 16:51:42 CEST\n",
|
||||||
"\n",
|
"\n",
|
||||||
"CPython 3.6.5\n",
|
"CPython 3.6.5\n",
|
||||||
"IPython 6.5.0\n",
|
"IPython 6.5.0\n",
|
||||||
|
@ -60,9 +60,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"import missingno as msno\n",
|
"import missingno as msno\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd"
|
||||||
"\n",
|
|
||||||
"from tabulate import tabulate"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -96,6 +94,7 @@
|
||||||
" ORDINAL_COLUMNS,\n",
|
" ORDINAL_COLUMNS,\n",
|
||||||
" ORDINAL_VARIABLES,\n",
|
" ORDINAL_VARIABLES,\n",
|
||||||
" correct_column_names,\n",
|
" correct_column_names,\n",
|
||||||
|
" print_column_list,\n",
|
||||||
" update_column_descriptions,\n",
|
" update_column_descriptions,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
|
@ -383,8 +382,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"table = ((key, value[\"description\"]) for (key, value) in CONTINUOUS_COLUMNS.items())\n",
|
"print_column_list(CONTINUOUS_COLUMNS)"
|
||||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -749,8 +747,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"table = ((key, value[\"description\"]) for (key, value) in DISCRETE_COLUMNS.items())\n",
|
"print_column_list(DISCRETE_COLUMNS)"
|
||||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1081,8 +1078,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"table = ((key, value[\"description\"]) for (key, value) in NOMINAL_COLUMNS.items())\n",
|
"print_column_list(NOMINAL_COLUMNS)"
|
||||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1635,8 +1631,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"table = ((key, value[\"description\"]) for (key, value) in ORDINAL_COLUMNS.items())\n",
|
"print_column_list(ORDINAL_COLUMNS)"
|
||||||
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -2146,7 +2141,9 @@
|
||||||
"<Figure size 1800x720 with 2 Axes>"
|
"<Figure size 1800x720 with 2 Axes>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"needs_background": "light"
|
||||||
|
},
|
||||||
"output_type": "display_data"
|
"output_type": "display_data"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -2166,7 +2163,9 @@
|
||||||
"<Figure size 1800x720 with 2 Axes>"
|
"<Figure size 1800x720 with 2 Axes>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"needs_background": "light"
|
||||||
|
},
|
||||||
"output_type": "display_data"
|
"output_type": "display_data"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -2186,7 +2185,9 @@
|
||||||
"<Figure size 1800x720 with 2 Axes>"
|
"<Figure size 1800x720 with 2 Axes>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"needs_background": "light"
|
||||||
|
},
|
||||||
"output_type": "display_data"
|
"output_type": "display_data"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -2206,7 +2207,9 @@
|
||||||
"<Figure size 1800x720 with 2 Axes>"
|
"<Figure size 1800x720 with 2 Axes>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"needs_background": "light"
|
||||||
|
},
|
||||||
"output_type": "display_data"
|
"output_type": "display_data"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -2262,8 +2265,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"table = ((col, ALL_COLUMNS[col][\"description\"]) for col in sorted(missing_a_lot))\n",
|
"print_column_list(missing_a_lot)"
|
||||||
"print(tabulate(table, tablefmt=\"plain\"))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
26
utils.py
26
utils.py
|
@ -24,11 +24,12 @@ Implementation Note:
|
||||||
This file defines the "constants" it exports dynamically. This is a bit
|
This file defines the "constants" it exports dynamically. This is a bit
|
||||||
advanced but intentional!
|
advanced but intentional!
|
||||||
"""
|
"""
|
||||||
# pragma pylint:disable=W0603
|
# pragma pylint:disable=global-statement
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import tabulate
|
||||||
|
|
||||||
|
|
||||||
INDEX_COLUMNS = ["Order", "PID"]
|
INDEX_COLUMNS = ["Order", "PID"]
|
||||||
|
@ -204,7 +205,7 @@ def _rename_column(old_name, new_name):
|
||||||
del ALL_COLUMNS[old_name]
|
del ALL_COLUMNS[old_name]
|
||||||
|
|
||||||
|
|
||||||
def correct_column_names(data_columns):
|
def correct_column_names(data_columns, *, repopulate=True):
|
||||||
"""Cross-check the column names between data and description file.
|
"""Cross-check the column names between data and description file.
|
||||||
|
|
||||||
In rare cases, the variable name in the data description file was slightly
|
In rare cases, the variable name in the data description file was slightly
|
||||||
|
@ -235,16 +236,19 @@ def correct_column_names(data_columns):
|
||||||
_rename_column(desc_column, data_column)
|
_rename_column(desc_column, data_column)
|
||||||
break
|
break
|
||||||
# Propagate the change to all "secondary" dictionaries and lists.
|
# Propagate the change to all "secondary" dictionaries and lists.
|
||||||
_populate_dicts_and_lists()
|
if repopulate:
|
||||||
|
_populate_dicts_and_lists()
|
||||||
|
|
||||||
|
|
||||||
def update_column_descriptions(columns_to_be_kept):
|
def update_column_descriptions(columns_to_be_kept, *, correct_columns=False):
|
||||||
"""Remove discarded columns for all the module's exported data structures.
|
"""Remove discarded columns for all the module's exported data structures.
|
||||||
|
|
||||||
After dropping some columns from the DataFrame, these removals must be
|
After dropping some columns from the DataFrame, these removals must be
|
||||||
propagated to the helper data structures defined in this module.
|
propagated to the helper data structures defined in this module.
|
||||||
"""
|
"""
|
||||||
global ALL_COLUMNS
|
global ALL_COLUMNS
|
||||||
|
if correct_columns:
|
||||||
|
correct_column_names(columns_to_be_kept, repopulate=False)
|
||||||
columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept))
|
columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept))
|
||||||
for column in columns_to_be_removed:
|
for column in columns_to_be_removed:
|
||||||
del ALL_COLUMNS[column]
|
del ALL_COLUMNS[column]
|
||||||
|
@ -252,6 +256,20 @@ def update_column_descriptions(columns_to_be_kept):
|
||||||
_populate_dicts_and_lists()
|
_populate_dicts_and_lists()
|
||||||
|
|
||||||
|
|
||||||
|
def print_column_list(subset=None):
    """Print the names and descriptions of (a subset of) the columns.

    The argument may be one of the *_COLUMNS dicts or one of the
    *_VARIABLES lists; in both cases iterating over it yields the column
    names that are looked up in ALL_COLUMNS. Without an argument, every
    name in ALL_VARIABLES is printed.

    The output is a plain-format table of (name, description) rows in
    sorted order.
    """
    if subset is None:
        subset = ALL_VARIABLES
    else:
        # Every requested name must be one of the known variables.
        assert set(subset) <= set(ALL_VARIABLES)
    rows = [(name, ALL_COLUMNS[name]["description"]) for name in subset]
    rows.sort()
    print(tabulate.tabulate(rows, tablefmt="plain"))
|
||||||
|
|
||||||
|
|
||||||
# This code is executed once during import time and
|
# This code is executed once during import time and
|
||||||
# populates all the "constants" directly or indirectly.
|
# populates all the "constants" directly or indirectly.
|
||||||
_extract_meta_data(_get_lines())
|
_extract_meta_data(_get_lines())
|
||||||
|
|
Loading…
Reference in a new issue