Refactor code into a function that prints a column list

This commit is contained in:
Alexander Hess 2018-09-01 16:52:46 +02:00
parent ffeed18376
commit 488fb69da9
2 changed files with 42 additions and 22 deletions

View file

@ -30,7 +30,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"2018-08-29 17:18:17 CEST\n", "2018-09-01 16:51:42 CEST\n",
"\n", "\n",
"CPython 3.6.5\n", "CPython 3.6.5\n",
"IPython 6.5.0\n", "IPython 6.5.0\n",
@ -60,9 +60,7 @@
"source": [ "source": [
"import missingno as msno\n", "import missingno as msno\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd"
"\n",
"from tabulate import tabulate"
] ]
}, },
{ {
@ -96,6 +94,7 @@
" ORDINAL_COLUMNS,\n", " ORDINAL_COLUMNS,\n",
" ORDINAL_VARIABLES,\n", " ORDINAL_VARIABLES,\n",
" correct_column_names,\n", " correct_column_names,\n",
" print_column_list,\n",
" update_column_descriptions,\n", " update_column_descriptions,\n",
")" ")"
] ]
@ -383,8 +382,7 @@
} }
], ],
"source": [ "source": [
"table = ((key, value[\"description\"]) for (key, value) in CONTINUOUS_COLUMNS.items())\n", "print_column_list(CONTINUOUS_COLUMNS)"
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
] ]
}, },
{ {
@ -749,8 +747,7 @@
} }
], ],
"source": [ "source": [
"table = ((key, value[\"description\"]) for (key, value) in DISCRETE_COLUMNS.items())\n", "print_column_list(DISCRETE_COLUMNS)"
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
] ]
}, },
{ {
@ -1081,8 +1078,7 @@
} }
], ],
"source": [ "source": [
"table = ((key, value[\"description\"]) for (key, value) in NOMINAL_COLUMNS.items())\n", "print_column_list(NOMINAL_COLUMNS)"
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
] ]
}, },
{ {
@ -1635,8 +1631,7 @@
} }
], ],
"source": [ "source": [
"table = ((key, value[\"description\"]) for (key, value) in ORDINAL_COLUMNS.items())\n", "print_column_list(ORDINAL_COLUMNS)"
"print(tabulate(sorted(table), tablefmt=\"plain\"))"
] ]
}, },
{ {
@ -2146,7 +2141,9 @@
"<Figure size 1800x720 with 2 Axes>" "<Figure size 1800x720 with 2 Axes>"
] ]
}, },
"metadata": {}, "metadata": {
"needs_background": "light"
},
"output_type": "display_data" "output_type": "display_data"
} }
], ],
@ -2166,7 +2163,9 @@
"<Figure size 1800x720 with 2 Axes>" "<Figure size 1800x720 with 2 Axes>"
] ]
}, },
"metadata": {}, "metadata": {
"needs_background": "light"
},
"output_type": "display_data" "output_type": "display_data"
} }
], ],
@ -2186,7 +2185,9 @@
"<Figure size 1800x720 with 2 Axes>" "<Figure size 1800x720 with 2 Axes>"
] ]
}, },
"metadata": {}, "metadata": {
"needs_background": "light"
},
"output_type": "display_data" "output_type": "display_data"
} }
], ],
@ -2206,7 +2207,9 @@
"<Figure size 1800x720 with 2 Axes>" "<Figure size 1800x720 with 2 Axes>"
] ]
}, },
"metadata": {}, "metadata": {
"needs_background": "light"
},
"output_type": "display_data" "output_type": "display_data"
} }
], ],
@ -2262,8 +2265,7 @@
} }
], ],
"source": [ "source": [
"table = ((col, ALL_COLUMNS[col][\"description\"]) for col in sorted(missing_a_lot))\n", "print_column_list(missing_a_lot)"
"print(tabulate(table, tablefmt=\"plain\"))"
] ]
}, },
{ {

View file

@ -24,11 +24,12 @@ Implementation Note:
This file defines the "constants" it exports dynamically. This is a bit This file defines the "constants" it exports dynamically. This is a bit
advanced but intentional! advanced but intentional!
""" """
# pragma pylint:disable=W0603 # pragma pylint:disable=global-statement
import re import re
import requests import requests
import tabulate
INDEX_COLUMNS = ["Order", "PID"] INDEX_COLUMNS = ["Order", "PID"]
@ -204,7 +205,7 @@ def _rename_column(old_name, new_name):
del ALL_COLUMNS[old_name] del ALL_COLUMNS[old_name]
def correct_column_names(data_columns): def correct_column_names(data_columns, *, repopulate=True):
"""Cross-check the column names between data and description file. """Cross-check the column names between data and description file.
In rare cases, the variable name in the data description file was slightly In rare cases, the variable name in the data description file was slightly
@ -235,16 +236,19 @@ def correct_column_names(data_columns):
_rename_column(desc_column, data_column) _rename_column(desc_column, data_column)
break break
# Propagate the change to all "secondary" dictionaries and lists. # Propagate the change to all "secondary" dictionaries and lists.
if repopulate:
_populate_dicts_and_lists() _populate_dicts_and_lists()
def update_column_descriptions(columns_to_be_kept): def update_column_descriptions(columns_to_be_kept, *, correct_columns=False):
"""Remove discarded columns for all the module's exported data structures. """Remove discarded columns for all the module's exported data structures.
After dropping some columns from the DataFrame, these removals must be After dropping some columns from the DataFrame, these removals must be
propagated to the helper data structures defined in this module. propagated to the helper data structures defined in this module.
""" """
global ALL_COLUMNS global ALL_COLUMNS
if correct_columns:
correct_column_names(columns_to_be_kept, repopulate=False)
columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept)) columns_to_be_removed = list(set(ALL_COLUMNS) - set(columns_to_be_kept))
for column in columns_to_be_removed: for column in columns_to_be_removed:
del ALL_COLUMNS[column] del ALL_COLUMNS[column]
@ -252,6 +256,20 @@ def update_column_descriptions(columns_to_be_kept):
_populate_dicts_and_lists() _populate_dicts_and_lists()
def print_column_list(subset=None):
    """Print (a subset of) the data's column headers with their descriptions.

    The output is a plain-text table with one row per column, sorted by
    column name, that shows each name next to the description stored for it
    in ALL_COLUMNS.

    Args:
        subset: iterable of column names to show; both the *_COLUMNS dicts
            (iterating over a dict yields its keys) and the *_VARIABLES
            lists work. If None (the default), all columns listed in
            ALL_VARIABLES are printed.

    Raises:
        ValueError: if subset contains names that are not in ALL_VARIABLES.
    """
    if subset is None:
        subset = ALL_VARIABLES
    else:
        # Materialize once so that one-shot iterables (e.g., generators)
        # are still available after the validation below.
        subset = list(subset)
        # Validate explicitly instead of with an assert statement because
        # asserts are stripped when Python runs with the -O flag.
        unknown = set(subset) - set(ALL_VARIABLES)
        if unknown:
            names = ", ".join(sorted(map(str, unknown)))
            raise ValueError("Unknown column(s): " + names)
    columns = sorted((c, ALL_COLUMNS[c]["description"]) for c in subset)
    print(tabulate.tabulate(columns, tablefmt="plain"))
# This code is executed once during import time and # This code is executed once during import time and
# populates all the "constants" directly or indirectly. # populates all the "constants" directly or indirectly.
_extract_meta_data(_get_lines()) _extract_meta_data(_get_lines())