Create a data folder for all static files

This commit is contained in:
Alexander Hess 2018-09-03 16:12:15 +02:00
parent 069691cca1
commit fd91de812d
8 changed files with 10 additions and 10 deletions

View file

@ -30,7 +30,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"2018-09-03 15:32:42 CEST\n", "2018-09-03 16:10:26 CEST\n",
"\n", "\n",
"CPython 3.6.5\n", "CPython 3.6.5\n",
"IPython 6.5.0\n", "IPython 6.5.0\n",
@ -174,13 +174,13 @@
"}\n", "}\n",
"\n", "\n",
"try:\n", "try:\n",
" df = pd.read_excel(\"data_raw.xls\", **kwargs)\n", " df = pd.read_excel(\"data/data_raw.xls\", **kwargs)\n",
"except FileNotFoundError:\n", "except FileNotFoundError:\n",
" df = pd.read_excel(\n", " df = pd.read_excel(\n",
" \"https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls\", **kwargs\n", " \"https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls\", **kwargs\n",
" )\n", " )\n",
" # Cache the obtained file.\n", " # Cache the obtained file.\n",
" df.to_excel(\"data_raw.xls\")" " df.to_excel(\"data/data_raw.xls\")"
] ]
}, },
{ {
@ -3085,7 +3085,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.to_csv(\"data_clean.csv\")" "df.to_csv(\"data/data_clean.csv\")"
] ]
} }
], ],

View file

@ -27,7 +27,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"2018-09-03 15:55:55 CEST\n", "2018-09-03 16:11:06 CEST\n",
"\n", "\n",
"CPython 3.6.5\n", "CPython 3.6.5\n",
"IPython 6.5.0\n", "IPython 6.5.0\n",
@ -2130,7 +2130,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open(\"weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n", "with open(\"data/weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
" file.write(json.dumps({\n", " file.write(json.dumps({\n",
" \"weakly_correlated\": sorted(\n", " \"weakly_correlated\": sorted(\n",
" list(pearson_weakly_correlated) + list(spearman_weakly_correlated)\n", " list(pearson_weakly_correlated) + list(spearman_weakly_correlated)\n",
@ -3009,7 +3009,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.to_csv(\"data_clean_with_transformations.csv\")" "df.to_csv(\"data/data_clean_with_transformations.csv\")"
] ]
} }
], ],

View file

Can't render this file because it is too large.

View file

Can't render this file because it is too large.

View file

@ -58,7 +58,7 @@ def _get_lines():
"""Obtain the non-empty lines of the data description file.""" """Obtain the non-empty lines of the data description file."""
# Read cached data file. # Read cached data file.
try: try:
with open("data_documentation.txt", "r") as file: with open("data/data_documentation.txt", "r") as file:
lines = file.readlines() lines = file.readlines()
# If there is no cached file, obtain it from the original source. # If there is no cached file, obtain it from the original source.
except FileNotFoundError: except FileNotFoundError:
@ -67,7 +67,7 @@ def _get_lines():
"/jse/v19n3/decock/DataDocumentation.txt" "/jse/v19n3/decock/DataDocumentation.txt"
) )
# Cache the retrieved file. # Cache the retrieved file.
with open("data_documentation.txt", "w") as file: with open("data/data_documentation.txt", "w") as file:
file.write(response.text) file.write(response.text)
lines = response.text.split("\r\n") lines = response.text.split("\r\n")
# Remove header, footer, and empty lines. # Remove header, footer, and empty lines.
@ -318,7 +318,7 @@ def load_clean_data(subset=None, ordinal_encoded=False):
""" """
# pragma pylint:disable=invalid-name # pragma pylint:disable=invalid-name
df = pd.read_csv( df = pd.read_csv(
"data_clean.csv", "data/data_clean.csv",
index_col=INDEX_COLUMNS, index_col=INDEX_COLUMNS,
dtype=object, dtype=object,
na_values="", # There are no missing values in the clean data file. na_values="", # There are no missing values in the clean data file.