Create a data folder for all static files

parent 069691cca1
commit fd91de812d

8 changed files with 10 additions and 10 deletions
@@ -30,7 +30,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"2018-09-03 15:32:42 CEST\n",
+"2018-09-03 16:10:26 CEST\n",
 "\n",
 "CPython 3.6.5\n",
 "IPython 6.5.0\n",
@@ -174,13 +174,13 @@
 "}\n",
 "\n",
 "try:\n",
-"    df = pd.read_excel(\"data_raw.xls\", **kwargs)\n",
+"    df = pd.read_excel(\"data/data_raw.xls\", **kwargs)\n",
 "except FileNotFoundError:\n",
 "    df = pd.read_excel(\n",
 "        \"https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls\", **kwargs\n",
 "    )\n",
 "    # Cache the obtained file.\n",
-"    df.to_excel(\"data_raw.xls\")"
+"    df.to_excel(\"data/data_raw.xls\")"
 ]
 },
 {
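The cell changed above uses a cache-or-download pattern: read the local copy first, and fall back to the original source only when the cache is missing. A minimal standalone sketch of that pattern under the new data/ layout; the empty kwargs dict and the makedirs guard are assumptions, not part of this commit:

import os

import pandas as pd

kwargs = {}  # hypothetical read options; the notebook defines its own
os.makedirs("data", exist_ok=True)  # assumed guard: the folder must exist before the cache write
try:
    # Read the cached local copy if it exists.
    df = pd.read_excel("data/data_raw.xls", **kwargs)
except FileNotFoundError:
    # Otherwise download from the original source ...
    df = pd.read_excel(
        "https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls", **kwargs
    )
    # ... and cache the obtained file for the next run.
    df.to_excel("data/data_raw.xls")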
@@ -3085,7 +3085,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df.to_csv(\"data_clean.csv\")"
+"df.to_csv(\"data/data_clean.csv\")"
 ]
 }
 ],
@@ -27,7 +27,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"2018-09-03 15:55:55 CEST\n",
+"2018-09-03 16:11:06 CEST\n",
 "\n",
 "CPython 3.6.5\n",
 "IPython 6.5.0\n",
@@ -2130,7 +2130,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"with open(\"weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
+"with open(\"data/weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
 "    file.write(json.dumps({\n",
 "        \"weakly_correlated\": sorted(\n",
 "            list(pearson_weakly_correlated) + list(spearman_weakly_correlated)\n",
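The JSON file written above can be read back with the standard library. A hypothetical consumer; only the file name and the "weakly_correlated" key come from the diff, the rest is illustrative:

import json

with open("data/weakly_and_strongly_correlated_variables.json") as file:
    correlated = json.load(file)
weak = correlated["weakly_correlated"]  # sorted list of column names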
@@ -3009,7 +3009,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df.to_csv(\"data_clean_with_transformations.csv\")"
+"df.to_csv(\"data/data_clean_with_transformations.csv\")"
 ]
 }
 ],

Can't render this file because it is too large.
utils.py (6 changes)
@@ -58,7 +58,7 @@ def _get_lines():
     """Obtain the non-empty lines of the data description file."""
     # Read cached data file.
     try:
-        with open("data_documentation.txt", "r") as file:
+        with open("data/data_documentation.txt", "r") as file:
             lines = file.readlines()
     # If there is no cached file, obtain in from the original source.
     except FileNotFoundError:
@@ -67,7 +67,7 @@ def _get_lines():
             "/jse/v19n3/decock/DataDocumentation.txt"
         )
         # Cache the retrieved file.
-        with open("data_documentation.txt", "w") as file:
+        with open("data/data_documentation.txt", "w") as file:
             file.write(response.text)
         lines = response.text.split("\r\n")
    # Remove header, footer, and empty lines.
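Pieced together, the two hunks above give _get_lines roughly the following shape; the requests import, the response assignment, the first half of the URL literal, and the final filtering step are not shown in the diff and are assumed here:

import requests

def _get_lines():
    """Obtain the non-empty lines of the data description file."""
    # Read cached data file.
    try:
        with open("data/data_documentation.txt", "r") as file:
            lines = file.readlines()
    # If there is no cached file, obtain it from the original source.
    except FileNotFoundError:
        response = requests.get(  # assumed call; the diff omits it
            "https://www.amstat.org/publications"  # assumed base URL
            "/jse/v19n3/decock/DataDocumentation.txt"
        )
        # Cache the retrieved file.
        with open("data/data_documentation.txt", "w") as file:
            file.write(response.text)
        lines = response.text.split("\r\n")
    # Remove header, footer, and empty lines.
    return [line.strip() for line in lines if line.strip()]  # assumed filtering step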
@@ -318,7 +318,7 @@ def load_clean_data(subset=None, ordinal_encoded=False):
     """
     # pragma pylint:disable=invalid-name
     df = pd.read_csv(
-        "data_clean.csv",
+        "data/data_clean.csv",
         index_col=INDEX_COLUMNS,
         dtype=object,
         na_values="",  # There are no missing values in the clean data file.
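Callers are unaffected by the path change since it is resolved inside utils.py. A hypothetical call site, assuming the module is importable from the project root:

import utils

df = utils.load_clean_data()  # now reads data/data_clean.csv transparently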