Create a data folder for all static files

parent 069691cca1
commit fd91de812d

8 changed files with 10 additions and 10 deletions
@@ -30,7 +30,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"2018-09-03 15:32:42 CEST\n",
+"2018-09-03 16:10:26 CEST\n",
 "\n",
 "CPython 3.6.5\n",
 "IPython 6.5.0\n",
@@ -174,13 +174,13 @@
 "}\n",
 "\n",
 "try:\n",
-"    df = pd.read_excel(\"data_raw.xls\", **kwargs)\n",
+"    df = pd.read_excel(\"data/data_raw.xls\", **kwargs)\n",
 "except FileNotFoundError:\n",
 "    df = pd.read_excel(\n",
 "        \"https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls\", **kwargs\n",
 "    )\n",
 "    # Cache the obtained file.\n",
-"    df.to_excel(\"data_raw.xls\")"
+"    df.to_excel(\"data/data_raw.xls\")"
 ]
 },
 {
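The cell changed above uses a cache-or-download pattern: read the local copy first, and fall back to the original source only when the cache is missing. A minimal standalone sketch of that pattern under the new data/ layout; the empty kwargs dict and the makedirs guard are assumptions, not part of this commit:

import os

import pandas as pd

kwargs = {}  # hypothetical read options; the notebook defines its own
os.makedirs("data", exist_ok=True)  # assumed guard: the folder must exist before the cache write
try:
    # Read the cached local copy if it exists.
    df = pd.read_excel("data/data_raw.xls", **kwargs)
except FileNotFoundError:
    # Otherwise download from the original source ...
    df = pd.read_excel(
        "https://www.amstat.org/publications/jse/v19n3/decock/AmesHousing.xls", **kwargs
    )
    # ... and cache the obtained file for the next run.
    df.to_excel("data/data_raw.xls")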
@@ -3085,7 +3085,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df.to_csv(\"data_clean.csv\")"
+"df.to_csv(\"data/data_clean.csv\")"
 ]
 }
 ],
@@ -27,7 +27,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"2018-09-03 15:55:55 CEST\n",
+"2018-09-03 16:11:06 CEST\n",
 "\n",
 "CPython 3.6.5\n",
 "IPython 6.5.0\n",
@@ -2130,7 +2130,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"with open(\"weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
+"with open(\"data/weakly_and_strongly_correlated_variables.json\", \"w\") as file:\n",
 "    file.write(json.dumps({\n",
 "        \"weakly_correlated\": sorted(\n",
 "            list(pearson_weakly_correlated) + list(spearman_weakly_correlated)\n",
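The JSON file written above can be read back with the standard library. A hypothetical consumer; only the file name and the "weakly_correlated" key come from the diff, the rest is illustrative:

import json

with open("data/weakly_and_strongly_correlated_variables.json") as file:
    correlated = json.load(file)
weak = correlated["weakly_correlated"]  # sorted list of column names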
@@ -3009,7 +3009,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"df.to_csv(\"data_clean_with_transformations.csv\")"
+"df.to_csv(\"data/data_clean_with_transformations.csv\")"
 ]
 }
 ],

Can't render this file because it is too large.
utils.py (6 changes)
@@ -58,7 +58,7 @@ def _get_lines():
     """Obtain the non-empty lines of the data description file."""
     # Read cached data file.
     try:
-        with open("data_documentation.txt", "r") as file:
+        with open("data/data_documentation.txt", "r") as file:
             lines = file.readlines()
     # If there is no cached file, obtain in from the original source.
     except FileNotFoundError:
@@ -67,7 +67,7 @@ def _get_lines():
             "/jse/v19n3/decock/DataDocumentation.txt"
         )
         # Cache the retrieved file.
-        with open("data_documentation.txt", "w") as file:
+        with open("data/data_documentation.txt", "w") as file:
             file.write(response.text)
         lines = response.text.split("\r\n")
    # Remove header, footer, and empty lines.
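Pieced together, the two hunks above give _get_lines roughly the following shape; the requests import, the response assignment, the first half of the URL literal, and the final filtering step are not shown in the diff and are assumed here:

import requests

def _get_lines():
    """Obtain the non-empty lines of the data description file."""
    # Read cached data file.
    try:
        with open("data/data_documentation.txt", "r") as file:
            lines = file.readlines()
    # If there is no cached file, obtain it from the original source.
    except FileNotFoundError:
        response = requests.get(  # assumed call; the diff omits it
            "https://www.amstat.org/publications"  # assumed base URL
            "/jse/v19n3/decock/DataDocumentation.txt"
        )
        # Cache the retrieved file.
        with open("data/data_documentation.txt", "w") as file:
            file.write(response.text)
        lines = response.text.split("\r\n")
    # Remove header, footer, and empty lines.
    return [line.strip() for line in lines if line.strip()]  # assumed filtering step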
@@ -318,7 +318,7 @@ def load_clean_data(subset=None, ordinal_encoded=False):
     """
     # pragma pylint:disable=invalid-name
     df = pd.read_csv(
-        "data_clean.csv",
+        "data/data_clean.csv",
         index_col=INDEX_COLUMNS,
         dtype=object,
         na_values="",  # There are no missing values in the clean data file.
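Callers are unaffected by the path change since it is resolved inside utils.py. A hypothetical call site, assuming the module is importable from the project root:

import utils

df = utils.load_clean_data()  # now reads data/data_clean.csv transparently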