diff --git a/.gitignore b/.gitignore index 267d6b19884255674e73f9905b977ddb2e03e36e..203b2ce152a335058b03aa4796835afeeaf59d13 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .ipynb_checkpoints/ .python-version .venv/ +.DS_Store diff --git a/1_column_headers_are_values.ipynb b/1_encabezados_columnas_son_valores.ipynb similarity index 64% rename from 1_column_headers_are_values.ipynb rename to 1_encabezados_columnas_son_valores.ipynb index b01a4b58f54a31ccfe324fb368eda03324b5825c..4df9158b58adb662568892e71c0484be60be0a65 100644 --- a/1_column_headers_are_values.ipynb +++ b/1_encabezados_columnas_son_valores.ipynb @@ -4,12 +4,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Column Headers are Values, not Variable Names\n", + "# Encabezados de las columnas son valores\n", "\n", - "This notebook shows two examples of how column headers display values. These type of messy datasets have practical use in two kinds of settings:\n", + "Este notebook muestra dos ejemplos de cómo los encabezados o nombres de las columnas muestran valores. Este tipo de \"messy datasets\" tiene uso práctico en dos tipos de situaciones:\n", "\n", - "1. Presentations\n", - "2. Recordings of regularly spaced observations over time" + "1. Presentaciones\n", + "2. Registro de observaciones espaciadas regularmente en el tiempo." ] }, { @@ -21,118 +21,351 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext lab_black" - ] - }, - { - "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import datetime\n", "import re\n", "\n", - "import pandas as pd\n", - "import savReaderWriter as spss" + "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Example 1: Religion vs. Income\n", + "## Ejemplo 1: Religion vs. 
Income\n", "\n", - "> A common type of messy dataset is tabular data designed for **presentation**, where variables\n", - "form both the rows and columns, and column headers are values, not variable names.\n", + "> Un tipo de dataset messy común son los datos tabulares diseñados para **presentación**, donde las variables forman tanto filas como columnas, y los encabezados de las columnas son valores, y no nombres de las variables.\n", "\n", - "The [Pew Research Center](http://www.pewresearch.org/) provides many studies on all kinds of aspects of life in the USA. The following examples uses data taken from its [Religious Landscape Study](http://www.pewforum.org/religious-landscape-study/)." + "El [Pew Research Center](http://www.pewresearch.org/) es un centro de estudios muy prolífico e influyente en investigación sobre todo tipo de aspectos de la vida en EE. UU. Los siguientes ejemplos usan datos tomados del [Religious Landscape Study](http://www.pewforum.org/religious-landscape-study/)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Data\n", + "### Cargando la data\n", "\n", - "The data are provided as a SPSS data file. This is a binary specification with a built-in header section describing the data, for example, what variables / columns are included and what the realizations categorical data can have." + "Los datos son entregados en un archivo de datos de SPSS. Esta es una especificación binaria con una sección de encabezado describiendo los datos, por ejemplo, qué variables/columnas están incluidas y qué instancias pueden tener los datos categóricos." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Load the dataset's meta data." + "Cargando la \"metadata\" del dataset." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weightpsraidint_datelangtypecregionstateusrusr1form...q63educincomeregistregicertpartypartylnideopvote04apvote04b
04.51282110000001.050807.0EnglishRDDNortheastConnecticutSuburbanSuburbanForm A...Yes, father born outside U.S.Technical, trade, or vocational school AFTER h...75 to under $100,000Yes, registeredAbsolutely certainRepublicanNaNModerateVotedBush
12.10256410000002.050807.0EnglishRDDNortheastMaineRuralRuralForm B...No, both parents born in U.S.High school graduate (Grade 12 or GED certific...20 to under $30,000No, not registeredNaNRepublicanNaNConservativeDid not vote (includes too young to vote)NaN
21.28205110000003.050807.0EnglishRDDNortheastMaineRuralRuralForm A...No, both parents born in U.S.College graduate (B.S., B.A., or other 4-year ...30 to under $40,000No, not registeredNaNIndependentDemocratConservativeDid not vote (includes too young to vote)NaN
31.35532310000004.050807.0EnglishRDDNortheastMaineRuralRuralForm B...No, both parents born in U.S.Some college, no 4-year degree (including asso...Less than $10,000No, not registeredNaNIndependentDemocratModerateDid not vote (includes too young to vote)NaN
41.58974410000005.050807.0EnglishRDDNortheastNew YorkUrbanUrbanForm A...Yes, father born outside U.S.Post-graduate training or professional schooli...50 to under $75,000Yes, registeredAbsolutely certainIndependentDemocratModerateVotedOther candidate
\n", + "

5 rows × 135 columns

\n", + "
" + ], + "text/plain": [ + " weight psraid int_date lang type cregion state \\\n", + "0 4.512821 10000001.0 50807.0 English RDD Northeast Connecticut \n", + "1 2.102564 10000002.0 50807.0 English RDD Northeast Maine \n", + "2 1.282051 10000003.0 50807.0 English RDD Northeast Maine \n", + "3 1.355323 10000004.0 50807.0 English RDD Northeast Maine \n", + "4 1.589744 10000005.0 50807.0 English RDD Northeast New York \n", + "\n", + " usr usr1 form ... q63 \\\n", + "0 Suburban Suburban Form A ... Yes, father born outside U.S. \n", + "1 Rural Rural Form B ... No, both parents born in U.S. \n", + "2 Rural Rural Form A ... No, both parents born in U.S. \n", + "3 Rural Rural Form B ... No, both parents born in U.S. \n", + "4 Urban Urban Form A ... Yes, father born outside U.S. \n", + "\n", + " educ income \\\n", + "0 Technical, trade, or vocational school AFTER h... 75 to under $100,000 \n", + "1 High school graduate (Grade 12 or GED certific... 20 to under $30,000 \n", + "2 College graduate (B.S., B.A., or other 4-year ... 30 to under $40,000 \n", + "3 Some college, no 4-year degree (including asso... Less than $10,000 \n", + "4 Post-graduate training or professional schooli... 
50 to under $75,000 \n", + "\n", + " regist regicert party partyln \\\n", + "0 Yes, registered Absolutely certain Republican NaN \n", + "1 No, not registered NaN Republican NaN \n", + "2 No, not registered NaN Independent Democrat \n", + "3 No, not registered NaN Independent Democrat \n", + "4 Yes, registered Absolutely certain Independent Democrat \n", + "\n", + " ideo pvote04a pvote04b \n", + "0 Moderate Voted Bush \n", + "1 Conservative Did not vote (includes too young to vote) NaN \n", + "2 Conservative Did not vote (includes too young to vote) NaN \n", + "3 Moderate Did not vote (includes too young to vote) NaN \n", + "4 Moderate Voted Other candidate \n", + "\n", + "[5 rows x 135 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "columns = [\"q16\", \"reltrad\", \"income\"]\n", - "encodings = {}\n", - "\n", - "# For the sake of simplicity, all data cleaning operations\n", - "# are done within the for-loop for all columns.\n", - "with spss.SavHeaderReader(\"data/pew.sav\") as pew:\n", - " for column in columns:\n", - " encodings[column] = {\n", - " int(key): (\n", - " re.sub(\n", - " r\"\\(.*\\)\",\n", - " \"\",\n", - " (\n", - " value.decode(\"iso-8859-1\")\n", - " .replace(\"\\x92\", \"'\")\n", - " .replace(\" Churches\", \"\")\n", - " .replace(\"Less than $10,000\", \"<$10k\")\n", - " .replace(\"10 to under $20,000\", \"$10-20k\")\n", - " .replace(\"20 to under $30,000\", \"$20-30k\")\n", - " .replace(\"30 to under $40,000\", \"$30-40k\")\n", - " .replace(\"40 to under $50,000\", \"$40-50k\")\n", - " .replace(\"50 to under $75,000\", \"$50-75k\")\n", - " .replace(\"75 to under $100,000\", \"$75-100k\")\n", - " .replace(\"100 to under $150,000\", \"$100-150k\")\n", - " .replace(\"$150,000 or more\", \">150k\")\n", - " ),\n", - " ).strip()\n", - " )\n", - " for (key, value) in pew.all().valueLabels[column.encode()].items()\n", - " }" + "pew_data = pd.read_spss(\"data/pew.sav\")\n", + 
"pew_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Load the actual data and prepare them as they are presented in the paper." + "Cargando la data y preparándola tal como está presentada en el *paper*." ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Evangelical Protestant Churches' 'Mainline Protestant Churches'\n", + " 'Unaffiliated' 'Jewish' 'Don’t know/refused' 'Other Faiths'\n", + " 'Historically Black Protestant Churches' \"Jehovah's Witness\" 'Catholic'\n", + " 'Buddhist' 'Mormon' 'Muslim' 'Hindu' 'Other Christian' 'Orthodox'\n", + " 'Other World Religions']\n", + "['$75-100k' '$20-30k' '$30-40k' '<$10k' '$50-75k' '>150k' '$40-50k'\n", + " \"Don't know/Refused\" '$100-150k' '$10-20k']\n", + "['Protestant' 'Nothing in particular' 'Jewish' 'Not Interpretable'\n", + " 'Liberal faith' 'Jehovah’s Witness' 'Atheist' 'Christian' 'Agnostic'\n", + " 'Unitarian' 'Roman Catholic' 'Buddhist' 'Mormon' 'Muslim'\n", + " 'Don’t know/Refused' 'Pagan' 'Eclectic, a bit of everything, own beliefs'\n", + " 'New Age' 'Hindu' 'Spiritual but not religious'\n", + " 'Unity; Unity Church Christ Church Unity' 'Deist' 'Orthodox'\n", + " 'Mixed Christians' 'Mixed Christian and non-Christian' 'Pantheist'\n", + " 'Native American Religions' 'Shinto' 'Wica' 'Bahai' 'Religious science'\n", + " 'Armenian Catholic' 'Zoroastrianism' 'New Apostolic Church' 'Asatru'\n", + " 'Spiritualist' 'Messianic Jews' 'Christian Scientists' 'Shamanism'\n", + " 'Nihilist' 'Humanist' 'Eckankar' 'Metaphysical'\n", + " 'Transcendental meditation/Meditation' 'Scientology'\n", + " 'Mixed non-Christians' 'National Catholic; Polish National Catholic'\n", + " 'Indian Shaker Church' 'Hebrew Israelite/African Hebrew Israelites'\n", + " 'International Bible Students' 'Tao' 'Druid' 'Rastafarian' 'Sikh'\n", + " 'Siddhayoga' 'New Thought' 'Maronite 
Catholic' 'Self realization'\n", + " 'Theosophy' 'Satanism' 'Unification Church' 'Greek rite Catholic'\n", + " 'Old Catholic'\n", + " 'Lutheran Orthodox Church/The Catholic Church - Lutheran Rite' 'Animism']\n" + ] + } + ], "source": [ - "with spss.SavReader(\n", - " \"data/pew.sav\", selectVars=[column.encode() for column in columns]\n", - ") as pew:\n", - " pew = list(pew)\n", + "pew_data['reltrad'] = pew_data['reltrad'].replace( to_replace=\" Churches\", value=\"\").str.replace(\n", + " pat= \"\\(.*\\)\",repl=\"\",regex=True).str.strip()\n", + "\n", + "pew_data['income'] = pew_data['income'].replace(\"Less than $10,000\", \"<$10k\").replace(\n", + " \"10 to under $20,000\", \"$10-20k\").replace(\"20 to under $30,000\", \"$20-30k\").replace(\n", + " \"30 to under $40,000\", \"$30-40k\").replace(\"40 to under $50,000\", \"$40-50k\").replace(\n", + " \"50 to under $75,000\", \"$50-75k\").replace(\n", + " \"75 to under $100,000\", \"$75-100k\").replace(\n", + " \"100 to under $150,000\", \"$100-150k\").replace(\n", + " \"$150,000 or more\", \">150k\").str.replace(\n", + " pat= \"\\(.*\\)\",repl=\"\",regex=True).str.strip()\n", "\n", - "# Use the above encodings to map the numeric data\n", - "# to the actual labels.\n", - "pew = pd.DataFrame(pew, columns=columns, dtype=int)\n", - "for column in columns:\n", - " pew[column] = pew[column].map(encodings[column])\n", + "pew_data['q16'] = pew_data['q16'].str.replace(pat= \"\\(.*\\)\",repl=\"\",regex=True).str.strip()\n", "\n", + "\n", + "print(pew_data['reltrad'].unique())\n", + "\n", + "print(pew_data['income'].unique())\n", + "\n", + "print(pew_data['q16'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ "for value in (\"Atheist\", \"Agnostic\"):\n", - " pew.loc[(pew[\"q16\"] == value), \"reltrad\"] = value\n", + " pew_data.loc[(pew_data[\"q16\"] == value), \"reltrad\"] = value\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + 
"metadata": {}, + "outputs": [], + "source": [ "\n", "income_columns = [\n", " \"<$10k\",\n", @@ -146,8 +379,16 @@ " \">150k\",\n", " \"Don't know/Refused\",\n", "]\n", - "\n", - "pew = pew.groupby([\"reltrad\", \"income\"]).size().unstack(\"income\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "pew = pew_data.groupby([\"reltrad\", \"income\"]).size().unstack(\"income\")\n", "pew = pew[income_columns]\n", "pew.index.name = \"religion\"" ] @@ -158,12 +399,12 @@ "source": [ "### Messy Data\n", "\n", - "The next cell shows the data as they can actually be provided as \"raw\" data (i.e., the pre-processing as done above is assumed to be done by someone else and the data analyst is only presented with the below dataset)." + "La siguiente celda muestra la data \"sin procesar\" (\"cruda\", raw-data) (es decir, se supone que el preprocesamiento realizado anteriormente lo realiza otra persona y al analista de datos solo se le entrega el conjunto de datos a continuación)." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -172,7 +413,7 @@ "(18, 10)" ] }, - "execution_count": 5, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -183,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +527,7 @@ " 1489\n", " \n", " \n", - " Don't know/refused\n", + " Don’t know/refused\n", " 15\n", " 14\n", " 15\n", @@ -299,7 +540,7 @@ " 116\n", " \n", " \n", - " Evangelical Protestant\n", + " Evangelical Protestant Churches\n", " 575\n", " 869\n", " 1064\n", @@ -325,7 +566,7 @@ " 37\n", " \n", " \n", - " Historically Black Protestant\n", + " Historically Black Protestant Churches\n", " 228\n", " 244\n", " 236\n", @@ -368,47 +609,47 @@ "" ], "text/plain": [ - "income <$10k $10-20k $20-30k $30-40k $40-50k \\\n", + "income <$10k $10-20k $20-30k $30-40k \\\n", "religion \n", - "Agnostic 27 34 60 81 76 \n", - "Atheist 12 27 37 52 35 \n", - "Buddhist 27 21 30 34 33 \n", - "Catholic 418 617 732 670 638 \n", - "Don't know/refused 15 14 15 11 10 \n", - "Evangelical Protestant 575 869 1064 982 881 \n", - "Hindu 1 9 7 9 11 \n", - "Historically Black Protestant 228 244 236 238 197 \n", - "Jehovah's Witness 20 27 24 24 21 \n", - "Jewish 19 19 25 25 30 \n", + "Agnostic 27 34 60 81 \n", + "Atheist 12 27 37 52 \n", + "Buddhist 27 21 30 34 \n", + "Catholic 418 617 732 670 \n", + "Don’t know/refused 15 14 15 11 \n", + "Evangelical Protestant Churches 575 869 1064 982 \n", + "Hindu 1 9 7 9 \n", + "Historically Black Protestant Churches 228 244 236 238 \n", + "Jehovah's Witness 20 27 24 24 \n", + "Jewish 19 19 25 25 \n", "\n", - "income $50-75k $75-100k $100-150k >150k \\\n", - "religion \n", - "Agnostic 137 122 109 84 \n", - "Atheist 70 73 59 74 \n", - "Buddhist 58 62 39 53 \n", - "Catholic 1116 949 792 633 \n", - "Don't know/refused 35 21 17 18 \n", - "Evangelical Protestant 1486 949 723 414 \n", - "Hindu 34 
47 48 54 \n", - "Historically Black Protestant 223 131 81 78 \n", - "Jehovah's Witness 30 15 11 6 \n", - "Jewish 95 69 87 151 \n", + "income $40-50k $50-75k $75-100k $100-150k \\\n", + "religion \n", + "Agnostic 76 137 122 109 \n", + "Atheist 35 70 73 59 \n", + "Buddhist 33 58 62 39 \n", + "Catholic 638 1116 949 792 \n", + "Don’t know/refused 10 35 21 17 \n", + "Evangelical Protestant Churches 881 1486 949 723 \n", + "Hindu 11 34 47 48 \n", + "Historically Black Protestant Churches 197 223 131 81 \n", + "Jehovah's Witness 21 30 15 11 \n", + "Jewish 30 95 69 87 \n", "\n", - "income Don't know/Refused \n", - "religion \n", - "Agnostic 96 \n", - "Atheist 76 \n", - "Buddhist 54 \n", - "Catholic 1489 \n", - "Don't know/refused 116 \n", - "Evangelical Protestant 1529 \n", - "Hindu 37 \n", - "Historically Black Protestant 339 \n", - "Jehovah's Witness 37 \n", - "Jewish 162 " + "income >150k Don't know/Refused \n", + "religion \n", + "Agnostic 84 96 \n", + "Atheist 74 76 \n", + "Buddhist 53 54 \n", + "Catholic 633 1489 \n", + "Don’t know/refused 18 116 \n", + "Evangelical Protestant Churches 414 1529 \n", + "Hindu 54 37 \n", + "Historically Black Protestant Churches 78 339 \n", + "Jehovah's Witness 6 37 \n", + "Jewish 151 162 " ] }, - "execution_count": 6, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -423,16 +664,16 @@ "source": [ "### Tidy Data\n", "\n", - "> This dataset has **three** variables, **religion**, **income** and **frequency**. To tidy it, we need to **melt**, or stack it. In other words, we need to turn columns into rows.\n", + "> Este dataset tiene **tres** variables, **religion**, **income** y **frequency**. Para hacerlo *tidy*, necesitamos *\"fundirlo\"* (hacer un **melt**), o apilarlo. 
En otras palabras, necesitamos convertir columnas en filas.\n", "\n", - "pandas provides a [pd.melt()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html) function to un-pivot the dataset.\n", + "`pandas` provee un método [pd.melt()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html) para *des-pivotear* el dataset.\n", "\n", - "**Notes:** `.reset_index()` transforms the religion index column into a data column (`pd.melt()` needs that). Further, the resulting table is sorted implicitly by the `\"religion\"` column. To get to the same ordering as in the paper, the molten table is explicitly sorted." + "**Notas:** `.reset_index()` transforma la columna de índice de religión en una columna de datos (`pd.melt()` lo necesita). Además, la tabla resultante se ordena implícitamente por la columna `\"religión\"`. Para llegar al mismo orden que en el *paper*, la tabla *fundida* se ordena explícitamente." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -441,7 +682,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -453,7 +694,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -462,7 +703,7 @@ "(180, 3)" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -473,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -581,7 +822,7 @@ "9 Agnostic Don't know/Refused 96" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -594,23 +835,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Example 2: Billboard\n", + "## Ejemplo 2: Billboard\n", "\n", - "> Another common use of this data format is to record regularly spaced observations over 
time. For example, the Billboard dataset shown in Table 7 records the date a song first entered the Billboard Top 100. It has variables for **artist**, **track**, **date.entered**, **rank** and **week**. The rank in each week after it enters the top 100 is recorded in 75 columns, wk1 to wk75. If a song is in the Top 100 for less than 75 weeks the remaining columns are filled with missing values. This form of storage is not tidy, but it is useful for data entry. It reduces duplication since otherwise each song in each week would need its own row, and song metadata like title and artist would need to be repeated." + "> Otro uso común de este formato de datos es registrar observaciones espaciadas regularmente a lo largo del tiempo. Por ejemplo, el conjunto de datos de Billboard que se muestra en la Tabla 7, registra la fecha en que una canción ingresó por primera vez al Billboard Top 100. Tiene variables para **artist**, **track**, **date.entered**, **rank** y **week**. El rango en cada semana después de que ingresa al top 100 se registra en 75 columnas, `wk1` a `wk75`. Si una canción está en el Top 100 por menos de 75 semanas, las columnas restantes se llenan con valores faltantes. Esta forma de almacenamiento no es ordenada, pero es útil para la entrada de datos. Reduce la duplicación ya que, de lo contrario, cada canción de cada semana necesitaría su propia fila, y los metadatos de la canción, como el título y el artista, tendrían que repetirse.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Data\n", + "### Cargando la data\n", "\n", - "The data come in a CSV file with tediously named week columns." + "Los datos vienen en un archivo CSV con columnas con nombre de número de semana de una manera engorrosa." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -657,12 +898,12 @@ "source": [ "### Messy Data\n", "\n", - "Again, the next cell shows the data as they were actually provided as \"raw\" data." + "De nuevo, la siguiente celda muestra los datos como si fueran realmente entregados como datos \"crudos\"." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -671,7 +912,7 @@ "(267, 80)" ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -682,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1015,7 +1256,7 @@ "[10 rows x 80 columns]" ] }, - "execution_count": 13, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1030,12 +1271,12 @@ "source": [ "### \"Tidy\" Data\n", "\n", - "As before the `pd.melt()` function is used to transform the data from \"wide\" to \"long\" form." + "Como antes, el método `pd.melt()` se usa para transformar los datos desde un formato \"wide\" (\"ancho\") a uno \"largo\" (\"long\")." ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -1051,12 +1292,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In contrast to R, pandas keeps (unneccesary) rows for weeks where the song was already out of the charts. These are discarded. Also, a new column`\"date\"` indicating when exactly a particular song was at a certain rank in the charts is added." + "A diferencia de `R`, `pandas` mantiene (innecesariamente según algunos) filas para semanas donde la canción ya estaba fuera del ranking. Estas observaciones son descartadas. También, una nueva columna `\"date\"` es añadida, indicando cuando exactamente una canción en particular estuvo en un cierto ranking." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -1084,12 +1325,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that this dataset is not yet fully tidy as will be explained in notebook No. 4." + "Ten en cuenta que este conjunto de datos aún no está completamente `tidy`, como se explicará en el notebook 4." ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1313,7 +1554,7 @@ "14 Kryptonite 2000-05-06 5 66 " ] }, - "execution_count": 16, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1326,14 +1567,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Save the Data\n", - "\n", - "The above \"tidy\" billboard dataset is saved as input for notebook No. 4." + "### Guardando los datos\n", + "El dataset de Billboard ya \"ordenado\" (\"tidy\") es guardado como input para el notebook 4." ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1343,7 +1583,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.14 ('tidy')", "language": "python", "name": "python3" }, @@ -1357,7 +1597,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } } }, "nbformat": 4, diff --git a/2_multiple_variables_stored_in_one_column.ipynb b/2_multiples_variables_almacenadas_en_una_columna.ipynb similarity index 93% rename from 2_multiple_variables_stored_in_one_column.ipynb rename to 2_multiples_variables_almacenadas_en_una_columna.ipynb index 535b8c0da43f8a3ba9e9694c0e1265c2ccdd416f..61ee7cd0784fb2b189869e23bc81a4036f607bc6 100644 --- a/2_multiple_variables_stored_in_one_column.ipynb +++ 
b/2_multiples_variables_almacenadas_en_una_columna.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Multiple Variables stored in one Column\n", + "# Múltiples variables almacenadas en una columna\n", "\n", - "This notebook shows how multiple variables stored in the same column can be isolated." + "Este cuaderno muestra cómo se pueden aislar varias variables almacenadas en la misma columna." ] }, { @@ -21,15 +21,6 @@ "execution_count": 1, "metadata": {}, "outputs": [], - "source": [ - "%load_ext lab_black" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], "source": [ "import pandas as pd" ] @@ -38,21 +29,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Example: Tuberculosis" + "## Ejemplo: Tuberculosis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Data\n", + "### Cargando la Data\n", "\n", - "Select the same columns as in the paper and name them accordingly." + "Seleccionamos las mismas columnas que en el *paper* y le asignamos el nombre correspondiente." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -88,13 +79,14 @@ "metadata": {}, "source": [ "### Messy Data\n", + "Se asume que los datos se proporcionan de la siguiente manera. Excepto por las columnas `\"country\"` y `\"year\"`, las columnas restantes son en realidad realizaciones conjuntas de dos variables `\"sex\"` y `\"age\"`.\n", "\n", "The data are assumed to be provided as below. Except for the `\"country\"` and `\"year\"` columns, the remaining columns are actually joint realizations of two variables `\"sex\"` and `\"age\"`." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -379,7 +371,7 @@ "265 NaN NaN NaN NaN 1.0 NaN NaN NaN " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -392,14 +384,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Molten Data\n", + "### *Molten* Data\n", "\n", - "As in the previous notebook the [pd.melt()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html) function can be used to un-pivot the columns. As before, pandas keeps rows for columns with missing data that are discarded. Then, without any more missing values, the column's data type is casted as `int`. Furthermore, the resulting *molten* dataset is sorted as in the paper." + "Al igual que en el notebook anterior, la función [pd.melt()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.melt.html) se puede usar para despivotear las columnas. Como antes, pandas mantiene las filas de las columnas con *missing data*, los que se descartan. Luego, sin más *missing data*, el tipo de datos de la columna se convierte como `int`. Luego, el conjunto de datos *fundido* resultante es ordenado como en el paper.\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -413,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -532,7 +524,7 @@ "167 AE 2000 f2534 1" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -546,13 +538,12 @@ "metadata": {}, "source": [ "### Tidy Data\n", - "\n", - "Using the [pd.Series.str.extract()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.extract.html) method the two variables are isolated. The age labels are renamed as in the paper." 
+ "Usando el método [pd.Series.str.extract()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.extract.html) las dos variables son aisladas/separadas (\"split\"). Las etiquetas de `age` se renombran como en el documento." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -575,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -705,7 +696,7 @@ "167 AE 2000 f 25-34 1" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -717,7 +708,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.14 ('tidy')", "language": "python", "name": "python3" }, @@ -731,7 +722,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } } }, "nbformat": 4, diff --git a/3_variables_are_stored_in_both_rows_and_columns.ipynb b/3_variables_almacenadas_tanto_en_filas_y_en_columnas.ipynb similarity index 90% rename from 3_variables_are_stored_in_both_rows_and_columns.ipynb rename to 3_variables_almacenadas_tanto_en_filas_y_en_columnas.ipynb index 1c1d3d54438bb6163f677b2ae2fea6f8404840a6..4b60f7a804f2ab2feaf593bd9595f49bcba0ace7 100644 --- a/3_variables_are_stored_in_both_rows_and_columns.ipynb +++ b/3_variables_almacenadas_tanto_en_filas_y_en_columnas.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Variables are stored in both Rows and Columns" + "# Variables son almacenadas tanto en filas como en columnas" ] }, { @@ -19,15 +19,6 @@ "execution_count": 1, "metadata": {}, "outputs": [], - "source": [ - "%load_ext lab_black" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], 
"source": [ "import numpy as np\n", "import pandas as pd" @@ -35,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -46,23 +37,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Example: Weather\n", + "## Ejemplo: Clima\n", "\n", - "The [Global Historical Climatology Network](https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/global-historical-climatology-network-ghcn) collects daily weather. For this example, data for one weather station (MX17004) in Mexico are used." + "El [Global Historical Climatology Network](https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/global-historical-climatology-network-ghcn) recolecta datos de clima de manera diaria. Para este ejemplo, se usan datos de una estación climática en Mexico (MX17004)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Data\n", + "### Cargando datos\n", "\n", - "The raw dataset comes in a format that is a mixture of a fixed-width style with occasional usage of characters as seperators. Some tedious cleaning work is necessary." + "El raw dataset viene en un formato que es una mezcla de datos de ancho fijo con uso ocasional de caracteres como separadores. Se necesita hacer una limpieza al respecto." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -70,7 +61,7 @@ "# use string slicing to obtain groups of columns.\n", "weather = pd.read_csv(\"data/weather.txt\", header=None, sep=\"^\")\n", "\n", - "# First, remove the weird character seperators,\n", + "# First, remove the weird character separators,\n", "# then split the columns by whitespace, and\n", "# finally name them appropriately.\n", "days = (\n", @@ -118,15 +109,15 @@ "source": [ "### Messy Data\n", "\n", - "Below is a dataset assumed to have been provided like this as \"raw\", i.e., the data analyst did not do the above parsing work but some third party instead.\n", + "A continuación hay un dataset que asumimos fue entregado al analista de datos tal como está, en \"crudo\"; es decir, el trabajo de \"parsing\" anterior no lo realizó el analista, sino un tercero.\n", "\n", - "> The most complicated form of messy data occurs when variables are stored in both rows and columns. Table 11 shows daily weather data from the Global Historical Climatology Network for one weather station (MX17004) in Mexico for five months in 2010. It has variables in\n", - "individual columns (`\"id\"`, `\"year\"`, `\"month\"`), spread across columns (day, `\"d1\"`–`\"d31\"`) and across rows (`\"tmin\"` and `\"tmax\"` for the minimum and maximum temperatures). Months with less than 31 days have missing values for the last day(s) of the month. The `\"element\"` column is not a variable: it stores the *names* of variables." + "> La forma más complicada de *messy data* ocurre cuando las variables se almacenan tanto en filas como en columnas. La Tabla 11 muestra datos meteorológicos diarios del Global Historical Climatology Network para una estación meteorológica (MX17004) en México durante cinco meses en 2010. 
Tiene variables en\n", + "columnas individuales (`\"id\"`, `\"year\"`, `\"month\"`), distribuidas en columnas (`day`, `\"d1\"`–`\"d31\"`) y en filas (`\"tmin\"` y `\"tmax\"` para las temperaturas mínima y máxima). Los meses con menos de 31 días tienen valores faltantes para los últimos días del mes. La columna `\"element\"` no es una variable: almacena los *nombres* de las variables." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -610,7 +601,7 @@ "1108 NaN NaN NaN NaN 18.2 NaN NaN NaN NaN " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -630,12 +621,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> To tidy this dataset we first melt it with colvars `\"id\"`, `\"year\"`, `\"month\"`, and the column that contains the actual variable names, `\"element\"` [...]. For presentation, we have dropped the missing values, making them implicit rather than explicit. This is permissible because we know how many days are in each month and can easily reconstruct the explicit missing values." + "> Para hacer este dataset tidy, primero le hacemos un `melt` con colvars `\"id\"`, `\"year\"`, `\"month\"`, y la columna que contiene los nombres de las variables reales, `\"element\"` [...]. Para la presentación, hemos descartado los valores faltantes, haciéndolos implícitos en lugar de explícitos. Esto es permisible porque sabemos cuántos días hay en cada mes y podemos reconstruir fácilmente los valores faltantes explícitos." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -659,12 +650,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> This dataset is mostly tidy, but we have two variables stored in rows: `\"tmin\"` and `\"tmax\"`, the type of observation." 
+ "> Este conjunto de datos está mayormente tidy, pero tenemos dos variables almacenadas en filas: `\"tmin\"` y `\"tmax\"`, el tipo de observación." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -783,7 +774,7 @@ "23192 MX000017004 2010-02-23 tmin 10.7" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -803,14 +794,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> Fixing this requires the cast, or unstack, operation. This performs the inverse of melting by rotating the element variable back out into the columns\n", + "> Arreglar esto requiere la operación `cast` (conversión) o `unstack` (desapilar). Esto realiza lo inverso del \"fundido\" (melting), rotando la variable `element` de vuelta hacia las columnas.\n", "\n", - "Below, [pd.DataFrame.unstack()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.unstack.html) uses a DataFrame's index as columns to unstack over." + "A continuación, [pd.DataFrame.unstack()](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.unstack.html) usa el índice de un DataFrame como las columnas sobre las cuales desapilar.\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -826,12 +817,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "> This form is tidy. There is one variable in each column, and each row represents a day’s observations." + "> Esta forma está tidy. Hay una variable en cada columna, y cada fila representa las observaciones de un día." 
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -950,7 +941,7 @@ "12096 MX000017004 2010-05-27 33.2 18.2" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -962,7 +953,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.14 ('tidy')", "language": "python", "name": "python3" }, @@ -976,7 +967,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } } }, "nbformat": 4, diff --git a/4_multiple_types_in_one_table.ipynb b/4_multiples_tipos_de_observaciones_en_una_tabla.ipynb similarity index 92% rename from 4_multiple_types_in_one_table.ipynb rename to 4_multiples_tipos_de_observaciones_en_una_tabla.ipynb index 11c23ab3be196322fc7c8b3747c38abdced28968..1b0c5ce84f5845f95e49f5b52acd963674c74ad4 100644 --- a/4_multiple_types_in_one_table.ipynb +++ b/4_multiples_tipos_de_observaciones_en_una_tabla.ipynb @@ -4,9 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Multiple Types in one Table\n", + "# Múltiples Tipos en una Tabla\n", "\n", - "> Datasets often involve values collected at multiple levels, on different types of observational units. During tidying, each type of observational unit should be stored in its own table. This is closely related to the idea of database normalisation, where each fact is expressed in only one place. If this is not done, it’s possible for inconsistencies to occur." + "> Los datasets a menudo involucran valores recopilados en múltiples niveles, en diferentes tipos de unidades de observación. Durante el proceso de *tidying*, cada tipo de unidad de observación debe almacenarse en su propia tabla/dataframe. 
Esto está estrechamente relacionado con la idea de la normalización de la base de datos, donde cada hecho se expresa en un solo lugar. Si esto no se hace, es posible que ocurran inconsistencias." ] }, { @@ -21,15 +21,6 @@ "execution_count": 1, "metadata": {}, "outputs": [], - "source": [ - "%load_ext lab_black" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], "source": [ "import pandas as pd" ] @@ -38,21 +29,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Example: Billboard revisited" + "## Ejemplo: Billboard revisited" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Data\n", - "\n", - "Load the cleaned and almost tidy dataset from notebook No. 1." + "### Cargando la data\n", + "Cargando el dataset limpio y casi \"tidy\" del notebook 1." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -65,13 +55,13 @@ "source": [ "### Messy Data\n", "\n", - "> The Billboard dataset described in Table 8 actually contains observations on two types of\n", - "observational units: the **song** and its **rank** in each week. This manifests itself through the duplication of facts about the song: `\"artist\"` and `\"time\"` are repeated for every song in each `\"week\"`." + "> El conjunto de datos de Billboard descrito en la Tabla 8 en realidad contiene observaciones sobre dos tipos de\n", + "unidades de observación: la **song** y su **rank** en cada semana. Esto se manifiesta a través de la duplicación de \"hechos\" sobre la canción: `\"artist\"` y `\"time\"` se repiten para cada canción en cada `\"week\"`." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -295,7 +285,7 @@ "14 Kryptonite 2000-05-06 5 66 " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -310,14 +300,14 @@ "source": [ "### Tidy Data\n", "\n", - "> The billboard dataset needs to be broken down into two datasets: a **song** dataset which stores `\"artist\"`, `\"song name\"` and `\"time\"`, and a **ranking** dataset which gives the `\"rank\"` of the song in each `\"week\".\n", + "> El billboard dataset necesita dividirse en dos conjuntos de datos: un dataframe de **song**, que almacena `\"artist\"`, `\"song name\"` y `\"time\"`, y un conjunto de datos de **rank** que proporciona el `\"rank\"` de la canción en cada `\"week\"`.\n", "\n", - "Transforming data columns into index columns is enough in pandas to obtain unique `tuple`s from several columns. So, no real \"function\" is needed to tidy up the dataset." + "Transformar columnas de datos en columnas de índice es suficiente en pandas para obtener 'tuplas' únicas de varias columnas. Por lo tanto, no se necesita una \"función\" real para ordenar el conjunto de datos." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -346,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -514,7 +504,7 @@ "15 4:18 " ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -525,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -658,7 +648,7 @@ "3 2000-05-06 66" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -670,7 +660,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.8.14 ('tidy')", "language": "python", "name": "python3" }, @@ -684,7 +674,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } } }, "nbformat": 4, diff --git a/5_one_type_in_multiple_tables.ipynb b/5_one_type_in_multiple_tables.ipynb deleted file mode 100644 index b6812ea8f7f24c6d9871e151101a4d42be9cf3f6..0000000000000000000000000000000000000000 --- a/5_one_type_in_multiple_tables.ipynb +++ /dev/null @@ -1,55 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# One Type in multiple Tables\n", - "\n", - "The repository with the original R code does not provide code for this case but only refers to other projects that cannot be replicated any more (because the source website is *not* available any more)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Messy Data\n", - "\n", - "> It’s also common to find data values about a single type of observational unit spread out over multiple tables or files. 
These tables and files are often split up by another variable, so that each represents a single year, person, or location." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Tidy Data\n", - "\n", - "> As long as the format for individual records is consistent, this is an easy problem to fix:\n", - "1. Read the files into a list of tables.\n", - "2. For each table, add a new column that records the original file name (because the file name is often the value of an important variable).\n", - "3. Combine all tables into a single table" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/5_un_tipo_de_obs_en_multiples_tablas.ipynb b/5_un_tipo_de_obs_en_multiples_tablas.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0fa087fc33bdefb78567864113287bfd4a7f9d12 --- /dev/null +++ b/5_un_tipo_de_obs_en_multiples_tablas.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Un tipo en múltiples tablas\n", + "\n", + "El repositorio con el código original en `R` no incluye código para este caso, sino solo se refiere a otros proyectos que no pueden ser replicados ya que el sitio web donde estaban los datos no está disponible actualmente." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Messy Data\n", + "\n", + "> Es también bastante común encontrar datos sobre un solo tipo de unidad de observación, distribuidos en varias tablas o archivos. 
Estas tablas y archivos a menudo se dividen por otra variable, de modo que cada uno representa un solo año, persona o ubicación." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tidy Data\n", + "> Siempre que el formato de los registros individuales sea consistente, este es un problema fácil de solucionar:\n", + "1. Se leen los archivos en una lista de tablas.\n", + "2. Para cada tabla, se agrega una nueva columna que registre el nombre del archivo original (porque el nombre del archivo suele ser el valor de una variable importante).\n", + "3. Se combinan todas las tablas en una sola tabla, usualmente usando `pd.concat()`.\n", + "\n", + "**El dolor de cabeza viene cuando los registros no son consistentes, problema ampliamente extendido en muchos lugares, donde un caso clásico son los datos de observaciones sin un número único de identificación, recolectados por distintas entidades de gobierno, y donde el caso paradigmático que ha llevado a un sinfín de problemas son los registros de personas en EEUU, debido a la falta de un número único de identificación.**" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.14 ('tidy')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/6_case_study.ipynb b/6_case_study.ipynb deleted file mode 100644 index fb2b3b9ddf80376adc62f232eb2bb1ca4b0d86a3..0000000000000000000000000000000000000000 --- a/6_case_study.ipynb +++ /dev/null @@ -1,1000 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Case Study: Unusual 
Deaths in Mexico\n", - "\n", - "> The following case study illustrates how tidy data and tidy tools make data analysis easier by easing the transitions between manipulation, visualisation and modelling. You will not see any code that exists solely to get the output of one function into the right format to input to another.\n", - "\n", - "> The case study uses individual-level mortality data from Mexico. The goal is to find causes of death with unusual temporal patterns within a day." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## \"Housekeeping\"" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext lab_black" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/webartifex/repos/tidy-data/.venv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:11: FutureWarning: pandas.core.index is deprecated and will be removed in a future version. 
The public classes are available in the top-level namespace.\n", - " from pandas.core.index import Index as PandasIndex\n" - ] - } - ], - "source": [ - "import math\n", - "import textwrap\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "from matplotlib import pyplot as plt\n", - "from rpy2 import robjects # leads to a FutureWarning that can be safely ignored\n", - "from rpy2.robjects import pandas2ri\n", - "from sklearn.linear_model import HuberRegressor" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "sns.set()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load the Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "deaths = pandas2ri.ri2py(robjects.r[\"readRDS\"](\"data/deaths.rds\"))\n", - "deaths = deaths[(deaths[\"yod\"] == 2008) & (deaths[\"mod\"] != 0) & (deaths[\"dod\"] != 0)]\n", - "deaths = deaths[~(deaths[\"hod\"] < 0)]\n", - "deaths = deaths.reset_index(drop=True)\n", - "\n", - "assert set(deaths[\"hod\"].unique()) <= set(range(24))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(502520, 5)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "deaths.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yodmoddodhodcod
02008111B20
12008111B22
22008111C18
32008111C34
42008111C50
\n", - "
" - ], - "text/plain": [ - " yod mod dod hod cod\n", - "0 2008 1 1 1 B20\n", - "1 2008 1 1 1 B22\n", - "2 2008 1 1 1 C18\n", - "3 2008 1 1 1 C34\n", - "4 2008 1 1 1 C50" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "deaths.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# The file contains 7 duplicates that are discarded.\n", - "codes = pd.read_csv(\"data/icd-main.csv\")\n", - "codes = codes[(codes[\"code\"] != codes[\"code\"].shift())].set_index(\"code\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1851, 1)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "codes.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
disease
code
A00Cholera
A01Typhoid and paratyphoid fevers
A02Other salmonella infections
A03Shigellosis
A04Other bacterial intestinal infections
\n", - "
" - ], - "text/plain": [ - " disease\n", - "code \n", - "A00 Cholera\n", - "A01 Typhoid and paratyphoid fevers\n", - "A02 Other salmonella infections\n", - "A03 Shigellosis\n", - "A04 Other bacterial intestinal infections" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "codes.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Counts\n", - "\n", - "Count the number of deaths by `\"hod\"` (=\"hour of the day\") and `\"cod\"` (=\"cause of death\"), and also join in the more descriptive labels for the various causes." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
hodcodfreqdisease
01A013Typhoid and paratyphoid fevers
11A023Other salmonella infections
21A047Other bacterial intestinal infections
31A051Other bacterial foodborne intoxications, not e...
41A062Amebiasis
\n", - "
" - ], - "text/plain": [ - " hod cod freq disease\n", - "0 1 A01 3 Typhoid and paratyphoid fevers\n", - "1 1 A02 3 Other salmonella infections\n", - "2 1 A04 7 Other bacterial intestinal infections\n", - "3 1 A05 1 Other bacterial foodborne intoxications, not e...\n", - "4 1 A06 2 Amebiasis" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "counts = (\n", - " pd.DataFrame(deaths.groupby([\"hod\", \"cod\"]).size(), columns=[\"freq\"])\n", - " .reset_index()\n", - " .join(codes, on=\"cod\")\n", - ")\n", - "# This is to ensure that no duplicates are created\n", - "# because of duplicate entries in the codes DataFrame.\n", - "assert counts[\"cod\"].value_counts().max() <= 24\n", - "\n", - "# Keep only causes where a death happened in every hour.\n", - "counts = counts[counts[\"cod\"].isin(list((counts[\"cod\"].value_counts() == 24).index))]\n", - "\n", - "counts.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add a `\"prop\"` (=\"proportion\") column indicating the relative frequency of a given cause of death on an hourly basis." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
hodcodfreqdiseaseprop
01A013Typhoid and paratyphoid fevers0.062500
11A023Other salmonella infections0.048387
21A047Other bacterial intestinal infections0.051095
31A051Other bacterial foodborne intoxications, not e...0.050000
41A062Amebiasis0.024390
\n", - "
" - ], - "text/plain": [ - " hod cod freq disease prop\n", - "0 1 A01 3 Typhoid and paratyphoid fevers 0.062500\n", - "1 1 A02 3 Other salmonella infections 0.048387\n", - "2 1 A04 7 Other bacterial intestinal infections 0.051095\n", - "3 1 A05 1 Other bacterial foodborne intoxications, not e... 0.050000\n", - "4 1 A06 2 Amebiasis 0.024390" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "counts = counts.set_index(\"cod\")\n", - "counts[\"prop\"] = counts[\"freq\"] / deaths.groupby([\"cod\"]).size().reindex(counts.index)\n", - "counts = counts.reset_index()\n", - "# Re-order the columns as in the paper.\n", - "counts = counts[[\"hod\", \"cod\", \"freq\", \"disease\", \"prop\"]]\n", - "\n", - "counts.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Add `\"freq_all\"` and `\"prop_all\"` columns that show the absolute number of deaths for a given hour of day (disregarding cause of death) and the proportion of deaths for a certain hour of day with respect to the whole day." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
hodcodfreqdiseasepropfreq_allprop_all
01A013Typhoid and paratyphoid fevers0.062500200380.039875
11A023Other salmonella infections0.048387200380.039875
21A047Other bacterial intestinal infections0.051095200380.039875
31A051Other bacterial foodborne intoxications, not e...0.050000200380.039875
41A062Amebiasis0.024390200380.039875
\n", - "
" - ], - "text/plain": [ - " hod cod freq disease \\\n", - "0 1 A01 3 Typhoid and paratyphoid fevers \n", - "1 1 A02 3 Other salmonella infections \n", - "2 1 A04 7 Other bacterial intestinal infections \n", - "3 1 A05 1 Other bacterial foodborne intoxications, not e... \n", - "4 1 A06 2 Amebiasis \n", - "\n", - " prop freq_all prop_all \n", - "0 0.062500 20038 0.039875 \n", - "1 0.048387 20038 0.039875 \n", - "2 0.051095 20038 0.039875 \n", - "3 0.050000 20038 0.039875 \n", - "4 0.024390 20038 0.039875 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "counts = counts.set_index(\"hod\")\n", - "counts[\"freq_all\"] = deaths.groupby(\"hod\").size()\n", - "counts[\"prop_all\"] = counts[\"freq_all\"] / deaths.shape[0]\n", - "counts = counts.reset_index()\n", - "\n", - "counts.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Distance between temporal Patterns\n", - "\n", - "> Next we compute a distance between the temporal pattern of each cause of death and the overall temporal pattern. There are many ways to measure this distance, but I found a simple mean squared deviation to be revealing. We also record the sample size, the total number of deaths from that cause. To ensure that the diseases we consider are sufficiently representative we’ll only work with diseases with more than 50 total deaths (∼2/hour)." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
diseasendist
A02Other salmonella infections620.000738
A04Other bacterial intestinal infections1370.000208
A06Amebiasis820.000405
A09Diarrhea and gastroenteritis of infectious origin30160.000028
A16Respiratory tuberculosis, not confirmed bacter...16420.000029
\n", - "
" - ], - "text/plain": [ - " disease n dist\n", - "A02 Other salmonella infections 62 0.000738\n", - "A04 Other bacterial intestinal infections 137 0.000208\n", - "A06 Amebiasis 82 0.000405\n", - "A09 Diarrhea and gastroenteritis of infectious origin 3016 0.000028\n", - "A16 Respiratory tuberculosis, not confirmed bacter... 1642 0.000029" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "devi = (\n", - " codes.join(deaths.groupby(\"cod\").count()[\"yod\"].to_frame(), how=\"inner\")\n", - " .join(\n", - " counts.groupby(\"cod\")\n", - " .apply(lambda x: ((x[\"prop\"] - x[\"prop_all\"]) ** 2).mean())\n", - " .to_frame(),\n", - " how=\"inner\",\n", - " )\n", - " .rename(columns={\"yod\": \"n\", 0: \"dist\"})\n", - ")\n", - "devi = devi[(devi[\"n\"] > 50)]\n", - "\n", - "devi.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot `\"dist\"` vs. `\"n\"`. Not a whole lot can be seen here." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "_, ax = plt.subplots(figsize=(4, 4))\n", - "ax.set_xlim(0, 50000)\n", - "ax.set_ylim(0, 0.006)\n", - "sns.regplot(x=\"n\", y=\"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The relationship becomes more obvious if one plots the same points on a `\"log\"`-`\"log\"` scale." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "_, ax = plt.subplots(figsize=(4, 4))\n", - "ax.set_xscale(\"log\")\n", - "ax.set_yscale(\"log\")\n", - "ax.set_xlim(30, 150000)\n", - "ax.set_ylim(0.00001, 0.1)\n", - "sns.regplot(\"n\", \"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> We are interested in points that have high y-values, relative to their x-neighbours. Controlling for the number of deaths, these points represent the diseases which depart the most from the overall pattern. To find these unusual points, we fit a robust linear model and plot the residuals, Figure 3. The plot shows an empty region around a residual of 1.5. So somewhat arbitrarily, we’ll select those diseases with a residual greater than 1.5." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# Note that the HuberRegressor is not the exact\n", - "# same method as in the paper but close.\n", - "X = np.log(devi[\"n\"]).values[:, np.newaxis]\n", - "y = np.log(devi[\"dist\"]).values\n", - "rlm = HuberRegressor()\n", - "rlm.fit(X, y)\n", - "devi[\"residuals\"] = y - rlm.predict(X)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plot the threshold for \"unusual\" deaths, set arbitrarily at 1.5." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "_, ax = plt.subplots(figsize=(4, 4))\n", - "ax.set_xscale(\"log\")\n", - "ax.set_xlim(50, 200000)\n", - "ax.set_ylim(-1, 3)\n", - "sns.regplot(\"n\", \"residuals\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})\n", - "ax.hlines(1.5, 0, 200000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> Finally, we plot the temporal course for each unusual cause, Figure 4. We split the diseases into two plots because of differences in variability. The top plot shows diseases with over 350 deaths and the bottom with under 350. The causes of death fall into three main groups: murder, drowning, and transportation related. Murder is more common at night, drowning in the afternoon, and transportation related deaths during commute times. The pale gray line in the background shows the temporal course across all diseases." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Get all temporarily \"unusual\" deaths.\n", - "unusual = devi.loc[(devi[\"residuals\"] > 1.5), [\"disease\", \"n\"]].sort_values(\"disease\")\n", - "# Helper dataset for easy indexing / value retrieval.\n", - "plot_data = counts[[\"cod\", \"hod\", \"prop\", \"prop_all\"]].set_index(\"cod\")\n", - "# Divide the plots in two big categories.\n", - "for header, cond, ylim in [\n", - " (\"> 350 Deaths / Year\", (unusual[\"n\"] > 350), 0.125),\n", - " (\"< 350 Deaths / Year\", (unusual[\"n\"] <= 350), 0.3),\n", - "]:\n", - " nrows = math.ceil(len(unusual[cond]) / 3)\n", - " fig = plt.figure(figsize=(16, 12),)\n", - " for i, (cod, (disease, _)) in enumerate(unusual[cond].iterrows(), 1):\n", - " ax = fig.add_subplot(nrows, 3, i)\n", - " ax.set_title(\"\\n\".join(textwrap.wrap(disease, 40)))\n", - " ax.set_xlim(0, 24)\n", - " ax.set_ylim(0, ylim)\n", - " ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop\"])\n", - " ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop_all\"])\n", - " # Show only lower and left axes.\n", - " if i not in (3 * nrows - 2, 3 * nrows - 1, 3 * nrows):\n", - " plt.setp(ax.get_xticklabels(), visible=False)\n", - " if i % 3 != 1:\n", - " plt.setp(ax.get_yticklabels(), visible=False)\n", - " fig.suptitle(header, fontsize=20)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/6_estudio_de_caso.ipynb b/6_estudio_de_caso.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d052eaa26e9a2cebf41be9425bfaf333b818e4d5 --- /dev/null +++ 
b/6_estudio_de_caso.ipynb @@ -0,0 +1,1046 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Estudio de caso: muertes inusuales en México\n", + "\n", + "> El siguiente estudio de caso ilustra cómo los datos *tidy* y las *tidy tools* facilitan el análisis de datos al facilitar las transiciones entre la manipulación, la visualización y el modelado. No verás ningún código que exista únicamente para obtener el output de una función en el formato correcto para que sea el input para otra.\n", + "\n", + "> El estudio de caso utiliza datos de mortalidad a nivel individual de México. El objetivo es encontrar causas de muerte con patrones temporales inusuales dentro de un día.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## \"Housekeeping\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import math\n", + "import textwrap" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from matplotlib import pyplot as plt\n", + "from sklearn.linear_model import HuberRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sns.set()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "odict_keys([None])\n", + " yod mod dod hod cod\n", + "0 0 0 0 NaN E14\n", + "1 0 0 0 NaN E46\n", + "2 0 0 0 NaN I21\n", + "3 0 0 0 NaN K70\n", + "4 0 0 0 NaN P21\n" + ] + } + ], + "source": [ + "\n", + "import pyreadr\n", + 
"\n", + "deaths = pyreadr.read_r('data/deaths.rds')\n", + "\n", + "# done! let's see what we got\n", + "print(deaths.keys()) # let's check what objects we got: there is only None\n", + "deaths = deaths[None] # extract the pandas data frame for the only object available\n", + "print(deaths.head())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n", + " 20, 21, 22, 23, nan], dtype=object)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "deaths = deaths[(deaths[\"yod\"] == 2008) & (deaths[\"mod\"] != 0) & (deaths[\"dod\"] != 0)]\n", + "deaths = deaths[~(deaths[\"hod\"] < 0)]\n", + "deaths = deaths.reset_index(drop=True)\n", + "\n", + "deaths.hod.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(527429, 5)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "deaths.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yodmoddodhodcod
02008111B20
12008111B22
22008111C18
32008111C34
42008111C50
\n", + "
" + ], + "text/plain": [ + " yod mod dod hod cod\n", + "0 2008 1 1 1 B20\n", + "1 2008 1 1 1 B22\n", + "2 2008 1 1 1 C18\n", + "3 2008 1 1 1 C34\n", + "4 2008 1 1 1 C50" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "deaths.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# The file contains 7 duplicates that are discarded.\n", + "codes = pd.read_csv(\"data/icd-main.csv\")\n", + "codes = codes[(codes[\"code\"] != codes[\"code\"].shift())].set_index(\"code\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1851, 1)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "codes.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
disease
code
A00Cholera
A01Typhoid and paratyphoid fevers
A02Other salmonella infections
A03Shigellosis
A04Other bacterial intestinal infections
\n", + "
" + ], + "text/plain": [ + " disease\n", + "code \n", + "A00 Cholera\n", + "A01 Typhoid and paratyphoid fevers\n", + "A02 Other salmonella infections\n", + "A03 Shigellosis\n", + "A04 Other bacterial intestinal infections" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "codes.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Counts\n", + "\n", + "Cuenta el número de muertos por `\"hod\"` (=\"hour of the day\") y `\"cod\"` (=\"cause of death\"), y también hace un `join` con las etiquetas que son más descriptivas para las varias causas." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hodcodfreqdisease
01A013Typhoid and paratyphoid fevers
11A023Other salmonella infections
21A047Other bacterial intestinal infections
31A051Other bacterial foodborne intoxications, not e...
41A062Amebiasis
\n", + "
" + ], + "text/plain": [ + " hod cod freq disease\n", + "0 1 A01 3 Typhoid and paratyphoid fevers\n", + "1 1 A02 3 Other salmonella infections\n", + "2 1 A04 7 Other bacterial intestinal infections\n", + "3 1 A05 1 Other bacterial foodborne intoxications, not e...\n", + "4 1 A06 2 Amebiasis" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "counts = (\n", + " pd.DataFrame(deaths.groupby([\"hod\", \"cod\"]).size(), columns=[\"freq\"])\n", + " .reset_index()\n", + " .join(codes, on=\"cod\")\n", + ")\n", + "# This is to ensure that no duplicates are created\n", + "# because of duplicate entries in the codes DataFrame.\n", + "assert counts[\"cod\"].value_counts().max() <= 24\n", + "\n", + "# Keep only causes where a death happened in every hour.\n", + "counts = counts[counts[\"cod\"].isin(list((counts[\"cod\"].value_counts() == 24).index))]\n", + "\n", + "counts.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Añade una columna `\"prop\"` (=\"proportion\") indicando la frecuencia relativa de una determinada causa de muerte por horas." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hodcodfreqdiseaseprop
01A013Typhoid and paratyphoid fevers0.062500
11A023Other salmonella infections0.046875
21A047Other bacterial intestinal infections0.048951
31A051Other bacterial foodborne intoxications, not e...0.050000
41A062Amebiasis0.022989
\n", + "
" + ], + "text/plain": [ + " hod cod freq disease prop\n", + "0 1 A01 3 Typhoid and paratyphoid fevers 0.062500\n", + "1 1 A02 3 Other salmonella infections 0.046875\n", + "2 1 A04 7 Other bacterial intestinal infections 0.048951\n", + "3 1 A05 1 Other bacterial foodborne intoxications, not e... 0.050000\n", + "4 1 A06 2 Amebiasis 0.022989" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "counts = counts.set_index(\"cod\")\n", + "counts[\"prop\"] = counts[\"freq\"] / deaths.groupby([\"cod\"]).size().reindex(counts.index)\n", + "counts = counts.reset_index()\n", + "# Re-order the columns as in the paper.\n", + "counts = counts[[\"hod\", \"cod\", \"freq\", \"disease\", \"prop\"]]\n", + "\n", + "counts.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Añade las columnas `\"freq_all\"` y `\"prop_all\"`, que muestran el número absoluto de muertes para una hora determinada del día (sin tener en cuenta la causa de la muerte), y la proporción de muertes de una determinada hora del día con respecto al día completo." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hodcodfreqdiseasepropfreq_allprop_all
01A013Typhoid and paratyphoid fevers0.062500200380.037992
11A023Other salmonella infections0.046875200380.037992
21A047Other bacterial intestinal infections0.048951200380.037992
31A051Other bacterial foodborne intoxications, not e...0.050000200380.037992
41A062Amebiasis0.022989200380.037992
\n", + "
" + ], + "text/plain": [ + " hod cod freq disease \\\n", + "0 1 A01 3 Typhoid and paratyphoid fevers \n", + "1 1 A02 3 Other salmonella infections \n", + "2 1 A04 7 Other bacterial intestinal infections \n", + "3 1 A05 1 Other bacterial foodborne intoxications, not e... \n", + "4 1 A06 2 Amebiasis \n", + "\n", + " prop freq_all prop_all \n", + "0 0.062500 20038 0.037992 \n", + "1 0.046875 20038 0.037992 \n", + "2 0.048951 20038 0.037992 \n", + "3 0.050000 20038 0.037992 \n", + "4 0.022989 20038 0.037992 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "counts = counts.set_index(\"hod\")\n", + "counts[\"freq_all\"] = deaths.groupby(\"hod\").size()\n", + "counts[\"prop_all\"] = counts[\"freq_all\"] / deaths.shape[0]\n", + "counts = counts.reset_index()\n", + "\n", + "counts.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distancia entre patrones temporales\n", + "\n", + "> A continuación calculamos una distancia entre el patrón temporal de cada causa de muerte y el patrón temporal general. Hay muchas maneras de medir esta distancia, pero encontré que una simple desviación cuadrática media es reveladora. También registramos el tamaño de la muestra, el número total de muertes por esa causa. Para asegurarnos de que las enfermedades que consideramos sean suficientemente representativas, solo trabajaremos con enfermedades con más de 50 muertes totales (∼2/hora)." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
diseasendist
A02Other salmonella infections640.000692
A04Other bacterial intestinal infections1430.000191
A06Amebiasis870.000360
A09Diarrhea and gastroenteritis of infectious origin31140.000027
A16Respiratory tuberculosis, not confirmed bacter...17090.000027
\n", + "
" + ], + "text/plain": [ + " disease n dist\n", + "A02 Other salmonella infections 64 0.000692\n", + "A04 Other bacterial intestinal infections 143 0.000191\n", + "A06 Amebiasis 87 0.000360\n", + "A09 Diarrhea and gastroenteritis of infectious origin 3114 0.000027\n", + "A16 Respiratory tuberculosis, not confirmed bacter... 1709 0.000027" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "devi = (\n", + " codes.join(deaths.groupby(\"cod\").count()[\"yod\"].to_frame(), how=\"inner\")\n", + " .join(\n", + " counts.groupby(\"cod\")\n", + " .apply(lambda x: ((x[\"prop\"] - x[\"prop_all\"]) ** 2).mean())\n", + " .to_frame(),\n", + " how=\"inner\",\n", + " )\n", + " .rename(columns={\"yod\": \"n\", 0: \"dist\"})\n", + ")\n", + "devi = devi[(devi[\"n\"] > 50)]\n", + "\n", + "devi.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Grafica `\"dist\"` vs. `\"n\"`. No se aprecia mucho en este gráfico." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(figsize=(8, 8))\n", + "ax.set_xlim(0, 50000)\n", + "ax.set_ylim(0, 0.006)\n", + "sns.regplot(x=\"n\", y=\"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "La relación se vuelve más obvia si uno grafica los mismos puntos en una escala `\"log\"`-`\"log\"`." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(figsize=(4, 4))\n", + "ax.set_xscale(\"log\")\n", + "ax.set_yscale(\"log\")\n", + "ax.set_xlim(30, 150000)\n", + "ax.set_ylim(0.00001, 0.1)\n", + "sns.regplot(x=\"n\", y=\"dist\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Estamos interesados en puntos que tienen valores de `y` altos, en relación con sus vecinos de `x`. \n", + "> Controlando por el número de muertes, estos puntos representan las enfermedades que más se apartan del patrón general. \n", + "> Para encontrar estos puntos inusuales, ajustamos un modelo lineal robusto y graficamos los residuos, Figura 3. \n", + "> El gráfico muestra una región vacía alrededor de un residuo de 1,5. \n", + "> Entonces, de manera un tanto arbitraria, seleccionaremos aquellas enfermedades con un residuo mayor a 1.5.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Note that the HuberRegressor is not the exact\n", + "# same method as in the paper but close.\n", + "X = np.log(devi[\"n\"]).values[:, np.newaxis]\n", + "y = np.log(devi[\"dist\"]).values\n", + "rlm = HuberRegressor()\n", + "rlm.fit(X, y)\n", + "devi[\"residuals\"] = y - rlm.predict(X)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Graficamos el umbral de muertes \"inusuales\", establecido arbitrariamente en 1.5" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(figsize=(8, 8))\n", + "ax.set_xscale(\"log\")\n", + "ax.set_xlim(50, 200000)\n", + "ax.set_ylim(-1, 3)\n", + "sns.regplot(x=\"n\", y = \"residuals\", data=devi, ax=ax, fit_reg=False, scatter_kws={\"s\": 1})\n", + "ax.hlines(1.5, 0, 200000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Finalmente, graficamos el curso temporal de cada causa inusual, Figura 4. \n", + "> Dividimos las enfermedades en dos gráficos debido a las diferencias en la variabilidad. \n", + "> La gráfica superior muestra enfermedades con más de 350 muertes y la inferior con menos de 350. \n", + "> Las causas de muerte se dividen en tres grupos principales: las relacionadas con asesinato, con ahogamiento y con transporte. \n", + "> Los asesinatos son más comunes por la noche, los ahogamientos por la tarde y las muertes relacionadas con el transporte durante los viajes diarios. \n", + "> La línea naranja en el fondo muestra el curso temporal de todas las enfermedades." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Get all temporarily \"unusual\" deaths.\n", + "unusual = devi.loc[(devi[\"residuals\"] > 1.5), [\"disease\", \"n\"]].sort_values(\"disease\")\n", + "# Helper dataset for easy indexing / value retrieval.\n", + "plot_data = counts[[\"cod\", \"hod\", \"prop\", \"prop_all\"]].set_index(\"cod\")\n", + "# Divide the plots in two big categories.\n", + "for header, cond, ylim in [\n", + " (\"> 350 Deaths / Year\", (unusual[\"n\"] > 350), 0.125),\n", + " (\"< 350 Deaths / Year\", (unusual[\"n\"] <= 350), 0.3),\n", + "]:\n", + " nrows = math.ceil(len(unusual[cond]) / 3)\n", + " fig = plt.figure(figsize=(16, 12),)\n", + " for i, (cod, (disease, _)) in enumerate(unusual[cond].iterrows(), 1):\n", + " ax = fig.add_subplot(nrows, 3, i)\n", + " ax.set_title(\"\\n\".join(textwrap.wrap(disease, 40)))\n", + " ax.set_xlim(0, 24)\n", + " ax.set_ylim(0, ylim)\n", + " ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop\"])\n", + " ax.plot(plot_data.loc[cod, \"hod\"], plot_data.loc[cod, \"prop_all\"])\n", + " # Show only lower and left axes.\n", + " if i not in (3 * nrows - 2, 3 * nrows - 1, 3 * nrows):\n", + " plt.setp(ax.get_xticklabels(), visible=False)\n", + " if i % 3 != 1:\n", + " plt.setp(ax.get_yticklabels(), visible=False)\n", + " fig.suptitle(header, fontsize=20)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.14 ('tidy')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.14" + }, + "vscode": { + "interpreter": { + "hash": "af7127df06252d69ce1b5fcf0f303ad2193973c1f3767a585df649c7fbdfe99b" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/README.md b/README.md index 
a336f9f5c33437aba3a4537cdf40ce10c6f686e5..d6835fca7dffa6e304524c1b37c2d9ed2090ae8a 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,44 @@ # Tidy Data -The purpose of this repository is to illustrate how the data cleaning process described - in the paper "[Tidy Data](tidy-data.pdf)" by Hadley Wickham, a member of the - [RStudio](https://rstudio.com/) team, can be done in - [Python](https://www.python.org/). +El propósito de este repositorio es ilustrar cómo se puede realizar en Python (con pandas) el proceso de limpieza de datos descrito en el artículo "Tidy Data" de Hadley Wickham. Este repositorio fue traducido al español del siguiente repositorio: https://github.com/webartifex/tidy-data -The paper was published in 2014 in the [Journal of Statistical Software](https://www.jstatsoft.org/article/view/v059i10). -The author offers it for free [here](http://vita.had.co.nz/papers/tidy-data.html). -Furthermore, the original [R](https://www.r-project.org/) code is available [here](https://github.com/hadley/tidy-data). +## Resumen -After installing the dependencies for this project (cf., the [installation notes](https://github.com/webartifex/tidy-data#installation) - below), it is recommended to first read the paper to get the big picture and - then work through the six Jupyter notebooks listed below. +### Definición -## Summary +**Tidy data** se definen como datos que vienen en forma de tabla que se adhieren a los siguientes requisitos: +1. cada variable es una columna, +2. cada observación una fila, y +3. cada tipo de unidad de observación forma una tabla. - -### Definition - -**Tidy** data is defined as data that comes in a table form adhering to the - following requirements: -1. each variable is a column, -2. each observation a row, and -3. each type of observational unit forms a table. - -This is equivalent to [Codd's 3rd normal form](https://en.wikipedia.org/wiki/Third_normal_form), - a concept from the theory on relational databases. 
-A dataset that does *not* satisfy these properties is called **messy**. +Esto es equivalente a [la tercera forma normal de Codd](https://en.wikipedia.org/wiki/Third_normal_form), un concepto de la teoría sobre bases de datos relacionales. Un conjunto de datos que *no* satisface estas propiedades se llama **messy data**. ### Tidying Data -The five most common problems with messy data are: - -- column headers are values, not variable names - (cf., [notebook 1](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/1_column_headers_are_values.ipynb)) -- multiple variables are stored in one column - (cf., [notebook 2](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/2_multiple_variables_stored_in_one_column.ipynb)) -- variables are stored in both rows and columns - (cf., [notebook 3](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/3_variables_are_stored_in_both_rows_and_columns.ipynb)) -- multiple types of observational units are stored in the same table - (cf., [notebook 4](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/4_multiple_types_in_one_table.ipynb)) -- a single observational unit is stored in multiple tables - (cf., [notebook 5](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/5_one_type_in_multiple_tables.ipynb)) - +Los cinco problemas más comunes con los datos desordenados son: -### Case Study +1. Los encabezados de las columnas son valores, no nombres de variables. +2. Múltiples variables se almacenan en una columna +3. Las variables se almacenan tanto en filas como en columnas +4. Múltiples tipos de unidades de observación se almacenan en la misma tabla +5. Una sola unidad de observación se almacena en varias tablas -A case study (cf., [notebook 6](https://nbviewer.jupyter.org/github/webartifex/tidy-data/blob/master/6_case_study.ipynb)) - shows the advantages of tidy data as a standardized input to statistical functions. 
+### Estudio de caso -## Installation +Un estudio de caso muestra las ventajas de **tidy data** como un input estandarizado para funciones estadísticas y de visualización. -Get a local copy of this repository with [git](https://git-scm.com/). +## Instalación -`git clone https://github.com/webartifex/tidy-data.git` +Obtén una copia local de este repositorio con [git](https://git-scm.com/). -If you are not familiar with [git](https://git-scm.com/), simply download the latest - version of the files in a zip archive [here](https://github.com/webartifex/tidy-data/archive/master.zip). +`git clone https://github.com/vmlandae/tidy-data-mds-espanol.git` -This project uses [poetry](https://python-poetry.org/docs/) to manage its dependencies. -Install all third-party packages into a [virtual environment](https://docs.python.org/3/library/venv.html). +En esta versión actualizada, este proyecto usa [conda](https://docs.conda.io/en/latest/) para manejar sus dependencias. +El archivo `environment.yml` tiene las dependencias -`poetry install` +```python -Alternatively, use the [Anaconda Distribution](https://www.anaconda.com/products/individual) - that *should* also suffice to run the provided notebooks. 
+``` diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..e15e7f9883ee41fd75b1ab8684788ac2bff781b6 --- /dev/null +++ b/environment.yml @@ -0,0 +1,164 @@ +name: tidy +channels: + - defaults + - conda-forge +dependencies: + - _r-mutex=1.0.0=anacondar_1 + - appnope=0.1.2=py38hecd8cb5_1001 + - asttokens=2.0.5=pyhd3eb1b0_0 + - backcall=0.2.0=pyhd3eb1b0_0 + - bottleneck=1.3.5=py38h67323c0_0 + - brotli=1.0.9=hca72f7f_7 + - brotli-bin=1.0.9=hca72f7f_7 + - bwidget=1.9.11=1 + - bzip2=1.0.8=h1de35cc_0 + - c-ares=1.18.1=hca72f7f_0 + - ca-certificates=2022.9.24=h033912b_0 + - cairo=1.16.0=h691a603_2 + - cctools_osx-64=949.0.1=hc7db93f_25 + - certifi=2022.9.24=pyhd8ed1ab_0 + - cffi=1.15.1=py38hc55c11b_0 + - clang=14.0.6=hecd8cb5_0 + - clang-14=14.0.6=default_h32c6d10_0 + - clang_osx-64=14.0.6=hb1e4b1b_0 + - clangxx=14.0.6=default_h32c6d10_0 + - clangxx_osx-64=14.0.6=hd8b9576_0 + - compiler-rt=14.0.6=hda8b6b8_0 + - compiler-rt_osx-64=14.0.6=h8d5cb93_0 + - curl=7.85.0=hca72f7f_0 + - cycler=0.11.0=pyhd3eb1b0_0 + - debugpy=1.5.1=py38he9d5cce_0 + - decorator=5.1.1=pyhd3eb1b0_0 + - entrypoints=0.4=py38hecd8cb5_0 + - executing=0.8.3=pyhd3eb1b0_0 + - expat=2.4.9=he9d5cce_0 + - font-ttf-dejavu-sans-mono=2.37=hd3eb1b0_0 + - font-ttf-inconsolata=2.001=hcb22688_0 + - font-ttf-source-code-pro=2.030=hd3eb1b0_0 + - font-ttf-ubuntu=0.83=h8b1ccd4_0 + - fontconfig=2.14.1=h5bb23bf_0 + - fonts-anaconda=1=h8fa9717_0 + - fonts-conda-ecosystem=1=hd3eb1b0_0 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=hd8bbffd_0 + - fribidi=1.0.10=haf1e3a3_0 + - gettext=0.21.1=h8a4c099_0 + - gfortran_impl_osx-64=11.3.0=h29cdc64_28 + - gfortran_osx-64=11.3.0=h96634ac_0 + - giflib=5.2.1=haf1e3a3_0 + - glib=2.74.1=hbc0c0cd_0 + - glib-tools=2.74.1=hbc0c0cd_0 + - gmp=6.2.1=he9d5cce_3 + - graphite2=1.3.14=he9d5cce_1 + - gsl=2.7.1=hdbe807d_1 + - harfbuzz=5.3.0=h08f8713_0 + - icu=70.1=h96cf925_0 + - ipykernel=6.15.2=py38hecd8cb5_0 + - 
ipython=8.6.0=py38hecd8cb5_0 + - isl=0.22.1=he9d5cce_3 + - jedi=0.18.1=py38hecd8cb5_1 + - jinja2=3.1.2=py38hecd8cb5_0 + - joblib=1.1.1=py38hecd8cb5_0 + - jpeg=9e=hca72f7f_0 + - jupyter_client=7.4.7=py38hecd8cb5_0 + - jupyter_core=4.11.2=py38hecd8cb5_0 + - kiwisolver=1.4.2=py38he9d5cce_0 + - krb5=1.19.3=hb49756b_0 + - lcms2=2.12=hf1fd2bf_0 + - ld64_osx-64=530=h70f3046_25 + - ldid=2.1.2=h2d21305_2 + - lerc=3.0=he9d5cce_0 + - libblas=3.9.0=16_osx64_openblas + - libbrotlicommon=1.0.9=hca72f7f_7 + - libbrotlidec=1.0.9=hca72f7f_7 + - libbrotlienc=1.0.9=hca72f7f_7 + - libcblas=3.9.0=16_osx64_openblas + - libclang-cpp14=14.0.6=default_h32c6d10_0 + - libcurl=7.85.0=h6dfd666_0 + - libcxx=14.0.6=h9765a3e_0 + - libdeflate=1.8=h9ed2024_5 + - libedit=3.1.20210910=hca72f7f_0 + - libev=4.33=h9ed2024_1 + - libffi=3.4.2=hecd8cb5_5 + - libgfortran=5.0.0=11_3_0_hecd8cb5_28 + - libgfortran-devel_osx-64=11.3.0=h9dfd629_28 + - libgfortran5=11.3.0=h9dfd629_28 + - libglib=2.74.1=h3ba3332_0 + - libiconv=1.17=hac89ed1_0 + - liblapack=3.9.0=16_osx64_openblas + - libllvm14=14.0.6=h5b596cc_1 + - libnghttp2=1.46.0=ha29bfda_0 + - libopenblas=0.3.21=h54e7dc3_0 + - libpng=1.6.39=ha978bb4_0 + - libsodium=1.0.18=h1de35cc_0 + - libsqlite=3.40.0=ha978bb4_0 + - libssh2=1.10.0=h0a4fc7d_0 + - libtiff=4.4.0=h2cd0358_2 + - libwebp=1.2.4=h56c3ce4_0 + - libwebp-base=1.2.4=hca72f7f_0 + - libxml2=2.10.3=hb9e07b5_0 + - libzlib=1.2.13=hfd90126_4 + - llvm-openmp=14.0.6=h0dcd299_0 + - llvm-tools=14.0.6=h5b596cc_1 + - lz4-c=1.9.3=h23ab428_1 + - make=4.2.1=h3efe00b_1 + - markupsafe=2.1.1=py38hca72f7f_0 + - matplotlib=3.5.3=py38hecd8cb5_0 + - matplotlib-base=3.5.3=py38hfb0c5b7_0 + - matplotlib-inline=0.1.6=py38hecd8cb5_0 + - mpc=1.1.0=h6ef4df4_1 + - mpfr=4.0.2=h9066e36_1 + - munkres=1.1.4=py_0 + - ncurses=6.3=hca72f7f_3 + - nest-asyncio=1.5.5=py38hecd8cb5_0 + - numexpr=2.8.3=py38hec72209_1 + - numpy=1.23.5=py38hc2f29e8_0 + - openssl=1.1.1s=hfd90126_0 + - packaging=21.3=pyhd3eb1b0_0 + - pandas=1.5.1=py38h07fba90_0 + - 
pango=1.50.12=h7fca291_0 + - parso=0.8.3=pyhd3eb1b0_0 + - pcre2=10.37=he7042d7_1 + - pexpect=4.8.0=pyhd3eb1b0_3 + - pickleshare=0.7.5=pyhd3eb1b0_1003 + - pillow=9.2.0=py38hde71d04_1 + - pip=22.2.2=py38hecd8cb5_0 + - pixman=0.40.0=h9ed2024_1 + - prompt-toolkit=3.0.20=pyhd3eb1b0_0 + - psutil=5.9.0=py38hca72f7f_0 + - ptyprocess=0.7.0=pyhd3eb1b0_2 + - pure_eval=0.2.2=pyhd3eb1b0_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pygments=2.11.2=pyhd3eb1b0_0 + - pyparsing=3.0.9=py38hecd8cb5_0 + - pyreadr=0.4.7=py38h748759a_2 + - pyreadstat=1.2.0=py38h01a1b83_1 + - python=3.8.14=hc915b28_0_cpython + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - python_abi=3.8=3_cp38 + - pytz=2022.1=py38hecd8cb5_0 + - pyzmq=23.2.0=py38he9d5cce_0 + - r-base=4.1.3=he54549f_3 + - readline=8.2=hca72f7f_0 + - rpy2=3.5.6=py38r41hbd87e4b_0 + - scikit-learn=1.1.3=py38he9d5cce_0 + - scipy=1.9.3=py38hfb8b963_2 + - seaborn=0.12.0=py38hecd8cb5_0 + - setuptools=65.5.0=py38hecd8cb5_0 + - simplegeneric=0.8.1=py38_2 + - six=1.16.0=pyhd3eb1b0_1 + - stack_data=0.2.0=pyhd3eb1b0_0 + - tapi=1000.10.8=ha1b3eb9_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - tk=8.6.12=h5d9f67b_0 + - tktable=2.10=h1de35cc_0 + - tornado=6.2=py38hca72f7f_0 + - traitlets=5.1.1=pyhd3eb1b0_0 + - tzlocal=2.1=py38hecd8cb5_1 + - wcwidth=0.2.5=pyhd3eb1b0_0 + - wheel=0.37.1=pyhd3eb1b0_0 + - xz=5.2.6=hca72f7f_0 + - zeromq=4.3.4=h23ab428_0 + - zlib=1.2.13=hfd90126_4 + - zstd=1.5.2=hcb37349_0 +prefix: /Users/vmlandae/opt/anaconda3/envs/tidy diff --git a/environment_from_history.yml b/environment_from_history.yml new file mode 100644 index 0000000000000000000000000000000000000000..281f76e45a1b3e3a7597ebc55d8954d8afc88f43 --- /dev/null +++ b/environment_from_history.yml @@ -0,0 +1,63 @@ +name: tidy +channels: + - defaults + - conda-forge +dependencies: + - python=3.8 + - matplotlib + - seaborn + - pandas + - pyreadstat + - rpy2 + - scikit-learn + - readline + - ptyprocess + - executing + - pyparsing + - tornado + - zlib + - ca-certificates + - libsqlite + - pyzmq + - 
pip + - ncurses + - pure_eval + - jedi + - wheel + - entrypoints + - prompt-toolkit + - setuptools + - libcxx + - zeromq + - six + - traitlets + - openssl + - decorator + - ipython + - xz + - pexpect + - appnope + - backcall + - python-dateutil + - pygments + - stack_data + - jupyter_client + - libsodium + - libffi + - wcwidth + - jupyter_core + - matplotlib-inline + - nest-asyncio + - psutil + - asttokens + - packaging + - tk + - pickleshare + - certifi + - bzip2 + - parso + - debugpy + - libzlib + - ipykernel + - pyreadr +prefix: /Users/vmlandae/opt/anaconda3/envs/tidy diff --git a/tidy-data.pdf b/paper/tidy-data.pdf similarity index 100% rename from tidy-data.pdf rename to paper/tidy-data.pdf diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index 1c09045d5dd99d96a3e29ef8092345948bf77947..0000000000000000000000000000000000000000 --- a/poetry.lock +++ /dev/null @@ -1,1558 +0,0 @@ -[[package]] -category = "main" -description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -name = "appdirs" -optional = false -python-versions = "*" -version = "1.4.4" - -[[package]] -category = "main" -description = "Disable App Nap on OS X 10.9" -marker = "sys_platform == \"darwin\" or platform_system == \"Darwin\"" -name = "appnope" -optional = false -python-versions = "*" -version = "0.1.0" - -[[package]] -category = "main" -description = "The secure Argon2 password hashing algorithm." 
-name = "argon2-cffi" -optional = false -python-versions = "*" -version = "20.1.0" - -[package.dependencies] -cffi = ">=1.0.0" -six = "*" - -[package.extras] -dev = ["coverage (>=5.0.2)", "hypothesis", "pytest", "sphinx", "wheel", "pre-commit"] -docs = ["sphinx"] -tests = ["coverage (>=5.0.2)", "hypothesis", "pytest"] - -[[package]] -category = "main" -description = "Classes Without Boilerplate" -name = "attrs" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "20.1.0" - -[package.extras] -dev = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "sphinx-rtd-theme", "pre-commit"] -docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"] -tests = ["coverage (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] - -[[package]] -category = "main" -description = "Specifications for callback functions passed in to an API" -name = "backcall" -optional = false -python-versions = "*" -version = "0.2.0" - -[[package]] -category = "main" -description = "The uncompromising code formatter." -name = "black" -optional = false -python-versions = ">=3.6" -version = "19.10b0" - -[package.dependencies] -appdirs = "*" -attrs = ">=18.1.0" -click = ">=6.5" -pathspec = ">=0.6,<1" -regex = "*" -toml = ">=0.9.4" -typed-ast = ">=1.4.0" - -[package.extras] -d = ["aiohttp (>=3.3.2)", "aiohttp-cors"] - -[[package]] -category = "main" -description = "An easy safelist-based HTML-sanitizing tool." -name = "bleach" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "3.1.5" - -[package.dependencies] -packaging = "*" -six = ">=1.9.0" -webencodings = "*" - -[[package]] -category = "main" -description = "Python package for providing Mozilla's CA Bundle." 
-name = "certifi" -optional = false -python-versions = "*" -version = "2020.6.20" - -[[package]] -category = "main" -description = "Foreign Function Interface for Python calling C code." -name = "cffi" -optional = false -python-versions = "*" -version = "1.14.2" - -[package.dependencies] -pycparser = "*" - -[[package]] -category = "main" -description = "Universal encoding detector for Python 2 and 3" -name = "chardet" -optional = false -python-versions = "*" -version = "3.0.4" - -[[package]] -category = "main" -description = "Composable command line interface toolkit" -name = "click" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "7.1.2" - -[[package]] -category = "main" -description = "Cross-platform colored terminal text." -marker = "sys_platform == \"win32\"" -name = "colorama" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "0.4.3" - -[[package]] -category = "main" -description = "Composable style cycles" -name = "cycler" -optional = false -python-versions = "*" -version = "0.10.0" - -[package.dependencies] -six = "*" - -[[package]] -category = "main" -description = "Decorators for Humans" -name = "decorator" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*" -version = "4.4.2" - -[[package]] -category = "main" -description = "XML bomb protection for Python stdlib modules" -name = "defusedxml" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "0.6.0" - -[[package]] -category = "main" -description = "Discover and load entry points from installed packages." 
-name = "entrypoints" -optional = false -python-versions = ">=2.7" -version = "0.3" - -[[package]] -category = "main" -description = "Internationalized Domain Names in Applications (IDNA)" -name = "idna" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "2.10" - -[[package]] -category = "main" -description = "Read metadata from Python packages" -marker = "python_version < \"3.8\"" -name = "importlib-metadata" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -version = "1.7.0" - -[package.dependencies] -zipp = ">=0.5" - -[package.extras] -docs = ["sphinx", "rst.linker"] -testing = ["packaging", "pep517", "importlib-resources (>=1.3)"] - -[[package]] -category = "main" -description = "IPython Kernel for Jupyter" -name = "ipykernel" -optional = false -python-versions = ">=3.5" -version = "5.3.4" - -[package.dependencies] -appnope = "*" -ipython = ">=5.0.0" -jupyter-client = "*" -tornado = ">=4.2" -traitlets = ">=4.1.0" - -[package.extras] -test = ["pytest (!=5.3.4)", "pytest-cov", "flaky", "nose"] - -[[package]] -category = "main" -description = "IPython: Productive Interactive Computing" -name = "ipython" -optional = false -python-versions = ">=3.7" -version = "7.17.0" - -[package.dependencies] -appnope = "*" -backcall = "*" -colorama = "*" -decorator = "*" -jedi = ">=0.10" -pexpect = "*" -pickleshare = "*" -prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" -pygments = "*" -setuptools = ">=18.5" -traitlets = ">=4.2" - -[package.extras] -all = ["Sphinx (>=1.3)", "ipykernel", "ipyparallel", "ipywidgets", "nbconvert", "nbformat", "nose (>=0.10.1)", "notebook", "numpy (>=1.14)", "pygments", "qtconsole", "requests", "testpath"] -doc = ["Sphinx (>=1.3)"] -kernel = ["ipykernel"] -nbconvert = ["nbconvert"] -nbformat = ["nbformat"] -notebook = ["notebook", "ipywidgets"] -parallel = ["ipyparallel"] -qtconsole = ["qtconsole"] -test = ["nose (>=0.10.1)", "requests", "testpath", 
"pygments", "nbformat", "ipykernel", "numpy (>=1.14)"] - -[[package]] -category = "main" -description = "Vestigial utilities from IPython" -name = "ipython-genutils" -optional = false -python-versions = "*" -version = "0.2.0" - -[[package]] -category = "main" -description = "An autocompletion tool for Python that can be used for text editors." -name = "jedi" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "0.17.2" - -[package.dependencies] -parso = ">=0.7.0,<0.8.0" - -[package.extras] -qa = ["flake8 (3.7.9)"] -testing = ["Django (<3.1)", "colorama", "docopt", "pytest (>=3.9.0,<5.0.0)"] - -[[package]] -category = "main" -description = "A very fast and expressive template engine." -name = "jinja2" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "2.11.2" - -[package.dependencies] -MarkupSafe = ">=0.23" - -[package.extras] -i18n = ["Babel (>=0.8)"] - -[[package]] -category = "main" -description = "Lightweight pipelining: using Python functions as pipeline jobs." -name = "joblib" -optional = false -python-versions = ">=3.6" -version = "0.16.0" - -[[package]] -category = "main" -description = "A Python implementation of the JSON5 data format." 
-name = "json5" -optional = false -python-versions = "*" -version = "0.9.5" - -[package.extras] -dev = ["hypothesis"] - -[[package]] -category = "main" -description = "An implementation of JSON Schema validation for Python" -name = "jsonschema" -optional = false -python-versions = "*" -version = "3.2.0" - -[package.dependencies] -attrs = ">=17.4.0" -pyrsistent = ">=0.14.0" -setuptools = "*" -six = ">=1.11.0" - -[package.dependencies.importlib-metadata] -python = "<3.8" -version = "*" - -[package.extras] -format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] -format_nongpl = ["idna", "jsonpointer (>1.13)", "webcolors", "rfc3986-validator (>0.1.0)", "rfc3339-validator"] - -[[package]] -category = "main" -description = "Jupyter protocol implementation and client libraries" -name = "jupyter-client" -optional = false -python-versions = ">=3.5" -version = "6.1.7" - -[package.dependencies] -jupyter-core = ">=4.6.0" -python-dateutil = ">=2.1" -pyzmq = ">=13" -tornado = ">=4.1" -traitlets = "*" - -[package.extras] -test = ["ipykernel", "ipython", "mock", "pytest", "pytest-asyncio", "async-generator", "pytest-timeout"] - -[[package]] -category = "main" -description = "Jupyter core package. A base package on which Jupyter projects rely." -name = "jupyter-core" -optional = false -python-versions = "!=3.0,!=3.1,!=3.2,!=3.3,!=3.4,>=2.7" -version = "4.6.3" - -[package.dependencies] -pywin32 = ">=1.0" -traitlets = "*" - -[[package]] -category = "main" -description = "The JupyterLab notebook server extension." 
-name = "jupyterlab" -optional = false -python-versions = ">=3.5" -version = "2.2.6" - -[package.dependencies] -jinja2 = ">=2.10" -jupyterlab-server = ">=1.1.5,<2.0" -notebook = ">=4.3.1" -tornado = "<6.0.0 || >6.0.0,<6.0.1 || >6.0.1,<6.0.2 || >6.0.2" - -[package.extras] -docs = ["jsx-lexer", "recommonmark", "sphinx", "sphinx-rtd-theme", "sphinx-copybutton"] -test = ["pytest", "pytest-check-links", "requests", "wheel", "virtualenv"] - -[[package]] -category = "main" -description = "JupyterLab Server" -name = "jupyterlab-server" -optional = false -python-versions = ">=3.5" -version = "1.2.0" - -[package.dependencies] -jinja2 = ">=2.10" -json5 = "*" -jsonschema = ">=3.0.1" -notebook = ">=4.2.0" -requests = "*" - -[package.extras] -test = ["pytest", "requests"] - -[[package]] -category = "main" -description = "A fast implementation of the Cassowary constraint solver" -name = "kiwisolver" -optional = false -python-versions = ">=3.6" -version = "1.2.0" - -[[package]] -category = "main" -description = "Safely add untrusted strings to HTML/XML markup." -name = "markupsafe" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" -version = "1.1.1" - -[[package]] -category = "main" -description = "Python plotting package" -name = "matplotlib" -optional = false -python-versions = ">=3.6" -version = "3.3.1" - -[package.dependencies] -certifi = ">=2020.06.20" -cycler = ">=0.10" -kiwisolver = ">=1.0.1" -numpy = ">=1.15" -pillow = ">=6.2.0" -pyparsing = ">=2.0.3,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6" -python-dateutil = ">=2.1" - -[[package]] -category = "main" -description = "The fastest markdown parser in pure Python" -name = "mistune" -optional = false -python-versions = "*" -version = "0.8.4" - -[[package]] -category = "main" -description = "A simple extension for Jupyter Notebook and Jupyter Lab to beautify Python code automatically using Black." 
-name = "nb-black" -optional = false -python-versions = "*" -version = "1.0.7" - -[package.dependencies] -ipython = "*" - -[[package]] -category = "main" -description = "Converting Jupyter Notebooks" -name = "nbconvert" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "5.6.1" - -[package.dependencies] -bleach = "*" -defusedxml = "*" -entrypoints = ">=0.2.2" -jinja2 = ">=2.4" -jupyter-core = "*" -mistune = ">=0.8.1,<2" -nbformat = ">=4.4" -pandocfilters = ">=1.4.1" -pygments = "*" -testpath = "*" -traitlets = ">=4.2" - -[package.extras] -all = ["pytest", "pytest-cov", "ipykernel", "jupyter-client (>=5.3.1)", "ipywidgets (>=7)", "pebble", "tornado (>=4.0)", "sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "sphinxcontrib-github-alt", "ipython", "mock"] -docs = ["sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "sphinxcontrib-github-alt", "ipython", "jupyter-client (>=5.3.1)"] -execute = ["jupyter-client (>=5.3.1)"] -serve = ["tornado (>=4.0)"] -test = ["pytest", "pytest-cov", "ipykernel", "jupyter-client (>=5.3.1)", "ipywidgets (>=7)", "pebble", "mock"] - -[[package]] -category = "main" -description = "The Jupyter Notebook format" -name = "nbformat" -optional = false -python-versions = ">=3.5" -version = "5.0.7" - -[package.dependencies] -ipython-genutils = "*" -jsonschema = ">=2.4,<2.5.0 || >2.5.0" -jupyter-core = "*" -traitlets = ">=4.1" - -[package.extras] -test = ["pytest", "pytest-cov", "testpath"] - -[[package]] -category = "main" -description = "A web-based notebook environment for interactive computing" -name = "notebook" -optional = false -python-versions = ">=3.5" -version = "6.1.3" - -[package.dependencies] -Send2Trash = "*" -argon2-cffi = "*" -ipykernel = "*" -ipython-genutils = "*" -jinja2 = "*" -jupyter-client = ">=5.3.4" -jupyter-core = ">=4.6.1" -nbconvert = "*" -nbformat = "*" -prometheus-client = "*" -pyzmq = ">=17" -terminado = ">=0.8.3" -tornado = ">=5.0" -traitlets = 
">=4.2.1" - -[package.extras] -docs = ["sphinx", "nbsphinx", "sphinxcontrib-github-alt"] -test = ["nose", "coverage", "requests", "nose-warnings-filters", "nbval", "nose-exclude", "selenium", "pytest", "pytest-cov", "requests-unixsocket"] - -[[package]] -category = "main" -description = "NumPy is the fundamental package for array computing with Python." -name = "numpy" -optional = false -python-versions = ">=3.6" -version = "1.19.1" - -[[package]] -category = "main" -description = "Core utilities for Python packages" -name = "packaging" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "20.4" - -[package.dependencies] -pyparsing = ">=2.0.2" -six = "*" - -[[package]] -category = "main" -description = "Powerful data structures for data analysis, time series, and statistics" -name = "pandas" -optional = false -python-versions = ">=3.6.1" -version = "1.1.1" - -[package.dependencies] -numpy = ">=1.15.4" -python-dateutil = ">=2.7.3" -pytz = ">=2017.2" - -[package.extras] -test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"] - -[[package]] -category = "main" -description = "Utilities for writing pandoc filters in python" -name = "pandocfilters" -optional = false -python-versions = "*" -version = "1.4.2" - -[[package]] -category = "main" -description = "A Python Parser" -name = "parso" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "0.7.1" - -[package.extras] -testing = ["docopt", "pytest (>=3.0.7)"] - -[[package]] -category = "main" -description = "Utility library for gitignore style pattern matching of file paths." -name = "pathspec" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "0.8.0" - -[[package]] -category = "main" -description = "Pexpect allows easy control of interactive console applications." 
-marker = "sys_platform != \"win32\"" -name = "pexpect" -optional = false -python-versions = "*" -version = "4.8.0" - -[package.dependencies] -ptyprocess = ">=0.5" - -[[package]] -category = "main" -description = "Tiny 'shelve'-like database with concurrency support" -name = "pickleshare" -optional = false -python-versions = "*" -version = "0.7.5" - -[[package]] -category = "main" -description = "Python Imaging Library (Fork)" -name = "pillow" -optional = false -python-versions = ">=3.5" -version = "7.2.0" - -[[package]] -category = "main" -description = "Python client for the Prometheus monitoring system." -name = "prometheus-client" -optional = false -python-versions = "*" -version = "0.8.0" - -[package.extras] -twisted = ["twisted"] - -[[package]] -category = "main" -description = "Library for building powerful interactive command lines in Python" -name = "prompt-toolkit" -optional = false -python-versions = ">=3.6.1" -version = "3.0.6" - -[package.dependencies] -wcwidth = "*" - -[[package]] -category = "main" -description = "Run a subprocess in a pseudo terminal" -marker = "sys_platform != \"win32\" or os_name != \"nt\"" -name = "ptyprocess" -optional = false -python-versions = "*" -version = "0.6.0" - -[[package]] -category = "main" -description = "C parser in Python" -name = "pycparser" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "2.20" - -[[package]] -category = "main" -description = "Pygments is a syntax highlighting package written in Python." 
-name = "pygments" -optional = false -python-versions = ">=3.5" -version = "2.6.1" - -[[package]] -category = "main" -description = "Python parsing module" -name = "pyparsing" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -version = "2.4.7" - -[[package]] -category = "main" -description = "Persistent/Functional/Immutable data structures" -name = "pyrsistent" -optional = false -python-versions = "*" -version = "0.16.0" - -[package.dependencies] -six = "*" - -[[package]] -category = "main" -description = "Extensions to the standard Python datetime module" -name = "python-dateutil" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -version = "2.8.1" - -[package.dependencies] -six = ">=1.5" - -[[package]] -category = "main" -description = "World timezone definitions, modern and historical" -name = "pytz" -optional = false -python-versions = "*" -version = "2020.1" - -[[package]] -category = "main" -description = "Python for Window Extensions" -marker = "sys_platform == \"win32\"" -name = "pywin32" -optional = false -python-versions = "*" -version = "228" - -[[package]] -category = "main" -description = "Python bindings for the winpty library" -marker = "os_name == \"nt\"" -name = "pywinpty" -optional = false -python-versions = "*" -version = "0.5.7" - -[[package]] -category = "main" -description = "Python bindings for 0MQ" -name = "pyzmq" -optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*" -version = "19.0.2" - -[[package]] -category = "main" -description = "Alternative regular expression module, to replace re." -name = "regex" -optional = false -python-versions = "*" -version = "2020.7.14" - -[[package]] -category = "main" -description = "Python HTTP for Humans." 
-name = "requests" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "2.24.0" - -[package.dependencies] -certifi = ">=2017.4.17" -chardet = ">=3.0.2,<4" -idna = ">=2.5,<3" -urllib3 = ">=1.21.1,<1.25.0 || >1.25.0,<1.25.1 || >1.25.1,<1.26" - -[package.extras] -security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7)", "win-inet-pton"] - -[[package]] -category = "main" -description = "Python interface to the R language (embedded R)" -name = "rpy2" -optional = false -python-versions = "*" -version = "2.8.6" - -[package.dependencies] -six = "*" - -[[package]] -category = "main" -description = "Read and write SPSS files" -name = "savreaderwriter" -optional = false -python-versions = "*" -version = "3.4.2" - -[package.extras] -Cython = ["cython"] -numpy = ["numpy"] - -[[package]] -category = "main" -description = "A set of python modules for machine learning and data mining" -name = "scikit-learn" -optional = false -python-versions = ">=3.6" -version = "0.23.2" - -[package.dependencies] -joblib = ">=0.11" -numpy = ">=1.13.3" -scipy = ">=0.19.1" -threadpoolctl = ">=2.0.0" - -[package.extras] -alldeps = ["numpy (>=1.13.3)", "scipy (>=0.19.1)"] - -[[package]] -category = "main" -description = "SciPy: Scientific Library for Python" -name = "scipy" -optional = false -python-versions = ">=3.6" -version = "1.5.2" - -[package.dependencies] -numpy = ">=1.14.5" - -[[package]] -category = "main" -description = "seaborn: statistical data visualization" -name = "seaborn" -optional = false -python-versions = ">=3.6" -version = "0.10.1" - -[package.dependencies] -matplotlib = ">=2.1.2" -numpy = ">=1.13.3" -pandas = ">=0.22.0" -scipy = ">=1.0.1" - -[[package]] -category = "main" -description = "Send file to trash natively under Mac OS X, Windows and Linux." 
-name = "send2trash" -optional = false -python-versions = "*" -version = "1.5.0" - -[[package]] -category = "main" -description = "Python 2 and 3 compatibility utilities" -name = "six" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -version = "1.15.0" - -[[package]] -category = "main" -description = "A set of python modules for machine learning and data mining" -name = "sklearn" -optional = false -python-versions = "*" -version = "0.0" - -[package.dependencies] -scikit-learn = "*" - -[[package]] -category = "main" -description = "Terminals served to xterm.js using Tornado websockets" -name = "terminado" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "0.8.3" - -[package.dependencies] -ptyprocess = "*" -pywinpty = ">=0.5" -tornado = ">=4" - -[[package]] -category = "main" -description = "Test utilities for code working with files and commands" -name = "testpath" -optional = false -python-versions = "*" -version = "0.4.4" - -[package.extras] -test = ["pathlib2"] - -[[package]] -category = "main" -description = "threadpoolctl" -name = "threadpoolctl" -optional = false -python-versions = ">=3.5" -version = "2.1.0" - -[[package]] -category = "main" -description = "Python Library for Tom's Obvious, Minimal Language" -name = "toml" -optional = false -python-versions = "*" -version = "0.10.1" - -[[package]] -category = "main" -description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
-name = "tornado" -optional = false -python-versions = ">= 3.5" -version = "6.0.4" - -[[package]] -category = "main" -description = "Traitlets Python config system" -name = "traitlets" -optional = false -python-versions = "*" -version = "4.3.3" - -[package.dependencies] -decorator = "*" -ipython-genutils = "*" -six = "*" - -[package.extras] -test = ["pytest", "mock"] - -[[package]] -category = "main" -description = "a fork of Python 2 and 3 ast modules with type comment support" -name = "typed-ast" -optional = false -python-versions = "*" -version = "1.4.1" - -[[package]] -category = "main" -description = "HTTP library with thread-safe connection pooling, file post, and more." -name = "urllib3" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" -version = "1.25.10" - -[package.extras] -brotli = ["brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "pyOpenSSL (>=0.14)", "ipaddress"] -socks = ["PySocks (>=1.5.6,<1.5.7 || >1.5.7,<2.0)"] - -[[package]] -category = "main" -description = "Measures the displayed width of unicode strings in a terminal" -name = "wcwidth" -optional = false -python-versions = "*" -version = "0.2.5" - -[[package]] -category = "main" -description = "Character encoding aliases for legacy web content" -name = "webencodings" -optional = false -python-versions = "*" -version = "0.5.1" - -[[package]] -category = "main" -description = "Backport of pathlib-compatible object wrapper for zip files" -marker = "python_version < \"3.8\"" -name = "zipp" -optional = false -python-versions = ">=3.6" -version = "3.1.0" - -[package.extras] -docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] -testing = ["jaraco.itertools", "func-timeout"] - -[metadata] -content-hash = "d86824def3f779f19f9b5a77c8f619f224dfb208ac1eb7609828e0f4819463f7" -lock-version = "1.0" -python-versions = "^3.7" - -[metadata.files] -appdirs = [ - {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = 
"sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, - {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, -] -appnope = [ - {file = "appnope-0.1.0-py2.py3-none-any.whl", hash = "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0"}, - {file = "appnope-0.1.0.tar.gz", hash = "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"}, -] -argon2-cffi = [ - {file = "argon2-cffi-20.1.0.tar.gz", hash = "sha256:d8029b2d3e4b4cea770e9e5a0104dd8fa185c1724a0f01528ae4826a6d25f97d"}, - {file = "argon2_cffi-20.1.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:6ea92c980586931a816d61e4faf6c192b4abce89aa767ff6581e6ddc985ed003"}, - {file = "argon2_cffi-20.1.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:05a8ac07c7026542377e38389638a8a1e9b78f1cd8439cd7493b39f08dd75fbf"}, - {file = "argon2_cffi-20.1.0-cp27-cp27m-win32.whl", hash = "sha256:0bf066bc049332489bb2d75f69216416329d9dc65deee127152caeb16e5ce7d5"}, - {file = "argon2_cffi-20.1.0-cp27-cp27m-win_amd64.whl", hash = "sha256:57358570592c46c420300ec94f2ff3b32cbccd10d38bdc12dc6979c4a8484fbc"}, - {file = "argon2_cffi-20.1.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:7d455c802727710e9dfa69b74ccaab04568386ca17b0ad36350b622cd34606fe"}, - {file = "argon2_cffi-20.1.0-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:b160416adc0f012fb1f12588a5e6954889510f82f698e23ed4f4fa57f12a0647"}, - {file = "argon2_cffi-20.1.0-cp35-cp35m-win32.whl", hash = "sha256:9bee3212ba4f560af397b6d7146848c32a800652301843df06b9e8f68f0f7361"}, - {file = "argon2_cffi-20.1.0-cp35-cp35m-win_amd64.whl", hash = "sha256:392c3c2ef91d12da510cfb6f9bae52512a4552573a9e27600bdb800e05905d2b"}, - {file = "argon2_cffi-20.1.0-cp36-cp36m-win32.whl", hash = "sha256:ba7209b608945b889457f949cc04c8e762bed4fe3fec88ae9a6b7765ae82e496"}, - {file = "argon2_cffi-20.1.0-cp36-cp36m-win_amd64.whl", hash = 
"sha256:da7f0445b71db6d3a72462e04f36544b0de871289b0bc8a7cc87c0f5ec7079fa"}, - {file = "argon2_cffi-20.1.0-cp37-abi3-macosx_10_6_intel.whl", hash = "sha256:cc0e028b209a5483b6846053d5fd7165f460a1f14774d79e632e75e7ae64b82b"}, - {file = "argon2_cffi-20.1.0-cp37-cp37m-win32.whl", hash = "sha256:18dee20e25e4be86680b178b35ccfc5d495ebd5792cd00781548d50880fee5c5"}, - {file = "argon2_cffi-20.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:6678bb047373f52bcff02db8afab0d2a77d83bde61cfecea7c5c62e2335cb203"}, - {file = "argon2_cffi-20.1.0-cp38-cp38-win32.whl", hash = "sha256:77e909cc756ef81d6abb60524d259d959bab384832f0c651ed7dcb6e5ccdbb78"}, - {file = "argon2_cffi-20.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2"}, -] -attrs = [ - {file = "attrs-20.1.0-py2.py3-none-any.whl", hash = "sha256:2867b7b9f8326499ab5b0e2d12801fa5c98842d2cbd22b35112ae04bf85b4dff"}, - {file = "attrs-20.1.0.tar.gz", hash = "sha256:0ef97238856430dcf9228e07f316aefc17e8939fc8507e18c6501b761ef1a42a"}, -] -backcall = [ - {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, - {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, -] -black = [ - {file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"}, - {file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"}, -] -bleach = [ - {file = "bleach-3.1.5-py2.py3-none-any.whl", hash = "sha256:2bce3d8fab545a6528c8fa5d9f9ae8ebc85a56da365c7f85180bfe96a35ef22f"}, - {file = "bleach-3.1.5.tar.gz", hash = "sha256:3c4c520fdb9db59ef139915a5db79f8b51bc2a7257ea0389f30c846883430a4b"}, -] -certifi = [ - {file = "certifi-2020.6.20-py2.py3-none-any.whl", hash = "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"}, - {file = 
"certifi-2020.6.20.tar.gz", hash = "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3"}, -] -cffi = [ - {file = "cffi-1.14.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:da9d3c506f43e220336433dffe643fbfa40096d408cb9b7f2477892f369d5f82"}, - {file = "cffi-1.14.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:23e44937d7695c27c66a54d793dd4b45889a81b35c0751ba91040fe825ec59c4"}, - {file = "cffi-1.14.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:0da50dcbccd7cb7e6c741ab7912b2eff48e85af217d72b57f80ebc616257125e"}, - {file = "cffi-1.14.2-cp27-cp27m-win32.whl", hash = "sha256:76ada88d62eb24de7051c5157a1a78fd853cca9b91c0713c2e973e4196271d0c"}, - {file = "cffi-1.14.2-cp27-cp27m-win_amd64.whl", hash = "sha256:15a5f59a4808f82d8ec7364cbace851df591c2d43bc76bcbe5c4543a7ddd1bf1"}, - {file = "cffi-1.14.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:e4082d832e36e7f9b2278bc774886ca8207346b99f278e54c9de4834f17232f7"}, - {file = "cffi-1.14.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:57214fa5430399dffd54f4be37b56fe22cedb2b98862550d43cc085fb698dc2c"}, - {file = "cffi-1.14.2-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:6843db0343e12e3f52cc58430ad559d850a53684f5b352540ca3f1bc56df0731"}, - {file = "cffi-1.14.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:577791f948d34d569acb2d1add5831731c59d5a0c50a6d9f629ae1cefd9ca4a0"}, - {file = "cffi-1.14.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:8662aabfeab00cea149a3d1c2999b0731e70c6b5bac596d95d13f643e76d3d4e"}, - {file = "cffi-1.14.2-cp35-cp35m-win32.whl", hash = "sha256:837398c2ec00228679513802e3744d1e8e3cb1204aa6ad408b6aff081e99a487"}, - {file = "cffi-1.14.2-cp35-cp35m-win_amd64.whl", hash = "sha256:bf44a9a0141a082e89c90e8d785b212a872db793a0080c20f6ae6e2a0ebf82ad"}, - {file = "cffi-1.14.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:29c4688ace466a365b85a51dcc5e3c853c1d283f293dfcc12f7a77e498f160d2"}, - {file = "cffi-1.14.2-cp36-cp36m-manylinux1_i686.whl", hash = 
"sha256:99cc66b33c418cd579c0f03b77b94263c305c389cb0c6972dac420f24b3bf123"}, - {file = "cffi-1.14.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:65867d63f0fd1b500fa343d7798fa64e9e681b594e0a07dc934c13e76ee28fb1"}, - {file = "cffi-1.14.2-cp36-cp36m-win32.whl", hash = "sha256:f5033952def24172e60493b68717792e3aebb387a8d186c43c020d9363ee7281"}, - {file = "cffi-1.14.2-cp36-cp36m-win_amd64.whl", hash = "sha256:7057613efefd36cacabbdbcef010e0a9c20a88fc07eb3e616019ea1692fa5df4"}, - {file = "cffi-1.14.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6539314d84c4d36f28d73adc1b45e9f4ee2a89cdc7e5d2b0a6dbacba31906798"}, - {file = "cffi-1.14.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:672b539db20fef6b03d6f7a14b5825d57c98e4026401fce838849f8de73fe4d4"}, - {file = "cffi-1.14.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:95e9094162fa712f18b4f60896e34b621df99147c2cee216cfa8f022294e8e9f"}, - {file = "cffi-1.14.2-cp37-cp37m-win32.whl", hash = "sha256:b9aa9d8818c2e917fa2c105ad538e222a5bce59777133840b93134022a7ce650"}, - {file = "cffi-1.14.2-cp37-cp37m-win_amd64.whl", hash = "sha256:e4b9b7af398c32e408c00eb4e0d33ced2f9121fd9fb978e6c1b57edd014a7d15"}, - {file = "cffi-1.14.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e613514a82539fc48291d01933951a13ae93b6b444a88782480be32245ed4afa"}, - {file = "cffi-1.14.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9b219511d8b64d3fa14261963933be34028ea0e57455baf6781fe399c2c3206c"}, - {file = "cffi-1.14.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c0b48b98d79cf795b0916c57bebbc6d16bb43b9fc9b8c9f57f4cf05881904c75"}, - {file = "cffi-1.14.2-cp38-cp38-win32.whl", hash = "sha256:15419020b0e812b40d96ec9d369b2bc8109cc3295eac6e013d3261343580cc7e"}, - {file = "cffi-1.14.2-cp38-cp38-win_amd64.whl", hash = "sha256:12a453e03124069b6896107ee133ae3ab04c624bb10683e1ed1c1663df17c13c"}, - {file = "cffi-1.14.2.tar.gz", hash = "sha256:ae8f34d50af2c2154035984b8b5fc5d9ed63f32fe615646ab435b05b132ca91b"}, -] -chardet = [ - {file = 
"chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, - {file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"}, -] -click = [ - {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, - {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, -] -colorama = [ - {file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"}, - {file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"}, -] -cycler = [ - {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"}, - {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"}, -] -decorator = [ - {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, - {file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, -] -defusedxml = [ - {file = "defusedxml-0.6.0-py2.py3-none-any.whl", hash = "sha256:6687150770438374ab581bb7a1b327a847dd9c5749e396102de3fad4e8a3ef93"}, - {file = "defusedxml-0.6.0.tar.gz", hash = "sha256:f684034d135af4c6cbb949b8a4d2ed61634515257a67299e5f940fbaa34377f5"}, -] -entrypoints = [ - {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, - {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, -] -idna = [ - {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, - {file = 
"idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, -] -importlib-metadata = [ - {file = "importlib_metadata-1.7.0-py2.py3-none-any.whl", hash = "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"}, - {file = "importlib_metadata-1.7.0.tar.gz", hash = "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83"}, -] -ipykernel = [ - {file = "ipykernel-5.3.4-py3-none-any.whl", hash = "sha256:d6fbba26dba3cebd411382bc484f7bc2caa98427ae0ddb4ab37fe8bfeb5c7dd3"}, - {file = "ipykernel-5.3.4.tar.gz", hash = "sha256:9b2652af1607986a1b231c62302d070bc0534f564c393a5d9d130db9abbbe89d"}, -] -ipython = [ - {file = "ipython-7.17.0-py3-none-any.whl", hash = "sha256:5a8f159ca8b22b9a0a1f2a28befe5ad2b703339afb58c2ffe0d7c8d7a3af5999"}, - {file = "ipython-7.17.0.tar.gz", hash = "sha256:b70974aaa2674b05eb86a910c02ed09956a33f2dd6c71afc60f0b128a77e7f28"}, -] -ipython-genutils = [ - {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, - {file = "ipython_genutils-0.2.0.tar.gz", hash = "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"}, -] -jedi = [ - {file = "jedi-0.17.2-py2.py3-none-any.whl", hash = "sha256:98cc583fa0f2f8304968199b01b6b4b94f469a1f4a74c1560506ca2a211378b5"}, - {file = "jedi-0.17.2.tar.gz", hash = "sha256:86ed7d9b750603e4ba582ea8edc678657fb4007894a12bcf6f4bb97892f31d20"}, -] -jinja2 = [ - {file = "Jinja2-2.11.2-py2.py3-none-any.whl", hash = "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"}, - {file = "Jinja2-2.11.2.tar.gz", hash = "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0"}, -] -joblib = [ - {file = "joblib-0.16.0-py3-none-any.whl", hash = "sha256:d348c5d4ae31496b2aa060d6d9b787864dd204f9480baaa52d18850cb43e9f49"}, - {file = "joblib-0.16.0.tar.gz", hash = "sha256:8f52bf24c64b608bf0b2563e0e47d6fcf516abc8cfafe10cfd98ad66d94f92d6"}, 
-] -json5 = [ - {file = "json5-0.9.5-py2.py3-none-any.whl", hash = "sha256:af1a1b9a2850c7f62c23fde18be4749b3599fd302f494eebf957e2ada6b9e42c"}, - {file = "json5-0.9.5.tar.gz", hash = "sha256:703cfee540790576b56a92e1c6aaa6c4b0d98971dc358ead83812aa4d06bdb96"}, -] -jsonschema = [ - {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, - {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, -] -jupyter-client = [ - {file = "jupyter_client-6.1.7-py3-none-any.whl", hash = "sha256:c958d24d6eacb975c1acebb68ac9077da61b5f5c040f22f6849928ad7393b950"}, - {file = "jupyter_client-6.1.7.tar.gz", hash = "sha256:49e390b36fe4b4226724704ea28d9fb903f1a3601b6882ce3105221cd09377a1"}, -] -jupyter-core = [ - {file = "jupyter_core-4.6.3-py2.py3-none-any.whl", hash = "sha256:a4ee613c060fe5697d913416fc9d553599c05e4492d58fac1192c9a6844abb21"}, - {file = "jupyter_core-4.6.3.tar.gz", hash = "sha256:394fd5dd787e7c8861741880bdf8a00ce39f95de5d18e579c74b882522219e7e"}, -] -jupyterlab = [ - {file = "jupyterlab-2.2.6-py3-none-any.whl", hash = "sha256:ae557386633fcb74359f436f2b87788a451260a07f2f14a1880fca8f4a9f64de"}, - {file = "jupyterlab-2.2.6.tar.gz", hash = "sha256:6554b022d2cd120100e165ec537c6511d70de7f89e253b3c667ea28f2a9263ff"}, -] -jupyterlab-server = [ - {file = "jupyterlab_server-1.2.0-py3-none-any.whl", hash = "sha256:55d256077bf13e5bc9e8fbd5aac51bef82f6315111cec6b712b9a5ededbba924"}, - {file = "jupyterlab_server-1.2.0.tar.gz", hash = "sha256:5431d9dde96659364b7cc877693d5d21e7b80cea7ae3959ecc2b87518e5f5d8c"}, -] -kiwisolver = [ - {file = "kiwisolver-1.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74"}, - {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8"}, - {file = 
"kiwisolver-1.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2"}, - {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:be046da49fbc3aa9491cc7296db7e8d27bcf0c3d5d1a40259c10471b014e4e0c"}, - {file = "kiwisolver-1.2.0-cp36-none-win32.whl", hash = "sha256:60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b"}, - {file = "kiwisolver-1.2.0-cp36-none-win_amd64.whl", hash = "sha256:556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c"}, - {file = "kiwisolver-1.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7"}, - {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec"}, - {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec"}, - {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:63f55f490b958b6299e4e5bdac66ac988c3d11b7fafa522800359075d4fa56d1"}, - {file = "kiwisolver-1.2.0-cp37-none-win32.whl", hash = "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c"}, - {file = "kiwisolver-1.2.0-cp37-none-win_amd64.whl", hash = "sha256:4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1"}, - {file = "kiwisolver-1.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113"}, - {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e"}, - {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657"}, - {file = "kiwisolver-1.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:38d05c9ecb24eee1246391820ed7137ac42a50209c203c908154782fced90e44"}, - 
{file = "kiwisolver-1.2.0-cp38-none-win32.whl", hash = "sha256:d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf"}, - {file = "kiwisolver-1.2.0-cp38-none-win_amd64.whl", hash = "sha256:18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd"}, - {file = "kiwisolver-1.2.0.tar.gz", hash = "sha256:247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f"}, -] -markupsafe = [ - {file = "MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"}, - {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"}, - {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183"}, - {file = "MarkupSafe-1.1.1-cp27-cp27m-win32.whl", hash = "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b"}, - {file = "MarkupSafe-1.1.1-cp27-cp27m-win_amd64.whl", hash = "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e"}, - {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f"}, - {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1"}, - {file = "MarkupSafe-1.1.1-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5"}, - {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1"}, - {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735"}, - {file = "MarkupSafe-1.1.1-cp34-cp34m-win32.whl", hash = "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21"}, - {file = 
"MarkupSafe-1.1.1-cp34-cp34m-win_amd64.whl", hash = "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235"}, - {file = "MarkupSafe-1.1.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b"}, - {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f"}, - {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905"}, - {file = "MarkupSafe-1.1.1-cp35-cp35m-win32.whl", hash = "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1"}, - {file = "MarkupSafe-1.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d"}, - {file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff"}, - {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473"}, - {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e"}, - {file = "MarkupSafe-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66"}, - {file = "MarkupSafe-1.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5"}, - {file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d"}, - {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e"}, - {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"}, - {file = 
"MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"}, - {file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-win32.whl", hash = "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b"}, - {file = "MarkupSafe-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"}, - {file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"}, -] -matplotlib = [ - {file = "matplotlib-3.3.1-1-cp36-cp36m-win32.whl", hash = "sha256:fab11637734eb14affb9c5e20d44d69429c18b49595d6e67c69295de24827fc4"}, - {file = "matplotlib-3.3.1-1-cp36-cp36m-win_amd64.whl", hash = "sha256:24392ac1a382ed753505286f1a1483bcfd67ed0c72d51be10c4c2013e386d0b7"}, - {file = "matplotlib-3.3.1-1-cp37-cp37m-win32.whl", hash = "sha256:c4ffb25b9855bdb6cdaf21bbd4ab2c229be539248304ac5215b94c816ea6e32e"}, - {file = "matplotlib-3.3.1-1-cp37-cp37m-win_amd64.whl", hash = "sha256:5a42c84264a1acbbf01c073a7bd05a0e80d99f94f10020d613b1b0526af9dcc2"}, - {file = "matplotlib-3.3.1-1-cp38-cp38-win32.whl", hash = "sha256:bc978374b43737f2bbc4a6ec48e52ae8c92be6278a80d0e2ce92f0eb0841f15c"}, - {file = "matplotlib-3.3.1-1-cp38-cp38-win_amd64.whl", hash = "sha256:6d0f03079f655ca0a2d2e0bf49c28e1ec43d9d544c33d8da1a88765f23018ecc"}, - {file = "matplotlib-3.3.1-1-cp39-cp39-win32.whl", hash = 
"sha256:2375f039b8c6ad6c1d03f01bf31f086bbbf997bf25e246f3b67f69969cde3d98"}, - {file = "matplotlib-3.3.1-1-cp39-cp39-win_amd64.whl", hash = "sha256:233bef5e3b3494f3b7057595ca814f23ba0ce67a03632ddf677be5132128b3db"}, - {file = "matplotlib-3.3.1-1-pp36-pypy36_pp73-win32.whl", hash = "sha256:f62c0b9a5d38c26673a8862cbae4d26cffcda260848e4278246b4e00f5a95eaf"}, - {file = "matplotlib-3.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:282f8a077a1217f9f2ac178596f27c1ae94abbc6e7b785e1b8f25e83918e9199"}, - {file = "matplotlib-3.3.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:83ae7261f4d5ab387be2caee29c4f499b1566f31c8ac97a0b8ab61afd9e3da92"}, - {file = "matplotlib-3.3.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1f9cf2b8500b833714a193cb24281153f5072d55b2e486009f1e81f0b7da3410"}, - {file = "matplotlib-3.3.1-cp36-cp36m-win32.whl", hash = "sha256:0dc15e1ad84ec06bf0c315e6c4c2cced13a21ce4c2b4955bb75097064a4b1e92"}, - {file = "matplotlib-3.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:ffbae66e2db70dc330cb3299525f97e1c0efdfc763e04e1a4e08f968c7ad21f0"}, - {file = "matplotlib-3.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88c6ab4a32a7447dad236b8371612aaba5c967d632ff11999e0478dd687f2c58"}, - {file = "matplotlib-3.3.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:cc2d6b47c8fee89da982a312b54949ec0cd6a7976a8cafb5b62dea6c9883a14d"}, - {file = "matplotlib-3.3.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:636c6330a7dcb18bac114dbeaff314fbbb0c11682f9a9601de69a50e331d18d7"}, - {file = "matplotlib-3.3.1-cp37-cp37m-win32.whl", hash = "sha256:73a493e340064e8fe03207d9333b68baca30d9f0da543ae4af6b6b4f13f0fe05"}, - {file = "matplotlib-3.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6739b6cd9278d5cb337df0bd4400ad37bbd04c6dc7aa2c65e1e83a02bc4cc6fd"}, - {file = "matplotlib-3.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:79f0c4730ad422ecb6bda814c9a9b375df36d6bd5a49eaa14e92e5f5e3e95ac3"}, - {file = "matplotlib-3.3.1-cp38-cp38-manylinux1_i686.whl", hash = 
"sha256:e4d6d3afc454b4afc0d9d0ed52a8fa40a1b0d8f33c8e143e49a5833a7e32266b"}, - {file = "matplotlib-3.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:96a5e667308dbf45670370d9dffb974e73b15bac0df0b5f3fb0b0ac7a572290e"}, - {file = "matplotlib-3.3.1-cp38-cp38-win32.whl", hash = "sha256:bd8fceaa3494b531d43b6206966ba15705638137fc2dc5da5ee560cf9476867b"}, - {file = "matplotlib-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:1507c2a8e4662f6fa1d3ecc760782b158df8a3244ecc21c1d8dbb1cd0b3f872e"}, - {file = "matplotlib-3.3.1-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c3619ec2a5ead430a4536ebf8c77ea55d8ce36418919f831d35bc657ed5f27e"}, - {file = "matplotlib-3.3.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:9703bc00a94a94c4e94b2ea0fbfbc9d2bb21159733134639fd931b6606c5c47e"}, - {file = "matplotlib-3.3.1.tar.gz", hash = "sha256:87f53bcce90772f942c2db56736788b39332d552461a5cb13f05ff45c1680f0e"}, -] -mistune = [ - {file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"}, - {file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"}, -] -nb-black = [ - {file = "nb_black-1.0.7.tar.gz", hash = "sha256:1ca52e3a46675f6a0a6d79ac73a1f8f951bef60f919eced56173e76ab1b6d62b"}, -] -nbconvert = [ - {file = "nbconvert-5.6.1-py2.py3-none-any.whl", hash = "sha256:f0d6ec03875f96df45aa13e21fd9b8450c42d7e1830418cccc008c0df725fcee"}, - {file = "nbconvert-5.6.1.tar.gz", hash = "sha256:21fb48e700b43e82ba0e3142421a659d7739b65568cc832a13976a77be16b523"}, -] -nbformat = [ - {file = "nbformat-5.0.7-py3-none-any.whl", hash = "sha256:ea55c9b817855e2dfcd3f66d74857342612a60b1f09653440f4a5845e6e3523f"}, - {file = "nbformat-5.0.7.tar.gz", hash = "sha256:54d4d6354835a936bad7e8182dcd003ca3dc0cedfee5a306090e04854343b340"}, -] -notebook = [ - {file = "notebook-6.1.3-py3-none-any.whl", hash = "sha256:964cc40cff68e473f3778aef9266e867f7703cb4aebdfd250f334efe02f64c86"}, - 
{file = "notebook-6.1.3.tar.gz", hash = "sha256:9990d51b9931a31e681635899aeb198b4c4b41586a9e87fbfaaed1a71d0a05b6"}, -] -numpy = [ - {file = "numpy-1.19.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b1cca51512299841bf69add3b75361779962f9cee7d9ee3bb446d5982e925b69"}, - {file = "numpy-1.19.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c9591886fc9cbe5532d5df85cb8e0cc3b44ba8ce4367bd4cf1b93dc19713da72"}, - {file = "numpy-1.19.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:cf1347450c0b7644ea142712619533553f02ef23f92f781312f6a3553d031fc7"}, - {file = "numpy-1.19.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:ed8a311493cf5480a2ebc597d1e177231984c818a86875126cfd004241a73c3e"}, - {file = "numpy-1.19.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:3673c8b2b29077f1b7b3a848794f8e11f401ba0b71c49fbd26fb40b71788b132"}, - {file = "numpy-1.19.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:56ef7f56470c24bb67fb43dae442e946a6ce172f97c69f8d067ff8550cf782ff"}, - {file = "numpy-1.19.1-cp36-cp36m-win32.whl", hash = "sha256:aaf42a04b472d12515debc621c31cf16c215e332242e7a9f56403d814c744624"}, - {file = "numpy-1.19.1-cp36-cp36m-win_amd64.whl", hash = "sha256:082f8d4dd69b6b688f64f509b91d482362124986d98dc7dc5f5e9f9b9c3bb983"}, - {file = "numpy-1.19.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e4f6d3c53911a9d103d8ec9518190e52a8b945bab021745af4939cfc7c0d4a9e"}, - {file = "numpy-1.19.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:5b6885c12784a27e957294b60f97e8b5b4174c7504665333c5e94fbf41ae5d6a"}, - {file = "numpy-1.19.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1bc0145999e8cb8aed9d4e65dd8b139adf1919e521177f198529687dbf613065"}, - {file = "numpy-1.19.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:5a936fd51049541d86ccdeef2833cc89a18e4d3808fe58a8abeb802665c5af93"}, - {file = "numpy-1.19.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:ef71a1d4fd4858596ae80ad1ec76404ad29701f8ca7cdcebc50300178db14dfc"}, - {file = 
"numpy-1.19.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b9792b0ac0130b277536ab8944e7b754c69560dac0415dd4b2dbd16b902c8954"}, - {file = "numpy-1.19.1-cp37-cp37m-win32.whl", hash = "sha256:b12e639378c741add21fbffd16ba5ad25c0a1a17cf2b6fe4288feeb65144f35b"}, - {file = "numpy-1.19.1-cp37-cp37m-win_amd64.whl", hash = "sha256:8343bf67c72e09cfabfab55ad4a43ce3f6bf6e6ced7acf70f45ded9ebb425055"}, - {file = "numpy-1.19.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e45f8e981a0ab47103181773cc0a54e650b2aef8c7b6cd07405d0fa8d869444a"}, - {file = "numpy-1.19.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:667c07063940e934287993366ad5f56766bc009017b4a0fe91dbd07960d0aba7"}, - {file = "numpy-1.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:480fdd4dbda4dd6b638d3863da3be82873bba6d32d1fc12ea1b8486ac7b8d129"}, - {file = "numpy-1.19.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:935c27ae2760c21cd7354402546f6be21d3d0c806fffe967f745d5f2de5005a7"}, - {file = "numpy-1.19.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:309cbcfaa103fc9a33ec16d2d62569d541b79f828c382556ff072442226d1968"}, - {file = "numpy-1.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:7ed448ff4eaffeb01094959b19cbaf998ecdee9ef9932381420d514e446601cd"}, - {file = "numpy-1.19.1-cp38-cp38-win32.whl", hash = "sha256:de8b4a9b56255797cbddb93281ed92acbc510fb7b15df3f01bd28f46ebc4edae"}, - {file = "numpy-1.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:92feb989b47f83ebef246adabc7ff3b9a59ac30601c3f6819f8913458610bdcc"}, - {file = "numpy-1.19.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:e1b1dc0372f530f26a03578ac75d5e51b3868b9b76cd2facba4c9ee0eb252ab1"}, - {file = "numpy-1.19.1.zip", hash = "sha256:b8456987b637232602ceb4d663cb34106f7eb780e247d51a260b84760fd8f491"}, -] -packaging = [ - {file = "packaging-20.4-py2.py3-none-any.whl", hash = "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"}, - {file = "packaging-20.4.tar.gz", hash = 
"sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8"}, -] -pandas = [ - {file = "pandas-1.1.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8c9ec12c480c4d915e23ee9c8a2d8eba8509986f35f307771045c1294a2e5b73"}, - {file = "pandas-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:e4b6c98f45695799990da328e6fd7d6187be32752ed64c2f22326ad66762d179"}, - {file = "pandas-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:16ae070c47474008769fc443ac765ffd88c3506b4a82966e7a605592978896f9"}, - {file = "pandas-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:88930c74f69e97b17703600233c0eaf1f4f4dd10c14633d522724c5c1b963ec4"}, - {file = "pandas-1.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:fe6f1623376b616e03d51f0dd95afd862cf9a33c18cf55ce0ed4bbe1c4444391"}, - {file = "pandas-1.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a81c4bf9c59010aa3efddbb6b9fc84a9b76dc0b4da2c2c2d50f06a9ef6ac0004"}, - {file = "pandas-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:1acc2bd7fc95e5408a4456897c2c2a1ae7c6acefe108d90479ab6d98d34fcc3d"}, - {file = "pandas-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:84c101d0f7bbf0d9f1be9a2f29f6fcc12415442558d067164e50a56edfb732b4"}, - {file = "pandas-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:391db82ebeb886143b96b9c6c6166686c9a272d00020e4e39ad63b792542d9e2"}, - {file = "pandas-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0366150fe8ee37ef89a45d3093e05026b5f895e42bbce3902ce3b6427f1b8471"}, - {file = "pandas-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d9644ac996149b2a51325d48d77e25c911e01aa6d39dc1b64be679cd71f683ec"}, - {file = "pandas-1.1.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:41675323d4fcdd15abde068607cad150dfe17f7d32290ee128e5fea98442bd09"}, - {file = "pandas-1.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0246c67cbaaaac8d25fed8d4cf2d8897bd858f0e540e8528a75281cee9ac516d"}, - {file = "pandas-1.1.1-cp38-cp38-win32.whl", hash = 
"sha256:01b1e536eb960822c5e6b58357cad8c4b492a336f4a5630bf0b598566462a578"}, - {file = "pandas-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:57c5f6be49259cde8e6f71c2bf240a26b071569cabc04c751358495d09419e56"}, - {file = "pandas-1.1.1.tar.gz", hash = "sha256:53328284a7bb046e2e885fd1b8c078bd896d7fc4575b915d4936f54984a2ba67"}, -] -pandocfilters = [ - {file = "pandocfilters-1.4.2.tar.gz", hash = "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9"}, -] -parso = [ - {file = "parso-0.7.1-py2.py3-none-any.whl", hash = "sha256:97218d9159b2520ff45eb78028ba8b50d2bc61dcc062a9682666f2dc4bd331ea"}, - {file = "parso-0.7.1.tar.gz", hash = "sha256:caba44724b994a8a5e086460bb212abc5a8bc46951bf4a9a1210745953622eb9"}, -] -pathspec = [ - {file = "pathspec-0.8.0-py2.py3-none-any.whl", hash = "sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0"}, - {file = "pathspec-0.8.0.tar.gz", hash = "sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061"}, -] -pexpect = [ - {file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, - {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, -] -pickleshare = [ - {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, - {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, -] -pillow = [ - {file = "Pillow-7.2.0-cp35-cp35m-macosx_10_10_intel.whl", hash = "sha256:1ca594126d3c4def54babee699c055a913efb01e106c309fa6b04405d474d5ae"}, - {file = "Pillow-7.2.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:c92302a33138409e8f1ad16731568c55c9053eee71bb05b6b744067e1b62380f"}, - {file = "Pillow-7.2.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:8dad18b69f710bf3a001d2bf3afab7c432785d94fcf819c16b5207b1cfd17d38"}, - {file = 
"Pillow-7.2.0-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:431b15cffbf949e89df2f7b48528be18b78bfa5177cb3036284a5508159492b5"}, - {file = "Pillow-7.2.0-cp35-cp35m-win32.whl", hash = "sha256:09d7f9e64289cb40c2c8d7ad674b2ed6105f55dc3b09aa8e4918e20a0311e7ad"}, - {file = "Pillow-7.2.0-cp35-cp35m-win_amd64.whl", hash = "sha256:0295442429645fa16d05bd567ef5cff178482439c9aad0411d3f0ce9b88b3a6f"}, - {file = "Pillow-7.2.0-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:ec29604081f10f16a7aea809ad42e27764188fc258b02259a03a8ff7ded3808d"}, - {file = "Pillow-7.2.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:612cfda94e9c8346f239bf1a4b082fdd5c8143cf82d685ba2dba76e7adeeb233"}, - {file = "Pillow-7.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0a80dd307a5d8440b0a08bd7b81617e04d870e40a3e46a32d9c246e54705e86f"}, - {file = "Pillow-7.2.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:06aba4169e78c439d528fdeb34762c3b61a70813527a2c57f0540541e9f433a8"}, - {file = "Pillow-7.2.0-cp36-cp36m-win32.whl", hash = "sha256:f7e30c27477dffc3e85c2463b3e649f751789e0f6c8456099eea7ddd53be4a8a"}, - {file = "Pillow-7.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:ffe538682dc19cc542ae7c3e504fdf54ca7f86fb8a135e59dd6bc8627eae6cce"}, - {file = "Pillow-7.2.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:94cf49723928eb6070a892cb39d6c156f7b5a2db4e8971cb958f7b6b104fb4c4"}, - {file = "Pillow-7.2.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6edb5446f44d901e8683ffb25ebdfc26988ee813da3bf91e12252b57ac163727"}, - {file = "Pillow-7.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:52125833b070791fcb5710fabc640fc1df07d087fc0c0f02d3661f76c23c5b8b"}, - {file = "Pillow-7.2.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:9ad7f865eebde135d526bb3163d0b23ffff365cf87e767c649550964ad72785d"}, - {file = "Pillow-7.2.0-cp37-cp37m-win32.whl", hash = "sha256:c79f9c5fb846285f943aafeafda3358992d64f0ef58566e23484132ecd8d7d63"}, - {file = "Pillow-7.2.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:d350f0f2c2421e65fbc62690f26b59b0bcda1b614beb318c81e38647e0f673a1"}, - {file = "Pillow-7.2.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:6d7741e65835716ceea0fd13a7d0192961212fd59e741a46bbed7a473c634ed6"}, - {file = "Pillow-7.2.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:edf31f1150778abd4322444c393ab9c7bd2af271dd4dafb4208fb613b1f3cdc9"}, - {file = "Pillow-7.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:d08b23fdb388c0715990cbc06866db554e1822c4bdcf6d4166cf30ac82df8c41"}, - {file = "Pillow-7.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:5e51ee2b8114def244384eda1c82b10e307ad9778dac5c83fb0943775a653cd8"}, - {file = "Pillow-7.2.0-cp38-cp38-win32.whl", hash = "sha256:725aa6cfc66ce2857d585f06e9519a1cc0ef6d13f186ff3447ab6dff0a09bc7f"}, - {file = "Pillow-7.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:a060cf8aa332052df2158e5a119303965be92c3da6f2d93b6878f0ebca80b2f6"}, - {file = "Pillow-7.2.0-pp36-pypy36_pp73-macosx_10_10_x86_64.whl", hash = "sha256:9c87ef410a58dd54b92424ffd7e28fd2ec65d2f7fc02b76f5e9b2067e355ebf6"}, - {file = "Pillow-7.2.0-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:e901964262a56d9ea3c2693df68bc9860b8bdda2b04768821e4c44ae797de117"}, - {file = "Pillow-7.2.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:25930fadde8019f374400f7986e8404c8b781ce519da27792cbe46eabec00c4d"}, - {file = "Pillow-7.2.0.tar.gz", hash = "sha256:97f9e7953a77d5a70f49b9a48da7776dc51e9b738151b22dacf101641594a626"}, -] -prometheus-client = [ - {file = "prometheus_client-0.8.0-py2.py3-none-any.whl", hash = "sha256:983c7ac4b47478720db338f1491ef67a100b474e3bc7dafcbaefb7d0b8f9b01c"}, - {file = "prometheus_client-0.8.0.tar.gz", hash = "sha256:c6e6b706833a6bd1fd51711299edee907857be10ece535126a158f911ee80915"}, -] -prompt-toolkit = [ - {file = "prompt_toolkit-3.0.6-py3-none-any.whl", hash = "sha256:683397077a64cd1f750b71c05afcfc6612a7300cb6932666531e5a54f38ea564"}, - {file = "prompt_toolkit-3.0.6.tar.gz", hash = 
"sha256:7630ab85a23302839a0f26b31cc24f518e6155dea1ed395ea61b42c45941b6a6"}, -] -ptyprocess = [ - {file = "ptyprocess-0.6.0-py2.py3-none-any.whl", hash = "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"}, - {file = "ptyprocess-0.6.0.tar.gz", hash = "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0"}, -] -pycparser = [ - {file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, - {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, -] -pygments = [ - {file = "Pygments-2.6.1-py3-none-any.whl", hash = "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"}, - {file = "Pygments-2.6.1.tar.gz", hash = "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44"}, -] -pyparsing = [ - {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, - {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, -] -pyrsistent = [ - {file = "pyrsistent-0.16.0.tar.gz", hash = "sha256:28669905fe725965daa16184933676547c5bb40a5153055a8dee2a4bd7933ad3"}, -] -python-dateutil = [ - {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, - {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, -] -pytz = [ - {file = "pytz-2020.1-py2.py3-none-any.whl", hash = "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed"}, - {file = "pytz-2020.1.tar.gz", hash = "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048"}, -] -pywin32 = [ - {file = "pywin32-228-cp27-cp27m-win32.whl", hash = "sha256:37dc9935f6a383cc744315ae0c2882ba1768d9b06700a70f35dc1ce73cd4ba9c"}, - {file = 
"pywin32-228-cp27-cp27m-win_amd64.whl", hash = "sha256:11cb6610efc2f078c9e6d8f5d0f957620c333f4b23466931a247fb945ed35e89"}, - {file = "pywin32-228-cp35-cp35m-win32.whl", hash = "sha256:1f45db18af5d36195447b2cffacd182fe2d296849ba0aecdab24d3852fbf3f80"}, - {file = "pywin32-228-cp35-cp35m-win_amd64.whl", hash = "sha256:6e38c44097a834a4707c1b63efa9c2435f5a42afabff634a17f563bc478dfcc8"}, - {file = "pywin32-228-cp36-cp36m-win32.whl", hash = "sha256:ec16d44b49b5f34e99eb97cf270806fdc560dff6f84d281eb2fcb89a014a56a9"}, - {file = "pywin32-228-cp36-cp36m-win_amd64.whl", hash = "sha256:a60d795c6590a5b6baeacd16c583d91cce8038f959bd80c53bd9a68f40130f2d"}, - {file = "pywin32-228-cp37-cp37m-win32.whl", hash = "sha256:af40887b6fc200eafe4d7742c48417529a8702dcc1a60bf89eee152d1d11209f"}, - {file = "pywin32-228-cp37-cp37m-win_amd64.whl", hash = "sha256:00eaf43dbd05ba6a9b0080c77e161e0b7a601f9a3f660727a952e40140537de7"}, - {file = "pywin32-228-cp38-cp38-win32.whl", hash = "sha256:fa6ba028909cfc64ce9e24bcf22f588b14871980d9787f1e2002c99af8f1850c"}, - {file = "pywin32-228-cp38-cp38-win_amd64.whl", hash = "sha256:9b3466083f8271e1a5eb0329f4e0d61925d46b40b195a33413e0905dccb285e8"}, - {file = "pywin32-228-cp39-cp39-win32.whl", hash = "sha256:ed74b72d8059a6606f64842e7917aeee99159ebd6b8d6261c518d002837be298"}, - {file = "pywin32-228-cp39-cp39-win_amd64.whl", hash = "sha256:8319bafdcd90b7202c50d6014efdfe4fde9311b3ff15fd6f893a45c0868de203"}, -] -pywinpty = [ - {file = "pywinpty-0.5.7-cp27-cp27m-win32.whl", hash = "sha256:b358cb552c0f6baf790de375fab96524a0498c9df83489b8c23f7f08795e966b"}, - {file = "pywinpty-0.5.7-cp27-cp27m-win_amd64.whl", hash = "sha256:1e525a4de05e72016a7af27836d512db67d06a015aeaf2fa0180f8e6a039b3c2"}, - {file = "pywinpty-0.5.7-cp35-cp35m-win32.whl", hash = "sha256:2740eeeb59297593a0d3f762269b01d0285c1b829d6827445fcd348fb47f7e70"}, - {file = "pywinpty-0.5.7-cp35-cp35m-win_amd64.whl", hash = "sha256:33df97f79843b2b8b8bc5c7aaf54adec08cc1bae94ee99dfb1a93c7a67704d95"}, - {file = 
"pywinpty-0.5.7-cp36-cp36m-win32.whl", hash = "sha256:e854211df55d107f0edfda8a80b39dfc87015bef52a8fe6594eb379240d81df2"}, - {file = "pywinpty-0.5.7-cp36-cp36m-win_amd64.whl", hash = "sha256:dbd838de92de1d4ebf0dce9d4d5e4fc38d0b7b1de837947a18b57a882f219139"}, - {file = "pywinpty-0.5.7-cp37-cp37m-win32.whl", hash = "sha256:5fb2c6c6819491b216f78acc2c521b9df21e0f53b9a399d58a5c151a3c4e2a2d"}, - {file = "pywinpty-0.5.7-cp37-cp37m-win_amd64.whl", hash = "sha256:dd22c8efacf600730abe4a46c1388355ce0d4ab75dc79b15d23a7bd87bf05b48"}, - {file = "pywinpty-0.5.7-cp38-cp38-win_amd64.whl", hash = "sha256:8fc5019ff3efb4f13708bd3b5ad327589c1a554cb516d792527361525a7cb78c"}, - {file = "pywinpty-0.5.7.tar.gz", hash = "sha256:2d7e9c881638a72ffdca3f5417dd1563b60f603e1b43e5895674c2a1b01f95a0"}, -] -pyzmq = [ - {file = "pyzmq-19.0.2-cp27-cp27m-macosx_10_9_intel.whl", hash = "sha256:59f1e54627483dcf61c663941d94c4af9bf4163aec334171686cdaee67974fe5"}, - {file = "pyzmq-19.0.2-cp27-cp27m-win32.whl", hash = "sha256:c36ffe1e5aa35a1af6a96640d723d0d211c5f48841735c2aa8d034204e87eb87"}, - {file = "pyzmq-19.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:0a422fc290d03958899743db091f8154958410fc76ce7ee0ceb66150f72c2c97"}, - {file = "pyzmq-19.0.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:c20dd60b9428f532bc59f2ef6d3b1029a28fc790d408af82f871a7db03e722ff"}, - {file = "pyzmq-19.0.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d46fb17f5693244de83e434648b3dbb4f4b0fec88415d6cbab1c1452b6f2ae17"}, - {file = "pyzmq-19.0.2-cp35-cp35m-macosx_10_9_intel.whl", hash = "sha256:f1a25a61495b6f7bb986accc5b597a3541d9bd3ef0016f50be16dbb32025b302"}, - {file = "pyzmq-19.0.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:ab0d01148d13854de716786ca73701012e07dff4dfbbd68c4e06d8888743526e"}, - {file = "pyzmq-19.0.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:720d2b6083498a9281eaee3f2927486e9fe02cd16d13a844f2e95217f243efea"}, - {file = "pyzmq-19.0.2-cp35-cp35m-win32.whl", hash = 
"sha256:29d51279060d0a70f551663bc592418bcad7f4be4eea7b324f6dd81de05cb4c1"}, - {file = "pyzmq-19.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:5120c64646e75f6db20cc16b9a94203926ead5d633de9feba4f137004241221d"}, - {file = "pyzmq-19.0.2-cp36-cp36m-macosx_10_9_intel.whl", hash = "sha256:8a6ada5a3f719bf46a04ba38595073df8d6b067316c011180102ba2a1925f5b5"}, - {file = "pyzmq-19.0.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:fa411b1d8f371d3a49d31b0789eb6da2537dadbb2aef74a43aa99a78195c3f76"}, - {file = "pyzmq-19.0.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:00dca814469436455399660247d74045172955459c0bd49b54a540ce4d652185"}, - {file = "pyzmq-19.0.2-cp36-cp36m-win32.whl", hash = "sha256:046b92e860914e39612e84fa760fc3f16054d268c11e0e25dcb011fb1bc6a075"}, - {file = "pyzmq-19.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:99cc0e339a731c6a34109e5c4072aaa06d8e32c0b93dc2c2d90345dd45fa196c"}, - {file = "pyzmq-19.0.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e36f12f503511d72d9bdfae11cadbadca22ff632ff67c1b5459f69756a029c19"}, - {file = "pyzmq-19.0.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c40fbb2b9933369e994b837ee72193d6a4c35dfb9a7c573257ef7ff28961272c"}, - {file = "pyzmq-19.0.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5d9fc809aa8d636e757e4ced2302569d6e60e9b9c26114a83f0d9d6519c40493"}, - {file = "pyzmq-19.0.2-cp37-cp37m-win32.whl", hash = "sha256:3fa6debf4bf9412e59353defad1f8035a1e68b66095a94ead8f7a61ae90b2675"}, - {file = "pyzmq-19.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:73483a2caaa0264ac717af33d6fb3f143d8379e60a422730ee8d010526ce1913"}, - {file = "pyzmq-19.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:36ab114021c0cab1a423fe6689355e8f813979f2c750968833b318c1fa10a0fd"}, - {file = "pyzmq-19.0.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:8b66b94fe6243d2d1d89bca336b2424399aac57932858b9a30309803ffc28112"}, - {file = "pyzmq-19.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = 
"sha256:654d3e06a4edc566b416c10293064732516cf8871a4522e0a2ba00cc2a2e600c"}, - {file = "pyzmq-19.0.2-cp38-cp38-win32.whl", hash = "sha256:276ad604bffd70992a386a84bea34883e696a6b22e7378053e5d3227321d9702"}, - {file = "pyzmq-19.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:09d24a80ccb8cbda1af6ed8eb26b005b6743e58e9290566d2a6841f4e31fa8e0"}, - {file = "pyzmq-19.0.2-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:c1a31cd42905b405530e92bdb70a8a56f048c8a371728b8acf9d746ecd4482c0"}, - {file = "pyzmq-19.0.2-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a7e7f930039ee0c4c26e4dfee015f20bd6919cd8b97c9cd7afbde2923a5167b6"}, - {file = "pyzmq-19.0.2.tar.gz", hash = "sha256:296540a065c8c21b26d63e3cea2d1d57902373b16e4256afe46422691903a438"}, -] -regex = [ - {file = "regex-2020.7.14-cp27-cp27m-win32.whl", hash = "sha256:e46d13f38cfcbb79bfdb2964b0fe12561fe633caf964a77a5f8d4e45fe5d2ef7"}, - {file = "regex-2020.7.14-cp27-cp27m-win_amd64.whl", hash = "sha256:6961548bba529cac7c07af2fd4d527c5b91bb8fe18995fed6044ac22b3d14644"}, - {file = "regex-2020.7.14-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c50a724d136ec10d920661f1442e4a8b010a4fe5aebd65e0c2241ea41dbe93dc"}, - {file = "regex-2020.7.14-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:8a51f2c6d1f884e98846a0a9021ff6861bdb98457879f412fdc2b42d14494067"}, - {file = "regex-2020.7.14-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:9c568495e35599625f7b999774e29e8d6b01a6fb684d77dee1f56d41b11b40cd"}, - {file = "regex-2020.7.14-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:51178c738d559a2d1071ce0b0f56e57eb315bcf8f7d4cf127674b533e3101f88"}, - {file = "regex-2020.7.14-cp36-cp36m-win32.whl", hash = "sha256:9eddaafb3c48e0900690c1727fba226c4804b8e6127ea409689c3bb492d06de4"}, - {file = "regex-2020.7.14-cp36-cp36m-win_amd64.whl", hash = "sha256:14a53646369157baa0499513f96091eb70382eb50b2c82393d17d7ec81b7b85f"}, - {file = "regex-2020.7.14-cp37-cp37m-manylinux1_i686.whl", hash = 
"sha256:1269fef3167bb52631ad4fa7dd27bf635d5a0790b8e6222065d42e91bede4162"}, - {file = "regex-2020.7.14-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d0a5095d52b90ff38592bbdc2644f17c6d495762edf47d876049cfd2968fbccf"}, - {file = "regex-2020.7.14-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:4c037fd14c5f4e308b8370b447b469ca10e69427966527edcab07f52d88388f7"}, - {file = "regex-2020.7.14-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:bc3d98f621898b4a9bc7fecc00513eec8f40b5b83913d74ccb445f037d58cd89"}, - {file = "regex-2020.7.14-cp37-cp37m-win32.whl", hash = "sha256:46bac5ca10fb748d6c55843a931855e2727a7a22584f302dd9bb1506e69f83f6"}, - {file = "regex-2020.7.14-cp37-cp37m-win_amd64.whl", hash = "sha256:0dc64ee3f33cd7899f79a8d788abfbec168410be356ed9bd30bbd3f0a23a7204"}, - {file = "regex-2020.7.14-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5ea81ea3dbd6767873c611687141ec7b06ed8bab43f68fad5b7be184a920dc99"}, - {file = "regex-2020.7.14-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:bbb332d45b32df41200380fff14712cb6093b61bd142272a10b16778c418e98e"}, - {file = "regex-2020.7.14-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:c11d6033115dc4887c456565303f540c44197f4fc1a2bfb192224a301534888e"}, - {file = "regex-2020.7.14-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:75aaa27aa521a182824d89e5ab0a1d16ca207318a6b65042b046053cfc8ed07a"}, - {file = "regex-2020.7.14-cp38-cp38-win32.whl", hash = "sha256:d6cff2276e502b86a25fd10c2a96973fdb45c7a977dca2138d661417f3728341"}, - {file = "regex-2020.7.14-cp38-cp38-win_amd64.whl", hash = "sha256:7a2dd66d2d4df34fa82c9dc85657c5e019b87932019947faece7983f2089a840"}, - {file = "regex-2020.7.14.tar.gz", hash = "sha256:3a3af27a8d23143c49a3420efe5b3f8cf1a48c6fc8bc6856b03f638abc1833bb"}, -] -requests = [ - {file = "requests-2.24.0-py2.py3-none-any.whl", hash = "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"}, - {file = "requests-2.24.0.tar.gz", hash = 
"sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b"}, -] -rpy2 = [ - {file = "rpy2-2.8.6.tar.gz", hash = "sha256:004d13734a7b9a85cbc1e7a93ec87df741e28db1273ab5b0d9efaac04a9c5f98"}, -] -savreaderwriter = [ - {file = "savReaderWriter-3.4.2.tar.gz", hash = "sha256:868fe96db95706eb17168f9ccb5d5827e3bf9e7f11bb6ab6b47970654d980e89"}, -] -scikit-learn = [ - {file = "scikit-learn-0.23.2.tar.gz", hash = "sha256:20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480"}, - {file = "scikit_learn-0.23.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:98508723f44c61896a4e15894b2016762a55555fbf09365a0bb1870ecbd442de"}, - {file = "scikit_learn-0.23.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:a64817b050efd50f9abcfd311870073e500ae11b299683a519fbb52d85e08d25"}, - {file = "scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:daf276c465c38ef736a79bd79fc80a249f746bcbcae50c40945428f7ece074f8"}, - {file = "scikit_learn-0.23.2-cp36-cp36m-win32.whl", hash = "sha256:cb3e76380312e1f86abd20340ab1d5b3cc46a26f6593d3c33c9ea3e4c7134028"}, - {file = "scikit_learn-0.23.2-cp36-cp36m-win_amd64.whl", hash = "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca"}, - {file = "scikit_learn-0.23.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aa95c2f17d2f80534156215c87bee72b6aa314a7f8b8fe92a2d71f47280570d"}, - {file = "scikit_learn-0.23.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6c28a1d00aae7c3c9568f61aafeaad813f0f01c729bee4fd9479e2132b215c1d"}, - {file = "scikit_learn-0.23.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:da8e7c302003dd765d92a5616678e591f347460ac7b53e53d667be7dfe6d1b10"}, - {file = "scikit_learn-0.23.2-cp37-cp37m-win32.whl", hash = "sha256:d9a1ce5f099f29c7c33181cc4386660e0ba891b21a60dc036bf369e3a3ee3aec"}, - {file = "scikit_learn-0.23.2-cp37-cp37m-win_amd64.whl", hash = "sha256:914ac2b45a058d3f1338d7736200f7f3b094857758895f8667be8a81ff443b5b"}, - {file = 
"scikit_learn-0.23.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7671bbeddd7f4f9a6968f3b5442dac5f22bf1ba06709ef888cc9132ad354a9ab"}, - {file = "scikit_learn-0.23.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:d0dcaa54263307075cb93d0bee3ceb02821093b1b3d25f66021987d305d01dce"}, - {file = "scikit_learn-0.23.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ce7a8021c9defc2b75620571b350acc4a7d9763c25b7593621ef50f3bd019a2"}, - {file = "scikit_learn-0.23.2-cp38-cp38-win32.whl", hash = "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc"}, - {file = "scikit_learn-0.23.2-cp38-cp38-win_amd64.whl", hash = "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea"}, -] -scipy = [ - {file = "scipy-1.5.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cca9fce15109a36a0a9f9cfc64f870f1c140cb235ddf27fe0328e6afb44dfed0"}, - {file = "scipy-1.5.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:1c7564a4810c1cd77fcdee7fa726d7d39d4e2695ad252d7c86c3ea9d85b7fb8f"}, - {file = "scipy-1.5.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:07e52b316b40a4f001667d1ad4eb5f2318738de34597bd91537851365b6c61f1"}, - {file = "scipy-1.5.2-cp36-cp36m-win32.whl", hash = "sha256:d56b10d8ed72ec1be76bf10508446df60954f08a41c2d40778bc29a3a9ad9bce"}, - {file = "scipy-1.5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:8e28e74b97fc8d6aa0454989db3b5d36fc27e69cef39a7ee5eaf8174ca1123cb"}, - {file = "scipy-1.5.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6e86c873fe1335d88b7a4bfa09d021f27a9e753758fd75f3f92d714aa4093768"}, - {file = "scipy-1.5.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:a0afbb967fd2c98efad5f4c24439a640d39463282040a88e8e928db647d8ac3d"}, - {file = "scipy-1.5.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:eecf40fa87eeda53e8e11d265ff2254729d04000cd40bae648e76ff268885d66"}, - {file = "scipy-1.5.2-cp37-cp37m-win32.whl", hash = "sha256:315aa2165aca31375f4e26c230188db192ed901761390be908c9b21d8b07df62"}, - {file = 
"scipy-1.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:ec5fe57e46828d034775b00cd625c4a7b5c7d2e354c3b258d820c6c72212a6ec"}, - {file = "scipy-1.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fc98f3eac993b9bfdd392e675dfe19850cc8c7246a8fd2b42443e506344be7d9"}, - {file = "scipy-1.5.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:a785409c0fa51764766840185a34f96a0a93527a0ff0230484d33a8ed085c8f8"}, - {file = "scipy-1.5.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0a0e9a4e58a4734c2eba917f834b25b7e3b6dc333901ce7784fd31aefbd37b2f"}, - {file = "scipy-1.5.2-cp38-cp38-win32.whl", hash = "sha256:dac09281a0eacd59974e24525a3bc90fa39b4e95177e638a31b14db60d3fa806"}, - {file = "scipy-1.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:92eb04041d371fea828858e4fff182453c25ae3eaa8782d9b6c32b25857d23bc"}, - {file = "scipy-1.5.2.tar.gz", hash = "sha256:066c513d90eb3fd7567a9e150828d39111ebd88d3e924cdfc9f8ce19ab6f90c9"}, -] -seaborn = [ - {file = "seaborn-0.10.1-py3-none-any.whl", hash = "sha256:c901ce494541fb4714cfa7db79d0232dc3f4c4dfd3f273bacf17816084df5b53"}, - {file = "seaborn-0.10.1.tar.gz", hash = "sha256:2d1a0c9d6bd1bc3cadb0364b8f06540f51322a670cf8438d0fde1c1c7317adc0"}, -] -send2trash = [ - {file = "Send2Trash-1.5.0-py3-none-any.whl", hash = "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b"}, - {file = "Send2Trash-1.5.0.tar.gz", hash = "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2"}, -] -six = [ - {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, - {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, -] -sklearn = [ - {file = "sklearn-0.0.tar.gz", hash = "sha256:e23001573aa194b834122d2b9562459bf5ae494a2d59ca6b8aa22c85a44c0e31"}, -] -terminado = [ - {file = "terminado-0.8.3-py2.py3-none-any.whl", hash = "sha256:a43dcb3e353bc680dd0783b1d9c3fc28d529f190bc54ba9a229f72fe6e7a54d7"}, - {file = 
"terminado-0.8.3.tar.gz", hash = "sha256:4804a774f802306a7d9af7322193c5390f1da0abb429e082a10ef1d46e6fb2c2"}, -] -testpath = [ - {file = "testpath-0.4.4-py2.py3-none-any.whl", hash = "sha256:bfcf9411ef4bf3db7579063e0546938b1edda3d69f4e1fb8756991f5951f85d4"}, - {file = "testpath-0.4.4.tar.gz", hash = "sha256:60e0a3261c149755f4399a1fff7d37523179a70fdc3abdf78de9fc2604aeec7e"}, -] -threadpoolctl = [ - {file = "threadpoolctl-2.1.0-py3-none-any.whl", hash = "sha256:38b74ca20ff3bb42caca8b00055111d74159ee95c4370882bbff2b93d24da725"}, - {file = "threadpoolctl-2.1.0.tar.gz", hash = "sha256:ddc57c96a38beb63db45d6c159b5ab07b6bced12c45a1f07b2b92f272aebfa6b"}, -] -toml = [ - {file = "toml-0.10.1-py2.py3-none-any.whl", hash = "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88"}, - {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, -] -tornado = [ - {file = "tornado-6.0.4-cp35-cp35m-win32.whl", hash = "sha256:5217e601700f24e966ddab689f90b7ea4bd91ff3357c3600fa1045e26d68e55d"}, - {file = "tornado-6.0.4-cp35-cp35m-win_amd64.whl", hash = "sha256:c98232a3ac391f5faea6821b53db8db461157baa788f5d6222a193e9456e1740"}, - {file = "tornado-6.0.4-cp36-cp36m-win32.whl", hash = "sha256:5f6a07e62e799be5d2330e68d808c8ac41d4a259b9cea61da4101b83cb5dc673"}, - {file = "tornado-6.0.4-cp36-cp36m-win_amd64.whl", hash = "sha256:c952975c8ba74f546ae6de2e226ab3cc3cc11ae47baf607459a6728585bb542a"}, - {file = "tornado-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:2c027eb2a393d964b22b5c154d1a23a5f8727db6fda837118a776b29e2b8ebc6"}, - {file = "tornado-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:5618f72e947533832cbc3dec54e1dffc1747a5cb17d1fd91577ed14fa0dc081b"}, - {file = "tornado-6.0.4-cp38-cp38-win32.whl", hash = "sha256:22aed82c2ea340c3771e3babc5ef220272f6fd06b5108a53b4976d0d722bcd52"}, - {file = "tornado-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:c58d56003daf1b616336781b26d184023ea4af13ae143d9dda65e31e534940b9"}, - {file = 
"tornado-6.0.4.tar.gz", hash = "sha256:0fe2d45ba43b00a41cd73f8be321a44936dc1aba233dee979f17a042b83eb6dc"}, -] -traitlets = [ - {file = "traitlets-4.3.3-py2.py3-none-any.whl", hash = "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44"}, - {file = "traitlets-4.3.3.tar.gz", hash = "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"}, -] -typed-ast = [ - {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3"}, - {file = "typed_ast-1.4.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb"}, - {file = "typed_ast-1.4.1-cp35-cp35m-win32.whl", hash = "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919"}, - {file = "typed_ast-1.4.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01"}, - {file = "typed_ast-1.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75"}, - {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652"}, - {file = "typed_ast-1.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"}, - {file = "typed_ast-1.4.1-cp36-cp36m-win32.whl", hash = "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1"}, - {file = "typed_ast-1.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa"}, - {file = "typed_ast-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614"}, - {file = "typed_ast-1.4.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41"}, - {file = 
"typed_ast-1.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b"}, - {file = "typed_ast-1.4.1-cp37-cp37m-win32.whl", hash = "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe"}, - {file = "typed_ast-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355"}, - {file = "typed_ast-1.4.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6"}, - {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907"}, - {file = "typed_ast-1.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d"}, - {file = "typed_ast-1.4.1-cp38-cp38-win32.whl", hash = "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c"}, - {file = "typed_ast-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4"}, - {file = "typed_ast-1.4.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34"}, - {file = "typed_ast-1.4.1.tar.gz", hash = "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b"}, -] -urllib3 = [ - {file = "urllib3-1.25.10-py2.py3-none-any.whl", hash = "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"}, - {file = "urllib3-1.25.10.tar.gz", hash = "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a"}, -] -wcwidth = [ - {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, - {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, -] -webencodings = [ - {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = 
"sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, - {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, -] -zipp = [ - {file = "zipp-3.1.0-py3-none-any.whl", hash = "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b"}, - {file = "zipp-3.1.0.tar.gz", hash = "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"}, -] diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index b04cc768ab008c7890811709a27e18aeab517540..0000000000000000000000000000000000000000 --- a/pyproject.toml +++ /dev/null @@ -1,37 +0,0 @@ -[build-system] -build-backend = "poetry.masonry.api" -requires = ["poetry>=0.12"] - -[tool.poetry] -name = "tidy-data" -version = "0.1.0" - -authors = ["Alexander Hess "] -description = "A Python implementation for Hadley Wickham's Tidy Data paper" -keywords = [ - "data-cleaning", - "data-science", - "messy-data", - "python", - "tidy-data", -] -license = "MIT" - -[tool.poetry.dependencies] -python = "^3.7" - -# Data Science Tools -jupyterlab = "^2.2.6" -matplotlib = "^3.3.1" -numpy = "^1.19.1" -pandas = "^1.1.1" -seaborn = "^0.10.1" -sklearn = "^0.0" - -# Interfaces to other tools -rpy2 = "==2.8.*" # R support -savreaderwriter = "^3.4.2" # IBM SPSS support - -# Code Formatters -black = "^19.10b0" -nb_black = "^1.0.7"