Merge branch 'develop' into main

This commit is contained in:
Alexander Hess 2024-07-15 12:16:36 +02:00
commit cc56869d98
Signed by: alexander
GPG key ID: 344EA5AB10D868E0
18 changed files with 2803 additions and 1439 deletions

View file

@ -536,9 +536,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -550,7 +550,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -149,9 +149,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -163,7 +163,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -507,7 +507,7 @@
"\n",
"The indented line constitutes the `for`-loop's body. In the example, we simply take each of the numbers in `numbers`, one at a time, and add it to a `total` that is initialized at `0`. In other words, we calculate the sum of all the elements in `numbers`.\n",
"\n",
"Many beginners struggle with the term \"loop.\" To visualize the looping behavior of this code, we use the online tool [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](http://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Atotal%20%3D%200%0A%0Afor%20number%20in%20numbers%3A%0A%20%20%20%20total%20%3D%20total%20%2B%20number%0A%0Atotal&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). That tool is helpful for two reasons:\n",
"Many beginners struggle with the term \"loop.\" To visualize the looping behavior of this code, we use the online tool [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](http://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Atotal%20%3D%200%0A%0Afor%20number%20in%20numbers%3A%0A%20%20%20%20total%20%3D%20total%20%2B%20number%0A%0Atotal&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). That tool is helpful for two reasons:\n",
"1. It allows us to execute code in \"slow motion\" (i.e., by clicking the \"next\" button on the left side, only the next atomic step of the code snippet is executed).\n",
"2. It shows what happens inside the computer's memory on the right-hand side."
]
@ -999,9 +999,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -1013,7 +1013,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -178,9 +178,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -192,7 +192,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -112,9 +112,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -126,7 +126,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -73,7 +73,7 @@
"\n",
"Let's execute the function with `numbers` as the input. We see the same `6` below the cell as we do above where we run the code without a function. Without the `return` statement in the function's body, we would not see any output here.\n",
"\n",
"To see what happens in detail, take a look at [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Adef%20add_evens%28numbers%29%3A%0A%20%20%20%20%22%22%22Sum%20up%20all%20the%20even%20numbers%20in%20a%20list.%22%22%22%0A%20%20%20%20result%20%3D%200%0A%0A%20%20%20%20for%20number%20in%20numbers%3A%0A%20%20%20%20%20%20%20%20if%20number%20%25%202%20%3D%3D%200%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20result%20%3D%20result%20%2B%20number%0A%0A%20%20%20%20return%20result%0A%0Atotal%20%3D%20add_evens%28numbers%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false) again. You should notice how there are two variables by the name `numbers` in memory. Python manages the memory with a concept called **namespaces** or **scopes**, which are just fancy terms for saying that Python can tell variables from different contexts apart."
"To see what happens in detail, take a look at [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Adef%20add_evens%28numbers%29%3A%0A%20%20%20%20%22%22%22Sum%20up%20all%20the%20even%20numbers%20in%20a%20list.%22%22%22%0A%20%20%20%20result%20%3D%200%0A%0A%20%20%20%20for%20number%20in%20numbers%3A%0A%20%20%20%20%20%20%20%20if%20number%20%25%202%20%3D%3D%200%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20result%20%3D%20result%20%2B%20number%0A%0A%20%20%20%20return%20result%0A%0Atotal%20%3D%20add_evens%28numbers%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false) again. You should notice how there are two variables by the name `numbers` in memory. Python manages the memory with a concept called **namespaces** or **scopes**, which are just fancy terms for saying that Python can tell variables from different contexts apart."
]
},
{
@ -151,7 +151,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_707190/1049141082.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mresult\u001b[49m\n",
"\u001b[0;31mNameError\u001b[0m: name 'result' is not defined"
]
}
@ -418,21 +418,39 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"To access a function inside the [random <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html) module, for example, the [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function, we use the `.` operator, formally called the attribute access operator. The [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function simply returns a random decimal number between `0` and `1`."
"To access a function inside the [random <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html) module, for example, the [seed() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.seed) function, we use the `.` operator, formally called the attribute access operator. \n",
"\n",
"We use [random.seed() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.seed) to make the random numbers *replicable* on separate runs of this notebook."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function simply returns a random decimal number between `0` and `1`."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7021021034327006"
"0.6394267984578837"
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -450,16 +468,16 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
"True"
]
},
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -477,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@ -486,7 +504,7 @@
"3"
]
},
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@ -505,9 +523,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -519,7 +537,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -257,9 +257,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -271,7 +271,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -237,19 +237,18 @@
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'int' object has no attribute 'is_integer'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/2418692311.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'is_integer'"
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.is_integer()"
"a.is_integer() # Note: In Python versions < 3.12 this cell raises an `AttributeError`"
]
},
{
@ -494,7 +493,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/2667408552.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmore_numbers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmore_numbers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend\u001b[49m(\u001b[38;5;241m10\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'append'"
]
}
@ -607,7 +606,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/3320204082.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mto_words\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"zero\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mto_words\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mzero\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n",
"\u001b[0;31mKeyError\u001b[0m: 'zero'"
]
}
@ -673,9 +672,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -687,7 +686,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

File diff suppressed because one or more lines are too long

View file

@ -244,9 +244,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -258,7 +258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -32,17 +32,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (1.3.3)\n",
"Requirement already satisfied: numpy>=1.17.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (1.21.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (2021.3)\n",
"Requirement already satisfied: six>=1.5 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
"Requirement already satisfied: pandas in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (2.2.2)\n",
"Requirement already satisfied: numpy>=1.26.0 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2.0.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: six>=1.5 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n"
]
}
],
"source": [
"%pip install pandas"
"!pip install pandas"
]
},
{
@ -927,7 +927,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -1852,10 +1852,7 @@
}
],
"source": [
"df.loc[\n",
" 200300:200800,\n",
" [\"o_street\", \"o_zip\", \"o_city\", \"o_latitude\", \"o_longitude\"]\n",
"]"
"df.loc[200300:200800, [\"o_street\", \"o_zip\", \"o_city\", \"o_latitude\", \"o_longitude\"]]"
]
},
{
@ -1882,7 +1879,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -1982,11 +1979,13 @@
"metadata": {},
"outputs": [],
"source": [
"df = df.astype({\n",
"df = df.astype(\n",
" {\n",
" \"pickup_at\": \"datetime64[ns]\",\n",
" \"delivery_at\": \"datetime64[ns]\",\n",
" \"cancelled\": bool,\n",
"})"
" }\n",
")"
]
},
{
@ -2006,7 +2005,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -2686,7 +2685,7 @@
"source": [
"df.loc[\n",
" max_a_table,\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"]\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"],\n",
"].head()"
]
},
@ -2821,12 +2820,10 @@
" max_a_table\n",
" &\n",
" (\n",
" (df[\"d_latitude\"] > 44.85)\n",
" |\n",
" (df[\"d_longitude\"] < -0.59)\n",
" (df[\"d_latitude\"] > 44.85) | (df[\"d_longitude\"] < -0.59)\n",
" )\n",
" ),\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"]\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"],\n",
"].head()"
]
},
@ -2933,12 +2930,8 @@
],
"source": [
"df.loc[\n",
" (\n",
" max_a_table\n",
" &\n",
" df[\"customer_id\"].isin([6037, 79900, 80095])\n",
" ),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"]\n",
" (max_a_table & df[\"customer_id\"].isin([6037, 79900, 80095])),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"],\n",
"].head()"
]
},
@ -3067,12 +3060,8 @@
],
"source": [
"df.loc[\n",
" (\n",
" max_a_table\n",
" &\n",
" ~df[\"customer_id\"].isin([6037, 79900, 80095])\n",
" ),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"]\n",
" (max_a_table & ~df[\"customer_id\"].isin([6037, 79900, 80095])),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"],\n",
"].head()"
]
},
@ -3166,10 +3155,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"customer_id\"\n",
"].unique()"
"df.loc[max_a_table, \"customer_id\"].unique()"
]
},
{
@ -3189,6 +3175,7 @@
{
"data": {
"text/plain": [
"restaurant_id\n",
"1254 78\n",
"1207 47\n",
"1204 39\n",
@ -3199,7 +3186,7 @@
"1249 23\n",
"1242 19\n",
"1221 18\n",
"Name: restaurant_id, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 35,
@ -3219,6 +3206,7 @@
{
"data": {
"text/plain": [
"customer_id\n",
"73919 14\n",
"10298 12\n",
"6037 8\n",
@ -3229,7 +3217,7 @@
"76838 3\n",
"75905 3\n",
"74791 3\n",
"Name: customer_id, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 36,
@ -3258,7 +3246,7 @@
{
"data": {
"text/plain": [
"15924.78"
"np.float64(15924.78)"
]
},
"execution_count": 37,
@ -3278,7 +3266,7 @@
{
"data": {
"text/plain": [
"885.0"
"np.float64(885.0)"
]
},
"execution_count": 38,
@ -3287,10 +3275,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].sum() / 100"
"df.loc[max_a_table, \"total\"].sum() / 100"
]
},
{
@ -3301,7 +3286,7 @@
{
"data": {
"text/plain": [
"3.5"
"np.float64(3.5)"
]
},
"execution_count": 39,
@ -3321,7 +3306,7 @@
{
"data": {
"text/plain": [
"83.7"
"np.float64(83.7)"
]
},
"execution_count": 40,
@ -3341,7 +3326,7 @@
{
"data": {
"text/plain": [
"12.5"
"np.float64(12.5)"
]
},
"execution_count": 41,
@ -3350,10 +3335,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].min() / 100"
"df.loc[max_a_table, \"total\"].min() / 100"
]
},
{
@ -3364,7 +3346,7 @@
{
"data": {
"text/plain": [
"60.0"
"np.float64(60.0)"
]
},
"execution_count": 42,
@ -3373,10 +3355,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].max() / 100"
"df.loc[max_a_table, \"total\"].max() / 100"
]
},
{
@ -3387,7 +3366,7 @@
{
"data": {
"text/plain": [
"22.94636887608069"
"np.float64(22.94636887608069)"
]
},
"execution_count": 43,
@ -3407,7 +3386,7 @@
{
"data": {
"text/plain": [
"22.95"
"np.float64(22.95)"
]
},
"execution_count": 44,
@ -3427,7 +3406,7 @@
{
"data": {
"text/plain": [
"22.69"
"np.float64(22.69)"
]
},
"execution_count": 45,
@ -3436,18 +3415,15 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].mean().round() / 100"
"df.loc[max_a_table, \"total\"].mean().round() / 100"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -3459,7 +3435,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -56,7 +56,7 @@
" \"orders.csv\",\n",
" index_col=\"order_id\",\n",
" dtype={\"cancelled\": bool},\n",
" parse_dates=[\"placed_at\", \"pickup_at\", \"delivery_at\"]\n",
" parse_dates=[\"placed_at\", \"pickup_at\", \"delivery_at\"],\n",
")"
]
},
@ -180,9 +180,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -194,7 +194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

File diff suppressed because one or more lines are too long

View file

@ -40,16 +40,21 @@ It is only expected that the student has:
### Getting started & Installation
To follow this workshop, an installation of **Python 3.8** or higher is expected.
To follow this workshop, an installation of **Python 3.9** or higher is expected.
A popular and beginner-friendly way is
to install the [Anaconda Distribution](https://www.anaconda.com/products/individual)
to install the [Anaconda Distribution](https://www.anaconda.com/download)
that not only ships Python itself
but also comes pre-packaged with a lot of third-party libraries
including [Python's scientific stack](https://scipy.org/about.html).
including [Python's scientific stack](https://scipy.org/).
Detailed instructions can be found [here <img height="12" style="display: inline-block" src="static/link/to_gh.png">](https://github.com/webartifex/intro-to-python#installation).
If you are *not* using the Anaconda Distribution,
you must install the third-party libraries via the command
`pip install -r requirements.txt` (or something equivalent)
before working with the notebook files.
## Contributing

3529
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,11 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "intro-to-data-science"
version = "0.1.0"
version = "0.3.0"
authors = [
"Alexander Hess <alexander@webartifex.biz>",
@ -25,13 +26,18 @@ readme = "README.md"
homepage = "https://github.com/webartifex/intro-to-data-science"
repository = "https://github.com/webartifex/intro-to-data-science"
package-mode = false
[tool.poetry.dependencies]
python = "^3.8"
python = "^3.9"
jupyterlab = "^3.1"
matplotlib = "^3.4"
numpy = "^1.21"
pandas = "^1.3"
scikit-learn = "^1.0"
jupyterlab = "^4.2"
matplotlib = "^3.9"
numpy = "^2.0"
pandas = "^2.2"
scikit-learn = "^1.5"
[tool.poetry.dev-dependencies]
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4"}
invoke = "^2.2"

115
requirements.txt Normal file
View file

@ -0,0 +1,115 @@
anyio==4.4.0 ; python_version >= "3.9" and python_version < "4.0"
appnope==0.1.4 ; python_version >= "3.9" and python_version < "4.0" and platform_system == "Darwin"
argon2-cffi-bindings==21.2.0 ; python_version >= "3.9" and python_version < "4.0"
argon2-cffi==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
arrow==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
asttokens==2.4.1 ; python_version >= "3.9" and python_version < "4.0"
async-lru==2.0.4 ; python_version >= "3.9" and python_version < "4.0"
attrs==23.2.0 ; python_version >= "3.9" and python_version < "4.0"
babel==2.15.0 ; python_version >= "3.9" and python_version < "4.0"
beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4.0"
bleach==6.1.0 ; python_version >= "3.9" and python_version < "4.0"
certifi==2024.7.4 ; python_version >= "3.9" and python_version < "4.0"
cffi==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32"
comm==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
contourpy==1.2.1 ; python_version >= "3.9" and python_version < "4.0"
cycler==0.12.1 ; python_version >= "3.9" and python_version < "4.0"
debugpy==1.8.2 ; python_version >= "3.9" and python_version < "4.0"
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "4.0"
exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
executing==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
fastjsonschema==2.20.0 ; python_version >= "3.9" and python_version < "4.0"
fonttools==4.53.1 ; python_version >= "3.9" and python_version < "4.0"
fqdn==1.5.1 ; python_version >= "3.9" and python_version < "4"
h11==0.14.0 ; python_version >= "3.9" and python_version < "4.0"
httpcore==1.0.5 ; python_version >= "3.9" and python_version < "4.0"
httpx==0.27.0 ; python_version >= "3.9" and python_version < "4.0"
idna==3.7 ; python_version >= "3.9" and python_version < "4.0"
importlib-metadata==8.0.0 ; python_version >= "3.9" and python_version < "3.10"
importlib-resources==6.4.0 ; python_version >= "3.9" and python_version < "3.10"
ipykernel==6.29.5 ; python_version >= "3.9" and python_version < "4.0"
ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0"
isoduration==20.11.0 ; python_version >= "3.9" and python_version < "4.0"
jedi==0.19.1 ; python_version >= "3.9" and python_version < "4.0"
jinja2==3.1.4 ; python_version >= "3.9" and python_version < "4.0"
joblib==1.4.2 ; python_version >= "3.9" and python_version < "4.0"
json5==0.9.25 ; python_version >= "3.9" and python_version < "4.0"
jsonpointer==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "4.0"
jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "4.0"
jsonschema[format-nongpl]==4.23.0 ; python_version >= "3.9" and python_version < "4.0"
jupyter-client==8.6.2 ; python_version >= "3.9" and python_version < "4.0"
jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "4.0"
jupyter-events==0.10.0 ; python_version >= "3.9" and python_version < "4.0"
jupyter-lsp==2.2.5 ; python_version >= "3.9" and python_version < "4.0"
jupyter-server-terminals==0.5.3 ; python_version >= "3.9" and python_version < "4.0"
jupyter-server==2.14.2 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab-server==2.27.2 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab==4.2.3 ; python_version >= "3.9" and python_version < "4.0"
kiwisolver==1.4.5 ; python_version >= "3.9" and python_version < "4.0"
markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "4.0"
matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "4.0"
matplotlib==3.9.1 ; python_version >= "3.9" and python_version < "4.0"
mistune==3.0.2 ; python_version >= "3.9" and python_version < "4.0"
nbclient==0.10.0 ; python_version >= "3.9" and python_version < "4.0"
nbconvert==7.16.4 ; python_version >= "3.9" and python_version < "4.0"
nbformat==5.10.4 ; python_version >= "3.9" and python_version < "4.0"
nest-asyncio==1.6.0 ; python_version >= "3.9" and python_version < "4.0"
notebook-shim==0.2.4 ; python_version >= "3.9" and python_version < "4.0"
numpy==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
overrides==7.7.0 ; python_version >= "3.9" and python_version < "4.0"
packaging==24.1 ; python_version >= "3.9" and python_version < "4.0"
pandas==2.2.2 ; python_version >= "3.9" and python_version < "4.0"
pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "4.0"
parso==0.8.4 ; python_version >= "3.9" and python_version < "4.0"
pexpect==4.9.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
pillow==10.4.0 ; python_version >= "3.9" and python_version < "4.0"
platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "4.0"
prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "4.0"
prompt-toolkit==3.0.47 ; python_version >= "3.9" and python_version < "4.0"
psutil==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and (os_name != "nt" or sys_platform != "win32")
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
pycparser==2.22 ; python_version >= "3.9" and python_version < "4.0"
pygments==2.18.0 ; python_version >= "3.9" and python_version < "4.0"
pyparsing==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "4.0"
python-json-logger==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
pytz==2024.1 ; python_version >= "3.9" and python_version < "4.0"
pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "4.0"
pywinpty==2.0.13 ; python_version >= "3.9" and python_version < "4.0" and os_name == "nt"
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0"
pyzmq==26.0.3 ; python_version >= "3.9" and python_version < "4.0"
referencing==0.35.1 ; python_version >= "3.9" and python_version < "4.0"
requests==2.32.3 ; python_version >= "3.9" and python_version < "4.0"
rfc3339-validator==0.1.4 ; python_version >= "3.9" and python_version < "4.0"
rfc3986-validator==0.1.1 ; python_version >= "3.9" and python_version < "4.0"
rpds-py==0.19.0 ; python_version >= "3.9" and python_version < "4.0"
scikit-learn==1.5.1 ; python_version >= "3.9" and python_version < "4.0"
scipy==1.13.1 ; python_version >= "3.9" and python_version < "4.0"
send2trash==1.8.3 ; python_version >= "3.9" and python_version < "4.0"
setuptools==70.3.0 ; python_version >= "3.9" and python_version < "4.0"
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
sniffio==1.3.1 ; python_version >= "3.9" and python_version < "4.0"
soupsieve==2.5 ; python_version >= "3.9" and python_version < "4.0"
stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0"
terminado==0.18.1 ; python_version >= "3.9" and python_version < "4.0"
threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "4.0"
tinycss2==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
tornado==6.4.1 ; python_version >= "3.9" and python_version < "4.0"
traitlets==5.14.3 ; python_version >= "3.9" and python_version < "4.0"
types-python-dateutil==2.9.0.20240316 ; python_version >= "3.9" and python_version < "4.0"
typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.11"
tzdata==2024.1 ; python_version >= "3.9" and python_version < "4.0"
uri-template==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
urllib3==2.2.2 ; python_version >= "3.9" and python_version < "4.0"
wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "4.0"
webcolors==24.6.0 ; python_version >= "3.9" and python_version < "4.0"
webencodings==0.5.1 ; python_version >= "3.9" and python_version < "4.0"
websocket-client==1.8.0 ; python_version >= "3.9" and python_version < "4.0"
zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.10"

73
tasks.py Normal file
View file

@ -0,0 +1,73 @@
"""Maintenance tasks for the project."""
import json
import os
import sys
import tempfile
import tomllib
import invoke
try:
from jupyter_client import kernelspec
except ImportError:
raise RuntimeError('Install the "ipykernel" package first') from None
def _ensure_venv():
    """Raise a `RuntimeError` unless Python runs inside a `virtualenv`."""
    # `sys.prefix` differs from `sys.base_prefix` only inside a virtual environment
    # Source: https://stackoverflow.com/questions/1871549/how-to-determine-if-python-is-running-inside-a-virtualenv  # pylint:disable=C0301
    in_venv = sys.prefix != sys.base_prefix
    if not in_venv:
        raise RuntimeError("Run this command in an activated `virtualenv`")
def _get_pyproject_name():
with open("pyproject.toml", "rb") as fp:
data = tomllib.load(fp)
try:
project_name = data["tool"]["poetry"]["name"]
except KeyError:
raise RuntimeError('"pyproject.toml" seems to be malformed') from None
return project_name
@invoke.task
def install_kernel(_c):
    """Install the activated `virtualenv` as a `jupyter kernel`.

    This helper task writes a "kernel.json" spec file into a temporary
    directory and registers it for the current user under the project's
    name, so that Jupyter can launch the interpreter of the `virtualenv`.

    Args:
        _c: The `invoke` context; it is not used.

    Raises:
        RuntimeError: If no `virtualenv` is activated or the project's
            name cannot be read from "pyproject.toml".
    """
    _ensure_venv()

    project_name = _get_pyproject_name()

    # `sys.executable` points to the running interpreter regardless of the
    # platform's `virtualenv` layout (e.g., "Scripts\python.exe" on Windows);
    # the POSIX-style path is only a fallback if it cannot be determined
    python = sys.executable or os.path.join(sys.prefix, "bin", "python")

    with tempfile.TemporaryDirectory() as tmpdir:
        spec = {
            "argv": [
                python,
                "-m",
                "ipykernel_launcher",
                "-f",
                "{connection_file}",
            ],
            "display_name": project_name,
            # Pass on the current `PATH` to the kernel's environment
            "env": {"PATH": os.environ["PATH"]},
            "interrupt_mode": "signal",
            "language": "python",
            "metadata": {"debugger": True},
        }

        with open(os.path.join(tmpdir, "kernel.json"), "w", encoding="utf-8") as fp:
            json.dump(spec, fp, indent=4)

        # The spec must be installed while the temporary directory still exists
        manager = kernelspec.KernelSpecManager()
        manager.install_kernel_spec(tmpdir, kernel_name=project_name, user=True)
@invoke.task
def remove_kernel(_c):
    """Uninstall the `jupyter kernel` belonging to the activated `virtualenv`.

    The kernel spec is removed under the project's name
    as it is configured in "pyproject.toml".
    """
    _ensure_venv()

    kernel_name = _get_pyproject_name()
    kernelspec.KernelSpecManager().remove_kernel_spec(kernel_name)