Compare commits

...

18 commits

Author SHA1 Message Date
cc56869d98
Merge branch 'develop' into main 2024-07-15 12:16:36 +02:00
bbfec01401
Merge branch 'refurbish-project' into develop 2024-07-15 12:13:33 +02:00
51a5dcc8ee
Run black on all the notebooks
- we use black's default settings
- some cells are NOT kept in black's format to:
  - increase readability
  - or show Python's flexibility with regard to style
2024-07-15 12:12:51 +02:00
0ed024e020
Fix wrong symbol as cell magic 2024-07-15 12:12:51 +02:00
bd282f4528
Use higher resolution plots 2024-07-15 12:12:50 +02:00
f0d92ed229
Set random seeds where applicable 2024-07-15 12:12:50 +02:00
3125c82096
Run notebooks with updates and custom kernel 2024-07-15 12:12:50 +02:00
79a2e45e49
Add tasks to install and remove kernels 2024-07-15 12:12:49 +02:00
4ed5551d0e
Add invoke to the dev dependencies 2024-07-15 12:12:49 +02:00
2f99461c9c
Add black to the dev dependencies 2024-07-15 12:12:49 +02:00
401301e5dc
Add requirements.txt as an alternative to poetry.lock 2024-07-15 12:12:48 +02:00
75d4c22f0b
Update links 2024-07-15 12:12:48 +02:00
4d95a73ac3
Pin the dependencies 2024-07-15 12:12:48 +02:00
16c571d462
Update the (un-pinned) dependencies 2024-07-15 12:12:47 +02:00
906dfa0345
Use poetry's latest config style and options 2024-07-15 12:12:47 +02:00
f5fec203e1
Bump version 2024-07-15 09:10:17 +02:00
da3eb9344d
Merge branch 'develop' into main 2021-10-05 10:49:20 +02:00
297efe8298
Merge branch 'develop' into main 2021-05-25 03:14:31 +02:00
18 changed files with 2803 additions and 1439 deletions

View file

@ -123,7 +123,7 @@
}
],
"source": [
"2 ** 3"
"2**3"
]
},
{
@ -143,7 +143,7 @@
}
],
"source": [
"2 * 2 ** 3"
"2 * 2**3"
]
},
{
@ -536,9 +536,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -550,7 +550,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -149,9 +149,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -163,7 +163,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -507,7 +507,7 @@
"\n",
"The indented line constitues the `for`-loop's body. In the example, we simply take each of the numbers in `numbers`, one at a time, and add it to a `total` that is initialized at `0`. In other words, we calculate the sum of all the elements in `numbers`.\n",
"\n",
"Many beginners struggle with the term \"loop.\" To visualize the looping behavior of this code, we use the online tool [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](http://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Atotal%20%3D%200%0A%0Afor%20number%20in%20numbers%3A%0A%20%20%20%20total%20%3D%20total%20%2B%20number%0A%0Atotal&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). That tool is helpful for two reasons:\n",
"Many beginners struggle with the term \"loop.\" To visualize the looping behavior of this code, we use the online tool [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](http://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Atotal%20%3D%200%0A%0Afor%20number%20in%20numbers%3A%0A%20%20%20%20total%20%3D%20total%20%2B%20number%0A%0Atotal&cumulative=false&curstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false). That tool is helpful for two reasons:\n",
"1. It allows us to execute code in \"slow motion\" (i.e., by clicking the \"next\" button on the left side, only the next atomic step of the code snippet is executed).\n",
"2. It shows what happens inside the computer's memory on the right-hand side."
]
@ -999,9 +999,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -1013,7 +1013,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -178,9 +178,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -192,7 +192,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -112,9 +112,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -126,7 +126,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -73,7 +73,7 @@
"\n",
"Let's execute the function with `numbers` as the input. We see the same `6` below the cell as we do above where we run the code without a function. Without the `return` statement in the function's body, we would not see any output here.\n",
"\n",
"To see what happens in detail, take a look at [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Adef%20add_evens%28numbers%29%3A%0A%20%20%20%20%22%22%22Sum%20up%20all%20the%20even%20numbers%20in%20a%20list.%22%22%22%0A%20%20%20%20result%20%3D%200%0A%0A%20%20%20%20for%20number%20in%20numbers%3A%0A%20%20%20%20%20%20%20%20if%20number%20%25%202%20%3D%3D%200%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20result%20%3D%20result%20%2B%20number%0A%0A%20%20%20%20return%20result%0A%0Atotal%20%3D%20add_evens%28numbers%29&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false) again. You should notice how there are two variables by the name `numbers` in memory. Python manages the memory with a concept called **namespaces** or **scopes**, which are just fancy terms for saying that Python can tell variables from different contexts apart."
"To see what happens in detail, take a look at [PythonTutor <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://pythontutor.com/visualize.html#code=numbers%20%3D%20%5B1,%202,%203,%204%5D%0A%0Adef%20add_evens%28numbers%29%3A%0A%20%20%20%20%22%22%22Sum%20up%20all%20the%20even%20numbers%20in%20a%20list.%22%22%22%0A%20%20%20%20result%20%3D%200%0A%0A%20%20%20%20for%20number%20in%20numbers%3A%0A%20%20%20%20%20%20%20%20if%20number%20%25%202%20%3D%3D%200%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20result%20%3D%20result%20%2B%20number%0A%0A%20%20%20%20return%20result%0A%0Atotal%20%3D%20add_evens%28numbers%29&cumulative=false&curstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false) again. You should notice how there are two variables by the name `numbers` in memory. Python manages the memory with a concept called **namespaces** or **scopes**, which are just fancy terms for saying that Python can tell variables from different contexts apart."
]
},
{
@ -151,7 +151,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_707190/1049141082.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mresult\u001b[49m\n",
"\u001b[0;31mNameError\u001b[0m: name 'result' is not defined"
]
}
@ -385,7 +385,7 @@
],
"source": [
"for number in numbers:\n",
" square = number ** 2\n",
" square = number**2\n",
" print(\"The square of\", number, \"is\", square)"
]
},
@ -418,21 +418,39 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"To access a function inside the [random <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html) module, for example, the [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function, we use the `.` operator, formally called the attribute access operator. The [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function simply returns a random decimal number between `0` and `1`."
"To access a function inside the [random <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html) module, for example, the [seed() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.seed) function, we use the `.` operator, formally called the attribute access operator. \n",
"\n",
"We use [random.seed() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.seed) to make the random numbers *replicable* on separate runs of this notebook."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The [random() <img height=\"12\" style=\"display: inline-block\" src=\"../static/link/to_py.png\">](https://docs.python.org/3/library/random.html#random.random) function simply returns a random decimal number between `0` and `1`."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.7021021034327006"
"0.6394267984578837"
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -450,16 +468,16 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
"True"
]
},
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -477,7 +495,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [
{
@ -486,7 +504,7 @@
"3"
]
},
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@ -505,9 +523,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -519,7 +537,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -257,9 +257,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -271,7 +271,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -237,19 +237,18 @@
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'int' object has no attribute 'is_integer'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/2418692311.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'is_integer'"
]
"data": {
"text/plain": [
"True"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.is_integer()"
"a.is_integer() # Note: In Python versions < 3.12 this cell raises an `AttributeError`"
]
},
{
@ -494,7 +493,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/2667408552.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmore_numbers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmore_numbers\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mappend\u001b[49m(\u001b[38;5;241m10\u001b[39m)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'append'"
]
}
@ -607,7 +606,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/user/1000/ipykernel_306555/3320204082.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mto_words\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"zero\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"Cell \u001b[0;32mIn[26], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mto_words\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mzero\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n",
"\u001b[0;31mKeyError\u001b[0m: 'zero'"
]
}
@ -673,9 +672,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -687,7 +686,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

File diff suppressed because one or more lines are too long

View file

@ -244,9 +244,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -258,7 +258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -32,17 +32,17 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (1.3.3)\n",
"Requirement already satisfied: numpy>=1.17.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (1.21.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from pandas) (2021.3)\n",
"Requirement already satisfied: six>=1.5 in /home/webartifex/repos/intro-to-data-science/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
"Requirement already satisfied: pandas in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (2.2.2)\n",
"Requirement already satisfied: numpy>=1.26.0 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2.0.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: six>=1.5 in /home/instructor/Repositories/intro-to-data-science/.venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n"
]
}
],
"source": [
"%pip install pandas"
"!pip install pandas"
]
},
{
@ -927,7 +927,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -1852,10 +1852,7 @@
}
],
"source": [
"df.loc[\n",
" 200300:200800,\n",
" [\"o_street\", \"o_zip\", \"o_city\", \"o_latitude\", \"o_longitude\"]\n",
"]"
"df.loc[200300:200800, [\"o_street\", \"o_zip\", \"o_city\", \"o_latitude\", \"o_longitude\"]]"
]
},
{
@ -1882,7 +1879,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -1982,11 +1979,13 @@
"metadata": {},
"outputs": [],
"source": [
"df = df.astype({\n",
" \"pickup_at\": \"datetime64[ns]\",\n",
" \"delivery_at\": \"datetime64[ns]\",\n",
" \"cancelled\": bool,\n",
"})"
"df = df.astype(\n",
" {\n",
" \"pickup_at\": \"datetime64[ns]\",\n",
" \"delivery_at\": \"datetime64[ns]\",\n",
" \"cancelled\": bool,\n",
" }\n",
")"
]
},
{
@ -2006,7 +2005,7 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 694 entries, 192594 to 211519\n",
"Index: 694 entries, 192594 to 211519\n",
"Data columns (total 19 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
@ -2686,7 +2685,7 @@
"source": [
"df.loc[\n",
" max_a_table,\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"]\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"],\n",
"].head()"
]
},
@ -2821,12 +2820,10 @@
" max_a_table\n",
" &\n",
" (\n",
" (df[\"d_latitude\"] > 44.85)\n",
" |\n",
" (df[\"d_longitude\"] < -0.59)\n",
" ) \n",
" (df[\"d_latitude\"] > 44.85) | (df[\"d_longitude\"] < -0.59)\n",
" )\n",
" ),\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"]\n",
" [\"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"d_latitude\", \"d_longitude\"],\n",
"].head()"
]
},
@ -2933,12 +2930,8 @@
],
"source": [
"df.loc[\n",
" (\n",
" max_a_table\n",
" &\n",
" df[\"customer_id\"].isin([6037, 79900, 80095])\n",
" ),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"]\n",
" (max_a_table & df[\"customer_id\"].isin([6037, 79900, 80095])),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"],\n",
"].head()"
]
},
@ -3067,12 +3060,8 @@
],
"source": [
"df.loc[\n",
" (\n",
" max_a_table\n",
" &\n",
" ~df[\"customer_id\"].isin([6037, 79900, 80095])\n",
" ),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"]\n",
" (max_a_table & ~df[\"customer_id\"].isin([6037, 79900, 80095])),\n",
" [\"placed_at\", \"customer_id\", \"d_street\", \"d_zip\", \"d_city\", \"total\"],\n",
"].head()"
]
},
@ -3166,10 +3155,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"customer_id\"\n",
"].unique()"
"df.loc[max_a_table, \"customer_id\"].unique()"
]
},
{
@ -3189,6 +3175,7 @@
{
"data": {
"text/plain": [
"restaurant_id\n",
"1254 78\n",
"1207 47\n",
"1204 39\n",
@ -3199,7 +3186,7 @@
"1249 23\n",
"1242 19\n",
"1221 18\n",
"Name: restaurant_id, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 35,
@ -3219,6 +3206,7 @@
{
"data": {
"text/plain": [
"customer_id\n",
"73919 14\n",
"10298 12\n",
"6037 8\n",
@ -3229,7 +3217,7 @@
"76838 3\n",
"75905 3\n",
"74791 3\n",
"Name: customer_id, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 36,
@ -3258,7 +3246,7 @@
{
"data": {
"text/plain": [
"15924.78"
"np.float64(15924.78)"
]
},
"execution_count": 37,
@ -3278,7 +3266,7 @@
{
"data": {
"text/plain": [
"885.0"
"np.float64(885.0)"
]
},
"execution_count": 38,
@ -3287,10 +3275,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].sum() / 100"
"df.loc[max_a_table, \"total\"].sum() / 100"
]
},
{
@ -3301,7 +3286,7 @@
{
"data": {
"text/plain": [
"3.5"
"np.float64(3.5)"
]
},
"execution_count": 39,
@ -3321,7 +3306,7 @@
{
"data": {
"text/plain": [
"83.7"
"np.float64(83.7)"
]
},
"execution_count": 40,
@ -3341,7 +3326,7 @@
{
"data": {
"text/plain": [
"12.5"
"np.float64(12.5)"
]
},
"execution_count": 41,
@ -3350,10 +3335,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].min() / 100"
"df.loc[max_a_table, \"total\"].min() / 100"
]
},
{
@ -3364,7 +3346,7 @@
{
"data": {
"text/plain": [
"60.0"
"np.float64(60.0)"
]
},
"execution_count": 42,
@ -3373,10 +3355,7 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].max() / 100"
"df.loc[max_a_table, \"total\"].max() / 100"
]
},
{
@ -3387,7 +3366,7 @@
{
"data": {
"text/plain": [
"22.94636887608069"
"np.float64(22.94636887608069)"
]
},
"execution_count": 43,
@ -3407,7 +3386,7 @@
{
"data": {
"text/plain": [
"22.95"
"np.float64(22.95)"
]
},
"execution_count": 44,
@ -3427,7 +3406,7 @@
{
"data": {
"text/plain": [
"22.69"
"np.float64(22.69)"
]
},
"execution_count": 45,
@ -3436,18 +3415,15 @@
}
],
"source": [
"df.loc[\n",
" max_a_table,\n",
" \"total\"\n",
"].mean().round() / 100"
"df.loc[max_a_table, \"total\"].mean().round() / 100"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -3459,7 +3435,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

View file

@ -56,7 +56,7 @@
" \"orders.csv\",\n",
" index_col=\"order_id\",\n",
" dtype={\"cancelled\": bool},\n",
" parse_dates=[\"placed_at\", \"pickup_at\", \"delivery_at\"]\n",
" parse_dates=[\"placed_at\", \"pickup_at\", \"delivery_at\"],\n",
")"
]
},
@ -180,9 +180,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "intro-to-data-science",
"language": "python",
"name": "python3"
"name": "intro-to-data-science"
},
"language_info": {
"codemirror_mode": {
@ -194,7 +194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.12.4"
},
"toc": {
"base_numbering": 1,

File diff suppressed because one or more lines are too long

View file

@ -40,16 +40,21 @@ It is only expected that the student has:
### Getting started & Installation
To follow this workshop, an installation of **Python 3.8** or higher is expected.
To follow this workshop, an installation of **Python 3.9** or higher is expected.
A popular and beginner friendly way is
to install the [Anaconda Distribution](https://www.anaconda.com/products/individual)
to install the [Anaconda Distribution](https://www.anaconda.com/download)
that not only ships Python itself
but also comes pre-packaged with a lot of third-party libraries
including [Python's scientific stack](https://scipy.org/about.html).
including [Python's scientific stack](https://scipy.org/).
Detailed instructions can be found [here <img height="12" style="display: inline-block" src="static/link/to_gh.png">](https://github.com/webartifex/intro-to-python#installation).
If you are *not* using the Anaconda Distribution,
you must install the third-party libraries via the command
`pip install -r requirements.txt` (or something equivalent)
before working with the notebook files.
## Contributing

3531
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,11 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "intro-to-data-science"
version = "0.1.0.dev0"
version = "0.3.0"
authors = [
"Alexander Hess <alexander@webartifex.biz>",
@ -25,13 +26,18 @@ readme = "README.md"
homepage = "https://github.com/webartifex/intro-to-data-science"
repository = "https://github.com/webartifex/intro-to-data-science"
package-mode = false
[tool.poetry.dependencies]
python = "^3.8"
python = "^3.9"
jupyterlab = "^3.1"
matplotlib = "^3.4"
numpy = "^1.21"
pandas = "^1.3"
scikit-learn = "^1.0"
jupyterlab = "^4.2"
matplotlib = "^3.9"
numpy = "^2.0"
pandas = "^2.2"
scikit-learn = "^1.5"
[tool.poetry.dev-dependencies]
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4"}
invoke = "^2.2"

115
requirements.txt Normal file
View file

@ -0,0 +1,115 @@
anyio==4.4.0 ; python_version >= "3.9" and python_version < "4.0"
appnope==0.1.4 ; python_version >= "3.9" and python_version < "4.0" and platform_system == "Darwin"
argon2-cffi-bindings==21.2.0 ; python_version >= "3.9" and python_version < "4.0"
argon2-cffi==23.1.0 ; python_version >= "3.9" and python_version < "4.0"
arrow==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
asttokens==2.4.1 ; python_version >= "3.9" and python_version < "4.0"
async-lru==2.0.4 ; python_version >= "3.9" and python_version < "4.0"
attrs==23.2.0 ; python_version >= "3.9" and python_version < "4.0"
babel==2.15.0 ; python_version >= "3.9" and python_version < "4.0"
beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4.0"
bleach==6.1.0 ; python_version >= "3.9" and python_version < "4.0"
certifi==2024.7.4 ; python_version >= "3.9" and python_version < "4.0"
cffi==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32"
comm==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
contourpy==1.2.1 ; python_version >= "3.9" and python_version < "4.0"
cycler==0.12.1 ; python_version >= "3.9" and python_version < "4.0"
debugpy==1.8.2 ; python_version >= "3.9" and python_version < "4.0"
decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0"
defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "4.0"
exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11"
executing==2.0.1 ; python_version >= "3.9" and python_version < "4.0"
fastjsonschema==2.20.0 ; python_version >= "3.9" and python_version < "4.0"
fonttools==4.53.1 ; python_version >= "3.9" and python_version < "4.0"
fqdn==1.5.1 ; python_version >= "3.9" and python_version < "4"
h11==0.14.0 ; python_version >= "3.9" and python_version < "4.0"
httpcore==1.0.5 ; python_version >= "3.9" and python_version < "4.0"
httpx==0.27.0 ; python_version >= "3.9" and python_version < "4.0"
idna==3.7 ; python_version >= "3.9" and python_version < "4.0"
importlib-metadata==8.0.0 ; python_version >= "3.9" and python_version < "3.10"
importlib-resources==6.4.0 ; python_version >= "3.9" and python_version < "3.10"
ipykernel==6.29.5 ; python_version >= "3.9" and python_version < "4.0"
ipython==8.18.1 ; python_version >= "3.9" and python_version < "4.0"
isoduration==20.11.0 ; python_version >= "3.9" and python_version < "4.0"
jedi==0.19.1 ; python_version >= "3.9" and python_version < "4.0"
jinja2==3.1.4 ; python_version >= "3.9" and python_version < "4.0"
joblib==1.4.2 ; python_version >= "3.9" and python_version < "4.0"
json5==0.9.25 ; python_version >= "3.9" and python_version < "4.0"
jsonpointer==3.0.0 ; python_version >= "3.9" and python_version < "4.0"
jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "4.0"
jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "4.0"
jsonschema[format-nongpl]==4.23.0 ; python_version >= "3.9" and python_version < "4.0"
jupyter-client==8.6.2 ; python_version >= "3.9" and python_version < "4.0"
jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "4.0"
jupyter-events==0.10.0 ; python_version >= "3.9" and python_version < "4.0"
jupyter-lsp==2.2.5 ; python_version >= "3.9" and python_version < "4.0"
jupyter-server-terminals==0.5.3 ; python_version >= "3.9" and python_version < "4.0"
jupyter-server==2.14.2 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab-server==2.27.2 ; python_version >= "3.9" and python_version < "4.0"
jupyterlab==4.2.3 ; python_version >= "3.9" and python_version < "4.0"
kiwisolver==1.4.5 ; python_version >= "3.9" and python_version < "4.0"
markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "4.0"
matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "4.0"
matplotlib==3.9.1 ; python_version >= "3.9" and python_version < "4.0"
mistune==3.0.2 ; python_version >= "3.9" and python_version < "4.0"
nbclient==0.10.0 ; python_version >= "3.9" and python_version < "4.0"
nbconvert==7.16.4 ; python_version >= "3.9" and python_version < "4.0"
nbformat==5.10.4 ; python_version >= "3.9" and python_version < "4.0"
nest-asyncio==1.6.0 ; python_version >= "3.9" and python_version < "4.0"
notebook-shim==0.2.4 ; python_version >= "3.9" and python_version < "4.0"
numpy==2.0.0 ; python_version >= "3.9" and python_version < "4.0"
overrides==7.7.0 ; python_version >= "3.9" and python_version < "4.0"
packaging==24.1 ; python_version >= "3.9" and python_version < "4.0"
pandas==2.2.2 ; python_version >= "3.9" and python_version < "4.0"
pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "4.0"
parso==0.8.4 ; python_version >= "3.9" and python_version < "4.0"
pexpect==4.9.0 ; python_version >= "3.9" and python_version < "4.0" and sys_platform != "win32"
pillow==10.4.0 ; python_version >= "3.9" and python_version < "4.0"
platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "4.0"
prometheus-client==0.20.0 ; python_version >= "3.9" and python_version < "4.0"
prompt-toolkit==3.0.47 ; python_version >= "3.9" and python_version < "4.0"
psutil==6.0.0 ; python_version >= "3.9" and python_version < "4.0"
ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "4.0" and (os_name != "nt" or sys_platform != "win32")
pure-eval==0.2.2 ; python_version >= "3.9" and python_version < "4.0"
pycparser==2.22 ; python_version >= "3.9" and python_version < "4.0"
pygments==2.18.0 ; python_version >= "3.9" and python_version < "4.0"
pyparsing==3.1.2 ; python_version >= "3.9" and python_version < "4.0"
python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "4.0"
python-json-logger==2.0.7 ; python_version >= "3.9" and python_version < "4.0"
pytz==2024.1 ; python_version >= "3.9" and python_version < "4.0"
pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "4.0"
pywinpty==2.0.13 ; python_version >= "3.9" and python_version < "4.0" and os_name == "nt"
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0"
pyzmq==26.0.3 ; python_version >= "3.9" and python_version < "4.0"
referencing==0.35.1 ; python_version >= "3.9" and python_version < "4.0"
requests==2.32.3 ; python_version >= "3.9" and python_version < "4.0"
rfc3339-validator==0.1.4 ; python_version >= "3.9" and python_version < "4.0"
rfc3986-validator==0.1.1 ; python_version >= "3.9" and python_version < "4.0"
rpds-py==0.19.0 ; python_version >= "3.9" and python_version < "4.0"
scikit-learn==1.5.1 ; python_version >= "3.9" and python_version < "4.0"
scipy==1.13.1 ; python_version >= "3.9" and python_version < "4.0"
send2trash==1.8.3 ; python_version >= "3.9" and python_version < "4.0"
setuptools==70.3.0 ; python_version >= "3.9" and python_version < "4.0"
six==1.16.0 ; python_version >= "3.9" and python_version < "4.0"
sniffio==1.3.1 ; python_version >= "3.9" and python_version < "4.0"
soupsieve==2.5 ; python_version >= "3.9" and python_version < "4.0"
stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0"
terminado==0.18.1 ; python_version >= "3.9" and python_version < "4.0"
threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "4.0"
tinycss2==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
tornado==6.4.1 ; python_version >= "3.9" and python_version < "4.0"
traitlets==5.14.3 ; python_version >= "3.9" and python_version < "4.0"
types-python-dateutil==2.9.0.20240316 ; python_version >= "3.9" and python_version < "4.0"
typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.11"
tzdata==2024.1 ; python_version >= "3.9" and python_version < "4.0"
uri-template==1.3.0 ; python_version >= "3.9" and python_version < "4.0"
urllib3==2.2.2 ; python_version >= "3.9" and python_version < "4.0"
wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "4.0"
webcolors==24.6.0 ; python_version >= "3.9" and python_version < "4.0"
webencodings==0.5.1 ; python_version >= "3.9" and python_version < "4.0"
websocket-client==1.8.0 ; python_version >= "3.9" and python_version < "4.0"
zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.10"

73
tasks.py Normal file
View file

@ -0,0 +1,73 @@
"""Maintenance tasks for the project."""
import json
import os
import sys
import tempfile
import tomllib
import invoke
try:
from jupyter_client import kernelspec
except ImportError:
raise RuntimeError('Install the "ipykernel" package first') from None
def _ensure_venv():
    """Abort with a `RuntimeError` unless a virtual environment is active.

    Inside a virtual environment, `sys.prefix` points to the environment
    while `sys.base_prefix` keeps referring to the base installation.
    """
    # Source: https://stackoverflow.com/questions/1871549/how-to-determine-if-python-is-running-inside-a-virtualenv # pylint:disable=C0301
    inside_venv = sys.prefix != sys.base_prefix
    if not inside_venv:
        raise RuntimeError("Run this command in an activated `virtualenv`")
def _get_pyproject_name():
with open("pyproject.toml", "rb") as fp:
data = tomllib.load(fp)
try:
project_name = data["tool"]["poetry"]["name"]
except KeyError:
raise RuntimeError('"pyproject.toml" seems to be malformed') from None
return project_name
@invoke.task
def install_kernel(_c):
    """Install the activated `virtualenv` as a `jupyter kernel`.

    This helper task writes a "kernel.json" spec that starts
    `ipykernel_launcher` with the `virtualenv`'s interpreter
    and installs that spec (with `user=True`) under the project's
    name as configured in "pyproject.toml".
    """
    _ensure_venv()
    project_name = _get_pyproject_name()

    # A `virtualenv` keeps its interpreter in "Scripts" on Windows
    # and in "bin" on all other platforms
    if os.name == "nt":
        python = os.path.join(sys.prefix, "Scripts", "python.exe")
    else:
        python = os.path.join(sys.prefix, "bin", "python")

    spec = {
        "argv": [
            python,
            "-m",
            "ipykernel_launcher",
            "-f",
            "{connection_file}",
        ],
        "display_name": project_name,
        # Pass on the activated environment's `PATH` to the kernel
        "env": {"PATH": os.environ["PATH"]},
        "interrupt_mode": "signal",
        "language": "python",
        "metadata": {"debugger": True},
    }

    # The spec must be installed while the temporary folder still exists
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "kernel.json"), "w", encoding="utf-8") as fp:
            json.dump(spec, fp, indent=4)

        manager = kernelspec.KernelSpecManager()
        manager.install_kernel_spec(tmpdir, kernel_name=project_name, user=True)
@invoke.task
def remove_kernel(_c):
    """Remove the `jupyter kernel` named after the project.

    The kernel's name is the project's name in "pyproject.toml";
    the task only runs inside an activated `virtualenv`.
    """
    _ensure_venv()

    kernel_name = _get_pyproject_name()
    kernelspec.KernelSpecManager().remove_kernel_spec(kernel_name)