From de06d4859dcf590daf7fc6da8fff963c5d8fd646 Mon Sep 17 00:00:00 2001 From: Alexander Hess Date: Tue, 5 Oct 2021 01:28:43 +0200 Subject: [PATCH] Add exercises for pandas --- .../03_exercises_simple_queries.ipynb | 215 ++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 01_scientific_stack/03_exercises_simple_queries.ipynb diff --git a/01_scientific_stack/03_exercises_simple_queries.ipynb b/01_scientific_stack/03_exercises_simple_queries.ipynb new file mode 100644 index 0000000..e588608 --- /dev/null +++ b/01_scientific_stack/03_exercises_simple_queries.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note**: Click on \"*Kernel*\" > \"*Restart Kernel and Run All*\" in [JupyterLab](https://jupyterlab.readthedocs.io/en/stable/) *after* finishing the exercises to ensure that your solution runs top to bottom *without* any errors. If you cannot run this file on your machine, you may want to open it [in the cloud ](https://mybinder.org/v2/gh/webartifex/intro-to-data-science/main?urlpath=lab/tree/01_scientific_stack/03_exercises_simple_queries.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Chapter 0: Python's Scientific Stack (Coding Exercises)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The exercises below assume that you have read the preceding content sections.\n", + "\n", + "The `...`'s in the code cells indicate where you need to fill in code snippets. The number of `...`'s within a code cell gives you a rough idea of how many lines of code are needed to solve the task. You should not need to create any additional code cells for your final solution. However, you may want to use temporary code cells to try out some ideas." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple Queries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [pd.read_csv() ](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas.read_csv) function is very customizable. For example, it takes `dtype` and `parse_dates` inputs, which allows us to parse the timestamp and \"cancelled\" columns correctly right away." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\n", + " \"orders.csv\",\n", + " index_col=\"order_id\",\n", + " dtype={\"cancelled\": bool},\n", + " parse_dates=[\"placed_at\", \"pickup_at\", \"delivery_at\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q1**: Write a filter to pull out orders of the restaurant with the name \"Funky Burger\" only!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "funky_burger = ...\n", + "\n", + "df[funky_burger].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q2**: Ensure that there is only one restaurant by this name!\n", + "\n", + "Hint: While several restaurants may share the same name, their ID is still different" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q3**: How many orders did \"Funky Burger\" *receive* in the target horizon?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(df.loc[...])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q4**: How many orders did \"Funky Burger\" *deliver* in the target horizon?\n", + "\n", + "Hint: \"deliver\" implies that an order must not be cancelled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(...)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q5**: How much revenue did \"Funky Burger\" make?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "...\n", + " ... & ...,\n", + " \"...\"\n", + "..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q6**: What was the average order's total at \"Funky Burger\"?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "...\n", + " ...\n", + " ...\n", + "..." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}