diff --git a/00_start_up.ipynb b/00_start_up.ipynb index 7551dcd..29175db 100644 --- a/00_start_up.ipynb +++ b/00_start_up.ipynb @@ -19,7 +19,7 @@ } }, "source": [ - "This course is set up to be a *thorough* introduction to programming in [Python](https://www.python.org/).\n", + "This book is set up to be a *thorough* introduction to programming in [Python](https://www.python.org/).\n", "\n", "It teaches the concepts behind and the syntax of the core Python language as defined by the [Python Software Foundation](https://www.python.org/psf/) in the official [language reference](https://docs.python.org/3/reference/index.html) and introduces additions to the language as distributed with the [standard library](https://docs.python.org/3/library/index.html) that come with every installation.\n", "\n", @@ -58,10 +58,10 @@ "source": [ "To be suitable for *total beginners*, there are *no* formal prerequisites. It is only expected that the student has:\n", "\n", - "- a *solid* understanding of the **English language** (i.e., usage of *technical* terms with *narrow* and *distinct* meanings),\n", - "- knowledge of **basic mathematics** from high school (i.e., addition, subtraction, multiplication, division, and a little bit of calculus and statistics),\n", - "- the ability to **think conceptually** and **reason logically** (i.e., *not* just memorizing), and\n", - "- the willingness to **invest 2-4 hours a day for a month** (cf., \"ABC\"-rule at the end)" + "- a *solid* understanding of the **English language**,\n", + "- knowledge of **basic mathematics** from high school,\n", + "- the ability to **think conceptually** and **reason logically**, and\n", + "- the willingness to **invest 2-4 hours a day for a month**." ] }, { @@ -83,7 +83,7 @@ } }, "source": [ - "The course's **main goal** is to **prepare** the student **for further studies** in the \"field\" of **data science**." 
+ "The **main goal** of this introduction is to **prepare** the student **for further studies** in the \"field\" of **data science**." ] }, { @@ -94,7 +94,7 @@ } }, "source": [ - "This includes but is not limited to more advanced courses on topics such as:\n", + "This includes but is not limited to topics such as:\n", "- linear algebra\n", "- statistics & econometrics\n", "- data cleaning & wrangling\n", @@ -154,7 +154,7 @@ } }, "source": [ - "To follow this course, a working installation of **Python 3.6** or higher is needed.\n", + "To follow this book, a working installation of **Python 3.6** or higher is needed.\n", "\n", "A popular and beginner friendly way is to install the [Anaconda Distribution](https://www.anaconda.com/distribution/) that not only ships Python and the standard library but comes pre-packaged with a lot of third-party libraries from the so-called \"scientific stack\". Just go to the [download](https://www.anaconda.com/download/) page and install the latest version (i.e., *2019-07* with Python 3.7 at the time of this writing) for your operating system.\n", "\n", @@ -180,7 +180,7 @@ } }, "source": [ - "To download the course's materials as a ZIP file, open the accompanying [GitHub repository](https://github.com/webartifex/intro-to-python) in a web browser and click on the green \"Clone or download\" button on the right. Then, unpack the ZIP file into a folder of your choosing (ideally somewhere within your personal user folder so that the files show up right away)." + "To download the materials accompanying this book as a ZIP file, open this [GitHub repository](https://github.com/webartifex/intro-to-python) in a web browser and click on the green \"Clone or download\" button on the right. Then, unpack the ZIP file into a folder of your choosing (ideally somewhere within your personal user folder so that the files show up right away)." 
] }, { @@ -252,7 +252,7 @@ "\n", "Furthermore, Jupyter notebooks have become a de-facto standard for communicating and exchanging results in the data science community (both in academia and business) and often provide a more intuitive alternative to terminal based ways of running Python (e.g., the default [Python interpreter](https://docs.python.org/3/tutorial/interpreter.html) as shown above or a more advanced interactive version like [IPython](https://ipython.org/)) or even a full-fledged [Integrated Development Environment](https://en.wikipedia.org/wiki/Integrated_development_environment) (e.g., the commercial [PyCharm](https://www.jetbrains.com/pycharm/) or the free [Spyder](https://github.com/spyder-ide/spyder)).\n", "\n", - "In particular, they allow to mix plain English text with Python code cells. The plain text can be formatted using the [Markdown](https://guides.github.com/features/mastering-markdown/) language and mathematical expressions can be typeset with [LaTeX](https://www.overleaf.com/learn/latex/Free_online_introduction_to_LaTeX_%28part_1%29). Lastly, we can include pictures, plots, and even videos. Because of these features, the notebooks developed for this course come in a self-contained \"tutorial\" style that enables students to learn and review the material on their own." + "In particular, they allow to mix plain English text with Python code cells. The plain text can be formatted using the [Markdown](https://guides.github.com/features/mastering-markdown/) language and mathematical expressions can be typeset with [LaTeX](https://www.overleaf.com/learn/latex/Free_online_introduction_to_LaTeX_%28part_1%29). Lastly, we can include pictures, plots, and even videos. Because of these features, the notebooks developed for this book come in a self-contained \"tutorial\" style that enables students to learn and review the material on their own." 
] }, { @@ -295,7 +295,15 @@ "slide_type": "slide" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello world\n" + ] + } + ], "source": [ "print(\"Hello world\")" ] @@ -351,7 +359,8 @@ } }, "source": [ - "For this course *programming* is \"defined\" as\n", + "In this book *programming* is \"defined\" as:\n", + "\n", "- a **structured** way of **problem solving**\n", "- by **expressing** the steps of a **computation / process**\n", "- and thereby **documenting** the process in a formal way\n", @@ -369,7 +378,8 @@ } }, "source": [ - "That is different from *computer science*, which is\n", + "That is different from *computer science*, which is:\n", + "\n", "- a field of study comparable to (applied) **mathematics** that\n", "- asks **abstract** questions (\"Is something computable at all?\"),\n", "- develops and analyses **algorithms** and **data structures**,\n", @@ -419,6 +429,8 @@ } }, "source": [ + "Here is a brief history of and some background on Python (cf., also this [TechRepublic article](https://www.techrepublic.com/article/python-is-eating-the-world-how-one-developers-side-project-became-the-hottest-programming-language-on-the-planet/) for a more elaborate story):\n", + "\n", "- [Guido van Rossum](https://en.wikipedia.org/wiki/Guido_van_Rossum) (Python’s **[Benevolent Dictator for Life](https://en.wikipedia.org/wiki/Benevolent_dictator_for_life)**) was bored during a week around Christmas 1989 and started Python as a hobby project \"that would keep \\[him\\] occupied\" for some days\n", "- the idea was to create a **general-purpose scripting** language that would allow fast **prototyping** and would **run on every operating system**\n", "- Python grew through the 90s as van Rossum promoted it via his \"Computer Programming for Everybody\" initiative that had the **goal to encourage a basic level of coding literacy** as an equal knowledge alongside English literacy and math skills\n", @@ -446,7 +458,7 @@ 
} }, "source": [ - "Python is a **general-purpose** programming language that allows for **fast development**, is **easy to comprehend**, **open-source**, long established, unifies the knowledge of **hundreds of thousands of experts** around the world, runs on basically every machine, and can handle the complexities of applications involving **big data**." + "Python is a **general-purpose** programming language that allows for **fast development**, is **easy to read**, **open-source**, long established, unifies the knowledge of **hundreds of thousands of experts** around the world, runs on basically every machine, and can handle the complexities of applications involving **big data**." ] }, { @@ -769,7 +781,7 @@ " 1. Elements of a Program\n", " 2. Functions & Modularization\n", "- What is the flow of execution? How can we form sentences from words?\n", - " 3. Boolean Logic & Conditionals\n", + " 3. Conditionals & Exceptions\n", " 4. Recursion & Looping" ] }, @@ -812,7 +824,7 @@ } }, "source": [ - "As with every good lecture, there has to be a [xkcd](https://xkcd.com/353/) comic somewhere." + "As with every good book, there has to be an [xkcd](https://xkcd.com/353/) comic somewhere." ] }, { diff --git a/00_start_up_review_and_exercises.ipynb b/00_start_up_review_and_exercises.ipynb index 26eb93c..e1a435f 100644 --- a/00_start_up_review_and_exercises.ipynb +++ b/00_start_up_review_and_exercises.ipynb @@ -18,7 +18,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Read chapter 0 of the book. Then work through the ten review questions." + "Read Chapter 0 of the book. Then work through the ten review questions." 
] }, { diff --git a/01_elements_of_a_program.ipynb b/01_elements.ipynb similarity index 90% rename from 01_elements_of_a_program.ipynb rename to 01_elements.ipynb index 66c8e64..e1d8b09 100644 --- a/01_elements_of_a_program.ipynb +++ b/01_elements.ipynb @@ -120,7 +120,7 @@ "\n", "Lastly, the `average` is calculated as the ratio of the final **values** of `total` and `count`. Overall, we divide the sum of all even numbers by the count of all even numbers, which is exactly what we are looking for.\n", "\n", - "We also observe how the lines of code \"within\" the `for` and `if` **statements** are *indented* and *aligned* with multiples of four spaces: This shows immediately how the lines relate to each other." + "We also observe how the lines of code \"within\" the `for` and `if` **statements** are *indented* and *aligned* with multiples of **four spaces**: This shows immediately how the lines relate to each other." ] }, { @@ -152,7 +152,7 @@ } }, "source": [ - "Yet, we do not see any **output** and obtain the value of `average` by simply referencing it again." + "We do not see any **output** yet but can obtain the value of `average` by simply referencing it again." ] }, { @@ -200,9 +200,7 @@ "source": [ "Note how only two of the previous four code cells generate an **output** while two remained \"silent\" (i.e., there is no \"**Out[...]**\" after running the cell).\n", "\n", - "By default, Jupyter notebooks show the value of a cell's last so-called **expression**. This output can be suppressed by ending the last line with a semicolon.\n", - "\n", - "To visualize something before the end of the cell, we can use the [print()](https://docs.python.org/3/library/functions.html#print) built-in function." + "By default, Jupyter notebooks show the value of a cell's last so-called **expression**. This output can be suppressed by ending the last line with a semicolon `;`." 
] }, { @@ -243,6 +241,17 @@ "\"I am invisible!\";" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "To visualize something before the end of the cell, we can use the [print()](https://docs.python.org/3/library/functions.html#print) built-in **function**." + ] + }, { "cell_type": "code", "execution_count": 7, @@ -266,6 +275,39 @@ "print(\"I am feeling great :-)\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Outside Jupyter notebooks, the semicolon `;` is actually used as a **separator** between several statements that would otherwise have to be on a line on their own. However, it is *not* considered good practice to use it as it makes code less readable." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello, World!\n", + "I am feeling great :-)\n" + ] + } + ], + "source": [ + "print(\"Hello, World!\"); print(\"I am feeling great :-)\")" + ] + }, { "cell_type": "markdown", "metadata": { @@ -285,7 +327,7 @@ } }, "source": [ - "Python comes with basic mathematical operators built in. **Operators** are built-in **tokens** that have a special meaning to the Python interpreter. Most operators either \"operate\" with the object immediately following them (= **unary** operators; e.g., negation) or somehow \"process\" the two objects \"around\" them (= **binary** operators; e.g., addition). But we will see some exceptions from that as well.\n", + "Python comes with basic mathematical operators built in. **[Operators](https://docs.python.org/3/reference/lexical_analysis.html#operators)** are built-in **tokens** that have a special meaning to the Python interpreter. 
Most operators either \"operate\" with the object immediately following them (= **unary** operators; e.g., negation) or somehow \"process\" the two objects \"around\" them (= **binary** operators; e.g., addition). But we will see some exceptions from that as well.\n", "\n", "By definition, operators have **no** permanent **side effects** in the computer's memory. Although the code cells in this section do indeed lead to *new* objects being created in memory, they are immediately \"forgotten\" as they are not stored in a **variable** (like `numbers` above). We will revisit this idea further below when we compare **expressions** with **statements**.\n", "\n", @@ -294,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "slide" @@ -307,7 +349,7 @@ "90" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -318,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "-" @@ -331,7 +373,7 @@ "8" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -353,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "fragment" @@ -366,7 +408,7 @@ "-1" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -388,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "slideshow": { "slide_type": "slide" @@ -401,7 +443,7 @@ "42" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -412,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "slideshow": { "slide_type": "-" @@ -425,7 +467,7 @@ "42.0" ] }, - "execution_count": 12, + "execution_count": 13, 
"metadata": {}, "output_type": "execute_result" } @@ -447,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "slideshow": { "slide_type": "slide" @@ -460,7 +502,7 @@ "42" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -471,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "slideshow": { "slide_type": "-" @@ -484,7 +526,7 @@ "42" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -506,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "slideshow": { "slide_type": "slide" @@ -519,7 +561,7 @@ "1" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -541,7 +583,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "slideshow": { "slide_type": "fragment" @@ -554,7 +596,7 @@ "0" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -576,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "slideshow": { "slide_type": "fragment" @@ -589,7 +631,7 @@ "3" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -600,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "slideshow": { "slide_type": "-" @@ -613,7 +655,7 @@ "23" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -630,12 +672,12 @@ } }, "source": [ - "The [divmod()](https://docs.python.org/3/library/functions.html#divmod) built-in **function** combines the integer and modulo divisions into one operation. However, this is not an operator any more (but a function). 
Also observe that [divmod()](https://docs.python.org/3/library/functions.html#divmod) returns a \"pair\" of integers." + "The [divmod()](https://docs.python.org/3/library/functions.html#divmod) built-in function combines the integer and modulo divisions into one operation. However, this is not an operator any more (but a function). Also observe that [divmod()](https://docs.python.org/3/library/functions.html#divmod) returns a \"pair\" of integers." ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": { "slideshow": { "slide_type": "fragment" @@ -648,7 +690,7 @@ "(4, 2)" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -670,7 +712,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": { "slideshow": { "slide_type": "slide" @@ -683,7 +725,7 @@ "8" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -700,12 +742,12 @@ } }, "source": [ - "The normal order of precedence from mathematics applies (i.e., \"PEMDAS\" rule) but parentheses help avoid confusion." + "The normal [order of precedence](https://docs.python.org/3/reference/expressions.html#operator-precedence) from mathematics applies (i.e., \"PEMDAS\" rule) but parentheses help avoid confusion." 
] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": { "slideshow": { "slide_type": "fragment" @@ -718,7 +760,7 @@ "18" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -740,7 +782,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": { "slideshow": { "slide_type": "-" @@ -753,7 +795,7 @@ "18" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -764,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": { "slideshow": { "slide_type": "-" @@ -777,7 +819,7 @@ "81" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -799,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": { "slideshow": { "slide_type": "skip" @@ -812,7 +854,7 @@ "18" ] }, - "execution_count": 24, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -840,7 +882,7 @@ } }, "source": [ - "## Objects vs. Values vs. Types" + "## Objects vs. Types vs. 
Values" ] }, { @@ -860,7 +902,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": { "slideshow": { "slide_type": "slide" @@ -897,7 +939,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": { "slideshow": { "slide_type": "slide" @@ -907,31 +949,7 @@ { "data": { "text/plain": [ - "140382247181072" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "id(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "140382247352144" + "139940106427216" ] }, "execution_count": 27, @@ -940,7 +958,7 @@ } ], "source": [ - "id(b)" + "id(a)" ] }, { @@ -955,7 +973,7 @@ { "data": { "text/plain": [ - "140382247028656" + "139940106602344" ] }, "execution_count": 28, @@ -963,6 +981,30 @@ "output_type": "execute_result" } ], + "source": [ + "id(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "139940105769456" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "id(c)" ] @@ -975,12 +1017,12 @@ } }, "source": [ - "These addresses are really not that meaningful for anything other than checking if two variables actually point at the same object. This may be helpful as different objects can of course have the same value." + "These addresses are really not that meaningful for anything other than checking if two variables actually **point** at the same object. This may be helpful as different objects can of course have the same value." 
] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": { "slideshow": { "slide_type": "slide" @@ -999,12 +1041,12 @@ } }, "source": [ - "`a` and `d` indeed have the same value as can be checked with the **equality operator** `==`. The resulting `True` (and the `False` below) is yet another data type, a so-called **boolean**." + "`a` and `d` indeed have the same value as can be checked with the **equality operator** `==`. The resulting `True` (and the `False` below) is yet another data type, a so-called **boolean**. We will look into that closely in Chapter 3." ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { "slideshow": { "slide_type": "-" @@ -1017,7 +1059,7 @@ "True" ] }, - "execution_count": 30, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1039,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": { "slideshow": { "slide_type": "-" @@ -1052,7 +1094,7 @@ "False" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1080,12 +1122,12 @@ } }, "source": [ - "The [type()](https://docs.python.org/3/library/functions.html#type) built-in function shows an object's type. For example, `a` is an integer (`int`) while `b` is a so-called [floating-point number](https://en.wikipedia.org/wiki/Floating-point_arithmetic) (`float`)." + "The [type()](https://docs.python.org/3/library/functions.html#type) built-in function shows an object's type. For example, `a` is an integer (i.e., `int`) while `b` is a so-called [floating-point number](https://en.wikipedia.org/wiki/Floating-point_arithmetic) (i.e., `float`)." 
] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "slideshow": { "slide_type": "slide" @@ -1098,7 +1140,7 @@ "int" ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1109,7 +1151,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "slideshow": { "slide_type": "-" @@ -1122,7 +1164,7 @@ "float" ] }, - "execution_count": 33, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1148,7 +1190,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1161,7 +1203,7 @@ "True" ] }, - "execution_count": 34, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1183,7 +1225,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "slideshow": { "slide_type": "-" @@ -1197,7 +1239,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'is_integer'" ] } @@ -1214,12 +1256,12 @@ } }, "source": [ - "The `c` object is a so-called **string** type `str`, which we can view as Python's way of representing \"text\". 
Strings also come with their own behaviors, for example, to convert a text to lower or upper case." + "The `c` object is a so-called **string** type (i.e., `str`), which we can view as Python's way of representing \"text\". Strings also come with their own behaviors, for example, to convert a text to lower or upper case." ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "slideshow": { "slide_type": "slide" @@ -1232,7 +1274,7 @@ "str" ] }, - "execution_count": 36, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1243,7 +1285,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "slideshow": { "slide_type": "fragment" @@ -1256,7 +1298,7 @@ "'python rocks'" ] }, - "execution_count": 37, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1267,7 +1309,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "slideshow": { "slide_type": "-" @@ -1280,7 +1322,7 @@ "'PYTHON ROCKS'" ] }, - "execution_count": 38, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1291,7 +1333,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "metadata": { "slideshow": { "slide_type": "skip" @@ -1304,7 +1346,7 @@ "'Python Rocks'" ] }, - "execution_count": 39, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1339,7 +1381,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "metadata": { "slideshow": { "slide_type": "slide" @@ -1352,7 +1394,7 @@ "789" ] }, - "execution_count": 40, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1363,7 +1405,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "metadata": { "slideshow": { "slide_type": "-" @@ -1376,7 +1418,7 @@ "42.0" ] }, - "execution_count": 41, + "execution_count": 42, "metadata": {}, 
"output_type": "execute_result" } @@ -1393,12 +1435,12 @@ } }, "source": [ - "In this book, we follow the convention of creating strings with **double quotes** `\"` instead of the **single quotes** `'` to which Python defaults in its literal output. Both types of quotes can be used interchangebly." + "In this book, we follow the convention of creating strings with **double quotes** `\"` instead of the **single quotes** `'` to which Python defaults in its literal output for `str` objects. Both types of quotes can be used interchangeably." ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "metadata": { "slideshow": { "slide_type": "-" @@ -1411,13 +1453,13 @@ "'Python rocks'" ] }, - "execution_count": 42, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "c" + "c # we defined c = \"Python rocks\" with double quotes \" above" ] }, { @@ -1463,14 +1505,14 @@ } }, "source": [ - "If we do not follow the rules, the code cannot be **parsed** correctly, i.e., the program does not even start to run but raises a **syntax error** (indicated as `SyntaxError`). Computers are very dumb in the sense that the slightest syntax error leads to the machine not understanding our code.\n", + "If we do not follow the rules, the code cannot be **parsed** correctly, i.e., the program does not even start to run but **raises** a **syntax error** indicated as `SyntaxError` in the output. Computers are very dumb in the sense that the slightest syntax error leads to the machine not understanding our code.\n", "\n", "For example, if we wanted to write an accounting program that adds up currencies, we would have to model dollar prices as `float` objects as the dollar symbol cannot be read by Python." 
] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "metadata": { "slideshow": { "slide_type": "slide" @@ -1479,10 +1521,10 @@ "outputs": [ { "ename": "SyntaxError", - "evalue": "invalid syntax (, line 1)", + "evalue": "invalid syntax (, line 1)", "output_type": "error", "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 3.99 $ + 10.40 $\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m 3.99 $ + 10.40 $\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], @@ -1503,7 +1545,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "metadata": { "slideshow": { "slide_type": "slide" @@ -1512,10 +1554,10 @@ "outputs": [ { "ename": "SyntaxError", - "evalue": "invalid syntax (, line 1)", + "evalue": "invalid syntax (, line 1)", "output_type": "error", "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m for number in numbers\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m for number in numbers\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" ] } ], @@ -1532,12 +1574,12 @@ } }, "source": [ - "... and relies on whitespace / indentation unlike many other programming languages. A `IndentationError` is just a special type of a `SyntaxError`." + "... and relies on whitespace (i.e., indentation) unlike many other programming languages. An `IndentationError` is just a special type of a `SyntaxError`." 
] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "metadata": { "slideshow": { "slide_type": "slide" @@ -1546,10 +1588,10 @@ "outputs": [ { "ename": "IndentationError", - "evalue": "expected an indented block (, line 2)", + "evalue": "expected an indented block (, line 2)", "output_type": "error", "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m print(number)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n" + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m print(number)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block\n" ] } ], @@ -1579,14 +1621,14 @@ "source": [ "Syntax errors as above are easy to find as the code will not even run to begin with.\n", "\n", - "However, there are also so-called **runtime errors** (also called **exceptions**) that occur if the code would run given correct input.\n", + "However, there are also so-called **runtime errors**, often called **exceptions**, that occur whenever otherwise (i.e., syntactically) correct code does not run because of invalid input.\n", "\n", "This example does not work because just like in the \"real\" world, Python does not know how to divide by $0$. The syntactically correct code leads to a `ZeroDivisionError`." 
] }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "metadata": { "slideshow": { "slide_type": "slide" @@ -1600,7 +1642,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m1\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m1\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" ] } @@ -1628,14 +1670,14 @@ } }, "source": [ - "So-called **semantic errors**, on the contrary, can be very hard to spot as they do *not* crash the program. The only way to find such errors is to run the program with test input for which we know the answer already and can then verify it. However, testing software is a whole discipline on its own and often very hard to do in practice.\n", + "So-called **semantic errors**, on the contrary, can be very hard to spot as they do *not* crash the program. The only way to find such errors is to run a program with test input for which we know the answer already and can thus check the output. However, testing software is a whole discipline on its own and often very hard to do in practice.\n", "\n", - "The cell below copies our introductory example from above with a \"tiny\" error." + "The cell below copies our introductory example from above with a \"tiny\" error. How fast could you have spotted it without the comment?" 
] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "metadata": { "code_folding": [], "slideshow": { @@ -1657,7 +1699,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "metadata": { "slideshow": { "slide_type": "-" @@ -1670,7 +1712,7 @@ "3.0" ] }, - "execution_count": 48, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -1687,7 +1729,7 @@ } }, "source": [ - "Finding errors is is called **debugging**. For the history of the term, check this [link](https://en.wikipedia.org/wiki/Debugging)." + "Finding errors in a systematic way is called **debugging**. For the history of the term, see this [article](https://en.wikipedia.org/wiki/Debugging)." ] }, { @@ -1709,14 +1751,14 @@ } }, "source": [ - "Adhering to just syntax rules is therefore *never* enough. Over time, **best practices** and common **style guides** were created to make it less likely for a developer to mess up a program and also to allow \"onboarding\" him as a contributor to an established code base (often called **legacy code**) faster. These rules are not enforced by Python itself: Badly styled and un-readable code will still run. At the very least, Python programs should be styled according to [PEP 8](https://www.python.org/dev/peps/pep-0008/) and documented \"inline\" (i.e., in the code itself) according to [PEP 257](https://www.python.org/dev/peps/pep-0257/).\n", + "Adhering to just syntax rules is therefore *never* enough. Over time, **best practices** and common **style guides** were created to make it less likely for a developer to mess up a program and also to allow \"onboarding\" him as a contributor to an established code base, often called **legacy code**, faster. These rules are not enforced by Python itself: Badly styled and un-readable code will still run. 
At the very least, Python programs should be styled according to [PEP 8](https://www.python.org/dev/peps/pep-0008/) and documented \"inline\" (i.e., in the code itself) according to [PEP 257](https://www.python.org/dev/peps/pep-0257/).\n", "\n", "An easier to read version of PEP 8 can be found [here](https://pep8.org/). The video below features a well known \"[Pythonista](https://en.wiktionary.org/wiki/Pythonista)\" talking about the importance of code style." ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "metadata": { "slideshow": { "slide_type": "skip" @@ -1738,10 +1780,10 @@ " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 49, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1759,12 +1801,12 @@ } }, "source": [ - "For example, while the above code to calculate the average of the even numbers from 1 through 10 is correct, a Pythonista would re-write it in a more \"Pythonic\" way and use the [sum()](https://docs.python.org/3/library/functions.html#sum) and [len()](https://docs.python.org/3/library/functions.html#len) (= \"length\") built-in functions. Pythonic code runs faster in many cases and is less error prone." + "For example, while the above code to calculate the average of the even numbers from 1 through 10 is correct, a Pythonista would re-write it in a more \"Pythonic\" way and use the [sum()](https://docs.python.org/3/library/functions.html#sum) and [len()](https://docs.python.org/3/library/functions.html#len) (= \"length\") built-in functions (cf., Chapter 2) as well as a so-called **list comprehension** (cf., Chapter 7). Pythonic code runs faster in many cases and is less error prone." 
] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "metadata": { "slideshow": { "slide_type": "slide" @@ -1777,7 +1819,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "metadata": { "slideshow": { "slide_type": "-" @@ -1785,12 +1827,12 @@ }, "outputs": [], "source": [ - "evens = [n for n in numbers if n % 2 == 0] # example for a so-called list comprehension" + "evens = [n for n in numbers if n % 2 == 0] # example of a so-called list comprehension" ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 53, "metadata": { "slideshow": { "slide_type": "-" @@ -1803,7 +1845,7 @@ "[2, 4, 6, 8, 10]" ] }, - "execution_count": 52, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -1814,7 +1856,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 54, "metadata": { "slideshow": { "slide_type": "-" @@ -1827,7 +1869,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 55, "metadata": { "slideshow": { "slide_type": "-" @@ -1840,7 +1882,7 @@ "6.0" ] }, - "execution_count": 54, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1862,7 +1904,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 56, "metadata": { "slideshow": { "slide_type": "slide" @@ -1961,7 +2003,7 @@ "\n", "At the same time, for a beginner's course it is often easier to just code in a linear fashion.\n", "\n", - "In real data science projects one would probably employ a mixed approach and put re-usable code into so-called Python modules (= \\*.py files) and then use Jupyter notebooks to built up a linear report or story line for a business argument to be made." 
+ "In real data science projects one would probably employ a mixed approach and put re-usable code into so-called Python modules (i.e., *.py* files; cf., Chapter 2) and then use Jupyter notebooks to build up a linear report or story line for a business argument to be made." ] }, { @@ -1972,7 +2014,7 @@ } }, "source": [ - "## Variables / Names" + "## Variables vs. Names vs. Identifiers" ] }, { @@ -1983,12 +2025,14 @@ } }, "source": [ - "**Variables** are created with the **assignment statement** `=`. As its name suggests, it is *not* an operator, mainly because of its side effect of making a **name** \"point\" to an object in memory." + "**Variables** are created with the **[assignment statement](https://docs.python.org/3/reference/simple_stmts.html#assignment-statements)** `=`, which is *not* an operator, mainly because of its side effect of making a **[name](https://docs.python.org/3/reference/lexical_analysis.html#identifiers)** point to an object in memory.\n", + "\n", + "We will read the terms **variable**, **name**, and **identifier** used interchangeably in many Python-related texts. In this book, we adopt the following convention: First, we treat *name* and *identifier* as perfect synonyms but only use the term *name* in the text for clarity. Second, whereas *name* only refers to a string of letters, numbers, and some other symbols, a *variable* refers to the combination of a *name* and a *pointer* to some object in memory." ] }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 57, "metadata": { "slideshow": { "slide_type": "slide" @@ -2008,14 +2052,12 @@ } }, "source": [ - "When referenced, a variable just evaluates to the value of the object it points to. Colloquially, we could say that `a` evaluates to `20.0` here but this would not be an accurate description of what is really going on in memory.\n", - "\n", - "We will see some more colloquial jargons in this section but should always remind ourselves what we better said instead."
+ "When referenced, a variable evaluates to the value of the object it points to. Colloquially, we could say that `a` evaluates to `20.0` here but this would not be a full description of what is really going on in memory. We will see some more colloquial jargons in this section but should always relate this to what Python actually does in memory." ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 58, "metadata": { "slideshow": { "slide_type": "fragment" @@ -2028,7 +2070,7 @@ "20.0" ] }, - "execution_count": 57, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2045,12 +2087,12 @@ } }, "source": [ - "A variable can be **re-assigned** as often as we wish. Thereby, we could also assign an object of a different type. Because this is allowed, Python is said to be a **dynamically typed** language. On the contrary, a **statically typed** language like C also allows re-assignment but only with objects of the same type. This subtle distinction is one reason why Python is slower at execution than C: As it runs a program, it needs to figure out an object's type each time it is referenced. But as mentioned before, this can be mitigated with third-party libraries." + "A variable can be **re-assigned** as often as we wish. Thereby, we could also assign an object of a *different* type. Because this is allowed, Python is said to be a **dynamically typed** language. On the contrary, a **statically typed** language like C also allows re-assignment but only with objects of the *same* type. This subtle distinction is one reason why Python is slower at execution than C: As it runs a program, it needs to figure out an object's type each time it is referenced. But as mentioned before, this can be mitigated with third-party libraries." 
] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 59, "metadata": { "slideshow": { "slide_type": "fragment" @@ -2058,12 +2100,12 @@ }, "outputs": [], "source": [ - "a = 20" + "a = 20 # this makes a point to an object of a different type" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 60, "metadata": { "slideshow": { "slide_type": "-" @@ -2076,7 +2118,7 @@ "20" ] }, - "execution_count": 59, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2093,12 +2135,12 @@ } }, "source": [ - "If we want to re-assign a variable while referencing its \"old\" (i.e., current) object, we can also **update** it using a so-called **augmented assignment statement** (*not* operator). This implicitly inserts the current \"value\" as the first token on the right-hand side." + "If we want to re-assign a variable while referencing its \"old\" (i.e., current) object, we can also **update** it using a so-called **[augmented assignment statement](https://docs.python.org/3/reference/simple_stmts.html#augmented-assignment-statements)** (*not* operator), originally introduced with [PEP 203](https://www.python.org/dev/peps/pep-0203/). This implicitly inserts the currently mapped object as the first operand on the right-hand side." 
] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 61, "metadata": { "slideshow": { "slide_type": "slide" @@ -2111,7 +2153,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 62, "metadata": { "slideshow": { "slide_type": "-" @@ -2124,7 +2166,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 63, "metadata": { "slideshow": { "slide_type": "-" @@ -2137,7 +2179,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 64, "metadata": { "slideshow": { "slide_type": "-" @@ -2150,7 +2192,7 @@ "42" ] }, - "execution_count": 63, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -2167,12 +2209,12 @@ } }, "source": [ - "Variables can be **de-referenced** (i.e., \"deleted\") with the `del` statement. This does *not* \"delete\" the object to which a variable points to. It merely removes the variable from the \"list of all variables\"." + "Variables can be **[de-referenced](https://docs.python.org/3/reference/simple_stmts.html#the-del-statement)** (i.e., \"deleted\") with the `del` statement. This does *not* delete the object to which a variable points. It merely removes the variable's name from the \"global list of all names\"." ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 65, "metadata": { "slideshow": { "slide_type": "slide" @@ -2185,7 +2227,7 @@ "789" ] }, - "execution_count": 64, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -2196,7 +2238,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 66, "metadata": { "slideshow": { "slide_type": "-" @@ -2215,12 +2257,12 @@ } }, "source": [ - "If we refer to an unknown name, a runtime exception occurs, namely a `NameError`." + "If we refer to an unknown name, a *runtime* error occurs, namely a `NameError`.
The `Name` in `NameError` gives a hint as to why we prefer the term *name* over *identifier*: Python just uses it more often in its error messages." ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 67, "metadata": { "slideshow": { "slide_type": "-" @@ -2234,7 +2276,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'b' is not defined" ] } @@ -2251,12 +2293,12 @@ } }, "source": [ - "Some names magically exist when we start Python. In this introductory book, we can safely ignore such variables." + "Some variables magically exist when we start a Python process or are added by Jupyter. We can safely ignore the former until Chapter 10 and the latter for good." 
] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 68, "metadata": { "slideshow": { "slide_type": "skip" @@ -2269,7 +2311,7 @@ "'__main__'" ] }, - "execution_count": 67, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -2291,7 +2333,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 69, "metadata": { "slideshow": { "slide_type": "slide" @@ -2321,15 +2363,15 @@ " '_22',\n", " '_23',\n", " '_24',\n", - " '_26',\n", + " '_25',\n", " '_27',\n", " '_28',\n", - " '_30',\n", + " '_29',\n", " '_31',\n", " '_32',\n", " '_33',\n", " '_34',\n", - " '_36',\n", + " '_35',\n", " '_37',\n", " '_38',\n", " '_39',\n", @@ -2337,17 +2379,17 @@ " '_40',\n", " '_41',\n", " '_42',\n", - " '_48',\n", + " '_43',\n", " '_49',\n", " '_5',\n", - " '_52',\n", - " '_54',\n", - " '_57',\n", - " '_59',\n", - " '_63',\n", + " '_50',\n", + " '_53',\n", + " '_55',\n", + " '_58',\n", + " '_60',\n", " '_64',\n", - " '_67',\n", - " '_8',\n", + " '_65',\n", + " '_68',\n", " '_9',\n", " '__',\n", " '___',\n", @@ -2425,6 +2467,7 @@ " '_i66',\n", " '_i67',\n", " '_i68',\n", + " '_i69',\n", " '_i7',\n", " '_i8',\n", " '_i9',\n", @@ -2447,7 +2490,7 @@ " 'total']" ] }, - "execution_count": 68, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } @@ -2477,20 +2520,7 @@ "source": [ "It is important to understand that *several* variables can point to the *same* object in memory. This can be counter-intuitive in the beginning and lead to many hard to track down bugs.\n", "\n", - "This makes `b` point to whatever object `a` points to." - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [], - "source": [ - "b = a # this is different from b == a" + "This makes `b` point to whatever object `a` is pointing to." 
] }, { @@ -2498,23 +2528,12 @@ "execution_count": 70, "metadata": { "slideshow": { - "slide_type": "-" + "slide_type": "slide" } }, - "outputs": [ - { - "data": { - "text/plain": [ - "42" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "a" + "b = a" ] }, { @@ -2537,6 +2556,30 @@ "output_type": "execute_result" } ], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "42" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "b" ] @@ -2549,14 +2592,14 @@ } }, "source": [ - "For \"simple\" types like `int` or `float` this will never cause confusion.\n", + "For \"simple\" types like `int` or `float` this will never cause troubles.\n", "\n", "Let's \"change the value\" of `a`. Really, let's create a *new* `123` object and make `a` point to it." ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 73, "metadata": { "slideshow": { "slide_type": "fragment" @@ -2569,7 +2612,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 74, "metadata": { "slideshow": { "slide_type": "-" @@ -2582,7 +2625,7 @@ "123" ] }, - "execution_count": 73, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } @@ -2604,7 +2647,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 75, "metadata": { "slideshow": { "slide_type": "-" @@ -2617,7 +2660,7 @@ "42" ] }, - "execution_count": 74, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" } @@ -2634,12 +2677,12 @@ } }, "source": [ - "However, if a name points to an object of a more \"complex\" object, for example, of type `list`, \"weird\" things can happen." 
+ "However, if a variable points to an object of a more \"complex\" type (e.g., `list`), \"weird\" things can happen." ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 76, "metadata": { "slideshow": { "slide_type": "slide" @@ -2652,7 +2695,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 77, "metadata": { "slideshow": { "slide_type": "skip" @@ -2665,7 +2708,7 @@ "list" ] }, - "execution_count": 76, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -2676,7 +2719,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 78, "metadata": { "slideshow": { "slide_type": "-" @@ -2697,14 +2740,14 @@ "source": [ "Let's change the first element of `x`.\n", "\n", - "Chapter 7 discusses lists in more depth. For now, let's just view a list as some sort of **container** object that holds an arbitrary number of pointers to other objects and treat the brackets `[...]` attached to `x` as just another operator, called the **indexing operator**. `x[0]` instructs Python to first \"follow\" the pointer from the \"global\" directory of all names to the list object. Then, it follows the first pointer it finds there to the `1` object. The indexing operator must be an operator as we merely read the first element an do not change anything in memory.\n", + "Chapter 7 discusses lists in more depth. For now, let's just view a `list` object as some sort of **container** that holds an arbitrary number of pointers to other objects and treat the brackets `[]` attached to it as just another operator, called the **indexing operator**. `x[0]` instructs Python to first follow the pointer from the global list of all names to the `x` object. Then, it follows the first pointer it finds there to the `1` object. The indexing operator must be an operator as we merely read the first element and do not change anything in memory.\n", "\n", - "Note how Python **begins counting at 0**. 
This is not the case for many other languages, for example, MATLAB, R, or Stata. To understand why this makes sense, see this short [note](https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html) by one of the all-time greats in computer science, the late [Edsger Dijkstra](https://en.wikipedia.org/wiki/Edsger_W._Dijkstra)." + "Note how Python **begins counting at 0**. This is not the case for many other languages, for example, [MATLAB](https://en.wikipedia.org/wiki/MATLAB), [R](https://en.wikipedia.org/wiki/R_%28programming_language%29), or [Stata](https://en.wikipedia.org/wiki/Stata). To understand why this makes sense, see this short [note](https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html) by one of the all-time greats in computer science, the late [Edsger Dijkstra](https://en.wikipedia.org/wiki/Edsger_W._Dijkstra)." ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 79, "metadata": { "slideshow": { "slide_type": "fragment" @@ -2717,7 +2760,7 @@ "1" ] }, - "execution_count": 78, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } @@ -2734,12 +2777,12 @@ } }, "source": [ - "To change the first entry in the list, we use the assignment statement `=` again. Here, this does actually *not* create a *new* variable but only changes the object to which the first pointer in the `x` list points to. As we only change parts of the `x` list, we say that we change its **state**." + "To change the first entry in the list, we use the assignment statement `=` again. Here, this does actually *not* create a *new* variable (or overwrite an existing one) but only changes the object to which the first pointer in the `x` list points to. As we only change parts of the `x` object, we say that we **mutate** (i.e., \"change\") its **state**. To use the bag analogy from above, we keep the same bag but \"flip\" some of the $0$s into $1$s and some of the $1$s into $0$s." 
] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 80, "metadata": { "slideshow": { "slide_type": "slide" @@ -2752,7 +2795,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 81, "metadata": { "slideshow": { "slide_type": "-" @@ -2765,7 +2808,7 @@ "[99, 2, 3]" ] }, - "execution_count": 80, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -2782,12 +2825,12 @@ } }, "source": [ - "The changes made to `x` can also be seen through the `y` variable." + "The changes made to the object `x` is pointing to can also be seen through the `y` variable!" ] }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 82, "metadata": { "slideshow": { "slide_type": "fragment" @@ -2800,7 +2843,7 @@ "[99, 2, 3]" ] }, - "execution_count": 81, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -2823,7 +2866,7 @@ "\n", "In the second case, `x[0] = 99` creates a *new* integer object `99` and merely changes the first pointer in the `x` list.\n", "\n", - "In general, the assignment statement (re-)creates a variable and makes it point to whatever object is on the right-hand side *if* the left-hand side is a *pure* variable name. Otherwise, it changes some object on the left-hand side (this is strictly not a must but we should expect it).\n", + "In general, the assignment statement creates (or overwrites) a variable and makes it point to whatever object is on the right-hand side *only if* the left-hand side is a *pure* name (i.e., it contains no operators like the indexing operator in the example). Otherwise, it mutates some already existing object. 
And we always have to expect that the latter might have more than one variable pointing at it.\n", "\n", "In the beginning, visualizing the memory with a tool like [PythonTutor](http://pythontutor.com/visualize.html#code=x%20%3D%20%5B1,%202,%203%5D%0Ay%20%3D%20x%0Ax%5B0%5D%20%3D%2099%0Adel%20x,%20y%0Ax%20%3D%20%5B1,%202,%203%5D%0Ay%20%3D%20x.copy%28%29%0Ax%5B0%5D%20%3D%2099&cumulative=false&curInstr=0&heapPrimitives=nevernest&mode=display&origin=opt-frontend.js&py=3&rawInputLstJSON=%5B%5D&textReferences=false) will assist in understanding what is going on." ] @@ -2847,9 +2890,9 @@ } }, "source": [ - "**[Phil Karlton](https://skeptics.stackexchange.com/questions/19836/has-phil-karlton-ever-said-there-are-only-two-hard-things-in-computer-science)** famously noted (during his time at [Netscape](https://en.wikipedia.org/wiki/Netscape)):\n", + "[Phil Karlton](https://skeptics.stackexchange.com/questions/19836/has-phil-karlton-ever-said-there-are-only-two-hard-things-in-computer-science) famously noted during his time at [Netscape](https://en.wikipedia.org/wiki/Netscape):\n", "\n", - "> \"There are two hard problems in computer science: naming things and cache invalidation ... and off-by-one errors.\"" + "> \"There are *two* hard problems in computer science: *naming things* and *cache invalidation* ... 
and *off-by-one* errors.\"" ] }, { @@ -2880,7 +2923,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 83, "metadata": { "slideshow": { "slide_type": "slide" @@ -2893,7 +2936,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 84, "metadata": { "slideshow": { "slide_type": "-" @@ -2906,7 +2949,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 85, "metadata": { "slideshow": { "slide_type": "-" @@ -2919,7 +2962,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 86, "metadata": { "slideshow": { "slide_type": "-" @@ -2943,7 +2986,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 87, "metadata": { "slideshow": { "slide_type": "skip" @@ -2956,7 +2999,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 88, "metadata": { "slideshow": { "slide_type": "skip" @@ -2969,7 +3012,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 89, "metadata": { "slideshow": { "slide_type": "skip" @@ -2977,12 +3020,12 @@ }, "outputs": [], "source": [ - "name = \"Alexander\" # name of what ?" + "name = \"Alexander\" # name of what?" 
] }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 90, "metadata": { "slideshow": { "slide_type": "skip" @@ -2991,10 +3034,10 @@ "outputs": [ { "ename": "SyntaxError", - "evalue": "can't assign to operator (, line 1)", + "evalue": "can't assign to operator (, line 1)", "output_type": "error", "traceback": [ - "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m address@work = \"Burgplatz 2, Vallendar\"\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m can't assign to operator\n" + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m address@work = \"Burgplatz 2, Vallendar\"\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m can't assign to operator\n" ] } ], @@ -3015,7 +3058,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 91, "metadata": { "slideshow": { "slide_type": "skip" @@ -3034,12 +3077,12 @@ } }, "source": [ - "Variables with leading and trailing double underscores (referred to as **dunder** in Python \"slang\") are used for important built-in variables. Do *not* use this style for custom variables!" + "Variables with leading and trailing double underscores, referred to as **dunder** in Python jargon, are used for important built-in functionalities. Do *not* use this style for custom variables unless you know exactly what you are doing!" 
] }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 92, "metadata": { "slideshow": { "slide_type": "skip" @@ -3052,7 +3095,7 @@ "'__main__'" ] }, - "execution_count": 91, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } @@ -3080,12 +3123,12 @@ } }, "source": [ - "This PyCon talk by [Ned Batchelder](https://nedbatchelder.com/) (a software engineer at [edX](https://www.edx.org/) and the organizer of the [Python User Group](https://www.meetup.com/bostonpython/) in Boston) summarizes all situations where some sort of variable assignment is done in Python. The content is intermediate and therefore it is ok if you do not understand everything at this point. However, the contents should be known by everyone claiming to be a Pythonista." + "This PyCon talk by [Ned Batchelder](https://nedbatchelder.com/), a well-known Pythonista and the organizer of the [Python User Group](https://www.meetup.com/bostonpython/) in Boston, summarizes all situations where some sort of variable assignment is done in Python. The content is intermediate and therefore it is ok if you do not understand everything at this point. However, the contents should be known by everyone claiming to be proficient in Python." ] }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 93, "metadata": { "slideshow": { "slide_type": "skip" @@ -3107,10 +3150,10 @@ " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 92, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -3139,18 +3182,18 @@ } }, "source": [ - "An **expression** is any syntactically correct **combination** of **variables** and **literals** with **operators**. 
See the [language reference](https://docs.python.org/3/reference/expressions.html) for a full list.\n", + "An **[expression](https://docs.python.org/3/reference/expressions.html)** is any syntactically correct **combination** of **variables** and **literals** with **operators**.\n", "\n", "In simple words, anything that can be used on the right-hand side of an assignment statement without creating a `SyntaxError` is an expression.\n", "\n", - "What we said about individual operators above (namely that they have *no* side effects) should have been put here to begin with. The examples in the section on operators were actually all expressions.\n", + "What we said about individual operators above, namely that they have *no* side effects, should have been put here to begin with. The examples in the section on operators above were actually all expressions!\n", "\n", "The simplest possible expression contains only one variable (or literal)." ] }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 94, "metadata": { "slideshow": { "slide_type": "slide" @@ -3163,7 +3206,7 @@ "123" ] }, - "execution_count": 93, + "execution_count": 94, "metadata": {}, "output_type": "execute_result" } @@ -3174,7 +3217,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 95, "metadata": { "slideshow": { "slide_type": "-" @@ -3187,7 +3230,7 @@ "165" ] }, - "execution_count": 94, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -3204,12 +3247,12 @@ } }, "source": [ - "The definition of an expression is **recursive**. So here the sub-expression `a + b` is combined with the literal `3` by the operator `**` to form another expression." + "The definition of an expression is **recursive**. So here the sub-expression `a + b` is combined with the literal `3` by the operator `**` to form the full expression." 
] }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 96, "metadata": { "slideshow": { "slide_type": "-" @@ -3222,7 +3265,7 @@ "4492125" ] }, - "execution_count": 95, + "execution_count": 96, "metadata": {}, "output_type": "execute_result" } @@ -3239,12 +3282,12 @@ } }, "source": [ - "As before, the bracket operator `[...]` can be used for indexing." + "Here, the variable `y` is combined with the literal `2` by the indexing operator `[]`. The resulting expression evaluates to the third element of `y`." ] }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 97, "metadata": { "slideshow": { "slide_type": "-" @@ -3257,7 +3300,7 @@ "3" ] }, - "execution_count": 96, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -3274,12 +3317,12 @@ } }, "source": [ - "When not used as a **delimiter**, parentheses also constitute an operator, namely the **call operator** `(...)`. We have seen this syntax above when we \"called\" (i.e., executed) built-in functions and methods." + "When not used as a **delimiter**, parentheses also constitute an operator, namely the **call operator** `()`. We have seen this syntax above when we \"called\" (i.e., executed) built-in functions and methods." 
] }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 98, "metadata": { "slideshow": { "slide_type": "-" @@ -3292,7 +3335,7 @@ "104" ] }, - "execution_count": 97, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } @@ -3325,7 +3368,7 @@ }, { "cell_type": "code", - "execution_count": 98, + "execution_count": 99, "metadata": { "slideshow": { "slide_type": "slide" @@ -3339,7 +3382,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 100, "metadata": { "slideshow": { "slide_type": "fragment" @@ -3352,7 +3395,7 @@ "'Hi class'" ] }, - "execution_count": 99, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } @@ -3374,7 +3417,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": 101, "metadata": { "slideshow": { "slide_type": "fragment" @@ -3387,7 +3430,7 @@ "'Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi Hi '" ] }, - "execution_count": 100, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } @@ -3415,14 +3458,14 @@ } }, "source": [ - "A **statement** is anything that changes the state of the program's memory or has some other side effect. Statements do not just evaluate to a value like expressions; instead, they create or change values. See the [language reference](https://docs.python.org/3/reference/simple_stmts.html) for a full list.\n", + "A **[statement](https://docs.python.org/3/reference/simple_stmts.html)** is anything that changes the state of the program's memory or has some other side effect. Statements do not just evaluate to a value like expressions; instead, they create or change values.\n", "\n", "Most notably of course are the `=` and `del` statements." 
] }, { "cell_type": "code", - "execution_count": 101, + "execution_count": 102, "metadata": { "slideshow": { "slide_type": "slide" @@ -3435,7 +3478,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 103, "metadata": { "slideshow": { "slide_type": "-" @@ -3459,7 +3502,7 @@ }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 104, "metadata": { "slideshow": { "slide_type": "skip" @@ -3506,7 +3549,7 @@ }, { "cell_type": "code", - "execution_count": 104, + "execution_count": 105, "metadata": { "slideshow": { "slide_type": "slide" @@ -3534,7 +3577,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 106, "metadata": { "slideshow": { "slide_type": "fragment" @@ -3547,7 +3590,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 107, "metadata": { "slideshow": { "slide_type": "-" @@ -3655,7 +3698,7 @@ "\n", "- flow control (cf., Chapter 3)\n", " - expression of **logic** or an **algorithm**\n", - " - conditional execution of a small **branch** within a program (i.e., `if`-statements)\n", + " - conditional execution of a small **branch** within a program (i.e., `if` statements)\n", " - repetitive execution of parts of a program (i.e., `for`-loops and `while`-loops)" ] } diff --git a/01_elements_of_a_program_review_and_exercises.ipynb b/01_elements_review_and_exercises.ipynb similarity index 92% rename from 01_elements_of_a_program_review_and_exercises.ipynb rename to 01_elements_review_and_exercises.ipynb index 088001b..2207ccc 100644 --- a/01_elements_of_a_program_review_and_exercises.ipynb +++ b/01_elements_review_and_exercises.ipynb @@ -19,7 +19,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Read chapter 1 of the book. Then work through the ten review questions." + "Read Chapter 1 of the book. Then work through the ten review questions." 
] }, { @@ -180,7 +180,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Q10**: [PEP 8](https://www.python.org/dev/peps/pep-0008/) suggests that developers use **$8$ spaces** per level of indentation." + "**Q10**: [PEP 8](https://www.python.org/dev/peps/pep-0008/) suggests that developers use **8 spaces** per level of indentation." ] }, { @@ -263,7 +263,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Q11.3**: Lastly, what does the `end=\"\\n\"` mean in the documentation? Use it in the `for`-loop to print the numbers $1$ through $10$ in just one line." + "**Q11.3**: Lastly, what does the `end=\"\\n\"` mean in the documentation? Use it in the `for`-loop to print the numbers 1 through 10 in just one line." ] }, { @@ -298,7 +298,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Q11.1**: First, create a list `numbers` with the numbers from $1$ through $100$. You could type all numbers manually but there is of course a smarter way. The built-in [range()](https://docs.python.org/3/library/functions.html#func-range) may be useful here. Read how it works in the documentation. To make the output of [range()](https://docs.python.org/3/library/functions.html#func-range) a `list` object, you have to \"wrap\" it with the [list()](https://docs.python.org/3/library/functions.html#func-list) built-in (i.e., `list(range(...))`)." + "**Q11.1**: First, create a list `numbers` with the numbers from 1 through 100. You could type all numbers manually but there is of course a smarter way. The built-in [range()](https://docs.python.org/3/library/functions.html#func-range) may be useful here. Read how it works in the documentation. To make the output of [range()](https://docs.python.org/3/library/functions.html#func-range) a `list` object, you have to \"wrap\" it with the [list()](https://docs.python.org/3/library/functions.html#func-list) built-in (i.e., `list(range(...))`)." 
] }, { @@ -335,7 +335,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**Q11.3**: Create a loop that prints out either the number or any of the Fizz Buzz substitutes. Do it in such a way that we do not end up with $100$ lines of output here." + "**Q11.3**: Create a loop that prints out either the number or any of the Fizz Buzz substitutes. Do it in such a way that we do not end up with 100 lines of output here." ] }, { diff --git a/02_functions.ipynb b/02_functions.ipynb new file mode 100644 index 0000000..2c284f9 --- /dev/null +++ b/02_functions.ipynb @@ -0,0 +1,3478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Chapter 2: Functions & Modularization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In Chapter 1 we typed the **business logic** of our little program to calculate the mean of a subset of a list of numbers right into the code cells. Then, we executed them one after another. We had no way of **re-using** the code except for either re-executing the cells or copying and pasting their contents into other cells. And whenever we find ourselves doing repetitive manual work, we can be sure that there must be a way of automating what we are doing.\n", + "\n", + "At the same time, we executed built-in functions (e.g., [print()](https://docs.python.org/3/library/functions.html#print), [sum()](https://docs.python.org/3/library/functions.html#sum), [len()](https://docs.python.org/3/library/functions.html#len), [id()](https://docs.python.org/3/library/functions.html#id), or [type()](https://docs.python.org/3/library/functions.html#type)) that obviously must be re-using the same parts inside core Python every time we use them.\n", + "\n", + "This chapter shows how Python offers language constructs that let us **define** our own functions that we can then **call** just like the built-in ones." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Function Definition" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "So-called **[user-defined functions](https://docs.python.org/3/reference/compound_stmts.html#function-definitions)** can be created with the `def` statement. To extend an already familiar example, we re-use the introductory example from Chapter 1 in its final Pythonic version and transform it into the function `average_evens()` below. \n", + "\n", + "A function's **name** must be chosen according to the same naming rules as for ordinary variables. In fact, Python manages function names just like variables. In this book, we further adopt the convention of ending function names with parentheses \"`()`\" in text cells for faster comprehension when reading (i.e., `average_evens()` vs. `average_evens`). These are not actually part of the name but must always be written out in the `def` statement for syntactic reasons.\n", + "\n", + "Functions may define an arbitrary number of **parameters** as inputs that can then be referenced within the indented **code block**: They are simply listed within the parentheses in the `def` statement (i.e., `numbers` below). 
\n", + "\n", + "The code block is often also called a function's **body** while the first line with the `def` in it is the **header** and must end with a colon.\n", + "\n", + "Together, the name and the list of parameters are also referred to as the function's **[signature](https://en.wikipedia.org/wiki/Type_signature)** (i.e., `average_evens(numbers)` below).\n", + "\n", + "A function may come with an *explicit* **[return value](https://docs.python.org/3/reference/simple_stmts.html#the-return-statement)** (i.e., \"result\" or \"output\") specified with the `return` statement: Functions that have one are considered **fruitful**; otherwise, they are **void**. Functions of the latter kind are still useful because of their **side effects** (e.g., the [print()](https://docs.python.org/3/library/functions.html#print) built-in). Strictly speaking, they also have an *implicit* return value of `None` that is different from the `False` we saw in Chapter 1.\n", + "\n", + "To maintain good coding practices, a function should define a **docstring** that describes what it does in a short subject line, what parameters it expects (i.e., their types), and what it returns (if anything). A docstring is a syntactically valid multi-line string (i.e., type `str`) defined with **triple-double quotes** (strings are covered in depth in Chapter 6). Good standards as to how to format a docstring are [PEP 257](https://www.python.org/dev/peps/pep-0257/) and section 3.8 in [Google's Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def average_evens(numbers):\n", + " \"\"\"Calculate the average of all even numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): a list of numbers; may be integers or floats\n", + "\n", + " Returns:\n", + " float: average\n", + " \"\"\"\n", + " evens = [n for n in numbers if n % 2 == 0]\n", + " average = sum(evens) / len(evens)\n", + " return average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Once defined, a function can be referenced just like any other variable by its name (i.e., *without* the parenthesis). Its value might seem awkward at first: It consists of the location where we defined the function (i.e., `__main__`, which is Python's way of saying \"in this notebook\") and the signature." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "A function is an **object** on its own with an **identity** (i.e., memory location) and a **type**, namely `function`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "139655430681056" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(average_evens)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "function" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(average_evens)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The built-in [help()](https://docs.python.org/3/library/functions.html#help) function shows a function's docstring.\n", + "\n", + "Whenever we use code to analyze or obtain information on an object, we say that we **[introspect](https://en.wikipedia.org/wiki/Type_introspection)** it." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function average_evens in module __main__:\n", + "\n", + "average_evens(numbers)\n", + " Calculate the average of all even numbers in a list.\n", + " \n", + " Args:\n", + " numbers (list): a list of numbers; may be integers or floats\n", + " \n", + " Returns:\n", + " float: average\n", + "\n" + ] + } + ], + "source": [ + "help(average_evens)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In a Jupyter notebook, we can just as well add a question mark to a function's name to achieve the same. Then, a small tab opens in our browser." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "average_evens?" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Two questions marks even show a function's source code." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "average_evens??" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Function Calls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Once defined we can **call** (i.e., \"execute\") a function with the **call operator** `()`. The formal parameters are filled in by passing variables or expressions as **arguments** to the function within the parentheses." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "nums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The return value is usually assigned to a new variable for later reference. Otherwise we would loose access to it in memory right away." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "avg = average_evens(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Scoping Rules" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Local Scope disappears" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Notice how the parameters listed in a function's definition (i.e., `numbers`) and variables created inside it during execution (i.e., `evens` and `average`) are **local** to that function. That means they are only mapped to an object in memory while the function is being executed and de-referenced immediately when the function returns. We say they **go out of scope** once the function terminates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'numbers' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnumbers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'numbers' is not defined" + ] + } + ], + "source": [ + "numbers" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'evens' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mevens\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'evens' is not defined" + ] + } + ], + "source": [ + "evens" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'average' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0maverage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'average' is not defined" + ] + } + ], + "source": [ + "average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Global Scope is everywhere" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In contrast, while a function is being executed, it can \"see\" the variables of the **enclosing scope** (i.e., \"outside\" of it). This is a common source of *semantic* errors. Consider the following stylized (and incorrect) example `average_wrong()`. The error is hard to spot with the naked eye: The function never references the `numbers` parameter but the `nums` variable in the **global scope** instead." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def average_wrong(numbers):\n", + " \"\"\"Calculate the average of all even numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): a list of numbers; may be integers or floats\n", + "\n", + " Returns:\n", + " float: average\n", + " \"\"\"\n", + " evens = [n for n in nums if n % 2 == 0] # should reference numbers, not nums\n", + " average = sum(evens) / len(evens)\n", + " return average" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nums" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 17, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "average_wrong(nums) # the result is correct by accident!" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_wrong([123, 456, 789])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Also, observe how both `average_evens()` and `average_wrong()` use the same names for their respective parameters and variables internally. For sure, Python is smart enough to not mix them up. This is because each function call creates a temporary **[namespace](https://en.wikipedia.org/wiki/Namespace)** that *isolates* the local scope's names for usage only from within the function. As we saw in the [Zen of Python](https://www.python.org/dev/peps/pep-0020/), \"namespaces are one honking great idea\" (cf., `import this`)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Shadowing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Code gets even more confusing when variables by the same name from different scopes collide. In particular, what should we expect to happen if a function changes a globally defined variable internally?\n", + "\n", + "`average_odds()` below works like `average_evens()` above except that it **[casts](https://en.wikipedia.org/wiki/Type_conversion)** (i.e., \"converts\") the elements of `numbers` as objects of type `int` with the [int()](https://docs.python.org/3/library/functions.html#int) built-in first before filtering and averaging them. 
In doing so, it introduces an *internal* variable `nums` whose name collides with the one in the global scope. The **inequality operator** `!=` is just the **reversed** version of `==`." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def average_odds(numbers):\n", + " \"\"\"Calculate the average of all odd numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): a list of numbers; must be integers\n", + "\n", + " Returns:\n", + " float: average\n", + " \"\"\"\n", + " nums = [int(n) for n in numbers] # cast all numbers as integers first\n", + " odds = [n for n in nums if n % 2 != 0] # before filtering for odd numbers\n", + " average = sum(odds) / len(odds)\n", + " return average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "`nums` in the global scope is of course the same list from above." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nums" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "As good practice, let's first use inputs for which we can calculate the answer in our heads to verify that `average_odds()` is correct." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_odds([1, 100, 3, 100, 5]) # verify the function's correctness with predictable inputs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "To make the confusion even bigger, let's also pass the global `nums` as an argument to `average_odds()`." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5.0" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_odds(nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Python, however, is again smart enough to keep the two `nums` variables apart. So the global `nums` is still pointing to the very same list object as before." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nums" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The reason why everything works just fine is that *every time* we (re-)assign an object to a variable inside a function with the `=` statement, this is done in the local scope by default. 
There are ways to change variables existing in an outer scope from within a function but we save that for a later chapter.\n", + "\n", + "Variables whose names collide with the ones of variables in enclosing scopes - and the global scope is just the most enclosing scope - are said to **shadow** them.\n", + "\n", + "While this is not a problem for Python as we have observed, it may lead to less readable code for us humans and should be avoided if possible. But, as we have also heard, \"[naming things](https://skeptics.stackexchange.com/questions/19836/has-phil-karlton-ever-said-there-are-only-two-hard-things-in-computer-science)\" is often considered hard as well and we have to be prepared to encounter shadowing variables." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Built-in Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Python comes with plenty of useful functions built in, some of which we have already seen before. The [documentation](https://docs.python.org/3/library/functions.html) has the full list. Just as core Python itself, they are implemented in C and thus very fast.\n", + "\n", + "[len()](https://docs.python.org/3/library/functions.html#len) counts the number of elements in a container object while [sum()](https://docs.python.org/3/library/functions.html#sum) adds up all the elements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "55" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "We can cast certain objects as a different type. For example, to \"convert\" a float or a text into an integer, we use the [int()](https://docs.python.org/3/library/functions.html#int) built-in. This actually creates a *new* object of type `int` from the provided `avg` or `\"6\"` objects who continue to exist in memory unchanged." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "int(avg)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "int(\"6\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Observe that casting as an integer is different from rounding with the [round()](https://docs.python.org/3/library/functions.html#round) built-in function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "int(7.99)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "round(7.99)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Not all conversions are valid and *runtime* errors can occur as the `ValueError` shows." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "invalid literal for int() with base 10: 'six'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"six\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'six'" + ] + } + ], + "source": [ + "int(\"six\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "We can also go in the other direction with the [float()](https://docs.python.org/3/library/functions.html#float) built-in function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "42.0" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "float(42)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Positional vs. Keyword Arguments" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "So far we have only specified one parameter in each of our user-defined functions. In Chapter 1, however, we saw the built-in function [divmod()](https://docs.python.org/3/library/functions.html#divmod) taking two arguments. Obviously, the order of the numbers passed in mattered. Whenever we call a function and list its arguments in a comma-separated manner, we say that we pass in the arguments by position or refer to them as **positional arguments**." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 2)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "divmod(42, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 10)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "divmod(10, 42)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "For many functions there is a natural order to the arguments. But what if this is not the case? 
For example, let's create a close relative of the above `average_evens()` function that also scales the resulting average by a factor. What is more natural? Passing in `numbers` first? Or `scalar`? There is no obvious way and we continue with the first alternative for no real reason." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def scaled_average_evens(numbers, scalar):\n", + " \"\"\"Calculate a scaled average of all even numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): a list of numbers; may be integers or floats\n", + " scalar (float): the scalar that multiplies the average\n", + " of the even numbers\n", + "\n", + " Returns:\n", + " float: scaled average\n", + " \"\"\"\n", + " evens = [n for n in numbers if n % 2 == 0]\n", + " average = sum(evens) / len(evens)\n", + " return scalar * average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "As with [divmod()](https://docs.python.org/3/library/functions.html#divmod), we can pass in the arguments by position." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_average_evens(nums, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "However, now the function call is a bit harder to comprehend as we need to always remember what the `2` means. This becomes even harder the more parameters we specify.\n", + "\n", + "Luckily, we can also reference the formal parameter names as **keyword arguments**. We can even combine positional and keyword arguments in the same function call. 
Each of the following does the exact same thing." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_average_evens(nums, scalar=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_average_evens(numbers=nums, scalar=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_average_evens(scalar=2, numbers=nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Unfortunately, there are ways to screw this up with a `SyntaxError`: If positional and keyword arguments are mixed, the keyword arguments *must* come last." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "positional argument follows keyword argument (, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m scaled_average_evens(numbers=nums, 2)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m positional argument follows keyword argument\n" + ] + } + ], + "source": [ + "scaled_average_evens(numbers=nums, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Default Argument Values" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Defining both `average_evens()` and `scaled_average_evens()` is also kind of repetitive as most of their code is the same. Such a redundancy will make a code base harder to maintain in the long run as whenever we change the logic in one function we must *not* forget to do so for the other function as well.\n", + "\n", + "A better way is to design related functions in a **modular** fashion such that they re-use each other's logic.\n", + "\n", + "For example, as not scaling an average is just a special case of scaling it with `1`, we could re-define the two functions like below: In this setting, the function resembling the *special* case (i.e., `average_evens()`) simply **forwards** the call to the more *general* function (i.e., `scaled_average_evens()`) using a `scalar=1` argument." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def scaled_average_evens(numbers, scalar):\n", + " \"\"\"Calculate a scaled average of all even numbers in a list.\n", + "\n", + " ...\n", + " \"\"\"\n", + " evens = [n for n in numbers if n % 2 == 0]\n", + " average = sum(evens) / len(evens)\n", + " return scalar * average" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "def average_evens(numbers):\n", + " \"\"\"Calculate the average of all even numbers in a list.\n", + "\n", + " ...\n", + " \"\"\"\n", + " return scaled_average_evens(numbers, scalar=1) # refactored to use the logic in scaled_average_evens()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The outcome of `average_evens(nums)` is of course still `6.0`." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "If we *assume* that scaling the average occurs rarely, we could also handle both cases in *one* function definition by providing a **default value** for the `scalar` parameter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def average_evens(numbers, scalar=1):\n", + " \"\"\"Calculate the average of all even numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): list of numbers; may be integers or floats\n", + " scalar (float, optional): the scalar that multiplies the\n", + " average of the even numbers\n", + "\n", + " Returns:\n", + " float: (scaled) average\n", + " \"\"\"\n", + " evens = [n for n in numbers if n % 2 == 0]\n", + " average = sum(evens) / len(evens)\n", + " return scalar * average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Now we can call the function either with or without the `scalar` argument.\n", + "\n", + "If `scalar` is passed in, this can be done as either a positional or a keyword argument. Which of the two versions where `scalar` is `2` is easier to comprehend in a large program?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums, scalar=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Keyword-only Arguments" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Since we *assumed* that scaling will occur *rarely*, we'd prefer that our new version of `average_evens()` be called with a *keyword argument* whenever `scalar` is passed in explicitly. Then, the second argument is never ambiguous as we could always read its name.\n", + "\n", + "Luckily, Python offers a **keyword-only** syntax, where all we need to do is to place the arguments for which we require *explicit* keyword use after an asterisk `*`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "def average_evens(numbers, *, scalar=1):\n", + " \"\"\"Calculate the average of all even numbers in a list.\n", + "\n", + " Args:\n", + " numbers (list): list of numbers; may be integers or floats\n", + " scalar (float, optional): the scalar that multiplies the\n", + " average of the even numbers\n", + "\n", + " Returns:\n", + " float: (scaled) average\n", + " \"\"\"\n", + " evens = [n for n in numbers if n % 2 == 0]\n", + " average = sum(evens) / len(evens)\n", + " return scalar * average" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "If we now call the function with a `scalar` argument passed in, we *must* use keyword notation." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_evens(nums, scalar=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "If we pass in `scalar` as a positional argument instead, we obtain a `TypeError`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "average_evens() takes 1 positional argument but 2 were given", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0maverage_evens\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnums\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: average_evens() takes 1 positional argument but 2 were given" + ] + } + ], + "source": [ + "average_evens(nums, 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Anonymous Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The `def` statement is a statement because of its side effect of creating a *new* name that points to a *new* `function` object in memory.\n", + "\n", + "We can thus think of it as doing *two* things at once (i.e., either both of them happen or none). First, a `function` object is created that contains the concrete $0$s and $1$s that resemble the instructions we put into the function's body. In the context of a function, these $0$s and $1$s are also called **[byte code](https://en.wikipedia.org/wiki/Bytecode)**. Then, a name is created pointing at the new `function` object.\n", + "\n", + "Only this second aspect makes `def` a statement: Merely creating a new object in memory without making it accessible for later reference does *not* constitute a side effect. 
This is because the state of the program is *not* changed. After all, if we cannot reference an object, how do we know that it actually exists?\n", + "\n", + "Python provides a so-called **[lambda expression](https://docs.python.org/3/reference/expressions.html#lambda)** syntax that allows us to *only* create a `function` object in memory *without* making a name point to it.\n", + "\n", + "It starts with the keyword `lambda` followed by an optional comma-separated enumeration of parameters, a mandatory colon, and *one* expression that also is the resulting `function` object's return value.\n", + "\n", + "Because it does not create a name pointing to the object, we effectively create \"anonymous\" functions with it. In the example, we create a `function` object that adds `3` to the only argument passed in as the parameter `x` and returns that sum." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(x)>" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lambda x: x + 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "If you think this is a rather pointless thing to do, you are absolutely correct!\n", + "\n", + "We created a `function` object, did *not* call it, and Python immediately forgot about it. So what's the point?\n", + "\n", + "Just to prove that the `lambda` expression really creates a callable `function` object, we use the simple `=` statement to assign it to the variable `add_three`, which is really `add_three()` as per our convention from above." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "add_three = lambda x: x + 3 # we could and should use def instead" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Now we can call `add_three()` as if we defined it with the `def` statement to begin with." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "add_three(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Alternatively, we could call any anonymous `function` object created with a `lambda` expression right away (i.e., without assigning it to a variable), which looks really weird for now as we need *two* pairs of parentheses: The first is just a delimiter whereas the second is the call operator." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "42" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(lambda x: x + 3)(39) # this looks weird but will become very useful" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The main point of having functions without a name is to use them in a situation where we know ahead of time that we will use the function only once.\n", + "\n", + "Very popular contexts where we will apply lambda expressions are with the **map-filter-reduce** paradigm in Chapter 7 or when we do \"number crunching\" with **arrays** and **data frames** in Chapter 9." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Extending Core Python" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "So far, we have only used what we refer to as **core** Python in this book. By this, we mean all the syntactical rules as specified in the [language reference](https://docs.python.org/3/reference/) and a minimal set of about 50 built-in [functions](https://docs.python.org/3/library/functions.html). With this we could already implement any algorithm or business logic we can think of!\n", + "\n", + "However, after our first couple of programs, we would already start seeing recurring patterns in the code we write. In other words, we would constantly be \"re-inventing the wheel\" in each new project.\n", + "\n", + "Would it not be smarter to pull out the re-usable components from our programs and put them into some project independent **library** of generically useful functionalities? 
Then we would only need a way of including these **utilities** in our projects.\n", + "\n", + "As all programmers across all languages face this very same issue, most programming languages come with a so-called **[standard library](https://en.wikipedia.org/wiki/Standard_library)** that provides utilities to accomplish common tasks without a lot of code. Examples are making an HTTP request to some website, opening and reading popular file types (e.g., CSV or Excel files), doing something on a computer's file system, and many more." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Standard Library" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Python also comes with its own [standard library](https://docs.python.org/3/library/index.html) that is structured into coherent modules and packages for given topics: A **module** is just a plain text file with the file extension *.py* that contains Python code while a **package** is a folder that groups several related modules.\n", + "\n", + "The code in the [standard library](https://docs.python.org/3/library/index.html) is contributed and maintained by many volunteers around the world. In contrast to so-called \"third-party\" packages (cf., the next section below), the Python core development team closely monitors and tests the code in the [standard library](https://docs.python.org/3/library/index.html). Consequently, we can be reasonably sure that anything provided by it works correctly independent of our computer's operating system and will most likely also be there in the next Python versions. Parts in the [standard library](https://docs.python.org/3/library/index.html) that are computationally expensive are often re-written in C and therefore much faster than anything we could code in Python ourselves. 
So, whenever we can solve a problem with the help of the [standard library](https://docs.python.org/3/library/index.html), it is almost always the best way to do so as well.\n", + "\n", + "The [standard library](https://docs.python.org/3/library/index.html) has grown very big over the years and we refer to the website [PYMOTW](https://pymotw.com/3/index.html) (i.e., \"Python Module of the Week\") that features well written introductory tutorials and how-to guides to most parts of the library. The same author also published a [book](https://www.amazon.com/Python-Standard-Library-Example-Developers/dp/0134291050/ref=as_li_ss_tl?ie=UTF8&qid=1493563121&sr=8-1&keywords=python+3+standard+library+by+example) that many Pythonistas keep on their shelf for reference. Knowing what is in the [standard library](https://docs.python.org/3/library/index.html) is quite valuable for solving real world tasks quickly.\n", + "\n", + "Throughout this book we will look at many modules and packages from the [standard library](https://docs.python.org/3/library/index.html) in more depth, starting with the [math](https://docs.python.org/3/library/math.html) and [random](https://docs.python.org/3/library/random.html) modules in this chapter." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "#### The [math](https://docs.python.org/3/library/math.html) Module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The [math](https://docs.python.org/3/library/math.html) module provides non-trivial mathematical functions like $sin(x)$ and constants like $\\pi$ or $\\text{e}$.\n", + "\n", + "To make functions and variables defined \"somewhere else\" available in our current program, we must first **[import](https://docs.python.org/3/reference/simple_stmts.html#import)** them with the `import` statement. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import math" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "This creates the variable `math` that points to a **[module object](https://docs.python.org/3/glossary.html#term-module)** (i.e., type `module`) in memory." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "139655519584648" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(math)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "module" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(math)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "`module` objects serve as namespaces to organize the names inside a module. In this context, a namespace is nothing but a prefix that avoids collision with the variables already defined at the location where we import the module into.\n", + "\n", + "Let's see what we can do with the `math` module.\n", + "\n", + "The [dir()](https://docs.python.org/3/library/functions.html#dir) built-in function can also be used with an argument passed in. 
Ignoring the dunder-style names, `math` offers quite a lot of ... names. As we cannot know at this point in time if a listed name refers to a function or an ordinary variable, we use the more generic term **attribute** to mean either one of them." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['__doc__',\n", + " '__file__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__spec__',\n", + " 'acos',\n", + " 'acosh',\n", + " 'asin',\n", + " 'asinh',\n", + " 'atan',\n", + " 'atan2',\n", + " 'atanh',\n", + " 'ceil',\n", + " 'copysign',\n", + " 'cos',\n", + " 'cosh',\n", + " 'degrees',\n", + " 'e',\n", + " 'erf',\n", + " 'erfc',\n", + " 'exp',\n", + " 'expm1',\n", + " 'fabs',\n", + " 'factorial',\n", + " 'floor',\n", + " 'fmod',\n", + " 'frexp',\n", + " 'fsum',\n", + " 'gamma',\n", + " 'gcd',\n", + " 'hypot',\n", + " 'inf',\n", + " 'isclose',\n", + " 'isfinite',\n", + " 'isinf',\n", + " 'isnan',\n", + " 'ldexp',\n", + " 'lgamma',\n", + " 'log',\n", + " 'log10',\n", + " 'log1p',\n", + " 'log2',\n", + " 'modf',\n", + " 'nan',\n", + " 'pi',\n", + " 'pow',\n", + " 'radians',\n", + " 'remainder',\n", + " 'sin',\n", + " 'sinh',\n", + " 'sqrt',\n", + " 'tan',\n", + " 'tanh',\n", + " 'tau',\n", + " 'trunc']" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(math)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Common mathematical constants and functions are now available via the dot operator `.` on the `math` object. This operator is sometimes also called the **attribute access operator**, in line with the just introduced term." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3.141592653589793" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.pi" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2.718281828459045" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.e" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.sqrt" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function sqrt in module math:\n", + "\n", + "sqrt(x, /)\n", + " Return the square root of x.\n", + "\n" + ] + } + ], + "source": [ + "help(math.sqrt)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.4142135623730951" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.sqrt(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Observe how the arguments passed to functions do not need to be just variables or simple literals. 
Instead, we can pass in any *expression* that evaluates to a *new* object of the type the function expects.\n", + "\n", + "So just as a reminder from the expression vs. statement discussion in Chapter 1: An expression is *any* syntactically correct combination of variables and literals with operators. And the call operator `()` is just ... well another operator. So both of the next two code cells are just expressions! They have no permanent side effect in memory. We can execute them as often as we want *without* changing the state of the program (i.e., this Jupyter notebook).\n", + "\n", + "So, regarding the very next cell in particular: Although the `2 ** 2` creates a *new* object `4` in memory that is then immediately passed into the [math.sqrt()](https://docs.python.org/3/library/math.html#math.sqrt) function, once that function call returns, \"all is lost\" and the newly created `4` object is forgotten again, as well as the return value of [math.sqrt()](https://docs.python.org/3/library/math.html#math.sqrt)." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.sqrt(2 ** 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Even the **composition** of several function calls only constitutes another expression." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "10.0" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "math.sqrt(average_evens([99, 100, 101]))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "If we only need one particular function from a module, we can also use the alternative `from ... import ...` syntax.\n", + "\n", + "This does *not* create a module object but only makes a variable in our current location point to an object defined inside a module directly." + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "from math import sqrt" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "4.0" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sqrt(16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "#### The [random](https://docs.python.org/3/library/random.html) Module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Often times, we need a random variable, for example, when we want to build a simulation program. The [random](https://docs.python.org/3/library/random.html) module in the [standard library](https://docs.python.org/3/library/index.html) often suffices for that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Besides the usual dunder-style attributes, the [dir()](https://docs.python.org/3/library/functions.html#dir) built-in function lists some attributes in an upper case naming convention and many others starting with a single underscore \"\\_\". To understand the former, we have to wait until Chapter 10 while the latter are explained further below." + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['BPF',\n", + " 'LOG4',\n", + " 'NV_MAGICCONST',\n", + " 'RECIP_BPF',\n", + " 'Random',\n", + " 'SG_MAGICCONST',\n", + " 'SystemRandom',\n", + " 'TWOPI',\n", + " '_BuiltinMethodType',\n", + " '_MethodType',\n", + " '_Sequence',\n", + " '_Set',\n", + " '__all__',\n", + " '__builtins__',\n", + " '__cached__',\n", + " '__doc__',\n", + " '__file__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__spec__',\n", + " '_acos',\n", + " '_bisect',\n", + " '_ceil',\n", + " '_cos',\n", + " '_e',\n", + " '_exp',\n", + " '_inst',\n", + " '_itertools',\n", + " '_log',\n", + " '_os',\n", + " '_pi',\n", + " '_random',\n", + " '_sha512',\n", + " '_sin',\n", + " '_sqrt',\n", + " '_test',\n", + " '_test_generator',\n", + " '_urandom',\n", + " '_warn',\n", + " 'betavariate',\n", + " 'choice',\n", + " 'choices',\n", + " 'expovariate',\n", + " 
'gammavariate',\n", + " 'gauss',\n", + " 'getrandbits',\n", + " 'getstate',\n", + " 'lognormvariate',\n", + " 'normalvariate',\n", + " 'paretovariate',\n", + " 'randint',\n", + " 'random',\n", + " 'randrange',\n", + " 'sample',\n", + " 'seed',\n", + " 'setstate',\n", + " 'shuffle',\n", + " 'triangular',\n", + " 'uniform',\n", + " 'vonmisesvariate',\n", + " 'weibullvariate']" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(random)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The [random.random()](https://docs.python.org/3/library/random.html#random.random) function generates a uniformly distributed `float` number between $0$ (including) and $1$ (excluding)." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.random" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on built-in function random:\n", + "\n", + "random(...) 
method of random.Random instance\n", + " random() -> x in the interval [0, 1).\n", + "\n" + ] + } + ], + "source": [ + "help(random.random)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12717011866176486" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.random()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "While we could build some conditional logic with an `if` statement to map the number generated by [random.random()](https://docs.python.org/3/library/random.html#random.random) to a finite set of elements manually, the [random.choice()](https://docs.python.org/3/library/random.html#random.choice) function provides a lot more **convenience** for us. We simply call it with, for example, the `nums` list and it draws one element out of it with equal chance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + ">" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.choice" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method choice in module random:\n", + "\n", + "choice(seq) method of random.Random instance\n", + " Choose a random element from a non-empty sequence.\n", + "\n" + ] + } + ], + "source": [ + "help(random.choice)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.choice(nums)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In order to reproduce the same random numbers in a simulation each time we run it, we can set the **[random seed](https://en.wikipedia.org/wiki/Random_seed)**. It is good practice to do this at the beginning of a program or notebook. Then every time we re-start the program, we will get the exact same random numbers again. This becomes very important, for example, when we employ certain machine learning algorithms that rely on randomization, like the infamous [Random Forest](https://en.wikipedia.org/wiki/Random_forest), and want to obtain **reproducible** results.\n", + "\n", + "The [random](https://docs.python.org/3/library/random.html) module provides the [random.seed()](https://docs.python.org/3/library/random.html#random.seed) function to do that."
+ ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6394267984578837" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.random()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "random.seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6394267984578837" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.random()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Third-party Packages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "As the Python community is based around open source, many developers publish their code, for example, on the Python Package Index [PyPI](https://pypi.org) from where anyone can download and install it for free using command line based tools like [pip](https://pip.pypa.io/en/stable/) or [conda](https://conda.io/en/latest/). This way, we can always customize our Python installation even more. 
Managing many such packages is actually quite a deep topic on its own, sometimes fearfully called **[dependency hell](https://en.wikipedia.org/wiki/Dependency_hell)**.\n", + "\n", + "The difference between the [standard library](https://docs.python.org/3/library/index.html) and such **third-party** packages is that in the first case the code goes through a much more formalized review process and is officially endorsed by the Python core developers. Yet, many third-party projects also offer the highest quality standards and a lot of such software is actually also relied on by many businesses and researchers.\n", + "\n", + "Throughout this book, we will look at many third-party libraries, mostly from Python's [scientific stack](https://scipy.org/about.html), a tightly coupled set of third-party libraries for storing **big data** efficiently ([numpy](http://www.numpy.org/)), \"wrangling\" ([pandas](https://pandas.pydata.org/)) and visualizing them ([matplotlib](https://matplotlib.org/) and [seaborn](https://seaborn.pydata.org/)), fitting classical statistical models ([statsmodels](http://www.statsmodels.org/)), training machine learning models ([sklearn](http://scikit-learn.org/)), and much more.\n", + "\n", + "Below, we briefly show how to install third-party libraries." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "#### The [numpy](http://www.numpy.org/) Library" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "[numpy](http://www.numpy.org/) is the de-facto standard in the Python world for handling **array-like** data. That is a fancy word for data that can be put into a matrix or vector format. 
We will look at it in depth in Chapter 9.\n", + "\n", + "As [numpy](http://www.numpy.org/) is *not* in the [standard library](https://docs.python.org/3/library/index.html), it must be *manually* installed, for example, with the [pip](https://pip.pypa.io/en/stable/) tool. As mentioned in Chapter 0, to execute terminal commands from within a Jupyter notebook, we just need to start a code cell with an exclamation mark.\n", + "\n", + "If you are running this notebook with an installation of the [Anaconda Distribution](https://www.anaconda.com/distribution/), then [numpy](http://www.numpy.org/) is probably already installed. Running the cell below will just confirm that." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: numpy in /home/webartifex/.pyenv/versions/anaconda3-2019.07/lib/python3.7/site-packages (1.16.4)\r\n" + ] + } + ], + "source": [ + "!pip install numpy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "[numpy](http://www.numpy.org/) is conventionally imported with the shorter **idiomatic** name `np`. The `as` in the import statement just changes the resulting variable name. It is a shortcut for the three lines `import numpy`, `np = numpy`, and `del numpy`." + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "`np` can be used in the same way as `math` or `random` above."
+ ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Let's convert the above `nums` list into a vector-like object of type `numpy.ndarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "vec = np.array(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vec" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.ndarray" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(vec)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "[numpy](http://www.numpy.org/) somehow magically adds new behavior to Python's built-in arithmetic operators. For example, we can now [scalar-multiply](https://en.wikipedia.org/wiki/Scalar_multiplication) `vec`.\n", + "\n", + "[numpy](http://www.numpy.org/)'s functions are implemented in highly optimized C code and therefore fast, especially when it comes to big data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20])" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "2 * vec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "This scalar multiplication would \"fail\" if we used a plain `list` object like `nums` instead of a `numpy.ndarray` object like `vec`. The two types exhibit different **behavior** when used with the same operator, another example of **operator overloading**." + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "2 * nums # surprise, surprise" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "[numpy](http://www.numpy.org/)'s `numpy.ndarray` objects integrate nicely with Python's built-in functions (e.g., [sum()](https://docs.python.org/3/library/functions.html#sum)) or functions from the [standard library](https://docs.python.org/3/library/index.html) (e.g., [random.choice()](https://docs.python.org/3/library/random.html#random.choice))."
+ ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "55" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(vec)" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.choice(vec)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Local Modules and Packages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "For sure, we can create our own modules and packages. In the repository's main directory, there is a [*sample_module.py*](https://github.com/webartifex/intro-to-python/blob/master/sample_module.py) file that contains, among others, a function equivalent to the final version of `average_evens()`. To be realistic, this sample module is structured in a modular manner with several functions building on each other. It is best to skim over it *now* before reading on.\n", + "\n", + "To make code we put into a *.py* file available in our program, we import it as a module just as we did above with modules in the [standard library](https://docs.python.org/3/library/index.html) or third-party packages.\n", + "\n", + "The *name* to be imported is the file's name except for the *.py* part. In order for this to work, the file's name *must* adhere to the *same* rules as hold for [variable names](https://docs.python.org/3/reference/lexical_analysis.html#identifiers) in general.\n", + "\n", + "What happens during an import is conceptually as follows. 
When Python sees the `import sample_module` part, it first creates a *new* object of type `module` in memory. This is effectively an *empty* namespace. Then, it executes the imported file's code from top to bottom. Whatever variables are still defined at the end of this are put into the module's namespace. Only if the file's code does *not* raise an error will Python make a variable in our current location (i.e., `mod` here) point to the created `module` object. Otherwise, it is discarded. In essence, it is as if we copied and pasted the file's code in place of the import statement. If we import an already imported module again, Python is smart enough to avoid doing all this work all over and does nothing." + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import sample_module as mod" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mod" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Disregarding the dunder-style attributes, `mod` defines the five attributes `_default_scalar`, `_scaled_average`, `average`, `average_evens`, and `average_odds`, which are exactly the ones we would expect from reading the [*sample_module.py*](https://github.com/webartifex/intro-to-python/blob/master/sample_module.py) file.\n", + "\n", + "An important convention when working with imported code is to *disregard* any attributes starting with an underscore \"\\_\". These are considered **private** and constitute **implementation details** the author of the imported code might change in a future version of their software.
We *must* not rely on them in any way.\n", + "\n", + "In contrast, the three remaining **public** attributes are the functions `average()`, `average_evens()`, and `average_odds()` that we may use after the import." + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['__builtins__',\n", + " '__cached__',\n", + " '__doc__',\n", + " '__file__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__spec__',\n", + " '_default_scalar',\n", + " '_scaled_average',\n", + " 'average',\n", + " 'average_evens',\n", + " 'average_odds']" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(mod)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "We can use the imported `mod.average_evens()` just like `average_evens()` defined above. The advantage we get from **modularization** with *.py* files is that we can now easily re-use functions across different Jupyter notebooks without re-defining them again and again. Also, we can \"source out\" code that distracts from the storyline told in a notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function average_evens in module sample_module:\n", + "\n", + "average_evens(numbers, *, scalar=1)\n", + " Calculate the average of all even numbers in a list.\n", + " \n", + " Args:\n", + " numbers (list): list of numbers; may be integers or floats\n", + " scalar (float, optional): the scalar that multiplies the\n", + " average of the even numbers\n", + " \n", + " Returns:\n", + " float: (scaled) average\n", + "\n" + ] + } + ], + "source": [ + "help(mod.average_evens)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "6.0" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mod.average_evens(nums)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "12.0" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mod.average_evens(nums, scalar=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Packages are a generalization of modules and we will look at one in Chapter 10 in detail. You can, however, already look at a [sample package](https://github.com/webartifex/intro-to-python/tree/master/sample_package) in the repository, which is nothing but a folder with *.py* files in it.\n", + "\n", + "As a further reference on modules, we refer to the [official tutorial](https://docs.python.org/3/tutorial/modules.html)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "## TL;DR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "A **function** is a **named sequence** of statements that perform a computation.\n", + "\n", + "Functions provide benefits as they:\n", + "\n", + "- make programs easier to comprehend and debug for humans as they give names to the smaller parts of a larger program (i.e., they **modularize** a code base), and\n", + "- eliminate redundancies by allowing **re-use of code**.\n", + "\n", + "Functions are **defined** once with the `def` statement. Then, they can be **called** many times with the call operator `()`.\n", + "\n", + "They may process **parameterized** inputs, **passed** in as **arguments**, and output a **return value**.\n", + "\n", + "Arguments can be passed in by **position** or **keyword**. Some functions may even require **keyword-only** arguments.\n", + "\n", + "**Lambda expressions** create anonymous functions.\n", + "\n", + "Core Python can be extended with code from either the **standard library** or **third-party** libraries.\n", + "\n", + "Outside Jupyter notebooks, Python code is put into **modules** that are grouped in **packages**." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "livereveal": { + "auto_select": "code", + "auto_select_fragment": true, + "scroll": true, + "theme": "serif" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "384px" + }, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/02_functions_review_and_exercises.ipynb b/02_functions_review_and_exercises.ipynb new file mode 100644 index 0000000..647e47f --- /dev/null +++ b/02_functions_review_and_exercises.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Chapter 2: Functions & Modularization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Content Review" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read Chapter 2 of the book. Then work through the ten review questions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Essay Questions " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Answer the following questions briefly with *at most* 300 characters per question!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q1**: What property of the `def` statement makes it a **statement**? Is there a way to use an **expression** to create a function?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q2**: One of the first confusions of experienced programmers coming from other languages to Python regards the observation that **\"everything in Python is an object\"** (cf., this [discussion](https://www.reddit.com/r/learnpython/comments/8rypx9/everything_in_python_is_an_object/)). How does this relate to **functions**?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q3**: What does it mean for a variable to **go out of scope**?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q4**: How can a **global** variable be **shadowed**? Is this good or bad?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q5**: Explain the concept of **forwarding** a function **call**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q6**: What are **keyword-only arguments** and when is it appropriate to use them?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### True / False Questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Motivate your answer with *one short* sentence!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q7**: A mere function **call** is just an **expression**." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q8**: When using the `import` statement, we need to ensure that the imported attributes do **not** overwrite any already defined variables and functions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q9**: Functions always have a name by which we can call them." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q10**: The [standard library](https://docs.python.org/3/library/index.html) is a collection of numerical tools often used in scientific computing, for example, advanced mathematical functions or utilities for simulation." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Coding Exercises" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Volume of a Sphere" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.1**: The [volume of a sphere](https://en.wikipedia.org/wiki/Sphere) is defined as $\\frac{4}{3} * \\pi * r^3$. Calculate this value for $r=10.0$ and round it to 10 digits after the decimal point. Use the [standard library](https://docs.python.org/3/library/index.html) to obtain a good approximation of $\\pi$."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.2**: Encapsulate the logic into a function `sphere_volume()` that takes one *positional* argument `radius` and one *keyword-only* argument `digits` defaulting to `5`. The volume should be returned as a `float` object under *all* circumstances. Document your work appropriately in a docstring according to [Google's Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.3**: Evaluate the function with `radius = 100.0` and 1, 5, 10, 15, and 20 digits respectively." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.4**: What observation do you make?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.5**: Using the [range()](https://docs.python.org/3/library/functions.html#func-range) built-in, write a `for`-loop and calculate the volume of a sphere with `radius = 42.0` for all `digits` from `1` through `20`. Print out each volume on a separate line.\n", + "\n", + "Note: This is the first task where you actually need to use the [print()](https://docs.python.org/3/library/functions.html#print) built-in function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q11.6**: What important lesson did you learn about the `float` type?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/03_conditionals.ipynb b/03_conditionals.ipynb new file mode 100644 index 0000000..acd509d --- /dev/null +++ b/03_conditionals.ipynb @@ -0,0 +1,1805 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { +
"slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Chapter 3: Conditionals & Exceptions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "We analyzed every aspect of the `average_evens()` function in Chapter 2 except for the `if` part. While it seems to intuitively do what we expect it to, there is a whole lot more to be learned from taking it apart. In particular, the `if` can occur within both a **statement** as in our introductory example in Chapter 1 but also an **expression** as in `average_evens()`. This is analogous as to how a noun in a natural language is *either* the subject of *or* an object in a sentence. What is common to both versions of the `if` is that it leads to code being executed for *parts* of the input only. It is our first way of **controlling** the **flow of execution** in a program.\n", + "\n", + "After deconstructing `if` in the first part of this chapter, we take a close look at a similar concept, namely handling and raising **exceptions**." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Boolean Expressions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Any expression that is either true or not is called a **boolean expression**. If you think such expressions are boring or just not so useful, read a bit on [propositional logic](https://en.wikipedia.org/wiki/Propositional_calculus) and you will quickly realize how mathematicians and originally philosophers base their rules of how to prove or disprove a conclusion on simple true-or-false \"statements\" about the world. It is the underlying principle of all of reasoning.\n", + "\n", + "A trivial example involves the equality operator `==` that evaluates to either `True` or `False` depending on its operands \"comparing equal\" or not." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 == 42" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 == 123" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Observe how `==` can handle objects of *different* type. This shows how it implements a notion of equality in line with how we humans think of things being equal or not. After all, `42` and `42.0` are totally different $0$s and $1$s for a computer and many programming languages would actually say `False` here! Technically, this is yet another example of operator overloading." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 == 42.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "There are, however, cases where even well-behaved Python does not make us happy. Chapter 5 will provide more insights on that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 == 42.000000000000001" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "`True` and `False` are special built-in *objects* of type `bool`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "94709180875744" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "94709180875712" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "bool" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "bool" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Let's not confuse the boolean `False` with `None`, another special built-in object! 
We saw the latter before in Chapter 2 as the *implicit* return value of a function without a `return` statement.\n", + "\n", + "We might think of `None` in a boolean context indicating a \"maybe\" or even an \"unknown\" answer. But for Python, there are no \"maybe\" or \"unknown\" objects as we will see further below!\n", + "\n", + "Whereas `False` is of type `bool`, `None` is of type `NoneType`. So, they are totally unrelated. On the contrary, as both `True` and `False` are of the same type, we could call them \"siblings\"." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "None" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "94709180862704" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(None)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NoneType" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "`True`, `False`, and `None` have the property that they each exist in memory only *once*. Objects designed this way are so-called **singletons**. This **[design pattern](https://en.wikipedia.org/wiki/Design_Patterns)** was originally developed to keep a program's memory usage at a minimum. It may only be employed in situations where we know that an object will never mutate its value in place (i.e., to re-use the bag analogy from Chapter 1, no flipping of $0$s and $1$s in the bag is allowed). 
In languages \"closer\" to the memory like C we would have to code this singleton logic ourselves but Python has this already built in for *some* types.\n", + "\n", + "We can verify this with either the `is` operator or by comparing memory addresses." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "True is True" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(True) == id(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "So the following expression involves *four* objects in memory: *One* `list` object holding ten pointers to *three* other objects." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[True, False, None, None, None, True, False, None, None, None]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[True, False, None, None, None, True, False, None, None, None]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Relational Operators" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The equality operator is only one of several **relational (i.e., \"comparison\") operators** that all evaluate to a boolean object."
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 == 123" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 != 123 # = \"not equal to\"; other programming languages sometimes use \"<>\" instead" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The \"less than\" `<` or \"greater than\" `>` operators on their own mean \"strictly less than\" or \"strictly greater than\" but can be combined with the equality operator into just `<=` and `>=`. This is a shortcut for using the logical `or` operator as described in the next section." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 < 123" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 <= 123 # same as 42 < 123 or 42 == 123; cf., next section" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 > 123" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "42 >= 123 # same as 42 > 123 or 42 == 123; cf., next section" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Logical Operators" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Boolean expressions can be combined or negated with the **logical operators** `and`, `or`, and `not` to form new boolean expressions. 
Of course, this may be done *recursively* as well to obtain boolean expressions of arbitrary complexity.\n", + "\n", + "Their usage is similar to how the equivalent words are used in plain English:\n", + "\n", + "- `and` evaluates to `True` if *both* sub-expressions evaluate to `True` and `False` otherwise,\n", + "- `or` evaluates to `True` if either one *or* both sub-expressions evaluate to `True` and `False` otherwise, and\n", + "- `not` evaluates to `True` if its *only* sub-expression evaluates to `False` and vice versa." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "x = 42\n", + "y = 87" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Relational operators have a *higher precedence* over logical operators (cf., the [reference](https://docs.python.org/3/reference/expressions.html#operator-precedence)). So the following expression means what we intuitively think it does." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x > 5 and y <= 100" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "However, sometimes it is good to use *parentheses* around each sub-expression for clarity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(x > 5) and (y <= 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "This is especially useful when several logical operators are combined." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x <= 5 or not y > 100" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(x <= 5) or not (y > 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(x <= 5) or (not (y > 100)) # but no need to \"over do\" it" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "For even better readability, [some practitioners](https://llewellynfalco.blogspot.com/2016/02/dont-use-greater-than-sign-in.html) suggest never using the `>` and `>=` operators (note that the included example is written in [Java](https://en.wikipedia.org/wiki/Java_%28programming_language%29) and `&&` means `and` and `||` means `or`).\n", + "\n", + "Python
allows **chaining** relational operators that are combined with the `and` operator. For example, the following two cells implement the same logic where the second is a lot easier to read." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(5 < x) and (x < 21)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "5 < x < 21" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Truthy vs. Falsy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "The operands of the logical operators do not actually have to be *boolean* expressions as defined above but may be *any* kind of expression. If a sub-expression does *not* evaluate to an object of type `bool`, Python automatically casts the resulting object as such.\n", + "\n", + "For example, any non-zero numeric object effectively becomes `True`. While this behavior allows writing more concise and thus \"beautiful\" code, it is also a common source of confusion." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(x - 9) and (y < 100) # = 33 and (y < 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Whenever we are unsure as to how Python will evaluate a non-boolean expression in a boolean context, the [bool()](https://docs.python.org/3/library/functions.html#bool) built-in allows us to check it ourselves." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool(x - 9) # = bool(33)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool(x - 42) # = bool(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Keep in mind that negative numbers also evaluate to `True`." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool(x - 99) # = bool(-57)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In a boolean context `None` is cast as `False`.
So, `None` is really *not* a \"maybe\" answer but a \"no\"." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool(None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Another good rule to know is that container types (e.g., `list`) evaluate to `True` whenever they are not empty and `False` otherwise." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool([])" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool([False])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Pythonistas often use the terms **truthy** or **falsy** to describe a non-boolean expression's behavior when used in place of a boolean one." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Conditional Statements" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In order to write useful programs, we need to control the flow of execution, for example, to react to user input.\n", + "\n", + "One major language construct to do so is the **conditional statement** or `if` **statement** (cf., the [reference](https://docs.python.org/3/reference/compound_stmts.html#the-if-statement)). It consists of:\n", + "\n", + "- *one* mandatory `if`-clause,\n", + "- an *arbitrary* number of `elif`-clauses (i.e. \"else if\"), and\n", + "- an *optional* `else`-clause.\n", + "\n", + "The `if`- and `elif`-clauses each specify one *boolean* expression, also called **condition**, while the `else`-clause serves as a \"catch everything else\" case.\n", + "\n", + "In terms of syntax, the header lines end with a colon and the code blocks are indented.\n", + "\n", + "In contrast to our intuitive interpretation in natural languages, only the code in *one* of the alternatives, also called **branches**, is executed. To be precise, it is always the code in the first branch whose condition evaluates to `True`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "code_folding": [], + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "z = 101" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "code_folding": [], + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "z is positive but odd\n" + ] + } + ], + "source": [ + "if (z % 2 == 0) and (z > 0):\n", + " print(\"z is even and positive\")\n", + "elif z % 2 == 0:\n", + " print(\"z is even but negative\")\n", + "elif z > 0:\n", + " print(\"z is positive but odd\")\n", + "else:\n", + " print(\"z is neither even nor positive\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In many situations, we only need a reduced form of the `if` statement.\n", + "\n", + "We could **inject** code only at random to, for example, implement some sort of [A/B testing](https://en.wikipedia.org/wiki/A/B_testing)." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You will read this just as often as you see heads when tossing a coin\n" + ] + } + ], + "source": [ + "if random.random() > 0.5:\n", + " print(\"You will read this just as often as you see heads when tossing a coin\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "More often than not, we might model a binary choice." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "z is positive\n" + ] + } + ], + "source": [ + "if z > 0:\n", + " print(\"z is positive\")\n", + "else:\n", + " print(\"z is negative\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "We may **nest** `if` statements to control the flow of execution in a more granular way. Every additional layer, however, makes the code less readable, in particular, if we have more than one line per code block." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "z is odd\n" + ] + } + ], + "source": [ + "if random.random() > 0.5:\n", + " if z % 2: # no need to write out the \"!= 0\"\n", + " print(\"z is odd\")\n", + " else:\n", + " print(\"z is even\")\n", + "else:\n", + " if z > 0:\n", + " print(\"z is positive\")\n", + " else:\n", + " print(\"z is negative\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "A good way to make this code more readable is to introduce **temporary variables** *in combination* with using the `and` operator to **flatten** the branching logic. The `if` statement then reads almost like plain English. In contrast to many other languages, creating variables is a computationally *cheap* operation in Python and also helps to document the code *inline*. Without temporary variables, the `and` flattening could actually lead to more sub-expressions in the conditions being evaluated than necessary. Do you see why?"
+ ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "z is positive\n" + ] + } + ], + "source": [ + "check_oddness = (random.random() > 0.5)\n", + "is_odd = (z % 2)\n", + "is_positive = (z > 0)\n", + "\n", + "if check_oddness and is_odd:\n", + " print(\"z is odd\")\n", + "elif check_oddness and not is_odd:\n", + " print(\"z is even\")\n", + "elif not check_oddness and is_positive:\n", + " print(\"z is positive\")\n", + "else:\n", + " print(\"z is negative\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Conditional Expressions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "When all we do with an `if` statement is to assign an object to a variable with respect to a single true-or-false condition (cf., binary choice above), there is a shortcut for that: We could simply assign the result of a so-called **conditional expression** or `if` expression to the variable.\n", + "\n", + "Think of a situation where we evaluate a piece-wise functional relationship $y = f(x)$ at a given $x$, for example:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "$\n", + "y = f(x) =\n", + "\\begin{cases}\n", + "0, \\text{ if } x \\le 0 \\\\\n", + "x^2, \\text{ otherwise}\n", + "\\end{cases}\n", + "$" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "x = 3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Of course, we could use an `if` statement as above to do the job. Yet, this is rather lengthy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [], + "source": [ + "if x <= 0:\n", + " y = 0\n", + "else:\n", + " y = x ** 2" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "On the contrary, the `if` expression fits into one line. The main downside here is a potential loss in readability, in particular, if the functional relationship is not that simple." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "y = 0 if x <= 0 else x ** 2" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In this concrete example, however, the most elegant solution would be to use the built-in [max()](https://docs.python.org/3/library/functions.html#max) function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "y = max(0, x) ** 2" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Conditional expressions may not only be used in the way described in this section. We already saw them as part of a list comprehension that is introduced in Chapter 7." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Exceptions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "In the previous two chapters we already encountered a couple of *runtime* errors. A natural urge we might have after reading about conditional statements is to write code that somehow reacts to the occurrence of such exceptions. All we need is a way to formulate a condition for that.\n", + "\n", + "For sure, this is such a common thing to do that Python provides its own language construct for it, namely the `try` statement (cf., the [reference](https://docs.python.org/3/reference/compound_stmts.html#the-try-statement)).\n", + "\n", + "In its simplest form, it comes with just two branches: `try` and `except`. The following basically tells Python to execute the code in the `try`-branch and if *anything* goes wrong, continue in the `except`-branch instead of **raising** an error to us. Of course, if nothing goes wrong, the `except`-branch is *not* executed."
+ ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "user_input = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "slideshow": { + "slide_type": "-" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Something went wrong\n" + ] + } + ], + "source": [ + "try:\n", + " 1 / user_input\n", + "except:\n", + " print(\"Something went wrong\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "However, it is good practice to *not* **handle** *any* possible exception but only the ones we may expect from the code in the `try`-branch. The reasoning why this is done is a bit involved. We only remark here that the code base becomes easier to understand as we clearly communicate to any human reader what could go wrong during execution. Python comes with a lot of [built-in exceptions](https://docs.python.org/3/library/exceptions.html#concrete-exceptions) that we should familiarize ourselves with.\n", + "\n", + "Another good practice is to always keep the code in the `try`-branch short so as to not accidentally handle an exception we do not want to handle.\n", + "\n", + "In the example, we are dividing numbers and may therefore expect a `ZeroDivisionError`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Something went wrong\n" + ] + } + ], + "source": [ + "try:\n", + " 1 / user_input\n", + "except ZeroDivisionError:\n", + " print(\"Something went wrong\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "Often, we must have some code run independently of an exception occurring (e.g., to close a connection to a database). To achieve that, we can add an optional `finally`-branch to the `try` statement.\n", + "\n", + "Similarly, we might have some code that must be run exactly when no exception occurs but we do not want to put it in the `try`-branch as per the good practice mentioned. To achieve that, we can add an optional `else`-branch to the `try` statement.\n", + "\n", + "To showcase everything together, we look at one last example. To spice it up a bit, we randomize the input. So run the cell several times and see for yourself. It's actually quite easy." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Yes, division worked smoothly.\n", + "I am always printed\n" + ] + } + ], + "source": [ + "divisor = random.choice([0, 1])\n", + "\n", + "try:\n", + " 1 / divisor\n", + "except ZeroDivisionError:\n", + " print(\"Oops. Division by 0.
How does that work?\")\n", + "else:\n", + " print(\"Yes, division worked smoothly.\")\n", + "finally:\n", + " print(\"I am always printed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "## TL;DR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "- **boolean expressions** evaluate either to `True` or `False`\n", + "- **relational operators** compare operands according to \"human\" interpretations\n", + "- **logical operators** combine boolean sub-expressions to more \"complex\" expressions\n", + "- the **conditional statement** is a *major* concept to **control** the **flow of execution** depending on some **conditions**\n", + "- a **conditional expression** is a short form of a conditional statement\n", + "- **exception handling** is also a common way of **controlling** the **flow of execution**, in particular if we have to be prepared for bad input data" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "livereveal": { + "auto_select": "code", + "auto_select_fragment": true, + "scroll": true, + "theme": "serif" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "384px" + }, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/03_conditionals_review_and_exercises.ipynb b/03_conditionals_review_and_exercises.ipynb new file mode 100644 index 0000000..f60bc2b --- /dev/null +++ b/03_conditionals_review_and_exercises.ipynb @@ -0,0 +1,389 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Chapter 3: Conditionals & Exceptions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Content Review" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read Chapter 3 of the book. Then work through the seven review questions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Essay Questions " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Answer the following questions briefly with *at most* 300 characters per question!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q1**: What is the **singleton** design pattern? How many objects does the expression `[True, False, True, False]` generate in memory?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q2**: What do we mean when we talk about **truthy** and **falsy** expressions?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q3**: Explain how the conceptual difference between a **statement** and an **expression** relates to the difference between a **conditional statement** and a **conditional expression**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q4**: Why is the use of **temporary variables** encouraged in Python?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q5**: What does the `finally`-branch enforce in this code snippet? How can a `try` statement be useful *without* an `except`-branch?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "try:\n", + " print(\"Make a request to a service on the internet\")\n", + "finally:\n", + " print(\"This could be clean-up code\")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### True / False Questions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Motivate your answer with *one short* sentence!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q6**: The objects `True`, `False`, and `None` represent the idea of \"yes\", \"no\", and \"maybe\" answers in a natural language.\n", + "\n", + "Hint: you also respond with a code cell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q7**: The `try` statement is useful for handling **syntax** errors." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Coding Exercises" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discounting Customer Orders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q8.1**: Write a function `discounted_price()` that takes the positional arguments `unit_price` (of type `float`) and `quantity` (of type `int`) and implements a discount scheme for a line item in a customer order as follows:\n", + "\n", + "- if the unit price is over 100 dollars, grant 10% relative discount\n", + "- if a customer orders more than 10 items, one in every five items is for free\n", + "\n", + "Only one of the two discounts is granted, whichever is better for the customer.\n", + "\n", + "The function should then return the overall price for the line item. Do not forget to round appropriately." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q8.2**: Calculate the final price for the following line items of an order:\n", + "- $7$ smartphones @ $99.00$ USD\n", + "- $3$ workstations @ $999.00$ USD\n", + "- $19$ GPUs @ $879.95$ USD\n", + "- $14$ Raspberry Pis @ $35.00$ USD" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": 
[] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q8.3**: Re-calculate the last two line items with order quantities of $20$ and $15$. What do you observe?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " (your observation)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q8.4**: Looking at the `if`-`else`-logic in the function, why do you think the four example line items in **Q8.2** were chosen as they were?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fizz Buzz revisited" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you worked on the Fizz Buzz exercise in Chapter 1, you actually did not know about the `elif` and `else` keywords yet. Well, now you do." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "numbers = list(range(1, 101))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Q9**: Copy and paste your answer to **Q11.2** in Chapter 1 here and instead of three consecutive `if` statements re-write it with *one* compound `if` statement.\n", + "\n", + "This code will then be a lot more robust as the order of the three `if` statements cannot be screwed up." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index 78bdff9..951bf75 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +**Important**: The notebooks are being added throughout the fall semester of 2019! + # An Introduction to Python and Programming The purpose of this repository is to serve as an interactive "book" for a @@ -13,7 +15,9 @@ science professionals and researchers. 
As such they can be viewed in a plain web browser: - [00 - Start up](https://nbviewer.jupyter.org/github/webartifex/intro-to-python/blob/master/00_start_up.ipynb) -- [01 - Elements of a Program](https://nbviewer.jupyter.org/github/webartifex/intro-to-python/blob/master/01_elements_of_a_program.ipynb) +- [01 - Elements of a Program](https://nbviewer.jupyter.org/github/webartifex/intro-to-python/blob/master/01_elements.ipynb) +- [02 - Functions & Modularization](https://nbviewer.jupyter.org/github/webartifex/intro-to-python/blob/master/02_functions.ipynb) +- [03 - Conditionals & Exceptions](https://nbviewer.jupyter.org/github/webartifex/intro-to-python/blob/master/03_conditionals.ipynb) However, it is recommended that students **install Python and Jupyter locally** and run the code in the notebooks on their own. diff --git a/poetry.lock b/poetry.lock index 75af38f..ad1610a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -423,6 +423,14 @@ terminado = ">=0.8.1" tornado = ">=5.0" traitlets = ">=4.2.1" +[[package]] +category = "main" +description = "NumPy is the fundamental package for array computing with Python." 
+name = "numpy" +optional = false +python-versions = ">=3.5" +version = "1.17.2" + [[package]] category = "main" description = "Utilities for writing pandoc filters in python" @@ -673,7 +681,7 @@ version = "3.5.1" notebook = ">=4.4.1" [metadata] -content-hash = "431dace6d8d5c3e390b59e964116bb6219fb45c7a8c09cbfe99d70c704a54320" +content-hash = "7c3d541c65a27324b49fc7ed2b84067e223eb4e20c20310ff9e485b016f65f91" python-versions = "^3.6" [metadata.hashes] @@ -711,6 +719,7 @@ mistune = ["59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e", " nbconvert = ["427a468ec26e7d68a529b95f578d5cbf018cb4c1f889e897681c2b6d11897695", "48d3c342057a2cf21e8df820d49ff27ab9f25fc72b8f15606bd47967333b2709"] nbformat = ["b9a0dbdbd45bb034f4f8893cafd6f652ea08c8c1674ba83f2dc55d3955743b0b", "f7494ef0df60766b7cabe0a3651556345a963b74dbc16bc7c18479041170d402"] notebook = ["660976fe4fe45c7aa55e04bf4bccb9f9566749ff637e9020af3422f9921f9a5d", "b0a290f5cc7792d50a21bec62b3c221dd820bf00efa916ce9aeec4b5354bde20"] +numpy = ["05dbfe72684cc14b92568de1bc1f41e5f62b00f714afc9adee42f6311738091f", "0d82cb7271a577529d07bbb05cb58675f2deb09772175fab96dc8de025d8ac05", "10132aa1fef99adc85a905d82e8497a580f83739837d7cbd234649f2e9b9dc58", "12322df2e21f033a60c80319c25011194cd2a21294cc66fee0908aeae2c27832", "16f19b3aa775dddc9814e02a46b8e6ae6a54ed8cf143962b4e53f0471dbd7b16", "3d0b0989dd2d066db006158de7220802899a1e5c8cf622abe2d0bd158fd01c2c", "438a3f0e7b681642898fd7993d38e2bf140a2d1eafaf3e89bb626db7f50db355", "5fd214f482ab53f2cea57414c5fb3e58895b17df6e6f5bca5be6a0bb6aea23bb", "73615d3edc84dd7c4aeb212fa3748fb83217e00d201875a47327f55363cef2df", "7bd355ad7496f4ce1d235e9814ec81ee3d28308d591c067ce92e49f745ba2c2f", "7d077f2976b8f3de08a0dcf5d72083f4af5411e8fddacd662aae27baa2601196", "a4092682778dc48093e8bda8d26ee8360153e2047826f95a3f5eae09f0ae3abf", "b458de8624c9f6034af492372eb2fee41a8e605f03f4732f43fc099e227858b2", "e70fc8ff03a961f13363c2c95ef8285e0cf6a720f8271836f852cc0fa64e97c8", 
"ee8e9d7cad5fe6dde50ede0d2e978d81eafeaa6233fb0b8719f60214cf226578", "f4a4f6aba148858a5a5d546a99280f71f5ee6ec8182a7d195af1a914195b21a2"] pandocfilters = ["b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9"] parso = ["63854233e1fadb5da97f2744b6b24346d2750b85965e7e399bec1620232797dc", "666b0ee4a7a1220f65d367617f2cd3ffddff3e205f3f16a0284df30e774c2a9c"] pexpect = ["2094eefdfcf37a1fdbfb9aa090862c1a4878e5c7e0e7e7088bdb511c558e5cd1", "9e2c1fd0e6ee3a49b28f95d4b33bc389c89b20af6a1255906e90ff1262ce62eb"] diff --git a/pyproject.toml b/pyproject.toml index 1cd387e..1e69fc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ license = "MIT" [tool.poetry.dependencies] python = "^3.6" jupyter = "^1.0" +numpy = "^1.17" [tool.poetry.dev-dependencies] black = {version = "^18.3-alpha.0", allows-prereleases = true} diff --git a/requirements.txt b/requirements.txt index 41753e1..28405c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,7 @@ mistune==0.8.4 nbconvert==5.6.0 nbformat==4.4.0 notebook==6.0.1 +numpy==1.17.2 pandocfilters==1.4.2 parso==0.5.1 pexpect==4.7.0 diff --git a/sample_module.py b/sample_module.py new file mode 100644 index 0000000..2efcda4 --- /dev/null +++ b/sample_module.py @@ -0,0 +1,75 @@ +"""This is a sample module. + +It defines three functions average(), average_evens(), and average_odds(). +The point is to show how we can put Python code in a .py file to be re-used +in some other place. + +We should never forget to document the code as well, both on the module +level (i.e., this docstring) but also in every function it defines. + +When imported, Python modules are executed top to bottom before the flow of +execution returns to wherever they were imported into. + +An important convention is to prefix variables and functions that are not to +be used outside the module with a single underscore "_". This way, we can +design the code within a module in a modular fashion and only "export" what we +want. 
# Design note: the three public functions average(), average_evens(), and
# average_odds() all forward the computation to the internal utility function
# _scaled_average(), which contains the logic common to them.  The
# _default_scalar variable is the default for the keyword-only "scalar"
# parameter of each public function.  Names prefixed with a single underscore
# are internal and not meant to be used outside this module.

_default_scalar = 1


def _scaled_average(numbers, scalar):
    """Internal utility function to calculate scaled averages.

    Args:
        numbers (iterable): the numbers to average; may be integers or floats
        scalar (float): the factor the plain average is multiplied with

    Returns:
        float: scalar times the arithmetic mean of the numbers

    Raises:
        ZeroDivisionError: if there are no numbers to average
    """
    # Materialize once so that one-shot iterables (e.g., generators) can be
    # both summed and counted.
    numbers = list(numbers)
    if not numbers:
        # Same exception type the bare division would raise, plus a message.
        raise ZeroDivisionError("cannot average an empty collection of numbers")
    return scalar * (sum(numbers) / len(numbers))


def average(numbers, *, scalar=_default_scalar):
    """Calculate the average of all numbers in a list.

    Args:
        numbers (list): list of numbers; may be integers or floats
        scalar (float, optional): the scalar that multiplies the
            average of all the numbers

    Returns:
        float: (scaled) average

    Raises:
        ZeroDivisionError: if numbers is empty
    """
    return _scaled_average(numbers, scalar)


def average_evens(numbers, *, scalar=_default_scalar):
    """Calculate the average of all even numbers in a list.

    Args:
        numbers (list): list of numbers; may be integers or floats
        scalar (float, optional): the scalar that multiplies the
            average of the even numbers

    Returns:
        float: (scaled) average

    Raises:
        ZeroDivisionError: if numbers contains no even number
    """
    return _scaled_average([n for n in numbers if n % 2 == 0], scalar)


def average_odds(numbers, *, scalar=_default_scalar):
    """Calculate the average of all odd numbers in a list.

    Args:
        numbers (list): list of numbers; may be integers or floats
        scalar (float, optional): the scalar that multiplies the
            average of the odd numbers

    Returns:
        float: (scaled) average

    Raises:
        ZeroDivisionError: if numbers contains no odd number
    """
    return _scaled_average([n for n in numbers if n % 2 != 0], scalar)
"""This module defines a Matrix class."""

import numbers


class Matrix:
    """A standard m-by-n-dimensional matrix from linear algebra.

    The class is designed for sub-classing in such a way that
    the user can adapt the typing class attribute to change,
    for example, how the entries are stored (e.g., as integers).

    Attributes:
        storage (callable): must return an iterable that is used
            to store the entries of the matrix; defaults to tuple
        typing (callable): type casting applied to all matrix
            entries upon creation; defaults to float
        zero_threshold (float): maximum difference allowed when
            comparing an entry to zero; defaults to 1e-12
    """

    storage = tuple
    typing = float
    zero_threshold = 1e-12

    def __init__(self, data):
        """Initiate a new matrix.

        Args:
            data (iterable of iterables): the matrix's entries;
                must be provided with rows first, then column;
                the number of column entries must be consistent across rows
                where the first row sets the standard;
                must have at least one element in total

        Raises:
            ValueError:
                - if the number of columns is inconsistent across the rows
                - if the provided data do not have enough entries
        """
        self._entries = self.storage(
            self.storage(self.typing(x) for x in r) for r in data
        )
        # n_cols reads the first row, so "no rows at all" must be rejected
        # up front; otherwise an IndexError would leak out instead of the
        # documented ValueError.
        if len(self._entries) == 0:
            raise ValueError("the matrix must have at least one entry")
        for row in self._entries[1:]:
            if len(row) != self.n_cols:
                raise ValueError("each row must have the same number of entries")
        if len(self) == 0:
            raise ValueError("the matrix must have at least one entry")

    @classmethod
    def from_columns(cls, data):
        """Initiate a new matrix.

        This is an alternative constructor for data provided in column-major order.

        Args:
            data (iterable of iterables): the matrix's entries in column-major
                order; every column must have the same number of entries
                where the first column sets the standard;
                must have at least one element in total

        Raises:
            ValueError:
                - if the number of entries is inconsistent across the columns
                - if the provided data do not have enough entries
        """
        # Build the matrix as if the columns were rows, then flip it.
        return cls(data).transpose()

    def __repr__(self):
        name = self.__class__.__name__
        args = ", ".join(
            "(" + ", ".join(f"{c:.3f}" for c in r) + ",)" for r in self._entries
        )
        return f"{name}(({args}))"

    def __str__(self):
        name = self.__class__.__name__
        first, last, m, n = self[0], self[-1], self.n_rows, self.n_cols
        return f"{name}(({first:.1f}, ...), ..., (..., {last:.1f}))[{m:d}x{n:d}]"

    @property
    def n_rows(self):
        """Number of rows in the matrix."""
        return len(self._entries)

    @property
    def n_cols(self):
        """Number of columns in the matrix."""
        return len(self._entries[0])

    def __len__(self):
        return self.n_rows * self.n_cols

    def __getitem__(self, index):
        """Look up one entry.

        Args:
            index (int or tuple of two ints): an integer addresses the
                entries in flat row-major order (negative values count
                from the end); a tuple is read as (row, column)

        Raises:
            IndexError: if the index is out of range
            TypeError: if the index is neither of the two supported kinds
        """
        if isinstance(index, int):
            if index < 0:
                index += len(self)
            if not (0 <= index < len(self)):
                raise IndexError("integer index out of range")
            row, col = divmod(index, self.n_cols)
            return self._entries[row][col]
        elif (
            isinstance(index, tuple)
            and len(index) == 2
            and isinstance(index[0], int)
            and isinstance(index[1], int)
        ):
            return self._entries[index[0]][index[1]]
        raise TypeError("index must be either an integer or a tuple of two integers")

    def rows(self):
        """Iterate over the rows of the matrix.

        Returns:
            rows (Generator): produces Vector instances
                representing individual rows of the matrix
        """
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        return (Vector(r) for r in self._entries)

    def cols(self):
        """Iterate over the columns of the matrix.

        Returns:
            columns (Generator): produces Vector instances
                representing individual columns of the matrix
        """
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        return (
            Vector(self._entries[r][c] for r in range(self.n_rows))
            for c in range(self.n_cols)
        )

    def entries(self, *, reverse=False, row_major=True):
        """Iterate over the entries of the matrix in flat fashion.

        Args:
            reverse (bool): flag to iterate backwards; defaults to False
            row_major (bool): flag to iterate in row major order;
                defaults to True

        Returns:
            entries (Generator): produces the entries of the matrix
                in the type set in the typing class variable
        """
        if reverse:
            rows, cols = range(self.n_rows - 1, -1, -1), range(self.n_cols - 1, -1, -1)
        else:
            rows, cols = range(self.n_rows), range(self.n_cols)
        if row_major:
            return (self._entries[r][c] for r in rows for c in cols)
        return (self._entries[r][c] for c in cols for r in rows)

    def __iter__(self):
        return self.entries()

    def __reversed__(self):
        return self.entries(reverse=True)

    def __add__(self, other):
        if isinstance(other, self.__class__):
            if (self.n_rows != other.n_rows) or (self.n_cols != other.n_cols):
                raise ValueError("matrices need to be of the same dimensions")
            return self.__class__(
                (s_col + o_col for (s_col, o_col) in zip(s_row, o_row))
                for (s_row, o_row) in zip(self._entries, other._entries)
            )
        elif isinstance(other, numbers.Number):
            # Scalars are broadcast to every entry.
            return self.__class__((c + other for c in r) for r in self._entries)
        return NotImplemented

    def __radd__(self, other):
        if isinstance(other, numbers.Number):
            return self + other
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        if isinstance(other, Vector):
            raise TypeError("vectors and matrices cannot be added")
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        if isinstance(other, numbers.Number):
            return (-self) + other
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        if isinstance(other, Vector):
            raise TypeError("vectors and matrices cannot be subtracted")
        return (-self) + other

    def _matrix_multiply(self, other):
        """Internal utility: matrix-matrix product via row-column dot products."""
        if self.n_cols != other.n_rows:
            raise ValueError("matrices need to have compatible dimensions")
        return self.__class__((rv * cv for cv in other.cols()) for rv in self.rows())

    def __mul__(self, other):
        if isinstance(other, numbers.Number):
            return self.__class__((x * other for x in r) for r in self._entries)
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        if isinstance(other, Vector):
            # A vector on the right-hand side is read as a column vector.
            return self._matrix_multiply(other.as_matrix()).as_vector()
        elif isinstance(other, self.__class__):
            return self._matrix_multiply(other)
        return NotImplemented

    def __rmul__(self, other):
        if isinstance(other, numbers.Number):
            return self * other
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        if isinstance(other, Vector):
            # A vector on the left-hand side is read as a row vector.
            return other.as_matrix(column=False)._matrix_multiply(self).as_vector()
        return NotImplemented

    def __truediv__(self, other):
        if isinstance(other, numbers.Number):
            return self * (1 / other)
        return NotImplemented

    def __eq__(self, other):
        """Compare entry by entry, tolerating differences up to zero_threshold.

        Raises:
            ValueError: if the other matrix has different dimensions
        """
        if isinstance(other, self.__class__):
            if (self.n_rows != other.n_rows) or (self.n_cols != other.n_cols):
                raise ValueError("matrices need to be of the same dimensions")
            for x, y in zip(self, other):
                if abs(x - y) > self.zero_threshold:
                    return False
            return True
        return NotImplemented

    def __pos__(self):
        return self

    def __neg__(self):
        return self.__class__((-x for x in r) for r in self._entries)

    def __abs__(self):
        # Imported lazily so that this module can be loaded on its own.
        from .utils import norm

        return norm(self)

    def __bool__(self):
        return bool(abs(self))

    def __float__(self):
        if not (self.n_rows == 1 and self.n_cols == 1):
            raise RuntimeError("matrix must have exactly one entry to become a scalar")
        # __float__ must return a real float even if a sub-class changed
        # the typing attribute (e.g., to int).
        return float(self[0])

    def as_vector(self):
        """Cast the matrix as a one-dimensional vector.

        Returns:
            vector (Vector)

        Raises:
            RuntimeError: if not one of the two dimensions is 1
        """
        # Imported lazily: the vector module imports this module in turn.
        from .vector import Vector

        if not (self.n_rows == 1 or self.n_cols == 1):
            raise RuntimeError("one dimension (m or n) must be 1")
        return Vector(self)

    def transpose(self):
        """Transpose the rows and columns of the matrix.

        Returns:
            matrix (Matrix)
        """
        return self.__class__(zip(*self._entries))
import math


def norm(vector_or_matrix):
    """Calculate the Frobenius or Euclidean norm of a matrix or vector.

    Args:
        vector_or_matrix (Vector/Matrix): the entries whose squares
            are to be summed up; any iterable of numbers works

    Returns:
        norm (float)
    """
    return math.sqrt(sum(x ** 2 for x in vector_or_matrix))
import numbers


class Vector:
    """A standard one-dimensional vector from linear algebra.

    The class is designed for sub-classing in such a way that
    the user can adapt the typing class attribute to change,
    for example, how the entries are stored (e.g., as integers).

    Attributes:
        storage (callable): must return an iterable that is used
            to store the entries of the vector; defaults to tuple
        typing (callable): type casting applied to all vector
            entries upon creation; defaults to float
        zero_threshold (float): maximum difference allowed when
            comparing an entry to zero; defaults to 1e-12
    """

    storage = tuple
    typing = float
    zero_threshold = 1e-12

    def __init__(self, data):
        """Initiate a new vector.

        Args:
            data (iterable): the vector's entries;
                must have at least one element

        Raises:
            ValueError: if the provided data do not have enough entries
        """
        self._entries = self.storage(self.typing(x) for x in data)
        if len(self) == 0:
            raise ValueError("the vector must have at least one entry")

    def __repr__(self):
        name, args = self.__class__.__name__, ", ".join(f"{x:.3f}" for x in self)
        return f"{name}(({args}))"

    def __str__(self):
        name, first, last, entries = (
            self.__class__.__name__,
            self[0],
            self[-1],
            len(self),
        )
        return f"{name}({first:.1f}, ..., {last:.1f})[{entries:d}]"

    def __len__(self):
        return len(self._entries)

    def __getitem__(self, index):
        """Look up one entry by its integer position.

        Raises:
            TypeError: if the index is not an integer
        """
        if not isinstance(index, int):
            raise TypeError("index must be an integer")
        return self._entries[index]

    def __iter__(self):
        return iter(self._entries)

    def __reversed__(self):
        return reversed(self._entries)

    def __add__(self, other):
        if isinstance(other, self.__class__):
            if len(self) != len(other):
                raise ValueError("vectors need to be of the same length")
            return self.__class__(x + y for (x, y) in zip(self, other))
        elif isinstance(other, numbers.Number):
            # Scalars are broadcast to every entry.
            return self.__class__(x + other for x in self)
        return NotImplemented

    def __radd__(self, other):
        return self + other

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return (-self) + other

    def __mul__(self, other):
        if isinstance(other, self.__class__):
            # Vector times vector is the dot product, i.e., a scalar.
            if len(self) != len(other):
                raise ValueError("vectors need to be of the same length")
            return sum(x * y for (x, y) in zip(self, other))
        elif isinstance(other, numbers.Number):
            return self.__class__(x * other for x in self)
        return NotImplemented

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, numbers.Number):
            return self * (1 / other)
        return NotImplemented

    def __eq__(self, other):
        """Compare entry by entry, tolerating differences up to zero_threshold.

        Raises:
            ValueError: if the other vector has a different length
        """
        if isinstance(other, self.__class__):
            if len(self) != len(other):
                raise ValueError("vectors need to be of the same length")
            for x, y in zip(self, other):
                if abs(x - y) > self.zero_threshold:
                    return False
            return True
        return NotImplemented

    def __pos__(self):
        return self

    def __neg__(self):
        return self.__class__(-x for x in self)

    def __abs__(self):
        # Imported lazily so that this module can be loaded on its own.
        from .utils import norm

        return norm(self)

    def __bool__(self):
        return bool(abs(self))

    def __float__(self):
        if len(self) != 1:
            raise RuntimeError("vector must have exactly one entry to become a scalar")
        # __float__ must return a real float even if a sub-class changed
        # the typing attribute (e.g., to int).
        return float(self[0])

    def as_matrix(self, *, column=True):
        """Convert the vector into a matrix.

        Args:
            column (bool): if the vector should be interpreted
                as a column vector or not; defaults to True

        Returns:
            matrix (Matrix)
        """
        # Imported lazily: the matrix module depends on this module in turn.
        from .matrix import Matrix

        if column:
            return Matrix([x] for x in self)
        return Matrix([tuple(self)])