{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "# Anscombe's quartet\n", "#### (https://matplotlib.org/stable/gallery/specialty_plots/anscombe.html)\n", "\n", "[Anscombe's quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet) is a group of datasets (x, y) that have the same mean,\n", "standard deviation, and regression line, but which are qualitatively different.\n", "\n", "It is often used to illustrate the importance of looking at a set of data\n", "graphically and not only relying on basic statistical properties.\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "x = [10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5]\n", "y1 = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]\n", "y2 = [9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74]\n", "y3 = [7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73]\n", "x4 = [8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8]\n", "y4 = [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89]\n", "\n", "datasets = {\n", " 'I': (x, y1),\n", " 'II': (x, y2),\n", " 'III': (x, y3),\n", " 'IV': (x4, y4)\n", "}\n", "\n", "fig, axs = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(10, 10),\n", " gridspec_kw={'wspace': 0.08, 'hspace': 0.08})\n", "axs[0, 0].set(xlim=(0, 20), ylim=(2, 14))\n", "axs[0, 0].set(xticks=(0, 10, 20), yticks=(4, 8, 12))\n", "\n", "for ax, (label, (x, y)) in zip(axs.flat, datasets.items()):\n", " ax.text(0.1, 0.9, label, fontsize=20, transform=ax.transAxes, va='top')\n", " ax.tick_params(direction='in', top=True, right=True)\n", " ax.plot(x, y, 'o')\n", "\n", " # linear regression\n", " p1, p0 = np.polyfit(x, y, deg=1)\n", " x_lin = np.array([np.min(x), np.max(x)])\n", " y_lin = p1 * x_lin + p0\n", " ax.plot(x_lin, y_lin, 'r-', lw=2)\n", "\n", " # add text box for the statistics\n", " stats = (f'$\\\\mu$ = {np.mean(y):.2f}\\n'\n", " f'$\\\\sigma$ = {np.std(y):.2f}\\n'\n", " f'$r$ = {np.corrcoef(x, y)[0][1]:.2f}')\n", " bbox = dict(boxstyle='round', fc='blanchedalmond', ec='orange', alpha=0.5)\n", " ax.text(0.95, 0.07, stats, fontsize=9, bbox=bbox,\n", " transform=ax.transAxes, horizontalalignment='right')\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": 
{ "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }