diff --git a/notebooks/Module 2.1.1 - Hypothesis Testing.ipynb b/notebooks/Module 2.1.1 - Hypothesis Testing.ipynb index 800caf2..73981d3 100644 --- a/notebooks/Module 2.1.1 - Hypothesis Testing.ipynb +++ b/notebooks/Module 2.1.1 - Hypothesis Testing.ipynb @@ -74,11 +74,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 107, "metadata": {}, "outputs": [], "source": [ - "%run setup.ipy" + "%run setup.ipy\n", + "import numpy as np" ] }, { @@ -109,6 +110,183 @@ "From your results here, note that even with *purely random data*, we can get very high differences between a same (running our function once) and the general population ($N(0, 1)$), just by chance in our we got our sample." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Answers:\n", + "\n", + "Pr(at least one test is \"high\") = 1- Pr(no test is \"high\")\n", + "\n", + "Given that tests are independent:\n", + "Pr(no test is \"high\") = Pr(test 1 is not high) * Pr(test 2 is not high) * ... * Pr(test $n$ is not high)\n", + "\n", + "Therefore:\n", + "\n", + "Two experiments:\n", + "Pr(at least one high test) = 1 - Pr(test 1 is not high) * Pr(test 2 is not high) = 1 - 0.95 * 0.95 = 0.0975 or 9.75%\n", + "\n", + "Ten experiment:\n", + "Pr(at least one high test) = 1 - Pr(test 1 is not high) * Pr(test 2 is not high) * ... * Pr(test 10 is not high) = 1 - 0.95 ^ 10 = 0.401263 or 40.13%\n", + "\n", + "Twenty experiment:\n", + "Pr(at least one high test) = 1 - Pr(test 1 is not high) * Pr(test 2 is not high) * ... 
* Pr(test 20 is not high) = 1 - 0.95 ^ 20 = 0.641514 or 64.15%" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "#computes 1000 random numbers from a normal distribution and averages them all\n", + "def smean():\n", + " s = np.random.normal(0,1,1000)\n", + " return s.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.017075782661522303" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#one case of the average of 1000 means\n", + "smean()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "#generates a list of 10000 means of means\n", + "iterations = 10000\n", + "result = []\n", + "for i in range(iterations):\n", + " result.append(smean())" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 20., 154., 562., 1520., 2494., 2594., 1678., 726., 218.,\n", + " 34.]),\n", + " array([-0.1101351 , -0.08851729, -0.06689947, -0.04528165, -0.02366384,\n", + " -0.00204602, 0.0195718 , 0.04118961, 0.06280743, 0.08442525,\n", + " 0.10604306]),\n", + " )" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPqklEQVR4nO3df6jd9X3H8edrthVpK9MZbZqExY2UTYXZepcJwujoVoP+EftHIf2jBiqkFYUW2j9i+0eFErCjP5hsFdJVjKOrBLpimHWrlUIp2Oq1WGO0zrRm9TbB3K5sdf+4ad/7434CZ/Hce8+95+ac3HyeDzic73l/P5/z/Xw/ub48+dzv+SZVhSSpD78z7QFIkibH0Jekjhj6ktQRQ1+SOmLoS1JH3jTtASznkksuqa1bt057GJK0rjz55JO/qqoNp9fP+tDfunUrs7Oz0x6GJK0rSf59WN3lHUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6shZ/41c6Wy1de9DUzv2sbtunNqxtb75SV+SOmLoS1JHDH1J6siyoZ9kS5LvJXkuyZEkH2/1O5P8MslT7XHDQJ87khxN8nyS6wfq1yQ53PbdnSRn5rQkScOM8ovc14BPVtWPk7wdeDLJI23fl6vqC4ONk1wB7AKuBN4JfDfJu6rqdeAeYA/wQ+DbwA7g4bU5FUnScpb9pF9VJ6rqx237FeA5YNMSXXYCD1TVq1X1InAU2J5kI3BhVT1WVQXcD9w07glIkka3ojX9JFuBdwM/aqXbkzyd5N4kF7XaJuClgW5zrbapbZ9eH3acPUlmk8zOz8+vZIiSpCWMfJ1+krcB3wQ+UVW/SXIP8Dmg2vMXgY8Aw9bpa4n6G4tV+4H9ADMzM0PbSKdM83p5ab0Z6ZN+kjezEPhfr6p/Aqiql6vq9ar6LfBVYHtrPgdsGei+GTje6puH1CVJEzLK1TsBvgY8V1VfGqhvHGj2AeCZtn0I2JXk/CSXA9uAx6vqBPBKkmvbe94MPLhG5yFJGsEoyzvXAR8GDid5qtU+DXwoydUsLNEcAz4KUFVHkhwEnmXhyp/b2pU7ALcC9wEXsHDVjlfuSNIELRv6VfUDhq/Hf3uJPvuAfUPqs8BVKxmgJGnt+I1cSeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6smzoJ9mS5HtJnktyJMnHW/3iJI8keaE9XzTQ544kR5M8n+T6gfo1SQ63fXcnyZk5LUnSMKN80n8N+GRV/TFwLXBbkiuAvcCjVbUNeLS9pu3bBVwJ7AC+kuS89l73AHuAbe2xYw3PRZK0jGVDv6pOVNWP2/YrwHPAJmAncKA1OwDc1LZ3Ag9U1atV9SJwFNieZCNwYVU9VlUF3D/QR5I0ASta00+yFXg38CPgsqo6AQv/YwAubc02AS8NdJtrtU1t+/T6sOPsSTKbZHZ+fn4lQ5QkLWHk0E/yNuCbwCeq6jdLNR1SqyXqbyxW7a+qmaqa2bBhw6hDlCQtY6TQT/JmFgL/61X1T638cluyoT2fbPU5YMtA983A8VbfPKQuSZqQUa7eCfA14Lmq+tLArkPA7ra9G3hwoL4ryflJLmfhF7aPtyWgV5Jc297z5oE+kqQJeNMIba4DPgwcTvJUq30auAs4mOQW4BfABwGq6kiSg8CzLFz5c1tVvd763QrcB1wAPNwekqQJWTb0q+oHDF+PB3jfIn32AfuG1GeBq1YyQEnS2vEbuZLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj
6ktQRQ1+SOmLoS1JHDH1J6siy/zC6pLPP1r0PTeW4x+66cSrH1drxk74kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjiwb+knuTXIyyTMDtTuT/DLJU+1xw8C+O5IcTfJ8kusH6tckOdz23Z0ka386kqSljHLvnfuAvwXuP63+5ar6wmAhyRXALuBK4J3Ad5O8q6peB+4B9gA/BL4N7AAeHmv0OmtM614wklZm2U/6VfV94Ncjvt9O4IGqerWqXgSOAtuTbAQurKrHqqpY+B/ITascsyRplcZZ0789ydNt+eeiVtsEvDTQZq7VNrXt0+tDJdmTZDbJ7Pz8/BhDlCQNWm3o3wP8IXA1cAL4YqsPW6evJepDVdX+qpqpqpkNGzascoiSpNOtKvSr6uWqer2qfgt8Fdjeds0BWwaabgaOt/rmIXVJ0gStKvTbGv0pHwBOXdlzCNiV5PwklwPbgMer6gTwSpJr21U7NwMPjjFuSdIqLHv1TpJvAO8FLkkyB3wWeG+Sq1lYojkGfBSgqo4kOQg8C7wG3Nau3AG4lYUrgS5g4aodr9yRpAlbNvSr6kNDyl9bov0+YN+Q+ixw1YpGJ0laU34jV5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjiwb+knuTXIyyTMDtYuTPJLkhfZ80cC+O5IcTfJ8kusH6tckOdz23Z0ka386kqSljPJJ/z5gx2m1vcCjVbUNeLS9JskVwC7gytbnK0nOa33uAfYA29rj9PeUJJ1hy4Z+VX0f+PVp5Z3AgbZ9ALhpoP5AVb1aVS8CR4HtSTYCF1bVY1VVwP0DfSRJE7LaNf3LquoEQHu+tNU3AS8NtJtrtU1t+/T6UEn2JJlNMjs/P7/KIUqSTrfWv8gdtk5fS9SHqqr9VTVTVTMbNmxYs8FJUu9WG/ovtyUb2vPJVp8Dtgy02wwcb/XNQ+qSpAlabegfAna37d3AgwP1XUnOT3I5C7+wfbwtAb2S5Np21c7NA30kSRPypuUaJPkG8F7gkiRzwGeBu4CDSW4BfgF8EKCqjiQ5CDwLvAbcVlWvt7e6lYUrgS4AHm4PSdIELRv6VfWhRXa9b5H2+4B9Q+qzwFUrGp0kaU35jVxJ6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkeW/UdUJOmUrXsfmspxj91141SOey7yk74kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOuIN184x07ohlqT1wU/6ktSRsUI/ybEkh5M8lWS21S5O8kiSF9rzRQPt70hyNMnzSa4fd/CSpJVZi0/6f1FVV1fVTHu9F3i0qrYBj7bXJLkC2AVcCewAvpLkvDU4viRpRGdieWcncKBtHwBuGqg/UFWvVtWLwFFg+xk4viRpEeOGfgHfSfJkkj2tdllVnQBoz5e2+ibgpYG+c632Bkn2JJlNMjs/Pz/mECVJp4x79c51VXU8yaXAI0l+ukTbDKnVsIZVtR/YDzAzMzO0jSRp5cb6pF9Vx9vzSeBbLCzXvJxkI0B7PtmazwFbBrpvBo6Pc3xJ0sqsOvSTvDXJ209tA+8HngEOAbtbs93Ag237ELAryflJLge2AY+v9viSpJUbZ3nnMuBbSU69zz9W1b8keQI4mOQW4BfABwGq6kiSg8CzwGvAbVX1+lijlyStyKpDv6p+DvzJkPp/AO9bpM8+YN9qjylJGo/fyJWkjhj6ktQRQ1+SOmLoS1JHDH1
J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdWTcfxhdks64rXsfmtqxj91149SOfSb4SV+SOmLoS1JHDH1J6oihL0kdMfQlqSNevXMGTPNKA0laip/0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR2Z+JezkuwA/gY4D/j7qrpr0mOQpFFN68uWZ+qWzhMN/STnAX8H/BUwBzyR5FBVPXsmjuc3YyXp/5v08s524GhV/byq/gd4ANg54TFIUrcmvbyzCXhp4PUc8GenN0qyB9jTXv53kucnMLZBlwC/mvAx1wvnZjjnZTjnZXFLzk0+P/b7//6w4qRDP0Nq9YZC1X5g/5kfznBJZqtqZlrHP5s5N8M5L8M5L4ub1txMenlnDtgy8HozcHzCY5Ckbk069J8AtiW5PMlbgF3AoQmPQZK6NdHlnap6LcntwL+ycMnmvVV1ZJJjGNHUlpbWAedmOOdlOOdlcVOZm1S9YUldknSO8hu5ktQRQ1+SOtJt6Ce5OMkjSV5ozxct0u7eJCeTPLOa/uvNCuZlR5LnkxxNsnegfmeSXyZ5qj1umNzo195i5zmwP0nubvufTvKeUfuud2POzbEkh9vPyOxkR35mjTAvf5TksSSvJvnUSvquiarq8gH8NbC3be8FPr9Iuz8H3gM8s5r+6+0xynmx8Ev4nwF/ALwF+AlwRdt3J/CpaZ/HGs3Fouc50OYG4GEWvoNyLfCjUfuu58c4c9P2HQMumfZ5TGleLgX+FNg3+N/KpH5muv2kz8LtHw607QPATcMaVdX3gV+vtv86NMp59XI7jVHOcydwfy34IfC7STaO2Hc9G2duzmXLzktVnayqJ4D/XWnftdBz6F9WVScA2vOlE+5/thrlvIbdTmPTwOvb21/n713ny17LnedSbUbpu56NMzew8E387yR5st125Vwxzp/7RH5mJn5r5UlK8l3gHUN2fWbSYzmbrMG8LHU7jXuAz7XXnwO+CHxkpWM8S4xy25DF2ox0y5F1bJy5Abiuqo4nuRR4JMlP29+q17tx/twn8jNzTod+Vf3lYvuSvJxkY1WdaH/lPLnCtx+3/9SswbwsejuNqnp54L2+Cvzz2ox6Kka5bchibd4yQt/1bJy5oapOPZ9M8i0WljbOhdAf51YzE7lNTc/LO4eA3W17N/DghPufrUY5r0Vvp3Hamu0HgGeG9F8vRrltyCHg5nalyrXAf7VlsXP9liOrnpskb03ydoAkbwXez/r+ORk0zp/7ZH5mpv3b7mk9gN8DHgVeaM8Xt/o7gW8PtPsGcIKFX7rMAbcs1X+9P1YwLzcA/8bC1QafGaj/A3AYeLr9wG6c9jmNOR9vOE/gY8DH2nZY+IeBftbOe2a5OTpXHqudGxauTvlJexw51+ZmhHl5R8uS3wD/2bYvnNTPjLdhkKSO9Ly8I0ndMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSR/4PyeOr+V1qd2IAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#plots a histsogram\n", + "plt.hist(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#counts the number of times a value from the list is greater that 0.166\n", + "counter = 0;\n", + "for i in result:\n", + " if(i>0.166):\n", + " counter = counter + 1\n", + "counter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After running this many times and only generating zeroes for the number of values greater than 0.166, I've included the reason as to why this is the case:\n", + "\n", + "using the equally weighted estimator Y = (1/1000) * x1 + (1/1000) * x2 + ... + (1/1000) * x1000 = (1/1000) * (x1 + x2 + ... + x1000)\n", + "\n", + "Since E[x1] = x_mu1, E[x2] = x_mu2 e.t.c.\n", + "The expected value of the average of 1000 normal distributions is:\n", + "E[Y] = (1/1000) * x_mu1 + (1/1000) * x_mu2 + ... + (1/1000) * x_mu1000\n", + "\n", + "Since all mu1 to mu1000 are expected to be 0:\n", + "E[Y] = 0\n", + "\n", + "The variance of the average of 1000 normal distributions is:\n", + "\n", + "Var(Y) = Var((1/1000) * (x1 + x2 + ... + x1000))\n", + "\n", + "Var(Y) = (1/1000)^2 * Var(x1 + x2 + ... + x1000)\n", + "\n", + "Given that our distributions are iid\n", + "\n", + "Var(Y) = (1/1000)^2 * (Var(x1) + Var(x2) + ... 
+ Var(x1000))\n", + "\n", + "Var(Y) = 1/1000\n", + "\n", + "Because all Var(xn) = 1, and there are 1000 values, 1000 * (1/1000000) = 1/1000\n", + "\n", + "Thus, we are actually calculating how many times in 10000 does the distribution N(0,0.001) exceed 0.166\n", + "\n", + "For simply one case:\n", + "Pr(Y > 0.166) = Pr((Y-0)/sqrt(0.001) > 0.166/sqrt(0.001)) Normalizes Y to become a standard normal distribution N(0,1)\n", + "\n", + "Pr(Z > 0.166/sqrt(0.001)) = Pr(Z > 5.249381) which is approximately 0.000000076 (the upper-tail probability of the standard normal, e.g. scipy.stats.norm.sf(5.249381))\n", + "\n", + "Using the same working as before:\n", + "\n", + "Pr(at least one test is > 0.166) = 1 - Pr(no test is >0.166) = 1 - (1-Pr(Y>0.166))^10000 = approximately 0.00076 or 0.076%" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -116,6 +294,15 @@ "*For solutions, see `solutions/hypothesis_one.py`*" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Note on answer from solution:\n", + "\n", + "To note, the answers use the mean of 100 standard normals meaning the variance would be 1/100 and standard deviation 1/10, therefore Pr(Y>0.166) = Pr(Z>1.66) = approximately 0.0485 (upper-tail probability), which gives more measurable results." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -135,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -144,14 +331,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 120, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "345\n" + "336\n" ] } ], @@ -162,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ @@ -171,12 +358,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 122, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEIxJREFUeJzt3X+spFV9x/H3p/z0R2QRLpbuYhfKttWYWskNbjWx1rUqYFyaSIoxsiGYTSxWK00U+0dJbZqsaSMW02K2gi6NVSjaslHUEn7EmpTVRRDBVblSCrdQdw0/WktNS/32jzlbhuXuvXfv3B9z97xfyWSe5zxnZs7ZZ+d+5jzPPGdSVUiS+vMzK90ASdLKMAAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTpypRswmxNPPLHWr1+/0s2QpFXljjvu+FFVTcxVb6wDYP369ezevXulmyFJq0qSf5lPPQ8BSVKnDABJ6pQBIEmdMgAkqVNzBkCSq5PsTXLPUNkLk9yU5L52f3wrT5IrkkwluTvJGUOP2dLq35dky9J0R5I0X/MZAXwKeNMBZZcCN1fVBuDmtg5wFrCh3bYCV8IgMIDLgFcCZwKX7Q8NSdLKmDMAquqrwKMHFG8GdrTlHcC5Q+XX1MDtwJokJwNvBG6qqker6jHgJp4dKpKkZbTQcwAvqqpHANr9Sa18LfDQUL3pVnaw8mdJsjXJ7iS79+3bt8DmSZLmstgngTNDWc1S/uzCqu1VNVlVkxMTc17IJklaoIVeCfzDJCdX1SPtEM/eVj4NnDJUbx3wcCt/7QHlty3wtaWxsP7SL86r3gPbzlnilkgLs9ARwE5g/zd5tgA3DJVf0L4NtBF4oh0i+grwhiTHt5O/b2hlkqQVMucIIMlnGHx6PzHJNINv82wDrktyEfAgcF6rfiNwNjAFPAlcCFBVjyb5Y+Abrd6HqurAE8uSpGU0ZwBU1dsOsmnTDHULuPggz3M1cPUhtU6StGTGejZQaSXM99i+tNo5FYQkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pTTQUtLzJ+O1LhyBCBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI65ddA1YX5fhVT6okjAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6tRIAZDkfUnuTXJPks8kOTbJqUl2JbkvybVJjm51j2nrU237+sXogC
RpYRYcAEnWAu8BJqvqZcARwPnAh4HLq2oD8BhwUXvIRcBjVXU6cHmrJ0laIaMeAjoSeE6SI4HnAo8ArwOub9t3AOe25c1tnbZ9U5KM+PqSpAVa8GRwVfWvSf4MeBD4L+AfgDuAx6vqqVZtGljbltcCD7XHPpXkCeAE4EcLbYPkJG/Swo1yCOh4Bp/qTwV+DngecNYMVWv/Q2bZNvy8W5PsTrJ73759C22eJGkOoxwCej3wz1W1r6r+B/g88CpgTTskBLAOeLgtTwOnALTtxwGPHvikVbW9qiaranJiYmKE5kmSZjNKADwIbEzy3HYsfxPwHeBW4K2tzhbghra8s63Ttt9SVc8aAUiSlseCA6CqdjE4mftN4NvtubYDHwAuSTLF4Bj/Ve0hVwEntPJLgEtHaLckaUQj/SJYVV0GXHZA8f3AmTPU/Qlw3iivJ0laPP4kpDQm5vuNpge2nbPELVEvnApCkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpfxBGWmX84RgtFkcAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnXI2UI2l+c54KWnhHAFIUqdGCoAka5Jcn+S7SfYk+bUkL0xyU5L72v3xrW6SXJFkKsndSc5YnC5IkhZi1BHAnwNfrqpfBl4O7AEuBW6uqg3AzW0d4CxgQ7ttBa4c8bUlSSNYcAAkeQHwGuAqgKr676p6HNgM7GjVdgDntuXNwDU1cDuwJsnJC265JGkko4wATgP2AZ9McmeSTyR5HvCiqnoEoN2f1OqvBR4aevx0K5MkrYBRAuBI4Azgyqp6BfCfPH24ZyaZoayeVSnZmmR3kt379u0boXmSpNmMEgDTwHRV7Wrr1zMIhB/uP7TT7vcO1T9l6PHrgIcPfNKq2l5Vk1U1OTExMULzJEmzWXAAVNW/AQ8l+aVWtAn4DrAT2NLKtgA3tOWdwAXt20AbgSf2HyqSJC2/US8E+13g00mOBu4HLmQQKtcluQh4EDiv1b0ROBuYAp5sdSVJK2SkAKiqu4DJGTZtmqFuAReP8nqSpMXjVBDSYWq+02k8sO2cJW6JxpVTQUhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKn/EUwLav5/kqVpKXnCECSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTzgYqde5QZmh9YNs5S9gSLbeRRwBJjkhyZ5IvtPVTk+xKcl+Sa5Mc3cqPaetTbfv6UV9bkrRwi3EI6L3AnqH1DwOXV9UG4DHgolZ+EfBYVZ0OXN7qSZJWyEgBkGQdcA7wibYe4HXA9a3KDuDctry5rdO2b2r1JUkrYNQRwEeB9wM/besnAI9X1VNtfRpY25bXAg8BtO1PtPrPkGRrkt1Jdu/bt2/E5kmSDmbBAZDkzcDeqrpjuHiGqjWPbU8XVG2vqsmqmpyYmFho8yRJcxjlW0CvBt6S5GzgWOAFDEYEa5Ic2T7lrwMebvWngVOA6SRHAscBj47w+pKkESx4BFBVH6yqdVW1HjgfuKWq3g7cCry1VdsC3NCWd7Z12vZbqupZIwBJ0vJYigvBPgBckmSKwTH+q1r5VcAJrfwS4NIleG1J0jwtyoVgVXUbcFtbvh84c4Y6PwHOW4zXkySNzqkgJKlTBoAkdcq5gLQoDmU+GUnjwRGAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjrlZHCS5m2+k/49sO2cJW6JFoMjAEnqlAEgSZ0yACSpUwaAJHXKAJCkTh
kAktQpA0CSOmUASFKnDABJ6pQBIEmdcioIzWq+l/5LWn0cAUhSpwwASeqUASBJnTIAJKlTBoAkdWrBAZDklCS3JtmT5N4k723lL0xyU5L72v3xrTxJrkgyleTuJGcsVickSYdulBHAU8DvV9VLgI3AxUleClwK3FxVG4Cb2zrAWcCGdtsKXDnCa0uSRrTgAKiqR6rqm235P4A9wFpgM7CjVdsBnNuWNwPX1MDtwJokJy+45ZKkkSzKOYAk64FXALuAF1XVIzAICeCkVm0t8NDQw6ZbmSRpBYx8JXCS5wOfA36vqv49yUGrzlBWMzzfVgaHiHjxi188avMkrQB/PH51GGkEkOQoBn/8P11Vn2/FP9x/aKfd723l08ApQw9fBzx84HNW1faqmqyqyYmJiVGaJ0maxSjfAgpwFbCnqj4ytGknsKUtbwFuGCq/oH0baCPwxP5DRZKk5TfKIaBXA+8Avp3krlb2B8A24LokFwEPAue1bTcCZwNTwJPAhSO8tiRpRAsOgKr6GjMf1wfYNEP9Ai5e6OtpcTnLpySvBJakThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUqZGng9b4cHoHSYfCEYAkdcoAkKROGQCS1CkDQJI65UlgSSvG3w5eWY4AJKlTBoAkdcoAkKROeQ5A0tjzXMHScAQgSZ1yBLAKOMWDpKXgCECSOmUASFKnDABJ6pQBIEmdMgAkqVN+C0jSYcPrBQ6NIwBJ6pQBIEmd8hDQCvICL0kryQBYAv5hl8ab5woGPAQkSZ1a9gBI8qYk30syleTS5X59SdLAsgZAkiOAvwDOAl4KvC3JS5ezDZKkgeU+B3AmMFVV9wMk+SywGfjOMrdjQTy2L/VlKd7z43ReYbkDYC3w0ND6NPDKpXox/2BLGjfjdAJ6uQMgM5TVMyokW4GtbfXHSb635K1aHCcCP1rpRiySw6Uv9mO8HC79gGXoSz480sN/fj6VljsApoFThtbXAQ8PV6iq7cD25WzUYkiyu6omV7odi+Fw6Yv9GC+HSz/g8OnLcn8L6BvAhiSnJjkaOB/YucxtkCSxzCOAqnoqybuBrwBHAFdX1b3L2QZJ0sCyXwlcVTcCNy736y6DVXfYahaHS1/sx3g5XPoBh0lfUlVz15IkHXacCkKSOmUAzFOSY5N8Pcm3ktyb5I9a+alJdiW5L8m17eQ2SY5p61Nt+/qVbP9+s/Tj022KjnuSXJ3kqFaeJFe0ftyd5IyV7cHAwfoxtP1jSX48tL7a9keS/EmS7yfZk+Q9Q+Vjtz9g1r5sSvLNJHcl+VqS01v5WO6T/ZIckeTOJF9o66vqvT4vVeVtHjcG1zA8vy0fBewCNgLXAee38o8D72rLvwN8vC2fD1y70n2Yox9nt20BPjPUj7OBL7XyjcCule7DbP1o65PAXwM/Hqq/2vbHhcA1wM+0bSeN8/6Yoy/fB14ytB8+Nc77ZKg/lwB/A3yhra+q9/p8bo4A5qkG9n+iPKrdCngdcH0r3wGc25Y3t3Xa9k1JZroQblkdrB9VdWPbVsDXGVyjAYN+XNM23Q6sSXLy8rf8mQ7Wjzbf1J8C7z/gIatqfwDvAj5UVT9t9fa2OmO5P2DWvhTwglZ+HE9f+zOW+wQgyTrgHOATbT2ssvf6fBgAh6ANCe8C9gI3AT8AHq+qp1qVaQbTXcDQtBdt+xPACcvb4pkd2I+q2jW07SjgHcCXW9FM03esZQwcpB/vBnZW1SMHVF9t++MXgN9OsjvJl5JsaNXHdn/AQfvyTuDGJNMM/m9ta9XHdp8AH2XwIeKnbf0EVuF7fS4GwCGoqv+tql9l8On4TOAlM1Vr93NOe7FSDuxHkpcNbf5L4KtV9Y9tfTX14zXAecDHZqi+mvrxMuAY4Cc1uNr0r4CrW/Wx7QcctC/vA86uqnXAJ4GPtOpj2Zckbwb2VtUdw8UzVB379/pcDIAFqKrHgdsYHN9ck2T/9RTDU1
v8/7QXbftxwKPL29LZDfXjTQBJLgMmGBz73G/O6TtW2lA/fgM4HZhK8gDw3CRTrdpq2x/TwOfapr8DfqUtj/3+gGf05Szg5UOjzGuBV7Xlcd0nrwbe0v4PfZbBoZ+Psorf6wdjAMxTkokka9ryc4DXA3uAW4G3tmpbgBva8s62Ttt+Szu+vqIO0o/vJnkn8EbgbfuPOzc7gQvat082Ak/McHhl2R2kH3dU1c9W1fqqWg88WVWnt4esqv0B/D2DPzwAv87gRCqM6f6AWd8jxyX5xVbtN1sZjOk+qaoPVtW69n/ofAbtejur7L0+Lyt9Fnq13Bh8ArsTuBu4B/jDVn4ag5OmU8DfAse08mPb+lTbftpK92GOfjzF4JzGXe22vzwMfsTnB8C3gcmV7sNs/TigzvC3gFbb/lgDfLH9m/8Tg0/RY7s/5ujLb7W2fovBqOC0cd4nB/TptTz9LaBV9V6fz80rgSWpUx4CkqROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXq/wCBautQQ+rMLAAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQs0lEQVR4nO3df6zdd13H8efLbowfc9Kl3axtY2tS1I0o4LVOF8106iojdP8sqRFsdLGBTGT4A1tIJJo0KT+iYHQmDaAlTJoKM2tGUGpl/khg4+6X0JW6ysZ6WV0vGgT+sNjy9o/z3Wdn3b23t/ec3ntO+3wkN9/v93M+3/N937N+9zqf76+bqkKSJIDvWuoCJEmjw1CQJDWGgiSpMRQkSY2hIElqLlnqAs5mxYoVtW7duqUuQ5LGyoMPPvi1qlp5ruuNfCisW7eOycnJpS5DksZKkq8sZD0PH0mSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJKakb+jWRp367Z/cl79ntx183muRDo7RwqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVLjJam6aHhpqHR2jhQkSY2hIElqDAVJUmMoSJIaQ0GS1Jw1FJJ8OMmJJF/sa7syyYEkj3fT5X2v7UhyNMmRJDf1tf9Yki90r/1pkgz/15EkDWI+I4W/Ajad0bYdOFhVG4CD3TJJrgG2ANd269yZZFm3zl8A24AN3c+Z7ylJWmJnDYWq+mfgv89o3gzs6eb3ALf0te+tqpNV9QRwFNiYZBVwRVV9tqoK+EjfOpKkEbHQcwpXV9VxgG56Vde+GjjW12+qa1vdzZ/ZPqMk25JMJpmcnp5eYImSpHM17BPNM50nqDnaZ1RVu6tqoqomVq5cObTiJElzW2goPNMdEqKbnujap4C1ff3WAE937WtmaJckjZCFhsJ+YGs3vxW4p699S5LLkqynd0L5ge4Q0zeTXNdddfSrfetIkkbEWR+Il+RjwA3AiiRTwLuAXcC+JLcBTwG3AlTVoST7gMeAU8DtVXW6e6s307uS6SXAp7ofaWzN9wF70jg5ayhU1S/P8tKNs/TfCeycoX0SeOU5VSdJWlTe0SxJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSc1Zb16TLjbeqayLmSMFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpGagUEjytiSHknwxyceSvDjJlUkOJHm8my7v678jydEkR5LcNHj5kqRhWnAoJFkN/BYwUVWvBJYBW4DtwMGq2gAc7JZJck33+rXAJuDOJMsGK1+SNEyXDGH9lyT5P+ClwNPADuCG7
vU9wH3A7wObgb1VdRJ4IslRYCPw2QFrkC4I67Z/ct59n9x183msRBezBY8UquqrwPuAp4DjwP9U1aeBq6vqeNfnOHBVt8pq4FjfW0x1bS+QZFuSySST09PTCy1RknSOBjl8tJzet//1wPcBL0vyhrlWmaGtZupYVburaqKqJlauXLnQEiVJ52iQw0c/DzxRVdMASe4Gfgp4JsmqqjqeZBVwous/BaztW38NvcNN0kDO5bCLpLkNcvXRU8B1SV6aJMCNwGFgP7C167MVuKeb3w9sSXJZkvXABuCBAbYvSRqyBY8Uqur+JB8HHgJOAQ8Du4HLgX1JbqMXHLd2/Q8l2Qc81vW/vapOD1i/JGmIBrr6qKreBbzrjOaT9EYNM/XfCewcZJuSpPPHO5olSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqRnoj+xIWhrz/bvUT+66+TxXoguNIwVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhofcyFdwHwchs6VIwVJUjNQKCR5eZKPJ/lSksNJfjLJlUkOJHm8my7v678jydEkR5LcNHj5kqRhGnSk8AHg76rqh4AfBQ4D24GDVbUBONgtk+QaYAtwLbAJuDPJsgG3L0kaogWHQpIrgJ8BPgRQVd+uqq8Dm4E9Xbc9wC3d/GZgb1WdrKongKPAxoVuX5I0fIOMFH4AmAb+MsnDST6Y5GXA1VV1HKCbXtX1Xw0c61t/qmt7gSTbkkwmmZyenh6gREnSuRjk6qNLgNcAb6mq+5N8gO5Q0SwyQ1vN1LGqdgO7ASYmJmbsowvffK+ckTQ8g4wUpoCpqrq/W/44vZB4JskqgG56oq//2r711wBPD7B9SdKQLTgUquo/gWNJfrBruhF4DNgPbO3atgL3dPP7gS1JLkuyHtgAPLDQ7UuShm/Qm9feAtyV5EXAl4Ffoxc0+5LcBjwF3ApQVYeS7KMXHKeA26vq9IDblyQN0UChUFWPABMzvHTjLP13AjsH2aYk6fzxjmZJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjX95TYvK5xlJo82RgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSmoFDIcmyJA8nubdbvjLJgSSPd9PlfX13JDma5EiSmwbdtiRpuIYxUngrcLhveTtwsKo2AAe7ZZJcA2wBrgU2AXcmWTaE7UuShmSgUEiyBrgZ+GBf82ZgTze/B7ilr31vVZ2sqieAo8DGQbYvSRquQUcK7wfeDnynr+3qqjoO0E2v6tpXA8f6+k11bS+QZFuSySST09PTA5YoSZqvSxa6YpLXASeq6sEkN8xnlRnaaqaOVbUb2A0wMTExYx9Jw7Nu+yfn1e/JXTef50q01BYcCsD1wOuTvBZ4MXBFko8CzyRZVVXHk6wCTnT9p4C1feuvAZ4eYPuSpCFb8OGjqtpRVWuqah29E8j/WFVvAPYDW7tuW4F7uvn9wJYklyVZD2wAHlhw5ZKkoRtkpDCbXcC+JLcBTwG3AlTVoST7gMeAU8DtVXX6PGxfkrRAQwmFqroPuK+b/y/gxln67QR2DmObkqTh845mSVJjKEiSGkNBktQYCpKkxlCQJDXn45JUSRco73y+8DlSkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY3PPtJQzPeZOJJGmyMFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqvHlNc/KmNOni4khBktQYCpKkZsGhkGRtks8kOZzkUJK3du1XJjmQ5PFuurxvnR1JjiY5kuSmYfwCkqThGWSkcAr4nar6Y
eA64PYk1wDbgYNVtQE42C3TvbYFuBbYBNyZZNkgxUuShmvBoVBVx6vqoW7+m8BhYDWwGdjTddsD3NLNbwb2VtXJqnoCOApsXOj2JUnDN5RzCknWAa8G7geurqrj0AsO4Kqu22rgWN9qU12bJGlEDBwKSS4HPgHcUVXfmKvrDG01y3tuSzKZZHJ6enrQEiVJ8zRQKCS5lF4g3FVVd3fNzyRZ1b2+CjjRtU8Ba/tWXwM8PdP7VtXuqpqoqomVK1cOUqIk6RwMcvVRgA8Bh6vqj/te2g9s7ea3Avf0tW9JclmS9cAG4IGFbl+SNHyD3NF8PfBG4AtJHuna3gHsAvYluQ14CrgVoKoOJdkHPEbvyqXbq+r0ANuXJA3ZgkOhqv6Vmc8TANw4yzo7gZ0L3aYk6fzyjmZJUmMoSJIaQ0GS1PjobElDN99Hrj+56+bzXInOlSMFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY33KVyk5nsduaSLiyMFSVJjKEiSGg8fSVoyPg5j9DhSkCQ1hoIkqTEUJEmN5xQuMF5qKmkQjhQkSY0jBUkj71xGwF6pNBhHCpKkxlCQJDWGgiSp8ZzCmPCqIkmLwZGCJKlxpCDpguLzlAbjSEGS1BgKkqTGw0dLzBPI0tLwMNPMFn2kkGRTkiNJjibZvtjblyTNblFHCkmWAX8O/AIwBXw+yf6qemwx6zjf/PYvXTguthHFYh8+2ggcraovAyTZC2wGljQU/J+4pMUy6iGz2KGwGjjWtzwF/MSZnZJsA7Z1i99KcmSO91wBfG1oFS6eca0bxrf2ca0bxrf2ca0bzrH2vHu4Gx/g/Z6t+/sXsvJih0JmaKsXNFTtBnbP6w2TyaqaGLSwxTaudcP41j6udcP41j6udcP41j5o3Yt9onkKWNu3vAZ4epFrkCTNYrFD4fPAhiTrk7wI2ALsX+QaJEmzWNTDR1V1KslvAn8PLAM+XFWHBnzbeR1mGkHjWjeMb+3jWjeMb+3jWjeMb+0D1Z2qFxzSlyRdpHzMhSSpMRQkSc1Ih0KStUk+k+RwkkNJ3tq1vyrJ55I8kmQyyca+dXZ0j9A4kuSmJaz9xUkeSPJoV/sfdu1XJjmQ5PFuunyUap+j7vcm+VKSf0vyt0lePkp1z1V73+u/m6SSrOhrW/La56o7yVu62g4lec841D0O+2dfPcuSPJzk3m55pPfPvlrOrHt4+2dVjewPsAp4TTf/3cC/A9cAnwZ+qWt/LXBfN38N8ChwGbAe+A9g2RLVHuDybv5S4H7gOuA9wPaufTvw7lGqfY66fxG4pGt/96jVPVft3fJaehc4fAVYMUq1z/GZ/yzwD8Bl3WtXjUndI79/9v0Ovw38NXBvtzzS++ccdQ9t/xzpkUJVHa+qh7r5bwKH6d0VXcAVXbfv4bl7HTYDe6vqZFU9ARyl92iNRVc93+oWL+1+qqtxT9e+B7ilmx+J2meru6o+XVWnuvbP0bvHBEakbpjzMwf4E+DtPP9myZGofY663wzsqqqTXb8TXZ9Rr3vk90+AJGuAm4EP9jWP9P4JM9c9zP1zpEOhX5J1wKvpfRu5A3hvkmPA+4AdXbeZHqOxevGqfL5uiPcIcAI4UFX3A1dX1XHohR5wVdd9ZGqfpe5+vw58qpsfmbph5tqTvB74alU9ekb3kal9ls/8FcBPJ7k/yT8l+fGu+6jXfQdjsH8C76f3ReE7fW0jv38yc939Bto/xyIUklwOfAK4o6q+Qe8b1Nuqai3wNuBDz3adYfUlu+a2qk5X1avopfbGJK+co/vI1D5X3UneCZwC7nq2aaa3OO9FzmKG2n8EeCfwBzN0H5naZ/nMLwGW0zsk83vAviRh9Ose+f0zyeuAE1X14HxXmaFt0Ws/W93D2D9HPhSSXEovEO6qqru75q3As/N/w3PDoZF8jEZVfR24D9gEPJNkFUA3ffaQwMjVfkbdJNkKvA74leoOWDKCdcPzat9M71jqo0mepFffQ
0m+lxGs/YzPfAq4uztM8wC9b4YrGP26x2H/vB54ffdvYi/wc0k+yujvn7PVPbz9czFPjpzrD72U+wjw/jPaDwM3dPM3Ag9289fy/JMqX2bpTnquBF7ezb8E+JfuP9h7ef6JrPeMUu1z1L2J3iPOV57RfyTqnqv2M/o8yXMnmkei9jk+8zcBf9S1v4LeYYCMQd0jv3+e8XvcwHMnbEd6/5yj7qHtn6P+5zivB94IfKE7bgnwDuA3gA8kuQT4X7rHbFfVoST76H04p4Dbq+r0olfdswrYk94fFvouYF9V3Zvks/QOA9wGPAXcOmK1z1b3UXr/sA70jmDwuap60wjVPWvts3Ueodpn+8xfBHw4yReBbwNbq7enj3rdX2f098/Z7GK098/Z/BlD2j99zIUkqRn5cwqSpMVjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSc3/A6oLIzdOXScHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -207,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -216,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -254,6 +441,46 @@ "Create an alternative hypothesis and experiment to address question 3 above. How can we test if a dice rolls 6s more frequently?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Answers:\n", + "\n", + "##### Exercises\n", + "We reject the null hypothesis at the 5% level since our p value is less than 0.05\n", + "\n", + "If the null hypothesis were true and the die is not loaded, then we would see this result in 0.0831% of cases\n", + "\n", + "We cannot say with certainty that the die is weighted in such a way to roll more sixes and thus less ones (given opposite sides of a die summing to 7, and assuming that apart from being weighted that this is a standard die) however this would be the most likely case. With averages from 4 to 4.2, the die functions very similar to a balanced die that cannot roll a 1. 
However, without observing the actual rolls we cannot say that 6 appears more frequently as the die could be weighted to roll more fives.\n", + "\n", + "scipy.stats.ttest_ind is used for determining whether two independent samples have the same mean:\n", + "\n", + "Should be used for two array-like datasets\n", + "\n", + "scipy.stats.ttest_1samp is used to calculate the T score for a sample against a given mean:\n", + "\n", + "Used as above, when we have an array-like dataset that we want to test against an expected average.\n", + "\n", + "scipy.stats.ttest_ind_from_stats is a T test for two datasets where the null is that they have the \"identical average values\":\n", + "\n", + "Used if we have the means, variances and sizes of two datasets and we want to determine through statistical testing whether they have a significant difference in their means.\n", + "\n", + "scipy.stats.ttest_rel same as scipy.stats.ttest_ind but for related or repeated samples:\n", + "\n", + "Used if the two array datasets are paired, i.e. each value in one dataset corresponds to a value in the other (for example, repeated measurements of the same subjects).\n", + "\n", + "##### Extended Exercise\n", + "\n", + "H0: Pr(rolling a 6) = 1/6\n", + "HA: Pr(rolling a 6) > 1/6\n", + "\n", + "These are our null and alternate hypotheses, at this point the die would be rolled some number of times, above uses 100 die results however rolling 1000 times would not be too difficult (though it would be time consuming). If the die is weighted/unfair then the greater number of rolls will improve the accuracy of our result.\n", + "We can then generate a binary counter for whether a six is observed (as well as the other numbers generated as it would be just as interesting to see if the other numbers have equal chances of being rolled or if they were also imbalanced between themselves). Thus our data would be an array of 0s indicating rolls of 1, 2, 3, 4 or 5 and 1s indicating 6s. 
\n", + "\n", + "We can then use scipy.stats.ttest_1samp (we can include in the function's parameters that the test is one-sided as we are testing to see if the mean of the 1000 indicator values is greater than 1/6.) This will produce the T score and thus the statistical likelihood that 6s are appearing the expected number of times or greater than average" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -315,6 +542,21 @@ "2. Does our finding hold after adjusting? The solution uses one specific method of fixing the thresholds - if you choose another, then you may get another answer." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Answers:\n", + "\n", + "The errors made here are similar to the jellybean case at the start of this Module however one of the biggest issues here is we should not immediately start trading. It very well may be the case that the IBM stock price falls on a Thursday, however we should try to find some cause of this relation. We can test this hypothesis however without any backing of why the stock prices drop on Thursday, this information probably would not help us at all.\n", + "\n", + "The multiple comparisons problem occurs when one makes too many simultaneous statistical inferences (here being the five alternate hypotheses of \"stock prices are more likely to drop on xxxxxday\".) The more simultaneous inferences, the greater the probability that one of them will randomly be significant.\n", + "\n", + "The best way to correct for the multiple comparisons problem is to use Bonferroni correction AKA multiple comparisons correction. This is simply taking our desired level of significance and distributing it to all the cases. Here since we are unsure which of the days would produce a result statistically relevant we should divide our p threshold by 5, giving us a 0.01 critical value. 
Thus, we observe that all five days are not statistically significant at the 0.01 and the alternate hypothesis should be rejected.\n", + "\n", + "We may also use the Sidak correction method, the formula for which is alpha_new = 1- (1- alpha_old)^(1/no. of test) which gives a new critical value of 0.0102. Using this correction method, we still reject all alternate hypotheses." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -453,6 +695,218 @@ "The values can be computed via simulation - that is, draw many random samples, and compute the likelihood of getting a t value at least that high between them." ] }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 275, + "metadata": {}, + "outputs": [], + "source": [ + "def student(set1,set2):\n", + " # Numerator\n", + " num = np.mean(set1)-np.mean(set2)\n", + " \n", + " # Errors\n", + " err1 = np.std(set1)/np.sqrt(len(set1))\n", + " err2 = np.std(set2)/np.sqrt(len(set2))\n", + " # Denominator\n", + " den = np.sqrt(err1*err1 + err2*err2)\n", + " \n", + " # Result\n", + " result = num/den\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [], + "source": [ + "def sim(size):\n", + " size1 = np.random.randint(2,size)\n", + " size2 = size + 2 - size1\n", + " \n", + " _set1 = np.random.randn(size1)\n", + " _set2 = np.random.randn(size2)\n", + " \n", + " return student(_set1, _set2)" + ] + }, + { + "cell_type": "code", + "execution_count": 282, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0.01: {5: -4.347020181122642,\n", + " 10: -3.7534309545942137,\n", + " 20: -3.5200182854273523,\n", + " 50: -3.0243717659727767,\n", + " 100: -2.702060412618033},\n", + " 0.025: {5: -3.3298051168147835,\n", + " 10: -2.8786338725864695,\n", + " 20: -2.63852437222008,\n", + " 50: 
-2.400234898219118,\n", + " 100: -2.2272163133976433},\n", + " 0.05: {5: -2.620179796328343,\n", + " 10: -2.2960005832119927,\n", + " 20: -2.0850939244538655,\n", + " 50: -1.8852402698377235,\n", + " 100: -1.7568449695517905},\n", + " 0.1: {5: -1.8701224898632154,\n", + " 10: -1.7021257051140424,\n", + " 20: -1.5594022663945055,\n", + " 50: -1.4087288445729815,\n", + " 100: -1.3748027359994346},\n", + " 0.25: {5: -0.9398256728031857,\n", + " 10: -0.8445400393685317,\n", + " 20: -0.7769662691429423,\n", + " 50: -0.6896809753435514,\n", + " 100: -0.6958804168769764},\n", + " 0.4: {5: -0.34111838108722703,\n", + " 10: -0.33537928225777236,\n", + " 20: -0.30208453840077504,\n", + " 50: -0.23066449107249015,\n", + " 100: -0.26143571458222453}}" + ] + }, + "execution_count": 282, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numsims = 10000\n", + "data = {0.01:{}, 0.025:{}, 0.05:{}, 0.1:{}, 0.25:{}, 0.4:{}}\n", + "for i in [5, 10, 20, 50, 100]:\n", + " results = []\n", + " for j in range(numsims):\n", + " results.append(sim(i))\n", + " results.sort()\n", + " # After ordering, the largest value in the smallest 1% of data is at position numsims * 0.01\n", + " # This is true for all other p values we are testing, 5%, 10% and 20% = numsims* their % thus:\n", + " for k in [0.01, 0.025, 0.05, 0.1, 0.25, 0.4]:\n", + " index = int(k*numsims)\n", + " data[k][i] = results[index]\n", + "\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0.0100.0250.0500.1000.2500.400
5-4.347020-3.329805-2.620180-1.870122-0.939826-0.341118
10-3.753431-2.878634-2.296001-1.702126-0.844540-0.335379
20-3.520018-2.638524-2.085094-1.559402-0.776966-0.302085
50-3.024372-2.400235-1.885240-1.408729-0.689681-0.230664
100-2.702060-2.227216-1.756845-1.374803-0.695880-0.261436
\n", + "
" + ], + "text/plain": [ + " 0.010 0.025 0.050 0.100 0.250 0.400\n", + "5 -4.347020 -3.329805 -2.620180 -1.870122 -0.939826 -0.341118\n", + "10 -3.753431 -2.878634 -2.296001 -1.702126 -0.844540 -0.335379\n", + "20 -3.520018 -2.638524 -2.085094 -1.559402 -0.776966 -0.302085\n", + "50 -3.024372 -2.400235 -1.885240 -1.408729 -0.689681 -0.230664\n", + "100 -2.702060 -2.227216 -1.756845 -1.374803 -0.695880 -0.261436" + ] + }, + "execution_count": 283, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r = pd.DataFrame(data)\n", + "r" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -477,7 +931,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.8.8" } }, "nbformat": 4,