- 7th Sep 2021
- 06:03 am

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bayesian scenario simulation\n",
    "\n",
    "Builds two synthetic binary events A and B from requested values of P(A), P(B) and P(A|B), splits the samples into a training part and a test part, and prints how closely the empirical probabilities of each part (including P(B|A) from Bayes' rule) match the requested ones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _empirical_probabilities(a, b):\n",
    "    \"\"\"Return the empirical P(A), P(B), P(A|B) and P(B|A) of paired 0/1 vectors.\"\"\"\n",
    "    joint = np.sum(a * b)\n",
    "    return {\n",
    "        \"A\": np.mean(a),\n",
    "        \"B\": np.mean(b),\n",
    "        \"A|B\": joint / np.sum(b),\n",
    "        \"B|A\": joint / np.sum(a),\n",
    "    }\n",
    "\n",
    "\n",
    "def BayesianScenario(A, B, A_B, split, n, random_state=None):\n",
    "    \"\"\"Simulate two binary events and compare train/test probability estimates.\n",
    "\n",
    "    Two 0/1 vectors of length ``n`` are built so that A occurs ``round(n*A)``\n",
    "    times, B occurs ``round(n*B)`` times and both occur together\n",
    "    ``round(round(n*B)*A_B)`` times. The paired vectors are split with\n",
    "    ``train_test_split``. The empirical P(A), P(B), P(A|B) and P(B|A) of the\n",
    "    training part (printed as ``Predicted``) and of the test part (printed as\n",
    "    ``Observed``) are reported, followed by two sets of percentage errors of\n",
    "    the test-part estimates against the requested values.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    A, B : float\n",
    "        Requested marginal probabilities P(A) and P(B).\n",
    "    A_B : float\n",
    "        Requested conditional probability P(A|B).\n",
    "    split : float\n",
    "        Fraction of the samples assigned to the training part.\n",
    "    n : int\n",
    "        Total number of samples to generate.\n",
    "    random_state : int, RandomState instance or None, optional\n",
    "        Forwarded to ``train_test_split``. Pass an int to make the split, and\n",
    "        therefore the printed numbers, reproducible. The default ``None``\n",
    "        keeps the original unseeded behaviour.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple of numpy.ndarray\n",
    "        ``(vector_A, vector_B, A_train, A_test, B_train, B_test)``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the requested probabilities cannot be realised with ``n`` samples,\n",
    "        i.e. the joint count exceeds either marginal count or the two events\n",
    "        together would need more than ``n`` samples.\n",
    "    \"\"\"\n",
    "    n1 = round(n * A)      # samples in which A occurs\n",
    "    n2 = round(n * B)      # samples in which B occurs\n",
    "    n12 = round(n2 * A_B)  # samples in which both occur\n",
    "\n",
    "    # Without this check an over-full request made the trailing B-only segment\n",
    "    # overlap the A segment, so the generated P(A|B) silently differed from A_B.\n",
    "    if min(n1, n2, n12) < 0 or n12 > min(n1, n2) or n1 + n2 - n12 > n:\n",
    "        raise ValueError(\n",
    "            \"Inconsistent scenario: counts A=%d, B=%d, (A and B)=%d do not fit in n=%d samples\"\n",
    "            % (n1, n2, n12, n)\n",
    "        )\n",
    "\n",
    "    # A fills the first n1 positions. B fills the first n12 positions (the\n",
    "    # overlap with A) and the last n2 - n12 positions (B without A).\n",
    "    vector_A = np.concatenate((np.ones(n12), np.ones(n1 - n12), np.zeros(n - n1)), axis=None)\n",
    "    vector_B = np.concatenate((np.ones(n12), np.zeros(n - n2), np.ones(n2 - n12)), axis=None)\n",
    "\n",
    "    A_train, A_test, B_train, B_test = train_test_split(\n",
    "        vector_A, vector_B, test_size=(1 - split), random_state=random_state\n",
    "    )\n",
    "\n",
    "    predicted = _empirical_probabilities(A_train, B_train)\n",
    "    observed = _empirical_probabilities(A_test, B_test)\n",
    "    # Bayes' rule gives the P(B|A) implied by the three requested values.\n",
    "    requested = {\"A\": A, \"B\": B, \"A|B\": A_B, \"B|A\": A_B * B / A}\n",
    "\n",
    "    labels = (\"A\", \"B\", \"A|B\", \"B|A\")\n",
    "    for label in labels:\n",
    "        print(\"Predicted \" + label + \" = \", round(predicted[label], 4),\n",
    "              \"\\t Observed \" + label + \" = \", round(observed[label], 4))\n",
    "\n",
    "    # Difference between requested and test-part value, relative to the requested value.\n",
    "    print(\"Observed Error\")\n",
    "    for label in labels:\n",
    "        print(\"Percentage Error \" + label + \" = \",\n",
    "              round(100 * (requested[label] - observed[label]) / requested[label], 4))\n",
    "\n",
    "    # Same difference, relative to the test-part value instead.\n",
    "    print(\"True Error\")\n",
    "    for label in labels:\n",
    "        print(\"Percentage Error \" + label + \" = \",\n",
    "              round(100 * (requested[label] - observed[label]) / observed[label], 4))\n",
    "\n",
    "    return vector_A, vector_B, A_train, A_test, B_train, B_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predicted A = 0.505 \t Observed A = 0.48\n",
      "Predicted B = 0.41 \t Observed B = 0.36\n",
      "Predicted A|B = 0.6128 \t Observed A|B = 0.5417\n",
      "Predicted B|A = 0.4975 \t Observed B|A = 0.4062\n",
      "Observed Error\n",
      "Percentage Error A = 4.0\n",
      "Percentage Error B = 10.0\n",
      "Percentage Error A|B = 9.7222\n",
      "Percentage Error B|A = 15.3646\n",
      "True Error\n",
      "Percentage Error A = 4.1667\n",
      "Percentage Error B = 11.1111\n",
      "Percentage Error A|B = 10.7692\n",
      "Percentage Error B|A = 18.1538\n"
     ]
    }
   ],
   "source": [
    "A = 0.5\n",
    "B = 0.4\n",
    "A_B = 0.6\n",
    "split = 0.8\n",
    "n = 1000\n",
    "# Pass random_state=<int> to BayesianScenario for a reproducible split.\n",
    "scenario_vectors = BayesianScenario(A, B, A_B, split, n)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}