{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def BayesianScenario(A, B, A_B, split, n):\n",
" n1 = round(n*A)\n",
" n2 = round(n*B)\n",
" n12 = round(n2*A_B)\n",
" vector_A = np.concatenate((np.ones(n12), np.ones(n1 - n12),np.zeros(n-n1)), axis=None)\n",
" vector_B = np.concatenate((np.ones(n12), np.zeros(n-n2), np.ones(n2 - n12)), axis=None)\n",
" A_train, A_test, B_train, B_test = train_test_split(vector_A, vector_B, test_size=(1-split))\n",
" predicted_A = sum(A_train)/len(A_train)\n",
" observed_A = sum(A_test)/len(A_test)\n",
" predicted_B = sum(B_train)/len(B_train)\n",
" observed_B = sum(B_test)/len(B_test)\n",
" predicted_A_B = sum(A_train*B_train)/sum(B_train)\n",
" observed_A_B = sum(A_test*B_test)/sum(B_test)\n",
" predicted_B_A = sum(A_train*B_train)/sum(A_train)\n",
" observed_B_A = sum(A_test*B_test)/sum(A_test)\n",
" p_B_A = A_B*B/A\n",
" print(\"Predicted A = \", round(predicted_A, 4), \"\\t Observed A = \", round(observed_A,4))\n",
" print(\"Predicted B = \", round(predicted_B, 4), \"\\t Observed B = \", round(observed_B,4))\n",
" print(\"Predicted A|B = \", round(predicted_A_B, 4), \"\\t Observed A|B = \", round(observed_A_B,4))\n",
" print(\"Predicted B|A = \", round(predicted_B_A, 4), \"\\t Observed B|A = \", round(observed_B_A,4))\n",
" print(\"Observed Error\")\n",
" print(\"Percentage Error A = \", round(100*(A-observed_A)/A, 4))\n",
" print(\"Percentage Error B = \", round(100*(B-observed_B)/B, 4))\n",
" print(\"Percentage Error A|B = \", round(100*(A_B-observed_A_B)/A_B, 4))\n",
" print(\"Percentage Error B|A = \", round(100*(p_B_A-observed_B_A)/p_B_A, 4))\n",
" print(\"True Error\")\n",
" print(\"Percentage Error A = \", round(100*(A-observed_A)/observed_A, 4))\n",
" print(\"Percentage Error B = \", round(100*(B-observed_B)/observed_B, 4))\n",
" print(\"Percentage Error A|B = \", round(100*(A_B-observed_A_B)/observed_A_B, 4))\n",
" print(\"Percentage Error B|A = \", round(100*(p_B_A-observed_B_A)/observed_B_A, 4))\n",
" return vector_A, vector_B, A_train, A_test, B_train, B_test"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted A = 0.505 \t Observed A = 0.48\n",
"Predicted B = 0.41 \t Observed B = 0.36\n",
"Predicted A|B = 0.6128 \t Observed A|B = 0.5417\n",
"Predicted B|A = 0.4975 \t Observed B|A = 0.4062\n",
"Observed Error\n",
"Percentage Error A = 4.0\n",
"Percentage Error B = 10.0\n",
"Percentage Error A|B = 9.7222\n",
"Percentage Error B|A = 15.3646\n",
"True Error\n",
"Percentage Error A = 4.1667\n",
"Percentage Error B = 11.1111\n",
"Percentage Error A|B = 10.7692\n",
"Percentage Error B|A = 18.1538\n"
]
}
],
"source": [
"A = 0.5\n",
"B = 0.4\n",
"A_B = 0.6\n",
"split = 0.8\n",
"n = 1000\n",
"hey = BayesianScenario(A, B, A_B, split, n)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}