This Python notebook demonstrates Next-Product-To-Buy (NPTB) modeling using an experiment that Bookbinders ran on 30,000 customers. Dave Lawton sent an offer in the "Art" category ("The Art History of Florence") to 10,000 randomly selected customers. Another 10,000 randomly selected customers received an offer in the "DIY" category ("Paint Like a Pro"), and a final 10,000 randomly selected customers received an offer in the "Cook" category ("Vegetarian Cooking for Everyone"). The dataset contains the responses from these 30,000 customers. Our task is to use the available data to find the best book to offer each of the 30,000 customers in the test, as well as the best book to offer an additional 10,000 customers that were not part of the test.
Start by importing the relevant packages and the `bbb_nptb` dataset.
import os
import numpy as np
import pandas as pd
import pyrsm as rsm
import statsmodels.formula.api as smf
from statsmodels.genmod.families import Binomial
from statsmodels.genmod.families.links import logit
# worthwhile to check the working directory used
os.getcwd()
'/home/jovyan/Desktop/MGTA455/rsm-mgta455-bbb-nptb'
# loading data
bbb_nptb = pd.read_pickle("data/bbb_nptb.pkl")
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | nonbook | total | purch | child | youth | cook | do_it | reference | art | geog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | 117 | 146 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | 291 | 336 | 4 | 1 | 2 | 0 | 0 | 0 | 1 | 0 |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | 84 | 111 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | 59 | 88 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | 193 | 220 | 2 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | 207 | 297 | 8 | 3 | 1 | 1 | 0 | 1 | 0 | 2 |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | 239 | 265 | 2 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | 130 | 159 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | 48 | 148 | 9 | 3 | 1 | 1 | 1 | 0 | 3 | 0 |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | 256 | 361 | 9 | 4 | 1 | 1 | 0 | 0 | 2 | 1 |
40000 rows × 20 columns
We will estimate a logistic regression with `buyer` as the response variable and `gender`, `last`, `total`, and the book categories `child` through `geog` as the explanatory variables. For this test Dave Lawton sent out Art, DIY, and Cook offers to 10,000 customers each, so we will also use the information about which offer was sent as an explanatory variable in the model.
bbb_nptb["buyer_yes"] = (bbb_nptb["buyer"] == "yes").astype(int)
lr = smf.glm(
formula="buyer_yes ~ offer + gender + last + total + child + \
youth + cook + do_it + reference + art + geog",
family=Binomial(link=logit()),
data=bbb_nptb,
).fit()
# lr.summary()
# rsm.model_fit(lr)
rsm.or_ci(lr)
index | OR | OR% | 2.5% | 97.5% | p.values | ||
---|---|---|---|---|---|---|---|
1 | offer[T.DIY] | 1.546 | 54.644% | 1.407 | 1.700 | < .001 | *** |
2 | offer[T.Cook] | 1.239 | 23.903% | 1.124 | 1.366 | < .001 | *** |
3 | gender[T.M] | 0.701 | -29.872% | 0.646 | 0.761 | < .001 | *** |
4 | last | 0.917 | -8.272% | 0.912 | 0.923 | < .001 | *** |
5 | total | 1.001 | 0.085% | 1.000 | 1.001 | < .001 | *** |
6 | child | 0.978 | -2.223% | 0.944 | 1.013 | 0.207 | |
7 | youth | 1.146 | 14.63% | 1.089 | 1.206 | < .001 | *** |
8 | cook | 1.223 | 22.28% | 1.185 | 1.261 | < .001 | *** |
9 | do_it | 1.104 | 10.365% | 1.053 | 1.157 | < .001 | *** |
10 | reference | 1.264 | 26.365% | 1.195 | 1.336 | < .001 | *** |
11 | art | 1.439 | 43.852% | 1.370 | 1.511 | < .001 | *** |
12 | geog | 1.251 | 25.07% | 1.200 | 1.303 | < .001 | *** |
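As a sanity check on this table, the odds ratios are simply the exponentiated coefficients of the logistic regression, and the OR% column re-expresses each odds ratio as a percentage change in the odds. A minimal sketch that reproduces those two columns from the fitted model (using the `numpy` and `pandas` imports above):

# odds ratios are the exponentiated logistic regression coefficients
odds_ratios = np.exp(lr.params)
# OR% is the same information expressed as a percentage change in the odds
pd.DataFrame({"OR": odds_ratios, "OR%": 100 * (odds_ratios - 1)}).round(3)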
We want to predict what each customer would have done if we had sent him/her any one of the three offers. If we just use `bbb_nptb` as the prediction data we only get predictions based on the offer each customer was actually sent. What we want, however, is to predict what each person would have done if they had been sent, for example, the Art offer. We can do this by setting the value of the "offer" column and (re)running the model prediction for each book type:
bbb_nptb["p_art"] = lr.predict(bbb_nptb.assign(offer="Art"))
bbb_nptb["p_diy"] = lr.predict(bbb_nptb.assign(offer="DIY"))
bbb_nptb["p_cook"] = lr.predict(bbb_nptb.assign(offer="Cook"))
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | ... | youth | cook | do_it | reference | art | geog | buyer_yes | p_art | p_diy | p_cook | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0.006855 | 0.010562 | 0.008480 |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | ... | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0.061482 | 0.091988 | 0.075074 |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0.051485 | 0.077440 | 0.063016 |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0.058402 | 0.087522 | 0.071365 |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0.076909 | 0.114138 | 0.093572 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | ... | 1 | 1 | 0 | 1 | 0 | 2 | 0 | 0.042615 | 0.064402 | 0.052269 |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | ... | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0.078370 | 0.116217 | 0.095317 |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0.044562 | 0.067274 | 0.054632 |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | ... | 1 | 1 | 1 | 0 | 3 | 0 | 0 | 0.206029 | 0.286372 | 0.243295 |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | ... | 1 | 1 | 0 | 0 | 2 | 1 | 0 | 0.108094 | 0.157838 | 0.130558 |
40000 rows × 24 columns
Which offer should we extend? Use the `idxmax` function to automatically find the best offer for each customer:
bbb_nptb["to_offer"] = (
bbb_nptb[["p_art", "p_diy", "p_cook"]]
.idxmax(axis=1)
.str.replace("p_art", "Art")
.replace("p_diy", "DIY")
.replace("p_cook", "Cook")
)
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | ... | cook | do_it | reference | art | geog | buyer_yes | p_art | p_diy | p_cook | to_offer | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0.006855 | 0.010562 | 0.008480 | DIY |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0.061482 | 0.091988 | 0.075074 | DIY |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | ... | 1 | 0 | 1 | 0 | 0 | 0 | 0.051485 | 0.077440 | 0.063016 | DIY |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0.058402 | 0.087522 | 0.071365 | DIY |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | ... | 1 | 0 | 1 | 0 | 0 | 0 | 0.076909 | 0.114138 | 0.093572 | DIY |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | ... | 1 | 0 | 1 | 0 | 2 | 0 | 0.042615 | 0.064402 | 0.052269 | DIY |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0.078370 | 0.116217 | 0.095317 | DIY |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | ... | 1 | 0 | 0 | 0 | 1 | 0 | 0.044562 | 0.067274 | 0.054632 | DIY |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | ... | 1 | 1 | 0 | 3 | 0 | 0 | 0.206029 | 0.286372 | 0.243295 | DIY |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | ... | 1 | 0 | 0 | 2 | 1 | 0 | 0.108094 | 0.157838 | 0.130558 | DIY |
40000 rows × 25 columns
This command provides a label for the category with the maximum predicted probability of buying (i.e., "Art", "DIY", or "Cook"). Let's use this result and also create a variable `p_target` that captures the probability of responding to the best offer selected for a customer. We use `max` with `axis=1` here because we want the maximum response probability for each customer (i.e., each row in the data):
bbb_nptb["p_target"] = bbb_nptb[["p_art", "p_diy", "p_cook"]].max(axis=1)
Let's create a `crosstab` to see which book(s) Dave Lawton should offer his customers:
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | ... | do_it | reference | art | geog | buyer_yes | p_art | p_diy | p_cook | to_offer | p_target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | ... | 0 | 0 | 0 | 1 | 0 | 0.006855 | 0.010562 | 0.008480 | DIY | 0.010562 |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | ... | 0 | 0 | 1 | 0 | 0 | 0.061482 | 0.091988 | 0.075074 | DIY | 0.091988 |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | ... | 0 | 1 | 0 | 0 | 0 | 0.051485 | 0.077440 | 0.063016 | DIY | 0.077440 |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | ... | 0 | 0 | 0 | 1 | 0 | 0.058402 | 0.087522 | 0.071365 | DIY | 0.087522 |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | ... | 0 | 1 | 0 | 0 | 0 | 0.076909 | 0.114138 | 0.093572 | DIY | 0.114138 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | ... | 0 | 1 | 0 | 2 | 0 | 0.042615 | 0.064402 | 0.052269 | DIY | 0.064402 |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | ... | 1 | 0 | 0 | 0 | 0 | 0.078370 | 0.116217 | 0.095317 | DIY | 0.116217 |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | ... | 0 | 0 | 0 | 1 | 0 | 0.044562 | 0.067274 | 0.054632 | DIY | 0.067274 |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | ... | 1 | 0 | 3 | 0 | 0 | 0.206029 | 0.286372 | 0.243295 | DIY | 0.286372 |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | ... | 0 | 0 | 2 | 1 | 0 | 0.108094 | 0.157838 | 0.130558 | DIY | 0.157838 |
40000 rows × 26 columns
pd.crosstab(index=bbb_nptb["to_offer"], columns="count").apply(rsm.format_nr)
col_0 | count |
---|---|
to_offer | |
DIY | 40,000 |
This does not look very customized! The model predicts that every customer should be sent the DIY book. The (deliberate) mistake in the analysis above is that the specified model is not flexible enough to allow customization across customers.
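A quick way to see why this happens: in a main-effects logistic regression the offer enters the log-odds additively, so the ranking of offers is the same for every customer:

$$\log\frac{p}{1-p} = \beta_{\text{offer}} + x'\beta$$

Because the logistic function is monotone, the offer with the largest $\beta_{\text{offer}}$ (here DIY) produces the highest predicted probability for every combination of customer characteristics $x$.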
The whole point of customization is that different offers may work better for different customers. In other words, we want to customize offers because we think there might be an interaction between (1) who the customer is and (2) how effective each offer is. Hence, we need to interact `offer` with the variables that describe customer characteristics. For convenience, let's just interact `offer` with all available customer variables. The model output is shown below:
lri = smf.glm(
formula="buyer_yes ~ offer + gender + last + total + child + \
youth + cook + do_it + reference + art + geog + \
offer:gender + offer:last + offer:total + offer:child + \
offer:youth + offer:cook + offer:do_it + offer:reference + \
offer:art + offer:geog",
family=Binomial(link=logit()),
data=bbb_nptb,
).fit()
rsm.or_ci(lri)
index | OR | OR% | 2.5% | 97.5% | p.values | ||
---|---|---|---|---|---|---|---|
1 | offer[T.DIY] | 3.164 | 216.429% | 2.314 | 4.327 | < .001 | *** |
2 | offer[T.Cook] | 0.045 | -95.535% | 0.031 | 0.065 | < .001 | *** |
3 | gender[T.M] | 0.471 | -52.881% | 0.403 | 0.551 | < .001 | *** |
4 | offer[T.DIY]:gender[T.M] | 0.730 | -27.029% | 0.588 | 0.905 | 0.004 | ** |
5 | offer[T.Cook]:gender[T.M] | 5.859 | 485.894% | 4.547 | 7.549 | < .001 | *** |
6 | last | 0.901 | -9.88% | 0.890 | 0.913 | < .001 | *** |
7 | offer[T.DIY]:last | 0.916 | -8.39% | 0.899 | 0.934 | < .001 | *** |
8 | offer[T.Cook]:last | 1.055 | 5.497% | 1.038 | 1.073 | < .001 | *** |
9 | total | 1.001 | 0.091% | 1.000 | 1.002 | 0.039 | * |
10 | offer[T.DIY]:total | 1.000 | -0.013% | 0.999 | 1.001 | 0.829 | |
11 | offer[T.Cook]:total | 1.001 | 0.072% | 0.999 | 1.002 | 0.269 | |
12 | child | 0.843 | -15.668% | 0.780 | 0.911 | < .001 | *** |
13 | offer[T.DIY]:child | 0.705 | -29.464% | 0.630 | 0.789 | < .001 | *** |
14 | offer[T.Cook]:child | 1.842 | 84.185% | 1.663 | 2.040 | < .001 | *** |
15 | youth | 0.841 | -15.919% | 0.748 | 0.945 | 0.004 | ** |
16 | offer[T.DIY]:youth | 1.404 | 40.41% | 1.201 | 1.642 | < .001 | *** |
17 | offer[T.Cook]:youth | 1.702 | 70.192% | 1.458 | 1.987 | < .001 | *** |
18 | cook | 0.776 | -22.393% | 0.720 | 0.836 | < .001 | *** |
19 | offer[T.DIY]:cook | 0.667 | -33.297% | 0.597 | 0.745 | < .001 | *** |
20 | offer[T.Cook]:cook | 4.548 | 354.799% | 4.090 | 5.057 | < .001 | *** |
21 | do_it | 0.544 | -45.594% | 0.483 | 0.613 | < .001 | *** |
22 | offer[T.DIY]:do_it | 5.847 | 484.684% | 5.013 | 6.820 | < .001 | *** |
23 | offer[T.Cook]:do_it | 1.211 | 21.105% | 1.025 | 1.431 | 0.024 | * |
24 | reference | 1.452 | 45.162% | 1.297 | 1.624 | < .001 | *** |
25 | offer[T.DIY]:reference | 1.050 | 5.029% | 0.895 | 1.232 | 0.548 | |
26 | offer[T.Cook]:reference | 0.704 | -29.604% | 0.598 | 0.829 | < .001 | *** |
27 | art | 3.080 | 208.008% | 2.798 | 3.390 | < .001 | *** |
28 | offer[T.DIY]:art | 0.422 | -57.769% | 0.367 | 0.486 | < .001 | *** |
29 | offer[T.Cook]:art | 0.258 | -74.236% | 0.222 | 0.299 | < .001 | *** |
30 | geog | 1.782 | 78.202% | 1.640 | 1.937 | < .001 | *** |
31 | offer[T.DIY]:geog | 1.238 | 23.753% | 1.099 | 1.394 | < .001 | *** |
32 | offer[T.Cook]:geog | 0.313 | -68.704% | 0.274 | 0.357 | < .001 | *** |
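To interpret the interactions, note that the odds ratio for a customer characteristic under a given offer is the product of its main effect and the corresponding interaction term. For example, using the table above, each additional cook-category purchase multiplies the odds of responding to the Cook offer by roughly 0.776 × 4.548 ≈ 3.53, whereas it lowers the odds of responding to the (baseline) Art offer by about 22%. A small sketch of the same calculation from the fitted model (the parameter labels assume the patsy naming shown in the table):

# combined odds ratio for one extra cook purchase under the Cook offer:
# exp(beta_cook) * exp(beta_offer[T.Cook]:cook)
or_cook_under_cook = np.exp(lri.params["cook"] + lri.params["offer[T.Cook]:cook"])
round(or_cook_under_cook, 3)  # roughly 3.53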
Now let's repeat the previous analysis steps, this time using the results from the new, more flexible model. Start by generating predictions. Use the names `p_arti`, `p_diyi`, and `p_cooki` to store the predictions from the model with interactions:
bbb_nptb["p_arti"] = lri.predict(bbb_nptb.assign(offer="Art"))
bbb_nptb["p_diyi"] = lri.predict(bbb_nptb.assign(offer="DIY"))
bbb_nptb["p_cooki"] = lri.predict(bbb_nptb.assign(offer="Cook"))
Which offer should we extend? Again, use the `idxmax` function to automatically find the best offer for each customer:
bbb_nptb["to_offeri"] = (
bbb_nptb[["p_arti", "p_diyi", "p_cooki"]]
.idxmax(axis=1)
.str.replace("p_arti", "Art")
.replace("p_diyi", "DIY")
.replace("p_cooki", "Cook")
)
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | ... | buyer_yes | p_art | p_diy | p_cook | to_offer | p_target | p_arti | p_diyi | p_cooki | to_offeri | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | ... | 0 | 0.006855 | 0.010562 | 0.008480 | DIY | 0.010562 | 0.004341 | 0.000379 | 0.011592 | Cook |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | ... | 0 | 0.061482 | 0.091988 | 0.075074 | DIY | 0.091988 | 0.063494 | 0.027393 | 0.058529 | Art |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | ... | 0 | 0.051485 | 0.077440 | 0.063016 | DIY | 0.077440 | 0.055256 | 0.033189 | 0.019800 | Art |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | ... | 0 | 0.058402 | 0.087522 | 0.071365 | DIY | 0.087522 | 0.058249 | 0.050300 | 0.038185 | Art |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | ... | 0 | 0.076909 | 0.114138 | 0.093572 | DIY | 0.114138 | 0.065339 | 0.056160 | 0.090641 | Cook |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | ... | 0 | 0.042615 | 0.064402 | 0.052269 | DIY | 0.064402 | 0.027214 | 0.005189 | 0.084967 | Cook |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | ... | 0 | 0.078370 | 0.116217 | 0.095317 | DIY | 0.116217 | 0.048489 | 0.367550 | 0.009093 | DIY |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | ... | 0 | 0.044562 | 0.067274 | 0.054632 | DIY | 0.067274 | 0.041691 | 0.025337 | 0.035115 | Art |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | ... | 0 | 0.206029 | 0.286372 | 0.243295 | DIY | 0.286372 | 0.308147 | 0.086098 | 0.144968 | Art |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | ... | 0 | 0.108094 | 0.157838 | 0.130558 | DIY | 0.157838 | 0.227788 | 0.010167 | 0.072679 | Art |
40000 rows × 30 columns
This command, again, provides a label for the category with the maximum predicted probability of buying across the columns `p_arti`, `p_diyi`, and `p_cooki`. Let's use this result and also create a variable `p_targeti` that captures the probability of responding to the best offer selected for a customer. We use `max` with `axis=1` here because we want the maximum response probability for each customer (i.e., each row in the data):
bbb_nptb["p_targeti"] = bbb_nptb[["p_arti", "p_diyi", "p_cooki"]].max(axis=1)
Let's create a `crosstab` to see which book(s) Dave Lawton should offer his customers:
pd.crosstab(index=bbb_nptb["to_offeri"], columns="count").apply(rsm.format_nr)
col_0 | count |
---|---|
to_offeri | |
Art | 10,253 |
Cook | 15,486 |
DIY | 14,261 |
Now let's create a table with the average purchase probabilities if we (1) sent the Art book to everyone, (2) sent the DIY book to everyone, (3) sent the Cook book to everyone, or (4) targeted each customer with the book they are most likely to buy according to our model with interactions:
bbb_nptb[["p_arti", "p_diyi", "p_cooki", "p_targeti"]].agg("mean").sort_values(
ascending=False
).apply(rsm.format_nr, perc=True)
p_targeti     22.5%
p_diyi        13.0%
p_cooki      10.83%
p_arti        8.99%
dtype: object
So far we have picked offers for each customer according to his/her predicted purchase probability. However, that is not the right criterion if offers differ in profitability. Let's assume the following: the profit from selling "The Art History of Florence" is \$6, the profit from selling "Paint Like a Pro" is \$4, and the profit from selling "Vegetarian Cooking for Everyone" is \$7.
Now, calculate the expected profit for each book and each customer (i.e., the predicted purchase probability times the margin on a sale). Let's use the prefix `ep_` for these variables, short for "expected profit":
bbb_nptb["ep_art"] = bbb_nptb["p_arti"] * 6
bbb_nptb["ep_diy"] = bbb_nptb["p_diyi"] * 4
bbb_nptb["ep_cook"] = bbb_nptb["p_cooki"] * 7
To determine which book to offer to maximize expected profit per customer, we can use `idxmax` again in the following command:
bbb_nptb["to_offer_ep"] = (
bbb_nptb[["ep_art", "ep_diy", "ep_cook"]]
.idxmax(axis=1)
.str.replace("ep_art", "Art")
.replace("ep_diy", "DIY")
.replace("ep_cook", "Cook")
)
Finally, create a variable `ep_target` that captures the result from targeting a customer with the book that has the highest expected profit:
bbb_nptb["ep_target"] = bbb_nptb[["ep_art", "ep_diy", "ep_cook"]].max(axis=1)
bbb_nptb
acctnum | offer | buyer | gender | state | zip | zip3 | first | last | book | ... | p_arti | p_diyi | p_cooki | to_offeri | p_targeti | ep_art | ep_diy | ep_cook | to_offer_ep | ep_target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10001 | Art | no | M | MD | 21229 | 212 | 37 | 35 | 29 | ... | 0.004341 | 0.000379 | 0.011592 | Cook | 0.011592 | 0.026048 | 0.001518 | 0.081146 | Cook | 0.081146 |
1 | 10002 | Art | no | M | PA | 18212 | 182 | 29 | 13 | 45 | ... | 0.063494 | 0.027393 | 0.058529 | Art | 0.063494 | 0.380965 | 0.109571 | 0.409706 | Cook | 0.409706 |
2 | 10003 | Cook | no | F | NJ | 07029 | 070 | 23 | 15 | 27 | ... | 0.055256 | 0.033189 | 0.019800 | Art | 0.055256 | 0.331538 | 0.132754 | 0.138601 | Art | 0.331538 |
3 | 10004 | Cook | no | M | NY | 11354 | 113 | 15 | 9 | 29 | ... | 0.058249 | 0.050300 | 0.038185 | Art | 0.058249 | 0.349496 | 0.201199 | 0.267298 | Art | 0.349496 |
4 | 10005 | DIY | no | M | NY | 11733 | 117 | 9 | 7 | 27 | ... | 0.065339 | 0.056160 | 0.090641 | Cook | 0.090641 | 0.392032 | 0.224638 | 0.634486 | Cook | 0.634486 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
39995 | 49996 | NaN | NaN | M | PA | 19335 | 193 | 47 | 21 | 90 | ... | 0.027214 | 0.005189 | 0.084967 | Cook | 0.084967 | 0.163286 | 0.020757 | 0.594766 | Cook | 0.594766 |
39996 | 49997 | NaN | NaN | F | MD | 20653 | 206 | 11 | 9 | 26 | ... | 0.048489 | 0.367550 | 0.009093 | DIY | 0.367550 | 0.290936 | 1.470198 | 0.063653 | DIY | 1.470198 |
39997 | 49998 | NaN | NaN | M | PA | 19087 | 190 | 19 | 13 | 29 | ... | 0.041691 | 0.025337 | 0.035115 | Art | 0.041691 | 0.250147 | 0.101350 | 0.245807 | Art | 0.250147 |
39998 | 49999 | NaN | NaN | M | NJ | 08648 | 086 | 25 | 5 | 100 | ... | 0.308147 | 0.086098 | 0.144968 | Art | 0.308147 | 1.848881 | 0.344392 | 1.014776 | Art | 1.848881 |
39999 | 50000 | NaN | NaN | F | PA | 19047 | 190 | 49 | 17 | 105 | ... | 0.227788 | 0.010167 | 0.072679 | Art | 0.227788 | 1.366730 | 0.040669 | 0.508755 | Art | 1.366730 |
40000 rows × 36 columns
Let's create a `crosstab` to see which book(s) Dave Lawton should offer his customers:
pd.crosstab(index=bbb_nptb["to_offer_ep"], columns="count").applymap(rsm.format_nr)
col_0 | count |
---|---|
to_offer_ep | |
Art | 13,067 |
Cook | 17,615 |
DIY | 9,318 |
Calculate the average expected profit if we (1) sent the Art book to everyone, (2) sent the DIY book to everyone, (3) sent the Cook book to everyone, or (4) targeted each customer with the book that has the highest expected profit for that customer:
(
bbb_nptb[["ep_art", "ep_diy", "ep_cook", "ep_target"]]
.agg("mean")
.sort_values(ascending=False)
.apply(rsm.format_nr, sym="$")
)
ep_target    $1.26
ep_cook      $0.76
ep_art       $0.54
ep_diy       $0.52
dtype: object
The expected profit per customer from targeting is substantially higher, as we might expect. If we extrapolate this result to the remaining 520,000 customers in the BBB database, we expect the following profit:
profit_logit = bbb_nptb["ep_target"].agg("mean") * 520000
print(f"Expected profit from offer customization: ${rsm.format_nr(profit_logit)}")
Expected profit from offer customization: $654,514.7
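To put that number in context, here is a minimal sketch (under the same 520,000-customer extrapolation) of how targeting compares to the best uniform policy, i.e., mailing the Cook book to everyone:

# expected profit if all remaining customers simply received the Cook offer
profit_uniform = bbb_nptb["ep_cook"].agg("mean") * 520000
print(f"Best uniform policy (Cook): ${rsm.format_nr(profit_uniform)}")
print(f"Gain from targeting: ${rsm.format_nr(profit_logit - profit_uniform)}")

Given the rounded averages above (\$1.26 vs. \$0.76 per customer), the gain from customizing the offer is on the order of \$260,000.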