检查VIF

[73]:
get_vif(data_train_woe,data_train_woe.columns,return_data=True)
[73]:
(2.97,
     variables  VIF
 0          ID 1.02
 1   LIMIT_BAL 1.48
 2         SEX 1.02
 3   EDUCATION 1.12
 4    MARRIAGE 1.06
 5         AGE 1.03
 6       PAY_0 1.95
 7       PAY_2 2.91
 8       PAY_3 2.73
 9       PAY_4 2.78
 10      PAY_5 2.97
 11      PAY_6 2.17
 12  BILL_AMT1 1.94
 13  BILL_AMT2 1.90
 14  BILL_AMT3 2.10
 15  BILL_AMT4 1.56
 16  BILL_AMT5 2.30
 17  BILL_AMT6 1.93
 18   PAY_AMT1 1.90
 19   PAY_AMT2 1.89
 20   PAY_AMT3 1.91
 21   PAY_AMT4 2.07
 22   PAY_AMT5 1.82
 23   PAY_AMT6 1.68)
[74]:
# 查看评分卡模型的参数
print('a:',a,'','b:',b)
a: 508.4396430011438  b: 72.13475204444818

预测模型分数

[75]:
# 预测训练集模型分数
data_train_score = get_predict_score(data_train,scorecard)
data_train_score
[75]:
PAY_0 PAY_0_Score PAY_AMT1 PAY_AMT1_Score LIMIT_BAL LIMIT_BAL_Score PAY_AMT2 PAY_AMT2_Score PAY_AMT3 PAY_AMT3_Score PAY_AMT4 PAY_AMT4_Score PAY_AMT5 PAY_AMT5_Score PAY_AMT6 PAY_AMT6_Score y Score Proba
0 -1 28.00 1671.00 0.00 120000.00 -6.00 380.00 -0.00 131062.00 14.00 2000.00 2.00 3000.00 3.00 3000.00 1.00 0 642.00 0.14
1 -2 28.00 1468.00 0.00 200000.00 14.00 2321.00 -0.00 163597.00 14.00 6680.00 5.00 3963.00 3.00 2514.00 1.00 0 665.00 0.10
2 0 43.00 4038.00 0.00 80000.00 -6.00 3199.00 -0.00 914.00 0.00 850.00 -1.00 2055.00 3.00 8318.00 1.00 0 640.00 0.14
3 0 43.00 1596.00 0.00 20000.00 -27.00 2000.00 -0.00 3000.00 0.00 0.00 -5.00 1600.00 -1.00 0.00 -2.00 0 608.00 0.20
4 0 43.00 3000.00 0.00 90000.00 -6.00 2000.00 -0.00 2000.00 0.00 2000.00 2.00 2000.00 -1.00 1087.00 -0.00 0 638.00 0.14
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
22495 2 -137.00 3000.00 0.00 50000.00 -6.00 2525.00 -0.00 3900.00 0.00 0.00 -5.00 2000.00 -1.00 4500.00 1.00 1 452.00 0.69
22496 -1 28.00 0.00 -7.00 210000.00 14.00 358.00 -0.00 12816.00 7.00 0.00 -5.00 102.00 -1.00 210.00 -0.00 0 636.00 0.15
22497 1 -38.00 0.00 -7.00 390000.00 28.00 1266.00 -0.00 0.00 -8.00 0.00 -5.00 0.00 -5.00 0.00 -2.00 0 563.00 0.32
22498 0 43.00 1700.00 0.00 30000.00 -27.00 1600.00 -0.00 1287.00 0.00 1296.00 -1.00 500.00 -1.00 1550.00 -0.00 0 614.00 0.19
22499 2 -137.00 6300.00 5.00 150000.00 14.00 6100.00 8.00 4900.00 7.00 0.00 -5.00 10200.00 9.00 5100.00 1.00 0 502.00 0.52

22500 rows × 19 columns

[76]:
# 预测测试集模型分数
data_test_score = get_predict_score(data_test,scorecard)
data_test_score
[76]:
PAY_0 PAY_0_Score PAY_AMT1 PAY_AMT1_Score LIMIT_BAL LIMIT_BAL_Score PAY_AMT2 PAY_AMT2_Score PAY_AMT3 PAY_AMT3_Score PAY_AMT4 PAY_AMT4_Score PAY_AMT5 PAY_AMT5_Score PAY_AMT6 PAY_AMT6_Score y Score Proba
0 -2 28.00 0.00 -7.00 400000.00 28.00 0.00 -11.00 0.00 -8.00 0.00 -5.00 0.00 -5.00 0.00 -2.00 0 621.00 0.18
1 0 43.00 2600.00 0.00 80000.00 -6.00 4300.00 -0.00 2000.00 0.00 2000.00 2.00 2000.00 -1.00 2000.00 -0.00 0 641.00 0.14
2 1 -38.00 0.00 -7.00 200000.00 14.00 2317.00 -0.00 7588.00 7.00 7614.00 5.00 14053.00 9.00 0.00 -2.00 0 591.00 0.25
3 -1 28.00 1087.00 0.00 20000.00 -27.00 1140.00 -0.00 0.00 -8.00 7014.00 5.00 800.00 -1.00 0.00 -2.00 0 598.00 0.23
4 2 -137.00 5000.00 5.00 70000.00 -6.00 3000.00 -0.00 2000.00 0.00 3000.00 2.00 5000.00 3.00 0.00 -2.00 0 468.00 0.65
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7495 0 43.00 1128.00 0.00 500000.00 28.00 1000.00 -0.00 0.00 -8.00 8479.00 5.00 236.00 -1.00 2990.00 1.00 0 671.00 0.10
7496 0 43.00 8080.00 5.00 110000.00 -6.00 14298.00 8.00 2519.00 0.00 6616.00 5.00 1953.00 -1.00 5300.00 1.00 0 658.00 0.12
7497 0 43.00 4539.00 0.00 150000.00 14.00 4218.00 -0.00 4204.00 0.00 3296.00 2.00 3408.00 3.00 3416.00 1.00 0 666.00 0.10
7498 0 43.00 5059.00 5.00 140000.00 -6.00 14659.00 8.00 5000.00 7.00 8000.00 5.00 5000.00 3.00 10000.00 3.00 0 671.00 0.10
7499 0 43.00 3500.00 0.00 80000.00 -6.00 2934.00 -0.00 897.00 0.00 929.00 -1.00 935.00 -1.00 984.00 -0.00 0 638.00 0.15

7500 rows × 19 columns

计算AUC 和 KS

[77]:
plot_roc_ks(data_train,scorecard)
../_images/tutorials_tutorials_assess_7_1.png

根据预测得分计算 KS

[78]:
data_proba = get_predict_score(data_train,
                                       scorecard,
                                       init_score=600,
                                       pdo=50,
                                       odds=0,
                                       target='y',
                                       precision=2)


plot_ks(data_train,data_proba)
../_images/tutorials_tutorials_assess_9_1.png
[79]:
# 计算auc 和 ks,并返回 ks 结果数据
plot_roc_ks(data_train,scorecard,return_data=True,precision=4)
../_images/tutorials_tutorials_assess_10_1.png
[79]:
No. fpr tpr thresholds ks
0 1 0.00 0.01 0.82 0.01
1 2 0.04 0.32 0.51 0.28
2 3 0.12 0.49 0.31 0.37
3 4 0.22 0.61 0.20 0.39
4 5 0.32 0.70 0.17 0.38
5 6 0.45 0.78 0.15 0.33
6 7 0.54 0.83 0.14 0.30
7 8 0.65 0.89 0.12 0.25
8 9 0.77 0.94 0.10 0.17
9 10 0.91 0.98 0.08 0.07
10 11 1.00 1.00 0.04 0.00

AUC 计算

[80]:
#根据训练集和评分卡计算auc
get_auc_by_card(data_train,scorecard)
[80]:
0.76
[81]:
#根据训练集预测得分,计算auc
get_auc(data_train_score)
[81]:
0.76

KS 计算

[82]:
#根据训练集和评分卡计算ks
get_ks_by_card(data_train,scorecard)
[82]:
0.39
[83]:
#根据训练集预测得分,计算ks
get_ks(data_train_score)
[83]:
0.39
[84]:
#根据训练集预测得分,计算ks,并返回数据
ks,ks_data = get_ks(data_train_score,return_data=True)
ks_data
[84]:
No. Proba #Total #Bad #Good %Total %Bad %Good %BadRate %CumBad %CumGood KS
0 1 (0.039, 0.08] 2834 187 2647 12.60% 3.79% 15.07% 6.60% 3.79% 15.07% 0.11
1 2 (0.08, 0.1] 2360 206 2154 10.49% 4.17% 12.26% 8.73% 7.96% 27.33% 0.19
2 3 (0.1, 0.12] 2245 223 2022 9.98% 4.52% 11.51% 9.93% 12.48% 38.85% 0.26
3 4 (0.12, 0.14] 3076 437 2639 13.67% 8.85% 15.03% 14.21% 21.33% 53.87% 0.33
4 5 (0.14, 0.15] 2080 292 1788 9.24% 5.92% 10.18% 14.04% 27.25% 64.05% 0.37
5 6 (0.15, 0.17] 1260 213 1047 5.60% 4.32% 5.96% 16.90% 31.56% 70.01% 0.38
6 7 (0.17, 0.2] 2296 490 1806 10.20% 9.93% 10.28% 21.34% 41.49% 80.29% 0.39
7 8 (0.2, 0.31] 1894 488 1406 8.42% 9.89% 8.01% 25.77% 51.38% 88.30% 0.37
8 9 (0.31, 0.511] 2205 844 1361 9.80% 17.10% 7.75% 38.28% 68.48% 96.05% 0.28
9 10 (0.511, 0.82] 2250 1556 694 10.00% 31.52% 3.95% 69.16% 100.00% 100.00% 0.00

查看评分卡分数分布 和 提升度

[85]:
# 查看训练集评分卡分数分布 和 提升度
score_dist(data_train_score)
[85]:
No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift
0 1 (-inf, 503.9] 2250 1556 694 10.00% 31.52% 3.95% 69.16% 21.94% 69.16% 10.00% 6.92
1 2 (503.9, 567.0] 2276 863 1413 10.12% 17.48% 8.04% 37.92% 21.94% 53.45% 20.12% 2.66
2 3 (567.0, 610.0] 2346 600 1746 10.43% 12.16% 9.94% 25.58% 21.94% 43.93% 30.54% 1.44
3 4 (610.0, 624.0] 2232 433 1799 9.92% 8.77% 10.24% 19.40% 21.94% 37.92% 40.46% 0.94
4 5 (624.0, 635.0] 2615 395 2220 11.62% 8.00% 12.64% 15.11% 21.94% 32.83% 52.08% 0.63
5 6 (635.0, 642.0] 1823 267 1556 8.10% 5.41% 8.86% 14.65% 21.94% 30.38% 60.19% 0.50
6 7 (642.0, 653.0] 2216 292 1924 9.85% 5.92% 10.95% 13.18% 21.94% 27.96% 70.04% 0.40
7 8 (653.0, 666.0] 2364 210 2154 10.51% 4.25% 12.26% 8.88% 21.94% 25.47% 80.54% 0.32
8 9 (666.0, 686.0] 2748 231 2517 12.21% 4.68% 14.33% 8.41% 21.94% 23.22% 92.76% 0.25
9 10 (686.0, inf] 1630 89 1541 7.24% 1.80% 8.77% 5.46% 21.94% 21.94% 100.00% 0.22
[86]:
# 查看训练集评分卡分数分布和提升度,分为5组
score_dist(data_train_score,qcut=5)
[86]:
No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift
0 1 (-inf, 567.0] 4526 2419 2107 20.12% 49.01% 12.00% 53.45% 21.94% 53.45% 20.12% 2.66
1 2 (567.0, 624.0] 4578 1033 3545 20.35% 20.93% 20.18% 22.56% 21.94% 37.92% 40.46% 0.94
2 3 (624.0, 642.0] 4438 662 3776 19.72% 13.41% 21.50% 14.92% 21.94% 30.38% 60.19% 0.50
3 4 (642.0, 666.0] 4580 502 4078 20.36% 10.17% 23.22% 10.96% 21.94% 25.47% 80.54% 0.32
4 5 (666.0, inf] 4378 320 4058 19.46% 6.48% 23.10% 7.31% 21.94% 21.94% 100.00% 0.22
[87]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组
view_score_dist(data_train_score,qcut=5)
[87]:
  No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift Lift.
0 1 (-inf, 567.0] 4526 2419 2107 20.12% 49.01% 12.00% 53.45% 21.94% 53.45% 20.12% 2.660000 2.660000
1 2 (567.0, 624.0] 4578 1033 3545 20.35% 20.93% 20.18% 22.56% 21.94% 37.92% 40.46% 0.940000 0.940000
2 3 (624.0, 642.0] 4438 662 3776 19.72% 13.41% 21.50% 14.92% 21.94% 30.38% 60.19% 0.500000 0.500000
3 4 (642.0, 666.0] 4580 502 4078 20.36% 10.17% 23.22% 10.96% 21.94% 25.47% 80.54% 0.320000 0.320000
4 5 (666.0, inf] 4378 320 4058 19.46% 6.48% 23.10% 7.31% 21.94% 21.94% 100.00% 0.220000 0.220000
[88]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组,并指定 color='green'
view_score_dist(data_train_score,qcut=5,color='green')
[88]:
  No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift Lift.
0 1 (-inf, 567.0] 4526 2419 2107 20.12% 49.01% 12.00% 53.45% 21.94% 53.45% 20.12% 2.660000 2.660000
1 2 (567.0, 624.0] 4578 1033 3545 20.35% 20.93% 20.18% 22.56% 21.94% 37.92% 40.46% 0.940000 0.940000
2 3 (624.0, 642.0] 4438 662 3776 19.72% 13.41% 21.50% 14.92% 21.94% 30.38% 60.19% 0.500000 0.500000
3 4 (642.0, 666.0] 4580 502 4078 20.36% 10.17% 23.22% 10.96% 21.94% 25.47% 80.54% 0.320000 0.320000
4 5 (666.0, inf] 4378 320 4058 19.46% 6.48% 23.10% 7.31% 21.94% 21.94% 100.00% 0.220000 0.220000
[89]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组,并指定 color='#02B057'(十六进制颜色值)
view_score_dist(data_train_score,qcut=5,color='#02B057')
[89]:
  No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift Lift.
0 1 (-inf, 567.0] 4526 2419 2107 20.12% 49.01% 12.00% 53.45% 21.94% 53.45% 20.12% 2.660000 2.660000
1 2 (567.0, 624.0] 4578 1033 3545 20.35% 20.93% 20.18% 22.56% 21.94% 37.92% 40.46% 0.940000 0.940000
2 3 (624.0, 642.0] 4438 662 3776 19.72% 13.41% 21.50% 14.92% 21.94% 30.38% 60.19% 0.500000 0.500000
3 4 (642.0, 666.0] 4580 502 4078 20.36% 10.17% 23.22% 10.96% 21.94% 25.47% 80.54% 0.320000 0.320000
4 5 (666.0, inf] 4378 320 4058 19.46% 6.48% 23.10% 7.31% 21.94% 21.94% 100.00% 0.220000 0.220000
[90]:
# 查看训练集评分卡分数分布 和 提升度,  标题显示为中文
score_dist(data_train_score,language='cn')
[90]:
序号 分数区间 #合计 #坏 #好 %合计 %坏 %好 %坏件率 %随机坏件率 %累计坏 %累计合计 提升度
0 1 (-inf, 503.9] 2250 1556 694 10.00% 31.52% 3.95% 69.16% 21.94% 69.16% 10.00% 6.92
1 2 (503.9, 567.0] 2276 863 1413 10.12% 17.48% 8.04% 37.92% 21.94% 53.45% 20.12% 2.66
2 3 (567.0, 610.0] 2346 600 1746 10.43% 12.16% 9.94% 25.58% 21.94% 43.93% 30.54% 1.44
3 4 (610.0, 624.0] 2232 433 1799 9.92% 8.77% 10.24% 19.40% 21.94% 37.92% 40.46% 0.94
4 5 (624.0, 635.0] 2615 395 2220 11.62% 8.00% 12.64% 15.11% 21.94% 32.83% 52.08% 0.63
5 6 (635.0, 642.0] 1823 267 1556 8.10% 5.41% 8.86% 14.65% 21.94% 30.38% 60.19% 0.50
6 7 (642.0, 653.0] 2216 292 1924 9.85% 5.92% 10.95% 13.18% 21.94% 27.96% 70.04% 0.40
7 8 (653.0, 666.0] 2364 210 2154 10.51% 4.25% 12.26% 8.88% 21.94% 25.47% 80.54% 0.32
8 9 (666.0, 686.0] 2748 231 2517 12.21% 4.68% 14.33% 8.41% 21.94% 23.22% 92.76% 0.25
9 10 (686.0, inf] 1630 89 1541 7.24% 1.80% 8.77% 5.46% 21.94% 21.94% 100.00% 0.22
[91]:
# 绘制提升度图
plot_lift(data_train_score)
../_images/tutorials_tutorials_assess_25_0.png
[92]:
# 绘制提升度图,并输出提升度表格
plot_lift(data_train_score,return_data=True)
../_images/tutorials_tutorials_assess_26_0.png
[92]:
No. Score Range #Total #Bad #Good %Total %Bad %Good %BadRate %BadRate Random %CumBad %CumTotal Lift
0 1 (-inf, 503.9] 2250 1556 694 10.00% 31.52% 3.95% 69.16% 21.94% 69.16% 10.00% 6.92
1 2 (503.9, 567.0] 2276 863 1413 10.12% 17.48% 8.04% 37.92% 21.94% 53.45% 20.12% 2.66
2 3 (567.0, 610.0] 2346 600 1746 10.43% 12.16% 9.94% 25.58% 21.94% 43.93% 30.54% 1.44
3 4 (610.0, 624.0] 2232 433 1799 9.92% 8.77% 10.24% 19.40% 21.94% 37.92% 40.46% 0.94
4 5 (624.0, 635.0] 2615 395 2220 11.62% 8.00% 12.64% 15.11% 21.94% 32.83% 52.08% 0.63
5 6 (635.0, 642.0] 1823 267 1556 8.10% 5.41% 8.86% 14.65% 21.94% 30.38% 60.19% 0.50
6 7 (642.0, 653.0] 2216 292 1924 9.85% 5.92% 10.95% 13.18% 21.94% 27.96% 70.04% 0.40
7 8 (653.0, 666.0] 2364 210 2154 10.51% 4.25% 12.26% 8.88% 21.94% 25.47% 80.54% 0.32
8 9 (666.0, 686.0] 2748 231 2517 12.21% 4.68% 14.33% 8.41% 21.94% 23.22% 92.76% 0.25
9 10 (686.0, inf] 1630 89 1541 7.24% 1.80% 8.77% 5.46% 21.94% 21.94% 100.00% 0.22
[93]:
# 查看训练集分数分布

import matplotlib.pyplot as plt
plt.style.use('default')
fig=plt.figure()
ax1=fig.add_subplot(1,1,1)
ax1.hist(data_train_score['Score'] )
plt.show()
../_images/tutorials_tutorials_assess_27_0.png

模型稳定性 PSI 计算

[94]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)

# 按照等频分箱,分为10组,计算模型得分 Score 的PSI
get_psi(data_train_score,data_test_score)
[94]:
No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI
0 1 Score (-inf, 503.9] 3018 2250 768 10.06% 10.00% 10.24% 0.00 0.07
1 2 Score (503.9, 567.0] 3026 2276 750 10.09% 10.12% 10.00% 0.00 0.07
2 3 Score (567.0, 610.0] 3018 2346 672 10.06% 10.43% 8.96% 0.00 0.07
3 4 Score (610.0, 624.0] 2945 2232 713 9.82% 9.92% 9.51% 0.00 0.07
4 5 Score (624.0, 635.0] 3065 2615 450 10.22% 11.62% 6.00% 0.04 0.07
5 6 Score (635.0, 642.0] 2706 1823 883 9.02% 8.10% 11.77% 0.01 0.07
6 7 Score (642.0, 653.0] 3054 2216 838 10.18% 9.85% 11.17% 0.00 0.07
7 8 Score (653.0, 666.0] 3151 2364 787 10.50% 10.51% 10.49% 0.00 0.07
8 9 Score (666.0, 686.0] 3555 2748 807 11.85% 12.21% 10.76% 0.00 0.07
9 10 Score (686.0, inf] 2462 1630 832 8.21% 7.24% 11.09% 0.02 0.07
[95]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI
view_psi(data_train_score,data_test_score)
[95]:
  No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI PSI.
0 1 Score (-inf, 503.9] 3018 2250 768 10.06% 10.00% 10.24% 0.000000 0.070000 0.000000
1 2 Score (503.9, 567.0] 3026 2276 750 10.09% 10.12% 10.00% 0.000000 0.070000 0.000000
2 3 Score (567.0, 610.0] 3018 2346 672 10.06% 10.43% 8.96% 0.000000 0.070000 0.000000
3 4 Score (610.0, 624.0] 2945 2232 713 9.82% 9.92% 9.51% 0.000000 0.070000 0.000000
4 5 Score (624.0, 635.0] 3065 2615 450 10.22% 11.62% 6.00% 0.040000 0.070000 0.040000
5 6 Score (635.0, 642.0] 2706 1823 883 9.02% 8.10% 11.77% 0.010000 0.070000 0.010000
6 7 Score (642.0, 653.0] 3054 2216 838 10.18% 9.85% 11.17% 0.000000 0.070000 0.000000
7 8 Score (653.0, 666.0] 3151 2364 787 10.50% 10.51% 10.49% 0.000000 0.070000 0.000000
8 9 Score (666.0, 686.0] 3555 2748 807 11.85% 12.21% 10.76% 0.000000 0.070000 0.000000
9 10 Score (686.0, inf] 2462 1630 832 8.21% 7.24% 11.09% 0.020000 0.070000 0.020000
[96]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI,并指定 color='green'
view_psi(data_train_score,data_test_score,color='green')
[96]:
  No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI PSI.
0 1 Score (-inf, 503.9] 3018 2250 768 10.06% 10.00% 10.24% 0.000000 0.070000 0.000000
1 2 Score (503.9, 567.0] 3026 2276 750 10.09% 10.12% 10.00% 0.000000 0.070000 0.000000
2 3 Score (567.0, 610.0] 3018 2346 672 10.06% 10.43% 8.96% 0.000000 0.070000 0.000000
3 4 Score (610.0, 624.0] 2945 2232 713 9.82% 9.92% 9.51% 0.000000 0.070000 0.000000
4 5 Score (624.0, 635.0] 3065 2615 450 10.22% 11.62% 6.00% 0.040000 0.070000 0.040000
5 6 Score (635.0, 642.0] 2706 1823 883 9.02% 8.10% 11.77% 0.010000 0.070000 0.010000
6 7 Score (642.0, 653.0] 3054 2216 838 10.18% 9.85% 11.17% 0.000000 0.070000 0.000000
7 8 Score (653.0, 666.0] 3151 2364 787 10.50% 10.51% 10.49% 0.000000 0.070000 0.000000
8 9 Score (666.0, 686.0] 3555 2748 807 11.85% 12.21% 10.76% 0.000000 0.070000 0.000000
9 10 Score (686.0, inf] 2462 1630 832 8.21% 7.24% 11.09% 0.020000 0.070000 0.020000
[97]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI,并指定 color='#02B057'(十六进制颜色值)
view_psi(data_train_score,data_test_score,color='#02B057')
[97]:
  No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI PSI.
0 1 Score (-inf, 503.9] 3018 2250 768 10.06% 10.00% 10.24% 0.000000 0.070000 0.000000
1 2 Score (503.9, 567.0] 3026 2276 750 10.09% 10.12% 10.00% 0.000000 0.070000 0.000000
2 3 Score (567.0, 610.0] 3018 2346 672 10.06% 10.43% 8.96% 0.000000 0.070000 0.000000
3 4 Score (610.0, 624.0] 2945 2232 713 9.82% 9.92% 9.51% 0.000000 0.070000 0.000000
4 5 Score (624.0, 635.0] 3065 2615 450 10.22% 11.62% 6.00% 0.040000 0.070000 0.040000
5 6 Score (635.0, 642.0] 2706 1823 883 9.02% 8.10% 11.77% 0.010000 0.070000 0.010000
6 7 Score (642.0, 653.0] 3054 2216 838 10.18% 9.85% 11.17% 0.000000 0.070000 0.000000
7 8 Score (653.0, 666.0] 3151 2364 787 10.50% 10.51% 10.49% 0.000000 0.070000 0.000000
8 9 Score (666.0, 686.0] 3555 2748 807 11.85% 12.21% 10.76% 0.000000 0.070000 0.000000
9 10 Score (686.0, inf] 2462 1630 832 8.21% 7.24% 11.09% 0.020000 0.070000 0.020000
[98]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)

# 按照等频分箱,分为5组,计算PSI
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',qcut=5,precision=4)
[98]:
No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI
0 1 LIMIT_BAL (-inf, 50000.0] 7676 5855 1821 25.5867% 26.0222% 24.2800% 0.00 0.00
1 2 LIMIT_BAL (50000.0, 100000.0] 4822 3593 1229 16.0733% 15.9689% 16.3867% 0.00 0.00
2 3 LIMIT_BAL (100000.0, 180000.0] 6123 4548 1575 20.4100% 20.2133% 21.0000% 0.00 0.00
3 4 LIMIT_BAL (180000.0, 270000.0] 5421 4035 1386 18.0700% 17.9333% 18.4800% 0.00 0.00
4 5 LIMIT_BAL (270000.0, inf] 5958 4469 1489 19.8600% 19.8622% 19.8533% 0.00 0.00
[99]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)

# 按照指定的切分点,计算PSI
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',bins=[-inf,30000,50000,100000,inf],precision=4)
[99]:
No. Name Bins Range #Total #Actual #Expected %Total %Actual %Expected PSI Total PSI
0 1 LIMIT_BAL (-inf, 30000.0] 4111 3109 1002 13.7033% 13.8178% 13.3600% 0.00 0.00
1 2 LIMIT_BAL (30000.0, 50000.0] 3649 2746 903 12.1633% 12.2044% 12.0400% 0.00 0.00
2 3 LIMIT_BAL (50000.0, 100000.0] 4794 3593 1201 15.9800% 15.9689% 16.0133% 0.00 0.00
3 4 LIMIT_BAL (100000.0, inf] 17446 13052 4394 58.1533% 58.0089% 58.5867% 0.00 0.00
[100]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)

# 按照指定的切分点,计算PSI,标题显示为中文
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',bins=[-inf,30000,50000,100000,inf],precision=4,language='cn')
[100]:
序号 名称 分组 #合计 #实际 #期望 %合计 %实际 %期望 PSI PSI 合计
0 1 LIMIT_BAL (-inf, 30000.0] 4111 3109 1002 13.7033% 13.8178% 13.3600% 0.00 0.00
1 2 LIMIT_BAL (30000.0, 50000.0] 3649 2746 903 12.1633% 12.2044% 12.0400% 0.00 0.00
2 3 LIMIT_BAL (50000.0, 100000.0] 4794 3593 1201 15.9800% 15.9689% 16.0133% 0.00 0.00
3 4 LIMIT_BAL (100000.0, inf] 17446 13052 4394 58.1533% 58.0089% 58.5867% 0.00 0.00
[101]:
# 批量计算所有特征的PSI
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)

get_data_psi(data_train_score,data_test_score,precision=4)
[101]:
Name PSI
16 Score 0.07
8 PAY_AMT3 0.00
4 LIMIT_BAL 0.00
17 Proba 0.00
6 PAY_AMT2 0.00
14 PAY_AMT6 0.00
10 PAY_AMT4 0.00
12 PAY_AMT5 0.00
2 PAY_AMT1 0.00
7 PAY_AMT2_Score 0.00
13 PAY_AMT5_Score 0.00
0 PAY_0 0.00
1 PAY_0_Score 0.00
5 LIMIT_BAL_Score 0.00
9 PAY_AMT3_Score 0.00
15 PAY_AMT6_Score 0.00
11 PAY_AMT4_Score 0.00
3 PAY_AMT1_Score 0.00

模型应用分析

决策树分析

[102]:
plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini')
[102]:
../_images/tutorials_tutorials_assess_39_0.svg
[103]:
# 绘制决策树,并保存为本地 SVG 文件(out_file='test.svg')
# plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini',out_file='test.svg')
[103]:
0
[104]:
# 绘制决策树,并保存为本地 PDF 文件(out_file='test.pdf')
# plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini',out_file='test.pdf')
[104]:
0