检查VIF
[73]:
get_vif(data_train_woe,data_train_woe.columns,return_data=True)
[73]:
(2.97,
variables VIF
0 ID 1.02
1 LIMIT_BAL 1.48
2 SEX 1.02
3 EDUCATION 1.12
4 MARRIAGE 1.06
5 AGE 1.03
6 PAY_0 1.95
7 PAY_2 2.91
8 PAY_3 2.73
9 PAY_4 2.78
10 PAY_5 2.97
11 PAY_6 2.17
12 BILL_AMT1 1.94
13 BILL_AMT2 1.90
14 BILL_AMT3 2.10
15 BILL_AMT4 1.56
16 BILL_AMT5 2.30
17 BILL_AMT6 1.93
18 PAY_AMT1 1.90
19 PAY_AMT2 1.89
20 PAY_AMT3 1.91
21 PAY_AMT4 2.07
22 PAY_AMT5 1.82
23 PAY_AMT6 1.68)
[74]:
# 查看评分卡模型的参数
print('a:',a,'','b:',b)
a: 508.4396430011438 b: 72.13475204444818
预测模型分数
[75]:
# 预测训练集模型分数
data_train_score = get_predict_score(data_train,scorecard)
data_train_score
[75]:
PAY_0 | PAY_0_Score | PAY_AMT1 | PAY_AMT1_Score | LIMIT_BAL | LIMIT_BAL_Score | PAY_AMT2 | PAY_AMT2_Score | PAY_AMT3 | PAY_AMT3_Score | PAY_AMT4 | PAY_AMT4_Score | PAY_AMT5 | PAY_AMT5_Score | PAY_AMT6 | PAY_AMT6_Score | y | Score | Proba | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -1 | 28.00 | 1671.00 | 0.00 | 120000.00 | -6.00 | 380.00 | -0.00 | 131062.00 | 14.00 | 2000.00 | 2.00 | 3000.00 | 3.00 | 3000.00 | 1.00 | 0 | 642.00 | 0.14 |
1 | -2 | 28.00 | 1468.00 | 0.00 | 200000.00 | 14.00 | 2321.00 | -0.00 | 163597.00 | 14.00 | 6680.00 | 5.00 | 3963.00 | 3.00 | 2514.00 | 1.00 | 0 | 665.00 | 0.10 |
2 | 0 | 43.00 | 4038.00 | 0.00 | 80000.00 | -6.00 | 3199.00 | -0.00 | 914.00 | 0.00 | 850.00 | -1.00 | 2055.00 | 3.00 | 8318.00 | 1.00 | 0 | 640.00 | 0.14 |
3 | 0 | 43.00 | 1596.00 | 0.00 | 20000.00 | -27.00 | 2000.00 | -0.00 | 3000.00 | 0.00 | 0.00 | -5.00 | 1600.00 | -1.00 | 0.00 | -2.00 | 0 | 608.00 | 0.20 |
4 | 0 | 43.00 | 3000.00 | 0.00 | 90000.00 | -6.00 | 2000.00 | -0.00 | 2000.00 | 0.00 | 2000.00 | 2.00 | 2000.00 | -1.00 | 1087.00 | -0.00 | 0 | 638.00 | 0.14 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
22495 | 2 | -137.00 | 3000.00 | 0.00 | 50000.00 | -6.00 | 2525.00 | -0.00 | 3900.00 | 0.00 | 0.00 | -5.00 | 2000.00 | -1.00 | 4500.00 | 1.00 | 1 | 452.00 | 0.69 |
22496 | -1 | 28.00 | 0.00 | -7.00 | 210000.00 | 14.00 | 358.00 | -0.00 | 12816.00 | 7.00 | 0.00 | -5.00 | 102.00 | -1.00 | 210.00 | -0.00 | 0 | 636.00 | 0.15 |
22497 | 1 | -38.00 | 0.00 | -7.00 | 390000.00 | 28.00 | 1266.00 | -0.00 | 0.00 | -8.00 | 0.00 | -5.00 | 0.00 | -5.00 | 0.00 | -2.00 | 0 | 563.00 | 0.32 |
22498 | 0 | 43.00 | 1700.00 | 0.00 | 30000.00 | -27.00 | 1600.00 | -0.00 | 1287.00 | 0.00 | 1296.00 | -1.00 | 500.00 | -1.00 | 1550.00 | -0.00 | 0 | 614.00 | 0.19 |
22499 | 2 | -137.00 | 6300.00 | 5.00 | 150000.00 | 14.00 | 6100.00 | 8.00 | 4900.00 | 7.00 | 0.00 | -5.00 | 10200.00 | 9.00 | 5100.00 | 1.00 | 0 | 502.00 | 0.52 |
22500 rows × 19 columns
[76]:
# 预测测试集模型分数
data_test_score = get_predict_score(data_test,scorecard)
data_test_score
[76]:
PAY_0 | PAY_0_Score | PAY_AMT1 | PAY_AMT1_Score | LIMIT_BAL | LIMIT_BAL_Score | PAY_AMT2 | PAY_AMT2_Score | PAY_AMT3 | PAY_AMT3_Score | PAY_AMT4 | PAY_AMT4_Score | PAY_AMT5 | PAY_AMT5_Score | PAY_AMT6 | PAY_AMT6_Score | y | Score | Proba | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -2 | 28.00 | 0.00 | -7.00 | 400000.00 | 28.00 | 0.00 | -11.00 | 0.00 | -8.00 | 0.00 | -5.00 | 0.00 | -5.00 | 0.00 | -2.00 | 0 | 621.00 | 0.18 |
1 | 0 | 43.00 | 2600.00 | 0.00 | 80000.00 | -6.00 | 4300.00 | -0.00 | 2000.00 | 0.00 | 2000.00 | 2.00 | 2000.00 | -1.00 | 2000.00 | -0.00 | 0 | 641.00 | 0.14 |
2 | 1 | -38.00 | 0.00 | -7.00 | 200000.00 | 14.00 | 2317.00 | -0.00 | 7588.00 | 7.00 | 7614.00 | 5.00 | 14053.00 | 9.00 | 0.00 | -2.00 | 0 | 591.00 | 0.25 |
3 | -1 | 28.00 | 1087.00 | 0.00 | 20000.00 | -27.00 | 1140.00 | -0.00 | 0.00 | -8.00 | 7014.00 | 5.00 | 800.00 | -1.00 | 0.00 | -2.00 | 0 | 598.00 | 0.23 |
4 | 2 | -137.00 | 5000.00 | 5.00 | 70000.00 | -6.00 | 3000.00 | -0.00 | 2000.00 | 0.00 | 3000.00 | 2.00 | 5000.00 | 3.00 | 0.00 | -2.00 | 0 | 468.00 | 0.65 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7495 | 0 | 43.00 | 1128.00 | 0.00 | 500000.00 | 28.00 | 1000.00 | -0.00 | 0.00 | -8.00 | 8479.00 | 5.00 | 236.00 | -1.00 | 2990.00 | 1.00 | 0 | 671.00 | 0.10 |
7496 | 0 | 43.00 | 8080.00 | 5.00 | 110000.00 | -6.00 | 14298.00 | 8.00 | 2519.00 | 0.00 | 6616.00 | 5.00 | 1953.00 | -1.00 | 5300.00 | 1.00 | 0 | 658.00 | 0.12 |
7497 | 0 | 43.00 | 4539.00 | 0.00 | 150000.00 | 14.00 | 4218.00 | -0.00 | 4204.00 | 0.00 | 3296.00 | 2.00 | 3408.00 | 3.00 | 3416.00 | 1.00 | 0 | 666.00 | 0.10 |
7498 | 0 | 43.00 | 5059.00 | 5.00 | 140000.00 | -6.00 | 14659.00 | 8.00 | 5000.00 | 7.00 | 8000.00 | 5.00 | 5000.00 | 3.00 | 10000.00 | 3.00 | 0 | 671.00 | 0.10 |
7499 | 0 | 43.00 | 3500.00 | 0.00 | 80000.00 | -6.00 | 2934.00 | -0.00 | 897.00 | 0.00 | 929.00 | -1.00 | 935.00 | -1.00 | 984.00 | -0.00 | 0 | 638.00 | 0.15 |
7500 rows × 19 columns
计算AUC 和 KS
[77]:
plot_roc_ks(data_train,scorecard)
根据预测得分计算 KS
[78]:
data_proba = get_predict_score(data_train,
scorecard,
init_score=600,
pdo=50,
odds=0,
target='y',
precision=2)
plot_ks(data_train,data_proba)
[79]:
# 计算auc 和 ks,并返回 ks 结果数据
plot_roc_ks(data_train,scorecard,return_data=True,precision=4)
[79]:
No. | fpr | tpr | thresholds | ks | |
---|---|---|---|---|---|
0 | 1 | 0.00 | 0.01 | 0.82 | 0.01 |
1 | 2 | 0.04 | 0.32 | 0.51 | 0.28 |
2 | 3 | 0.12 | 0.49 | 0.31 | 0.37 |
3 | 4 | 0.22 | 0.61 | 0.20 | 0.39 |
4 | 5 | 0.32 | 0.70 | 0.17 | 0.38 |
5 | 6 | 0.45 | 0.78 | 0.15 | 0.33 |
6 | 7 | 0.54 | 0.83 | 0.14 | 0.30 |
7 | 8 | 0.65 | 0.89 | 0.12 | 0.25 |
8 | 9 | 0.77 | 0.94 | 0.10 | 0.17 |
9 | 10 | 0.91 | 0.98 | 0.08 | 0.07 |
10 | 11 | 1.00 | 1.00 | 0.04 | 0.00 |
AUC 计算
[80]:
#根据训练集和评分卡计算auc
get_auc_by_card(data_train,scorecard)
[80]:
0.76
[81]:
#根据训练集预测得分,计算auc
get_auc(data_train_score)
[81]:
0.76
KS 计算
[82]:
#根据训练集和评分卡计算ks
get_ks_by_card(data_train,scorecard)
[82]:
0.39
[83]:
#根据训练集预测得分,计算ks
get_ks(data_train_score)
[83]:
0.39
[84]:
#根据训练集预测得分,计算ks,并返回数据
ks,ks_data = get_ks(data_train_score,return_data=True)
ks_data
[84]:
No. | Proba | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %CumBad | %CumGood | KS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (0.039, 0.08] | 2834 | 187 | 2647 | 12.60% | 3.79% | 15.07% | 6.60% | 3.79% | 15.07% | 0.11 |
1 | 2 | (0.08, 0.1] | 2360 | 206 | 2154 | 10.49% | 4.17% | 12.26% | 8.73% | 7.96% | 27.33% | 0.19 |
2 | 3 | (0.1, 0.12] | 2245 | 223 | 2022 | 9.98% | 4.52% | 11.51% | 9.93% | 12.48% | 38.85% | 0.26 |
3 | 4 | (0.12, 0.14] | 3076 | 437 | 2639 | 13.67% | 8.85% | 15.03% | 14.21% | 21.33% | 53.87% | 0.33 |
4 | 5 | (0.14, 0.15] | 2080 | 292 | 1788 | 9.24% | 5.92% | 10.18% | 14.04% | 27.25% | 64.05% | 0.37 |
5 | 6 | (0.15, 0.17] | 1260 | 213 | 1047 | 5.60% | 4.32% | 5.96% | 16.90% | 31.56% | 70.01% | 0.38 |
6 | 7 | (0.17, 0.2] | 2296 | 490 | 1806 | 10.20% | 9.93% | 10.28% | 21.34% | 41.49% | 80.29% | 0.39 |
7 | 8 | (0.2, 0.31] | 1894 | 488 | 1406 | 8.42% | 9.89% | 8.01% | 25.77% | 51.38% | 88.30% | 0.37 |
8 | 9 | (0.31, 0.511] | 2205 | 844 | 1361 | 9.80% | 17.10% | 7.75% | 38.28% | 68.48% | 96.05% | 0.28 |
9 | 10 | (0.511, 0.82] | 2250 | 1556 | 694 | 10.00% | 31.52% | 3.95% | 69.16% | 100.00% | 100.00% | 0.00 |
查看评分卡分数分布 和 提升度
[85]:
# 查看训练集评分卡分数分布 和 提升度
score_dist(data_train_score)
[85]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 503.9] | 2250 | 1556 | 694 | 10.00% | 31.52% | 3.95% | 69.16% | 21.94% | 69.16% | 10.00% | 6.92 |
1 | 2 | (503.9, 567.0] | 2276 | 863 | 1413 | 10.12% | 17.48% | 8.04% | 37.92% | 21.94% | 53.45% | 20.12% | 2.66 |
2 | 3 | (567.0, 610.0] | 2346 | 600 | 1746 | 10.43% | 12.16% | 9.94% | 25.58% | 21.94% | 43.93% | 30.54% | 1.44 |
3 | 4 | (610.0, 624.0] | 2232 | 433 | 1799 | 9.92% | 8.77% | 10.24% | 19.40% | 21.94% | 37.92% | 40.46% | 0.94 |
4 | 5 | (624.0, 635.0] | 2615 | 395 | 2220 | 11.62% | 8.00% | 12.64% | 15.11% | 21.94% | 32.83% | 52.08% | 0.63 |
5 | 6 | (635.0, 642.0] | 1823 | 267 | 1556 | 8.10% | 5.41% | 8.86% | 14.65% | 21.94% | 30.38% | 60.19% | 0.50 |
6 | 7 | (642.0, 653.0] | 2216 | 292 | 1924 | 9.85% | 5.92% | 10.95% | 13.18% | 21.94% | 27.96% | 70.04% | 0.40 |
7 | 8 | (653.0, 666.0] | 2364 | 210 | 2154 | 10.51% | 4.25% | 12.26% | 8.88% | 21.94% | 25.47% | 80.54% | 0.32 |
8 | 9 | (666.0, 686.0] | 2748 | 231 | 2517 | 12.21% | 4.68% | 14.33% | 8.41% | 21.94% | 23.22% | 92.76% | 0.25 |
9 | 10 | (686.0, inf] | 1630 | 89 | 1541 | 7.24% | 1.80% | 8.77% | 5.46% | 21.94% | 21.94% | 100.00% | 0.22 |
[86]:
# 查看训练集评分卡分数分布和提升度,分为5组
score_dist(data_train_score,qcut=5)
[86]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 567.0] | 4526 | 2419 | 2107 | 20.12% | 49.01% | 12.00% | 53.45% | 21.94% | 53.45% | 20.12% | 2.66 |
1 | 2 | (567.0, 624.0] | 4578 | 1033 | 3545 | 20.35% | 20.93% | 20.18% | 22.56% | 21.94% | 37.92% | 40.46% | 0.94 |
2 | 3 | (624.0, 642.0] | 4438 | 662 | 3776 | 19.72% | 13.41% | 21.50% | 14.92% | 21.94% | 30.38% | 60.19% | 0.50 |
3 | 4 | (642.0, 666.0] | 4580 | 502 | 4078 | 20.36% | 10.17% | 23.22% | 10.96% | 21.94% | 25.47% | 80.54% | 0.32 |
4 | 5 | (666.0, inf] | 4378 | 320 | 4058 | 19.46% | 6.48% | 23.10% | 7.31% | 21.94% | 21.94% | 100.00% | 0.22 |
[87]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组(输出表多一列 Lift.)
view_score_dist(data_train_score,qcut=5)
[87]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | Lift. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 567.0] | 4526 | 2419 | 2107 | 20.12% | 49.01% | 12.00% | 53.45% | 21.94% | 53.45% | 20.12% | 2.660000 | 2.660000 |
1 | 2 | (567.0, 624.0] | 4578 | 1033 | 3545 | 20.35% | 20.93% | 20.18% | 22.56% | 21.94% | 37.92% | 40.46% | 0.940000 | 0.940000 |
2 | 3 | (624.0, 642.0] | 4438 | 662 | 3776 | 19.72% | 13.41% | 21.50% | 14.92% | 21.94% | 30.38% | 60.19% | 0.500000 | 0.500000 |
3 | 4 | (642.0, 666.0] | 4580 | 502 | 4078 | 20.36% | 10.17% | 23.22% | 10.96% | 21.94% | 25.47% | 80.54% | 0.320000 | 0.320000 |
4 | 5 | (666.0, inf] | 4378 | 320 | 4058 | 19.46% | 6.48% | 23.10% | 7.31% | 21.94% | 21.94% | 100.00% | 0.220000 | 0.220000 |
[88]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组,并通过 color 参数指定颜色名称
view_score_dist(data_train_score,qcut=5,color='green')
[88]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | Lift. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 567.0] | 4526 | 2419 | 2107 | 20.12% | 49.01% | 12.00% | 53.45% | 21.94% | 53.45% | 20.12% | 2.660000 | 2.660000 |
1 | 2 | (567.0, 624.0] | 4578 | 1033 | 3545 | 20.35% | 20.93% | 20.18% | 22.56% | 21.94% | 37.92% | 40.46% | 0.940000 | 0.940000 |
2 | 3 | (624.0, 642.0] | 4438 | 662 | 3776 | 19.72% | 13.41% | 21.50% | 14.92% | 21.94% | 30.38% | 60.19% | 0.500000 | 0.500000 |
3 | 4 | (642.0, 666.0] | 4580 | 502 | 4078 | 20.36% | 10.17% | 23.22% | 10.96% | 21.94% | 25.47% | 80.54% | 0.320000 | 0.320000 |
4 | 5 | (666.0, inf] | 4378 | 320 | 4058 | 19.46% | 6.48% | 23.10% | 7.31% | 21.94% | 21.94% | 100.00% | 0.220000 | 0.220000 |
[89]:
# 使用 view_score_dist 查看训练集评分卡分数分布和提升度,分为5组,并通过 color 参数指定十六进制颜色值
view_score_dist(data_train_score,qcut=5,color='#02B057')
[89]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | Lift. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 567.0] | 4526 | 2419 | 2107 | 20.12% | 49.01% | 12.00% | 53.45% | 21.94% | 53.45% | 20.12% | 2.660000 | 2.660000 |
1 | 2 | (567.0, 624.0] | 4578 | 1033 | 3545 | 20.35% | 20.93% | 20.18% | 22.56% | 21.94% | 37.92% | 40.46% | 0.940000 | 0.940000 |
2 | 3 | (624.0, 642.0] | 4438 | 662 | 3776 | 19.72% | 13.41% | 21.50% | 14.92% | 21.94% | 30.38% | 60.19% | 0.500000 | 0.500000 |
3 | 4 | (642.0, 666.0] | 4580 | 502 | 4078 | 20.36% | 10.17% | 23.22% | 10.96% | 21.94% | 25.47% | 80.54% | 0.320000 | 0.320000 |
4 | 5 | (666.0, inf] | 4378 | 320 | 4058 | 19.46% | 6.48% | 23.10% | 7.31% | 21.94% | 21.94% | 100.00% | 0.220000 | 0.220000 |
[90]:
# 查看训练集评分卡分数分布 和 提升度, 标题显示为中文
score_dist(data_train_score,language='cn')
[90]:
序号 | 分数区间 | #合计 | #坏 | #好 | %合计 | %坏 | %好 | %坏件率 | %随机坏件率 | %累计坏 | %累计合计 | 提升度 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 503.9] | 2250 | 1556 | 694 | 10.00% | 31.52% | 3.95% | 69.16% | 21.94% | 69.16% | 10.00% | 6.92 |
1 | 2 | (503.9, 567.0] | 2276 | 863 | 1413 | 10.12% | 17.48% | 8.04% | 37.92% | 21.94% | 53.45% | 20.12% | 2.66 |
2 | 3 | (567.0, 610.0] | 2346 | 600 | 1746 | 10.43% | 12.16% | 9.94% | 25.58% | 21.94% | 43.93% | 30.54% | 1.44 |
3 | 4 | (610.0, 624.0] | 2232 | 433 | 1799 | 9.92% | 8.77% | 10.24% | 19.40% | 21.94% | 37.92% | 40.46% | 0.94 |
4 | 5 | (624.0, 635.0] | 2615 | 395 | 2220 | 11.62% | 8.00% | 12.64% | 15.11% | 21.94% | 32.83% | 52.08% | 0.63 |
5 | 6 | (635.0, 642.0] | 1823 | 267 | 1556 | 8.10% | 5.41% | 8.86% | 14.65% | 21.94% | 30.38% | 60.19% | 0.50 |
6 | 7 | (642.0, 653.0] | 2216 | 292 | 1924 | 9.85% | 5.92% | 10.95% | 13.18% | 21.94% | 27.96% | 70.04% | 0.40 |
7 | 8 | (653.0, 666.0] | 2364 | 210 | 2154 | 10.51% | 4.25% | 12.26% | 8.88% | 21.94% | 25.47% | 80.54% | 0.32 |
8 | 9 | (666.0, 686.0] | 2748 | 231 | 2517 | 12.21% | 4.68% | 14.33% | 8.41% | 21.94% | 23.22% | 92.76% | 0.25 |
9 | 10 | (686.0, inf] | 1630 | 89 | 1541 | 7.24% | 1.80% | 8.77% | 5.46% | 21.94% | 21.94% | 100.00% | 0.22 |
[91]:
# 绘制提升度图
plot_lift(data_train_score)
[92]:
# 绘制提升度图,并输出提升度表格
plot_lift(data_train_score,return_data=True)
[92]:
No. | Score Range | #Total | #Bad | #Good | %Total | %Bad | %Good | %BadRate | %BadRate Random | %CumBad | %CumTotal | Lift | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | (-inf, 503.9] | 2250 | 1556 | 694 | 10.00% | 31.52% | 3.95% | 69.16% | 21.94% | 69.16% | 10.00% | 6.92 |
1 | 2 | (503.9, 567.0] | 2276 | 863 | 1413 | 10.12% | 17.48% | 8.04% | 37.92% | 21.94% | 53.45% | 20.12% | 2.66 |
2 | 3 | (567.0, 610.0] | 2346 | 600 | 1746 | 10.43% | 12.16% | 9.94% | 25.58% | 21.94% | 43.93% | 30.54% | 1.44 |
3 | 4 | (610.0, 624.0] | 2232 | 433 | 1799 | 9.92% | 8.77% | 10.24% | 19.40% | 21.94% | 37.92% | 40.46% | 0.94 |
4 | 5 | (624.0, 635.0] | 2615 | 395 | 2220 | 11.62% | 8.00% | 12.64% | 15.11% | 21.94% | 32.83% | 52.08% | 0.63 |
5 | 6 | (635.0, 642.0] | 1823 | 267 | 1556 | 8.10% | 5.41% | 8.86% | 14.65% | 21.94% | 30.38% | 60.19% | 0.50 |
6 | 7 | (642.0, 653.0] | 2216 | 292 | 1924 | 9.85% | 5.92% | 10.95% | 13.18% | 21.94% | 27.96% | 70.04% | 0.40 |
7 | 8 | (653.0, 666.0] | 2364 | 210 | 2154 | 10.51% | 4.25% | 12.26% | 8.88% | 21.94% | 25.47% | 80.54% | 0.32 |
8 | 9 | (666.0, 686.0] | 2748 | 231 | 2517 | 12.21% | 4.68% | 14.33% | 8.41% | 21.94% | 23.22% | 92.76% | 0.25 |
9 | 10 | (686.0, inf] | 1630 | 89 | 1541 | 7.24% | 1.80% | 8.77% | 5.46% | 21.94% | 21.94% | 100.00% | 0.22 |
[93]:
# 查看训练集分数分布
import matplotlib.pyplot as plt
plt.style.use('default')
fig=plt.figure()
ax1=fig.add_subplot(1,1,1)
ax1.hist(data_train_score['Score'] )
plt.show()
模型稳定性 PSI 计算
[94]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI
get_psi(data_train_score,data_test_score)
[94]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Score | (-inf, 503.9] | 3018 | 2250 | 768 | 10.06% | 10.00% | 10.24% | 0.00 | 0.07 |
1 | 2 | Score | (503.9, 567.0] | 3026 | 2276 | 750 | 10.09% | 10.12% | 10.00% | 0.00 | 0.07 |
2 | 3 | Score | (567.0, 610.0] | 3018 | 2346 | 672 | 10.06% | 10.43% | 8.96% | 0.00 | 0.07 |
3 | 4 | Score | (610.0, 624.0] | 2945 | 2232 | 713 | 9.82% | 9.92% | 9.51% | 0.00 | 0.07 |
4 | 5 | Score | (624.0, 635.0] | 3065 | 2615 | 450 | 10.22% | 11.62% | 6.00% | 0.04 | 0.07 |
5 | 6 | Score | (635.0, 642.0] | 2706 | 1823 | 883 | 9.02% | 8.10% | 11.77% | 0.01 | 0.07 |
6 | 7 | Score | (642.0, 653.0] | 3054 | 2216 | 838 | 10.18% | 9.85% | 11.17% | 0.00 | 0.07 |
7 | 8 | Score | (653.0, 666.0] | 3151 | 2364 | 787 | 10.50% | 10.51% | 10.49% | 0.00 | 0.07 |
8 | 9 | Score | (666.0, 686.0] | 3555 | 2748 | 807 | 11.85% | 12.21% | 10.76% | 0.00 | 0.07 |
9 | 10 | Score | (686.0, inf] | 2462 | 1630 | 832 | 8.21% | 7.24% | 11.09% | 0.02 | 0.07 |
[95]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI
view_psi(data_train_score,data_test_score)
[95]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | PSI. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Score | (-inf, 503.9] | 3018 | 2250 | 768 | 10.06% | 10.00% | 10.24% | 0.000000 | 0.070000 | 0.000000 |
1 | 2 | Score | (503.9, 567.0] | 3026 | 2276 | 750 | 10.09% | 10.12% | 10.00% | 0.000000 | 0.070000 | 0.000000 |
2 | 3 | Score | (567.0, 610.0] | 3018 | 2346 | 672 | 10.06% | 10.43% | 8.96% | 0.000000 | 0.070000 | 0.000000 |
3 | 4 | Score | (610.0, 624.0] | 2945 | 2232 | 713 | 9.82% | 9.92% | 9.51% | 0.000000 | 0.070000 | 0.000000 |
4 | 5 | Score | (624.0, 635.0] | 3065 | 2615 | 450 | 10.22% | 11.62% | 6.00% | 0.040000 | 0.070000 | 0.040000 |
5 | 6 | Score | (635.0, 642.0] | 2706 | 1823 | 883 | 9.02% | 8.10% | 11.77% | 0.010000 | 0.070000 | 0.010000 |
6 | 7 | Score | (642.0, 653.0] | 3054 | 2216 | 838 | 10.18% | 9.85% | 11.17% | 0.000000 | 0.070000 | 0.000000 |
7 | 8 | Score | (653.0, 666.0] | 3151 | 2364 | 787 | 10.50% | 10.51% | 10.49% | 0.000000 | 0.070000 | 0.000000 |
8 | 9 | Score | (666.0, 686.0] | 3555 | 2748 | 807 | 11.85% | 12.21% | 10.76% | 0.000000 | 0.070000 | 0.000000 |
9 | 10 | Score | (686.0, inf] | 2462 | 1630 | 832 | 8.21% | 7.24% | 11.09% | 0.020000 | 0.070000 | 0.020000 |
[96]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI,并通过 color 参数指定颜色名称
view_psi(data_train_score,data_test_score,color='green')
[96]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | PSI. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Score | (-inf, 503.9] | 3018 | 2250 | 768 | 10.06% | 10.00% | 10.24% | 0.000000 | 0.070000 | 0.000000 |
1 | 2 | Score | (503.9, 567.0] | 3026 | 2276 | 750 | 10.09% | 10.12% | 10.00% | 0.000000 | 0.070000 | 0.000000 |
2 | 3 | Score | (567.0, 610.0] | 3018 | 2346 | 672 | 10.06% | 10.43% | 8.96% | 0.000000 | 0.070000 | 0.000000 |
3 | 4 | Score | (610.0, 624.0] | 2945 | 2232 | 713 | 9.82% | 9.92% | 9.51% | 0.000000 | 0.070000 | 0.000000 |
4 | 5 | Score | (624.0, 635.0] | 3065 | 2615 | 450 | 10.22% | 11.62% | 6.00% | 0.040000 | 0.070000 | 0.040000 |
5 | 6 | Score | (635.0, 642.0] | 2706 | 1823 | 883 | 9.02% | 8.10% | 11.77% | 0.010000 | 0.070000 | 0.010000 |
6 | 7 | Score | (642.0, 653.0] | 3054 | 2216 | 838 | 10.18% | 9.85% | 11.17% | 0.000000 | 0.070000 | 0.000000 |
7 | 8 | Score | (653.0, 666.0] | 3151 | 2364 | 787 | 10.50% | 10.51% | 10.49% | 0.000000 | 0.070000 | 0.000000 |
8 | 9 | Score | (666.0, 686.0] | 3555 | 2748 | 807 | 11.85% | 12.21% | 10.76% | 0.000000 | 0.070000 | 0.000000 |
9 | 10 | Score | (686.0, inf] | 2462 | 1630 | 832 | 8.21% | 7.24% | 11.09% | 0.020000 | 0.070000 | 0.020000 |
[97]:
# 按照等频分箱,分为10组,计算模型得分 Score 的PSI,并通过 color 参数指定十六进制颜色值
view_psi(data_train_score,data_test_score,color='#02B057')
[97]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | PSI. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Score | (-inf, 503.9] | 3018 | 2250 | 768 | 10.06% | 10.00% | 10.24% | 0.000000 | 0.070000 | 0.000000 |
1 | 2 | Score | (503.9, 567.0] | 3026 | 2276 | 750 | 10.09% | 10.12% | 10.00% | 0.000000 | 0.070000 | 0.000000 |
2 | 3 | Score | (567.0, 610.0] | 3018 | 2346 | 672 | 10.06% | 10.43% | 8.96% | 0.000000 | 0.070000 | 0.000000 |
3 | 4 | Score | (610.0, 624.0] | 2945 | 2232 | 713 | 9.82% | 9.92% | 9.51% | 0.000000 | 0.070000 | 0.000000 |
4 | 5 | Score | (624.0, 635.0] | 3065 | 2615 | 450 | 10.22% | 11.62% | 6.00% | 0.040000 | 0.070000 | 0.040000 |
5 | 6 | Score | (635.0, 642.0] | 2706 | 1823 | 883 | 9.02% | 8.10% | 11.77% | 0.010000 | 0.070000 | 0.010000 |
6 | 7 | Score | (642.0, 653.0] | 3054 | 2216 | 838 | 10.18% | 9.85% | 11.17% | 0.000000 | 0.070000 | 0.000000 |
7 | 8 | Score | (653.0, 666.0] | 3151 | 2364 | 787 | 10.50% | 10.51% | 10.49% | 0.000000 | 0.070000 | 0.000000 |
8 | 9 | Score | (666.0, 686.0] | 3555 | 2748 | 807 | 11.85% | 12.21% | 10.76% | 0.000000 | 0.070000 | 0.000000 |
9 | 10 | Score | (686.0, inf] | 2462 | 1630 | 832 | 8.21% | 7.24% | 11.09% | 0.020000 | 0.070000 | 0.020000 |
[98]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)
# 按照等频分箱,分为5组,计算PSI
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',qcut=5,precision=4)
[98]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | LIMIT_BAL | (-inf, 50000.0] | 7676 | 5855 | 1821 | 25.5867% | 26.0222% | 24.2800% | 0.00 | 0.00 |
1 | 2 | LIMIT_BAL | (50000.0, 100000.0] | 4822 | 3593 | 1229 | 16.0733% | 15.9689% | 16.3867% | 0.00 | 0.00 |
2 | 3 | LIMIT_BAL | (100000.0, 180000.0] | 6123 | 4548 | 1575 | 20.4100% | 20.2133% | 21.0000% | 0.00 | 0.00 |
3 | 4 | LIMIT_BAL | (180000.0, 270000.0] | 5421 | 4035 | 1386 | 18.0700% | 17.9333% | 18.4800% | 0.00 | 0.00 |
4 | 5 | LIMIT_BAL | (270000.0, inf] | 5958 | 4469 | 1489 | 19.8600% | 19.8622% | 19.8533% | 0.00 | 0.00 |
[99]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)
# 按照指定的切分点,计算PSI
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',bins=[-inf,30000,50000,100000,inf],precision=4)
[99]:
No. | Name | Bins Range | #Total | #Actual | #Expected | %Total | %Actual | %Expected | PSI | Total PSI | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | LIMIT_BAL | (-inf, 30000.0] | 4111 | 3109 | 1002 | 13.7033% | 13.8178% | 13.3600% | 0.00 | 0.00 |
1 | 2 | LIMIT_BAL | (30000.0, 50000.0] | 3649 | 2746 | 903 | 12.1633% | 12.2044% | 12.0400% | 0.00 | 0.00 |
2 | 3 | LIMIT_BAL | (50000.0, 100000.0] | 4794 | 3593 | 1201 | 15.9800% | 15.9689% | 16.0133% | 0.00 | 0.00 |
3 | 4 | LIMIT_BAL | (100000.0, inf] | 17446 | 13052 | 4394 | 58.1533% | 58.0089% | 58.5867% | 0.00 | 0.00 |
[100]:
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)
# 按照指定的切分点,计算PSI,标题显示为中文
get_psi(data_train_score,data_test_score,col='LIMIT_BAL',bins=[-inf,30000,50000,100000,inf],precision=4,language='cn')
[100]:
序号 | 名称 | 分组 | #合计 | #实际 | #期望 | %合计 | %实际 | %期望 | PSI | PSI 合计 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | LIMIT_BAL | (-inf, 30000.0] | 4111 | 3109 | 1002 | 13.7033% | 13.8178% | 13.3600% | 0.00 | 0.00 |
1 | 2 | LIMIT_BAL | (30000.0, 50000.0] | 3649 | 2746 | 903 | 12.1633% | 12.2044% | 12.0400% | 0.00 | 0.00 |
2 | 3 | LIMIT_BAL | (50000.0, 100000.0] | 4794 | 3593 | 1201 | 15.9800% | 15.9689% | 16.0133% | 0.00 | 0.00 |
3 | 4 | LIMIT_BAL | (100000.0, inf] | 17446 | 13052 | 4394 | 58.1533% | 58.0089% | 58.5867% | 0.00 | 0.00 |
[101]:
# 批量计算所有特征的PSI
data_train_score = get_predict_score(data_train,scorecard)
data_test_score = get_predict_score(data_test,scorecard)
get_data_psi(data_train_score,data_test_score,precision=4)
[101]:
Name | PSI | |
---|---|---|
16 | Score | 0.07 |
8 | PAY_AMT3 | 0.00 |
4 | LIMIT_BAL | 0.00 |
17 | Proba | 0.00 |
6 | PAY_AMT2 | 0.00 |
14 | PAY_AMT6 | 0.00 |
10 | PAY_AMT4 | 0.00 |
12 | PAY_AMT5 | 0.00 |
2 | PAY_AMT1 | 0.00 |
7 | PAY_AMT2_Score | 0.00 |
13 | PAY_AMT5_Score | 0.00 |
0 | PAY_0 | 0.00 |
1 | PAY_0_Score | 0.00 |
5 | LIMIT_BAL_Score | 0.00 |
9 | PAY_AMT3_Score | 0.00 |
15 | PAY_AMT6_Score | 0.00 |
11 | PAY_AMT4_Score | 0.00 |
3 | PAY_AMT1_Score | 0.00 |
模型应用分析
决策树分析
[102]:
plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini')
[102]:
[103]:
# 绘制决策树,并保存图片到本地
# plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini',out_file='test.svg')
[103]:
0
[104]:
# 绘制决策树,并保存图片到本地
# plot_tree(data_train_score[['Score','y']],max_depth=3,criterion='gini',out_file='test.pdf')
[104]:
0