Python实现概率版的用户流失率计算

       本文实现的是以下链接中的内容:预测用户流失
本文代码主要包含三部分:

  1. 生成模拟用户活跃档案数据
  2. 生成模拟用户连续几个月不活跃的档案数据
  3. 计算用户流失率
# -*- coding: utf-8 -*-
"""
Generate a simulated user-activity table and write it to CSV.

Each row is one simulated user and each column one month ('1月' .. '12月').
Every cell is drawn independently with ``random.randint(0, 1)``.  The
inactivity-conversion script treats 1 as "inactive in that month" and 0 as
"active", and reads its input from ``activity_data.csv`` -- the file this
script writes.
"""
import pandas as pd
import random

user_num = 5  # number of simulated users (rows) to generate
month_columns = ['1月', '2月', '3月', '4月', '5月', '6月',
                 '7月', '8月', '9月', '10月', '11月', '12月']

# Draw one row of twelve 0/1 flags per user and build the frame in a single
# step; growing a DataFrame row by row with ``df.loc`` re-allocates each time.
rows = [[random.randint(0, 1) for _ in month_columns] for _ in range(user_num)]
df = pd.DataFrame(rows, columns=month_columns)

# The file name must match what the inactivity-conversion script reads
# (activity_data.csv); writing "data.csv" left that script without its input.
df.to_csv("C:\\Users\\yang\\Desktop\\activity_data.csv", encoding='utf-8', index=False)
# -*- coding: utf-8 -*-
"""
Build the inactive-user table.

Each number is how many consecutive months the user has been inactive,
    e.g. 0 is active, 1 is inactive for one month, 2 is inactive for two
    consecutive months.
"""
import pandas as pd

file_path = "C:\\Users\\yang\\Desktop\\activity_data.csv"
file_data = pd.read_csv(file_path, encoding='utf-8')

# Walk the months left to right so that the previous column already holds the
# running streak when the current column is processed.
for month_pos in range(1, 12):  # February through December
    this_month = file_data.iloc[:, month_pos]
    streak_so_far = file_data.iloc[:, month_pos - 1]
    # Keep every cell that is not 1; a cell equal to 1 extends the streak
    # carried by the previous month.
    file_data.iloc[:, month_pos] = this_month.where(this_month != 1, streak_so_far + 1)

file_data.to_csv("C:\\Users\\yang\\Desktop\\inactivity_data.csv", encoding='utf-8', index=False)


# -*- coding: utf-8 -*-
"""
Compute churn figures from the consecutive-inactivity table.

Steps:
  1. Read inactivity_data.csv (columns '1月' .. '12月'; each cell is used as a
     consecutive-inactive-month count and as a row position 0..12).
  2. Tabulate, per month, how many users sit at each streak length.
  3. For each streak length n in 2..12, average over the available months the
     ratio "users at streak n this month / users at streak n-1 last month".
  4. Print, for each position i, the product of the averaged ratios at
     positions i+1 .. len-2.
"""

import pandas as pd
from collections import Counter

df = pd.read_csv("C:\\Users\\yang\\Desktop\\inactivity_data.csv")

# Rows are streak lengths 0..12, columns are months.  Counts start at 0 rather
# than NaN: a streak/month pair that no user reached is a genuine zero, and
# leaving it as NaN would turn every ratio and product touching it into NaN.
continue_inactivity_df = pd.DataFrame(0,
                                      index=list(range(13)),
                                      columns=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                                               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])

for col in range(12):
    # Count the month's streak values once instead of once per distinct key.
    streak_counts = Counter(df[str(col + 1) + '月'])
    for row, users in streak_counts.items():
        continue_inactivity_df.iloc[row, col] = users
# print(continue_inactivity_df)

# Averaged ratios for streak lengths 2..12, stored in that order.
probability = []
for row in range(2, 13):
    ratios = []
    for col in range(row - 1, 12):
        # Plain ints keep the results as built-in floats, so the printed list
        # looks the same regardless of the numpy scalar repr.
        previous = int(continue_inactivity_df.iloc[row - 1, col - 1])
        if previous == 0:
            # Nobody was at streak n-1 in the previous month, so this month
            # gives no observation of the n-1 -> n transition; skip it.
            continue
        current = int(continue_inactivity_df.iloc[row, col])
        ratios.append(current / previous)
    if ratios:
        # Mean over the observed months; equals the original fixed divisor
        # (12 - row + 1) whenever every month had a non-zero denominator.
        probability.append(round(sum(ratios) / len(ratios), 4))
    else:
        # No month offered an observation: the ratio is undefined.
        probability.append(float('nan'))
# print(probability)

# Churn figures (the original comment labels these "February to November").
# NOTE(review): the product deliberately keeps the original index range
# i+1 .. len-2, i.e. it leaves out the last averaged ratio -- confirm against
# the referenced article that this is the intended definition.
user_churn = []  # churn rate
last = len(probability) - 1
for i in range(last):
    churn = 1
    for step in probability[i + 1:last]:
        churn = churn * step
    user_churn.append(round(churn, 8))
print(user_churn)

猜你喜欢

转载自blog.csdn.net/qq_41228463/article/details/83789957