sales_data = sales_data[sales_data['customer_id_no'] != -1]Before continuing decide if there are any other levels on which you wish to filter the data – maybe by a particular product line or whether the customer is an individual or another business. Group your data by the customer ID and follow the format below to create your RFM table.
NOW = dt.datetime(today’s year, month, date) rfm_table = sales_data.groupby('customer_id_no').agg({'date': lambda x: (NOW - x.max()).days, 'visit_freq': lambda x: x.max(), 'gross_sales': lambda x: x.sum()}) rfm_table['date'] = rfm_table['date'].astype(int) rfm_table.rename(columns={'date': 'recency', 'visit_freq': 'frequency', 'gross_sales': 'monetary_value'}, inplace=True)You can create a number of groups for segmenting the customers. Typically four groups is sufficient.
quantiles = rfm_table.quantile(q=[0.25,0.5,0.75]) quantiles = quantiles.to_dict()Determine the score
segmented_rfm = rfm_table def RScore(x,p,d): if x <= d[p][0.25]: return 1 elif x <= d[p][0.50]: return 2 elif x <= d[p][0.75]: return 3 else: return 4 def FMScore(x,p,d): if x <= d[p][0.25]: return 4 elif x <= d[p][0.50]: return 3 elif x <= d[p][0.75]: return 2 else: return 1 # Adding quartile scores and computing the overall RFM Score segmented_rfm['r_quartile'] = segmented_rfm['recency'].apply(RScore, args=('recency',quantiles,)) segmented_rfm['f_quartile'] = segmented_rfm['frequency'].apply(FMScore, args=('frequency',quantiles,)) segmented_rfm['m_quartile'] = segmented_rfm['monetary_value'].apply(FMScore, args=('monetary_value',quantiles,)) segmented_rfm['RFMScore'] = (segmented_rfm.r_quartile.map(str) + segmented_rfm.f_quartile.map(str) + segmented_rfm.m_quartile.map(str))
non_recent_frequent = segmented_rfm.sort_values('monetary_value', ascending=False) non_recent_frequent = non_recent_frequent [(non_recent_frequent ['recency'] > 60) &( non_recent_frequent ['monetary_value'] >= 500) &( non_recent_frequent ['frequency'] >= 30)]Or figure out your top customers by a specific metric, below it is sorted by monetary value.
best_customers = segmented_rfm.sort_values('monetary_value', ascending=False) best_customers = best_customers[best_customers[‘RFMScore’] = 111]