|
@@ -0,0 +1,229 @@
|
|
|
import pymysql
import pandas as pd
import numpy as np
from datetime import datetime,timedelta,timezone

# Show all columns and keep wide DataFrames on a single line when printed.
pd.set_option('display.max_columns', None)
pd.set_option('expand_frame_repr', False)

import warnings
from typing import Literal

# NOTE(review): blanket suppression hides every warning, including the pandas
# deprecation warnings for constructs used further down in this file
# (e.g. DataFrame.append). Consider narrowing to specific categories.
warnings.filterwarnings('ignore')
|
|
|
class AdjustB:
    """Derive hourly budget-allocation weights for one Amazon SP campaign.

    Pulls raw traffic / conversion / budget-usage rows for a campaign from the
    ad-marketing-stream database, joins traffic with attributed conversions,
    and scores every hour of the day so a daily budget can be split across
    hours — either one overall split (:meth:`merge_conv_traf`) or one split
    per weekday (:meth:`merge_cvtf_budt_accdday`).
    """

    def __init__(self, campaign_id,
                 time_period: Literal["1week", "2weeks", "1month", "2months", "45days"]):
        """Store the query filters.

        campaign_id: campaign id (also used as budget_scope_id) filtered in
            every generated WHERE clause.
        time_period: lookback window translated to a date by add_condition().
        """
        self.campaign_id = campaign_id
        self.time_period = time_period

    def datatable_connect01(self):
        """Open a connection to the RDS `amzn_retail_ad` database.

        SECURITY NOTE: credentials are hard-coded in source; move them to
        environment variables or a secrets manager. The caller must close
        the returned connection.
        """
        conn = pymysql.connect(user="admin",
                               password="NSYbBSPbkGQUbOSNOeyy",
                               host="retail-data.cnrgrbcygoap.us-east-1.rds.amazonaws.com",
                               database="amzn_retail_ad",
                               port=3306)
        return conn

    def datatable_connect02(self):
        """Open a connection to the local sandbox copy of `amzn_retail_ad`.

        SECURITY NOTE: hard-coded credentials — see datatable_connect01.
        The caller must close the returned connection.
        """
        conn = pymysql.connect(user="root",
                               password="sandbox",
                               host="192.168.1.225",
                               database="amzn_retail_ad",
                               port=3306)
        return conn

    def datatable_connect03(self):
        """Open a connection to `zosi_ad_marketing_stream` (the DB used below).

        SECURITY NOTE: hard-coded credentials — see datatable_connect01.
        The caller must close the returned connection.
        """
        conn = pymysql.connect(user="admin",
                               password="pvmBNS8q3duiUvvp",
                               host="amzn-retail.cluster-cnrgrbcygoap.us-east-1.rds.amazonaws.com",
                               database="zosi_ad_marketing_stream",
                               port=3306)
        return conn

    def _query_df(self, sql):
        """Run *sql* against connect03 and return the rows as a DataFrame.

        The connection is always closed, even when the query raises —
        previously every get_sp_* method leaked its connection.
        """
        conn = self.datatable_connect03()
        try:
            cursor = conn.cursor()
            cursor.execute(sql)
            columns_name = [col[0] for col in cursor.description]
            return pd.DataFrame(cursor.fetchall(), columns=columns_name)
        finally:
            conn.close()

    def get_sp_conversion(self):
        """Raw sp_conversion rows for the campaign, de-duplicated by idempotency_id."""
        sql = "select * from zosi_ad_marketing_stream.sp_conversion_raw"
        sql = sql + self.add_condition(isbudgetTable=False)
        df = self._query_df(sql)
        # The stream can deliver the same message more than once; keep the
        # first copy of each idempotency_id.
        return df.groupby('idempotency_id').head(1)

    def get_sp_traffic(self):
        """Raw sp_traffic rows for the campaign, de-duplicated by idempotency_id."""
        sql = "select * from zosi_ad_marketing_stream.sp_traffic_raw"
        sql = sql + self.add_condition(isbudgetTable=False)
        print(sql)  # debug trace of the generated query
        df = self._query_df(sql)
        return df.groupby('idempotency_id').head(1)

    def get_sp_budgetug(self):
        """Budget-usage rows for the campaign's budget scope (no de-dup applied)."""
        sql = "select * from zosi_ad_marketing_stream.sp_budget_usage"
        sql = sql + self.add_condition(isbudgetTable=True)
        return self._query_df(sql)

    def add_condition(self, isbudgetTable=False):
        """Build the SQL WHERE clause for the configured lookback window.

        isbudgetTable: True filters sp_budget_usage (usage_updated_timestamp /
            budget_scope_id); False filters the traffic/conversion tables
            (time_window_start / campaign_id).

        Raises ValueError for an unknown time_period.

        BUGFIX: the old chain compared against 'month' while the declared
        Literal (and callers) use '1month', so '1month' left `time_` unbound
        and crashed with UnboundLocalError.
        """
        days_back = {'1week': 7, '2weeks': 14, '1month': 30, '45days': 45, '2months': 60}
        if self.time_period not in days_back:
            raise ValueError(f"unsupported time_period: {self.time_period!r}")
        time_ = datetime.today().date() + timedelta(days=-days_back[self.time_period])
        if isbudgetTable:
            return f" where usage_updated_timestamp>='{time_}' and budget_scope_id='{self.campaign_id}'"
        return f" where time_window_start>='{time_}' and campaign_id='{self.campaign_id}'"

    def merge_common_operation(self):
        """Join hourly traffic with attributed conversions for the campaign.

        Returns a DataFrame summed per (campaign_id, ad_group_id, keyword_id,
        hour) with derived 'hour', 'day' (weekday) and 'cpc' columns, or an
        empty DataFrame when the inner join yields no rows.
        """
        conversion = self.get_sp_conversion()
        # NOTE(review): 'attributed_conversions_7d_same_sku' and
        # 'attributed_conversions_14d' are absent from this list — presumably
        # intentional, but verify against the table schema.
        metric_cols = [
            'attributed_sales_1d', 'attributed_sales_1d_same_sku',
            'attributed_sales_7d', 'attributed_sales_7d_same_sku',
            'attributed_sales_14d', 'attributed_sales_14d_same_sku',
            'attributed_sales_30d', 'attributed_sales_30d_same_sku',
            'attributed_conversions_1d', 'attributed_conversions_1d_same_sku',
            'attributed_conversions_7d', 'attributed_conversions_14d_same_sku',
            'attributed_conversions_30d', 'attributed_conversions_30d_same_sku',
            'attributed_units_ordered_1d', 'attributed_units_ordered_1d_same_sku',
            'attributed_units_ordered_7d', 'attributed_units_ordered_7d_same_sku',
            'attributed_units_ordered_14d', 'attributed_units_ordered_14d_same_sku',
            'attributed_units_ordered_30d', 'attributed_units_ordered_30d_same_sku',
        ]
        # 'sum' (string) rather than the builtin: passing builtins to .agg is
        # deprecated in recent pandas.
        conversion_ = conversion.groupby(
            ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id',
             'ad_id', 'keyword_id', 'placement', 'currency']
        ).agg({col: 'sum' for col in metric_cols}).reset_index()

        traffic = self.get_sp_traffic()
        # Stream values arrive as strings/Decimals; normalise before summing.
        traffic[['impressions', 'clicks']] = traffic[['impressions', 'clicks']].astype('int64')
        traffic['cost'] = traffic['cost'].astype('float64')

        traffic_ = traffic.groupby(
            ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id',
             'ad_id', 'keyword_id', 'keyword_text', 'placement', 'match_type', 'currency']
        ).agg({'impressions': 'sum', 'clicks': 'sum', 'cost': 'sum'}).reset_index()

        traffic_conversion = traffic_.merge(
            conversion_,
            on=['advertiser_id', 'marketplace_id', 'campaign_id', 'ad_group_id',
                'ad_id', 'keyword_id', 'placement', 'time_window_start', 'currency'],
            how='inner')
        if len(traffic_conversion) < 1:
            return pd.DataFrame()
        traffic_conversion['hour'] = traffic_conversion['time_window_start'].dt.hour
        traffic_conversion['day'] = traffic_conversion['time_window_start'].dt.dayofweek
        # numeric_only=True reproduces the old pandas default (string columns
        # such as keyword_text are dropped) and avoids a TypeError on pandas>=2.
        traffic_conversion = traffic_conversion.groupby(
            ['campaign_id', 'ad_group_id', 'keyword_id', 'hour']).sum(numeric_only=True).reset_index()
        # May be inf (clicks == 0) or NaN; func_rule back-fills those values.
        traffic_conversion['cpc'] = traffic_conversion['cost'] / traffic_conversion['clicks']
        return traffic_conversion

    def func_rule(self, traffic_conversion):
        """Turn per-hour stats into a 24-row hour/percentage allocation table.

        Expects the frame produced by merge_common_operation (columns
        campaign_id, ad_group_id, keyword_id, hour, cpc, clicks,
        attributed_conversions_1d, ...). Returns ['hour', 'pre_percent_s3']
        where the percentages sum to 1, subject to a 25% per-hour cap.
        """
        pro_list = traffic_conversion.groupby(['campaign_id', 'ad_group_id', 'keyword_id']).head(1)[
            ['campaign_id', 'ad_group_id', 'keyword_id']].to_numpy().tolist()
        # Ensure every (campaign, ad group, keyword) has all 24 hours present.
        # BUGFIX: DataFrame.append was removed in pandas 2.0 — build the
        # missing rows once and concat them instead.
        filler_rows = []
        for cam_, adg, kid in pro_list:
            df0 = traffic_conversion.query("campaign_id==@cam_ and ad_group_id==@adg and keyword_id==@kid")
            seen_hours = set(df0['hour'].tolist())
            filler_rows.extend(
                {'campaign_id': cam_, 'ad_group_id': adg, 'keyword_id': kid, 'hour': hour}
                for hour in range(24) if hour not in seen_hours)
        if filler_rows:
            traffic_conversion = pd.concat(
                [traffic_conversion, pd.DataFrame(filler_rows)], ignore_index=True)

        traffic_conversion['cpc_min'] = traffic_conversion.groupby(
            ['campaign_id', 'ad_group_id', 'keyword_id'])['cpc'].transform('min')
        traffic_conversion = traffic_conversion.sort_values(
            by=['campaign_id', 'ad_group_id', 'keyword_id', 'hour'])

        # Hours with no bid information get 45% of the keyword's minimum CPC.
        traffic_conversion['cpc'] = traffic_conversion.apply(
            lambda x: x['cpc_min'] * 0.45 if pd.isna(x['cpc']) or x['cpc'] is None else x['cpc'], axis=1)

        # Aggregate bid / conversions / clicks per hour of day.
        tf_c = traffic_conversion.groupby(['hour']).agg(
            {'cpc': 'sum', 'attributed_conversions_1d': 'sum', 'clicks': 'sum'}).reset_index()

        # Score each hour, emphasising high conversions and low bids.
        tf_c['pre_percent'] = tf_c.apply(
            lambda x: (x['attributed_conversions_1d'] ** 3 -
                       (x['clicks'] - x['attributed_conversions_1d']) ** 3) / x['cpc'] ** 3 + 1.001,
            axis=1)
        # Square root dampens the spread between hours.
        tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: np.sqrt(x))
        # Invalid / missing scores fall back to a small positive constant.
        tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: 1.0001 if pd.isna(x) or x is None else x)
        # Down-weight the 23:00-05:00 window to 60%.
        tf_c['pre_percent_s2'] = tf_c.apply(
            lambda x: x['pre_percent'] * 0.6 if x['hour'] < 6 or x['hour'] > 22 else x['pre_percent'], axis=1)
        # First-pass normalisation to percentages.
        tf_c['pre_percent_s2'] = tf_c['pre_percent_s2'] / tf_c['pre_percent_s2'].sum()
        # Cap any single hour at 25% of the budget.
        tf_c['pre_percent_s3'] = tf_c['pre_percent_s2'].map(lambda x: 0.25 if x > 0.25 else x)
        tf_c['temp'] = tf_c['pre_percent_s2'] - tf_c['pre_percent_s3']
        total_allocate = tf_c['temp'].sum()
        # BUGFIX: spread the capped-off excess over the *uncapped* hours.
        # The old code counted occurrences of 0.25 inside 'temp' (the excess
        # column), which is essentially always 0 — so the excess weight was
        # silently dropped and the percentages stopped summing to 1.
        allocate_count = int((tf_c['pre_percent_s3'] != 0.25).sum())
        allocate_val = total_allocate / allocate_count if allocate_count != 0 else 0
        tf_c['pre_percent_s3'] = tf_c['pre_percent_s3'].map(
            lambda x: x + allocate_val if x != 0.25 else 0.25)
        return tf_c[['hour', 'pre_percent_s3']]

    def merge_conv_traf(self):
        """Summarise the whole lookback window into one daily hourly split.

        Returns a DataFrame with columns ['hour', 'SingleDay'].
        """
        traffic_conversion = self.merge_common_operation()
        allocation = self.func_rule(traffic_conversion)
        allocation.columns = ['hour', 'SingleDay']
        return allocation

    def merge_cvtf_budt_accdday(self):
        """Build a separate hourly split for each weekday.

        Returns a DataFrame with columns ['hour', 'Monday', ..., 'Sunday'],
        inner-joined on hour across the seven per-weekday allocations.
        """
        traffic_conversion = self.merge_common_operation()
        day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        weeksummary_percent = None
        # day 0 == Monday per pandas' dt.dayofweek convention.
        for day_idx in range(7):
            day_df = self.func_rule(traffic_conversion[traffic_conversion['day'] == day_idx])
            if weeksummary_percent is None:
                weeksummary_percent = day_df
            else:
                weeksummary_percent = weeksummary_percent.merge(day_df, how='inner', on='hour')
        weeksummary_percent.columns = ["hour"] + day_names
        return weeksummary_percent
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Ad-hoc smoke run: compute the single-day hourly budget split for one
    # campaign over the last 45 days. Requires access to the remote database.
    adjust_ = AdjustB(campaign_id='281441197839505',time_period='45days')
    rel = adjust_.merge_conv_traf()
    print(rel)
|