# adjust_budget_bid.py
import pymysql
import pandas as pd
import numpy as np
from datetime import datetime,timedelta,timezone
# Show every column and keep wide frames on a single line when printing.
pd.set_option('display.max_columns', None)
pd.set_option('expand_frame_repr', False)
import warnings
from typing import Literal
import json
# Silence pandas/numpy warnings (e.g. sqrt of negative intermediate values).
warnings.filterwarnings('ignore')
  11. class AdjustB:
  12. def __init__(self,campaign_id,time_period:Literal["1week","2weeks","4weeks","6weeks","8weeks","12weeks"]="8weeks"):
  13. self.campaign_id = campaign_id
  14. self.time_period = time_period
  15. def datatable_connect01(self):
  16. conn = pymysql.connect(user="admin",
  17. password="NSYbBSPbkGQUbOSNOeyy",
  18. host="retail-data.cnrgrbcygoap.us-east-1.rds.amazonaws.com",
  19. database="amzn_retail_ad",
  20. port=3306)
  21. return conn
  22. def datatable_connect02(self):
  23. conn = pymysql.connect(user="root",
  24. password="sandbox",
  25. host="192.168.1.225",
  26. database="amzn_retail_ad",
  27. port=3306)
  28. return conn
  29. def datatable_connect03(self):
  30. conn = pymysql.connect(user="admin",
  31. password="pvmBNS8q3duiUvvp",
  32. host="amzn-retail.cluster-cnrgrbcygoap.us-east-1.rds.amazonaws.com",
  33. database="zosi_ad_marketing_stream",
  34. port=3306)
  35. return conn
  36. # datatable_connect()
  37. def get_sp_conversion(self):
  38. conn = self.datatable_connect03()
  39. cursor = conn.cursor()
  40. sql = "select * from zosi_ad_marketing_stream.sp_conversion_raw"
  41. sql = sql + self.add_condition(isbudgetTable=False)
  42. cursor.execute(sql)
  43. columns_name = [i[0] for i in cursor.description]
  44. rel = cursor.fetchall()
  45. df = pd.DataFrame(rel, columns=columns_name)
  46. df = df.groupby('idempotency_id').head(1)
  47. # print(df)
  48. # df.to_excel("ttt111.xlsx")
  49. return df
  50. def get_sp_traffic(self):
  51. conn = self.datatable_connect03()
  52. cursor = conn.cursor()
  53. sql = "select * from zosi_ad_marketing_stream.sp_traffic_raw"
  54. sql = sql + self.add_condition(isbudgetTable=False)
  55. # print(sql)
  56. cursor.execute(sql)
  57. columns_name = [i[0] for i in cursor.description]
  58. rel = cursor.fetchall()
  59. df = pd.DataFrame(rel, columns=columns_name)
  60. df = df.groupby('idempotency_id').head(1)
  61. return df
  62. def get_sp_budgetug(self):
  63. conn = self.datatable_connect03()
  64. cursor = conn.cursor()
  65. sql = "select * from zosi_ad_marketing_stream.sp_budget_usage"
  66. sql = sql+self.add_condition(isbudgetTable=True)
  67. cursor.execute(sql)
  68. columns_name = [i[0] for i in cursor.description]
  69. rel = cursor.fetchall()
  70. df = pd.DataFrame(rel, columns=columns_name)
  71. return df
  72. def add_condition(self,isbudgetTable=False):
  73. if self.time_period =='1week':
  74. time_ = datetime.today().date() + timedelta(days=-7)
  75. elif self.time_period =='2weeks':
  76. time_ = datetime.today().date() + timedelta(days=-14)
  77. elif self.time_period =='4weeks':
  78. time_ = datetime.today().date() + timedelta(days=-28)
  79. elif self.time_period =='6weeks':
  80. time_ = datetime.today().date() + timedelta(days=-42)
  81. elif self.time_period == '8weeks':
  82. time_ = datetime.today().date() + timedelta(days=-56)
  83. elif self.time_period == '12weeks':
  84. time_ = datetime.today().date() + timedelta(days=-84)
  85. # usage_updated_timestamp
  86. if isbudgetTable:
  87. return f" where usage_updated_timestamp>='{time_}' and budget_scope_id='{self.campaign_id}'"
  88. return f" where time_window_start>='{time_}' and campaign_id='{self.campaign_id}'"
  89. def merge_common_operation(self):
  90. conversion = self.get_sp_conversion()
  91. conversion_ = conversion.groupby(
  92. ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id', 'ad_id',
  93. 'keyword_id', 'placement', 'currency']).agg({
  94. 'attributed_sales_1d': sum,
  95. 'attributed_sales_1d_same_sku': sum,
  96. 'attributed_sales_7d': sum,
  97. 'attributed_sales_7d_same_sku': sum,
  98. 'attributed_sales_14d': sum,
  99. 'attributed_sales_14d_same_sku': sum,
  100. 'attributed_sales_30d': sum,
  101. 'attributed_sales_30d_same_sku': sum,
  102. 'attributed_conversions_1d': sum,
  103. 'attributed_conversions_1d_same_sku': sum,
  104. 'attributed_conversions_7d': sum,
  105. 'attributed_conversions_14d_same_sku': sum,
  106. 'attributed_conversions_30d': sum,
  107. 'attributed_conversions_30d_same_sku': sum,
  108. 'attributed_units_ordered_1d': sum,
  109. 'attributed_units_ordered_1d_same_sku': sum,
  110. 'attributed_units_ordered_7d': sum,
  111. 'attributed_units_ordered_7d_same_sku': sum,
  112. 'attributed_units_ordered_14d': sum,
  113. 'attributed_units_ordered_14d_same_sku': sum,
  114. 'attributed_units_ordered_30d': sum,
  115. 'attributed_units_ordered_30d_same_sku': sum
  116. }).reset_index()
  117. traffic = self.get_sp_traffic()
  118. traffic[['impressions', 'clicks']] = traffic[['impressions', 'clicks']].astype('int64')
  119. traffic['cost'] = traffic['cost'].astype('float64')
  120. traffic_ = traffic.groupby(
  121. ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id', 'ad_id',
  122. 'keyword_id', 'keyword_text', 'placement', 'match_type', 'currency'
  123. ]).agg({'impressions': sum,
  124. 'clicks': sum,
  125. 'cost': sum
  126. }).reset_index()
  127. traffic_conversion = traffic_.merge(conversion_,
  128. on=['advertiser_id', 'marketplace_id', 'campaign_id', 'ad_group_id',
  129. 'ad_id', 'keyword_id', 'placement', 'time_window_start', 'currency'],
  130. how='inner')
  131. if len(traffic_conversion) < 1:
  132. return pd.DataFrame()
  133. traffic_conversion['hour'] = traffic_conversion['time_window_start'].dt.hour
  134. traffic_conversion['day'] = traffic_conversion['time_window_start'].dt.dayofweek
  135. traffic_conversion = traffic_conversion.groupby(
  136. ['campaign_id', 'ad_group_id', 'keyword_id', 'hour']).sum().reset_index()
  137. traffic_conversion['cpc'] = traffic_conversion['cost'] / traffic_conversion['clicks']
  138. # traffic_conversion['cpc'].fillna(0,inplace=True)
  139. # traffic_conversion['cpc'] = traffic_conversion['cpc'].replace([np.inf,np.nan,pd.NA],0)
  140. return traffic_conversion
  141. def pre_deal(self,traffic_conversion):
  142. pro_list = traffic_conversion.groupby(['campaign_id', 'ad_group_id', 'keyword_id']).head(1)[
  143. ['campaign_id', 'ad_group_id', 'keyword_id']].to_numpy().tolist()
  144. for i in pro_list:
  145. cam_, adg, kid = i[0], i[1], i[2]
  146. df0 = traffic_conversion.query("campaign_id==@cam_ and ad_group_id==@adg and keyword_id==@kid")
  147. for hour in range(24):
  148. if hour not in df0['hour'].tolist():
  149. traffic_conversion = traffic_conversion.append(
  150. {'campaign_id': cam_, 'ad_group_id': adg, 'keyword_id': kid, 'hour': hour},
  151. ignore_index=True)
  152. traffic_conversion['cpc_min'] = traffic_conversion.groupby(['campaign_id', 'ad_group_id', 'keyword_id'])[
  153. 'cpc'].transform('min')
  154. traffic_conversion = traffic_conversion.sort_values(by=['campaign_id', 'ad_group_id', 'keyword_id', 'hour'])
  155. # 给当前没有竞价信息的赋予竞价,为该关键词最小竞价的45%
  156. traffic_conversion['cpc'] = traffic_conversion.apply(
  157. lambda x: x['cpc_min'] * 0.45 if pd.isna(x['cpc']) or x['cpc'] is None else x['cpc'], axis=1)
  158. return traffic_conversion
  159. def func_rule_budget(self,traffic_conversion):
  160. traffic_conversion = self.pre_deal(traffic_conversion)
  161. # total_spend = traffic_conversion['cpc'].sum()
  162. # 根据小时对竞价、转化、点击汇总
  163. tf_c = traffic_conversion.groupby(['hour']).agg(
  164. {'cpc': sum, 'attributed_conversions_1d': sum, 'clicks': sum}).reset_index()
  165. # 根据以下公式,突出转化高与竞价低的重要性
  166. tf_c['pre_percent'] = tf_c.apply(
  167. lambda x: (x['attributed_conversions_1d'] ** 3 - (x['clicks'] - x['attributed_conversions_1d']) ** 3) / x[
  168. 'cpc'] ** 3 + 1.001, axis=1)
  169. tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: np.sqrt(x)) # 避免各时间之间差距太大,进行开根处理
  170. # 对无效数值/空值 赋值1.0001
  171. tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: 1.0001 if pd.isna(x) or x is None else x)
  172. # 对23-5点的权重值降低至60%
  173. tf_c['pre_percent_s2'] = tf_c.apply(
  174. lambda x: x['pre_percent'] * 0.6 if x['hour'] < 6 or x['hour'] > 22 else x['pre_percent'], axis=1)
  175. total_val = tf_c['pre_percent_s2'].sum()
  176. # print(total_val)
  177. # 计算各小时权重(初次分配权重,后续修正)
  178. tf_c['pre_percent_s2'] = tf_c['pre_percent_s2'] / total_val
  179. # 对分配过度不均衡进行调整,对超过分配的25%部分只给予25%的权重百分比
  180. tf_c['pre_percent_s3'] = tf_c['pre_percent_s2'].map(lambda x: 0.25 if x > 0.25 else x)
  181. tf_c['temp'] = tf_c['pre_percent_s2'] - tf_c['pre_percent_s3']
  182. total_allocate = tf_c['temp'].sum()
  183. allocate_count = tf_c['temp'].tolist().count(0.25)
  184. allocate_val = total_allocate / allocate_count if allocate_count != 0 else 0
  185. # 将超过25%的权重分配到其余小时区间内
  186. tf_c['pre_percent_s3'] = tf_c['pre_percent_s3'].map(lambda x: x + allocate_val if x != 0.25 else 0.25)
  187. return tf_c[['hour','pre_percent_s3']]
  188. def budget_allocate_singleDay(self): # 总结过去每天的数据,对单天预算分配
  189. traffic_conversion = self.merge_common_operation()
  190. traffic_conversion = self.pre_deal(traffic_conversion)
  191. traffic_conversion = self.func_rule_budget(traffic_conversion)
  192. traffic_conversion.columns = ['hour','SingleDay']
  193. return json.dumps({"budget_allocate_singleDay":traffic_conversion.to_dict(orient='records')})
  194. def budget_allocate_week(self): # 总结过去每个不同工作日的数据,对每周每天预算都进行不同分配
  195. traffic_conversion = self.merge_common_operation()
  196. # 单独筛选周一至周日每天的traffic,再进行后续步骤
  197. Monday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 0])
  198. Monday_df = self.func_rule_budget(Monday_df)
  199. Tuesday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 1])
  200. Tuesday_df = self.func_rule_budget(Tuesday_df)
  201. Wednesday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 2])
  202. Wednesday_df = self.func_rule_budget(Wednesday_df)
  203. Thursday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 3])
  204. Thursday_df = self.func_rule_budget(Thursday_df)
  205. Friday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 4])
  206. Friday_df = self.func_rule_budget(Friday_df)
  207. Saturday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 5])
  208. Saturday_df = self.func_rule_budget(Saturday_df)
  209. Sunday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 6])
  210. Sunday_df = self.func_rule_budget(Sunday_df)
  211. weeksummary_percent = pd.merge(Monday_df,Tuesday_df,how='inner',on='hour')
  212. weeksummary_percent = weeksummary_percent.merge(Wednesday_df,how='inner',on='hour')
  213. weeksummary_percent = weeksummary_percent.merge(Thursday_df,how='inner',on='hour')
  214. weeksummary_percent = weeksummary_percent.merge(Friday_df,how='inner',on='hour')
  215. weeksummary_percent = weeksummary_percent.merge(Saturday_df,how='inner',on='hour')
  216. weeksummary_percent = weeksummary_percent.merge(Sunday_df,how='inner',on='hour')
  217. weeksummary_percent.columns = ["hour",'Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
  218. # weeksummary_percent.to_excel("S111.xlsx")
  219. return json.dumps({"budget_allocate_week":weeksummary_percent.round(4).to_dict(orient='records')})
  220. def rule_set_bid(self,avg_weight, cr, avg_cr, ctr, avg_ctr, weight_value, hour):
  221. if weight_value > avg_weight * 1.5: # 表现极好词
  222. return 2
  223. elif weight_value > avg_weight * 1.25: # 表现较好词
  224. if hour in [23, 0, 1, 2, 3, 4, 5]:
  225. return 1.5
  226. else:
  227. return 1.5 + np.random.randint(100, 300) / 1000
  228. elif weight_value > avg_weight * 1.15: # 表现稍好词
  229. if hour in [23, 0, 1, 2, 3, 4, 5]:
  230. return 1.25
  231. else:
  232. return 1.5 + np.random.randint(100, 200) / 1000
  233. elif weight_value > avg_weight: # 标准权重词
  234. return 1
  235. else:
  236. if ctr >= avg_ctr and cr >= 0.75 * avg_ctr:
  237. return 1
  238. elif cr > avg_ctr:
  239. return 1.25
  240. elif cr > 0.75 * avg_cr:
  241. return 0.75
  242. else:
  243. if ((pd.isna(cr) and pd.isna(ctr)) or None in [cr, ctr]) and hour not in [23, 0, 1, 2, 3, 4, 5]:
  244. return [0.5, 0.7, 0.8, 0.9, 1, 1.1][np.random.randint(0, 5)]
  245. return 0.5
  246. def func_rule_bid(self,traffic_conversion):
  247. tf_c = traffic_conversion.groupby(['hour']).agg(
  248. {'cost': sum, 'attributed_conversions_1d': sum, 'clicks': sum, 'impressions': sum}).reset_index()
  249. tf_c['cpc'] = tf_c['cost'] / tf_c['clicks']
  250. tf_c['cr'] = tf_c['attributed_conversions_1d'] / tf_c['clicks']
  251. tf_c['ctr'] = tf_c['clicks'] / tf_c['impressions']
  252. avg_bid = tf_c['cpc'].mean()
  253. avg_cr = tf_c['attributed_conversions_1d'].sum()/tf_c['clicks'].sum()
  254. avg_ctr = tf_c['clicks'].sum()/tf_c['impressions'].sum()
  255. tf_c['weight_value'] = tf_c['cr']/tf_c['cpc']
  256. avg_weight = avg_cr/avg_bid
  257. # avg_weight = tf_c['weight_value'].mean()
  258. tf_c['weight_allocate'] = tf_c.apply(lambda x:self.rule_set_bid(avg_weight,x['cr'],avg_cr,x['ctr'],avg_ctr,x['weight_value'],x['hour']),axis=1)
  259. # print(avg_bid,avg_cr,avg_ctr,avg_weight)
  260. return tf_c[['hour','weight_allocate']].round(2)
  261. def bid_adjust_singleDay(self):
  262. traffic_conversion = self.merge_common_operation()
  263. # traffic_conversion = self.pre_deal(traffic_conversion)
  264. tf_c = self.pre_deal(traffic_conversion)
  265. tf_c = self.func_rule_bid(tf_c)
  266. tf_c.columns = ['hour','SingleDay']
  267. # 完成
  268. return json.dumps({"bid_adjust_singleDay":tf_c.to_dict(orient='records')})
  269. def bid_adjust_week(self):
  270. traffic_conversion = self.merge_common_operation()
  271. # print(traffic_conversion)
  272. # traffic_conversion = self.pre_deal(traffic_conversion)
  273. # 单独筛选周一至周日每天的traffic,再进行后续步骤
  274. Monday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 0])
  275. Monday_df = self.func_rule_bid(Monday_df)
  276. Tuesday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 1])
  277. Tuesday_df = self.func_rule_bid(Tuesday_df)
  278. Wednesday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 2])
  279. Wednesday_df = self.func_rule_bid(Wednesday_df)
  280. Thursday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 3])
  281. Thursday_df = self.func_rule_bid(Thursday_df)
  282. Friday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 4])
  283. Friday_df = self.func_rule_bid(Friday_df)
  284. Saturday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 5])
  285. Saturday_df = self.func_rule_bid(Saturday_df)
  286. Sunday_df = self.pre_deal(traffic_conversion[traffic_conversion['day'] == 6])
  287. Sunday_df = self.func_rule_bid(Sunday_df)
  288. weeksummary_percent = pd.merge(Monday_df, Tuesday_df, how='left', on='hour')
  289. weeksummary_percent = weeksummary_percent.merge(Wednesday_df, how='left', on='hour')
  290. weeksummary_percent = weeksummary_percent.merge(Thursday_df, how='left', on='hour')
  291. weeksummary_percent = weeksummary_percent.merge(Friday_df, how='left', on='hour')
  292. weeksummary_percent = weeksummary_percent.merge(Saturday_df, how='left', on='hour')
  293. weeksummary_percent = weeksummary_percent.merge(Sunday_df, how='left', on='hour')
  294. weeksummary_percent.columns = ["hour", 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
  295. 'Sunday']
  296. # weeksummary_percent.to_excel("S111.xlsx")
  297. return json.dumps({"bid_adjust_week":weeksummary_percent.to_dict(orient='records')})
  298. if __name__ == '__main__':
  299. adjust_ = AdjustB(campaign_id='325523075677132')
  300. # 竞价分配
  301. bid_adjust = adjust_.bid_adjust_week()
  302. print(bid_adjust)
  303. print()
  304. #预算分配
  305. budget_adjust = adjust_.budget_allocate_week()
  306. print(budget_adjust)