Browse Source

adjust_budget_bid append

huangyifan 1 year ago
parent
commit
411a04936a

+ 6 - 6
start_sync_amz_RightNowRun.py

@@ -93,15 +93,15 @@ if __name__ == '__main__':
 
     refresh_token = shop_infos(AWS_CREDENTIALS['profile_id'])['refresh_token']
     AWS_CREDENTIALS['refresh_token'] = refresh_token
-    amz_report(conn, AWS_CREDENTIALS=AWS_CREDENTIALS)
+    # amz_report(conn, AWS_CREDENTIALS=AWS_CREDENTIALS)
 
-    # list_date = ["2023-10-22",]
+    list_date = ["2024-01-12","2024-01-11"]
     # list_date = [f'2023-11-{"0"+str(i) if len(str(i))==1 else i}' for i in range(27,30)]
     # print(list_date)
-    # for date_ in list_date:
-    #     print(date_)
-    #     print(date_.replace("-",""))
-    #     amz_report(conn,AWS_CREDENTIALS,para={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
+    for date_ in list_date:
+        print(date_)
+        print(date_.replace("-",""))
+        amz_report(conn,AWS_CREDENTIALS,para={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
 
 
     conn.close()
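
For longer backfills, list_date can be generated rather than hand-written (a minimal sketch; the start and end dates are placeholders, not values from the commit):

    from datetime import date, timedelta

    start, end = date(2024, 1, 1), date(2024, 1, 12)
    list_date = [(start + timedelta(days=i)).isoformat()
                 for i in range((end - start).days + 1)]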

+ 229 - 0
sync_amz_data/public/adjust_budget_bid.py

@@ -0,0 +1,229 @@
+import warnings
+from datetime import datetime, timedelta, timezone
+from typing import Literal
+
+import numpy as np
+import pandas as pd
+import pymysql
+
+pd.set_option('display.max_columns', None)
+pd.set_option('expand_frame_repr', False)
+warnings.filterwarnings('ignore')
+
+class AdjustB:
+    def __init__(self, campaign_id, time_period: Literal["1week", "2weeks", "1month", "2months", "45days"]):
+        self.campaign_id = campaign_id
+        self.time_period = time_period
+
+    def datatable_connect01(self):
+        conn = pymysql.connect(user="admin",
+                               password="NSYbBSPbkGQUbOSNOeyy",
+                               host="retail-data.cnrgrbcygoap.us-east-1.rds.amazonaws.com",
+                               database="amzn_retail_ad",
+                               port=3306)
+        return conn
+
+    def datatable_connect02(self):
+        conn = pymysql.connect(user="root",
+                               password="sandbox",
+                               host="192.168.1.225",
+                               database="amzn_retail_ad",
+                               port=3306)
+        return conn
+
+    def datatable_connect03(self):
+        conn = pymysql.connect(user="admin",
+                               password="pvmBNS8q3duiUvvp",
+                               host="amzn-retail.cluster-cnrgrbcygoap.us-east-1.rds.amazonaws.com",
+                               database="zosi_ad_marketing_stream",
+                               port=3306)
+        return conn
+
+    # The getters below query the zosi_ad_marketing_stream tables via datatable_connect03()
+    def get_sp_conversion(self):
+        conn = self.datatable_connect03()
+        cursor = conn.cursor()
+        sql = "select * from zosi_ad_marketing_stream.sp_conversion_raw"
+        sql = sql + self.add_condition(isbudgetTable=False)
+        cursor.execute(sql)
+        columns_name = [i[0] for i in cursor.description]
+        rel = cursor.fetchall()
+        df = pd.DataFrame(rel, columns=columns_name)
+        # keep one row per idempotency_id (stream messages can be delivered more than once)
+        df = df.groupby('idempotency_id').head(1)
+        return df
+
+    def get_sp_traffic(self):
+        conn = self.datatable_connect03()
+        cursor = conn.cursor()
+        sql = "select * from zosi_ad_marketing_stream.sp_traffic_raw"
+        sql = sql + self.add_condition(isbudgetTable=False)
+        print(sql)
+        cursor.execute(sql)
+        columns_name = [i[0] for i in cursor.description]
+        rel = cursor.fetchall()
+        df = pd.DataFrame(rel, columns=columns_name)
+        df = df.groupby('idempotency_id').head(1)  # dedupe repeated stream messages
+        return df
+
+    def get_sp_budgetug(self):
+        conn = self.datatable_connect03()
+        cursor = conn.cursor()
+        sql = "select * from zosi_ad_marketing_stream.sp_budget_usage"
+        sql = sql+self.add_condition(isbudgetTable=True)
+        cursor.execute(sql)
+        columns_name = [i[0] for i in cursor.description]
+        rel = cursor.fetchall()
+        df = pd.DataFrame(rel, columns=columns_name)
+        return df
+
+    def add_condition(self, isbudgetTable=False):
+        if self.time_period == '1week':
+            time_ = datetime.today().date() + timedelta(days=-7)
+        elif self.time_period == '2weeks':
+            time_ = datetime.today().date() + timedelta(days=-14)
+        elif self.time_period == '1month':
+            time_ = datetime.today().date() + timedelta(days=-30)
+        elif self.time_period == '45days':
+            time_ = datetime.today().date() + timedelta(days=-45)
+        elif self.time_period == '2months':
+            time_ = datetime.today().date() + timedelta(days=-60)
+        else:
+            raise ValueError(f"unsupported time_period: {self.time_period}")
+        # sp_budget_usage is filtered on usage_updated_timestamp; the raw tables on time_window_start
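+        # Illustrative: time_period='1week' run on 2024-01-12 would yield
+        #   " where time_window_start>='2024-01-05' and campaign_id='281441197839505'"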
+        if isbudgetTable:
+            return f" where usage_updated_timestamp>='{time_}' and budget_scope_id='{self.campaign_id}'"
+        return f" where time_window_start>='{time_}' and campaign_id='{self.campaign_id}'"
+
+    def merge_common_operation(self):
+        conversion = self.get_sp_conversion()
+        conversion_ = conversion.groupby(
+            ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id', 'ad_id',
+             'keyword_id', 'placement', 'currency']).agg({
+            'attributed_sales_1d': 'sum',
+            'attributed_sales_1d_same_sku': 'sum',
+            'attributed_sales_7d': 'sum',
+            'attributed_sales_7d_same_sku': 'sum',
+            'attributed_sales_14d': 'sum',
+            'attributed_sales_14d_same_sku': 'sum',
+            'attributed_sales_30d': 'sum',
+            'attributed_sales_30d_same_sku': 'sum',
+            'attributed_conversions_1d': 'sum',
+            'attributed_conversions_1d_same_sku': 'sum',
+            'attributed_conversions_7d': 'sum',
+            'attributed_conversions_7d_same_sku': 'sum',
+            'attributed_conversions_14d': 'sum',
+            'attributed_conversions_14d_same_sku': 'sum',
+            'attributed_conversions_30d': 'sum',
+            'attributed_conversions_30d_same_sku': 'sum',
+            'attributed_units_ordered_1d': 'sum',
+            'attributed_units_ordered_1d_same_sku': 'sum',
+            'attributed_units_ordered_7d': 'sum',
+            'attributed_units_ordered_7d_same_sku': 'sum',
+            'attributed_units_ordered_14d': 'sum',
+            'attributed_units_ordered_14d_same_sku': 'sum',
+            'attributed_units_ordered_30d': 'sum',
+            'attributed_units_ordered_30d_same_sku': 'sum'
+        }).reset_index()
+        traffic = self.get_sp_traffic()
+        traffic[['impressions', 'clicks']] = traffic[['impressions', 'clicks']].astype('int64')
+        traffic['cost'] = traffic['cost'].astype('float64')
+
+        traffic_ = traffic.groupby(
+            ['advertiser_id', 'marketplace_id', 'time_window_start', 'campaign_id', 'ad_group_id', 'ad_id',
+             'keyword_id', 'keyword_text', 'placement', 'match_type', 'currency'
+             ]).agg({'impressions': 'sum',
+                     'clicks': 'sum',
+                     'cost': 'sum'
+                     }).reset_index()
+        traffic_conversion = traffic_.merge(conversion_,
+                                            on=['advertiser_id', 'marketplace_id', 'campaign_id', 'ad_group_id',
+                                                'ad_id', 'keyword_id', 'placement', 'time_window_start', 'currency'],
+                                            how='inner')
+        if len(traffic_conversion) < 1:
+            return pd.DataFrame()
+        traffic_conversion['hour'] = traffic_conversion['time_window_start'].dt.hour
+        traffic_conversion['day'] = traffic_conversion['time_window_start'].dt.dayofweek
+        traffic_conversion = traffic_conversion.groupby(
+            ['campaign_id', 'ad_group_id', 'keyword_id', 'hour']).sum().reset_index()
+        traffic_conversion['cpc'] = traffic_conversion['cost'] / traffic_conversion['clicks']
+        # zero-click rows yield inf/NaN here; NaN is caught by the 45% fallback in func_rule
+        # traffic_conversion['cpc'].fillna(0,inplace=True)
+        # traffic_conversion['cpc'] = traffic_conversion['cpc'].replace([np.inf,np.nan,pd.NA],0)
+        return traffic_conversion
+
+    def func_rule(self,traffic_conversion):
+        pro_list = traffic_conversion.groupby(['campaign_id', 'ad_group_id', 'keyword_id']).head(1)[
+            ['campaign_id', 'ad_group_id', 'keyword_id']].to_numpy().tolist()
+        for i in pro_list:
+            cam_, adg, kid = i[0], i[1], i[2]
+            df0 = traffic_conversion.query("campaign_id==@cam_ and ad_group_id==@adg and keyword_id==@kid")
+            # fill in missing hours so every keyword gets a full 24-hour grid
+            # (DataFrame.append was removed in pandas 2.0, so build the rows and concat once)
+            missing = [{'campaign_id': cam_, 'ad_group_id': adg, 'keyword_id': kid, 'hour': hour}
+                       for hour in range(24) if hour not in df0['hour'].tolist()]
+            if missing:
+                traffic_conversion = pd.concat([traffic_conversion, pd.DataFrame(missing)], ignore_index=True)
+        traffic_conversion['cpc_min'] = traffic_conversion.groupby(['campaign_id', 'ad_group_id', 'keyword_id'])[
+            'cpc'].transform('min')
+
+        traffic_conversion = traffic_conversion.sort_values(by=['campaign_id', 'ad_group_id', 'keyword_id', 'hour'])
+
+        # for hours with no observed bid, assign 45% of that keyword's minimum CPC
+        traffic_conversion['cpc'] = traffic_conversion.apply(
+            lambda x: x['cpc_min'] * 0.45 if pd.isna(x['cpc']) or x['cpc'] is None else x['cpc'], axis=1)
+        # aggregate cost, conversions and clicks by hour
+        tf_c = traffic_conversion.groupby(['hour']).agg(
+            {'cpc': 'sum', 'attributed_conversions_1d': 'sum', 'clicks': 'sum'}).reset_index()
+        # the formula below rewards hours with many conversions at a low cost
+        tf_c['pre_percent'] = tf_c.apply(
+            lambda x: (x['attributed_conversions_1d'] ** 3 - (x['clicks'] - x['attributed_conversions_1d']) ** 3) / x[
+                'cpc'] ** 3 + 1.001, axis=1)
+        tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: np.sqrt(x))  # square root damps large gaps between hours
+        # assign 1.0001 to invalid/missing values (e.g. sqrt of a negative)
+        tf_c['pre_percent'] = tf_c['pre_percent'].map(lambda x: 1.0001 if pd.isna(x) or x is None else x)
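+        # Worked example (illustrative numbers): an hour with 3 conversions, 4 clicks
+        # and summed cpc 1.5 scores (3**3 - 1**3)/1.5**3 + 1.001 ≈ 8.70, sqrt ≈ 2.95;
+        # with 0 conversions and 4 clicks the score goes negative, sqrt gives NaN,
+        # and the fallback above pins it to 1.0001.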
+        # reduce the weight of the 23:00-05:00 hours to 60%
+        tf_c['pre_percent_s2'] = tf_c.apply(
+            lambda x: x['pre_percent'] * 0.6 if x['hour'] < 6 or x['hour'] > 22 else x['pre_percent'], axis=1)
+        total_val = tf_c['pre_percent_s2'].sum()
+        # normalize into per-hour weights (initial allocation, corrected below)
+        tf_c['pre_percent_s2'] = tf_c['pre_percent_s2'] / total_val
+        # cap any single hour at 25% of the total weight
+        tf_c['pre_percent_s3'] = tf_c['pre_percent_s2'].map(lambda x: 0.25 if x > 0.25 else x)
+        tf_c['temp'] = tf_c['pre_percent_s2'] - tf_c['pre_percent_s3']
+        total_allocate = tf_c['temp'].sum()
+        allocate_count = tf_c['temp'].tolist().count(0)  # hours that were not capped
+        allocate_val = total_allocate / allocate_count if allocate_count != 0 else 0
+        # spread the excess above 25% evenly across the uncapped hours
+        tf_c['pre_percent_s3'] = tf_c['pre_percent_s3'].map(lambda x: x + allocate_val if x != 0.25 else 0.25)
+        return tf_c[['hour','pre_percent_s3']]
+
+    def merge_conv_traf(self):  # summarize past data and allocate a single day's budget across hours
+        traffic_conversion = self.merge_common_operation()
+        traffic_conversion = self.func_rule(traffic_conversion)
+        traffic_conversion.columns = ['hour','SingleDay']
+        return traffic_conversion
+
+    def merge_cvtf_budt_accdday(self):  # summarize by weekday so each day of the week gets its own hourly allocation
+        traffic_conversion = self.merge_common_operation()
+        # TODO: filter traffic for each weekday (Mon-Sun) separately before the following steps;
+        # note merge_common_operation currently sums the 'day' column away in its final groupby
+        Monday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==0])
+        Tuesday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==1])
+        Wednesday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==2])
+        Thursday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==3])
+        Friday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==4])
+        Saturday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==5])
+        Sunday_df = self.func_rule(traffic_conversion[traffic_conversion['day']==6])
+
+        weeksummary_percent = pd.merge(Monday_df,Tuesday_df,how='inner',on='hour')
+        weeksummary_percent = weeksummary_percent.merge(Wednesday_df,how='inner',on='hour')
+        weeksummary_percent = weeksummary_percent.merge(Thursday_df,how='inner',on='hour')
+        weeksummary_percent = weeksummary_percent.merge(Friday_df,how='inner',on='hour')
+        weeksummary_percent = weeksummary_percent.merge(Saturday_df,how='inner',on='hour')
+        weeksummary_percent = weeksummary_percent.merge(Sunday_df,how='inner',on='hour')
+        weeksummary_percent.columns = ["hour",'Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
+        # weeksummary_percent.to_excel("S111.xlsx")
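+        # Result: 24 rows (one per hour); each weekday column holds that day's
+        # hour-of-day budget weights and sums to roughly 1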
+        return weeksummary_percent
+
+
+
+if __name__ == '__main__':
+    adjust_ = AdjustB(campaign_id='281441197839505',time_period='45days')
+    rel = adjust_.merge_conv_traf()
+    print(rel)
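
A minimal usage sketch for the new module (daily_budget is a hypothetical figure, not from the commit): merge_conv_traf() returns per-hour weights that sum to roughly 1, so each hour's share is the daily budget times its weight.

    adjust_ = AdjustB(campaign_id='281441197839505', time_period='45days')
    weights = adjust_.merge_conv_traf()       # columns: hour, SingleDay
    daily_budget = 100.0                      # hypothetical budget in the account currency
    weights['hourly_budget'] = weights['SingleDay'] * daily_budget
    print(weights[['hour', 'hourly_budget']])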

+ 25 - 14
sync_amz_data/public/sp_api_client.py

@@ -34,13 +34,23 @@ class SpApiRequest:
         host="retail-data.cnrgrbcygoap.us-east-1.rds.amazonaws.com",
         database="ansjer_dvadmin",
         port=3306)
+
+        return conn
+
+    @classmethod
+    def mysql_connect_auth_lst(cls):
+        conn = pymysql.connect(user="huangyifan",
+                               password="123456",
+                               host="127.0.0.1",
+                               database="amz_sp_api",
+                               port=3306)
         return conn
 
     @classmethod
     def mysql_connect(cls):
         conn = pymysql.connect(user="huangyifan",
                                password="123456",
-                               host="192.168.1.18",
+                               host="127.0.0.1",
                                database="amz_sp_api",
                                port=3306)
         return conn
@@ -59,9 +69,9 @@ class SpApiRequest:
         if country in [Marketplaces.US, Marketplaces.BR, Marketplaces.CA,Marketplaces.MX]:
             region = 'NA'
         elif country in [Marketplaces.DE,Marketplaces.AE, Marketplaces.BE,  Marketplaces.PL,
-                                    Marketplaces.EG,Marketplaces.ES,  Marketplaces.GB, Marketplaces.IN, Marketplaces.IT,
-                                    Marketplaces.NL, Marketplaces.SA, Marketplaces.SE, Marketplaces.TR,Marketplaces.UK,Marketplaces.FR,
-                                    ]:
+                        Marketplaces.EG,Marketplaces.ES,  Marketplaces.GB, Marketplaces.IN, Marketplaces.IT,
+                        Marketplaces.NL, Marketplaces.SA, Marketplaces.SE, Marketplaces.TR,Marketplaces.UK,Marketplaces.FR,
+                        ]:
             region = 'EU'
         else:
             region = str(country)[-2:]
@@ -133,6 +143,7 @@ class SpApiRequest:
             time.sleep(15)
             print("please wait...")
 
+
     def data_deal(self,decom_df,seller_id):
         decom_df['mainImageUrl'] = decom_df['seller-sku'].map(lambda x: self.get_mainImage_url(x))
         url_columns = [i for i in decom_df.columns if "url" in i.lower()]
@@ -331,7 +342,7 @@ class SpApiRequest:
                     return datetime(1999,12,31,0,0,0)
 
     def update_data(self,df,seller_id,country_code,conn):
-        conn = SpApiRequest.mysql_connect_auth()
+        conn = SpApiRequest.mysql_connect_auth_lst()
         cursor = conn.cursor()
         columns = ['listing-id', 'seller_id',
          'asin1', 'seller-sku', 'title', 'image_link', 'country_code',
@@ -349,7 +360,7 @@ class SpApiRequest:
         marketplace_id = self.marketplace.marketplace_id
         try:
             cursor.execute(f"""select * from
-                                ansjer_dvadmin.seller_listings where seller_id='{seller_id}' and marketplace_id='{marketplace_id}'""")
+                                amz_sp_api.seller_listings where seller_id='{seller_id}' and marketplace_id='{marketplace_id}'""")
             col = [i[0] for i in cursor.description]
             query_rel = cursor.fetchall()
             df_rel = pd.DataFrame(query_rel, columns=col)
@@ -391,7 +402,7 @@ class SpApiRequest:
             try:
                 # print(tuple(delete_list))
                 if len(delete_list)>0:
-                    query = f"""delete from ansjer_dvadmin.seller_listings 
+                    query = f"""delete from amz_sp_api.seller_listings 
                            where (seller_id,marketplace_id,sku,listing_id,product_id) in %s""" #where (seller_id,country_code) in %s"""
                     cursor.execute(query,(delete_list,))
 
@@ -446,9 +457,9 @@ class SpApiRequest:
                               'marketplace_id','quantity','fulfillment_channel',
                               'price','opendate','status','update_datetime','product-id','product-id-type'
                               ]
-            conn = SpApiRequest.mysql_connect_auth()
+            conn = SpApiRequest.mysql_connect_auth_lst()
             cursor = conn.cursor()
-            cursor.execute("""select product_id,asin from (select * from ansjer_dvadmin.seller_listings where asin is not null 
+            cursor.execute("""select product_id,asin from (select * from amz_sp_api.seller_listings where asin is not null 
                                 and asin<>'' and product_id is not null and product_id <>'') t1 group by product_id,asin""")
             query_ = cursor.fetchall()
             col_name = [i[0] for i in cursor.description]
@@ -498,12 +509,12 @@ class SpApiRequest:
             if len(update_df)==0:
             return 'no new data to insert'
             # update_df['country_code'] = update_df['country_code'].map({"GB":"UK"})
-            conn = SpApiRequest.mysql_connect_auth()
+            conn = SpApiRequest.mysql_connect_auth_lst()
             cursor = conn.cursor()
 
             try:
                 insertsql = """insert into
-                ansjer_dvadmin.seller_listings(listing_id,seller_id,asin,sku,title,image_link,country_code,marketplace_id,quantity,
+                amz_sp_api.seller_listings(listing_id,seller_id,asin,sku,title,image_link,country_code,marketplace_id,quantity,
                         fulfillment_channel,price,launch_datetime,status,update_datetime,product_id,product_id_type)
                 values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
                 conn.begin()
@@ -693,10 +704,10 @@ class SpApiRequest:
 
 
 if __name__ == '__main__':
-    # SpApiRequest.get_allShops("GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL")
+    SpApiRequest.get_allShops("GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL")
 
-    rel = SpApiRequest.get_catelog(account_name='ANLAPUS_US',country=Marketplaces.US,asin='B0BVXB4KT9')
-    print(rel)
+    # rel = SpApiRequest.get_catelog(account_name='ANLAPUS_US',country=Marketplaces.US,asin='B0BVXB4KT9')
+    # print(rel)
 
 """
 create database amz_sp_api;