Просмотр исходного кода

Deal with the error in Data_ETL

huangyifan 1 год назад
Родитель
Commit
bae0478abb

+ 6 - 0
start_sync_amz.py

@@ -1,3 +1,5 @@
+import time
+
 from sync_amz_data.settings import LOG_CONF
 import logging.config
 logging.config.dictConfig(LOG_CONF)
@@ -48,6 +50,7 @@ def amz_report(AWS_CREDENTIALS,para=None):
         sb_report.reportV2_targetsRecord_ETL(conn,params={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
         sb_report.reportV2_targetsVideo_ETL(conn,params={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
         conn.close()
+        time.sleep(5)
 
         conn = SP_ETL(**AWS_CREDENTIALS).clickhouse_connect()
         sp_report = SP_ETL(**AWS_CREDENTIALS)
@@ -59,6 +62,8 @@ def amz_report(AWS_CREDENTIALS,para=None):
         sp_report.reportV3_advertiser_spAdvertisedProductETL(conn,params={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
         sp_report.reportV3_asin_spPurchasedProductETL(conn,params={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
         conn.close()
+        time.sleep(5)
+
 
         conn = SD_ETL(**AWS_CREDENTIALS).clickhouse_connect()
         sd_report = SD_ETL(**AWS_CREDENTIALS)
@@ -89,6 +94,7 @@ def amz_report(AWS_CREDENTIALS,para=None):
         # sd_report.reportV2_targets_matchedTarget_t2_ETL(conn,params=para)
         # sd_report.reportV2_targets_matchedTarget_t3_ETL(conn,params=para)
         conn.close()
+        time.sleep(5)
 
 if __name__ == '__main__':
     # AccountTask("3006125408623189").do({"record": "portfolios"})

+ 12 - 16
start_sync_amz_RightNowRun.py

@@ -34,11 +34,7 @@ def amz_report(conn,AWS_CREDENTIALS,para=None):
     sb_report.reportV2_searchtermsVideo_ETL(conn,params=para)
     sb_report.reportV2_targetsRecord_ETL(conn,params=para)
     sb_report.reportV2_targetsVideo_ETL(conn,params=para)
-    # time.sleep(3)
-    # conn.close()
-    # time.sleep(3)
 
-    # conn = SP_ETL(**AWS_CREDENTIALS).clickhouse_connect()
     sp_report = SP_ETL(**AWS_CREDENTIALS)
     sp_report.reportV3_campaign_spCampaignsETL(conn,params=para)
     sp_report.reportV3_adGroup_spCampaignsETL(conn,params=para)
@@ -47,10 +43,7 @@ def amz_report(conn,AWS_CREDENTIALS,para=None):
     sp_report.reportV3_searchTerm_spSearchTermETL(conn,params=para)
     sp_report.reportV3_advertiser_spAdvertisedProductETL(conn,params=para)
     sp_report.reportV3_asin_spPurchasedProductETL(conn,params=para)
-    # time.sleep(3)
-    # conn.close()
-    # time.sleep(3)
-    # conn = SD_ETL(**AWS_CREDENTIALS).clickhouse_connect()
+
     sd_report = SD_ETL(**AWS_CREDENTIALS)
 
     sd_report.reportV3_campaign_sdCampaigns_ETL(conn,params=para)
@@ -62,6 +55,7 @@ def amz_report(conn,AWS_CREDENTIALS,para=None):
     sd_report.reportV3_asin_sdPurchasedProduct_ETL(conn,params=para)
     sd_report.reportV3_advertiser_sdAdvertisedProduct_ETL(conn,params=para)
 
+    """
     # sd_report.reportV2_campaignsRecord_t2_ETL(conn,params=para)
     # sd_report.reportV2_campaignsRecord_t3_ETL(conn,params=para)
     # sd_report.reportV2_adGroupsRecord_t2_ETL(conn,params=para)
@@ -78,6 +72,7 @@ def amz_report(conn,AWS_CREDENTIALS,para=None):
     # sd_report.reportV2_adGroups_matchedTarget_t3_ETL(conn,params=para)
     # sd_report.reportV2_targets_matchedTarget_t2_ETL(conn,params=para)
     # sd_report.reportV2_targets_matchedTarget_t3_ETL(conn,params=para)
+    """
 
 if __name__ == '__main__':
     # AccountTask("3006125408623189").do({"record": "portfolios"})
@@ -93,15 +88,16 @@ if __name__ == '__main__':
 
     refresh_token = shop_infos(AWS_CREDENTIALS['profile_id'])['refresh_token']
     AWS_CREDENTIALS['refresh_token'] = refresh_token
-    amz_report(conn, AWS_CREDENTIALS=AWS_CREDENTIALS)
+    # amz_report(conn, AWS_CREDENTIALS=AWS_CREDENTIALS)
 
-    # list_date = ["2024-01-12","2024-01-11"]
-    # list_date = [f'2024-02-{"0"+str(i) if len(str(i))==1 else i}' for i in range(12,18)]
-    # # print(list_date)
-    # for date_ in list_date:
-    #     print(date_)
-    #     print(date_.replace("-",""))
-    #     amz_report(conn,AWS_CREDENTIALS,para={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
+    # list_date = ["2024-03-01","2024-03-24"]
+    list_date = [f'2024-03-{"0"+str(i) if len(str(i))==1 else i}' for i in range(31,32)]
+    # print(list_date)
+    for date_ in list_date:
+        print(date_)
+        print(date_.replace("-",""))
+        amz_report(conn,AWS_CREDENTIALS,para={"startDate":date_,"endDate":date_,"date":date_.replace("-","")})
+        time.sleep(5)
 
 
     conn.close()

+ 10 - 2
sync_amz_data/DataTransform/Data_ETL.py

@@ -488,6 +488,10 @@ class SP_ETL(SPClient, Common_ETLMethod):
         # print(list_report)
         df_report = pd.json_normalize(list_report)
         df_report = self.type_trans(df_report, params['columns'], timeZone_, extra_columns=[])
+        # TODO
+        df_report[df_report.select_dtypes(["object"]).columns] = df_report[df_report.select_dtypes(["object"]).columns].fillna('')
+        df_report[df_report.select_dtypes(["string"]).columns] = df_report[df_report.select_dtypes(["string"]).columns].fillna('')
+
         # print(df_report.info())
         conn.insert_df("AmazonReport.SP_spAdvertisedProduct_advertiserV3", df_report[params['columns']])
         time.sleep(0.05)
@@ -528,7 +532,10 @@ class SP_ETL(SPClient, Common_ETLMethod):
         # print(list_report)
         df_report = pd.json_normalize(list_report)
         df_report = self.type_trans(df_report, params['columns'], timeZone_, extra_columns=[])
-
+        df_report['advertisedAsin'] = df_report['advertisedAsin'].map(lambda x: str(x) if x is not None else '-')
+        df_report['advertisedSku'] = df_report['advertisedSku'].map(lambda x: str(x) if x is not None else '-')
+        print(df_report.dtypes)
+        # df_report.to_excel('ATEST.xlsx')
         conn.insert_df("AmazonReport.SP_spPurchasedProduct_asinV3", df_report[params['columns']])
         time.sleep(0.05)
         print("插入完成SP_spPurchasedProduct_asinV3")
@@ -840,7 +847,8 @@ class SB_ETL(SBClient, Common_ETLMethod):
         # print(list_report)
         df_report = pd.json_normalize(list_report)
         df_report = self.type_trans(df_report, params['columns'], timeZone_, extra_columns=[])
-
+        df_report[df_report.select_dtypes(["object"]).columns] = df_report[df_report.select_dtypes(["object"]).columns].fillna('')
+        df_report[df_report.select_dtypes(["string"]).columns] = df_report[df_report.select_dtypes(["string"]).columns].fillna('')
         # print(df_report)
         conn.insert_df("AmazonReport.SB_sbPurchasedProduct_asinV3", df_report[params['columns']])
         time.sleep(0.05)

+ 2 - 1
sync_amz_data/public/sp_api_client.py

@@ -656,7 +656,7 @@ class SpApiRequest:
         if level == 'SKU':
             query_judge = f"""select count(*) from asj_ads.SalesAndTrafficByAsin where data_date='{shopReportday}' and countryCode='{countryCode}' and childAsin is not Null and sku is not Null"""
         elif level == 'CHILD':
-            query_judge = f"""select count(*) from asj_ads.SalesAndTrafficByAsin where data_date='{shopReportday}' and countryCode='{countryCode}' and sku is null"""
+            query_judge = f"""select count(*) from asj_ads.SalesAndTrafficByAsin where data_date='{shopReportday}' and countryCode='{countryCode}' and sku is null and childAsin is not null"""
         elif level == 'PARENT':
             query_judge = f"""select count(*) from asj_ads.SalesAndTrafficByAsin where data_date='{shopReportday}' and countryCode='{countryCode}' and sku is null and childAsin is null"""
         else:
@@ -701,6 +701,7 @@ class SpApiRequest:
             values (%s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s,%s, %s,%s,%s,%s,%s,%s)
         """  # ok
         try:
+            # TODO
             conn.begin()
             cursor.executemany(sql, data_rel)
             conn.commit()

+ 25 - 5
sync_get_order_data.py

@@ -1,20 +1,40 @@
-
+import time
 import warnings
 warnings.filterwarnings('ignore')
 from apscheduler.schedulers.blocking import BlockingScheduler
 from sync_amz_data.settings import MYSQL_AUTH_CONF, MYSQL_DATA_CONF
 from sync_amz_data.public import sp_api_client
+from datetime import datetime,timedelta
+
 def func_run():
     try:
         for days in (-2,-3):
+
             sp_api_client.SpApiRequest.get_allShops("GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL",days=days,**{})
+            time.sleep(5)
     except Exception as e:
         print(e)
     try:
-        for days in (-2,-3):
-           sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT",days=days,**{"level":"SKU"})
-           sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT",days=days,**{"level":"PARENT"})
-           sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT",days=days,**{"level":"CHILD"})
+        for days in range(-3,-63,-1): #(-2,-12,-1):
+            conn = sp_api_client.SpApiRequest.Data_auth()
+            cursor = conn.cursor()
+            delete_date = (datetime.now() + timedelta(days=days-7)).strftime("%Y-%m-%d")
+            cursor.execute(f"delete from asj_ads.SalesAndTrafficByAsin where data_date='{delete_date}'")
+            conn.commit()
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT", days=days-7, **{"level": "CHILD"})
+            time.sleep(3.5)
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT", days=days-7, **{"level": "SKU"})
+            time.sleep(3.5)
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT", days=days-7, **{"level": "PARENT"})
+            time.sleep(3.5)
+
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT", days=days, **{"level": "CHILD"})
+            time.sleep(3.5)
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT",days=days,**{"level":"SKU"})
+            time.sleep(3.5)
+            sp_api_client.SpApiRequest.get_allShops("GET_SALES_AND_TRAFFIC_REPORT",days=days,**{"level":"PARENT"})
+            time.sleep(3.5)
+
     except Exception as e:
         print(e)