Browse Source

modify column

huangyifan 1 year ago
parent
commit
914d769735
1 changed file with 61 additions and 5 deletions
  1. 61 5
      sync_amz_data/DataTransform/Data_ETL.py

+ 61 - 5
sync_amz_data/DataTransform/Data_ETL.py

@@ -7,8 +7,13 @@ import warnings
 warnings.filterwarnings('ignore')
 pd.set_option('expand_frame_repr', False)
 from datetime import datetime,timezone,timedelta
-
+import clickhouse_connect
 class Common_ETLMethod:
+    def clickhouse_connect(self):  # Create a new ClickHouse client on every call and return it; the caller owns the returned client.
+        conn =  clickhouse_connect.get_client(host='3.93.43.158', port=8123, username='root',  # Inside the body this name resolves to the imported module, not to this method.
+                                               password='6f0eyLuiVn3slzbGWpzI')  # NOTE(review): host, user and password are hardcoded in source control; consider loading them from config/env and rotating this password.
+        return conn
+
     def columnsName_modify(self,df):
         """
         列名.换_,设置全部小写
@@ -131,7 +136,13 @@ class SP_ETL(SPClient,Common_ETLMethod):
         if len(df_needManualAdd)>0:
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
+        df_report['campaignId'] = df_report['campaignId'].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
         params['columns'].append('profileId')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spCampaigns_campaignV3",df_report[params['columns']])
+        print("插入完成")
         return df_report[params['columns']]
 
     def reportV3_adGroup_spCampaignsETL(self,**params):
@@ -159,8 +170,15 @@ class SP_ETL(SPClient,Common_ETLMethod):
         df_needManualAdd = [i for i in params['columns'] if i not in df_report.columns]
         if len(df_needManualAdd)>0:
             df_report[df_needManualAdd] = None
-        df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+        df_report['profileId'] = self.profile_id
+
+        df_report['adGroupId'] = df_report['adGroupId'].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spCampaigns_adGroupV3",df_report[params['columns']])
+        print("插入完成")
         return df_report[params['columns']]
 #
     def reportV3_campaignPlacement_spCampaignsETL(self,**params):
@@ -190,6 +208,14 @@ class SP_ETL(SPClient,Common_ETLMethod):
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+
+        df_report['placementClassification'] = df_report['placementClassification'].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spCampaigns_placementV3",df_report[params['columns']])
+        print("插入完成")
+
         return df_report[params['columns']]
 
 
@@ -223,6 +249,14 @@ class SP_ETL(SPClient,Common_ETLMethod):
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+
+        df_report[['keywordId','portfolioId','campaignId','adGroupId']] = df_report[['keywordId','portfolioId','campaignId','adGroupId']].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spTargeting_targetingV3",df_report[params['columns']])
+        print("插入完成")
+
         return df_report[params['columns']]
 
     def reportV3_searchTerm_spSearchTermETL(self,**params):
@@ -255,6 +289,14 @@ class SP_ETL(SPClient,Common_ETLMethod):
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+
+        df_report[['keywordId','portfolioId','campaignId','adGroupId']] = df_report[['keywordId','portfolioId','campaignId','adGroupId']].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spSearchTerm_searchTermV3",df_report[params['columns']])
+        print("插入完成")
+
         return df_report[params['columns']]
 
     def reportV3_advertiser_spAdvertisedProductETL(self,**params):
@@ -285,6 +327,14 @@ class SP_ETL(SPClient,Common_ETLMethod):
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+
+        df_report[['campaignId','adGroupId','adId','portfolioId']] = df_report[['campaignId','adGroupId','adId','portfolioId']].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spAdvertisedProduct_advertiserV3",df_report[params['columns']])
+        print("插入完成")
+
         return df_report[params['columns']]
 
     def reportV3_asin_spPurchasedProductETL(self,**params):
@@ -313,6 +363,14 @@ class SP_ETL(SPClient,Common_ETLMethod):
             df_report[df_needManualAdd] = None
         df_report['profileId'] = self.profile_id
         params['columns'].append('profileId')
+
+        df_report[['campaignId','adGroupId','keywordId','portfolioId']] = df_report[['campaignId','adGroupId','keywordId','portfolioId']].astype("string")
+        df_report['date'] = df_report['date'].astype("datetime64")
+        df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
+        conn = self.clickhouse_connect()
+        conn.insert_df("AllReport.SP_spPurchasedProduct_asinV3",df_report[params['columns']])
+        print("插入完成")
+
         return df_report[params['columns']]
 
 class SB_ETL(SBClient,Common_ETLMethod):
@@ -1780,8 +1838,7 @@ class SD_ETL(SDClient,Common_ETLMethod):
         today = datetime.today()
         if params.get("date") == None:
             params["date"] = (
-                        datetime(today.year, today.month, today.day, tzinfo=timezone.utc) - timedelta(days=1)).strftime(
-                "%Y%m%d")
+                        datetime(today.year, today.month, today.day, tzinfo=timezone.utc) - timedelta(days=1)).strftime("%Y%m%d")
         params['record_type'] = 'targets'
         if params.get('metrics') == None:
             params['metrics'] = self.targets_MT_metrics
@@ -1808,4 +1865,3 @@ if __name__ == '__main__':
     # print(ac_etl.budget_ETL(campaign_ids=["126327624499318"]))
     print(ac_etl.reportV3_campaign_spCampaignsETL(**{}))
 
-    ####