Quellcode durchsuchen

Merge branch 'yifan' of ASJ_ADS/sync_amz_data into master

添加函数注释
yifan_huang96 vor 1 Jahr
Ursprung
Commit
5904a9fc64
1 geänderte Dateien mit 18 neuen und 3 gelöschten Zeilen
  1. 18 3
      sync_amz_data/DataTransform/Data_ETL.py

+ 18 - 3
sync_amz_data/DataTransform/Data_ETL.py

@@ -10,22 +10,34 @@ from datetime import datetime,timezone
 
 class Common_ETLMethod:
     def columnsName_modify(self,df):
+        """
+        Replace "." with "_" in every column name and lower-case it; assigns df.columns in place and returns df.
+        """
         df.columns = [i.replace(".","_").lower() for i in df.columns]
         return df
 
     def time_stamp_convert(self,df,time_columns:list):
+        """Convert the epoch columns in time_columns (presumably milliseconds; scaled by 1e6 for pd.to_datetime) to datetime64; mutates and returns df.
+        NOTE(review): unit-less astype("datetime64") is rejected by pandas 2.x ("datetime64[ns]" is expected) -- confirm the pinned pandas version.
+        """
         for time_column in time_columns:
             df[time_column] = pd.to_datetime(df[time_column]*1000000).map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
         df[time_columns] = df[time_columns].astype("datetime64")
         return df
 
     def TZ_Deal(self,df, time_columns):
+        """Parse each string in time_columns with parse(), reformat it without timezone info, and cast to datetime64; mutates and returns df.
+        NOTE(review): strftime drops the offset without shifting the time, so values are UTC only if the inputs already are -- confirm.
+        """
         for time_column in time_columns:
             df[time_column] = df[time_column].map(lambda x: parse(x).strftime("%Y-%m-%d %H:%M:%S"))
         df[time_columns] = df[time_columns].astype("datetime64")
         return df
 
     def placement_segmentsplit(self,df,segment):
+        """
+        拆分placement与percentage列
+        """
         df[segment] = df[segment].astype("string")
         df[segment+str("_percentage")] = df[segment].str.extract("'percentage':.+([\d\.]{1,}),").astype('float32')
         df[segment+str("_placement")] = df[segment].str.extract("'placement':.+'(.+)'")
@@ -34,6 +46,9 @@ class Common_ETLMethod:
         return df
 
     def expression_split(self,df,segment):
+        """
+        拆分type,value列
+        """
         df[segment] = df[segment].astype("string")
         df[segment+str("_type")] = df[segment].str.extract(r"'type':\s{0,1}'(.+?)',")
         df[segment+str("_value")] = df[segment].str.extract(r"'value':\s{0,1}[',[,{](.+)'")
@@ -133,15 +148,15 @@ class SD_ETL(SDClient,Common_ETLMethod):
     def campaigns_ETL(self):
         list_campaign_SD = self.get_campaigns()
         df_campaign = pd.json_normalize(list_campaign_SD)
-        df_campaign['startDate'] = df_campaign['startDate'].map(lambda x: datetime.strptime(x,"%Y%m%d").date())
-        df_campaign['portfolioId'] = df_campaign['portfolioId'].fillna(-1).astype("int64")
+        df_campaign['startDate'] = df_campaign['startDate'].map(lambda x: datetime.strptime(x,"%Y%m%d").date()) # parse "YYYYMMDD" strings into date objects
+        df_campaign['portfolioId'] = df_campaign['portfolioId'].fillna(-1).astype("int64") # fill missing portfolioId with -1 so the column can be cast to int64
         return self.columnsName_modify(df_campaign)
 
     def adGroups_ETL(self,**param):
         list_adGroups_SD = [row for _ in list(self.iter_adGroups(**param)) for row in _]
         df_adGroups_SD = pd.json_normalize(list_adGroups_SD)
         tactic = {"T00020":"Contextual targeting","T00030":"Audiences targeting"}
-        df_adGroups_SD["tactic_type"] = df_adGroups_SD['tactic'].map(tactic)
+        df_adGroups_SD["tactic_type"] = df_adGroups_SD['tactic'].map(tactic) # human-readable label for tactic codes T00020/T00030 (codes not in the dict map to NaN)
         return self.columnsName_modify(df_adGroups_SD)
 
     def ads_ETL(self):