|
@@ -10,22 +10,34 @@ from datetime import datetime,timezone
|
|
|
|
|
|
class Common_ETLMethod:
|
|
|
def columnsName_modify(self, df):
    """Normalize the column labels of ``df``.

    Every ``.`` in a label is replaced with ``_`` and the label is
    lower-cased. The frame is modified in place and the same object is
    returned.
    """
    normalized = []
    for label in df.columns:
        normalized.append(label.replace(".", "_").lower())
    df.columns = normalized
    return df
|
|
|
|
|
|
def time_stamp_convert(self, df, time_columns: list):
    """Convert epoch-millisecond columns to datetimes with second precision.

    Args:
        df: Frame holding the timestamp columns; it is modified in place.
        time_columns: Names of the columns to convert. Values are read as
            milliseconds since the Unix epoch.

    Returns:
        The same ``df`` object, with each listed column stored as naive
        ``datetime64[ns]`` values truncated to whole seconds. Missing
        values become ``NaT``.
    """
    for time_column in time_columns:
        # Parse milliseconds directly; missing values stay NaT instead of
        # crashing in a per-value strftime call.
        parsed = pd.to_datetime(df[time_column], unit="ms")
        # Drop sub-second precision, then pin the unit explicitly: a
        # unit-less "datetime64" cast is rejected by pandas >= 2.0.
        df[time_column] = parsed.dt.floor("s").astype("datetime64[ns]")
    return df
|
|
|
|
|
|
def TZ_Deal(self, df, time_columns):
    """Convert timestamp-string columns to naive datetimes (second precision).

    Args:
        df: Frame holding the string columns; it is modified in place.
        time_columns: Names of the columns to convert.

    Returns:
        The same ``df`` object, with each listed column stored as
        ``datetime64[ns]``. Null cells become ``NaT``.

    NOTE(review): any UTC offset in the input is discarded, not converted,
    so the result is only UTC when the input already is (e.g. a trailing
    ``Z``). ``parse`` is presumably ``dateutil.parser.parse`` -- confirm
    against the module imports.
    """
    for time_column in time_columns:
        # Keep the parsed wall-clock time, dropping the tz info and the
        # sub-second part; null cells are skipped rather than handed to
        # parse().
        wall_times = df[time_column].map(
            lambda raw: parse(raw).replace(tzinfo=None, microsecond=0),
            na_action="ignore",
        )
        # Pin the unit explicitly: a unit-less "datetime64" cast is
        # rejected by pandas >= 2.0.
        df[time_column] = pd.to_datetime(wall_times).astype("datetime64[ns]")
    return df
|
|
|
|
|
|
def placement_segmentsplit(self,df,segment):
|
|
|
+ """
|
|
|
+ 拆分placement与percentage列
|
|
|
+ """
|
|
|
df[segment] = df[segment].astype("string")
|
|
|
df[segment+str("_percentage")] = df[segment].str.extract("'percentage':.+([\d\.]{1,}),").astype('float32')
|
|
|
df[segment+str("_placement")] = df[segment].str.extract("'placement':.+'(.+)'")
|
|
@@ -34,6 +46,9 @@ class Common_ETLMethod:
|
|
|
return df
|
|
|
|
|
|
def expression_split(self,df,segment):
|
|
|
+ """
|
|
|
+ 拆分type,value列
|
|
|
+ """
|
|
|
df[segment] = df[segment].astype("string")
|
|
|
df[segment+str("_type")] = df[segment].str.extract(r"'type':\s{0,1}'(.+?)',")
|
|
|
df[segment+str("_value")] = df[segment].str.extract(r"'value':\s{0,1}[',[,{](.+)'")
|
|
@@ -133,15 +148,15 @@ class SD_ETL(SDClient,Common_ETLMethod):
|
|
|
def campaigns_ETL(self):
    """Build a flat DataFrame from the result of ``self.get_campaigns()``.

    ``startDate`` is parsed from ``YYYYMMDD`` strings into ``date`` objects,
    ``portfolioId`` is cast to ``int64`` with missing values set to -1, and
    the column labels are normalized via ``columnsName_modify``.
    """
    campaigns = self.get_campaigns()
    frame = pd.json_normalize(campaigns)

    def _to_date(raw):
        # "YYYYMMDD" string -> datetime.date
        return datetime.strptime(raw, "%Y%m%d").date()

    frame['startDate'] = frame['startDate'].map(_to_date)

    # Rows without a portfolio get -1 so the column can be cast to int64.
    portfolio_ids = frame['portfolioId'].fillna(-1)
    frame['portfolioId'] = portfolio_ids.astype("int64")

    return self.columnsName_modify(frame)
|
|
|
|
|
|
def adGroups_ETL(self, **param):
    """Build a flat DataFrame from the batches of ``self.iter_adGroups``.

    ``param`` is forwarded unchanged to ``iter_adGroups``. Each yielded
    batch is flattened into one record list, and a ``tactic_type`` column
    is added holding the readable label for the ``tactic`` code. Codes
    missing from the lookup map to NaN. Column labels are normalized via
    ``columnsName_modify``.
    """
    records = []
    for batch in self.iter_adGroups(**param):
        records.extend(batch)

    frame = pd.json_normalize(records)

    # Readable labels for the tactic codes T00020 / T00030.
    tactic_labels = {
        "T00020": "Contextual targeting",
        "T00030": "Audiences targeting",
    }
    frame["tactic_type"] = frame['tactic'].map(tactic_labels)

    return self.columnsName_modify(frame)
|
|
|
|
|
|
def ads_ETL(self):
|