Data_ETL.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. from sync_amz_data.public.amz_ad_client_02 import SPClient,Account,SBClient,SDClient
  2. import pandas as pd
  3. import numpy as np
  4. from dateutil.parser import parse
  5. pd.set_option('display.max_columns', None)
  6. import warnings
  7. warnings.filterwarnings('ignore')
  8. pd.set_option('expand_frame_repr', False)
  9. class Common_ETLMethod:
  10. def columnsName_modify(self,df):
  11. df.columns = [i.replace(".","_").lower() for i in df.columns]
  12. return df
  13. def time_stamp_convert(self,df,time_columns:list):
  14. for time_column in time_columns:
  15. df[time_column] = pd.to_datetime(df[time_column]*1000000).map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
  16. df[time_columns] = df[time_columns].astype("datetime64")
  17. return df
  18. def TZ_Deal(self,df, time_columns):
  19. for time_column in time_columns:
  20. df[time_column] = df[time_column].map(lambda x: parse(x).strftime("%Y-%m-%d %H:%M:%S"))
  21. df[time_columns] = df[time_columns].astype("datetime64")
  22. return df
  23. def placement_segmentsplit(self,df,segment):
  24. df[segment] = df[segment].astype("string")
  25. df[segment+str("_percentage")] = df[segment].str.extract("'percentage':.+([\d\.]{1,}),").astype('float32')
  26. df[segment+str("_placement")] = df[segment].str.extract("'placement':.+'(.+)'")
  27. df.replace(['nan','Nan','NaN'],np.nan,inplace=True)
  28. df.drop(columns=[segment],inplace=True)
  29. return df
  30. def expression_split(self,df,segment):
  31. df[segment] = df[segment].astype("string")
  32. df[segment+str("type")] = df[segment].str.extract("'type':.+'(.+)',")
  33. df[segment+str("value")] = df[segment].str.extract("'value':.+'(.+)'")
  34. df.replace(['nan','Nan','NaN'],np.nan,inplace=True)
  35. df.drop(columns=[segment],inplace=True)
  36. return df
  37. class Acount_ETL(Account,Common_ETLMethod):
  38. def portfolio_ETL(self):
  39. list_portfolio = self.get_portfolio()
  40. df_portfolio = pd.json_normalize(list_portfolio)
  41. print(self.columnsName_modify(df_portfolio))
  42. return self.columnsName_modify(df_portfolio)
  43. class SP_ETL(SPClient,Common_ETLMethod):
  44. def campaigns_ETL(self):
  45. list_campaign_SP = list(self.iter_campaigns(**{"includeExtendedDataFields":True}))
  46. df_campaign = pd.json_normalize(list_campaign_SP)
  47. df_campaign = self.placement_segmentsplit(df_campaign, "dynamicBidding.placementBidding")
  48. df_campaign = self.TZ_Deal(df_campaign,["extendedData.creationDateTime","extendedData.lastUpdateDateTime"])
  49. # print(df_campaign)
  50. return self.columnsName_modify(df_campaign)
  51. def adGroup_ETL(self):
  52. list_adGroup_SP = list(self.iter_adGroups(**{"includeExtendedDataFields":True}))
  53. df_adGroup_SP = pd.json_normalize(list_adGroup_SP)
  54. df_adGroup_SP = self.TZ_Deal(df_adGroup_SP,["extendedData.creationDateTime","extendedData.lastUpdateDateTime"])
  55. return self.columnsName_modify(df_adGroup_SP)
  56. def ads_ETL(self):
  57. list_adId_SP = list(self.iter_ads(**{"includeExtendedDataFields":True}))
  58. df_adId_SP = pd.json_normalize(list_adId_SP)
  59. df_adId_SP = self.TZ_Deal(df_adId_SP,["extendedData.creationDateTime", "extendedData.lastUpdateDateTime"])
  60. return self.columnsName_modify(df_adId_SP)
  61. def keywords_ETL(self):
  62. list_keywords_SP = list(self.iter_keywords(**{"includeExtendedDataFields":True}))
  63. df_keywords_SP = pd.json_normalize(list_keywords_SP)
  64. df_keywords_SP = self.TZ_Deal(df_keywords_SP, ["extendedData.creationDateTime", "extendedData.lastUpdateDateTime"])
  65. return self.columnsName_modify(df_keywords_SP)
  66. def targets_ETL(self):
  67. list_targets = list(self.iter_targets())
  68. df_targets = pd.json_normalize(list_targets)
  69. df_targets = self.TZ_Deal(df_targets, ["extendedData.creationDateTime", "extendedData.lastUpdateDateTime"])
  70. return self.columnsName_modify(df_targets)
  71. def budget_ETL(self,campaign_ids:list):
  72. list_budget = self.get_budget(campaign_ids = campaign_ids)['success']
  73. df_budget = pd.json_normalize(list_budget)
  74. df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
  75. print(df_budget)
  76. class SB_ETL(SBClient,Common_ETLMethod):
  77. def campaigns_ETL(self):
  78. list_campaign_SB = list(self.iter_campaigns(**{"includeExtendedDataFields":True}))
  79. df_campaign = pd.json_normalize(list_campaign_SB)
  80. df_campaign = self.placement_segmentsplit(df_campaign, "bidding.bidAdjustmentsByPlacement")
  81. df_campaign = self.time_stamp_convert(df_campaign,["extendedData.creationDate","extendedData.lastUpdateDate"])
  82. # print(df_campaign)
  83. return self.columnsName_modify(df_campaign)
  84. def adGroup_ETL(self):
  85. list_adGroup_SB = list(self.iter_adGroups(**{"includeExtendedDataFields":True}))
  86. df_adGroup_SP = pd.json_normalize(list_adGroup_SB)
  87. df_adGroup_SP = self.time_stamp_convert(df_adGroup_SP,["extendedData.creationDate","extendedData.lastUpdateDate"])
  88. return self.columnsName_modify(df_adGroup_SP)
  89. def ads_ETL(self):
  90. list_adId_SB = list(self.iter_ads(**{"includeExtendedDataFields":True}))
  91. df_adId_SP = pd.json_normalize(list_adId_SB)
  92. df_adId_SP = self.time_stamp_convert(df_adId_SP,["extendedData.creationDate","extendedData.lastUpdateDate"])
  93. return self.columnsName_modify(df_adId_SP)
  94. def keyword_ETL(self):
  95. list_keywords_SB = [row for _ in list(self.iter_keywords()) for row in _]
  96. df_keywords_SP = pd.json_normalize(list_keywords_SB)
  97. return self.columnsName_modify(df_keywords_SP)
  98. def targets_ETL(self):
  99. list_targets = list(self.iter_targets())
  100. df_targets = pd.json_normalize(list_targets)
  101. # df_targets = self.TZ_Deal(df_targets, ["extendedData.creationDateTime", "extendedData.lastUpdateDateTime"])
  102. df_targets = self.expression_split(df_targets,"resolvedExpressions")
  103. return self.columnsName_modify(df_targets)
  104. def budget_ETL(self,campaign_ids:list):
  105. list_budget = self.get_budget(campaignIds = campaign_ids)['success']
  106. df_budget = pd.json_normalize(list_budget)
  107. df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
  108. print(df_budget)
  109. if __name__ == '__main__':
  110. AWS_CREDENTIALS = {
  111. 'lwa_client_id': 'amzn1.application-oa2-client.ebd701cd07854fb38c37ee49ec4ba109',
  112. 'refresh_token': "Atzr|IwEBIL4ur8kbcwRyxVu_srprAAoTYzujnBvA6jU-0SMxkRgOhGjYJSUNGKvw24EQwJa1jG5RM76mQD2P22AKSq8qSD94LddoXGdKDO74eQVYl0RhuqOMFqdrEZpp1p4bIR6_N8VeSJDHr7UCuo8FiabkSHrkq7tsNvRP-yI-bnpQv4EayPBh7YwHVX3hYdRbhxaBvgJENgCuiEPb35Q2-Z6w6ujjiKUAK2VSbCFpENlEfcHNsjDeY7RCvFlwlCoHj1IeiNIaFTE9yXFu3aEWlExe3LzHv6PZyunEi88QJSXKSh56Um0e0eEg05rMv-VBM83cAqc5POmZnTP1vUdZO8fQv3NFLZ-xU6e1WQVxVPi5Cyqk4jYhGf1Y9t98N654y0tVvw74qNIsTrB-8bGS0Uhfe24oBEWmzObvBY3zhtT1d42myGUJv4pMTU6yPoS83zhPKm3LbUDEpBA1hvvc_09jHk7vUEAuFB-UAZzlht2C1yklzQ",
  113. 'lwa_client_secret': 'cbf0514186db4df91e04a8905f0a91b605eae4201254ced879d8bb90df4b474d',
  114. 'profile_id': "3006125408623189"
  115. }
  116. ac_etl = SB_ETL(**AWS_CREDENTIALS)
  117. # print(ac_etl.budget_ETL(campaign_ids=["126327624499318"]))
  118. print(ac_etl.get_budget(["144123082741012379"]))