Browse Source

add_SP_get infos

huangyifan 1 year ago
parent
commit
b30390e013
2 changed files with 87 additions and 23 deletions
  1. 59 17
      sync_amz_data/DataTransform/Data_ETL.py
  2. 28 6
      sync_amz_data/public/amz_ad_client.py

+ 59 - 17
sync_amz_data/DataTransform/Data_ETL.py

@@ -1,27 +1,20 @@
-from sync_amz_data.public.amz_ad_client import SPClient,Account
+from sync_amz_data.public.amz_ad_client import SPClient,Account,SBClient,SDClient
 import pandas as pd
 import numpy as np
 from dateutil.parser import parse
+pd.set_option('display.max_columns', None)
+import warnings
+warnings.filterwarnings('ignore')
+pd.set_option('expand_frame_repr', False)
 
-class Acount_ETL(Account):
-    def columnsName_modify(self,df):
-        df.columns = [i.replace(".","_") for i in df.columns]
-        return df
-
-    def portfolio_ETL(self):
-        list_portfolio = self.get_portfolio()
-        df_portfolio = pd.json_normalize(list_portfolio)
-        print(self.columnsName_modify(df_portfolio))
-        return self.columnsName_modify(df_portfolio)
-
-class SP_ETL(SPClient):
+class Common_ETLMethod:
     def columnsName_modify(self,df):  # Replace every "." in df's column names with "_"; mutates df and returns it.
         df.columns = [i.replace(".","_") for i in df.columns]
         return df
 
     def time_stamp_convert(self,df,time_columns:list):  # Convert the listed columns of df to datetime; mutates df and returns it.
         for time_column in time_columns:
-            df[time_column] = pd.to_datetime(df[time_column])
+            df[time_column] = pd.to_datetime(df[time_column]*1000000).map(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))  # values are scaled by 1e6 before parsing (presumably epoch ms -> ns; TODO confirm), then rendered as second-resolution text
         df[time_columns] = df[time_columns].astype("datetime64")  # NOTE(review): pandas >= 2.0 rejects unit-less "datetime64" in astype; confirm the pinned pandas version
         return df
 
@@ -39,6 +32,22 @@ class SP_ETL(SPClient):
         df.drop(columns=[segment],inplace=True)
         return df
 
+    def expression_split(self,df,segment):  # Replace column `segment` with "<segment>type" / "<segment>value" text columns; mutates df and returns it.
+        df[segment] = df[segment].astype("string")  # stringify each cell so the regexes below can search it
+        df[segment+str("type")] = df[segment].str.extract("'type':.+'(.+)',")  # NOTE(review): both .+ are greedy; with several 'type' entries in one cell the capture may not be the first entry - verify
+        df[segment+str("value")] = df[segment].str.extract("'value':.+'(.+)'")  # same pattern shape for 'value', without the trailing comma
+        df.replace(['nan','Nan','NaN'],np.nan,inplace=True)  # applies to the whole frame, not only the two new columns
+        df.drop(columns=[segment],inplace=True)  # the stringified source column is no longer needed
+        return df
+
+class Acount_ETL(Account,Common_ETLMethod):  # Account client combined with the shared ETL helper methods.
+    def portfolio_ETL(self):  # Fetch portfolios and return them as a flattened DataFrame whose column names use "_" instead of ".".
+        list_portfolio = self.get_portfolio()
+        df_portfolio = pd.json_normalize(list_portfolio)  # nested keys become dotted column names
+        print(self.columnsName_modify(df_portfolio))  # debug print; the rename already happens here, in place
+        return self.columnsName_modify(df_portfolio)  # second call finds no "." left to replace
+
+class SP_ETL(SPClient,Common_ETLMethod):
     def campaigns_ETL(self):
         list_campaign_SP = list(self.iter_campaigns(**{"includeExtendedDataFields":True}))
         df_campaign = pd.json_normalize(list_campaign_SP)
@@ -77,6 +86,39 @@ class SP_ETL(SPClient):
         df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
         print(df_budget)
 
+class SB_ETL(SBClient,Common_ETLMethod):  # SBClient fetchers combined with the shared ETL helper methods.
+    def campaigns_ETL(self):  # Campaigns -> flattened DataFrame with "_"-separated column names.
+        list_campaign_SB = list(self.iter_campaigns(**{"includeExtendedDataFields":True}))
+        df_campaign = pd.json_normalize(list_campaign_SB)
+        df_campaign = self.placement_segmentsplit(df_campaign, "bidding.bidAdjustmentsByPlacement")  # helper body is outside the lines shown here
+        df_campaign = self.time_stamp_convert(df_campaign,["extendedData.creationDate","extendedData.lastUpdateDate"])  # both date columns become datetime
+        # print(df_campaign)
+        return self.columnsName_modify(df_campaign)
+
+    def adGroup_ETL(self):  # Ad groups -> flattened DataFrame with converted date columns.
+        list_adGroup_SB = list(self.iter_adGroups(**{"includeExtendedDataFields":True}))
+        df_adGroup_SP = pd.json_normalize(list_adGroup_SB)  # NOTE(review): local is named *_SP although this is the SB class
+        df_adGroup_SP = self.time_stamp_convert(df_adGroup_SP,["extendedData.creationDate","extendedData.lastUpdateDate"])
+        return self.columnsName_modify(df_adGroup_SP)
+
+    def ads_ETL(self):  # Ads -> flattened DataFrame with converted date columns.
+        list_adId_SB = list(self.iter_ads(**{"includeExtendedDataFields":True}))
+        df_adId_SP = pd.json_normalize(list_adId_SB)  # NOTE(review): local is named *_SP although this is the SB class
+        df_adId_SP = self.time_stamp_convert(df_adId_SP,["extendedData.creationDate","extendedData.lastUpdateDate"])
+        return self.columnsName_modify(df_adId_SP)
+
+    def keyword_ETL(self):  # Keywords -> flattened DataFrame; no timestamp conversion is applied here.
+        list_keywords_SB = [row for _ in list(self.iter_keywords()) for row in _]  # iter_keywords yields one list per page; flatten the pages into rows
+        df_keywords_SP = pd.json_normalize(list_keywords_SB)
+        return self.columnsName_modify(df_keywords_SP)
+
+    def targets_ETL(self):  # Targets -> flattened DataFrame with the "expressions" column split up.
+        list_targets = list(self.iter_targets())
+        df_targets = pd.json_normalize(list_targets)
+        # df_targets = self.TZ_Deal(df_targets, ["extendedData.creationDateTime", "extendedData.lastUpdateDateTime"])
+        df_targets = self.expression_split(df_targets,"expressions")  # yields "expressionstype" / "expressionsvalue" and drops "expressions"
+        return self.columnsName_modify(df_targets)
+
 if __name__ == '__main__':
     AWS_CREDENTIALS = {
         'lwa_client_id': 'amzn1.application-oa2-client.ebd701cd07854fb38c37ee49ec4ba109',
@@ -84,6 +126,6 @@ if __name__ == '__main__':
         'lwa_client_secret': 'cbf0514186db4df91e04a8905f0a91b605eae4201254ced879d8bb90df4b474d',
         'profile_id': "3006125408623189"
     }
-    ac_etl = SP_ETL(**AWS_CREDENTIALS)
-    print(ac_etl.budget_ETL(campaign_ids=["126327624499318"]))
-
+    ac_etl = SB_ETL(**AWS_CREDENTIALS)
+    # print(ac_etl.budget_ETL(campaign_ids=["126327624499318"]))
+    print(ac_etl.targets_ETL())

+ 28 - 6
sync_amz_data/public/amz_ad_client.py

@@ -239,7 +239,7 @@ class SBClient(BaseClient):
             body["maxResults"] = 100
         while True:
             info: dict = self.get_ad_groups(**body)
-            print(info)
+            # print(info)
             yield from info["adGroups"]
             if not info.get("nextToken"):
                 break
@@ -257,14 +257,25 @@ class SBClient(BaseClient):
             body["maxResults"] = 100
         while True:
             info: dict = self.get_ads(**body)
-            print(info)
+            # print(info)
             yield from info["ads"]
             if not info.get("nextToken"):
                 break
             body["nextToken"] = info["nextToken"]
-    def get_keywords(self):
+    def get_keywords(self,**param):  # GET /sb/keywords; keyword arguments are forwarded to self._request as `params`.
         url_path = "/sb/keywords"
-        return self._request(url_path, method="GET")
+        return self._request(url_path, method="GET",params=param)  # returns whatever _request returns, unmodified
+    def iter_keywords(self,**param):  # Yield keyword pages (one list per request) until an empty page comes back.
+        if "startIndex" not in param:  # paging defaults are applied only when the caller gave no startIndex
+            param["startIndex"] = 0
+            param["count"] = 5000  # NOTE(review): overwrites a caller-supplied count when startIndex is absent
+        while True:
+            info:list = self.get_keywords(**param)
+            # print(info)
+            if len(info)==0:  # an empty page ends the iteration
+                break
+            param["startIndex"] += 5000  # NOTE(review): step is fixed at 5000 even if the caller passed startIndex with a different count - confirm intent
+            yield info  # yields the whole page (a list); callers flatten the pages themselves
 
     def get_targets(self,**body):
         url_path = "/sb/targets/list"
@@ -375,8 +386,19 @@ if __name__ == '__main__':
     # print(sp.get_keyword_bidrecommendation(adGroupId="119753215871672",keyword=["8mp security camera system","8mp security camera system"],matchType=["broad","exact"]))
     sb = SBClient(**AWS_CREDENTIALS)
     # print(list(sb.iter_targets()))
-    print(sb.get_keyword_bidrecommendation(**{'campaignId': 27333596383941,'keywords':[{"matchType":'broad',"keywordText":"4k security camera system"}]}))
-    print(sb.get_budget([27333596383941]))
+    # print(sb.get_keyword_bidrecommendation(**{'campaignId': 27333596383941,'keywords':[{"matchType":'broad',"keywordText":"4k security camera system"}]}))
+    import pandas as pd
+
+    pd.set_option('display.max_columns', None)
+    import warnings
+
+    warnings.filterwarnings('ignore')
+    pd.set_option('expand_frame_repr', False)
+    # a = sb.iter_keywords()
+    a = [row for _ in list(sb.iter_keywords()) for row in _]
+    print(len(a))
+
+    # print(pd.json_normalize(a))
     # sd = SDClient(**AWS_CREDENTIALS)
     # print(sd.get_campaigns(startIndex=10, count=10))