huangyifan 1 год назад
Родитель
Коммит
8a1228764f
2 изменённых файла: 115 добавлений и 15 удалений
  1. 40 6
      sync_amz_data/DataTransform/Data_ETL.py
  2. 75 9
      sync_amz_data/public/amz_ad_client.py

+ 40 - 6
sync_amz_data/DataTransform/Data_ETL.py

@@ -6,6 +6,7 @@ pd.set_option('display.max_columns', None)
 import warnings
 warnings.filterwarnings('ignore')
 pd.set_option('expand_frame_repr', False)
+from datetime import datetime,timezone
 
 class Common_ETLMethod:
     def columnsName_modify(self,df):
@@ -34,15 +35,16 @@ class Common_ETLMethod:
 
     def expression_split(self,df,segment):
+        """Split the stringified expression column `segment` into `<segment>_type` and `<segment>_value`.
+
+        The column is cast to the pandas "string" dtype and the two parts are
+        pulled out with regular expressions. Afterwards the literal strings
+        'nan'/'Nan'/'NaN' anywhere in `df` are replaced by np.nan and the
+        source column is dropped. `df` is modified in place and also returned.
+        """
         df[segment] = df[segment].astype("string")
-        df[segment+str("_type")] = df[segment].str.extract("'type':.+'(.+)',")
-        df[segment+str("_value")] = df[segment].str.extract("'value':.+'(.+)'")
+        # Non-greedy capture: take only the first quoted token that follows 'type':.
+        df[segment+str("_type")] = df[segment].str.extract(r"'type':\s{0,1}'(.+?)',")
+        # NOTE(review): the character class [',[,{] matches one of ' , [ { -- the
+        # commas are literal members of the class, not separators. Confirm that
+        # accepting a leading comma is intended.
+        df[segment+str("_value")] = df[segment].str.extract(r"'value':\s{0,1}[',[,{](.+)'")
+        # Captured values that contain a comma are wrapped as "[" + value + "'}]";
+        # presumably this restores the delimiters the regex consumed around a
+        # nested list value -- TODO confirm against real API payloads.
+        df[segment+str("_value")] = df[segment+str("_value")].map(lambda x: x if pd.isna(x) or "," not in x else "["+x+"'}]")
         df.replace(['nan','Nan','NaN'],np.nan,inplace=True)
         df.drop(columns=[segment],inplace=True)
         return df
 
 class Acount_ETL(Account,Common_ETLMethod):
     def portfolio_ETL(self):
+        """Fetch portfolios via `get_portfolios`, flatten them with `pd.json_normalize`,
+        print the frame after `columnsName_modify`, and return the frame after `columnsName_modify`."""
-        list_portfolio = self.get_portfolio()
+        list_portfolio = self.get_portfolios()
         df_portfolio = pd.json_normalize(list_portfolio)
+        # NOTE(review): columnsName_modify is called twice on the same frame (once
+        # for the debug print, once for the return value) -- confirm it is
+        # idempotent, or compute the result once and reuse it.
         print(self.columnsName_modify(df_portfolio))
         return self.columnsName_modify(df_portfolio)
@@ -84,7 +86,7 @@ class SP_ETL(SPClient,Common_ETLMethod):
         list_budget = self.get_budget(campaign_ids = campaign_ids)['success']
         df_budget = pd.json_normalize(list_budget)
         df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
-        print(df_budget)
+        return self.columnsName_modify(df_budget)
 
 class SB_ETL(SBClient,Common_ETLMethod):
     def campaigns_ETL(self):
@@ -127,6 +129,38 @@ class SB_ETL(SBClient,Common_ETLMethod):
         df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
         print(df_budget)
 
+class SD_ETL(SDClient,Common_ETLMethod):
+    def campaigns_ETL(self):
+        list_campaign_SD = self.get_campaigns()
+        df_campaign = pd.json_normalize(list_campaign_SD)
+        df_campaign['startDate'] = df_campaign['startDate'].map(lambda x: datetime.strptime(x,"%Y%m%d").date())
+        df_campaign['portfolioId'] = df_campaign['portfolioId'].fillna(-1).astype("int64")
+        return self.columnsName_modify(df_campaign)
+
+    def adGroups_ETL(self,**param):
+        list_adGroups_SD = [row for _ in list(self.iter_adGroups(**param)) for row in _]
+        df_adGroups_SD = pd.json_normalize(list_adGroups_SD)
+        tactic = {"T00020":"Contextual targeting","T00030":"Audiences targeting"}
+        df_adGroups_SD["tactic_type"] = df_adGroups_SD['tactic'].map(tactic)
+        return self.columnsName_modify(df_adGroups_SD)
+
+    def ads_ETL(self):
+        list_ads_SD = [row for _ in list(self.iter_ads()) for row in _]
+        df_ads_SD = pd.json_normalize(list_ads_SD)
+        return self.columnsName_modify(df_ads_SD)
+
+    def targets_ETL(self,**param):
+        list_targets = [row for _ in list(self.iter_targets(**param)) for row in _]
+        df_targets = pd.json_normalize(list_targets)
+        df_targets = self.expression_split(df_targets, "resolvedExpression")
+        print(df_targets)
+
+    def budget_ETL(self,campaignsIds:list):
+        list_budget = self.get_budget(campaignIds=campaignsIds)['success']
+        df_budget = pd.json_normalize(list_budget)
+        df_budget = self.TZ_Deal(df_budget,["usageUpdatedTimestamp"])
+        return self.columnsName_modify(df_budget)
+
 if __name__ == '__main__':
     AWS_CREDENTIALS = {
         'lwa_client_id': 'amzn1.application-oa2-client.ebd701cd07854fb38c37ee49ec4ba109',
@@ -134,6 +168,6 @@ if __name__ == '__main__':
         'lwa_client_secret': 'cbf0514186db4df91e04a8905f0a91b605eae4201254ced879d8bb90df4b474d',
         'profile_id': "3006125408623189"
     }
-    ac_etl = SB_ETL(**AWS_CREDENTIALS)
+    ac_etl = SD_ETL(**AWS_CREDENTIALS)
     # print(ac_etl.budget_ETL(campaign_ids=["126327624499318"]))
-    print(ac_etl.keyword_ETL())
+    print(ac_etl.budget_ETL(["257424912382921"]))

+ 75 - 9
sync_amz_data/public/amz_ad_client.py

@@ -305,7 +305,7 @@ class SBClient(BaseClient):
         while True:
             info:list = self.get_keywords(**param)
             # print(info)
-            if len(info)==0:
+            if len(info) == 0:
                 break
             param["startIndex"] += 5000
             yield info
@@ -397,14 +397,80 @@ class SBClient(BaseClient):
         logger.info(f"解压完成:{de_file}")
         return de_file
 
-
-
-
 class SDClient(BaseClient):
     def get_campaigns(self, **params) -> List[dict]:
         url_path = "/sd/campaigns"
         return self._request(url_path, params=params)
 
+    def get_adGroups(self,**params):
+        url_path = '/sd/adGroups'
+        return self._request(url_path, params=params)
+    def iter_adGroups(self,**param):
+        if "startIndex" not in param:
+            param["startIndex"] = 0
+            param["count"] = 5000
+        while True:
+            info:list = self.get_adGroups(**param)
+            # print(info)
+            if len(info) == 0:
+                break
+            param["startIndex"] += 5000
+            yield info
+
+    def get_ads(self,**params):
+        url_path = '/sd/productAds'
+        return self._request(url_path, params=params)
+
+    def iter_ads(self,**param):
+        if "startIndex" not in param:
+            param["startIndex"] = 0
+            param["count"] = 5000
+        while True:
+            info:list = self.get_ads(**param)
+            # print(info)
+            if len(info) == 0:
+                break
+            param["startIndex"] += 5000
+            yield info
+
+    def get_targets(self,**params):
+        url_path = '/sd/targets'
+        return self._request(url_path, params=params)
+
+    def iter_targets(self,**param):
+        if "startIndex" not in param:
+            param["startIndex"] = 0
+            param["count"] = 5000
+        while True:
+            info:list = self.get_targets(**param)
+            # print(info)
+            if len(info) == 0:
+                break
+            param["startIndex"] += 5000
+            yield info
+    def get_budget(self, campaignIds: list):
+        url_path = "/sd/campaigns/budget/usage"
+        body = {"campaignIds": campaignIds}
+        return self._request(url_path, method="POST", body=body)
+
+    def get_target_bidrecommendation(self,tactic:str,products:list,typeFilter:list,themes:dict,locale:str='en_US'):#
+        url_path = '/sd/targets/recommendations'
+        headers ={
+            'Content-Type':"application/vnd.sdtargetingrecommendations.v3.3+json",
+            'Accept':"application/vnd.sdtargetingrecommendations.v3.3+json"
+        }
+        # "tactic":"T00020",
+        # "products":[{"asin":"B00MP57IOY"}],
+        # "typeFilter":["PRODUCT"],
+        # "themes":{"product":[{"name":"TEST","expression":[{"type":"asinBrandSameAs"}]}]}
+        body = {
+            "tactic":tactic,
+            "products":products,
+            "typeFilter":typeFilter,
+            "themes":themes
+                 }
+
+        return self._request(url_path, method="POST", headers=headers,body=body,params={"locale":locale})
 
 class Account(BaseClient):
     def get_portfolios(self):
@@ -415,6 +481,7 @@ class Account(BaseClient):
         yield from self.get_portfolios()
 
 
+
 AccountClient = Account
 
 if __name__ == '__main__':
@@ -429,14 +496,13 @@ if __name__ == '__main__':
     # adGroupId="119753215871672",
     # keyword=["8mp security camera system","8mp security camera system"],
     # matchType=["broad","exact"]))
-    sb = SBClient(**AWS_CREDENTIALS)
-    # print(list(sb.iter_targets()))
+
+    sd = SDClient(**AWS_CREDENTIALS)
+
     # print(sb.get_keyword_bidrecommendation(**{'campaignId': 27333596383941, 'keywords': [
     #     {"matchType": 'broad', "keywordText": "4k security camera system"}]}))
-    a = list(sb.iter_keywords())
+    a = list(sd.iter_targets(**{"campaignIdFilter":"257424912382921"})) #list(sd.iter_targets())#
     print(a,len(a))
-    # sd = SDClient(**AWS_CREDENTIALS)
-    # print(sd.get_campaigns(startIndex=10, count=10))
 
     # sb = SBClient(**AWS_CREDENTIALS)
     # metrics = [