瀏覽代碼

add files/modify files

huangyifan 1 年之前
父節點
當前提交
692c3785ab

+ 7 - 2
start_sync_amz_RightNowRun.py

@@ -2,6 +2,8 @@ from sync_amz_data.tasks.account import AccountTask
 from sync_amz_data.settings import LOG_CONF
 import logging.config
 logging.config.dictConfig(LOG_CONF)
+import time
+
 from sync_amz_data.DataTransform import Data_ETL
 from sync_amz_data.DataTransform.Data_ETL import Common_ETLMethod,SP_ETL,SB_ETL,SD_ETL
 
@@ -24,7 +26,9 @@ def amz_report(AWS_CREDENTIALS,para=None):
     sb_report.reportV2_searchtermsVideo_ETL(conn,params=para)
     sb_report.reportV2_targetsRecord_ETL(conn,params=para)
     sb_report.reportV2_targetsVideo_ETL(conn,params=para)
+    time.sleep(3)
     conn.close()
+    time.sleep(3)
 
     conn = SP_ETL(**AWS_CREDENTIALS).clickhouse_connect()
     sp_report = SP_ETL(**AWS_CREDENTIALS)
@@ -35,8 +39,9 @@ def amz_report(AWS_CREDENTIALS,para=None):
     sp_report.reportV3_searchTerm_spSearchTermETL(conn,params=para)
     sp_report.reportV3_advertiser_spAdvertisedProductETL(conn,params=para)
     sp_report.reportV3_asin_spPurchasedProductETL(conn,params=para)
+    time.sleep(3)
     conn.close()
-
+    time.sleep(3)
     conn = SD_ETL(**AWS_CREDENTIALS).clickhouse_connect()
     sd_report = SD_ETL(**AWS_CREDENTIALS)
 
@@ -81,7 +86,7 @@ if __name__ == '__main__':
     amz_report(AWS_CREDENTIALS=AWS_CREDENTIALS)
 
 
-    # list_date = ['2023-11-12','2023-11-13']
+    # list_date = ['2023-11-02',]
     # # # list_date = [f'2023-10-{i}' for i in range(24,23,-1)]
     # for date_ in list_date:
     #     print(date_)

+ 20 - 2
sync_amz_data/DataTransform/Data_ETL.py

@@ -117,8 +117,26 @@ class Common_ETLMethod(BaseClient):
         # df_report[df_report.select_dtypes('O').columns] = df_report[df_report.select_dtypes('O').columns].astype('string')
         toFloat = [i for i in columns if 'sales' in i.lower() or 'percent' in i.lower() or 'video' in i.lower()]
         if len(toFloat) > 0:
-            df_report[toFloat] = df_report[toFloat].applymap(lambda x: np.nan if pd.isna(x) or x == '' else float(x))
-
+            df_report[toFloat] = df_report[toFloat].applymap(lambda x: 0.0 if pd.isna(x) or x == '' else float(x))
+
+        df_report[df_report.select_dtypes(["int"]).columns] = df_report[df_report.select_dtypes(["int"]).columns].fillna(0)
+        df_report[df_report.select_dtypes(["float"]).columns] = df_report[df_report.select_dtypes(["float"]).columns].fillna(0.0)
+        if "campaignRuleBasedBudget" in df_report.columns:
+            df_report["campaignRuleBasedBudget"] = df_report["campaignRuleBasedBudget"].fillna(0.0)
+        if "campaignRuleBasedBudgetAmount" in df_report.columns:
+            df_report["campaignRuleBasedBudgetAmount"] = df_report["campaignRuleBasedBudgetAmount"].fillna(0.0)
+        if "eCPAddToCart" in df_report.columns:
+            df_report["eCPAddToCart"] = df_report["eCPAddToCart"].fillna(0.0)
+        if "eCPBrandSearch" in df_report.columns:
+            df_report["eCPBrandSearch"] =df_report["eCPBrandSearch"].fillna(0.0)
+        if "viewClickThroughRate" in df_report.columns:
+            df_report["viewClickThroughRate"] = df_report["viewClickThroughRate"].fillna(0.0)
+        if "searchTermImpressionRank" in df_report.columns:
+            df_report["searchTermImpressionRank"] = df_report["searchTermImpressionRank"].fillna(0)
+        if "searchTermImpressionShare" in df_report.columns:
+            df_report["searchTermImpressionShare"] = df_report["searchTermImpressionShare"].fillna(0.0)
+        if "impressionsFrequencyAverage" in df_report.columns:
+            df_report["impressionsFrequencyAverage"] = df_report["impressionsFrequencyAverage"].fillna(0.0)
         df_report.fillna(np.nan, inplace=True)
         # print(df_report[columns].info())
         # df_report.to_excel("df.xlsx")

File diff suppressed because it is too large
+ 145 - 27
sync_amz_data/public/amz_ad_client.py


+ 135 - 0
sync_amz_data/public/sp_api_client.py

@@ -0,0 +1,135 @@
+import clickhouse_connect
+import time
+from sp_api.util import throttle_retry, load_all_pages
+from sp_api.api import Orders,ListingsItems,Inventories,Reports
+from sp_api.base import Marketplaces,ReportType,ProcessingStatus
+import pandas as pd
+import gzip
+from io import BytesIO,StringIO
+from datetime import datetime, timedelta,timezone
+import pytz
+import time
+from sync_amz_data.public.amz_ad_client import shop_infos
+
+
+
+
+class SpApiRequest:
+    """Thin wrapper around the ``sp_api`` clients for one seller account.
+
+    Keeps the credentials and marketplace it was given plus the shop record
+    returned by ``shop_infos(profile_id)``. The record's ``time_zone`` entry
+    drives the default report timestamps and its ``advertiser_id`` entry is
+    sent as the seller id for listing lookups.
+    """
+
+    def __init__(self, credentials, marketplace,profile_id):
+        """Store the API settings and load the shop record.
+
+        Args:
+            credentials: Credential mapping forwarded unchanged to every
+                ``sp_api`` client this object creates.
+            marketplace: Marketplace object; its ``marketplace_id`` is sent
+                when a report is requested.
+            profile_id: Identifier passed to ``shop_infos``.
+        """
+        self.credentials = credentials
+        self.marketplace = marketplace
+        self.shopInfo = shop_infos(profile_id)
+        self.timezone = self.shopInfo['time_zone']
+
+    def timeToLocalTime(self,utctime):
+        """Unfinished placeholder; currently always returns ``None``.
+
+        ``utctime`` is not used yet. The body only computes the current time
+        in the shop's time zone minus one day.
+        """
+        # TODO: implement the conversion; the values below are scaffolding.
+        report_localTime = datetime.now(tz=pytz.timezone(self.timezone)) + timedelta(days=-1)
+        startTimeutc = report_localTime
+
+    def nowTime(self):
+        """Return the current time in three representations.
+
+        Returns:
+            dict with ``computerTimenow`` (naive, host local time),
+            ``localTimeNow`` (aware, shop time zone) and ``utctimeNow``
+            (naive UTC).
+        """
+        shop_tz = pytz.timezone(self.timezone)
+        return {
+            "computerTimenow": datetime.now(),
+            "localTimeNow": datetime.now(tz=shop_tz),
+            # datetime.utcnow() is deprecated since Python 3.12; this yields
+            # the same naive UTC value.
+            "utctimeNow": datetime.now(timezone.utc).replace(tzinfo=None),
+        }
+
+    def create_report(self,**kwargs):
+        """Request a report and return its id.
+
+        Keyword Args:
+            reportType: Required; the report type to request.
+            reportOptions: Optional; forwarded as-is.
+            dataStartTime: Optional date string such as ``"2023-11-10"``;
+                ``"T00:00:00"`` is appended.
+            dataEndTime: Optional date string such as ``"2023-11-10"``;
+                ``"T23:59:59"`` is appended.
+
+        When a bound is omitted, the current time in the shop time zone is
+        used for it (formatted without a UTC offset).
+
+        Returns:
+            The ``reportId`` value of the response payload.
+
+        Raises:
+            KeyError: If ``reportType`` is not supplied.
+        """
+        reportType = kwargs['reportType']
+        reportOptions = kwargs.get("reportOptions")
+        start_day = kwargs.get("dataStartTime")
+        end_day = kwargs.get("dataEndTime")
+
+        now_local = datetime.now(tz=pytz.timezone(self.timezone)).strftime("%Y-%m-%dT%H:%M:%S")
+        dataStartTime = now_local if start_day is None else start_day + "T00:00:00"
+        dataEndTime = now_local if end_day is None else end_day + "T23:59:59"
+
+        report = Reports(credentials=self.credentials, marketplace=self.marketplace)
+        rel = report.create_report(
+            reportType=reportType,
+            marketplaceIds=[self.marketplace.marketplace_id],
+            reportOptions=reportOptions,
+            dataStartTime=dataStartTime,
+            dataEndTime=dataEndTime,
+        )
+        reportId = rel.payload.get("reportId")
+        print(reportId)
+        return reportId
+
+    def decompression(self,reportId,poll_interval=15,max_wait=None):
+        """Poll a report until it finishes and load its document as a table.
+
+        Args:
+            reportId: Id returned by ``create_report``.
+            poll_interval: Seconds to sleep before each status check.
+            max_wait: Optional upper bound, in seconds, on the total polling
+                time. ``None`` (the default) polls until the report reaches
+                a final state, which is the historical behaviour.
+
+        Returns:
+            A ``pandas.DataFrame`` with the report rows, or ``None`` when the
+            report was cancelled, failed fatally, or ``max_wait`` elapsed.
+        """
+        report = Reports(credentials=self.credentials, marketplace=self.marketplace)
+        deadline = None if max_wait is None else time.monotonic() + max_wait
+        while True:
+            # Sleep first: a freshly requested report is not ready immediately.
+            time.sleep(poll_interval)
+            reportId_info = report.get_report(reportId=reportId)
+            print(reportId_info)
+            status = reportId_info.payload.get("processingStatus")
+
+            if status == ProcessingStatus.DONE:
+                reportDocumentId = reportId_info.payload.get("reportDocumentId")
+                rp_table = report.get_report_document(reportDocumentId=reportDocumentId,download=False)
+                print(rp_table)
+                # The document is treated as gzip whenever a compression
+                # algorithm is reported; otherwise pandas' default applies.
+                compressed = rp_table.payload.get('compressionAlgorithm') is not None
+                return pd.read_table(
+                    rp_table.payload['url'],
+                    compression={"method": 'gzip'} if compressed else 'infer',
+                    encoding='iso-8859-1',
+                )
+
+            if status in [ProcessingStatus.CANCELLED,ProcessingStatus.FATAL]:
+                print("取消或失败")
+                return None
+
+            if deadline is not None and time.monotonic() >= deadline:
+                print("gave up waiting for report", reportId)
+                return None
+
+            print("please wait...")
+
+    def GET_MERCHANT_LISTINGS_ALL_DATA(self,limit=None):
+        """Fetch the merchant listings report and add a main-image column.
+
+        Args:
+            limit: Optional number of leading rows to keep before the image
+                lookup (one listing request is made per kept row).
+
+        Returns:
+            The report as a DataFrame with an extra ``mainImageUrl`` column,
+            or ``None`` when the report could not be produced.
+        """
+        para = {"reportType":ReportType.GET_MERCHANT_LISTINGS_ALL_DATA}
+        reportid = self.create_report(**para)
+        decom_df = self.decompression(reportid)
+        if decom_df is None:
+            # Report was cancelled/failed; nothing to enrich.
+            return None
+        if limit is not None:
+            # Copy so the column assignment below does not write to a slice view.
+            decom_df = decom_df.iloc[:limit,:].copy()
+        decom_df['mainImageUrl'] = decom_df['seller-sku'].map(self.get_mainImage_url)
+        print(decom_df)
+        return decom_df
+
+    def GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL(self,dataStartTime="2023-11-10",dataEndTime="2023-11-10"):
+        """Fetch the flat-file all-orders report for a date range and print it.
+
+        Args:
+            dataStartTime: First day of the range; the default keeps the date
+                that used to be hard-coded.
+            dataEndTime: Last day of the range; same default.
+
+        Returns:
+            The report as a DataFrame, or ``None`` when the report could not
+            be produced.
+        """
+        para = {
+            "reportType":ReportType.GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL,
+            "dataStartTime":dataStartTime,
+            "dataEndTime":dataEndTime,
+            "reportOptions":{"ShowSalesChannel":"true"},
+        }
+        reportid = self.create_report(**para)
+        decom_df = self.decompression(reportid)
+        print(decom_df)
+        if decom_df is not None:
+            print(decom_df.columns)
+        return decom_df
+
+    def get_mainImage_url(self, sku, retries=1, retry_delay=3):
+        """Return the main image link of a listing, or ``None`` if it has none.
+
+        Args:
+            sku: Seller SKU to look up.
+            retries: Extra attempts after a failed request (default 1, the
+                single retry that was previously hard-coded).
+            retry_delay: Seconds to wait between attempts.
+
+        Raises:
+            Exception: Whatever the final request attempt raised.
+        """
+        listingClient = ListingsItems(credentials=self.credentials, marketplace=self.marketplace)
+        attempts = max(retries, 0) + 1
+        for attempt in range(attempts):
+            try:
+                r1 = listingClient.get_listings_item(sellerId=self.shopInfo['advertiser_id'], sku=sku)
+                break
+            except Exception:
+                # Re-raise once the retry budget is spent instead of hiding it.
+                if attempt == attempts - 1:
+                    raise
+                time.sleep(retry_delay)
+
+        summaries = r1.payload.get("summaries") or []
+        if not summaries:
+            return None
+        img = summaries[0].get("mainImage")
+        return None if img is None else img.get("link")
+
+    @throttle_retry()
+    @load_all_pages()
+    def load_all_orders(self, **kwargs):
+        """
+        a generator function to return all pages, obtained by NextToken
+
+        Uses this instance's credentials and marketplace; ``kwargs`` are
+        forwarded to ``Orders.get_orders``.
+        """
+        return Orders(credentials=self.credentials, marketplace=self.marketplace).get_orders(**kwargs)
+
+    @throttle_retry()
+    @load_all_pages()
+    def load_order_items(self, **kwargs):
+        """Return the order-item pages; ``kwargs`` go to ``Orders.get_order_items``."""
+        return Orders(credentials=self.credentials, marketplace=self.marketplace).get_order_items(**kwargs)
+
+
+if __name__ == '__main__':
+    import os
+
+    # Credentials are read from the environment so that no secret is stored
+    # in source control. Any key that was ever committed to this file should
+    # be treated as leaked and rotated.
+    aws_credentials = {
+        'refresh_token': os.environ['SP_API_REFRESH_TOKEN'],
+        'lwa_app_id': os.environ['LWA_APP_ID'],  # LWA credentials from the developer profile in Seller Central
+        'lwa_client_secret': os.environ['LWA_CLIENT_SECRET'],
+        'aws_access_key': os.environ['SP_API_ACCESS_KEY'],
+        'aws_secret_key': os.environ['SP_API_SECRET_KEY'],
+        'role_arn': os.environ['SP_API_ROLE_ARN'],
+    }
+    sp_ = SpApiRequest(aws_credentials,Marketplaces.US,'3006125408623189')
+    sp_.GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL()
+    # sp_.decompression("1532408019678")
+    # print(type(sp_))

Some files were not shown because too many files changed in this diff