diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..37d4c56 Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index 7bbc71c..75542c8 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,6 @@ ENV/ # mypy .mypy_cache/ +bilibili/log +.DS_Store +bilibili/log.err diff --git a/5.py b/5.py deleted file mode 100644 index 64edb00..0000000 --- a/5.py +++ /dev/null @@ -1,9 +0,0 @@ -import urllib2,pprint -import cPickle as pickle -b=urllib2.urlopen('http://www.pythonchallenge.com/pc/def/banner.p') -result=pickle.Unpickler(b).load() -pprint.pprint(result) -output=open('c:\\Users\\Administrator\\Desktop\\5text.txt','w') -for line in result: - print >> output, ' '.join([c[0]*[1] for c in line]) -output.close() diff --git a/AWS/.DS_Store b/AWS/.DS_Store new file mode 100644 index 0000000..2225f58 Binary files /dev/null and b/AWS/.DS_Store differ diff --git a/AWS/Amazon_Utils.py b/AWS/Amazon_Utils.py new file mode 100644 index 0000000..5390247 --- /dev/null +++ b/AWS/Amazon_Utils.py @@ -0,0 +1,193 @@ +# -*- coding:UTF-8 -*- +import xlrd +import xlwt +import os +import re +import requests +def is_bash(): + '''判断使用平台''' + if platform.system().lower() == "windows": + return 0 + if platform.system().lower() == 'darwin' or platform.system().lower() == 'linux': + return 1 +def is_TTD(f): + '''是否被Amazon屏蔽请求变狗''' + temp = f + if(re.findall("(_TTD_\.jpg)", f)): + return 1 + else: + return 0 +def retry(func): + '''装饰器:try最多5次''' + def wrap(*args): + i = 0 + r = None + while i<5: + try: + r = func(*args) + if r: + i = 5 + except Exception as e: + i+=1 + return r + return wrap + +def excel_bulit(workbook, asin): + '''Bulit a excel.构建Excel''' + table= workbook.add_sheet("{}".format(asin),cell_overwrite_ok=True) + style = xlwt.XFStyle()#设置样式 + font = xlwt.Font()#设置字体 + font.name = 'SimSun' # 指定“宋体” + style.font = font + alignment=xlwt.Alignment()#设置对齐 + alignment.horz=xlwt.Alignment.HORZ_CENTER#单元格字符水平居中 + # 格式: HORZ_GENERAL, HORZ_LEFT, 
HORZ_CENTER, HORZ_RIGHT, HORZ_FILLED, HORZ_JUSTIFIED, HORZ_CENTER_ACROSS_SEL, HORZ_DISTRIBUTED + alignment.vert=xlwt.Alignment.VERT_CENTER#单元格字符垂直居中 + #格式: VERT_TOP, VERT_CENTER, VERT_BOTTOM, VERT_JUSTIFIED, VERT_DISTRIBUTED + style.alignment=alignment#添加至样式 + return table + +def Get_ASINlists(fn): + '''从表格里获取ASINURL''' + data=xlrd.open_workbook(fn) # 打开工作薄 + sheets=data.sheets() + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print(p,z[p]) + try: + sheet_index=int(input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print('The digital is wrong,plz input a correct number') + except Exception as e: + print(str(e)) + t=1 + table=data.sheet_by_index(t-1) # 读取指定sheet + cols=table.ncols + rows=table.nrows + print("{0}'s rows ,cols are {1},{2}".format(z[t],rows,cols)) + first_sheet=table.row_values(0) + try: + url_index=first_sheet.index('ASIN')#返回第一行URL的列数 + except Exception as e: + print(e) + return [] + ASINs=table.col_values(url_index) # 读取指定列(该列含有URL) + ASINs.pop(0) + return ASINs +def Get_Exceldata(fn, colname): + '''从表格里获取任意列数组''' + if not os.path.exists(fn): + print("Doesn't exist: {}".format(os.path.abspath(fn))) + return None + data=xlrd.open_workbook(fn) # 打开工作薄 + sheets=data.sheets() + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print(p,z[p]) + try: + sheet_index=int(input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print('The digital is wrong,plz input a correct number') + except Exception as e: + print(str(e)) + t=1 + table=data.sheet_by_index(t-1) # 读取指定sheet + cols=table.ncols + rows=table.nrows + print("{0}'s rows ,cols are {1},{2}".format(z[t],rows,cols)) + first_sheet=table.row_values(0) + try: + url_index=first_sheet.index(colname)#返回第一行URL的列数 + except Exception as e: + print(e) + return [] + ASINs=table.col_values(url_index) # 读取指定列(该列含有URL) + ASINs.pop(0)#去掉表头 + 
return ASINs + +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='./stainless steel toilet brush holder.xlsx'#默认工作簿地址 + try: + file_path='.' + print('当前路径{0}文件夹中的文件和文件夹如下:'.format(os.path.abspath(file_path))) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print(i+1,file_names[i]) + file_num=int(raw_input("Default workbook is 1,plz input a number of serial number(default {0}):\nOr Enter a number more than the last option you could input a url of file\n ".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx or /Users/\{name\}/Projects/1.xls\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception as e: + print(str(e)) + file_path=t + return file_path + +def is_Captcha(f, host, asin, session, headers): + '''Amazon验证码''' + url = re.findall("(https://images-na.ssl-images-amazon.com/captcha.*?jpg)", f) + i = 0 + while url: + captcha = AmazonCaptcha.fromlink(url[0]) + text = captcha.solve() +# with open('./t.html', 'w') as f: +# f.write(r) +# f.close() + validateCaptcha_url = host+"/errors/validateCaptcha" + amzn = re.findall("name=\"amzn\" value=\"(.*?)\"", f)[0] + amzn_r = re.findall("name=\"amzn-r\" value=\"(.*?)\"", f)[0] + params = { + "amzn": amzn, + "amzn-r": html.unescape(amzn_r), + "field-keywords": text + } + validateCaptcha_url = validateCaptcha_url+"?"+ urllib.parse.urlencode(params) + print(validateCaptcha_url) + r = session.get(validateCaptcha_url, headers = headers, timeout = 5)#Amazon验证请求 + u = host+asin + r = session.get(u, headers=headers, timeout = 5) + f = r.text + print(url) + url = re.findall("(https://images-na.ssl-images-amazon.com/captcha.*?jpg)", f) + i += 1 + if i == 5: + url = [] + return f, 
session + +def requests_asin(host, asin): + '''Get the content of Amazon listing web page.获取listing网页内容''' + headers={ + 'Connection':'keep-alive', + 'sec-ch-ua':'"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'sec-ch-ua-mobile':'?0', + 'sec-ch-ua-platform':'"macOS"', + 'Upgrade-Insecure-Requests':'1', + 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Sec-Fetch-Site':'none', + 'Sec-Fetch-Mode':'navigate', + 'Sec-Fetch-User':'?1', + 'Sec-Fetch-Dest':'document', + 'Accept-Encoding':'gzip, deflate, br', + 'Accept-Language':'en-US,en;q=0.9' + }#浏览器头部 + #proxies={'HTTP': 'HTTP://127.0.0.1:1081', 'HTTPS': 'HTTPS://127.0.0.1:1081'}#免费代理IP + session=requests.Session() + url = host+'/dp/'+asin + r = session.get(url, headers=headers, timeout = 5) + return is_Captcha(r.text, host, asin, session, headers) diff --git a/AWS/Amazozn_email.py b/AWS/Amazozn_email.py new file mode 100644 index 0000000..a839286 --- /dev/null +++ b/AWS/Amazozn_email.py @@ -0,0 +1,149 @@ +# -*- conding:UTF-8 -*- +# Author:Toryun +# Python version:2.7.13 +# Date:18/3/25 +# Function:Create a style Excel in special situation生成指定表格,写入随机Email和密码 +#Amazom CAPTCHA IMG API https://www.amazon.com/ap/captcha?appAction=REGISTER&captchaObfuscationLevel=ape:aGFyZA==&captchaType=image +import random +import requests +import xlwt +import xlrd +import datetime,time +import mechanize +import os +import cookielib +import sys +from PIL import Image +import re +#--------------------------------------------------------------------------------- +def excel_bulit(): + #Bulit a excel,Generate a account of Amazon 构建Excel,生成亚马逊账号 + workbook = xlwt.Workbook(encoding = 'utf-8') + table= workbook.add_sheet("data",cell_overwrite_ok=True) + style = xlwt.XFStyle()#设置样式 + font = 
xlwt.Font()#设置字体 + font.name = 'SimSun' # 指定“宋体” + style.font = font + alignment=xlwt.Alignment()#设置对齐 + alignment.horz=xlwt.Alignment.HORZ_CENTER#单元格字符水平居中 + # 格式: HORZ_GENERAL, HORZ_LEFT, HORZ_CENTER, HORZ_RIGHT, HORZ_FILLED, HORZ_JUSTIFIED, HORZ_CENTER_ACROSS_SEL, HORZ_DISTRIBUTED + alignment.vert=xlwt.Alignment.VERT_CENTER#单元格字符垂直居中 + #格式: VERT_TOP, VERT_CENTER, VERT_BOTTOM, VERT_JUSTIFIED, VERT_DISTRIBUTED + style.alignment=alignment#添加至样式 + + for i in range(0,100): + t='0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + t1=''.join(random.sample(t,8)) + b1=''.join(random.sample(t,8)) + mail=t1+'@gmail.com' + table.write(i,0,mail,style) + table.write(i,1,b1,style) + file_save='C:\\Users\\Administrator\\Desktop\email.xls' + workbook.save(file_save) + +#--------------------------------------------------------------------------------- +#Reading account accounts and passwords +#读取账户账号和密码 +def read_excel(i): + '''Read account and password from workbook读取工作簿中的账号密码''' + workbook=xlrd.open_workbook('C:\\Users\\Administrator\\Desktop\email.xls') + table=workbook.sheet_by_index(0) + account=table.col_values(0) + password=table.col_values(1) + a=account[i] + p=password[i] + return a,p +#--------------------------------------------------------------------------------- +#Register Amazon(Using mechanize library to simulate browser registration) +#注册亚马逊(使用mechanize库模拟浏览器注册) +def register(name,account,password,i): + '''Using the mechanize lib simulation browser to large quantity register Amazon accounts使用mechanize模拟浏览器批量注册亚马逊账号''' + 
url='https://www.amazon.com/ap/register?openid.pape.max_auth_age=0&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&pageId=usflex&ignoreAuthState=1&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_ya_signin&prevRID=3CBEZNC1DVKVH5BS3CQT&openid.assoc_handle=usflex&openid.mode=checkid_setup&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&prepopulatedLoginId=&failedSignInCount=0&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0' + br = mechanize.Browser() + br.set_handle_equiv(True)#handle HTTP-EQUIV headers (HTTP headers embedded in HTML). + br.set_handle_redirect(True) + br.set_handle_referer(True)#add Referer (sic) header + br.set_handle_robots(False)# Ignore robots.txt. Do not do this without thought and consideration. + br.set_handle_gzip(False) + br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1) + br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0')] + proxies={"HTTPS": "HTTPS://122.242.96.30:808"}#proxy代理 + br.set_proxies(proxies) + for tries in range(10):#解决100610错误问题(代理连接问题) + try: + br.open(url) + except: + if tries<=10: + continue + else: + break + br.select_form(nr=0) + br.form['customerName']=name + br.form['email']=account + br.form['password']=password + br.form['passwordCheck']=password + br.submit() + response=str(br.response().read()) + try:#如果需要验证进行以下操作 + img_url=re.findall(r'Visual CAPTCHA image, continue down for an audio option.<\/a>(.*?).[\s]?<\/span>", f) + return Fulfilled + +if __name__ == '__main__': + host = "https://www.amazon.ca" + fp = "./asin.xls" + file_save = "./fulfilledby.xls" + ASINs = Get_ASINlists(fp) + workbook = xlwt.Workbook(encoding = 'utf-8') + table=excel_bulit(workbook, "1") + table.write(0, 0, "ASIN") + table.write(0, 1, "Fulfilled") + for i in range(len(ASINs)): + try: + asin = ASINs[i] + 
table.write(i+1, 0, asin) + m = FulfilledBy(host, asin) + if m: + table.write(i+1, 1, m[0][0]+m[0][1]) + print(asin, m) + except Exception as e: + print(e) + pass + workbook.save(file_save) + print("Saved to {}".format(os.path.abspath(file_save))) \ No newline at end of file diff --git a/AWS/Get_Amazon_QA.py b/AWS/Get_Amazon_QA.py new file mode 100644 index 0000000..b9afecc --- /dev/null +++ b/AWS/Get_Amazon_QA.py @@ -0,0 +1,197 @@ +# -*- coding:UTF-8 -*- +import re +import os +import math +import xlrd +import xlwt +import html +import random +import platform +import requests +from Amazon_Utils import excel_bulit, Get_ASINlists +proxies_list80 = [ + "184.60.66.122", + "34.23.45.223", + "162.144.236.128", + "108.170.12.13", + "64.251.22.20", + "138.91.159.185", + "162.144.233.16", + "167.99.174.59", + "54.86.198.153", + "68.183.143.134", + "45.77.198.163", + "162.240.75.37", + "104.45.128.122", + "191.101.1.116", + "74.208.177.198", + "164.92.108.63", + "93.188.161.84", + "52.88.105.39", + "104.225.220.233", + "143.110.232.177", + "34.239.204.118", + "209.126.6.159", + "45.79.17.203", + "104.215.127.197", + "34.75.202.63", + "147.182.142.189", + "137.184.232.148", + "85.239.242.23", + "192.236.160.186", + "142.11.222.22", + "24.199.82.12", + "129.153.163.10", + "50.16.22.43", + "65.109.84.104", + "74.208.205.5", + "65.108.9.181", + "34.239.204.118", + "103.216.160.163", + "103.216.160.164", + "103.216.160.160", + "34.87.103.220", + "103.216.160.167" + ] +proxies_list1994 = ["216.127.188.18", + "198.74.98.18", + "198.52.105.249", + "173.82.102.194", + "72.11.130.145", + "72.44.76.76", + "198.52.114.146", + "72.44.68.249", + "104.194.232.179", + "104.129.41.2", + "170.178.193.106", + "173.82.20.178", + "72.44.67.178", + "173.82.46.138", + "198.52.115.114", + "173.82.43.108", + "173.44.42.66", + "198.211.55.167" + ] +def is_bash(): + '''判断使用平台''' + if platform.system().lower() == "windows": + return 0 + if platform.system().lower() == 'darwin' or 
platform.system().lower() == 'linux': + return 1 +def is_TTD(url, f): + '''是否被Amazon屏蔽请求变狗''' + temp = f + if(re.findall("(_TTD_\.jpg)", f)): + if is_bash(): + a = os.system('''curl -s '{}' \ + -H 'authority: www.amazon.com' \ + -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \ + -H 'accept-language: en-US,en;q=0.9' \ + -H 'sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"' \ + -H 'sec-ch-ua-mobile: ?0' \ + -H 'sec-ch-ua-platform: "macOS"' \ + -H 'sec-fetch-dest: document' \ + -H 'sec-fetch-mode: navigate' \ + -H 'sec-fetch-site: none' \ + -H 'sec-fetch-user: ?1' \ + -H 'upgrade-insecure-requests: 1' \ + -H 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' \ + --compressed > ./t.html'''.format(url)) + else: + a = os.system('''curl -s "{}" ^ + -H "authority: www.amazon.com" ^ + -H "accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" ^ + -H "accept-language: en-US,en;q=0.9" ^ + -H "sec-ch-ua: ""Not_A Brand"";v=""99"", ""Google Chrome"";v=""109"", ""Chromium"";v=""109""" ^ + -H "sec-ch-ua-mobile: ?0" ^ + -H "sec-ch-ua-platform: ""macOS""" ^ + -H "sec-fetch-dest: document" ^ + -H "sec-fetch-mode: navigate" ^ + -H "sec-fetch-site: none" ^ + -H "sec-fetch-user: ?1" ^ + -H "upgrade-insecure-requests: 1" ^ + -H "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" ^ + --compressed > t.html'''.format(url)) + if a == 0: + with open('./t.html', 'r') as f: + temp = f.read() + f.close() + return temp +def matchQA(url, headers, f, QA_links, QAs): + questions_number = re.findall("(\d+) questions", f) + print(questions_number) + if len(questions_number) != 0: + qn = math.ceil(int(questions_number[0])/10) + 
for i in range(1, qn+1): + print(i) + url_i = url + str(i) + r = requests.get(url_i, headers = headers, timeout = 5) + temp = is_TTD(url_i, r.text) + QA_link = re.findall("askInlineAnswers\" id=\"(.*?)\">", temp) + QA_links.append(QA_link) + for i in range(len(QA_link)): + url_i_i = 'https://www.amazon.com/ask/questions/'+QA_link[i] + r = requests.get(url_i_i, headers = headers, timeout = 5) + temp = is_TTD(url_i_i, r.text) + QA = re.findall("\s+(.*?)<\/span>", temp) + QAs.append(QA) + return QA_links, QAs +def retry(func): + def wrap(*args): + i = 0 + r = None + while i<5: + try: + r = func(*args) + if r: + i = 5 + except Exception as e: + i+=1 + return r + return wrap +@retry +def Get_Amazon_QA(asin): + QAs = [] + QA_links = [] + questions_number = [] + url = "https://www.amazon.com/ask/questions/asin/{}/".format(asin) + headers={ + "host": "www.amazon.com", + "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "accept-language": "en-US,en;q=0.9", + "sec-ch-ua": "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\"", + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": "\"macOS\"", + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-user": "?1", + "upgrade-insecure-requests": "1" + } + proxies = {'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'} + r = requests.get(url, headers = headers, proxies = proxies, timeout = 5) + temp = is_TTD(url, r.text) + QA_links, QAs = matchQA(url, headers, temp, QA_links, QAs) + return QA_links, QAs + +def main(): + file_save='./AMZQA.xls' + fn = './OR.xls' + asinlist = Get_ASINlists(fn) + workbook = xlwt.Workbook(encoding = 'utf-8') + for i in range(len(asinlist)): + table = excel_bulit(workbook, asinlist[i]) + print(asinlist[i]) + QA_links, QAs = Get_Amazon_QA(asinlist[i]) + #print(QA_links) + k = 0 + for j in range(len(QA_links)): + 
for m in range(len(QA_links[j])): + table.write(k,0,QA_links[j][m]) + for n in range(len(QAs[k])): + table.write(k,n+1, html.unescape(QAs[k][n])) + k += 1 + workbook.save(file_save) + print("Saved to {}".format(file_save)) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/AWS/Get_Amazon_bestsellers.py b/AWS/Get_Amazon_bestsellers.py new file mode 100644 index 0000000..951acf5 --- /dev/null +++ b/AWS/Get_Amazon_bestsellers.py @@ -0,0 +1,56 @@ +import requests +import time +import re +import platform +import csv + +url = "https://www.amazon.com/Best-Sellers-Industrial-Scientific-Cut-Off-Wheels/zgbs/industrial/256194011/ref=zg_bs_nav_industrial_3_2665570011" +fp = "c:/bs.csv" +if platform.system().lower() == 'windows': + fp = "c:/bs.csv" +if platform.system().lower() == 'darwin' or platform.system().lower() == 'linux': + fp = "/Users/bs.csv" +def amazonbs(url): + headers={ + "Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0", + "Accept": + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9v", + "Accept-Language": + "zh-CN,en;q=0.8,zh;q=0.7,zh-TW;q=0.5,zh-HK;q=0.3,en-US;q=0.2", + "Accept-Encoding": + "gzip, deflate, br", + "Connection": + "keep-alive", + "Upgrade-Insecure-Requests":"1" + } + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'} + try: + r = requests.get(url, headers = headers, proxies = proxies) + except requests.exceptions.RequestException as e: + raise SystemExit(e) + if (r.status_code == 200): + ranks = re.findall(r'#(\d+)<\/span>', r.content) + asins = re.findall(r'(.*?)<\/div>', r.content) + imgs = re.findall(r'a-section a-spacing-mini _cDEzb_noop_3Xbw5.*?src="(.*?)\"', r.content) + stars = re.findall(r'a-icon-alt">(.*?) 
out of 5 stars<\/span>', r.content) + reviews = re.findall(r'a-size-small">(.*?)<\/span>', r.content) + prices = re.findall(r'(.*?)<\/span>', r.content) + return ranks, asins, titles, imgs, stars, reviews, prices +def list_to_csv(fp, ranks, asins, titles, imgs, stars, reviews, prices): + with open('/Users/jin/Desktop/bs.csv', 'w+') as csvfile: + spamwriter = csv.writer(csvfile, delimiter=',', quotechar=' ', quoting=csv.QUOTE_MINIMAL) + spamwriter.writerow(['Rank'] + ['Asin'] + ['Title'] + ['Img'] + ['Star'] + ['Review'] + ['prices']) + t = [] + print(len(prices)) + for i in range(len(prices)): + #print i + spamwriter.writerow([ranks[i], asins[i], titles[i], imgs[i], stars[i], reviews[i], prices[i]]) + print("{}: Save sucessfully!".format(fp)) + +if __name__ == '__main__': + ranks, asins, titles, imgs, stars, reviews, prices = amazonbs(url) + list_to_csv(fp, ranks, asins, titles, imgs, stars, reviews, prices) \ No newline at end of file diff --git a/AWS/Get_Amazon_keywords_rank.py b/AWS/Get_Amazon_keywords_rank.py new file mode 100644 index 0000000..4fe92ea --- /dev/null +++ b/AWS/Get_Amazon_keywords_rank.py @@ -0,0 +1,119 @@ +# -*- coding:UTF-8 -*- +#Python Version:2.7.13 +#auth:Toryun +#Date:17/8/24 +#Function:Find rank of the words in the Amazon search,return to the excel +import re +import os +import time +import requests +from Amazon_Utils import xlwt, retry, excel_bulit, Get_ASINlists, Get_Exceldata +def is_TTD(f): + '''是否被Amazon屏蔽请求变狗''' + temp = f + if(re.findall("(_TTD_\.jpg)", f)): + return 1 + else: + return 0 + return temp +@retry +def get_result(keyword): + '''返回搜索产品数量''' + #proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费IP地址*http://www.xicidaili.com* + url='https://www.amazon.com/s?k={}&page=1'.format(keyword) + _headers={ + "Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", + "Referer": + 
"https://www.amazon.com/", + "Accept": + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Language": + "en-US,en;q=0.8", + "Accept-Encoding": + "gzip, deflate, br", + "Connection": + "keep-alive", + "Cache-Control":"max-age=0", + "Upgrade-Insecure-Requests":"1" + }#得到request头部 + + r=requests.get(url,headers=_headers)#通过代理得到请求内容 +# with open('./t.html', 'wb+') as f: +# f.write(r.content) +# f.close() + if is_TTD(r.text): + print("Blocked!") + return None + else: + m=re.findall('a-section a-spacing-small a-spacing-top-small\">\s+.*?over (.*?)results for',r.text)#匹配result + print(m[0]) #返回第一个值 + return m[0] +def get_rank_keyword(asins, keyword, page): + '''返回关键词的页面排名''' + url='https://www.amazon.com/s?k={}&page={}'.format(keyword, page) + _headers={ + "Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", + "Referer": + "https://www.amazon.com/", + "Accept": + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Language": + "en-US,en;q=0.8", + "Accept-Encoding": + "gzip, deflate, br", + "Connection": + "keep-alive", + "Cache-Control":"max-age=0", + "Upgrade-Insecure-Requests":"1" + }#得到request头部 + + r=requests.get(url,headers=_headers)#通过代理得到请求内容 + if is_TTD(r.text): + print("Blocked!") + return None + else: + t = {} + for i in range(len(asins)): + result = re.findall("data-asin=\"{}\" data-index=\"(\d+)\"".format(asins[i]), r.text)#匹配result + if len(result) != 0: + t[asins[i]] = [page, int(result[0])-1] + return t + +if __name__ == '__main__': + workbook = xlwt.Workbook(encoding = 'utf-8') + table = excel_bulit(workbook, "1") + table.write(0, 0, "ASIN") + table.write(0, 1, "keyword") + table.write(0, 2, "页数") + table.write(0, 3, "单页排名") + asins = 
["B08B6FJPK5","B08B6FQVZZ","B08HQJ6CV6","B08HQL4Q3G","B08HQPYMH3","B08JLGNPVX","B0BW3Y3W91","B0BW442C9T"] + file_save = "./keywordrank.xls" + fn = "./kw.xls" + keywords = Get_Exceldata(fn, '关键词') + i = 1 + for keyword in keywords: + print(keyword) + t = get_result(keyword) + if t: + t = int(t.replace(",","")) + for page in range(1, (t//48)+2): + d = get_rank_keyword(asins, keyword, page) + print(page, d) + if d: + for k in d: + table.write(i, 0, k) + table.write(i, 1, keyword) + table.write(i, 2, d[k][0]) + table.write(i, 3, d[k][1]) + i+=1 + if page > 7: + print("Result is over 7th page") + break + workbook.save(file_save) + print("Saved to {}".format(os.path.abspath(file_save))) diff --git a/AWS/Get_Amazon_products_num.py b/AWS/Get_Amazon_products_num.py new file mode 100644 index 0000000..3a777ca --- /dev/null +++ b/AWS/Get_Amazon_products_num.py @@ -0,0 +1,137 @@ +#Python version:2.7.13 +#Author:Toryun +#Date:2017-11-18 +#Function:Get a number products of sellers +'''获取卖家上架产品数和页数''' +import requests,re,datetime,os,xlrd +from xlutils.copy import copy +def post_products(sellerID): + '''获取卖家上架产品数和页数''' + post_url='https://www.amazon.com/sp/ajax/products' + headers={"Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0", + "Accept": + "application/json, text/javascript, */*; q=0.01", + "Accept-Language": + "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2", + "Accept-Encoding": + "gzip, deflate, br", + "Referer": + "https://www.amazon.com/sp?_encoding=UTF8&asin=&isAmazonFulfilled=1&isCBA=&marketplaceID=ATVPDKIKX0DER&orderID=&seller={0}&tab=&vasStoreID=".format(sellerID), + "Content-Type": + "application/x-www-form-urlencoded", + "Content-Length": + "313", + "X-Requested-With": + "XMLHttpRequest", + "Cookie": + "x-wl-uid=1dnnurSt1bfiNTOwNHfLsu0IQMCkCME8HNKE7nQa/V9+Gn9aUz4xNY52zyADhI3uc5rHcRUYm/KA=; session-id-time=2082787201l; session-id=147-6188368-5590659; 
ubid-main=133-0860109-3446425; session-token=9Vkx3Rqg2Cyc6BtpXvyhmmeBNEN9ukAAN7WPivZz9U87xbRdp4yG36+KbZZXG6AYdqIjjXjwBSUknP0gESudqpwW0GUj69jdYrUZAbePjDk0G3Xx3FT7uqPCXIpEoYsEJAjEgedklWlpWQAn3BQgxKy0XicKNDVnT3uDVuiE/sXySDreYrlmM6EP0hZZdITGDlkI/MlKi7iW2Nz47Ufo30c1TcuDQtzJA3g602ofFtB2LCfA37oCqBh4mUad+apz; s_nr=1503652206665-New; s_vnum=1935652206666%26vn%3D1; s_dslv=1503652206666; lc-main=en_US; skin=noskin", + "Connection": + "keep-alive" + } + data={"marketplaceID":"ATVPDKIKX0DER", + "seller":"{0}".format(sellerID), + "productSearchRequestData":{"marketplace":"ATVPDKIKX0DER","seller":"{0}".format(sellerID),"url":"/sp/ajax/products","pageSize":12,"searchKeyword":"","extraRestrictions":{},"pageNumber":1}} + r=requests.post(post_url,data=data,headers=headers) + return r.content +def requests_url(url): + '''获取listing网页内容''' + headers={"Host": +"www.amazon.com", +"User-Agent": +"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0", +"Accept": +"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", +"Accept-Language": +"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3", +"Accept-Encoding": +"gzip, deflate, br", +"Connection": +"keep-alive", +"Cache-Control":"max-age=0", +"Upgrade-Insecure-Requests":"1" +}#火狐浏览器头部 + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费代理IP + r=requests.get(url,headers=headers,proxies=proxies) + return r.content +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx'#默认工作簿地址 + try: + file_path='d:/documents/downloads' + print '路径{0}文件夹中的文件和文件夹如下:'.format(file_path) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print i+1,file_names[i] + file_num=int(raw_input("Plz input a number of serial number (Default workbook is {0}):\n Or Enter a number more than the last option ,then you could input a url of file\n".format('stainless steel toilet brush 
holder.xlsx')))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception,e: + print str(e) + file_path=t + return file_path +def main(): + '''打开工作簿,选择sheet匹配products数,存储到新的工作簿中''' + start=datetime.datetime.now()#开始时间 + file_path=File_path_choice()#返回所选文件路径 + data=xlrd.open_workbook(file_path)#打开路径中文件 + sheets=data.sheets()#获取所有sheet (类型list) + print "{0}'s sheets:\n".format(file_path) + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n"))#选择工作簿中的sheet + if sheet_index in range(1,len(sheets)+1):#判断输入数是否超出范围 + sheet_num=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e:#如果输入数非法,则默认为sheet1 + print str(e) + sheet_num=1 + table=data.sheet_by_index(sheet_num-1) + rows=table.nrows#计算表中列数和行数 + cols=table.ncols + print "{0} 's rows,cols are {1},{2}".format(z[sheet_num],rows,cols) + rows_1st=table.row_values(0)#读取第一行 + URL_index=rows_1st.index('URL')#读取URL所在位置 + FBA_index=rows_1st.index('FBA')#返回运输方式的所在列数 + URL=table.col_values(URL_index,1,rows)#读取该列从第2行到最后一行 + FBA=table.col_values(FBA_index,1,rows)# 读取导入FBA数组 + data1=copy(data)#xlutils.copy 类里的copy函数 + table1=data1.get_sheet(sheet_num-1) + for i in xrange(rows-1): + try: + if FBA[i+1]=='FBA': #判断是否为FBA运输方式(因为只有该运输方式和第三方运输可以查店铺月反馈数 + r=requests_url(URL[i+1]) + print i+1,URL[i+1] + sellerID=re.findall(r'\/gp\/help\/seller\/at-a-glance\.html\/ref=dp_merchant_link\?ie=UTF8&seller=(.*?)&isAmazonFulfilled=1',r) #返回第一个匹配的卖家店铺sellerID + print sellerID[0] + if sellerID: + post=post_products(sellerID[0]) + 
products_num=re.findall(r'\"productsTotalCount\":(\d+)',post) + print products_num + table1.write(i+1,cols,products_num) + table1.write(i+1,cols+1,sellerID[0]) + else: + print 'None' + except Exception,e:# + print str(e) + filepath='d:/Documents/Downloads/best_copy.xls' + data1.save(filepath) #保存到新的工作簿 + end=datetime.datetime.now() + t=end-start + print '存储到新的工作簿 {0}\n总用时:{1} s'.format(filepath,t) +if __name__=='__main__': + main() diff --git a/AWS/Get_Amazon_sales.py b/AWS/Get_Amazon_sales.py new file mode 100644 index 0000000..145bf14 --- /dev/null +++ b/AWS/Get_Amazon_sales.py @@ -0,0 +1,57 @@ +# -*- coding:UTF-8 -*- +#Python Version:3.8.1 +#auth:Toryun +#Date:23/3/15 +#Function:Find the sales in the Amazon search,return to the excel +import re +import os +import requests +from Amazon_Utils import xlwt, is_TTD, retry, excel_bulit, Get_ASINlists, Get_Exceldata + +def Get_sales(keyword, page): + url='https://www.amazon.com/s?k={}&page={}'.format(keyword, page) + headers={ + "Host": "www.amazon.com", + "Connection": "keep-alive", + "Cache-Control": "max-age=0", + "device-memory": "8", + "sec-ch-device-memory": "8", + "dpr": "2", + "sec-ch-dpr": "2", + "viewport-width": "1920", + "sec-ch-viewport-width": "1920", + "rtt": "50", + "downlink": "10", + "ect": "4g", + "sec-ch-ua": 'Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": "macOS", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Sec-Fetch-Site": "same-origin", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-User": "?1", + "Sec-Fetch-Dest": "document", + "Referer": "https://www.amazon.com/", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.8" + 
}#得到request头部 + + r = requests.get(url,headers=headers) +# with open('./t.html', 'wb+') as f: +# f.write(r.content) +# f.close() + i = 0 + while is_TTD(r.text): + r = requests.get(url,headers=headers) + i += 1 + if i == 5: + break + m = re.findall('dp\/(B[A-Z0-9]{9}).*?a-row a-size-base\">(.*?)<\/span>', r.text)#匹配result + return m + +if __name__ == '__main__': + keyword = "outdoor rug" + page = 1 + print(Get_sales(keyword, page)) \ No newline at end of file diff --git a/AWS/Get_Amazon_tag.py b/AWS/Get_Amazon_tag.py new file mode 100644 index 0000000..269f334 --- /dev/null +++ b/AWS/Get_Amazon_tag.py @@ -0,0 +1,110 @@ +# -*- conding:UTF-8 -*- +#Author:Toryun +#Python version:2.7.13 +#Date:17/11/11 +#Function:Get the keywords in Read reviews that mention +import requests,re,xlrd,os,datetime,time +from xlutils.copy import copy +def requests_url(url): + '''获取网页内容''' + headers={"Host": +"www.amazon.com", +"User-Agent": +"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0", +"Accept": +"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", +"Accept-Language": +"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3", +"Accept-Encoding": +"gzip, deflate, br", +"Connection": +"keep-alive", +"Cache-Control":"max-age=0", +"Upgrade-Insecure-Requests":"1" +}#火狐浏览器头部 + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费代理IP + r=requests.get(url,headers=headers,proxies=proxies) + return r.content +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx'#默认工作簿地址 + try: + file_path='d:/documents/downloads' + print '路径{0}文件夹中的文件和文件夹如下:'.format(file_path) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print i+1,file_names[i] + file_num=int(raw_input("Default workbook is 1,plz input a number of serial number(default {0}):\nOr Enter a number more than the last option you could input a url of file\n 
".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception,e: + print str(e) + file_path=t + return file_path +def main(): + '''打开工作簿,选择sheet匹配标签获得个数,存储到新的工作簿中''' + start=datetime.datetime.now()#开始时间 + file_path=File_path_choice()#返回所选文件路径 + data=xlrd.open_workbook(file_path)#打开路径中文件 + sheets=data.sheets()#获取所有sheet (类型list) + print "{0}'s sheets:\n".format(file_path) + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n"))#选择工作簿中的sheet + if sheet_index in range(1,len(sheets)+1):#判断输入数是否超出范围 + sheet_num=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e:#如果输入数非法,则默认为sheet1 + print str(e) + sheet_num=1 + table=data.sheet_by_index(sheet_num-1) + rows=table.nrows#计算表中列数和行数 + cols=table.ncols + print "{0} 's rows,cols are {1},{2}".format(z[sheet_num],rows,cols) + rows_1st=table.row_values(0)#读取第一行 + URL_index=rows_1st.index('URL')#读取URL所在位置 + URL=table.col_values(URL_index,1,rows-1)#读取该列从第2行到最后一行 + data1=copy(data)#xlutils.copy 类里的copy函数 + table1=data1.get_sheet(sheet_num-1) + for i in xrange(rows): + Amazon_='https://www.amazon.com' + try: + r=requests_url(URL[i]) + print i,URL[i] + filter_tag_url=re.findall(r'data-reviews-state-param class=\"a-link-normal\" href=\"(.*?)">\(containing \"(.*?)\"\)',r1) + reviews_num=re.findall(r'
(.*?) reviews',r1) + + table1.write(i+1,cols+j,str(filter_tag)+':'+str(reviews_num))#写入表格 + else: + print "None" + except Exception,e: + print str(e) + filepath='d:/Documents/Downloads/Amaozn_tag.xls' + data1.save(filepath)#保存到新的工作簿 + end=datetime.datetime.now() + s=end-start + print '存储到新的工作簿{0}\n总用时:{1} s'.format(filepath,s) +if __name__ =='__main__': + main() + + + diff --git a/AWS/Get_FBA_Package_Weight_Dimensions.py b/AWS/Get_FBA_Package_Weight_Dimensions.py new file mode 100644 index 0000000..2c1767a --- /dev/null +++ b/AWS/Get_FBA_Package_Weight_Dimensions.py @@ -0,0 +1,196 @@ +import re +import os +import math +import time +import html +import xlrd +import xlwt +import json +import random +import requests +from hyper.contrib import HTTP20Adapter +from Amazon_Utils import excel_bulit, Get_ASINlists + +proxies_list80 = [ + "184.60.66.122", + "34.23.45.223", + "162.144.236.128", + "108.170.12.13", + "64.251.22.20", + "138.91.159.185", + "162.144.233.16", + "167.99.174.59", + "54.86.198.153", + "68.183.143.134", + "45.77.198.163", + "162.240.75.37", + "104.45.128.122", + "191.101.1.116", + "74.208.177.198", + "164.92.108.63", + "93.188.161.84", + "52.88.105.39", + "104.225.220.233", + "143.110.232.177", + "34.239.204.118", + "209.126.6.159", + "45.79.17.203", + "104.215.127.197", + "34.75.202.63", + "147.182.142.189", + "137.184.232.148", + "85.239.242.23", + "192.236.160.186", + "142.11.222.22", + "24.199.82.12", + "129.153.163.10", + "50.16.22.43", + "65.109.84.104", + "74.208.205.5", + "65.108.9.181", + "34.239.204.118", + "103.216.160.163", + "103.216.160.164", + "103.216.160.160", + "34.87.103.220", + "103.216.160.167" + ] +proxies_list1994 = ["216.127.188.18", + "198.74.98.18", + "198.52.105.249", + "173.82.102.194", + "72.11.130.145", + "72.44.76.76", + "198.52.114.146", + "72.44.68.249", + "104.194.232.179", + "104.129.41.2", + "170.178.193.106", + "173.82.20.178", + "72.44.67.178", + "173.82.46.138", + "198.52.115.114", + "173.82.43.108", + 
"173.44.42.66", + "198.211.55.167" + ] +#proxies = {'HTTP': 'HTTP://{}:80'.format(random.choice(proxies_list80))}#, 'HTTPS': 'HTTPS://{}:1994'.format(random.choice(proxies_list1994))} +proxies = random.choice(proxies_list80) +def retry(func): + def wrap(*args): + i = 0 + r = None + while i<5: + try: + r = func(*args) + if len(r) != 0: + i = 5 + except Exception as e: + i+=1 + return r + return wrap + +def Get_weight_dimesions(asin): + a = os.system('''curl -s 'https://sellercentral.amazon.com/rcpublic/getadditionalpronductinfo?countryCode=US&asin={}&fnsku=&searchType=GENERAL&locale=en-US' \ + -H 'authority: sellercentral.amazon.com' \ + -H 'accept: */*' \ + -H 'accept-language: zh-CN,zh;q=0.9,en;q=0.8' \ + -H 'cookie: session-id=132-1594050-2630761; i18n-prefs=USD; ubid-main=133-9093459-6797703; csm-hit=tb:s-RD27W807S27J306PMDRN|1677155166857&t:1677155166871&adb:adblk_yes; session-id-time=2082787201l; lc-main=en_US; session-token=3U342I6z4xu9pIKvXj/jEB8fxtttcgKVe/PTCxrlHNtirjv70uLvc2sWuPCySPtGv07DYrWPFr3zazDtBf/0JXQYxBzGji3UmAZTVJI7qibvrnu00XXRdFdZpA+Ycz4IFhG8ikiALaNMWLvZOUUccCZ9cUBM/30YmsewMY1Zilx+fd7dqeHGaIThYvezLcNtmRhvy07BFd44yJtWpQsMszNXZHyiU+mep8TChN16z4E=' \ + -H 'referer: https://sellercentral.amazon.com/fba/profitabilitycalculator/index?lang=en_US' \ + -H 'sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"' \ + -H 'sec-ch-ua-mobile: ?0' \ + -H 'sec-ch-ua-platform: "macOS"' \ + -H 'sec-fetch-dest: empty' \ + -H 'sec-fetch-mode: cors' \ + -H 'sec-fetch-site: same-origin' \ + -H 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' \ + --compressed > ./t.json'''.format(asin)) + we = None + d = None + if a == 0: + with open("./t.json", "rb+") as f: + i = 0 + while i<3: + try: + r = json.load(f) + weight, l, w, h = 0, 0, 0, 0 + weightUnit = 'pounds' + dimensionUnit = 'inches' + if 'length' in r['data'].keys(): + l = r['data']['length'] + if 'width' in 
def Productmatch(asin, proxies):
    """Query Seller Central's public ``productmatch`` endpoint for one ASIN.

    Args:
        asin: value sent as ``searchKey``.
        proxies: passed straight through to ``requests.get(proxies=...)``.

    Returns:
        The ``data`` member of the JSON response when the status is 200,
        otherwise an empty dict.

    Side effects: the raw response body is written to ``./t.html`` and the
    HTTP status code is printed.

    Changes from the earlier version:
      * The query fields are passed via ``params=`` so requests encodes them
        into the URL. They used to be formatted into the URL and also sent
        as a form body through ``data=``; a body on a GET carries no meaning
        here and may be rejected by some front-ends.
      * The header was spelled ``reffer``; it is now the standard
        ``Referer``.
      * The redundant ``f.close()`` inside the ``with`` block is gone.
    """
    url = "https://sellercentral.amazon.com/rcpublic/productmatch"
    headers = {
        "Host": "sellercentral.amazon.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "sec-fetch-mode": "cors",
        "sec-ch-ua-platform": "macOS",
        "sec-fetch-site": "same-origin",
        "Referer": "https://sellercentral.amazon.com/fba/profitabilitycalculator/index?lang=en_US",
        "Upgrade-Insecure-Requests": "1",
    }
    # Same three fields, in the same order as the original query string.
    params = {
        "searchKey": "{}".format(asin),
        "countryCode": "US",
        "locale": "en-US",
    }
    r = requests.get(url, headers=headers, proxies=proxies, params=params)
    # Keep a copy of the raw body on disk for debugging.
    with open('./t.html', "wb+") as f:
        f.write(r.content)
    print(r.status_code)
    if r.status_code == 200:
        return r.json()['data']
    return {}
workbook.save(file_save) + print("Saved to {}".format(file_save)) +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/AWS/Get_description.py b/AWS/Get_description.py new file mode 100644 index 0000000..47b6145 --- /dev/null +++ b/AWS/Get_description.py @@ -0,0 +1,92 @@ +# -*- coding:UTF-8 -*- +#Python version is 2.7.13 +#Author: Toryun +#Time:2017-08- 28 +#Fuction:Get description of product in the list +'''API:
  • \s+(.*?)<\/span><\/li> + + +
    .*?

    (.*?)<\/p>''' +import re,requests,xlrd,xlwt,datetime,time,os +from xlutils.copy import copy +'''从表中获取URL查询Amazon商家店铺30天内的feedback''' +def main(): + start=datetime.datetime.now() + try: + filename=raw_input("plz input a filename like c:\\eakd.xlsx (defult filename is d:/Documents/Downloads/Search Term Food Storage Container.xls) :\n") + t=os.path.exists(filename) + while t==False: + filename=raw_input("The path is wrong,plz input a correct filename like c:\\eakd.xlsx:\n") + t=os.path.exists(filename) + if filename=="\\n": + filename='d:/Documents/Downloads/Search Term Food Storage Container.xls' + except Exception,e: + print str(e) + + data=xlrd.open_workbook(filename) # 打开指定工作薄 + sheets=data.sheets()#获取工作薄所有列表 + print type(sheets) + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] #返回所有列表名 + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e: + print str(e) + t=1 + table=data.sheet_by_index(t-1) # 打开列表 + nrows=table.nrows # 行数 + cols=table.ncols # 列数 + print '列数:%d, 行数:%d'%(cols,nrows) + row_1st=table.row_values(0)#读取第一行 + URL_index=row_1st.index('URL')#返回URL列的所在列数 + URL=table.col_values(URL_index) # 读取导入URL数组 + l=len(URL) + headers={'Host': +"www.amazon.com", +'User-Agent': +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3141.7 Safari/537.36", +'Accept': +"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", +'Accept-Language': +"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3", +'Accept-Encoding': +"gzip, deflate, br", +'Connection': +"keep-alive", +'Upgrade-Insecure-Requests': +"1"} + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费IP地址*http://www.xicidaili.com* + data2=copy(data) # 复制工作簿 + table2=data2.get_sheet(t-1) + for i in range(nrows): + try: + 
r=requests.get(URL[i+1],headers=headers,proxies=proxies) + print i+1,URL[i+1] + list_description=re.findall(r'

  • \s+(.*?)\s+<\/span><\/li>',r.content) #返回匹配的商品的list详情 + productDescription=re.findall(r'\s+

    (.*)\s+<\/p>',r.content)#返回匹配的商品产品详情 + if list_description: + print list_description + table2.write(i+1,cols,str(list_description) + if productDescription: + print productDescription + table2.write(i+1,cols+1,str(productDescription)) + else: + print 'None' + + except Exception,e: + print str(e) + filepath='d:/Documents/Downloads/best_copy1.xls' + data2.save(filepath) #保存到新的工作簿 + end=datetime.datetime.now() + t=end-start + print '存储到新的工作簿 {0}\n总用时:{1} s'.format(filepath,t) +if __name__=='__main__': + main() diff --git a/AWS/Is_Amz_videos.py b/AWS/Is_Amz_videos.py new file mode 100644 index 0000000..4a03404 --- /dev/null +++ b/AWS/Is_Amz_videos.py @@ -0,0 +1,61 @@ +# -*- coding:UTF-8 -*- +# Functions: 获取amazon列表每个listing是否含有videos +import re +import os +import xlwt +import requests +import datetime +from Amazon_Utils import Get_Exceldata, excel_bulit + +def is_AMZ_V(url): + header = { + "Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0", + "Accept": + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9v", + "Accept-Language": + "zh-CN,en;q=0.8,zh;q=0.7,zh-TW;q=0.5,zh-HK;q=0.3,en-US;q=0.2", + "Accept-Encoding": + "gzip, deflate, br", + "Connection": + "keep-alive", + "Upgrade-Insecure-Requests":"1" + } + proxies = {'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'} + + try: + r = requests.get(url, headers = header, proxies = proxies) + except requests.exceptions.RequestException as e: + raise SystemExit(e) + if(r.status_code == 200): + is_videos = re.findall("(.*?)<\/span>",r.text) + print(is_videos) + return is_videos + else: + return "{}".format(r.status_code) + +def main(): + start=datetime.datetime.now() + fn="./url.xls" + file_save = "./amzvide.xls" + workbook = xlwt.Workbook(encoding = 'utf-8') + table = excel_bulit(workbook, "1") + URL = Get_Exceldata(fn, "URL") + rows = 
def requests_url(Asin):
    """Fetch the Amazon product page for ``Asin``.

    Builds ``https://www.amazon.com/dp/<Asin>``, requests it with
    browser-like headers and a fixed proxy mapping, and returns the pair
    ``(response body bytes, url)``.
    """
    product_url = 'https://www.amazon.com/dp/' + Asin
    browser_headers = {
        "Host": "www.amazon.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3141.7 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Upgrade-Insecure-Requests": "1",
    }
    # Free proxy address.
    # NOTE(review): requests documents lower-case scheme keys for proxies;
    # confirm whether these upper-case keys are honoured at all.
    proxy_map = {'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}
    page = requests.get(product_url, headers=browser_headers, proxies=proxy_map)
    return page.content, product_url
input a url of file\n ".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception,e: + print str(e) + file_path=t + return file_path + +def main(): + start=datetime.datetime.now() # 计算所用时间 + file_path=File_path_choice() + data=xlrd.open_workbook(file_path)#打开工作簿 + sheets=data.sheets() + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e: + print str(e) + t=1 + + table=data.sheet_by_index(t-1)# 打开sheet + rows=table.nrows#计算表中列数和行数 + cols=table.ncols + print "{0}'s cols,rows is {1},{2}".format(z[t],cols,rows)#打印该sheet中的列行数 + row_1st=table.row_values(0)#读取第一行 + asin_index=row_1st.index('Asin')#返回Asin列的所在列数 + URL=table.col_values(asin_index,1,rows-1)#读取Asin列第二行到最后一行 + data1=copy(data)#复制工作簿 + table1=data1.get_sheet(t-1) + date=str(start.month)+'/'+str(start.day) + table1.write(0,cols,date) + for i in range(rows): + try: + r,url=requests_url(URL[i]) + time.sleep(3) + print i,url + reviews=re.findall(r'(.*?) 
class dl:
    """Compute the ``tk`` request tokens used by the SellerSprite endpoints."""

    def __init__(self):
        # Default key used by tkk(); also the value updateTkk() always returns.
        self.GOOGLE_TKK_DEFAULT = "446379.1364508470"
        # Extension version "3.4.2" folded into a numeric-looking key: the
        # first dot becomes "00", the remaining dot "0", then a fixed suffix.
        version = "3.4.2"
        folded = version.replace(".", "00", 1)
        self.EXT_VERSION = folded.replace(".", "0") + ".1364508470"

    def updateTkk(self, e):
        """Return the key to hash with; the argument is ignored."""
        return self.GOOGLE_TKK_DEFAULT

    def tkk(self, e, t):
        """Hash ``t`` with the key from ``updateTkk(e)``, or the default if that is empty."""
        key = self.updateTkk(e)
        if not key or key == "":
            key = self.GOOGLE_TKK_DEFAULT
        return self._cal(t, key)

    def s2Tkk(self, e, t):
        """Hash the concatenation of ``e`` and ``t`` with the extension-version key.

        Falsy arguments are skipped. A list argument contributes its items;
        anything else contributes ``str(value)``. Returns ``""`` when
        nothing was collected.
        """
        pieces = []
        for value in (e, t):
            if not value or len(str(value)) == 0:
                continue
            before = len(pieces)
            if isinstance(value, list):
                pieces.extend(value)
            if before == len(pieces):
                pieces.append(str(value))
        if not pieces:
            return ""
        return self._cal("".join(pieces), self.EXT_VERSION)

    def _cal(self, e, t):
        """Return the token ``"<h>.<h ^ seed>"`` for text ``e`` and key ``t`` (``"<seed>.<xor>"``)."""
        mask32 = 4294967295

        def mix(acc, program):
            # ``program`` is read three characters at a time:
            #   [0] "+" -> add and mask to 32 bits, otherwise XOR
            #   [1] "+" -> shift right, otherwise shift left
            #   [2] shift amount: a digit, or a letter meaning ord(letter) - 87
            for pos in range(0, len(program) - 2, 3):
                combine = program[pos]
                direction = program[pos + 1]
                amount_char = program[pos + 2]
                if amount_char >= "a":
                    amount = ord(amount_char) - 87
                else:
                    amount = int(amount_char)
                if direction == "+":
                    shifted = acc >> amount
                else:
                    shifted = acc << amount
                if combine == "+":
                    acc = (acc + shifted) & mask32
                else:
                    acc = acc ^ shifted
            return acc

        def expand(text):
            # Turn each character into one or more small integers; code
            # points below 128 map to themselves.
            units = []
            size = len(text)
            for idx, ch in enumerate(text):
                code = ord(ch)
                if code < 128:
                    units.append(code)
                    continue
                if code < 2048:
                    units.append(code >> 6 | 192)
                else:
                    paired = (
                        64512 == (64512 & code)
                        and idx + 1 < size
                        and 56320 == (64512 & ord(text[idx + 1]))
                    )
                    if paired:
                        code = 65536 + ((1023 & code) << 10) + (1023 & ord(text[idx + 1]))
                        units.append(code >> 18 | 240)
                        units.append(code >> 12 & 63 | 128)
                        # NOTE(review): the next character is consumed here
                        # but iteration does not skip it, so it is visited
                        # again on the following pass (unchanged behaviour).
                    else:
                        units.append(code >> 12 | 224)
                    units.append(code >> 6 & 63 | 128)
                units.append(63 & code | 128)
            return units

        key_parts = t.split(".")
        seed = int(key_parts[0]) if key_parts[0] else 0

        acc = seed
        for unit in expand(e):
            acc = mix(acc + unit, "+-a^+6")
        acc = mix(acc, "+-3^+b+-f")
        acc ^= int(key_parts[1]) if key_parts[1] else 0
        if acc < 0:
            acc = 2147483648 + (2147483647 & acc)
        tail = acc % 1000000
        return str(tail) + "." + str(tail ^ seed)
def salt_pwd_hash(email, password):
    """Hash the password and derive the salt from it.

    Returns ``(password_hash, salt)`` where ``password_hash`` is the MD5 hex
    digest of ``password`` and ``salt`` is the MD5 hex digest of
    ``email + password_hash``.
    """
    def md5_hex(text):
        return hashlib.md5(text.encode()).hexdigest()

    hashed_password = md5_hex(password)
    return hashed_password, md5_hex(email + hashed_password)
"accept-language": "zh-CN,zh;q=0.9", + "cache-control": "max-age=0", + "content-type": "application/x-www-form-urlencoded", + "sec-ch-ua": "\"Chromium\";v=\"110\", \"Not A(Brand\";v=\"24\", \"Google Chrome\";v=\"110\"", + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": "\"Windows\"", + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "same-origin", + "sec-fetch-user": "?1", + "upgrade-insecure-requests": "1" + } + FormData = { + "callback": "", + "password": pwd, + "email": email, + "autoLogin": "Y", + "salt": salt + } + r = session.post(url, headers = headers, data = FormData) +# with open('./t.html', 'w', encoding = "UTF-8") as f: +# f.write(r.text) +# f.close() + return session + +def get_keywors_traffic_extend_asin(session, asins): + '''获取asin数组的扩展流量词''' + url = "https://www.sellersprite.com/v3/api/traffic/extend/asin" + headers = { + "host": "www.sellersprite.com", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36", + "accept": "application/json, text/plain, */*", + "accept-language": "zh-CN,zh;q=0.9,en;q=0.8", + "content-type": "application/json;charset=UTF-8", +# "sec-ch-ua": "\"Chromium\";v=\"110\", \"Not A(Brand\";v=\"24\", \"Google Chrome\";v=\"110\"", +# "sec-ch-ua-mobile": "?0", +# "sec-ch-ua-platform": "\"Windows\"", +# "sec-fetch-dest": "empty", +# "sec-fetch-mode": "cors", +# "sec-fetch-site": "same-origin" + } + Payload = { + "queryVariations":"true", + "asinList":asins, + "originAsinList":asins, + "market":1, + "page":1, + "month":"", + "size":50, + "orderColumn":12, + "desc":"true", + "exactly":"false", + "ac":"false" + } + r = session.post(url, headers = headers, data = json.dumps(Payload)) + keywords = [] + if r.status_code == 200: + keywordlist = r.json()['data']['items'] + for i in range(len(keywordlist)): + keywords.append(keywordlist[i]['keywords']) + else: + print(r.text) + return keywords + +def Save_To_Excel(session, 
Auth_Token, dl): + '''保存BSR''' + asinsfile = "./bs.xls" + file_save = "./BSR.xls" + if not os.path.exists(asinsfile): + print("The asinsfile to be queried does not exist: {}".format(os.path.abspath(asinsfile))) + return + asins = Get_ASINlists(asinsfile) + workbook = xlwt.Workbook(encoding = 'utf-8') + print("Total asins: {}".format(len(asins))) + for i in range(len(asins)): + print(asins[i]) + keepa_tk = dl.s2Tkk("", asins[i]) + BSR = keepa(session, asins[i], Auth_Token, keepa_tk) + table = excel_bulit(workbook, asins[i]) + if len(BSR['times']) == 0: + print("The BSR is None.") + continue + for j in range(len(BSR['times'])): + table.write(j, 0, BSR['times'][j]) + table.write(j, 1, BSR['bsr'][j]) + table.write(j, 2, BSR['subRanks'][j]) + workbook.save(file_save) + print("The excel is saved to {}".format(os.path.abspath(file_save))) +if __name__ == '__main__': + email = "" + password = "" +# asins = ["B098T9ZFB5","B09JW5FNVX","B0B71DH45N","B07MHHM31K","B08RYQR1CJ"] + pwd, salt = salt_pwd_hash(email, password) + #print(type(salt)) + session = requests.Session() +# session = Sellersprite_login(session, email, pwd, salt) + #keywords = get_keywors_traffic_extend_asin(session, asins) + #print(keywords) + dl = dl() + extension_login_tk = dl.s2Tkk(email, pwd) + session, Auth_Token = Sellersprite_extension_login(session,email,pwd, extension_login_tk) + Save_To_Excel(session, Auth_Token, dl) \ No newline at end of file diff --git a/AWS/__init__.py b/AWS/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AWS/amazon_comments.py b/AWS/amazon_comments.py new file mode 100755 index 0000000..db0e798 --- /dev/null +++ b/AWS/amazon_comments.py @@ -0,0 +1,408 @@ +import random +import csv +import requests +import time +import xlrd +import re +import math +import os +user_agent_list = ["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML 
like Gecko) Chrome/44.0.2403.155 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36", +"Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36", +"Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36", +"Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/28.0.1468.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36", +"Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36", +"Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.13 
(KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1284.0 Safari/537.13", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.6 Safari/537.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.6 Safari/537.11", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/537.11", +"Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.17 Safari/537.11", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1", +"Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5", +"Mozilla/5.0 (X11; FreeBSD amd64) 
AppleWebKit/536.5 (KHTML like Gecko) Chrome/19.0.1084.56 Safari/1EA69", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/19.0.1047.0 Safari/535.22", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1041.0 Safari/535.21", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 
(KHTML, like Gecko) Chrome/18.6.872.0 Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0", +"Mozilla/5.0 (Macintosh; AMD Mac OS X 10_8_2) AppleWebKit/535.22 (KHTML, like Gecko) Chrome/18.6.872", +"Mozilla/5.0 (X11; CrOS i686 1660.57.0) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.46 Safari/535.19", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.151 Safari/535.19", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/11.10 Chromium/18.0.1025.142 Chrome/18.0.1025.142 Safari/535.19", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.0) 
AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like 
Gecko) Ubuntu/11.04 Chromium/17.0.963.56 Chrome/17.0.963.56 Safari/535.11", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/17.0.940.0 Safari/535.8", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7ad-imcjapan-syosyaman-xkgi3lqg03!wgz", +"Mozilla/5.0 (X11; CrOS i686 1193.158.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7xs5D9rRDFpg2g", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8", +"Mozilla/5.0 (Windows NT 5.2; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.6 (KHTML, like Gecko) Chrome/16.0.897.0 Safari/535.6", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2", 
+"Mozilla/5.0 (X11; FreeBSD i386) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.10 Chromium/15.0.874.120 Chrome/15.0.874.120 Safari/535.2", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Ubuntu/11.04 Chromium/15.0.871.0 Chrome/15.0.871.0 Safari/535.2", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.864.0 Safari/535.2", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.860.0 Safari/535.2", +"Chrome/15.0.860.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/15.0.860.0", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.834.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.825.0 Chrome/14.0.825.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.824.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.815.10913 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.815.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 
(KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.814.0 Chrome/14.0.814.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.814.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.04 Chromium/14.0.813.0 Chrome/14.0.813.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.812.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.811.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.809.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.10 Chromium/14.0.808.0 Chrome/14.0.808.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.04 Chromium/14.0.808.0 Chrome/14.0.808.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.04 Chromium/14.0.804.0 Chrome/14.0.804.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.803.0 Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, 
like Gecko) Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; PPC Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1", +"Mozilla/5.0 (Windows; U; Windows NT 6.1) AppleWebKit/526.3 (KHTML, like Gecko) Chrome/14.0.564.21 Safari/526.3", +"Mozilla/5.0 (X11; CrOS i686 13.587.48) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.43 Safari/535.1", +"Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41", +"Mozilla/5.0 ArchLinux (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/13.0.782.41 Chrome/13.0.782.41 Safari/535.1", 
+"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.2; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.32 Safari/535.1", +"Mozilla/5.0 (X11; Linux amd64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1", 
+"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1", +"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1", +"Mozilla/5.0 (X11; CrOS i686 0.13.587) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.14 Safari/535.1", +"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.107 Safari/535.1", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.107 Safari/535.1", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.1 Safari/535.1", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.36 (KHTML, like Gecko) Chrome/13.0.766.0 Safari/534.36", +"Mozilla/5.0 (X11; Linux amd64) AppleWebKit/534.36 (KHTML, like Gecko) Chrome/13.0.766.0 Safari/534.36", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.35 (KHTML, like Gecko) Ubuntu/10.10 Chromium/13.0.764.0 Chrome/13.0.764.0 Safari/534.35", +"Mozilla/5.0 (X11; CrOS i686 0.13.507) AppleWebKit/534.35 (KHTML, like Gecko) Chrome/13.0.763.0 Safari/534.35", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.33 (KHTML, like Gecko) Ubuntu/9.10 Chromium/13.0.752.0 Chrome/13.0.752.0 Safari/534.33", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.31 (KHTML, like Gecko) Chrome/13.0.748.0 Safari/534.31", +"Mozilla/5.0 (Windows NT 6.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.750.0 Safari/534.30", +"Mozilla/5.0 (X11; CrOS i686 12.433.109) 
AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.93 Safari/534.30", +"Mozilla/5.0 (X11; CrOS i686 12.0.742.91) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.93 Safari/534.30", +"Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/12.0.742.91", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.91 Chromium/12.0.742.91 Safari/534.30", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.68 Safari/534.30", +"Mozilla/5.0 ArchLinux (X11; U; Linux x86_64; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.60 Safari/534.30", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.53 Safari/534.30", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.113 Safari/534.30", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/11.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/11.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (Windows NT 7.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (Windows 
8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30", +"Mozilla/5.0 (X11; CrOS i686 12.433.216) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.105 Safari/534.30", +"Mozilla/5.0 ArchLinux (X11; U; Linux x86_64; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 ArchLinux (X11; U; Linux x86_64; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Slackware/Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30", +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.724.100 Safari/534.30", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.706.0 Safari/534.25", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.25 (KHTML, like Gecko) Chrome/12.0.704.0 Safari/534.25", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.703.0 Chrome/12.0.703.0 Safari/534.24", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.702.0 Chrome/12.0.702.0 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24", 
+"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.700.3 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.698.0 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.697.0 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.71 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24", +"Mozilla/5.0 Slackware/13.37 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/11.0.696.50", +"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.43 Safari/534.24", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.14 Safari/534.24", 
+"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24", +"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.04 Chromium/11.0.696.0 Chrome/11.0.696.0 Safari/534.24", +"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24", +"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.694.0 Safari/534.24", +"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.23 (KHTML, like Gecko) Chrome/11.0.686.3 Safari/534.23", +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.682.0 Safari/534.21", +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21", +"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7_0; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21", +"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20", +"Mozilla/5.0 (Windows NT) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20", +"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20", +"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.669.0 Safari/534.20", +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.19 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.19", +"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.18", +"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.660.0 Safari/534.18", +"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) 
AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17", +"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17", +"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.654.0 Safari/534.17", +"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.652.0 Safari/534.17", +"Mozilla/4.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11.0.1245.0 Safari/537.36", +"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17", +"Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17", +"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.82 Safari/534.16", +"Mozilla/5.0 (X11; U; Linux armv7l; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16", +"Mozilla/5.0 (X11; U; FreeBSD x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16", +"Mozilla/5.0 (X11; U; FreeBSD i386; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16"] + + + +def read_excel(path): + '''Read asin from workbook读取工作簿中的asin''' + if not os.path.exists(path): + print("Not exists this file in {}".format(path)) + return [] + workbook=xlrd.open_workbook(path) + table=workbook.sheet_by_index(0) + header = table.row_values(0)#读取第一行 + asin_index = header.index('asin')#读取URL所在位置 + asins = table.col_values(asin_index) + return asins + +def Amazon_comments(Asins, user_agent_list): + ''' + 获取评论 + k 是页面数 + ''' + if type(Asins) != list: + print("Its type is not list, please change") + return + if len(Asins) == 0: + print("The Asins is none") + return + base_url = "https://www.amazon.com/hz/reviews-render/ajax/reviews/get/ref=cm_cr_arp_d_viewopt_sr" + + for Asin in 
Asins: + max_k = 2 + k = 1 + with open('./Amazoncomments.csv','a+',encoding = 'utf-8',newline='') as f: + f_csv = csv.writer(f) + f_csv.writerow([Asin]) + while k != max_k: + filterByStar = "all_stars" + pagenumber = "{}".format(k) + usersagent = random.choice(user_agent_list) + headers = {"authority": "www.amazon.com", + "method": "POST", + "path": "/hz/reviews-render/ajax/reviews/get/ref=cm_cr_arp_d_viewopt_sr", + "scheme": "https", + "accept": "text/html,*/*", + "accept-encoding": "gzip, deflate, br", + "accept-language": "zh-CN,zh;q=0.9", + "content-length": "272", + "content-type": "application/x-www-form-urlencoded;charset=UTF-8", + "cookie": "session-id=143-4438163-4876421; ubid-main=134-1563764-9160204; lc-main=en_US; i18n-prefs=USD; aws_lang=cn; aws-target-data=%7B%22support%22%3A%221%22%7D; s_fid=163AE8802CD54AB6-3EF14BD87C6B321B; s_vn=1628815857048%26vn%3D1; regStatus=pre-register; s_cc=true; aws-target-visitor-id=1597279856745-452791.38_0; aws-ubid-main=945-4122367-6470350; s_dslv=1597279872775; s_nr=1597279872791-New; skin=noskin; UM_distinctid=173e5a09903177-043c2e52daf5e2-3e3e5f0e-100200-173e5a09904623; session-id-time=2082787201l; session-token=ufX/64LHPAew+TaDsA4SVue5UZQv1HVhHTthYYArFOU7k0/ajO9Jvaocjm6sL3aDwypcAHjd2/LpRvnmUOHmeQpHc4VTMCsDOrd2M512kNSMhkKPiFyWGFtKMGt5yjo4DIZN6JhTYSUskCzSqds/ID/ro4DJow4tGvlTsQ7xS2rCMoU46HHQCGM7qAsWvxsg; CNZZDATA1278807219=218040419-1598230055-https%253A%252F%252Fwww.amazon.com%252F%7C1598437146; csm-hit=tb:D59GPPZ80DMPS80MXXTY+s-JTTJP5NCPRAJD3YRV0BA|1598442524371&t:1598442524371&adb:adblk_no", + "downlink": "1.3", + "ect": "4g", + "origin": "https://www.amazon.com", + "referer": "https://www.amazon.com/product-reviews/{}/ref=acr_dp_hist_1?ie=UTF8&filterByStar=one_star&reviewerType=all_reviews".format(Asin), + "rtt": "50", + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": usersagent, + "x-requested-with": "XMLHttpRequest"} + + payload = {"sortBy": "", + 
"reviewerType": "all_reviews", + "formatType": "", + "mediaType": "", + "filterByStar": "{}".format(filterByStar), + "pageNumber": "{}".format(pagenumber), + "filterByLanguage": "", + "filterByKeyword": "", + "shouldAppend": "undefined", + "deviceType": "desktop", + "canShowIntHeader": "undefined", + "reftag": "cm_cr_arp_d_viewopt_sr", + "pageSize": "10", + "asin": "{}".format(Asin), + "scope": "reviewsAjax0" + } + session = requests.Session() + r = session.post(base_url, headers = headers, data = payload) + info = re.findall(r"(\\n){10}\s+\\n\s+\\n\s+\\n(.*?)\\n<\/span>\\n \\n<\/span><\/div>

    (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?(\d+)', r.text) + for proxy in proxies: + yield ":".join(proxy) + + @staticmethod + def freeProxy08(): + """ 小幻代理 """ + urls = ['https://ip.ihuan.me/address/5Lit5Zu9.html'] + for url in urls: + r = WebRequest().get(url, timeout=10) + proxies = re.findall(r'>\s*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*?(\d+)', r.text) + for proxy in proxies: + yield ":".join(proxy) + + @staticmethod + def freeProxy09(page_count=1): + """ 免费代理库 """ + for i in range(1, page_count + 1): + url = 'http://ip.jiangxianli.com/?country=中国&page={}'.format(i) + html_tree = WebRequest().get(url).tree + for index, tr in enumerate(html_tree.xpath("//table//tr")): + if index == 0: + continue + yield ":".join(tr.xpath("./td/text()")[0:2]).strip() + + @staticmethod + def freeProxy10(): + """ 89免费代理 """ + r = WebRequest().get("https://www.89ip.cn/index_1.html", timeout=10) + proxies = re.findall( + r'[\s\S]*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?[\s\S]*?[\s\S]*?(\d+)[\s\S]*?', + r.text) + for proxy in proxies: + yield ':'.join(proxy) + + # @staticmethod + # def wallProxy01(): + # """ + # PzzQz https://pzzqz.com/ + # """ + # from requests import Session + # from lxml import etree + # session = Session() + # try: + # index_resp = session.get("https://pzzqz.com/", timeout=20, verify=False).text + # x_csrf_token = re.findall('X-CSRFToken": "(.*?)"', index_resp) + # if x_csrf_token: + # data = {"http": "on", "ping": "3000", "country": "cn", "ports": ""} + # proxy_resp = session.post("https://pzzqz.com/", verify=False, + # headers={"X-CSRFToken": x_csrf_token[0]}, json=data).json() + # tree = etree.HTML(proxy_resp["proxy_html"]) + # for tr in tree.xpath("//tr"): + # ip = "".join(tr.xpath("./td[1]/text()")) + # port = "".join(tr.xpath("./td[2]/text()")) + # yield "%s:%s" % (ip, port) + # except Exception as e: + # print(e) + + # @staticmethod + # def freeProxy10(): + # """ + # 墙外网站 cn-proxy + # :return: + # """ + # urls = ['http://cn-proxy.com/', 
'http://cn-proxy.com/archives/218'] + # request = WebRequest() + # for url in urls: + # r = request.get(url, timeout=10) + # proxies = re.findall(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\w\W](\d+)', r.text) + # for proxy in proxies: + # yield ':'.join(proxy) + + # @staticmethod + # def freeProxy11(): + # """ + # https://proxy-list.org/english/index.php + # :return: + # """ + # urls = ['https://proxy-list.org/english/index.php?p=%s' % n for n in range(1, 10)] + # request = WebRequest() + # import base64 + # for url in urls: + # r = request.get(url, timeout=10) + # proxies = re.findall(r"Proxy\('(.*?)'\)", r.text) + # for proxy in proxies: + # yield base64.b64decode(proxy).decode() + + # @staticmethod + # def freeProxy12(): + # urls = ['https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1'] + # request = WebRequest() + # for url in urls: + # r = request.get(url, timeout=10) + # proxies = re.findall(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?(\d+)', r.text) + # for proxy in proxies: + # yield ':'.join(proxy) + + +if __name__ == '__main__': + p = ProxyFetcher() + methods = [method for method in dir(p) if callable(getattr(p, method)) and not method.startswith("__") and method!="is_available"] + for method in methods: + print(method) + for _ in getattr(p, method)(): + p.is_available(_) + with open('./proxies.txt', 'a') as f: + f.write(str(p.ipproxies)) + f.close() \ No newline at end of file diff --git a/AWS/get_dimensions.py b/AWS/get_dimensions.py new file mode 100644 index 0000000..f07fbde --- /dev/null +++ b/AWS/get_dimensions.py @@ -0,0 +1,110 @@ +# -*- conding:UTF-8 -*- +import re,requests,xlrd,datetime,xlwt,os,time +from xlutils.copy import copy +# 输入Asin,get网页返回内容和网址 +def requests_url(Asin): + url='https://www.amazon.com/dp/'+Asin + headers={"Host": +"www.amazon.com", +"User-Agent": +"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3141.7 Safari/537.36", +"Accept": 
+"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8", +"Accept-Language": +"zh-CN,zh;q=0.8", +"Accept-Encoding": +"gzip, deflate, br", +"Connection": +"keep-alive", +"Cache-Control":"max-age=0", +"Upgrade-Insecure-Requests":"1" +} + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费代理IP + r=requests.get(url,headers=headers,proxies=proxies) + return r.content,url + +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx'#默认工作簿地址 + try: + file_path='d:/documents/downloads' + print '路径{0}文件夹中的文件和文件夹如下:'.format(file_path) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print i+1,file_names[i] + file_num=int(raw_input("Default workbook is 1,plz input a number of serial number(default {0}):\nOr Enter a number more than the last option you could input a url of file\n ".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception,e: + print str(e) + file_path=t + return file_path + +def main(): + start=datetime.datetime.now() # 计算所用时间 + file_path=File_path_choice() + data=xlrd.open_workbook(file_path)#打开工作簿 + sheets=data.sheets() + z={} + p=0 + for sheet in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e: + print str(e) + t=1 + + table=data.sheet_by_index(t-1)# 打开sheet + rows=table.nrows#计算表中列数和行数 + 
cols=table.ncols + print "{0}'s cols,rows is {1},{2}".format(z[t],cols,rows)#打印该sheet中的列行数 + row_1st=table.row_values(0)#读取第一行 + asin_index=row_1st.index('Asin')#返回Asin列的所在列数 + URL=table.col_values(asin_index,1,rows-1)#读取Asin列第二行到最后一行 + data1=copy(data)#复制工作簿 + table1=data1.get_sheet(t-1) + for i in range(rows-1): + try: + r,url=requests_url(URL[i]) + time.sleep(3) + print i,url + dimensions1=re.findall(r'\s+(.*?)\sinches',r) + dimensions2=re.findall('Product Dimensions:\s+<\/b>\s+(.*?)\s+inches',r) + price=re.findall(r'class="a-size-medium a-color-price">\$(.*?)<\/span>',r) + if dimensions1: + print dimensions1[0] + print price[0] + table1.write(i+1,cols,dimensions1[0]) + elif dimensions2: + print dimensions2[0] + print price[0] + table1.write(i+1,cols,dimensions2[0]) + else: + print 'dimensions is None' + table1.write(i+1,cols+1,price[0]) + except Exception,e: + print str(e) + u='c:\\first_Choice_copy.xls' + data1.save(u) #保存复制表格 + end=datetime.datetime.now() + t=end-start#总用时 + print 'It save in {0}.\nTotal time: {1} s.'.format(u,t) + +if __name__== '__main__': + main() + + + diff --git a/get_feedbacknum.py b/AWS/get_feedbacknum.py similarity index 59% rename from get_feedbacknum.py rename to AWS/get_feedbacknum.py index b99daa0..268aae9 100644 --- a/get_feedbacknum.py +++ b/AWS/get_feedbacknum.py @@ -1,23 +1,37 @@ # -*- coding:UTF-8 -*- -# ======================== -#=== Python version is 2.7.13 == -#== Author: Toryun == -#== Time:2017-08- 28 == -#======================== -import re,requests,xlrd,xlwt,datetime,time,os -from xlutils.copy import copy -def main(): - start=datetime.datetime.now() +# ===================== +#==Python version is 2.7.13 +#== Author: Toryun +#== Time:2017-08- 28 +#====================== +import re,requests,xlrd,datetime,time,os +from xlutils.copy import copy +'''从表中获取URL查询Amazon商家店铺30天内的feedback''' +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx'#默认工作簿地址 try: - 
filename=raw_input("plz input a filename like c:\\eakd.xlsx (defult filename is d:/Documents/Downloads/Search Term Food Storage Container.xls) :\n") - t=os.path.exists(filename) - while t==False: - filename=raw_input("The path is wrong,plz input a correct filename like c:\\eakd.xlsx:\n") - t=os.path.exists(filename) - if filename=="\\n": - filename='d:/Documents/Downloads/Search Term Food Storage Container.xls' + file_path='d:/documents/downloads' + print '路径{0}文件夹中的文件和文件夹如下:'.format(file_path) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print i+1,file_names[i] + file_num=int(raw_input("Default workbook is 1,plz input a number of serial number(default {0}):\nOr Enter a number more than the last option you could input a url of file\n ".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) except Exception,e: print str(e) + file_path=t + return file_path +def main(): + start=datetime.datetime.now() + filename=File_path_choice() data=xlrd.open_workbook(filename) # 打开指定工作薄 sheets=data.sheets()#获取工作薄所有列表 @@ -29,7 +43,7 @@ def main(): print p,z[p] #返回所有列表名 try: sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) - if sheet_index in range(1,len(sheets)): + if sheet_index in range(1,len(sheets)+1): t=sheet_index else: print 'The digital is wrong,plz input a correct number' @@ -46,7 +60,6 @@ def main(): FBA=table.col_values(FBA_index)# 读取导入FBA数组 URL=table.col_values(URL_index) # 读取导入URL数组 l=len(URL) - counts_arry=[] headers={'Host': "www.amazon.com", 'User-Agent': @@ -61,12 +74,13 @@ def main(): "keep-alive", 'Upgrade-Insecure-Requests': "1"} + 
proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费代理IP data2=copy(data) # 复制工作簿 table2=data2.get_sheet(t-1) - for i in range(nrows-2): + for i in range(nrows-1): try: if FBA[i+1]=='FBA': #判断是否为FBA运输方式(因为只有该运输方式和第三方运输可以查店铺月反馈数 - r=requests.get(URL[i+1]) + r=requests.get(URL[i+1],headers=headers,proxies=proxies) print i+1,URL[i+1] sellerID=re.findall(r'\/gp\/help\/seller\/at-a-glance\.html\/ref=dp_merchant_link\?ie=UTF8&seller=(.*?)&isAmazonFulfilled=1',r.content) #返回第一个匹配的卖家店铺sellerID print sellerID[0] @@ -77,18 +91,11 @@ def main(): print '匹配失败' else: print counts - table2.write(i+1,cols-1,counts[0])# 进行写入操作 - if sellerID[0]==u'': - return None - print '匹配店铺失败' - if sellerID==None: - print '该店铺无sellerID' - if FBA[i+1]=='AMZ': - i+=1 + table2.write(i+1,cols,counts[0])# 进行写入操作 if FBA[i+1]=='MCH': - r=requests.get(URL[i+1]) + r=requests.get(URL[i+1],headers=headers,proxies=proxies) print i+1,URL[i+1] - sellerID=re.findall(r'\/gp\/help\/seller\/at-a-glance\.html\/ref=dp_merchant_link\?ie=UTF8&seller=(.*?)\'>',r.content) #返回第一个匹配的卖家店铺sellerID + sellerID=re.findall(r'\/gp\/help\/seller\/at-a-glance\.html\?ie=UTF8&seller=(.*?)&isAmazonFulfilled=1',r.content) #返回第一个匹配的卖家店铺sellerID print sellerID[0] if sellerID: q=requests.get('https://www.amazon.com/sp?_encoding=UTF8&asin=&isAmazonFulfilled=1&isCBA=&marketplaceID=ATVPDKIKX0DER&orderID=&seller='+sellerID[0]+'&tab=&vasStoreID=') @@ -97,14 +104,22 @@ def main(): print '匹配失败' else: print counts - table2.write(i+1,cols-1,counts[0])# 进行写入操作 + table2.write(i+1,cols,counts[0])# 进行写入操作 + if sellerID[0]==u'': + return None + print '匹配店铺失败' + if sellerID==None: + print '该店铺无sellerID' + if FBA[i+1]=='AMZ': + i+=1 + except Exception,e: - print str(e) - print len(counts_arry) - data2.save('d:/Documents/Downloads/best_copy.xls') #保存到新的工作簿 + print str(e) + filepath='d:/Documents/Downloads/best_copy.xls' + data2.save(filepath) #保存到新的工作簿 end=datetime.datetime.now() t=end-start - print '总用时:%s s'%(t) + 
print '存储到新的工作簿 {0}\n总用时:{1} s'.format(filepath,t) if __name__=='__main__': main() diff --git a/AWS/get_img.py b/AWS/get_img.py new file mode 100644 index 0000000..cc30574 --- /dev/null +++ b/AWS/get_img.py @@ -0,0 +1,106 @@ +#-*- coding:UTF-8 -*- +# ======================== +#=== Python version is 2.7.13 +#== Author: Toryun +#== Time:2017-08- 28 +#======================== +import re,requests,xlrd,datetime,os,xlsxwriter +def File_path_choice(): + '''选择文件夹中的文件,返回所选文件路径''' + t='D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx'#默认工作簿地址 + try: + file_path='d:/documents/downloads' + print '路径{0}文件夹中的文件和文件夹如下:'.format(file_path) + file_names=os.listdir(file_path)#列出下载文件夹中的文件名 + for i in range(len(file_names)): + print i+1,file_names[i] + file_num=int(raw_input("Default workbook is 1,plz input a number of serial number(default {0}):\nOr Enter a number more than the last option you could input a url of file\n ".format(t)))#默认文件名是t,或者输入比最后选项大的数 + if file_num in range(1,len(file_names)+1): + file_path=file_path+'/'+file_names[file_num-1] + if file_num>=len(file_names)+1: + file_path=str(raw_input("plz input a fileurl (like:D:\\Documents\\Downloads\1.xlsx\n")) + f=os.path.exists(file_path) + while f==False: + file_path=str(raw_input("Your file is not exsits,plz input a fileurl:\n")) + f=os.path.exists(file_path) + except Exception,e: + print str(e) + file_path=t + return file_path +def get_image_url_and_filename(text): + '''匹配图片地址,返回地址和命名''' + url=re.findall(r'id=\"landingImage\" data-a-dynamic-image=\"{"(.*?)\":',text) + r=url[0] + return r +def save_url_image_to_file(url,filename): + '''请求图片URL,并保存到指定文件夹''' + r=requests.get(url) + with open("d:jpg/"+filename+".jpg","wb") as f: + f.write(r.content) + f.close() +def main(): + start=datetime.datetime.now() + fn=File_path_choice() + fp='c:\\image_xls.xls' + workbook1=xlsxwriter.Workbook(fp) + sheet1=workbook1.add_worksheet() + data=xlrd.open_workbook(fn) # 打开工作薄 + sheets=data.sheets() + z={} + p=0 + for sheet 
in sheets: + p+=1 + z[p]=sheet.name + print p,z[p] + try: + sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) + if sheet_index in range(1,len(sheets)+1): + t=sheet_index + else: + print 'The digital is wrong,plz input a correct number' + except Exception,e: + print str(e) + t=1 + table=data.sheet_by_index(t-1) # 读取指定sheet + cols=table.ncols + rows=table.nrows + print "{0}'s rows ,cols are {1},{2}".format(z[t],rows,cols) + first_sheet=table.row_values(0) + url_index=first_sheet.index('URL')#返回第一行URL的列数 + asin_index=first_sheet.index('Asin')#返回第一行asin的列数 + URL=table.col_values(url_index) # 读取指定列(该列含有URL) + asin=table.col_values(asin_index) + for i in range(rows-1): + try: + u=URL[i+1] + print i,u + img_headers={ + "Host": + "www.amazon.com", + "User-Agent": + "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3141.7 Safari/537.36", + "Accept": + "*/*", + "Accept-Language": + "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3", + "Accept-Encoding": + "gzip, deflate", + "Connection": + "keep-alive"} + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费IP地址*http://www.xicidaili.com* + r=requests.get(u,headers=img_headers,proxies=proxies) # 获取listing + url=get_image_url_and_filename(r.content) # 获取网页内容中的图片地址和命名 + filename=asin[i+1] + save_url_image_to_file(url,filename) # 保存到文件夹去 + sheet1.write(i,0,filename) + sheet1.insert_image(i,1,"d:jpg/"+filename+".jpg",{'x_scale':0.2,'y_scale':0.2})#把图片按长宽原来比例0.2的插入Excel中 + except Exception,e: + print str(e) + + workbook1.close() + end=datetime.datetime.now() + t=end-start + print '已将照片存入Excel {0}中\n总共用时:{1}s'.format(fp,t) + +if __name__=='__main__': + main() diff --git a/get_ranks.py b/AWS/get_ranks.py similarity index 65% rename from get_ranks.py rename to AWS/get_ranks.py index c418f1c..944ada8 100644 --- a/get_ranks.py +++ b/AWS/get_ranks.py @@ -2,8 +2,9 @@ import re,requests,xlrd,datetime,xlwt,time from xlutils.copy import copy # 
输入Asin,get网页返回内容和网址 -def requests_url(asin): - url='https://www.amazon.com/dp/'+asin +def requests_url(url): + '''获取网页内容''' + url='https://www.amazon.com/dp/'+url headers={"Host": "www.amazon.com", "User-Agent": @@ -19,32 +20,33 @@ def requests_url(asin): "Cache-Control":"max-age=0", "Upgrade-Insecure-Requests":"1" } - proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费代理IP - r=requests.get(url,headers=headers,proxies=proxies) + proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'} + r=requests.get(url,headers=headers,proxies=proxies) return r.content,url def main(): + '''打开工作簿,选择sheet查询rank''' start=datetime.datetime.now() # 计算所用时间 - data=xlrd.open_workbook('D:\\Documents\\Downloads\Food_Bins&Canisters_adjust_cell_phone.xls')#打开工作簿 + data=xlrd.open_workbook('D:\\Documents\\Downloads\stainless steel toilet brush holder.xlsx')#打开工作簿 sheets=data.sheets() z={} - p=0#key + p=0 for sheet in sheets: p+=1 - z[p]=sheet.name#把工作簿中的sheet名导入字典z + z[p]=sheet.name print p,z[p] try: sheet_index=int(raw_input("plz input index in the serial number(default 1):\n")) - if sheet_index in range(1,len(sheets)): + if sheet_index in range(1,len(sheets)+1): t=sheet_index else: print 'The digital is wrong,plz input a correct number' except Exception,e: print str(e) t=1 - table=data.sheet_by_index(t-1)# 打开第一个sheet + table=data.sheet_by_index(t-1)# 打开sheet rows=table.nrows#计算表中列数和行数 cols=table.ncols - print "%s 's cols,rows is"%(z[t],cols,rows)#打印表名中的行列数 + print cols,rows row_1st=table.row_values(0)#读取第一行 asin_index=row_1st.index('Asin')#返回Asin列的所在列数 URL=table.col_values(asin_index,1,rows-1)#读取Asin列第二行到最后一行 @@ -55,22 +57,20 @@ def main(): time.sleep(2) r,url=requests_url(URL[i]) print i,url - rank1=re.findall(r'(#\d+,\d{3}.*?)\(',r)#匹配排名大于百位的主目录排名 + rank1=re.findall(r'(#\d+,\d{3}.*?)\(',r) if rank1: print rank1 else: - rank1=re.findall(r'(#\d{1,3}.*?)\((#\d+)<\/span>',r)#匹配排名小于千位的小目录排名 + rank2=re.findall(r'>(#\d+)<\/span>',r) if 
rank2: print rank2 else: - rank2=re.findall(r'>(#\d+|#\d+,\d{3})\sin\s<',r)#匹配排名大于百位的小目录排名 + rank2=re.findall(r'>(#\d+|#\d+,\d{3})\sin\s<',r) print rank2 - - - table1.write(i+1,cols,rank1[0])#写入sheet中 + table1.write(i+1,cols,rank1[0]) table1.write(i+1,cols+1,rank2) except Exception,e: @@ -78,7 +78,7 @@ def main(): data1.save('c:\\first_Choice_copy.xls') #保存复制表格 end=datetime.datetime.now() t=end-start#总用时 - print 'Total time: {0} s.'.format(t) + print 'Total time: %s s.'%(t) if __name__== '__main__': main() diff --git a/keywords_find.py b/AWS/keywords_find.py similarity index 83% rename from keywords_find.py rename to AWS/keywords_find.py index f3388c3..9dfa2a3 100644 --- a/keywords_find.py +++ b/AWS/keywords_find.py @@ -6,7 +6,7 @@ import re,requests,xlwt,xlrd,string,datetime,os,time from xlutils.copy import copy def get_url(i,url): - '''利用代理IP查询Amazon搜索,返回结果''' + '''从工作簿中获取Keyword利用代理IP查询Amazon搜索,返回结果''' try: proxies={'HTTP': 'HTTP://122.242.96.30:808', 'HTTPS': 'HTTPS://122.242.96.30:808'}#免费IP地址*http://www.xicidaili.com* amazon='https://www.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=' @@ -51,8 +51,8 @@ def main(): start=datetime.datetime.now() data=xlrd.open_workbook(curl) table=data.sheet_by_index(0) - key=table.row_values(0)#获取头行返回数组 - key_index=key.index('Keyword')#获取Keyword所在列数 + key=table.row_values(0) + key_index=key.index('Keyword') Keywords=table.col_values(key_index) rows=table.nrows print "Workbook's rows is %d"%rows @@ -63,9 +63,10 @@ def main(): m=get_url(i,url) table1.write(i,0,url) table1.write(i,1,m) - data1.save('c:\\4.xlsx') + data1_url='c:\\4.xlsx' + data1.save(data1_url) end=datetime.datetime.now() t=end-start - print 'Total time: {0} s'.format(t)#打印总用时(从读取文件到保存文件) + print '您用{0}所搜索的关键词已经存储到{1}\nTotal time: {2} s'.format(curl,data1_url,t)#打印文件原地址和新存储地址,总用时(从读取文件到保存文件) if __name__=='__main__': main() diff --git a/AWS/postTocart.py b/AWS/postTocart.py new file mode 100644 index 0000000..e8f8dbd --- /dev/null +++ 
b/AWS/postTocart.py @@ -0,0 +1,279 @@ +#-*- coding:UTF-8 -*- +# ======================== +#Python version: 3.8.1 +#Author: Toryun +#Update: 2023-03-12 +#Function:Get the stocks of amaozn goods by add to Cart +#======================== +import os +import re +import ssl +import time +import xlwt +import html +import urllib +import random +import requests +from io import BytesIO +from amazoncaptcha import AmazonCaptcha +from Amazon_Utils import File_path_choice, Get_ASINlists, is_TTD, retry, excel_bulit, requests_asin + +ssl._create_default_https_context = ssl._create_unverified_context + + +def get_form_parameter(f): + '''addCart params''' + offerListingID = re.findall("name=\"offerListingID\" value=\"(.*?)\">", f) + CSRF = re.findall("name='CSRF' value='(.*?)'", f) + anti_csrftoken_a2z = re.findall("id=\"anti-csrftoken-a2z\" name=\"anti-csrftoken-a2z\" value=\"(.*?)\"", f) + session_id = re.findall('id=\"session-id\" name=\"session-id\" value=\"(.*?)\"', f) + merchantID = re.findall("name=\"merchantID\" value=\"(.*?)\"", f) + return offerListingID, CSRF, anti_csrftoken_a2z, session_id,merchantID + +def Post_form_addToCart(session, asin, offerListingID, CSRF, anti_csrftoken_a2z, session_id,merchantID): + '''Match the POST parameter and add the product to the shopping cart.匹配post参数,添加产品到购物车''' + host_url='https://www.amazon.com/gp/product/handle-buy-box/ref=dp_start-bbf_1_glance'#请求网址 + post_parameter={ + "items[0.base][asin]": asin, + "clientName": "OffersX_OfferDisplay_DetailPage", + "items[0.base][offerListingId]": offerListingID, + "CSRF": CSRF, + "anti-csrftoken-a2z": anti_csrftoken_a2z, + "offerListingID": offerListingID, + "session-id": session_id, + "ASIN": asin, + "isMerchantExclusive": "0", + "merchantID": merchantID, + "isAddon": "0", + "nodeID": "", + "sellingCustomerID": "", + "qid": "", + "sr": "", + "storeID": "", + "tagActionCode": "", + "viewID": "glance", + "rebateId": "", + "ctaDeviceType": "desktop", + "ctaPageType": "detail", + "usePrimeHandler": 
"0", + "rsid": session_id, + "sourceCustomerOrgListID": "", + "sourceCustomerOrgListItemID": "", + "wlPopCommand": "", + "items[0.base][quantity]": "1", + "quantity": "1", + "submit.add-to-cart": "Add to Cart", + "dropdown-selection": "add-new", + "dropdown-selection-ubb": "add-new" + }#添加商品到购物车的参数 + headers={ + 'Connection':'keep-alive', + 'Cache-Control':'max-age=0', + 'device-memory':'8', + 'sec-ch-device-memory':'8', + 'dpr':'2', + 'sec-ch-dpr':'2', + 'viewport-width':'1920', + 'sec-ch-viewport-width':'1920', + 'rtt':'100', + 'downlink':'10', + 'ect':'4g', + 'sec-ch-ua':'"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'sec-ch-ua-mobile':'?0', + 'sec-ch-ua-platform':'"macOS"', + 'Upgrade-Insecure-Requests':'1', + 'Origin':'https://www.amazon.com', + 'Content-Type':'application/x-www-form-urlencoded', + 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Sec-Fetch-Site':'same-origin', + 'Sec-Fetch-Mode':'navigate', + 'Sec-Fetch-User':'?1', + 'Sec-Fetch-Dest':'document', + 'Accept-Encoding':'gzip, deflate, br', + 'Accept-Language':'en-US,en;q=0.9' + }#请求头部 + r = session.post(host_url,data = post_parameter,headers = headers) + i = 0 + while is_TTD(r.text):#如果变狗则不断请求直到不变狗 + r = session.post(host_url,data = post_parameter,headers = headers) + time.sleep(random.randint(0,6)) + print(r.status_code)#返回请求状态(成功为200) + i += 1 + if i == 5: + break + +def get_cart_view(session): + '''Get the shopping cart page and match the key API, such as token, requestID, activeItems...获取购物车页面,匹配token、requestID、activeItems等关键API''' + url = "https://www.amazon.com/gp/cart/view.html?ref_=sw_gtc" + headers = { + 'host':'www.amazon.com', + 'Connection':'keep-alive', + 'Cache-Control':'max-age=0', + 'device-memory':'8', + 
'sec-ch-device-memory':'8', + 'dpr':'2', + 'sec-ch-dpr':'2', + 'viewport-width':'683', + 'sec-ch-viewport-width':'683', + 'rtt':'350', + 'downlink':'1.4', + 'ect':'3g', + 'sec-ch-ua':'"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'sec-ch-ua-mobile':'?0', + 'sec-ch-ua-platform':'"macOS"', + 'Upgrade-Insecure-Requests':'1', + 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Sec-Fetch-Site':'same-origin', + 'Sec-Fetch-Mode':'navigate', + 'Sec-Fetch-User':'?1', + 'Sec-Fetch-Dest':'document', + 'Referer':'https://www.amazon.com/cart/smart-wagon?newItems=23a604b9-c6e4-4622-abcf-f35b1456f995,1', + 'Accept-Encoding':'gzip, deflate, br', + 'Accept-Language':'en-US,en;q=0.9' + } + r = session.get(url, headers=headers, timeout = 5) + price = re.findall('sc-product-price a-text-bold">\$(.*?)<\/span>', r.text) + token = re.findall('name=\'token\' value=\'(.*?)\'', r.text) + requestID = re.findall('name=\'requestID\' value=\'(.*?)\'', r.text) + actionItemID = re.findall('data-itemid=\"(.*?)\"', r.text) + encodedOffering = re.findall('data-encoded-offering=\"(.*?)\"', r.text) + return price,token,actionItemID,requestID,encodedOffering +@retry +def update_quantity(session, asin, price, token, actionItemID, requestID, encodedOffering): + '''Pass 999 quantity request return inventory.传递999数量请求返回库存量''' + t=int(time.time()) + url='https://www.amazon.com/cart/ref=ox_sc_update_quantity_1%7C1%7C999' + headers={ + 'Connection':'keep-alive', + 'sec-ch-ua':'"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'X-AUI-View':'Desktop', + 'sec-ch-device-memory':'8', + 'sec-ch-viewport-width':'1017', + 'X-Requested-With':'XMLHttpRequest', + 'dpr':'2', + 'downlink':'1.25', + 'sec-ch-ua-platform':'"macOS"', + 
'device-memory':'8', + 'rtt':'250', + 'sec-ch-ua-mobile':'?0', + 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'viewport-width':'1017', + 'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8;', + 'Accept':'application/json, text/javascript, */*; q=0.01', + 'sec-ch-dpr':'2', + 'ect':'3g', + 'Origin':'https://www.amazon.com', + 'Sec-Fetch-Site':'same-origin', + 'Sec-Fetch-Mode':'cors', + 'Sec-Fetch-Dest':'empty', + 'Referer':'https://www.amazon.com/gp/cart/view.html?ref_=sw_gtc', + 'Accept-Encoding':'gzip, deflate, br', + 'Accept-Language':'en-US,en;q=0.9' + } + json_update={ + "quantity.{0}".format(actionItemID): "999", + "pageAction": "update-quantity", + "submit.update-quantity.{0}".format(actionItemID): "1", + "displayedSavedItemNum": "0", + "actionItemID": actionItemID, + "actionType": "update-quantity", + "asin": asin, + "encodedOffering": encodedOffering, + "hasMoreItems": "false", + "addressId": "", + "addressZip": "", + "closeAddonUpsell": "1", + "displayedSavedItemNum": "0", + "activeItems": [{"itemId":"sc-active-{}".format(actionItemID),"giftable":1,"giftWrapped":0,"quantity":1,"price":price,"incentivizedCartMessage":"","installments":{}}], + "savedItems": [], + "timeStamp": t, + "requestID": requestID, + "token": token + } + r = session.post(url,data=json_update,headers=headers) + stock = "inf" + if r.status_code == 200: + stock=r.json()['features']['nav-cart']['cartQty'] + return stock + +def delete_quantity(session, asin, price, token, actionItemID, requestID, stock, encodedOffering): + '''Delete the products that have been added to the shopping cart.删除已添加到购物车的产品''' + t=int(time.time())#时间戳 + url='https://www.amazon.com/cart/ref=ox_sc_cart_actions_1' + headers={ + 'Connection':'keep-alive', + 'sec-ch-ua':'"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'X-AUI-View':'Desktop', + 'sec-ch-device-memory':'8', + 
'sec-ch-viewport-width':'1017', + 'X-Requested-With':'XMLHttpRequest', + 'dpr':'2', + 'downlink':'1.45', + 'sec-ch-ua-platform':'"macOS"', + 'device-memory':'8', + 'rtt':'300', + 'sec-ch-ua-mobile':'?0', + 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'viewport-width':'1017', + 'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8;', + 'Accept':'application/json, text/javascript, */*; q=0.01', + 'sec-ch-dpr':'2', + 'ect':'3g', + 'Origin':'https://www.amazon.com', + 'Sec-Fetch-Site':'same-origin', + 'Sec-Fetch-Mode':'cors', + 'Sec-Fetch-Dest':'empty', + 'Referer':'https://www.amazon.com/gp/cart/view.html?ref_=sw_gtc', + 'Accept-Encoding':'gzip, deflate, br', + 'Accept-Language':'en-US,en;q=0.9' + } + json_dalete={ + "submit.cart-actions": "1", + "pageAction": "cart-actions", + "actionPayload": [{"type":"DELETE_START","payload":{"itemId":actionItemID,"list":"activeItems","relatedItemIds":[],"isPrimeAsin":"false"}}], + "hasMoreItems": "false", + "addressId": "", + "addressZip": "", + "closeAddonUpsell": "1", + "displayedSavedItemNum": "0", + "activeItems": [{"itemId":"sc-active-{}".format(actionItemID),"giftable":1,"giftWrapped":0,"quantity":stock,"price":price,"incentivizedCartMessage":"","installments":{},"isSelected":1}], + "savedItems": [], + "timeStamp": t, + "requestID": requestID, + "token": token + } + session.post(url, data = json_dalete, headers = headers) + return session + +def main(): + #fn=File_path_choice() + #ASINs=["B09PVJVS15", "B09V71J4CL"] + fp = "./asin.xls" + ASINs = Get_ASINlists(fp) + workbook = xlwt.Workbook(encoding = 'utf-8') + table=excel_bulit(workbook, "1") + for i in range(len(ASINs)): + asin = ASINs[i] + stock = "inf" + table.write(i,0,asin) + r, session = requests_asin('https://www.amazon.com'+asin) + if not is_TTD(r): + offerListingId, CSRF, anti_csrftoken_a2z, session_id,merchantID=get_form_parameter(r) + if offerListingId[0]=='' and 
merchantID[0]=='' and len(CSRF)==1:#不在售 + stock = 0 + else: + Post_form_addToCart(session, asin, offerListingId[0], CSRF[0], anti_csrftoken_a2z[0], session_id[0],merchantID[0]) + price, token, actionItemID, requestID, encodedOffering = get_cart_view(session) + stock = update_quantity(session, asin, price[0], token[0], actionItemID[0], requestID[0], encodedOffering[0]) + delete_quantity(session, asin, price[0], token[0], actionItemID[0], requestID[0], stock, encodedOffering[0]) + else: + print("Request is blocked: {}".format(url)) + table.write(i,1,stock) + print(asin, stock) + file_save='./stock.xls' + workbook.save(file_save) + print("Saved to {}".format(os.path.abspath(file_save))) +if __name__=='__main__': + main() diff --git a/AWS/proxies.txt b/AWS/proxies.txt new file mode 100644 index 0000000..285c6cd --- /dev/null +++ b/AWS/proxies.txt @@ -0,0 +1 @@ +{'http': ['121.37.205.253:41890', '115.29.140.201:8499', '47.92.242.45:8999', '14.17.94.9:443', '47.109.57.93:1337', '121.37.201.60:9002', '47.108.118.128:8888', '47.113.219.226:9091', '140.210.196.193:3128', '117.69.232.247:8089', '117.69.237.238:8089', '121.37.207.154:8118', '117.69.236.128:8089', '120.79.31.133:20002', '183.165.251.43:8089', '101.200.235.69:8080', '183.164.243.140:8089', '120.55.49.231:1234', '36.137.9.213:7890', '59.48.218.218:9091'], 'https': []} \ No newline at end of file diff --git a/AWS/requirement.txt b/AWS/requirement.txt new file mode 100644 index 0000000..f8d23ee --- /dev/null +++ b/AWS/requirement.txt @@ -0,0 +1,8 @@ +xlwt +xlrd +lxml +xlutils +requests +mechanize +xlsxwriter +amazoncaptcha \ No newline at end of file diff --git a/Apply_for_Job/51job/51job.py b/Apply_for_Job/51job/51job.py new file mode 100644 index 0000000..f051438 --- /dev/null +++ b/Apply_for_Job/51job/51job.py @@ -0,0 +1,307 @@ +# -*- coding: utf-8 -*- +""" +@author:Toryun +@data:2023/4/18 +@version:Python3.8 +@Function: 获取前程无忧招聘工作数据 +""" +import re +import os +import sys +import time +import json +import hmac 
class Job(object):
    """Client for the 51job job-search endpoint (``cupidjob.51job.com``).

    The instance keeps every query parameter of the ``search-pc`` request as a
    private attribute, signs each request with HMAC-SHA256 and sends it with
    browser-like headers.
    """

    # Keyword used when the caller does not supply one.
    _DEFAULT_KEYWORD = "亚马逊运营"

    def __init__(self):
        """Initialise the default search parameters."""
        self.__baseurl = 'https://cupidjob.51job.com/open/noauth/search-pc'
        self.__api_key = "51job"
        # NOTE(review): the timestamp is captured once, at construction time,
        # and reused for every request made by this instance.
        self.__time = int(time.time())
        self.__keyword = self._DEFAULT_KEYWORD
        self.__urlencode_keyword = urllib.parse.quote(self._DEFAULT_KEYWORD)
        self.__searchType = 2
        self.__function = ""
        self.__industry = ""
        self.__jobArea = "000000"  # default: nationwide
        self.__jobArea2 = ""
        self.__landmark = ""
        self.__metro = ""
        self.__salary = ""
        self.__workYear = ""
        self.__degree = ""
        self.__companyType = ""
        self.__companySize = ""
        self.__jobType = ""
        self.__issueDate = ""
        self.__sortType = 0
        self.__pageNum = 1
        self.__requestId = ""
        # NOTE(review): the original note mentioned 20 results per page and a
        # cap of 1000 requested results; the value used here is 50 -- confirm.
        self.__pageSize = 50
        self.__source = 1
        self.__accountId = ""
        self.__pageCode = "sou|sou|soulb"
        # Secret used as the HMAC key for the ``sign`` header.
        self.__key = 'abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b'
        self.__proxies = proxies()  # proxy IPs

    def get_sign(self, params, key):
        """Return the request signature for *params*.

        The signed message is the endpoint path followed by the URL-encoded
        query string; the signature is its HMAC-SHA256 hex digest.

        Args:
            params: Mapping of query parameters; its iteration order is the
                order used in the signed query string.
            key: Secret key as a ``str``.

        Returns:
            The hex digest as a ``str``.
        """
        message = '/open/noauth/search-pc?' + urlencode(params)
        return hmac.new(
            bytes(key, 'utf-8'), bytes(message, 'utf-8'), sha256
        ).hexdigest()

    def get_property(self):
        """Return the value of the ``property`` request header.

        The value is a percent-encoded JSON object describing a web session
        that is not logged in.  The current keyword is embedded in its
        ``pageUrl`` field; because that URL sits inside an already
        percent-encoded document, the (already quoted) keyword is quoted a
        second time.
        """
        template = (
            '%7B%22partner%22%3A%22%22'
            '%2C%22webId%22%3A2'
            '%2C%22fromdomain%22%3A%2251job_web%22'
            '%2C%22frompageUrl%22%3A%22https%3A%2F%2Fwe.51job.com%2F%22'
            '%2C%22pageUrl%22%3A%22https%3A%2F%2Fwe.51job.com%2Fpc%2Fsearch%3Fkeyword%3D{}%26searchType%3D2%26sortType%3D0%26metro%3D%22'
            '%2C%22identityType%22%3A%22%22'
            '%2C%22userType%22%3A%22%22'
            '%2C%22isLogin%22%3A%22%E5%90%A6%22'
            '%2C%22accountid%22%3A%22%22%7D'
        )
        return template.format(urllib.parse.quote(self.__urlencode_keyword))

    def get_uuid(self):
        """Fetch a uuid from the 51job token endpoint and return it."""
        response = requests.get("https://oauth.51job.com/ajax/get_token.php?fromdomain=51job_web")
        return response.json()['resultBody']['uuid']

    def __citycode__(self, city):
        """Look up the area code for *city* and store it as the search area.

        The area is first reset to ``"000000"`` (nationwide) and stays that way
        when no entry matches.  On the first match the id is printed and
        stored, and the scan stops.

        Args:
            city: City name compared against each entry's ``value`` field.
        """
        self.__jobArea = "000000"
        url = 'https://vapi.51job.com/resource.php?query=dd&version=400&clientid=000011&accountid=&usertoken=&client_id=000011&property=%7B%22partner%22%3A%22%22%2C%22webId%22%3A2%2C%22fromdomain%22%3A%2251job_web%22%2C%22frompageUrl%22%3A%22https%3A%2F%2Fwe.51job.com%2F%22%2C%22pageUrl%22%3A%22https%3A%2F%2Fwe.51job.com%2Fpc%2Fsearch%3Fkeyword%3D%25E4%25BA%259A%25E9%25A9%25AC%25E9%2580%258A%25E8%25BF%2590%25E8%2590%25A5%26searchType%3D2%26sortType%3D0%26metro%3D%22%2C%22identityType%22%3A%22%22%2C%22userType%22%3A%22%22%2C%22isLogin%22%3A%22%E5%90%A6%22%2C%22accountid%22%3A%22%22%7D&dd_name=d_pc_abc_area&path=763cd7c36162daa3d2ed2b48b500e623&sign=53363cff7c255ce47b2c8944787c8196'

        json_data = requests.get(url).json()
        for group in json_data['resultbody']:
            for item in group['sub']:
                if item['value'] == city:
                    print(item['id'])
                    self.__jobArea = item['id']
                    # Stop at the first match instead of scanning the
                    # remaining groups.
                    return

    def _set_keyword(self, job):
        """Store *job* as the search keyword, falling back to the default.

        Also refreshes the quoted form of the keyword so that the ``property``
        header describes the same search as the query string.
        """
        self.__keyword = self._DEFAULT_KEYWORD if job is None else job
        self.__urlencode_keyword = urllib.parse.quote(str(self.__keyword))

    def _search_payload(self):
        """Build the query parameters; key order matters for the signature."""
        return {
            "api_key": self.__api_key,
            "timestamp": self.__time,
            "keyword": self.__keyword,
            "searchType": self.__searchType,
            "function": self.__function,
            "industry": self.__industry,
            "jobArea": self.__jobArea,
            "jobArea2": self.__jobArea2,
            "landmark": self.__landmark,
            "metro": self.__metro,
            "salary": self.__salary,
            "workYear": self.__workYear,
            "degree": self.__degree,
            "companyType": self.__companyType,
            "companySize": self.__companySize,
            "jobType": self.__jobType,
            "issueDate": self.__issueDate,
            "sortType": self.__sortType,
            "pageNum": self.__pageNum,
            "requestId": self.__requestId,
            "pageSize": self.__pageSize,
            "source": self.__source,
            "accountId": self.__accountId,
            "pageCode": self.__pageCode,
        }

    def _search_headers(self, payload):
        """Build the request headers, including ``property`` and ``sign``."""
        return {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            # NOTE(review): hard-coded cookie, presumably captured from a
            # browser session -- confirm it is still accepted.
            'Cookie': 'JSESSIONID=AC424E89E896ACE2581306A5F3162C0B; acw_tc=ac11000116817489511698415e00dd6e33dd767bc22c847a283fd181bca0a6; uid=wKhJP2Q9c9d90Yn04FYjAg==',
            'From-Domain': '51job_web',
            'Origin': 'https://we.51job.com',
            'Referer': 'https://we.51job.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
            'account-id': '',
            'partner': '',
            'property': self.get_property(),
            'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sign': self.get_sign(payload, self.__key),
            'user-token': '',
            # NOTE(review): static uuid; get_uuid() is not used here --
            # confirm whether a fresh value is required.
            'uuid': '753e3a11c580dd4649d7f95dd88c1d6a',
        }

    def __search__(self, job):
        """Search job postings for the keyword *job*.

        Args:
            job: Search keyword; ``None`` selects the default keyword.

        Returns:
            The decoded JSON body of the search response.
        """
        self._set_keyword(job)
        payload = self._search_payload()
        headers = self._search_headers(payload)
        url = self.__baseurl + '?' + urlencode(payload)
        return requests.request("GET", url, headers=headers).json()
+ 'Cache-Control': 'max-age=0', + 'Connection': 'keep-alive', + 'Cookie': 'acw_tc=ac11000116818008452498924e00e0a4044f563de8a51e045de6ce729d8179', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'same-origin', + 'Sec-Fetch-User': '?1', + 'Upgrade-Insecure-Requests': '1', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"' + } + response = requests.request("GET", url, headers=headers, proxies = proxies, timeout = 5) + # 解析HTML内容 + soup = BeautifulSoup(response.text, 'html.parser') + # 找到