import random
# 导入settings文件中的UAPOOL
from settings import UAPOOL
# 导入官方文档对应的UserAgentMiddleware
try:
    # Current module path of the built-in middleware.
    from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware
except ImportError:
    # Legacy module path, kept as a fallback for old Scrapy releases.
    from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware


class Uamid(UserAgentMiddleware):
    """Downloader middleware that assigns a random User-Agent from UAPOOL.

    For every outgoing request one entry of ``UAPOOL`` is picked at random
    and used as the ``User-Agent`` header, unless the request already
    carries one.
    """

    def __init__(self, user_agent=''):
        """Store the configured user agent.

        The ``user_agent`` parameter must be accepted; the original author
        notes that omitting it tends to raise an error (presumably because
        the framework passes it when building the middleware -- confirm
        against the installed Scrapy version).
        """
        self.user_agent = user_agent

    def process_request(self, request, spider):
        """Set a randomly chosen User-Agent header on ``request``.

        Args:
            request: The outgoing request whose headers are updated.
            spider: The spider issuing the request (unused here).

        Returns:
            None, so the request continues through the middleware chain.
        """
        # random.choice raises IndexError on an empty sequence; with an
        # empty pool leave the request untouched instead of crashing.
        if not UAPOOL:
            return
        # Pick one user agent at random for this request.
        thisua = random.choice(UAPOOL)
        print("当前使用User-Agent是:"+thisua)
        # setdefault keeps an explicit User-Agent already set on the request.
        request.headers.setdefault('User-Agent', thisua)
在settings.py文件中添加User-Agent池(UAPOOL)的信息
# User-Agent pool: the strings the middleware rotates through go here.
# The "; " separators inside the platform section are restored; without them
# the strings are not well-formed browser User-Agent values.
UAPOOL = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
]
from urllib.request import urlopen

from bottle import run, route, request, response


@route('<url:re:.*>')
def get_method(url):
    """Fetch ``url`` with ``urlopen`` and return the raw response body.

    The route pattern ``.*`` matches every request path, so each incoming
    request is handed to this function.

    Args:
        url: The path captured by the route, passed unchanged to ``urlopen``.

    Returns:
        The bytes read from the upstream response.
    """
    # NOTE(review): ``url`` comes straight from the client and the server
    # listens on all interfaces; consider restricting the allowed schemes
    # and hosts before exposing this beyond a local machine.
    # The context manager closes the upstream response once it is read.
    with urlopen(url) as data:
        return data.read()


# Start the server on every interface, port 3456, with debug output enabled.
run(host='0.0.0.0', port=3456, debug=True)
我简单写一个,使用bottle框架。注意不要改IE的代理,使用别的浏览器(如Firefox)代理到
127.0.0.1:3456