某WZ

Proxy_AntiBanIP.py

import requests,random,time
from lxml import etree

# Proxy extraction API endpoint; the query string asks for up to 5 proxies
# (limit=5) in JSON format (data_format=json).
# NOTE(review): the API token is embedded in this URL; consider loading it
# from configuration instead of keeping it in the source.
proxy_url = 'http://proxy.siyetian.com/apis_get.html?token=MesJWLNp2Y35keJdXTqV1dNRVT45ERVdnT31STqFUeNpXQ10EVBl3TE1UNPR1Yx8ERjFTTE1ke.QO4MTOxMDOzcTA&limit=5&type=1&time=&data_format=json'

# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}

# Fetch the proxy list from the extraction API.
# - timeout: requests waits indefinitely by default, so an unreachable API
#   would hang the script forever.
# - raise_for_status(): surface HTTP errors instead of trying to decode an
#   error page as JSON.
api_response = requests.get(proxy_url, headers=headers, timeout=10)
api_response.raise_for_status()
res_proxies = api_response.json()
print(res_proxies)

# The code expects the payload to carry the proxies under 'data' as a list of
# {'ip': ..., 'port': ...} items. A missing or empty 'data' (e.g. the API
# answered with an error payload) is reported explicitly rather than failing
# later with an opaque KeyError / IndexError.
proxy_items = res_proxies.get('data') or []
if not proxy_items:
    raise RuntimeError(f'proxy API returned no proxies: {res_proxies!r}')

# Build the proxy pool: one requests-style ``proxies`` mapping per address,
# routing both http and https traffic through the same SOCKS5 endpoint.
# NOTE(review): socks5:// URLs need the optional SOCKS support of requests
# (PySocks) to be installed -- confirm in the runtime environment.
proxies_pool = []
for ip_item in proxy_items:
    socks5 = f"socks5://{ip_item['ip']}:{ip_item['port']}"
    proxies_pool.append({'http': socks5, 'https': socks5})
print(proxies_pool)

# Base URL of the listing; the page number is appended to it.
url_init = 'https://wz.sun0769.com/political/index/politicsNewest?id=1&page='

# Fail early with a clear message: random.choice() raises a bare IndexError
# on an empty pool, which hides the real cause (no proxies available).
if not proxies_pool:
    raise RuntimeError('proxy pool is empty; cannot start crawling')

# (connect, read) timeout in seconds. Without a timeout, requests waits
# indefinitely on a dead proxy and the whole crawl hangs.
REQUEST_TIMEOUT = (5, 15)
# How many randomly chosen proxies are tried for one page before skipping it.
MAX_ATTEMPTS = 3

# Crawl pages 1-100, sending each request through a random proxy from the pool.
for i in range(1, 101):
    time.sleep(0.1)  # small delay between pages
    url = url_init + str(i)

    response = None
    for _ in range(MAX_ATTEMPTS):
        choice_proxy = random.choice(proxies_pool)
        try:
            response = requests.get(
                url,
                headers=headers,
                proxies=choice_proxy,
                timeout=REQUEST_TIMEOUT,
            )
            # Treat HTTP errors as a failed attempt so that another proxy is
            # tried instead of parsing an error page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One bad proxy must not abort the remaining pages; report it and
            # retry with another random pick.
            print(f'Page{i}: request via {choice_proxy["http"]} failed: {exc}')
            response = None
            continue
        break

    if response is None:
        print(f'Page{i}: skipped after {MAX_ATTEMPTS} failed attempts')
        continue

    # Decode the body as UTF-8 regardless of what requests inferred.
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    # Absolute XPath to the text of the title links in the listing.
    titles = tree.xpath('/html/body/div[2]/div[3]/ul[2]/li/span[3]/a/text()')
    print(f'Page{i}:', titles)

Last updated