5000言小说
5000yan.py
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Downloads a novel from the index page at `url`: every chapter link found on
# the index is fetched and its paragraphs are saved to "<chapter name>.txt" in
# the current working directory.
url = 'https://bixuejian.5000yan.com/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
# Seconds to wait on the server. requests applies no timeout by default, so
# without this a stalled connection would block the script forever.
request_timeout = 10

# A single session reuses the underlying connection for all chapter requests.
with requests.Session() as session:
    session.headers.update(headers)

    response = session.get(url, timeout=request_timeout)
    # Fail loudly rather than parsing an HTTP error page as the chapter index.
    response.raise_for_status()
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')

    # Chapter links: every <a> nested inside an <li class="p-2">.
    tags = soup.select('li.p-2 a')

    for tag in tags:
        # get_text() always returns a str, whereas tag.string is None as soon
        # as the anchor contains nested markup.
        chapter_name = tag.get_text(strip=True)
        href = tag.get('href')
        if not chapter_name or not href:
            # Nothing usable to name the file or to fetch; skip this link.
            continue
        # Resolve relative links against the index URL; absolute links are
        # returned unchanged.
        chapter_url = urljoin(url, href)

        # Pause between requests to avoid hammering the server.
        time.sleep(0.5)
        res_chapter = session.get(chapter_url, timeout=request_timeout)
        res_chapter.raise_for_status()
        res_chapter.encoding = 'utf-8'
        soup_chapter = BeautifulSoup(res_chapter.text, 'lxml')
        paragraphs = soup_chapter.select('div.grap p')

        # Explicit UTF-8 so the output does not depend on the platform's
        # default encoding. Append mode is kept from the original script, so
        # chapters sharing a title accumulate in one file instead of
        # overwriting each other (re-running the script also appends).
        with open(chapter_name + '.txt', 'a', encoding='utf-8') as chapter:
            chapter.write(chapter_name + '\n')
            for paragraph in paragraphs:
                chapter.write(paragraph.text.strip() + '\n')
        print(chapter_name + ' - 下载完毕')

print('所有章节下载完毕')
Last updated