5000言小说

5000yan.py

import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Index page of the novel; the chapter links are scraped from it.
url = 'https://bixuejian.5000yan.com/'

# Sent with every request so the script identifies itself with a desktop
# Chrome User-Agent rather than the default python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}

# requests applies no timeout by default, so without one a stalled
# connection would block the script indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page,
# finding zero chapter links and reporting success.
response.raise_for_status()
# Decode the body as UTF-8 regardless of the encoding requests guessed.
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'lxml')

# Every <a> inside an <li class="p-2"> is treated as a chapter link.
tags = soup.select('li.p-2 a')

# Download every chapter linked from the index page into its own text file
# named after the chapter title.
for tag in tags:
    # Pause between requests (presumably to avoid hammering the server).
    time.sleep(0.5)

    # get_text() still yields the title when the link wraps nested markup,
    # where tag.string would be None; .get() tolerates anchors without an
    # href instead of raising KeyError.
    chapter_name = tag.get_text(strip=True)
    href = tag.get('href')
    if not chapter_name or not href:
        continue
    # Resolve relative links against the index URL; absolute links pass
    # through unchanged.
    chapter_url = urljoin(url, href)

    try:
        res_chapter = requests.get(chapter_url, headers=headers, timeout=10)
        res_chapter.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable chapter should not abort the remaining downloads.
        print(chapter_name + ' - 下载失败: ' + str(exc))
        continue
    res_chapter.encoding = 'utf-8'
    soup_chapter = BeautifulSoup(res_chapter.text, 'lxml')
    paragraphs = soup_chapter.select('div.grap p')

    # '/' is a path separator and cannot appear in a file name.
    file_name = chapter_name.replace('/', '_') + '.txt'
    # Explicit UTF-8 so the Chinese text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    # NOTE(review): append mode is kept from the original, so re-running
    # the script adds a second copy of the chapter to an existing file.
    with open(file_name, 'a', encoding='utf-8') as chapter:
        chapter.write(chapter_name + '\n')
        chapter.writelines(
            paragraph.text.strip() + '\n' for paragraph in paragraphs
        )
    print(chapter_name + ' - 下载完毕')
print('所有章节下载完毕')

Last updated