meijukankan 视频

video.py 串行版本

import requests
import re
import base64
import os
import time

# Play page of the episode to download.
url = 'https://www.meijukankan.net/play/2749-0-0.html'

# Browser-like request headers passed along with the HTTP requests below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/117.0.0.0 Safari/537.36'
    ),
    'Referer': 'https://www.meijukankan.net/meijukk/2749.html',
    # Client-hint headers.
    'Sec-Ch-Ua': (
        '"Google Chrome";v="117", '
        '"Not;A=Brand";v="8", '
        '"Chromium";v="117"'
    ),
    'Sec-Ch-Ua-Mobile': '?0',
    'Sec-Ch-Ua-Platform': 'macOS',
    # Fetch-metadata headers.
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    # NOTE(review): hard-coded session id; presumably it expires - confirm.
    'Cookie': 'PHPSESSID=3u1gcjqc1of80k69f26qsmph8j',
}

# One HTTP session shared by all requests in this script.
session = requests.Session()


# Resolve the media-playlist URL and the episode title.
def get_m3u8():
    """Fetch the play page and locate the episode's media playlist.

    The play page embeds a base64-encoded URL of a master playlist. That
    playlist is downloaded and its first URI line is resolved against the
    master playlist's own URL to obtain the media playlist URL.

    Returns:
        tuple: ``(url_detail_m3u8, name)`` - the absolute URL of the media
        playlist and the episode title scraped from the page.

    Raises:
        requests.HTTPError: if the play page or the master playlist request
            returns an error status.
        ValueError: if the page lacks the expected player variable or title
            markup, or the master playlist contains no URI line.
    """
    # Function-scope import keeps this block independent of file-level imports.
    from urllib.parse import urljoin

    # Fetch the HTML source of the play page.
    res = session.get(url, headers=headers)
    res.raise_for_status()
    html_content = res.text

    # Extract the base64-encoded master-playlist URL. A raw string avoids the
    # invalid "\(" escape warning while keeping the pattern identical.
    encoded = re.search(r'var now=base64decode\("(.*?)"\)', html_content, re.S)
    if encoded is None:
        raise ValueError('base64-encoded m3u8 address not found in play page')
    url_desc_m3u8 = base64.b64decode(encoded.group(1)).decode('utf-8')

    # Download the master playlist and take its first URI line (a non-empty
    # line that is not a "#" tag/comment) instead of a fixed line index, so
    # extra tags or CRLF line endings do not break the lookup.
    res1_m3u8 = session.get(url_desc_m3u8, headers=headers)
    res1_m3u8.raise_for_status()
    stripped_lines = (line.strip() for line in res1_m3u8.text.splitlines())
    suffix_url = next(
        (line for line in stripped_lines if line and not line.startswith('#')),
        None,
    )
    if suffix_url is None:
        raise ValueError('no playlist URI found in ' + url_desc_m3u8)

    # Resolve the (possibly relative) URI against the master playlist URL
    # rather than assuming a fixed host.
    url_detail_m3u8 = urljoin(url_desc_m3u8, suffix_url)

    # Extract the episode title from the "now playing" banner.
    title = re.search(
        r'<div class="play_menu">正在播放:<a href=".*?">(.*?)</a>(.*?)-无尽视频</div>',
        html_content,
        re.S,
    )
    if title is None:
        raise ValueError('episode title not found in play page')
    name = title.group(1) + title.group(2)

    return url_detail_m3u8, name


# Collect the media-segment URLs listed in the playlist.
def get_ts_urls(url_detail_m3u8, name, limit=20):
    """Download the media playlist, save it locally and return segment URLs.

    Args:
        url_detail_m3u8: URL of the media playlist to download.
        name: Existing directory; the raw playlist is written to
            ``<name>/index.m3u8``.
        limit: Maximum number of segment URLs to return. Defaults to 20, the
            cap that used to be hard-coded; pass ``None`` to return all.

    Returns:
        list[str]: Segment URLs in playlist order, each resolved against
        ``url_detail_m3u8`` (absolute URIs are returned unchanged).

    Raises:
        requests.HTTPError: if the playlist request returns an error status.
    """
    # Function-scope import keeps this block independent of file-level imports.
    from urllib.parse import urljoin

    # Fetch the playlist body.
    detail_m3u8 = session.get(url_detail_m3u8, headers=headers)
    detail_m3u8.raise_for_status()

    # Keep an unmodified copy of the playlist on disk.
    with open(os.path.join(name, 'index.m3u8'), 'wb') as f:
        f.write(detail_m3u8.content)

    # Every non-empty line that is not a "#" tag/comment is a segment URI.
    # Parsing line by line cannot capture across lines and tolerates CRLF.
    stripped_lines = (line.strip() for line in detail_m3u8.text.splitlines())
    ts_urls = [
        urljoin(url_detail_m3u8, line)
        for line in stripped_lines
        if line and not line.startswith('#')
    ]
    return ts_urls if limit is None else ts_urls[:limit]


# Download a single media segment.
def download_ts(ts_url, name):
    """Download one segment into the directory ``name``.

    The file is stored under the last path component of ``ts_url``.

    Args:
        ts_url: URL of the segment to download.
        name: Existing directory that receives the file.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as a video segment.
    """
    ts_name = os.path.basename(ts_url)
    ts_path = os.path.join(name, ts_name)
    video = session.get(ts_url, headers=headers)
    # Fail before touching the disk when the response is an HTTP error.
    video.raise_for_status()
    with open(ts_path, 'wb') as f:
        f.write(video.content)
    print(f'{ts_name} download completed')


# Merge the downloaded segments into one video.
def merge(name, prefix):
    """Rewrite the saved playlist to local paths and merge it with ffmpeg.

    Reads ``<name>/index.m3u8``, removes ``prefix + '/'`` from every line and
    writes the result to ``<name>/index_local.m3u8``. ffmpeg is then run
    inside ``name`` to produce ``<name>.mp4`` in that directory.

    Unlike an ``os.chdir`` based approach, the working directory of the
    current process is left untouched; only the ffmpeg child runs in ``name``.

    Args:
        name: Directory holding ``index.m3u8`` and the downloaded segments;
            also used as the base name of the output file.
        prefix: Remote directory part of the segment URLs. When empty, the
            playlist lines are copied unchanged.

    Raises:
        FileNotFoundError: if ``index.m3u8`` or the ffmpeg executable is
            missing.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # Function-scope import keeps this block independent of file-level imports.
    import subprocess

    src_path = os.path.join(name, 'index.m3u8')
    dst_path = os.path.join(name, 'index_local.m3u8')
    # An empty prefix would turn the replace target into '/', which would
    # strip every slash from the playlist, so skip the rewrite in that case.
    remote_prefix = prefix + '/' if prefix else ''

    # Open the output once in write mode: no per-line reopen, and a re-run
    # overwrites the file instead of appending a second copy.
    with open(src_path, 'r', encoding='utf-8') as src, \
            open(dst_path, 'w', encoding='utf-8') as dst:
        for line in src:
            if remote_prefix:
                line = line.replace(remote_prefix, '')
            dst.write(line)

    # Pass an argument list (no shell) so titles containing spaces or shell
    # metacharacters are handed to ffmpeg as one literal file name.
    subprocess.run(
        ['ffmpeg', '-i', 'index_local.m3u8', '-c', 'copy', f'{name}.mp4'],
        cwd=name,
        check=True,
    )



def main():
    """Download one episode serially and merge it into a single mp4.

    Steps: resolve the playlist URL and title, create the output directory,
    list the segment URLs, download them one by one, merge them, and print
    the elapsed time in seconds.

    Raises:
        RuntimeError: if the playlist yields no segment URLs.
    """
    start = time.time()

    # 0. Resolve the media-playlist URL and the episode title.
    url_detail_m3u8, name = get_m3u8()

    # 1. Create the local directory; exist_ok avoids the check-then-create race.
    os.makedirs(name, exist_ok=True)

    # 2. Save the playlist locally and collect the segment URLs.
    ts_urls = get_ts_urls(url_detail_m3u8, name)
    if not ts_urls:
        raise RuntimeError('no segment URLs found in the m3u8 playlist')
    # Remote directory prefix, stripped from the playlist when merging.
    prefix = os.path.dirname(ts_urls[0])

    # 3. Download the segments one after another.
    for ts_url in ts_urls:
        download_ts(ts_url, name)

    # 4. Merge the segments into one video.
    merge(name, prefix)

    print(f'耗时: {time.time() - start}')


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

video_async.py 异步协程版本

video_threading_pool.py 线程池版本

Last updated