# meijukankan 视频 (meijukankan video)
# video.py: 串行版本 (serial version)
import requests
import re
import base64
import os
import time
# Playback page of the episode to download.
url = 'https://www.meijukankan.net/play/2749-0-0.html'
# Browser-like headers sent with every request: a desktop Chrome User-Agent,
# a same-site Referer, the Sec-* client hints and a session cookie.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Referer': 'https://www.meijukankan.net/meijukk/2749.html',
'Sec-Ch-Ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': "macOS",
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
# NOTE(review): hard-coded PHP session id, presumably copied from a browser
# session; it will likely expire -- confirm whether it is still needed.
'Cookie': 'PHPSESSID=3u1gcjqc1of80k69f26qsmph8j'
}
# Single requests session shared by every function in this script.
session = requests.session()
# Resolve the address of the real m3u8 playlist and the episode name
def get_m3u8():
    """Resolve the media playlist URL and the episode name for ``url``.

    The playback page embeds a base64-encoded URL of a small "descriptor"
    playlist; the third line of that descriptor is the path of the real
    playlist on ``https://v5.1080pzy.co``.

    Returns:
        tuple[str, str]: ``(url_detail_m3u8, name)`` -- the absolute URL of
        the media playlist and the episode name parsed from the page.

    Raises:
        requests.HTTPError: if either HTTP request returns an error status.
        ValueError: if the page or the descriptor playlist does not contain
            the expected data.
    """
    # Fetch the HTML source of the playback page.
    res = session.get(url, headers=headers)
    res.raise_for_status()
    html_content = res.text

    # Extract the base64-encoded descriptor playlist address. The pattern is
    # a raw string: '\(' in a normal literal is an invalid escape sequence.
    encoded = re.search(r'var now=base64decode\("(.*?)"\)', html_content, re.S)
    if encoded is None:
        raise ValueError('base64-encoded m3u8 address not found in the page')
    url_desc_m3u8 = base64.b64decode(encoded.group(1)).decode('utf-8')

    # Download the descriptor playlist; its third line holds the real path.
    res1_m3u8 = session.get(url_desc_m3u8, headers=headers)
    res1_m3u8.raise_for_status()
    desc_lines = res1_m3u8.text.splitlines()
    if len(desc_lines) < 3:
        raise ValueError('descriptor m3u8 has fewer than 3 lines')
    # strip() drops a trailing '\r' left by CRLF line endings.
    suffix_url = desc_lines[2].strip()

    # Assemble the absolute address of the real playlist.
    url_detail_m3u8 = 'https://v5.1080pzy.co' + suffix_url

    # Parse the episode name from the "now playing" banner.
    title = re.search(
        r'<div class="play_menu">正在播放:<a href=".*?">(.*?)</a>(.*?)-无尽视频</div>',
        html_content,
        re.S,
    )
    if title is None:
        raise ValueError('episode name not found in the page')
    name = title.group(1) + title.group(2)
    return url_detail_m3u8, name
# Fetch the media playlist and collect the .ts segment addresses
def get_ts_urls(url_detail_m3u8, name, limit=20):
    """Download the media playlist, save it locally and list its segments.

    Args:
        url_detail_m3u8: Absolute URL of the media playlist.
        name: Existing local directory; the playlist is stored there as
            ``index.m3u8``.
        limit: Maximum number of segment URLs to return. Defaults to 20,
            the value that used to be hard-coded; pass ``None`` for all
            segments. The playlist written to disk is never truncated.

    Returns:
        list[str]: Segment URLs in playlist order.

    Raises:
        requests.HTTPError: if the playlist request returns an error status.
    """
    # Fetch the playlist content.
    detail_m3u8 = session.get(url_detail_m3u8, headers=headers)
    detail_m3u8.raise_for_status()
    # Keep a raw copy on disk; merge() rewrites it into a local playlist.
    with open(os.path.join(name, 'index.m3u8'), 'wb') as f:
        f.write(detail_m3u8.content)
    # A segment URI is the line following an '#EXTINF:<duration>,' line.
    # '\r?' also accepts playlists with CRLF line endings.
    ts_urls = re.findall(r',\r?\n(.*?\.ts)', detail_m3u8.text, re.S)
    if limit is not None:
        ts_urls = ts_urls[:limit]
    return ts_urls
# Download one .ts segment
def download_ts(ts_url, name):
    """Download a single segment into directory ``name``.

    Args:
        ts_url: Absolute URL of the segment.
        name: Existing local directory; the file keeps the URL's basename.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never saved as a video segment.
    """
    ts_name = os.path.basename(ts_url)
    ts_path = os.path.join(name, ts_name)
    video = session.get(ts_url, headers=headers)
    video.raise_for_status()
    with open(ts_path, 'wb') as f:
        f.write(video.content)
    print('{} download completed'.format(ts_name))
# Merge the downloaded segments into one video
def merge(name, prefix):
    """Rewrite the playlist to local file names and run ffmpeg on it.

    Reads ``<name>/index.m3u8``, removes ``prefix + '/'`` from every line and
    writes the result to ``<name>/index_local.m3u8`` (overwriting any file
    left by an earlier run), then runs
    ``ffmpeg -i index_local.m3u8 -c copy <name>.mp4`` inside ``name``.

    Args:
        name: Directory holding ``index.m3u8`` and the downloaded segments;
            also used as the base name of the output mp4.
        prefix: Remote directory part of the segment URLs. When empty, the
            playlist is copied unchanged (a bare '/' would otherwise be
            stripped from every line).

    ffmpeg is started with ``cwd=name`` and an argument list, so the working
    directory of this process is not changed and ``name`` is never parsed by
    a shell. A missing ffmpeg binary or a non-zero exit status is not raised.
    """
    import subprocess

    source_path = os.path.join(name, 'index.m3u8')
    local_path = os.path.join(name, 'index_local.m3u8')
    remote_dir = prefix + '/' if prefix else ''

    # Open the output once in 'w' mode: appending per line duplicated the
    # playlist whenever the script was run a second time.
    with open(source_path, 'r', encoding='utf-8') as src, \
            open(local_path, 'w', encoding='utf-8') as dst:
        for line in src:
            if remote_dir:
                line = line.replace(remote_dir, '')
            dst.write(line)

    # Merge all segments into an mp4 without re-encoding.
    command = ['ffmpeg', '-i', 'index_local.m3u8', '-c', 'copy', name + '.mp4']
    try:
        subprocess.run(command, cwd=name, check=False)
    except FileNotFoundError:
        # os.system() only reported a missing binary through the shell;
        # keep that situation non-fatal.
        print('ffmpeg not found on PATH; skipping merge')
def main():
    """Download one episode sequentially and merge it, printing the elapsed time."""
    started_at = time.time()

    # 0. Resolve the playlist address and the episode name.
    playlist_url, episode = get_m3u8()

    # 1. Create the local directory for this episode.
    if not os.path.exists(episode):
        os.mkdir(episode)

    # 2. Save the playlist locally and collect the segment addresses.
    segment_urls = get_ts_urls(playlist_url, episode)
    # Remote directory of the segments; merge() strips it from the playlist.
    remote_dir = os.path.dirname(segment_urls[0])

    # 3. Download the segments one after another.
    for segment_url in segment_urls:
        download_ts(segment_url, episode)

    # 4. Merge the segments into a single video.
    merge(episode, remote_dir)

    elapsed = time.time() - started_at
    print('耗时: {}'.format(elapsed))
main()
# video_async.py: 异步协程版本 (asyncio coroutine version)
import re
import base64
import os
import time
import aiohttp
import asyncio
# Playback page of the episode to download.
url = 'https://www.meijukankan.net/play/2749-0-0.html'
# Browser-like headers sent with every request: a desktop Chrome User-Agent,
# a same-site Referer, the Sec-* client hints and a session cookie.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Referer': 'https://www.meijukankan.net/meijukk/2749.html',
'Sec-Ch-Ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': "macOS",
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
# NOTE(review): hard-coded PHP session id, presumably copied from a browser
# session; it will likely expire -- confirm whether it is still needed.
'Cookie': 'PHPSESSID=3u1gcjqc1of80k69f26qsmph8j'
}
# Resolve the address of the real m3u8 playlist and the episode name
async def get_m3u8():
    """Resolve the media playlist URL and the episode name for ``url``.

    The playback page embeds a base64-encoded URL of a small "descriptor"
    playlist; the third line of that descriptor is the path of the real
    playlist on ``https://v5.1080pzy.co``.

    Returns:
        tuple[str, str]: ``(url_detail_m3u8, name)`` -- the absolute URL of
        the media playlist and the episode name parsed from the page.

    Raises:
        aiohttp.ClientResponseError: if either request returns an error
            status.
        ValueError: if the page or the descriptor playlist does not contain
            the expected data.
    """
    # NOTE: ssl=False (kept from the original) skips certificate validation.
    async with aiohttp.ClientSession() as session:
        # Fetch the HTML source of the playback page.
        async with session.get(url, headers=headers, ssl=False) as resp:
            resp.raise_for_status()
            html_content = await resp.text()

        # Extract the base64-encoded descriptor playlist address. The
        # pattern is a raw string: '\(' in a normal literal is an invalid
        # escape sequence.
        encoded = re.search(r'var now=base64decode\("(.*?)"\)', html_content, re.S)
        if encoded is None:
            raise ValueError('base64-encoded m3u8 address not found in the page')
        url_desc_m3u8 = base64.b64decode(encoded.group(1)).decode('utf-8')

        # Download the descriptor playlist; its third line holds the path.
        async with session.get(url_desc_m3u8, headers=headers, ssl=False) as resp2:
            resp2.raise_for_status()
            res1_m3u8 = await resp2.text()

    desc_lines = res1_m3u8.splitlines()
    if len(desc_lines) < 3:
        raise ValueError('descriptor m3u8 has fewer than 3 lines')
    # strip() drops a trailing '\r' left by CRLF line endings.
    suffix_url = desc_lines[2].strip()

    # Assemble the absolute address of the real playlist.
    url_detail_m3u8 = 'https://v5.1080pzy.co' + suffix_url

    # Parse the episode name from the "now playing" banner.
    title = re.search(
        r'<div class="play_menu">正在播放:<a href=".*?">(.*?)</a>(.*?)-无尽视频</div>',
        html_content,
        re.S,
    )
    if title is None:
        raise ValueError('episode name not found in the page')
    name = title.group(1) + title.group(2)
    return url_detail_m3u8, name
# Fetch the media playlist and collect the .ts segment addresses
async def get_ts_urls(url_detail_m3u8, name, limit=10):
    """Download the media playlist, save it locally and list its segments.

    Args:
        url_detail_m3u8: Absolute URL of the media playlist.
        name: Existing local directory; the playlist is stored there as
            ``index.m3u8``.
        limit: Maximum number of segment URLs to return. Defaults to 10,
            the value that used to be hard-coded; pass ``None`` for all
            segments. The playlist written to disk is never truncated.

    Returns:
        list[str]: Segment URLs in playlist order.

    Raises:
        aiohttp.ClientResponseError: if the request returns an error status.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url_detail_m3u8, headers=headers, ssl=False) as resp:
            resp.raise_for_status()
            # Fetch the playlist content.
            detail_m3u8 = await resp.text()
    # Keep a copy on disk; merge() rewrites it into a local playlist.
    # An explicit encoding avoids depending on the locale default.
    with open(os.path.join(name, 'index.m3u8'), 'w', encoding='utf-8') as f:
        f.write(detail_m3u8)
    # A segment URI is the line following an '#EXTINF:<duration>,' line.
    # '\r?' also accepts playlists with CRLF line endings.
    ts_urls = re.findall(r',\r?\n(.*?\.ts)', detail_m3u8, re.S)
    if limit is not None:
        ts_urls = ts_urls[:limit]
    return ts_urls
# Download one .ts segment
async def download_ts(ts_url, name):
    """Download a single segment into directory ``name``.

    Args:
        ts_url: Absolute URL of the segment.
        name: Existing local directory; the file keeps the URL's basename.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error
            status, so an error page is never saved as a video segment.
    """
    ts_name = os.path.basename(ts_url)
    ts_path = os.path.join(name, ts_name)
    async with aiohttp.ClientSession() as session:
        async with session.get(ts_url, headers=headers, ssl=False) as resp:
            resp.raise_for_status()
            payload = await resp.content.read()
    # Open the file only after the body arrived, so a failed transfer does
    # not leave an empty segment file behind.
    with open(ts_path, 'wb') as f:
        f.write(payload)
    print('{} download completed'.format(ts_name))
# Merge the downloaded segments into one video
def merge(name, prefix):
    """Rewrite the playlist to local file names and run ffmpeg on it.

    Reads ``<name>/index.m3u8``, removes ``prefix + '/'`` from every line and
    writes the result to ``<name>/index_local.m3u8`` (overwriting any file
    left by an earlier run), then runs
    ``ffmpeg -i index_local.m3u8 -c copy <name>.mp4`` inside ``name``.

    Args:
        name: Directory holding ``index.m3u8`` and the downloaded segments;
            also used as the base name of the output mp4.
        prefix: Remote directory part of the segment URLs. When empty, the
            playlist is copied unchanged (a bare '/' would otherwise be
            stripped from every line).

    ffmpeg is started with ``cwd=name`` and an argument list, so the working
    directory of this process is not changed and ``name`` is never parsed by
    a shell. A missing ffmpeg binary or a non-zero exit status is not raised.
    """
    import subprocess

    source_path = os.path.join(name, 'index.m3u8')
    local_path = os.path.join(name, 'index_local.m3u8')
    remote_dir = prefix + '/' if prefix else ''

    # Open the output once in 'w' mode: appending per line duplicated the
    # playlist whenever the script was run a second time.
    with open(source_path, 'r', encoding='utf-8') as src, \
            open(local_path, 'w', encoding='utf-8') as dst:
        for line in src:
            if remote_dir:
                line = line.replace(remote_dir, '')
            dst.write(line)

    # Merge all segments into an mp4 without re-encoding.
    command = ['ffmpeg', '-i', 'index_local.m3u8', '-c', 'copy', name + '.mp4']
    try:
        subprocess.run(command, cwd=name, check=False)
    except FileNotFoundError:
        # os.system() only reported a missing binary through the shell;
        # keep that situation non-fatal.
        print('ffmpeg not found on PATH; skipping merge')
async def main():
    """Download one episode with concurrent segment downloads, then merge it.

    Prints the elapsed time of each step. Any exception raised by a segment
    download propagates out of this coroutine before the merge step runs.
    """
    # 0. Resolve the playlist address and the episode name.
    print('Step get_m3u8 start')
    time0 = time.time()
    url_detail_m3u8, name = await get_m3u8()
    print('Step get_m3u8 end')
    print('获取m3u8文件: 耗时: {}\n'.format(time.time() - time0))

    # 1. Create the local directory (no exists-then-create race).
    os.makedirs(name, exist_ok=True)

    # 2. Save the playlist locally and collect the segment addresses.
    print('Step get_ts_urls start')
    time1 = time.time()
    ts_urls = await get_ts_urls(url_detail_m3u8, name)
    # Remote directory of the segments; merge() strips it from the playlist.
    prefix = os.path.dirname(ts_urls[0])
    print('Step get_ts_urls end')
    print('获取ts文件地址: 耗时: {}\n'.format(time.time() - time1))

    # 3. Download all segments concurrently.
    time2 = time.time()
    print('Step download_ts start')
    # gather() re-raises the first download error; asyncio.wait() left task
    # exceptions unretrieved, so failures went unnoticed before merging.
    await asyncio.gather(*(download_ts(ts_url, name) for ts_url in ts_urls))
    print('Step download_ts end')
    print('下载视频耗时: {}\n'.format(time.time() - time2))

    # 4. Merge the segments into a single video.
    print('Step merge start')
    time3 = time.time()
    merge(name, prefix)
    print('Step merge end')
    print('合并视频耗时: {}'.format(time.time() - time3))
# main()
asyncio.run(main())
# video_threading_pool.py: 线程池版本 (thread pool version)
import requests
import re
import base64
import os
import time
from concurrent.futures import ThreadPoolExecutor
# Suppress urllib3 warnings (the requests in this script pass verify=False).
requests.packages.urllib3.disable_warnings()
# Ask HTTP clients that honour NO_PROXY to bypass the proxy for this host.
os.environ['NO_PROXY'] = 'meijukankan.net'
# Playback page of the episode to download.
url = 'https://www.meijukankan.net/play/2749-0-2.html'
# Browser-like headers sent with every request: a desktop Chrome User-Agent,
# a same-site Referer and the Sec-* client hints. The cookie is disabled.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'Referer': 'https://www.meijukankan.net/meijukk/2749.html',
'Sec-Ch-Ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': "macOS",
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-User': '?1',
# 'Cookie': 'PHPSESSID=3u1gcjqc1of80k69f26qsmph8j'
}
# Single requests session shared by every function (and worker thread) in this script.
session = requests.session()
# Resolve the address of the real m3u8 playlist and the episode name
def get_m3u8():
    """Resolve the media playlist URL and the episode name for ``url``.

    The playback page embeds a base64-encoded URL of a small "descriptor"
    playlist; the third line of that descriptor is the path of the real
    playlist on ``https://v5.1080pzy.co``.

    Returns:
        tuple[str, str]: ``(url_detail_m3u8, name)`` -- the absolute URL of
        the media playlist and the episode name parsed from the page.

    Raises:
        requests.HTTPError: if either HTTP request returns an error status.
        ValueError: if the page or the descriptor playlist does not contain
            the expected data.
    """
    # Fetch the HTML source of the playback page.
    # NOTE: verify=False (kept from the original) skips certificate checks.
    res = session.get(url, headers=headers, verify=False)
    res.raise_for_status()
    html_content = res.text

    # Extract the base64-encoded descriptor playlist address. The pattern is
    # a raw string: '\(' in a normal literal is an invalid escape sequence.
    encoded = re.search(r'var now=base64decode\("(.*?)"\)', html_content, re.S)
    if encoded is None:
        raise ValueError('base64-encoded m3u8 address not found in the page')
    url_desc_m3u8 = base64.b64decode(encoded.group(1)).decode('utf-8')

    # Download the descriptor playlist; its third line holds the real path.
    res1_m3u8 = session.get(url_desc_m3u8, headers=headers, verify=False)
    res1_m3u8.raise_for_status()
    desc_lines = res1_m3u8.text.splitlines()
    if len(desc_lines) < 3:
        raise ValueError('descriptor m3u8 has fewer than 3 lines')
    # strip() drops a trailing '\r' left by CRLF line endings.
    suffix_url = desc_lines[2].strip()

    # Assemble the absolute address of the real playlist.
    url_detail_m3u8 = 'https://v5.1080pzy.co' + suffix_url

    # Parse the episode name from the "now playing" banner.
    title = re.search(
        r'<div class="play_menu">正在播放:<a href=".*?">(.*?)</a>(.*?)-无尽视频</div>',
        html_content,
        re.S,
    )
    if title is None:
        raise ValueError('episode name not found in the page')
    name = title.group(1) + title.group(2)
    return url_detail_m3u8, name
# Fetch the media playlist and collect the .ts segment addresses
def get_ts_urls(url_detail_m3u8, name, limit=100):
    """Download the media playlist, save it locally and list its segments.

    Args:
        url_detail_m3u8: Absolute URL of the media playlist.
        name: Existing local directory; the playlist is stored there as
            ``index.m3u8``.
        limit: Maximum number of segment URLs to return. Defaults to 100,
            the value that used to be hard-coded; pass ``None`` for all
            segments. The playlist written to disk is never truncated.

    Returns:
        list[str]: Segment URLs in playlist order.

    Raises:
        requests.HTTPError: if the playlist request returns an error status.
    """
    # Fetch the playlist content.
    detail_m3u8 = session.get(url_detail_m3u8, headers=headers, verify=False)
    detail_m3u8.raise_for_status()
    # Keep a raw copy on disk; merge() rewrites it into a local playlist.
    with open(os.path.join(name, 'index.m3u8'), 'wb') as f:
        f.write(detail_m3u8.content)
    # A segment URI is the line following an '#EXTINF:<duration>,' line.
    # '\r?' also accepts playlists with CRLF line endings.
    ts_urls = re.findall(r',\r?\n(.*?\.ts)', detail_m3u8.text, re.S)
    if limit is not None:
        ts_urls = ts_urls[:limit]
    return ts_urls
# Download one .ts segment
def download_ts(ts_url, name):
    """Download a single segment into directory ``name``.

    Args:
        ts_url: Absolute URL of the segment.
        name: Existing local directory; the file keeps the URL's basename.

    Returns:
        str: The literal ``'success'`` once the file has been written.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never saved as a video segment.
    """
    ts_name = os.path.basename(ts_url)
    ts_path = os.path.join(name, ts_name)
    print('{} download start'.format(ts_name))
    video = session.get(ts_url, headers=headers, verify=False)
    video.raise_for_status()
    with open(ts_path, 'wb') as f:
        f.write(video.content)
    print('{} download completed'.format(ts_name))
    return 'success'
# Merge the downloaded segments into one video
def merge(name, prefix):
    """Rewrite the playlist to local file names and run ffmpeg on it.

    Reads ``<name>/index.m3u8``, removes ``prefix + '/'`` from every line and
    writes the result to ``<name>/index_local.m3u8`` (overwriting any file
    left by an earlier run), then runs
    ``ffmpeg -i index_local.m3u8 -c copy <name>.mp4`` inside ``name``.

    Args:
        name: Directory holding ``index.m3u8`` and the downloaded segments;
            also used as the base name of the output mp4.
        prefix: Remote directory part of the segment URLs. When empty, the
            playlist is copied unchanged (a bare '/' would otherwise be
            stripped from every line).

    ffmpeg is started with ``cwd=name`` and an argument list, so the working
    directory of this process is not changed and ``name`` is never parsed by
    a shell. A missing ffmpeg binary or a non-zero exit status is not raised.
    """
    import subprocess

    source_path = os.path.join(name, 'index.m3u8')
    local_path = os.path.join(name, 'index_local.m3u8')
    remote_dir = prefix + '/' if prefix else ''

    # Open the output once in 'w' mode: appending per line duplicated the
    # playlist whenever the script was run a second time.
    with open(source_path, 'r', encoding='utf-8') as src, \
            open(local_path, 'w', encoding='utf-8') as dst:
        for line in src:
            if remote_dir:
                line = line.replace(remote_dir, '')
            dst.write(line)

    # Merge all segments into an mp4 without re-encoding.
    command = ['ffmpeg', '-i', 'index_local.m3u8', '-c', 'copy', name + '.mp4']
    try:
        subprocess.run(command, cwd=name, check=False)
    except FileNotFoundError:
        # os.system() only reported a missing binary through the shell;
        # keep that situation non-fatal.
        print('ffmpeg not found on PATH; skipping merge')
def main():
    """Download one episode with a 10-thread pool, then merge it.

    Prints the elapsed time of each step and of the whole run. An exception
    raised by any segment download propagates after the pool has shut down.
    """
    start = time.time()

    # 0. Resolve the playlist address and the episode name.
    print('Step get_m3u8 start')
    time0 = time.time()
    url_detail_m3u8, name = get_m3u8()
    print('Step get_m3u8 end')
    print('获取m3u8文件: 耗时: {}\n'.format(time.time() - time0))

    # 1. Create the local directory (no exists-then-create race).
    os.makedirs(name, exist_ok=True)

    # 2. Save the playlist locally and collect the segment addresses.
    print('Step get_ts_urls start')
    time1 = time.time()
    ts_urls = get_ts_urls(url_detail_m3u8, name)
    # Remote directory of the segments; merge() strips it from the playlist.
    prefix = os.path.dirname(ts_urls[0])
    print('Step get_ts_urls end')
    print('获取ts文件地址: 耗时: {}\n'.format(time.time() - time1))

    # 3. Download the segments on a pool of worker threads.
    time2 = time.time()
    print('Step download_ts start')
    # The context manager shuts the pool down even when a download raises.
    with ThreadPoolExecutor(10) as pool:
        tasks = [pool.submit(download_ts, ts_url, name) for ts_url in ts_urls]
        result = [task.result() for task in tasks]
        print(result)
    print('Step download_ts end')
    print('下载视频耗时: {}\n'.format(time.time() - time2))

    # 4. Merge the segments into a single video.
    print('Step merge start')
    time3 = time.time()
    merge(name, prefix)
    print('Step merge end')
    print('合并视频耗时: {}\n'.format(time.time() - time3))
    print('总耗时: {}'.format(time.time() - start))
main()
Last updated