优志愿
说明
# URL
https://youzy.cn/tzy/search/colleges/collegeList
# 接口
https://uwf7de983aad7a717eb.youzy.cn/youzy.dms.basiclib.api.college.query
注意:此案例含有 requests 发送 JSON 数据时的坑(序列化后的字符串是否带空格),需谨慎处理。
代码实现 - 1
enc.js
const CryptoJS = require("crypto-js")
/**
 * Compute the request signature that the spider sends as the `u-sign` header.
 *
 * The text that gets hashed is built as follows and then lower-cased:
 *   - `k1=v1&k2=v2&<key>` when `path` contains a query string (each value is
 *     passed through `encodeURI`);
 *   - `<JSON.stringify(t)>&<key>` when `t` has at least one own enumerable key;
 *   - `&<key>` otherwise.
 *
 * @param {Object} [t] - Request payload; a falsy value is treated as `{}`.
 * @param {string} [path] - Endpoint path, optionally followed by `?query`.
 *   Defaults to the college-query endpoint the original code hard-coded.
 * @returns {string} MD5 digest of the lower-cased text, as returned by
 *   CryptoJS's `toString()`.
 */
function get_sign(t, path = "/youzy.dms.basiclib.api.college.query") {
    const signKey = "9SASji5OWnG41iRKiSvTJHlXHmRySRp1";
    const payload = t || {};
    // Everything after the first "?" up to the next "?" is the query string.
    const query = (path || "").split("?")[1];

    let plain;
    if (query) {
        const joined = query
            .split("&")
            .map((pair) => {
                const parts = pair.split("=");
                return `${parts[0]}=${encodeURI(parts[1])}&`;
            })
            .join("");
        // Strip trailing "&" without lodash: `_` was never imported in this file.
        plain = `${joined.replace(/&+$/, "")}&${signKey}`;
    } else if (Object.keys(payload).length > 0) {
        plain = `${JSON.stringify(payload)}&${signKey}`;
    } else {
        plain = `&${signKey}`;
    }

    // The whole text, key included, is lower-cased before hashing.
    return CryptoJS.MD5(CryptoJS.enc.Utf8.parse(plain.toLowerCase())).toString();
}
// spider.py
import execjs
import json
import requests
# Request headers for the API call. 'u-sign' is computed below and added
# before the request is sent.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Origin': 'https://pv4y-pc.youzy.cn',
    'Pragma': 'no-cache',
    'Referer': 'https://pv4y-pc.youzy.cn/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    # 'u-sign': '643ff9499febb3ee34c95ffe0bb29cb0',
    'u-token': '',
}

# Query payload: first page, 20 results per page.
json_data = {
    'keyword': '',
    'provinceNames': [],
    'natureTypes': [],
    'eduLevel': '',
    'categories': [],
    'features': [],
    'pageIndex': 1,
    'pageSize': 20,
    'sort': 11,
}

# Serialize json_data ourselves so the resulting string contains no spaces.
# If json=json_data were passed to requests instead, requests would serialize
# it with spaces, the body would no longer match the string the site expects,
# and the request would fail. The pre-serialized string is therefore sent
# with data=data rather than json=json_data.
data = json.dumps(json_data, separators=(',', ':'))
# print(data)
# {"keyword":"","provinceNames":[],"natureTypes":[],"eduLevel":"","categories":[],"features":[],"pageIndex":1,"pageSize":20,"sort":11}

# Load the JavaScript signing code from enc.js; the with-block closes the
# file handle once the source has been read.
with open('enc.js', 'r', encoding='utf-8') as js_file:
    js_compile = execjs.compile(js_file.read())

sign = js_compile.call("get_sign", json_data)
print(sign)
headers['u-sign'] = sign

response = requests.post(
    'https://uwf7de983aad7a717eb.youzy.cn/youzy.dms.basiclib.api.college.query',
    headers=headers,
    data=data,
    # requests applies no timeout by default; bound the wait so the script
    # cannot hang forever on an unresponsive server.
    timeout=10,
)
print(response.json())
代码实现 - 2
enc.js
const CryptoJS = require("crypto-js")
/**
 * Compute the request signature that the spider sends as the `u-sign` header.
 *
 * The text that gets hashed is built as follows and then lower-cased:
 *   - `k1=v1&k2=v2&<key>` when `path` contains a query string (each value is
 *     passed through `encodeURI`);
 *   - `<JSON.stringify(t)>&<key>` when `t` has at least one own enumerable key;
 *   - `&<key>` otherwise.
 *
 * @param {Object} [t] - Request payload; a falsy value is treated as `{}`.
 * @param {string} [path] - Endpoint path, optionally followed by `?query`.
 *   Defaults to the college-query endpoint the original code hard-coded.
 * @returns {string} MD5 digest of the lower-cased text, as returned by
 *   CryptoJS's `toString()`.
 */
function get_sign(t, path = "/youzy.dms.basiclib.api.college.query") {
    const signKey = "9SASji5OWnG41iRKiSvTJHlXHmRySRp1";
    const payload = t || {};
    // Everything after the first "?" up to the next "?" is the query string.
    const query = (path || "").split("?")[1];

    let plain;
    if (query) {
        const joined = query
            .split("&")
            .map((pair) => {
                const parts = pair.split("=");
                return `${parts[0]}=${encodeURI(parts[1])}&`;
            })
            .join("");
        // Strip trailing "&" without lodash: `_` was never imported in this file.
        plain = `${joined.replace(/&+$/, "")}&${signKey}`;
    } else if (Object.keys(payload).length > 0) {
        plain = `${JSON.stringify(payload)}&${signKey}`;
    } else {
        plain = `&${signKey}`;
    }

    // The whole text, key included, is lower-cased before hashing.
    return CryptoJS.MD5(CryptoJS.enc.Utf8.parse(plain.toLowerCase())).toString();
}
// spider.py
import execjs
# 使用此requests可以简单绕过指纹识别,下方配合参数(impersonate="chrome119")
from curl_cffi import requests
# Request headers for the API call. 'u-sign' is computed below and added
# before the request is sent.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json',
    'Origin': 'https://pv4y-pc.youzy.cn',
    'Pragma': 'no-cache',
    'Referer': 'https://pv4y-pc.youzy.cn/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    # 'u-sign': '643ff9499febb3ee34c95ffe0bb29cb0',
    'u-token': '',
}

# Query payload: first page, 20 results per page.
json_data = {
    'keyword': '',
    'provinceNames': [],
    'natureTypes': [],
    'eduLevel': '',
    'categories': [],
    'features': [],
    'pageIndex': 1,
    'pageSize': 20,
    'sort': 11,
}

# Load the JavaScript signing code from enc.js; the with-block closes the
# file handle once the source has been read.
with open('enc.js', 'r', encoding='utf-8') as js_file:
    js_compile = execjs.compile(js_file.read())

sign = js_compile.call("get_sign", json_data)
print(sign)
headers['u-sign'] = sign

# NOTE(review): unlike the requests-based variant, the payload is passed via
# json=. This assumes curl_cffi serializes it without spaces so the body
# matches the string that was signed; confirm against the curl_cffi version
# in use.
response = requests.post(
    'https://uwf7de983aad7a717eb.youzy.cn/youzy.dms.basiclib.api.college.query',
    headers=headers,
    json=json_data,
    impersonate="chrome119"
)
print(response.json())
Last updated