Skip to content

Commit 4c6ca9b

Browse files
author
Taois
committed
feat: 完美
1 parent ecddacb commit 4c6ca9b

File tree

5 files changed

+767
-12
lines changed

5 files changed

+767
-12
lines changed

libs/drpyS.js

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -455,9 +455,9 @@ export async function init(filePath, env = {}, refresh) {
455455
// delete sandbox.rule;
456456

457457
// 清理沙箱中的临时构建变量
458-
delete sandbox._asyncGetRule;
459-
delete sandbox.module;
460-
delete sandbox.exports;
458+
// delete sandbox._asyncGetRule;
459+
// delete sandbox.module;
460+
// delete sandbox.exports;
461461

462462
// 缓存模块和文件的 hash 值
463463
moduleCache.set(hashMd5, {moduleObject, hash: fileHash});
@@ -515,9 +515,9 @@ export async function getRuleObject(filePath, env, refresh) {
515515
// delete sandbox.rule;
516516

517517
// 清理沙箱中的临时构建变量
518-
delete sandbox._asyncGetRule;
519-
delete sandbox.module;
520-
delete sandbox.exports;
518+
// delete sandbox._asyncGetRule;
519+
// delete sandbox.module;
520+
// delete sandbox.exports;
521521

522522
// 缓存模块和文件的 hash 值
523523
ruleObjectCache.set(filePath, {ruleObject, hash: fileHash});
@@ -569,9 +569,9 @@ export async function initJx(filePath, env, refresh) {
569569
log(`[initJx] 加载解析:${filePath} 耗时 ${cost}毫秒`)
570570

571571
// 清理沙箱中的临时构建变量
572-
delete sandbox._asyncGetLazy;
573-
delete sandbox.module;
574-
delete sandbox.exports;
572+
// delete sandbox._asyncGetLazy;
573+
// delete sandbox.module;
574+
// delete sandbox.exports;
575575

576576
jxCache.set(hashMd5, {jxObj, hash: fileHash});
577577
return jxObj;

spider/js/_lib.cntv-urlparse.cjs

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
#!/usr/bin/env node
2+
/**
3+
* CCTV Video URL Parser
4+
* 解析央视视频URL,获取视频下载链接
5+
*
6+
* 使用方法:
7+
* node parse_url.js "https://tv.cctv.com/2026/03/13/VIDE1I89jcwxtmOiZUd6zsLR260313.shtml"
8+
* node parse_url.js "330318aa5ca745d286b8d6c57e971a39"
9+
* echo "<html>...</html>" | node parse_url.js -
10+
*/
11+
12+
const crypto = require('crypto');
13+
const fs = require('fs');
14+
15+
// Fixed request parameters for the CNTV video-info endpoint.
const CCTV_API_URL = 'https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do';
// SECRET_KEY and UID are hashed (together with a timestamp and version) into
// the `vc` query parameter; UID is also sent verbatim as `uid`.
const SECRET_KEY = '47899B86370B879139C08EA3B5E88267';
const UID = '826D8646DEBBFD97A82D23CAE45A55BE';

// Patterns tried in order to pull the video id out of page source.
// Capture group 1 of each pattern is the hexadecimal id.
const PID_RULES = [
    /var\s+guid\s*=\s*["']([\da-fA-F]+)["']/,
    /videoCenterId(?:["']\s*,|:)\s*["']([\da-fA-F]+)["']/,
    /changePlayer\s*\(\s*["']([\da-fA-F]+)["']\)/,
    /load[Vv]ideo\s*\(\s*["']([\da-fA-F]+)["']\)/,
    /var\s+initMyAray\s*=\s*["']([\da-fA-F]+)["']/,
    /var\s+ids\s*=\s*\[["']([\da-fA-F]+)["']\]/
];

// A bare PID is exactly 32 hexadecimal characters.
const PID_PATTERN = /^[\da-fA-F]{32}$/;
32+
33+
/**
 * Compute the MD5 digest of a value.
 *
 * @param {string} value - Text to hash; it is fed to the hash as UTF-8.
 * @returns {string} The digest encoded as a hexadecimal string.
 */
function md5(value) {
    return crypto.createHash('md5').update(value, 'utf-8').digest('hex');
}
39+
40+
/**
 * Perform an HTTP GET with the global `fetch` and return the body as text.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} Response body text.
 * @throws {Error} `HTTP <status>: <statusText>` when the response is not ok;
 *     errors raised by `fetch` itself propagate unchanged.
 */
async function httpGet(url) {
    const response = await fetch(url, {
        headers: {
            // Present a desktop browser User-Agent with the request.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
        }
    });
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    return response.text();
}
58+
59+
/**
 * Classify a user-supplied input string.
 *
 * Checks run in this order: the literal "-" (stdin), an http(s) URL, known
 * HTML/JavaScript markers, a bare 32-character hexadecimal PID. Anything
 * else is treated as HTML/JS content.
 *
 * @param {string} input - Raw user input.
 * @returns {string} One of 'url' | 'pid' | 'html' | 'stdin'.
 * @throws {TypeError} When `input` is not a string.
 */
function detectInputType(input) {
    if (typeof input !== 'string') {
        throw new TypeError(`输入必须是字符串,实际类型: ${typeof input}`);
    }
    if (input === '-') {
        return 'stdin';
    }
    // URLs take priority over every content-based check.
    if (/^https?:\/\//i.test(input)) {
        return 'url';
    }
    // HTML tags or JavaScript fragments that identify page source.
    const contentMarkers = [
        '<html', '<HTML', '<body', '<BODY',
        'var guid', 'var videoCenterId', 'changePlayer',
        'loadVideo', 'initMyAray'
    ];
    if (contentMarkers.some((marker) => input.includes(marker))) {
        return 'html';
    }
    // A bare 32-character hexadecimal string is a PID.
    if (PID_PATTERN.test(input)) {
        return 'pid';
    }
    // Everything else is handled as HTML/JS content.
    return 'html';
}
85+
86+
/**
 * Extract the video id from page source.
 *
 * Each pattern in PID_RULES is tried in order; the first one whose capture
 * group 1 is non-empty wins.
 *
 * @param {string} html - HTML or JavaScript text to search.
 * @returns {string|null} The captured id, or null when no pattern matches.
 */
function extractPid(html) {
    for (const rule of PID_RULES) {
        const match = html.match(rule);
        if (match && match[1]) {
            return match[1];
        }
    }
    return null;
}
98+
99+
/**
 * Pick the highest-bandwidth variant of an HLS playlist and return its
 * absolute URL.
 *
 * @param {string} m3u8Content - Playlist text.
 * @param {string} m3u8BaseUrl - URL the playlist was fetched from; relative
 *     URIs are resolved against it.
 * @returns {string|null} URL of the selected stream, or null when the
 *     playlist contains no usable URI line.
 * @throws {TypeError} From `new URL` when a relative URI has to be resolved
 *     and `m3u8BaseUrl` is not a valid URL.
 */
function parseM3U8ForBestQuality(m3u8Content, m3u8BaseUrl) {
    // Accept LF and CRLF, and trim so indentation never hides a tag.
    const lines = m3u8Content.split(/\r?\n/).map((line) => line.trim());
    const isUriLine = (line) => line !== '' && !line.startsWith('#');

    let bestBandwidth = 0;
    let bestUri = null;

    // Master playlist: find the variant with the highest BANDWIDTH.
    for (let i = 0; i < lines.length; i++) {
        if (!lines[i].startsWith('#EXT-X-STREAM-INF:')) {
            continue;
        }
        // Anchor on the attribute separator so AVERAGE-BANDWIDTH=... is not
        // mistaken for BANDWIDTH=...
        const bandwidthMatch = lines[i].match(/[:,]\s*BANDWIDTH=(\d+)/);
        if (!bandwidthMatch) {
            continue;
        }
        const bandwidth = parseInt(bandwidthMatch[1], 10);
        // The variant URI is the next line that is neither blank nor a tag.
        for (let j = i + 1; j < lines.length; j++) {
            if (isUriLine(lines[j])) {
                if (bandwidth > bestBandwidth) {
                    bestBandwidth = bandwidth;
                    bestUri = lines[j];
                }
                break;
            }
        }
    }

    if (bestUri === null) {
        // Media playlist: its URI lines are individual segments, so the
        // playlist URL itself is the single-bitrate stream.
        if (lines.some((line) => line.startsWith('#EXTINF'))) {
            return m3u8BaseUrl;
        }
        // Otherwise fall back to the first URI line, if there is one.
        bestUri = lines.find(isUriLine) || null;
    }

    if (bestUri === null) {
        return null;
    }

    // A URI that already carries a scheme is returned untouched.
    if (/^[a-z][a-z\d+.-]*:/i.test(bestUri)) {
        return bestUri;
    }

    // Root-relative, protocol-relative and plain relative URIs are all
    // resolved by the standard URL algorithm.
    return new URL(bestUri, m3u8BaseUrl).href;
}
155+
156+
/**
 * Look up a video by PID and resolve its best-quality HLS stream.
 *
 * Builds a signed request to CCTV_API_URL, reads the HLS playlist URL from
 * the JSON response, downloads that playlist and selects a stream URL from it.
 * Progress is logged with console.log.
 *
 * @param {string} pid - Video id.
 * @returns {Promise<Object>} `{success, title, pid, pgtv, hls_key,
 *     download_url, m3u8_url, cover_url}`.
 * @throws {Error} When a request fails, the API response is not a JSON
 *     object, no HLS URL is present, or the playlist yields no stream URL.
 */
async function getVideoInfoByPid(pid) {
    console.log(`使用视频ID: ${pid}`);

    // Step 1: build the signed query string.
    console.log('步骤1: 构建API请求...');
    const tsp = Math.floor(Date.now() / 1000);
    const vn = '2049';
    // Signature: MD5 over timestamp + version + secret + uid.
    const vc = md5(tsp + vn + SECRET_KEY + UID);

    const apiParams = new URLSearchParams({
        pid: pid,
        client: 'flash',
        im: '0',
        tsp: tsp.toString(),
        vn: vn,
        vc: vc,
        uid: UID,
        wlan: ''
    });

    const apiUrl = `${CCTV_API_URL}?${apiParams.toString()}`;
    console.log(`API URL: ${apiUrl}`);

    // Step 2: fetch and decode the video metadata.
    console.log('步骤2: 获取视频信息...');
    const apiResponse = await httpGet(apiUrl);
    let videoData;
    try {
        videoData = JSON.parse(apiResponse);
    } catch (error) {
        // Surface a readable message instead of a bare SyntaxError.
        throw new Error(`视频信息接口返回的不是有效JSON: ${error.message}`, {cause: error});
    }
    if (videoData === null || typeof videoData !== 'object') {
        throw new Error('视频信息接口返回格式异常');
    }

    console.log('视频信息:', {
        title: videoData.title,
        pgtv: videoData.pgtv
    });

    // Step 3: choose the HLS playlist URL. `manifest` entries are preferred
    // over top-level ones, and hls_h5e_url over hls_url within each.
    console.log('步骤3: 解析下载链接...');
    const manifest = videoData.manifest || {};
    const candidates = [
        ['hls_h5e_url', manifest.hls_h5e_url],
        ['hls_url', manifest.hls_url],
        ['hls_h5e_url', videoData.hls_h5e_url],
        ['hls_url', videoData.hls_url]
    ];
    // Remember which key supplied the URL so hls_key reports it accurately.
    const [hlsKey, hlsUrl] = candidates.find(([, url]) => url) || [];

    if (!hlsUrl) {
        throw new Error('无法获取HLS下载链接');
    }
    console.log(`HLS URL: ${hlsUrl}`);

    // Step 4: download the playlist and pick the best-quality stream.
    console.log('步骤4: 解析M3U8获取最佳质量...');
    const m3u8Content = await httpGet(hlsUrl);
    const downloadUrl = parseM3U8ForBestQuality(m3u8Content, hlsUrl);

    if (!downloadUrl) {
        throw new Error('无法解析M3U8内容');
    }

    console.log(`最终下载链接: ${downloadUrl}`);

    return {
        success: true,
        title: videoData.title,
        pid: pid,
        pgtv: videoData.pgtv,
        hls_key: hlsKey,
        download_url: downloadUrl,
        m3u8_url: hlsUrl,
        cover_url: videoData.image
    };
}
226+
227+
/**
 * Entry point: resolve a CCTV video from a page URL, a PID, or page source.
 *
 * The input is trimmed, classified with detectInputType, and the PID is
 * either used directly or extracted from the fetched page / supplied HTML.
 * This function never rejects: failures are logged and returned as
 * `{success: false, error}`. The 'stdin' type ("-") is not handled here and
 * is reported as unsupported.
 *
 * @param {string} input - Page URL, 32-character PID, or HTML/JS content.
 * @returns {Promise<Object>} The getVideoInfoByPid result on success,
 *     otherwise `{success: false, error: <message>}`.
 */
async function parseCCTVUrl(input) {
    try {
        if (typeof input !== 'string') {
            throw new TypeError(`输入必须是字符串,实际类型: ${typeof input}`);
        }
        // Surrounding whitespace (e.g. a trailing newline from a shell pipe)
        // would otherwise turn a PID or URL into unparseable "HTML".
        const source = input.trim();

        const inputType = detectInputType(source);
        console.log(`检测到输入类型: ${inputType}`);

        // A PID goes straight to the API.
        if (inputType === 'pid') {
            return await getVideoInfoByPid(source);
        }

        // A URL: download the page, then extract the PID from it.
        if (inputType === 'url') {
            console.log(`正在解析URL: ${source}`);
            console.log('步骤1: 获取页面内容...');
            const html = await httpGet(source);
            const pid = extractPid(html);

            if (!pid) {
                throw new Error('无法从页面中提取视频ID');
            }
            console.log(`找到视频ID: ${pid}`);

            return await getVideoInfoByPid(pid);
        }

        // HTML/JS content: extract the PID directly.
        if (inputType === 'html') {
            console.log('检测到HTML内容,直接提取视频ID...');
            const pid = extractPid(source);

            if (!pid) {
                throw new Error('无法从HTML内容中提取视频ID');
            }
            console.log(`找到视频ID: ${pid}`);

            return await getVideoInfoByPid(pid);
        }

        throw new Error(`不支持的输入类型: ${inputType}`);

    } catch (error) {
        console.error('解析失败:', error.message);
        return {
            success: false,
            error: error.message
        };
    }
}
280+
281+
// 导出函数供其他模块使用
282+
module.exports = {parseCCTVUrl, detectInputType, getVideoInfoByPid};

spider/js/_lib.cntv-wasm.cjs

Lines changed: 5 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)