1+ #!/usr/bin/env node
/**
 * CCTV Video URL Parser
 * Parses a CCTV video URL and obtains the video download link.
 *
 * Usage:
 *   node parse_url.js "https://tv.cctv.com/2026/03/13/VIDE1I89jcwxtmOiZUd6zsLR260313.shtml"
 *   node parse_url.js "330318aa5ca745d286b8d6c57e971a39"
 *   echo "<html>...</html>" | node parse_url.js -
 */
11+
12+ const crypto = require ( 'crypto' ) ;
13+ const fs = require ( 'fs' ) ;
14+
// Fixed request parameters for the video-info API.
// SECRET_KEY and UID are hashed together with the timestamp and `vn` value to
// form the `vc` query parameter; UID is also sent as the `uid` parameter.
const CCTV_API_URL = 'https://vdn.apps.cntv.cn/api/getHttpVideoInfo.do';
const SECRET_KEY = '47899B86370B879139C08EA3B5E88267';
const UID = '826D8646DEBBFD97A82D23CAE45A55BE';

// Patterns used to pull the video ID out of page source. They are tried in
// order and the first capture group of the first match is the ID.
const PID_RULES = [
  /var\s+guid\s*=\s*["']([\da-fA-F]+)["']/,
  /videoCenterId(?:["']\s*,|:)\s*["']([\da-fA-F]+)["']/,
  /changePlayer\s*\(\s*["']([\da-fA-F]+)["']\)/,
  /load[Vv]ideo\s*\(\s*["']([\da-fA-F]+)["']\)/,
  /var\s+initMyAray\s*=\s*["']([\da-fA-F]+)["']/,
  /var\s+ids\s*=\s*\[["']([\da-fA-F]+)["']\]/,
];

// A bare video ID: exactly 32 hexadecimal characters.
const PID_PATTERN = /^[\da-fA-F]{32}$/;
32+
/**
 * Compute the MD5 digest of a value.
 *
 * @param {string} value - Text to hash; it is fed to the hash as UTF-8.
 * @returns {string} The digest as a hexadecimal string.
 */
function md5(value) {
  const hasher = crypto.createHash('md5');
  hasher.update(value, 'utf-8');
  return hasher.digest('hex');
}
39+
/**
 * Perform an HTTP GET with the global `fetch` and return the body as text.
 *
 * A desktop-browser User-Agent header is sent with the request.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} `HTTP <status>: <statusText>` when `response.ok` is false.
 *   Any rejection from `fetch` itself propagates unchanged.
 */
async function httpGet(url) {
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36',
    },
  });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
  }

  // No try/catch here: a catch that only rethrows adds nothing, so failures
  // are simply allowed to propagate to the caller.
  return response.text();
}
58+
/**
 * Classify the user's input.
 *
 * Checks are applied in this order:
 *   1. the literal `-`                      -> 'stdin'
 *   2. text starting with http:// or https:// (case-insensitive) -> 'url'
 *   3. text matching PID_PATTERN            -> 'pid'
 *   4. anything else                        -> 'html'
 *
 * No separate HTML-marker check is needed: a string that matches PID_PATTERN
 * consists only of hex characters, so it cannot contain an HTML tag or a
 * JavaScript declaration, and every other string is treated as HTML/JS anyway.
 *
 * @param {string} input - User input.
 * @returns {string} One of 'url' | 'pid' | 'html' | 'stdin'.
 * @throws {TypeError} When `input` is not a string.
 */
function detectInputType(input) {
  if (typeof input !== 'string') {
    throw new TypeError('input must be a string');
  }
  if (input === '-') {
    return 'stdin';
  }
  if (/^https?:\/\//i.test(input)) {
    return 'url';
  }
  if (PID_PATTERN.test(input)) {
    return 'pid';
  }
  return 'html';
}
85+
/**
 * Find the video ID in page source.
 *
 * Each pattern in PID_RULES is tried in order; the first capture group of the
 * first pattern that matches is returned.
 *
 * @param {string} html - Page source (HTML or JavaScript text).
 * @returns {string|null} The captured ID, or null when no pattern matches.
 */
function extractPid(html) {
  for (const pattern of PID_RULES) {
    const captured = html.match(pattern);
    if (!captured || !captured[1]) {
      continue;
    }
    return captured[1];
  }
  return null;
}
98+
/**
 * Pick the highest-bandwidth stream from an M3U8 playlist and return its URL.
 *
 * For every `#EXT-X-STREAM-INF:` line that carries a BANDWIDTH attribute, the
 * next non-empty, non-comment line is taken as that variant's URI. The variant
 * with the largest BANDWIDTH wins. If no variant is found, the first
 * non-empty, non-comment line of the playlist is used instead.
 *
 * @param {string} m3u8Content - Text of the playlist.
 * @param {string} m3u8BaseUrl - URL the playlist was fetched from; used to
 *   resolve a URI that is not already an absolute http(s) URL.
 * @returns {string|null} Absolute URL of the chosen entry, or null when the
 *   playlist contains no URI line.
 * @throws {TypeError} When a non-absolute URI has to be resolved and
 *   `m3u8BaseUrl` is not a valid URL.
 */
function parseM3U8ForBestQuality(m3u8Content, m3u8BaseUrl) {
  // Trim each line up front so CRLF line endings and stray indentation do not
  // interfere with the prefix checks below.
  const lines = m3u8Content.split('\n').map((line) => line.trim());
  const isUriLine = (line) => line !== '' && !line.startsWith('#');

  // Anchor on the preceding ':' or ',' so that AVERAGE-BANDWIDTH=... is not
  // mistaken for the BANDWIDTH attribute.
  const bandwidthPattern = /[:,]\s*BANDWIDTH=(\d+)/;

  let bestBandwidth = 0;
  let bestUri = null;

  for (let i = 0; i < lines.length; i++) {
    if (!lines[i].startsWith('#EXT-X-STREAM-INF:')) {
      continue;
    }
    const bandwidthMatch = bandwidthPattern.exec(lines[i]);
    if (!bandwidthMatch) {
      continue;
    }
    const bandwidth = Number.parseInt(bandwidthMatch[1], 10);
    // The variant's URI is the first URI line after its tag.
    const uri = lines.find((candidate, j) => j > i && isUriLine(candidate));
    if (uri !== undefined && bandwidth > bestBandwidth) {
      bestBandwidth = bandwidth;
      bestUri = uri;
    }
  }

  // No variant streams: fall back to the first URI line in the playlist.
  if (!bestUri) {
    bestUri = lines.find(isUriLine) || null;
  }

  // Absolute http(s) URIs are returned untouched; everything else
  // (root-relative, relative, protocol-relative) is resolved against the
  // playlist URL by the standard URL resolver.
  if (bestUri && !/^https?:\/\//i.test(bestUri)) {
    bestUri = new URL(bestUri, m3u8BaseUrl).href;
  }

  return bestUri;
}
155+
/**
 * Look up a video by its ID and resolve its best-quality stream URL.
 *
 * Steps, each logged to the console:
 *   1. Build the API request: `tsp` is the current Unix time in seconds and
 *      `vc` is md5(tsp + vn + SECRET_KEY + UID).
 *   2. Fetch the API response and parse it as JSON.
 *   3. Pick the HLS playlist URL, preferring in order `manifest.hls_h5e_url`,
 *      `manifest.hls_url`, then the top-level `hls_h5e_url` and `hls_url`.
 *   4. Fetch that playlist and select the best stream from it.
 *
 * @param {string} pid - Video ID.
 * @returns {Promise<Object>} Object with `success: true`, `title`, `pid`,
 *   `pgtv`, `hls_key` (name of the field the playlist URL was taken from),
 *   `download_url`, `m3u8_url` and `cover_url`.
 * @throws {Error} When the response has no HLS URL or the playlist yields no
 *   stream URL. Errors from the HTTP requests and from JSON parsing propagate.
 */
async function getVideoInfoByPid(pid) {
  console.log(`使用视频ID: ${pid}`);

  // Build the API request parameters.
  console.log('步骤1: 构建API请求...');
  const tsp = Math.floor(Date.now() / 1000);
  const vn = '2049';
  const vc = md5(`${tsp}${vn}${SECRET_KEY}${UID}`);

  const apiParams = new URLSearchParams({
    pid,
    client: 'flash',
    im: '0',
    tsp: String(tsp),
    vn,
    vc,
    uid: UID,
    wlan: '',
  });

  const apiUrl = `${CCTV_API_URL}?${apiParams.toString()}`;
  console.log(`API URL: ${apiUrl}`);

  // Call the API.
  console.log('步骤2: 获取视频信息...');
  const apiResponse = await httpGet(apiUrl);
  const videoData = JSON.parse(apiResponse);

  console.log('视频信息:', {
    title: videoData.title,
    pgtv: videoData.pgtv,
  });

  // Choose the playlist URL and remember which field supplied it, so that
  // `hls_key` is accurate even when the top-level fallback fields are used.
  console.log('步骤3: 解析下载链接...');
  const manifest = videoData.manifest || {};
  const hlsCandidates = [
    ['hls_h5e_url', manifest.hls_h5e_url],
    ['hls_url', manifest.hls_url],
    ['hls_h5e_url', videoData.hls_h5e_url],
    ['hls_url', videoData.hls_url],
  ];
  const chosen = hlsCandidates.find(([, url]) => Boolean(url));

  if (!chosen) {
    throw new Error('无法获取HLS下载链接');
  }
  const [hlsKey, hlsUrl] = chosen;
  console.log(`HLS URL: ${hlsUrl}`);

  // Fetch the playlist and pick the highest-quality stream.
  console.log('步骤4: 解析M3U8获取最佳质量...');
  const m3u8Content = await httpGet(hlsUrl);
  const downloadUrl = parseM3U8ForBestQuality(m3u8Content, hlsUrl);

  if (!downloadUrl) {
    throw new Error('无法解析M3U8内容');
  }

  console.log(`最终下载链接: ${downloadUrl}`);

  return {
    success: true,
    title: videoData.title,
    pid,
    pgtv: videoData.pgtv,
    hls_key: hlsKey,
    download_url: downloadUrl,
    m3u8_url: hlsUrl,
    cover_url: videoData.image,
  };
}
226+
/**
 * Entry point: resolve video information from a URL, a video ID, or page source.
 *
 * String input is trimmed first so that surrounding whitespace (for example a
 * trailing newline) does not stop a bare video ID or URL from being
 * recognised. The input is then classified with detectInputType:
 *   - 'pid'  -> the ID is looked up directly;
 *   - 'url'  -> the page is fetched and the ID extracted from it;
 *   - 'html' -> the ID is extracted from the input itself.
 * Any other type is reported as unsupported.
 *
 * This function does not reject: every failure is logged with console.error
 * and returned as a result object.
 *
 * @param {string} input - A URL, a video ID, or HTML/JavaScript content.
 * @returns {Promise<Object>} The object returned by getVideoInfoByPid on
 *   success, otherwise `{ success: false, error: <message> }`.
 */
async function parseCCTVUrl(input) {
  try {
    const source = typeof input === 'string' ? input.trim() : input;
    const inputType = detectInputType(source);
    console.log(`检测到输入类型: ${inputType}`);

    // A bare ID needs no extraction step.
    if (inputType === 'pid') {
      return await getVideoInfoByPid(source);
    }

    // The 'url' and 'html' paths differ only in where the page source comes
    // from and in the message used when no ID is found.
    let html;
    let notFoundMessage;
    if (inputType === 'url') {
      console.log(`正在解析URL: ${source}`);
      console.log('步骤1: 获取页面内容...');
      html = await httpGet(source);
      notFoundMessage = '无法从页面中提取视频ID';
    } else if (inputType === 'html') {
      console.log('检测到HTML内容,直接提取视频ID...');
      html = source;
      notFoundMessage = '无法从HTML内容中提取视频ID';
    } else {
      throw new Error(`不支持的输入类型: ${inputType}`);
    }

    const pid = extractPid(html);
    if (!pid) {
      throw new Error(notFoundMessage);
    }
    console.log(`找到视频ID: ${pid}`);

    // `return await` keeps a rejection inside this try block.
    return await getVideoInfoByPid(pid);
  } catch (error) {
    // A thrown value is not guaranteed to be an Error instance.
    const message = error instanceof Error ? error.message : String(error);
    console.error('解析失败:', message);
    return {
      success: false,
      error: message,
    };
  }
}
280+
// Export the functions for use by other modules.
module.exports = { parseCCTVUrl, detectInputType, getVideoInfoByPid };