-
Notifications
You must be signed in to change notification settings - Fork 159
/
Copy path采集之王[合].js
435 lines (427 loc) · 21.6 KB
/
采集之王[合].js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
/**
* 强烈推荐静态分类。可以加快速度!!!
* 不建议:
* 传参 ?type=url¶ms=../json/采集.json
* 建议:
* 传参 ?type=url¶ms=../json/采集静态.json$1
* 传参 ?type=url¶ms=../json/采集[zy]静态.json$1
* 传参 ?type=url¶ms=../json/采集[密]静态.json$1
* hipy-server支持@改名比如:
* 传参 ?type=url¶ms=../json/采集静态.json$1@采王道长[合]
* 传参 ?type=url¶ms=../json/采集[zy]静态.json$1@采王zy[密]
* 传参 ?type=url¶ms=../json/采集[密]静态.json@采王成人[密]
* [{"name":"暴风资源","url":"https://bfzyapi.com","parse_url":""},{"name":"飞刀资源","url":"http://www.feidaozy.com","parse_url":""},{"name":"黑木耳资源","url":"https://www.heimuer.tv","parse_url":""}]
*/
var rule = {
title: '采集之王[合]',
author: '道长',
version: '20240706 beta17',
update_info: `
20240706:
1.静态json数据支持cate_excludes分类名称列表过滤无数据分类
2.更新采集分类生成器增加过滤筛选模式
20240705:
1.支持传参json后面增加$1 这样的额外标识,用于搜索结果精准匹配
2.支持传参json后面增加$1$1 这样的额外标识,用于强制获取搜索图片。$1$不显示图片。默认是搜索强制有图片的[已实现详情页请求使用批量]
3.修复二级数据无序匹配搜索列表图片的问题
4.修改搜索精准和图片显示额外参数间隔符从#变为$
20240703:
1.采集json支持"searchable": 0,用于搜索时排除这个源
20240604:
1.首页推荐取消硬控等待。增加随机推荐功能。
2.首页推荐新增更新日志查看功能
【特别说明】目前只支持标准json格式的采集站(资源站一般都提供xml和json两种接口,目前没有支持xml的想法,没什么必要)
有些资源站的json接口不是标准的/api.php/provide/vod/,需要自己在采集静态.json中编辑对应的api属性填写比如:/api.php/provide/vod/at/json/
有些资源站的采集数据是加密后的切片片段,可能需要采集站特定的解析接口,需要自己编辑json里的parse_url属性
资源站部分大分类下无数据很正常,可以自行编辑json里cate_exclude属性排除掉自己测试过无数据的分类(小程序无法自动识别,只能人工测好哪些分类无数据)
`.trim(),
host: '',
homeTid: '', // 首页推荐。一般填写第一个资源站的想要的推荐分类的id.可以空
homeUrl: '/api.php/provide/vod/?ac=detail&t={{rule.homeTid}}',
detailUrl: '/api.php/provide/vod/?ac=detail&ids=fyid',
searchUrl: '/api.php/provide/vod/?wd=**&pg=#TruePage##page=fypage',
classUrl: '/api.php/provide/vod/',
url: '/api.php/provide/vod/?ac=detail&pg=fypage&t=fyfilter',
filter_url: '{{fl.类型}}',
headers: {'User-Agent': 'MOBILE_UA'},
timeout: 5000, // class_name: '电影&电视剧&综艺&动漫',
limit: 20,
search_limit: 5, // 搜索限制取前5个,可以注释掉,就不限制搜索
searchable: 1,//是否启用全局搜索,
quickSearch: 0,//是否启用快速搜索,
filterable: 1,//是否启用分类筛选,
play_parse: true,
parse_url: '', // 这个参数暂时不起作用。聚合类的每个资源应该有自己独立的解析口。单独配置在采集.json里的parse_url有效
search_match: false, // 搜索精准匹配
search_pic: true, // 搜索强制需要图片
// params: 'http://127.0.0.1:5707/files/json/%E9%87%87%E9%9B%86.json',
// params: 'http://127.0.0.1:5707/files/json/采集静态.json$1',
// params: 'http://127.0.0.1:5707/files/json/采集[zy]静态.json$1',
// hostJs:$js.toString(()=>{
//
// }),
预处理: async function () {
function getClasses(item) {
let classes = [];
if (item.class_name && item.class_url) {
if (!/&|电影|电视剧|综艺|动漫[\u4E00-\u9FA5]+/.test(item.class_name)) {
try {
item.class_name = ungzip(item.class_name)
} catch (e) {
log(`不识别的class_name导致gzip解码失败:${e}`)
return classes
}
}
let names = item.class_name.split('&');
let urls = item.class_url.split('&');
let cnt = Math.min(names.length, urls.length);
for (let i = 0; i < cnt; i++) {
classes.push({
'type_id': urls[i],
'type_name': names[i]
});
}
}
return classes
}
if (typeof (batchFetch) === 'function') {
// 支持批量请求直接放飞自我。搜索限制最大线程数量16
rule.search_limit = 16;
log('当前程序支持批量请求[batchFetch],搜索限制已设置为16');
}
let _url = rule.params;
log(`传入参数:${_url}`);
if (_url && typeof (_url) === 'string' && /^(http|file)/.test(_url)) {
if (_url.includes('$')) {
let _url_params = _url.split('$');
_url = _url_params[0];
rule.search_match = !!(_url_params[1]);
if (_url_params.length > 2) { // 强制图片
rule.search_pic = !!(_url_params[2]);
}
}
let html = await request(_url);
let json = JSON.parse(html);
let _classes = [];
rule.filter = {};
rule.filter_def = {};
for (const it of json) {
let _obj = {
type_name: it.name,
type_id: it.url,
parse_url: it.parse_url || '',
searchable: it.searchable !== 0,
api: it.api || '',
cate_exclude: it.cate_exclude || '',
cate_excludes: it.cate_excludes || [],
// class_name: it.class_name || '',
// class_url: it.class_url || '',
};
_classes.push(_obj);
try {
let json1 = [];
if (it.class_name && it.class_url) {
json1 = getClasses(it);
} else {
json1 = JSON.parse(await request(urljoin(_obj.type_id, _obj.api || rule.classUrl))).class;
}
if (_obj.cate_excludes && Array.isArray(_obj.cate_excludes) && _obj.cate_excludes.length > 0) {
json1 = json1.filter(cl => !_obj.cate_excludes.includes(cl.type_name));
} else if (_obj.cate_exclude) {
json1 = json1.filter(cl => !new RegExp(_obj.cate_exclude, 'i').test(cl.type_name));
}
rule.filter[_obj.type_id] = [{
"key": "类型", "name": "类型", "value": json1.map(i => {
return {"n": i.type_name, 'v': i.type_id}
})
}];
if (json1.length > 0) {
rule.filter_def[it.url] = {"类型": json1[0].type_id};
}
} catch (e) {
rule.filter[it.url] = [{"key": "类型", "name": "类型", "value": [{"n": "全部", "v": ""}]}];
}
}
rule.classes = _classes;
}
},
class_parse: async function () {
return {
class: rule.classes
}
},
推荐: async function () {
let {input, publicUrl} = this;
let update_info = [{
vod_name: '更新日志',
vod_id: 'update_info',
vod_remarks: `版本:${rule.version}`,
vod_pic: urljoin(publicUrl, './images/logo.png'),
}];
let VODS = [];
if (rule.classes) {
let randomClass = getRandomItem(rule.classes);
let _url = urljoin(randomClass.type_id, input);
if (randomClass.api) {
_url = _url.replace('/api.php/provide/vod/', randomClass.api)
}
try {
let html = await request(_url, {timeout: rule.timeout});
let json = JSON.parse(html);
VODS = json.list;
VODS.forEach(it => {
it.vod_id = randomClass.type_id + '$' + it.vod_id;
it.vod_remarks = it.vod_remarks + '|' + randomClass.type_name;
});
} catch (e) {
}
}
VODS = update_info.concat(VODS);
return VODS;
},
一级: async function () {
let {MY_CATE, input} = this;
let VODS = [];
if (rule.classes) {
// log(input);
let _url = urljoin(MY_CATE, input);
let current_vod = rule.classes.find(item => item.type_id === MY_CATE);
if (current_vod && current_vod.api) {
_url = _url.replace('/api.php/provide/vod/', current_vod.api)
}
let html = await request(_url);
let json = JSON.parse(html);
VODS = json.list;
VODS.forEach(it => {
it.vod_id = MY_CATE + '$' + it.vod_id
});
}
return VODS;
},
二级: async function () {
let {orId, fyclass, input} = this;
let VOD = {};
if (orId === 'update_info') {
VOD = {
vod_content: rule.update_info.trim(),
vod_name: '更新日志',
type_name: '更新日志',
vod_pic: 'https://resource-cdn.tuxiaobei.com/video/FtWhs2mewX_7nEuE51_k6zvg6awl.png',
vod_remarks: `版本:${rule.version}`,
vod_play_from: '道长在线',
// vod_play_url: '嗅探播放$https://resource-cdn.tuxiaobei.com/video/10/8f/108fc9d1ac3f69d29a738cdc097c9018.mp4',
vod_play_url: '随机小视频$http://api.yujn.cn/api/zzxjj.php',
};
} else {
if (rule.classes) {
let _url = urljoin(fyclass, input);
let current_vod = rule.classes.find(item => item.type_id === fyclass);
if (current_vod && current_vod.api) {
_url = _url.replace('/api.php/provide/vod/', current_vod.api)
}
let html = await request(_url);
let json = JSON.parse(html);
let data = json.list;
VOD = data[0];
if (current_vod && current_vod.type_name) {
VOD.vod_play_from = VOD.vod_play_from.split('$$$').map(it => current_vod.type_name + '|' + it).join('$$$')
}
}
}
return VOD;
},
搜索: async function () {
let {MY_PAGE, input, KEY} = this;
let VODS = [];
if (rule.classes) {
let canSearch = rule.classes.filter(it => it.searchable);
let page = Number(MY_PAGE);
page = (MY_PAGE - 1) % Math.ceil(canSearch.length / rule.search_limit) + 1;
let truePage = Math.ceil(MY_PAGE / Math.ceil(canSearch.length / rule.search_limit));
if (rule.search_limit) {
let start = (page - 1) * rule.search_limit;
let end = page * rule.search_limit;
let t1 = new Date().getTime();
let searchMode = typeof (batchFetch) === 'function' ? '批量' : '单个';
log('start:' + start);
log('end:' + end);
log('搜索模式:' + searchMode);
log('精准搜索:' + rule.search_match);
log('强制获取图片:' + rule.search_pic);
// log('t1:' + t1);
if (start < canSearch.length) {
let search_classes = canSearch.slice(start, end);
let urls = [];
search_classes.forEach(it => {
let _url = urljoin(it.type_id, input);
if (it.api) {
_url = _url.replace('/api.php/provide/vod/', it.api)
}
_url = _url.replace("#TruePage#", "" + truePage);
urls.push(_url);
});
let results_list = [];
let results = [];
if (typeof (batchFetch) === 'function') {
let reqUrls = urls.map(it => {
return {
url: it.split('#')[0], // 去掉 #page=页码
options: {timeout: rule.timeout}
}
});
let rets = await batchFetch(reqUrls);
let detailUrls = [];
let detailUrlCount = 0;
rets.forEach((ret, idx) => {
let it = search_classes[idx];
if (ret) {
try {
let json = JSON.parse(ret);
let data = json.list;
data.forEach(i => {
i.site_name = it.type_name;
i.vod_id = it.type_id + '$' + i.vod_id;
i.vod_remarks = i.vod_remarks + '|' + it.type_name;
});
if (rule.search_match) {
data = data.filter(item => item.vod_name && (new RegExp(KEY, 'i')).test(item.vod_name))
}
if (data.length > 0) {
if (rule.search_pic && !data[0].vod_pic) {
log(`当前搜索站点【${it.type_name}】没图片,尝试访问二级去获取图片`);
let detailUrl = urls[idx].split('wd=')[0] + 'ac=detail&ids=' + data.map(k => k.vod_id.split('$')[1]).join(',');
detailUrls.push(detailUrl);
results_list.push({
data: data,
has_pic: false,
detailUrlCount: detailUrlCount
});
detailUrlCount++;
// try {
// let detailJson = JSON.parse(request(detailUrl));
// data.forEach((d, _seq) => {
// log('二级数据列表元素数:' + detailJson.list.length);
// let detailVodPic = detailJson.list[_seq].vod_pic;
// if (detailVodPic) {
// Object.assign(d, {vod_pic: detailVodPic});
// }
// });
// } catch (e) {
// log(`强制获取网站${it.type_id}的搜索图片失败:${e.message}`);
// }
} else {
results_list.push({data: data, has_pic: true});
}
// results = results.concat(data);
}
} catch (e) {
log(`请求:${it.type_id}发生错误:${e.message}`)
}
}
});
// 构造请求二级的batchFetch列表
let reqUrls2 = detailUrls.map(it => {
return {
url: it,
options: {timeout: rule.timeout}
}
});
let rets2 = reqUrls2.length > 0 ? await batchFetch(reqUrls2) : [];
for (let k = 0; k < results_list.length; k++) {
let result_data = results_list[k].data;
if (!results_list[k].has_pic) {
try {
let detailJson = JSON.parse(rets2[results_list[k].detailUrlCount]);
log('二级数据列表元素数:' + detailJson.list.length);
result_data.forEach((d, _seq) => {
// let detailVodPic = detailJson.list[_seq].vod_pic;
// log(detailJson);
let detailVodPic = detailJson.list.find(vod => vod.vod_id.toString() === d.vod_id.split('$')[1]);
if (detailVodPic) {
Object.assign(d, {vod_pic: detailVodPic.vod_pic});
}
});
} catch (e) {
log(`强制获取网站${result_data[0].site_name}的搜索图片失败:${e.message}`);
}
}
results = results.concat(result_data);
}
} else {
for (const idx in urls) {
let _url = urls[idx];
let it = search_classes[idx];
try {
let html = await request(_url);
let json = JSON.parse(html);
let data = json.list;
data.forEach(i => {
i.vod_id = it.type_id + '$' + i.vod_id;
i.vod_remarks = i.vod_remarks + '|' + it.type_name;
});
if (rule.search_match) {
data = data.filter(item => item.vod_name && (new RegExp(KEY, 'i')).test(item.vod_name))
}
if (data.length > 0) {
if (rule.search_pic && !data[0].vod_pic) {
log(`当前搜索站点【${it.type_name}】没图片,尝试访问二级去获取图片`);
let detailUrl = urls[idx].split('wd=')[0] + 'ac=detail&ids=' + data.map(k => k.vod_id.split('$')[1]).join(',');
try {
let detailJson = JSON.parse(await request(detailUrl));
log('二级数据列表元素数:' + detailJson.list.length);
data.forEach((d, _seq) => {
// let detailVodPic = detailJson.list[_seq].vod_pic;
let detailVodPic = detailJson.list.find(vod => vod.vod_id.toString() === d.vod_id.split('$')[1]);
if (detailVodPic) {
Object.assign(d, {vod_pic: detailVodPic.vod_pic});
}
});
} catch (e) {
log(`强制获取网站${it.type_id}的搜索图片失败:${e.message}`);
}
}
results = results.concat(data);
}
results = results.concat(data);
} catch (e) {
log(`请求:${it.type_id}发生错误:${e.message}`)
}
}
}
VODS = results;
let t2 = new Date().getTime();
// log('t2:'+t2);
log(`${searchMode}搜索:${urls.length}个站耗时:${(Number(t2) - Number(t1))}ms`)
}
}
}
return VODS;
},
lazy: async function () {
let {flag, input} = this;
// lazy想办法用对应的parse_url,但是有难度,暂未实现
let parse_url = '';
if (flag && flag.includes('|')) {
let type_name = flag.split('|')[0];
let current_vod = rule.classes.find(item => item.type_name === type_name);
if (current_vod && current_vod.parse_url) {
parse_url = current_vod.parse_url
}
}
if (/\.(m3u8|mp4)/.test(input)) {
input = {parse: 0, url: input}
} else {
if (parse_url.startsWith('json:')) {
let purl = parse_url.replace('json:', '') + input;
let html = await request(purl);
input = {parse: 0, url: JSON.parse(html).url}
} else {
input = parse_url + input;
}
}
return input
},
}
function getRandomItem(items) {//从列表随机取出一个元素
return items[Math.random() * items.length | 0];
}