Skip to content

Commit 566e6ab

Browse files
committed
修改去读书[书].js文件的正则替换规则
1 parent 06e8d5a commit 566e6ab

File tree

1 file changed

+60
-79
lines changed

1 file changed

+60
-79
lines changed

spider/js/去读书[书].js

Lines changed: 60 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -29,127 +29,108 @@ var rule = {
2929
headers: {
3030
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36'
3131
},
32-
request: async function (url, obj) {
33-
obj = obj || {};
34-
try {
35-
const response = await _fetch(url, {
36-
method: obj.method || 'GET',
37-
headers: obj.headers || this.headers
38-
});
39-
return response.text();
40-
} catch (err) {
41-
return '';
42-
}
43-
},
44-
32+
4533
一级: async function () {
4634
let {input, pdfa, pdfh, pd} = this;
47-
let url = input.startsWith('http') ? input : this.host + '/book/' + input + '/0/1.html';
48-
let html = await this.request(url);
35+
let url = input.startsWith('http') ? input : `${this.host}/book/${input}/0/1.html`;
36+
let html = await request(url);
4937
let d = [];
5038
let items = pdfa(html, '.blockcontent .c_row') || pdfa(html, '.c_row') || [];
5139
for (let item of items) {
5240
let title = pdfh(item, '.c_subject a:eq(1)&&Text');
53-
let itemUrl = pd(item, '.c_subject a:eq(1)&&href');
54-
if (!title || !itemUrl) continue;
55-
let remarks = pdfh(item, '.c_tag span:eq(1)&&Text') || '';
56-
let pic = pd(item, 'img&&src') || '';
57-
let content = pdfh(item, '.c_description&&Text') || '';
41+
let url = pd(item, '.c_subject a:eq(1)&&href');
42+
if (!title || !url) continue;
5843
d.push({
59-
title: title,
60-
url: itemUrl,
61-
desc: remarks,
62-
pic_url: pic,
63-
content: content,
44+
title,
45+
url,
46+
desc: pdfh(item, '.c_tag span:eq(1)&&Text') || '',
47+
pic_url: pd(item, 'img&&src') || '',
48+
content: pdfh(item, '.c_description&&Text') || '',
6449
});
6550
}
6651
return setResult(d);
6752
},
6853

6954
二级: async function () {
7055
let {input, pdfa, pdfh, pd} = this;
71-
let html = await this.request(input);
72-
let VOD = {};
73-
VOD.vod_name = pdfh(html, '[property="og:novel:book_name"]&&content') || '';
74-
VOD.type_name = '';
75-
VOD.vod_pic = pd(html, '.divbox.cf img&&src') || '';
76-
VOD.vod_content = pdfh(html, '.tabcontent .tabvalue:eq(0)&&Text') || '';
77-
VOD.vod_remarks = pdfh(html, 'h3 a&&Text') || '';
78-
VOD.vod_year = '';
79-
VOD.vod_area = '';
80-
VOD.vod_actor = pdfh(html, '[property="og:novel:author"]&&content') || '';
81-
VOD.vod_director = VOD.vod_actor;
82-
VOD.vod_play_from = '去读书网';
83-
let toc_url = pd(html, 'a:contains(点击阅读)&&href') || '';
84-
if (toc_url && !toc_url.startsWith('http')) {
85-
toc_url = this.host + toc_url;
86-
}
87-
let toc_html = toc_url ? await this.request(toc_url) : '';
56+
let html = await request(input);
57+
let VOD = {
58+
vod_name: pdfh(html, '[property="og:novel:book_name"]&&content') || '',
59+
type_name: '',
60+
vod_pic: pd(html, '.divbox.cf img&&src') || '',
61+
vod_content: pdfh(html, '.tabcontent .tabvalue:eq(0)&&Text') || '',
62+
vod_remarks: pdfh(html, 'h3 a&&Text') || '',
63+
vod_year: '',
64+
vod_area: '',
65+
vod_actor: pdfh(html, '[property="og:novel:author"]&&content') || '',
66+
vod_director: '',
67+
vod_play_from: '去读书网',
68+
vod_play_url: ''
69+
};
70+
VOD.vod_director = VOD.vod_actor; // 复用作者信息
71+
let tocUrl = pd(html, 'a:contains(点击阅读)&&href') || '';
72+
tocUrl = tocUrl && !tocUrl.startsWith('http') ? `${this.host}${tocUrl}` : tocUrl;
73+
let tocHtml = tocUrl ? await request(tocUrl) : '';
8874
let chapters = [];
89-
let chapterItems = pdfa(toc_html, '.index li') || [];
90-
for (let chapter of chapterItems) {
91-
let title = pdfh(chapter, 'a&&Text');
92-
let chapter_url = pd(chapter, 'a&&href');
93-
if (!title || !chapter_url) continue;
94-
if (!chapter_url.startsWith('http')) {
95-
chapter_url = this.host + chapter_url;
96-
}
97-
chapters.push(title + '$' + chapter_url);
75+
let chs = pdfa(tocHtml, '.index li') || [];
76+
for (let ch of chs) {
77+
let title = pdfh(ch, 'a&&Text');
78+
let chUrl = pd(ch, 'a&&href');
79+
if (!title || !chUrl) continue;
80+
chUrl = chUrl.startsWith('http') ? chUrl : `${this.host}${chUrl}`;
81+
chapters.push(`${title}$${chUrl}`);
9882
}
9983
VOD.vod_play_url = chapters.join('#');
10084
return VOD;
10185
},
10286

10387
搜索: async function () {
104-
let {KEY, pdfa, pdfh, pd} = this;
105-
let url = this.host + this.searchUrl.replace('**', encodeURIComponent(KEY));
106-
let html = await this.request(url);
107-
if (!html) {
108-
url = this.host + '/modules/article/search.php?q=' + encodeURIComponent(KEY);
109-
html = await this.request(url);
110-
}
88+
let {input, pdfa, pdfh, pd} = this;
89+
let html = await request(input);
11190
let d = [];
11291
let items = pdfa(html, '#jieqi_page_contents .c_row') || [];
11392
for (let item of items) {
11493
let title = pdfh(item, '.c_subject a&&Text');
115-
let itemUrl = pd(item, '.c_subject a&&href');
116-
if (!title || !itemUrl) continue;
117-
itemUrl = itemUrl.startsWith('http') ? itemUrl : this.host + itemUrl;
94+
let url = pd(item, '.c_subject a&&href');
95+
if (!title || !url) continue;
96+
url = url.startsWith('http') ? url : `${this.host}${url}`;
11897
let pic = pd(item, 'img&&src') || '';
119-
pic = pic.startsWith('http') ? pic : this.host + pic;
98+
pic = pic.startsWith('http') ? pic : `${this.host}${pic}`;
12099
d.push({
121-
title: title,
122-
url: itemUrl,
100+
title,
101+
url,
123102
desc: pdfh(item, '.c_tag span:eq(1)&&Text') || '',
124103
pic_url: pic,
125104
content: '',
126105
});
127106
}
128107
return setResult(d);
129108
},
130-
109+
// 懒加载解析(章节内容)
131110
lazy: async function () {
132111
let {input, pdfh} = this;
133-
let html = await this.request(input);
112+
let html = await request(input);
134113
let title = pdfh(html, 'h1&&Text') || '';
135114
let content = pdfh(html, '#acontent&&Html') || '';
136115
if (content) {
137-
content = content.replace(/<script[^>]*?>.*?<\/script>/gs, '')
138-
.replace(/<\/p>/g, '\n\n')
139-
.replace(/<br[^>]*?>/g, '\n')
140-
.replace(/<[^>]*?>/g, '')
141-
.replace(/.*| www\.qudushu\.la|.*便/g, '')
142-
.replace(/[()]/g, '')
143-
.replace(/&nbsp;/g, ' ')
144-
.replace(/[ \t]+/g, ' ')
145-
.replace(/\n[ \t]+|[ \t]+\n/g, '\n')
146-
.replace(/\n+/g, '\n\n')
147-
.trim();
116+
const replaceRules = [
117+
[/<script[^>]*?>[\s\S]*?<\/script>/gi, ''],
118+
[/<\/p>|<br\s*\/?>/g, '\n'],
119+
[/<[^>]*?>/g, ''],
120+
[/.*| www\.qudushu\.la|.*便/g, ''],
121+
[/&nbsp;|[ \t]+/g, ' '],
122+
[/\n[ \t]*\n+/g, '\n']
123+
];
124+
replaceRules.forEach(([reg, val]) => content = content.replace(reg, val));
125+
content = content.trim();
126+
if (content.startsWith(title)) {
127+
content = content.replace(title, '').trim();
128+
}
148129
}
149130
return {
150131
parse: 0,
151-
url: 'novel://' + JSON.stringify({title, content}),
132+
url: `novel://${JSON.stringify({title, content})}`,
152133
js: ''
153134
};
154135
}
155-
};
136+
};

0 commit comments

Comments
 (0)