Skip to content

Commit 854b19d

Browse files
author
Taois
committed
feat: 新增php爬虫入库db本地源
1 parent 9b03c59 commit 854b19d

File tree

6 files changed

+791
-2
lines changed

6 files changed

+791
-2
lines changed
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
<?php
2+
require_once __DIR__ . '/lib/spider.php';
3+
4+
class Spider extends BaseSpider {
5+
private $db;
6+
private $dbPath;
7+
8+
/**
 * Name of this source.
 *
 * @return string A fixed label; it does not depend on any state.
 */
public function getName() {
    return '74P福利(本地库)';
}
11+
12+
/**
 * Open the local SQLite database that backs this source.
 *
 * The database is looked up in this file's directory: first under this
 * file's own base name with a ".db" extension, then under the original
 * crawler's database name. If no file is found, or it cannot be opened,
 * $this->db is left null; the content methods check for that and return
 * empty results.
 *
 * @param string $extend Extension value passed by the caller (not used here).
 * @return void
 */
public function init($extend = "") {
    $this->db = null;

    // Replace only the file extension; a ".php" elsewhere in the name is kept.
    $dbName = pathinfo(__FILE__, PATHINFO_FILENAME) . '.db';
    $this->dbPath = __DIR__ . '/' . $dbName;

    // Fall back to the database named after the original crawler script.
    if (!is_file($this->dbPath)) {
        $originName = '74P福利图 ᵈᶻ[画].db';
        if (is_file(__DIR__ . '/' . $originName)) {
            $this->dbPath = __DIR__ . '/' . $originName;
        }
    }

    // No database on disk: stay disconnected instead of letting SQLite
    // create an empty file that has none of the expected tables.
    if (!is_file($this->dbPath)) {
        return;
    }

    try {
        // Explicit flags without SQLITE3_OPEN_CREATE (the constructor default
        // includes it), so a file that vanished meanwhile is not re-created.
        $db = new SQLite3($this->dbPath, SQLITE3_OPEN_READWRITE);
        $db->busyTimeout(5000);
        $this->db = $db;
    } catch (Exception $e) {
        // Could not open the database; leave the source disconnected.
        $this->db = null;
    }
}
32+
33+
/**
 * Always returns false; the URL argument is not inspected.
 *
 * @param mixed $url Ignored.
 * @return bool
 */
public function isVideoFormat($url) {
    return false;
}
36+
37+
/**
 * Always returns false for this source.
 *
 * @return bool
 */
public function manualVideoCheck() {
    return false;
}
40+
41+
/**
 * List the categories stored in the local database.
 *
 * @param mixed $filter Not used by this implementation.
 * @return array ['class' => [['type_id' => ..., 'type_name' => ...], ...], 'filters' => []];
 *               only ['class' => []] when no database is open.
 */
public function homeContent($filter) {
    if (!$this->db) return ['class' => []];

    $res = $this->db->query("SELECT tid, name FROM categories");
    if (!$res) {
        // query() returns false on failure (e.g. the table is missing);
        // report "no categories" rather than calling a method on false.
        return ['class' => [], 'filters' => []];
    }

    $classes = [];
    while ($row = $res->fetchArray(SQLITE3_ASSOC)) {
        $classes[] = [
            "type_id" => $row['tid'],
            "type_name" => $row['name']
        ];
    }

    return ['class' => $classes, 'filters' => []];
}
54+
55+
/**
 * Home-page item list; this source always returns an empty one.
 *
 * @return array ['list' => []]
 */
public function homeVideoContent() {
    $items = [];
    return ['list' => $items];
}
58+
59+
/**
 * Return one page of entries from the `vods` table for a category.
 *
 * Rows are ordered by `crawled_at` descending, 20 per page.
 *
 * @param mixed $tid    Category id, matched against vods.type_id (bound as text).
 * @param mixed $pg     1-based page number; values below 1 are treated as 1.
 * @param mixed $filter Not used by this implementation.
 * @param mixed $extend Not used by this implementation.
 * @return array ['list' => [...], 'page' => int, 'pagecount' => int, 'limit' => int, 'total' => int]
 */
public function categoryContent($tid, $pg = 1, $filter = [], $extend = []) {
    $limit = 20;
    // Normalise the page so the offset can never become negative.
    $page = max(1, (int) $pg);
    $empty = ['list' => [], 'page' => $page, 'pagecount' => 0, 'limit' => $limit, 'total' => 0];

    if (!$this->db) return $empty;

    $offset = ($page - 1) * $limit;

    // Total number of rows in this category.
    $total = 0;
    $countStmt = $this->db->prepare("SELECT COUNT(*) as total FROM vods WHERE type_id = :tid");
    if (!$countStmt) return $empty; // prepare() returns false on failure
    $countStmt->bindValue(':tid', $tid, SQLITE3_TEXT);
    $countRes = $countStmt->execute();
    if ($countRes && ($row = $countRes->fetchArray(SQLITE3_ASSOC))) {
        $total = (int) $row['total'];
    }

    // Rows of the requested page.
    $stmt = $this->db->prepare("SELECT * FROM vods WHERE type_id = :tid ORDER BY crawled_at DESC LIMIT :limit OFFSET :offset");
    if (!$stmt) return $empty;
    $stmt->bindValue(':tid', $tid, SQLITE3_TEXT);
    $stmt->bindValue(':limit', $limit, SQLITE3_INTEGER);
    $stmt->bindValue(':offset', $offset, SQLITE3_INTEGER);

    $vlist = [];
    $res = $stmt->execute();
    if ($res) {
        while ($row = $res->fetchArray(SQLITE3_ASSOC)) {
            $vlist[] = [
                'vod_id' => $row['vod_id'],
                'vod_name' => $row['vod_name'],
                'vod_pic' => $row['vod_pic'],
                'vod_remarks' => $row['vod_remarks'],
                'style' => ["type" => "rect", "ratio" => 1.33]
            ];
        }
    }

    // ceil() returns a float; cast so the page count is emitted as an integer.
    $pageCount = (int) ceil($total / $limit);

    return ['list' => $vlist, 'page' => $page, 'pagecount' => $pageCount, 'limit' => $limit, 'total' => $total];
}
95+
96+
/**
 * Load a single entry together with its episode list.
 *
 * Only the first id is used. The entry comes from `vods` (joined with
 * `categories` for the category name); its episodes come from `episodes`
 * (joined with `play_sources` for the source name) and are grouped by
 * source name.
 *
 * Output encoding, as built below:
 *   - each episode is "name$url"
 *   - episodes of one source are joined with "#"
 *   - sources (and their episode strings) are joined with "$$$"
 *
 * @param mixed $ids Array of ids (first element is used) or a single id.
 * @return array ['list' => [vod]] or ['list' => []] when nothing is found.
 */
public function detailContent($ids) {
    if (!$this->db) return ['list' => []];

    // Accept a single scalar id as well as a list, and tolerate an empty list.
    $idList = is_array($ids) ? array_values($ids) : [$ids];
    if (!isset($idList[0]) || $idList[0] === '') return ['list' => []];
    $vod_id = $idList[0];

    // 1. Fetch the entry (joined with categories to get type_name).
    $stmt = $this->db->prepare("
        SELECT v.*, c.name as type_name
        FROM vods v
        LEFT JOIN categories c ON v.type_id = c.tid
        WHERE v.vod_id = :vod_id
    ");
    if (!$stmt) return ['list' => []]; // prepare() returns false on failure
    $stmt->bindValue(':vod_id', $vod_id, SQLITE3_TEXT);
    $res = $stmt->execute();
    $vod_row = $res ? $res->fetchArray(SQLITE3_ASSOC) : false;

    if (!$vod_row) return ['list' => []];

    $vod = [
        'vod_id' => $vod_row['vod_id'],
        'vod_name' => $vod_row['vod_name'],
        'vod_pic' => $vod_row['vod_pic'],
        'type_name' => $vod_row['type_name'],
        'vod_content' => $vod_row['vod_content'],
        'vod_play_from' => '',
        'vod_play_url' => ''
    ];
    $vod_pk = $vod_row['id'];

    // 2. Fetch the episodes (joined with play_sources to get play_from).
    $stmt_ep = $this->db->prepare("
        SELECT e.*, s.name as play_from
        FROM episodes e
        LEFT JOIN play_sources s ON e.sid = s.id
        WHERE e.vod_pk = :vod_pk
    ");
    $res_ep = false;
    if ($stmt_ep) {
        $stmt_ep->bindValue(':vod_pk', $vod_pk, SQLITE3_INTEGER);
        $res_ep = $stmt_ep->execute();
    }

    $episodes_map = []; // play_from => [ "name$url", ... ]

    if ($res_ep) {
        while ($row = $res_ep->fetchArray(SQLITE3_ASSOC)) {
            // The LEFT JOIN can yield NULL here; use an explicit string key.
            $play_from = (string) $row['play_from'];
            $name = $row['name'];
            // Prefer the resolved URL; fall back to the raw URL when it is empty.
            $url = !empty($row['resolved_url']) ? $row['resolved_url'] : $row['raw_url'];

            if (!isset($episodes_map[$play_from])) {
                $episodes_map[$play_from] = [];
            }
            $episodes_map[$play_from][] = $name . '$' . $url;
        }
    }

    $play_from_list = [];
    $play_url_list = [];

    foreach ($episodes_map as $from => $eps) {
        $play_from_list[] = $from;
        $play_url_list[] = implode("#", $eps);
    }

    $vod['vod_play_from'] = implode('$$$', $play_from_list);
    $vod['vod_play_url'] = implode('$$$', $play_url_list);

    return ['list' => [$vod]];
}
162+
163+
/**
 * Search the `vods` table by name (substring match), one page at a time.
 *
 * Rows are ordered by `crawled_at` descending, 20 per page.
 *
 * @param mixed $key   Search text; matched literally as a substring of vod_name.
 * @param mixed $quick Not used by this implementation.
 * @param mixed $pg    1-based page number; values below 1 are treated as 1.
 * @return array ['list' => [...], 'page' => int, 'pagecount' => int, 'limit' => int, 'total' => int]
 */
public function searchContent($key, $quick = false, $pg = 1) {
    $limit = 20;
    // Normalise the page so the offset can never become negative.
    $page = max(1, (int) $pg);
    $empty = ['list' => [], 'page' => $page, 'pagecount' => 0, 'limit' => $limit, 'total' => 0];

    if (!$this->db) return $empty;

    $offset = ($page - 1) * $limit;

    // Escape LIKE metacharacters so "%" and "_" typed by the user are matched
    // literally; the queries below declare "\" as the escape character.
    $pattern = '%' . strtr((string) $key, ['\\' => '\\\\', '%' => '\\%', '_' => '\\_']) . '%';

    // Total number of matching rows.
    $total = 0;
    $countStmt = $this->db->prepare("SELECT COUNT(*) as total FROM vods WHERE vod_name LIKE :key ESCAPE '\\'");
    if (!$countStmt) return $empty; // prepare() returns false on failure
    $countStmt->bindValue(':key', $pattern, SQLITE3_TEXT);
    $countRes = $countStmt->execute();
    if ($countRes && ($row = $countRes->fetchArray(SQLITE3_ASSOC))) {
        $total = (int) $row['total'];
    }

    // Rows of the requested page.
    $stmt = $this->db->prepare("SELECT * FROM vods WHERE vod_name LIKE :key ESCAPE '\\' ORDER BY crawled_at DESC LIMIT :limit OFFSET :offset");
    if (!$stmt) return $empty;
    $stmt->bindValue(':key', $pattern, SQLITE3_TEXT);
    $stmt->bindValue(':limit', $limit, SQLITE3_INTEGER);
    $stmt->bindValue(':offset', $offset, SQLITE3_INTEGER);

    $vlist = [];
    $res = $stmt->execute();
    if ($res) {
        while ($row = $res->fetchArray(SQLITE3_ASSOC)) {
            $vlist[] = [
                'vod_id' => $row['vod_id'],
                'vod_name' => $row['vod_name'],
                'vod_pic' => $row['vod_pic'],
                'vod_remarks' => $row['vod_remarks'],
                'style' => ["type" => "rect", "ratio" => 1.33]
            ];
        }
    }

    // ceil() returns a float; cast so the page count is emitted as an integer.
    $pageCount = (int) ceil($total / $limit);

    return ['list' => $vlist, 'page' => $page, 'pagecount' => $pageCount, 'limit' => $limit, 'total' => $total];
}
198+
199+
/**
 * Build the playback descriptor for an episode.
 *
 * The id is passed through unchanged as the URL, with "parse" set to 0.
 * Per the original author's note, the id is the URL produced by
 * detailContent: either an already-resolved "pics://" link, or the raw link
 * when resolution failed at crawl time, in which case the client is left to
 * handle it.
 *
 * @param mixed $flag     Not used by this implementation.
 * @param mixed $id       URL to hand back to the client.
 * @param mixed $vipFlags Not used by this implementation.
 * @return array ['parse' => 0, 'playUrl' => '', 'url' => $id, 'header' => '']
 */
public function playerContent($flag, $id, $vipFlags = []) {
    $result = [];
    $result['parse'] = 0;
    $result['playUrl'] = '';
    $result['url'] = $id;
    $result['header'] = '';

    return $result;
}
210+
}
6.77 MB
Binary file not shown.

spider/php/74P福利图 ᵈᶻ[画].php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ public function detailContent($ids) {
172172
];
173173

174174
if (preg_match('/<h1[^>]*>(.*?)<\/h1>/', $html, $h1)) {
175-
$vod['vod_name'] = $h1[1];
175+
$vod['vod_name'] = trim(strip_tags($h1[1]));
176176
}
177177

178178
$contentHtml = "";
@@ -258,6 +258,8 @@ private function scrapeAllImages($url) {
258258
foreach ($matches[1] as $src) {
259259
$lowerSrc = strtolower($src);
260260
if (strpos($lowerSrc, '.gif') !== false || strpos($lowerSrc, '.svg') !== false || strpos($lowerSrc, 'logo') !== false || strpos($lowerSrc, 'avatar') !== false || strpos($lowerSrc, 'icon') !== false) continue;
261+
if (strpos($lowerSrc, '/covers/') !== false) continue; // 过滤封面图推荐
262+
261263

262264
if (strpos($src, '//') === 0) $src = 'https:' . $src;
263265
elseif (strpos($src, '/') === 0) $src = $this->baseUrl . $src;

spider/php/config.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
$file === $self ||
4444
strpos($file, '_') === 0 ||
4545
fnmatch('config*.php', $file) ||
46-
stripos($file, 'test') !== false) {
46+
stripos($file, 'test') !== false ||
47+
stripos($file, 'bridge') !== false) {
4748
continue;
4849
}
4950

0 commit comments

Comments
 (0)