Skip to content

Commit bd0c98e

Browse files
author
Taois
committed
update:增加一些漫画小说php
1 parent 3ec8e71 commit bd0c98e

11 files changed

+2159
-1
lines changed
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
<?php
2+
require_once __DIR__ . '/lib/spider.php';
3+
4+
class Spider extends BaseSpider {
5+
6+
private $baseUrl;
7+
8+
/**
 * Returns the display name of this source.
 *
 * @return string
 */
public function getName()
{
    $displayName = '74P福利(漫画版)';

    return $displayName;
}
11+
12+
/**
 * Prepares the spider by storing the site root that every request URL is built from.
 *
 * @param string $extend Extension payload; not read by this implementation.
 * @return void
 */
public function init($extend = "")
{
    $siteRoot = 'https://www.74p.net';
    $this->baseUrl = $siteRoot;
}
15+
16+
/**
 * Reports whether the given URL should be treated as a video stream.
 *
 * This source never serves video, so the answer is always false.
 *
 * @param string $url URL to classify; not inspected.
 * @return bool Always false.
 */
public function isVideoFormat($url)
{
    $isVideo = false;

    return $isVideo;
}
19+
20+
/**
 * Reports whether a manual video check is required for this source.
 *
 * @return bool Always false.
 */
public function manualVideoCheck()
{
    $needsManualCheck = false;

    return $needsManualCheck;
}
23+
24+
/**
 * Builds the default header set used for page requests.
 *
 * @return array Map of header name => value; the Referer is the site root with a trailing slash.
 */
private function getHeader()
{
    $defaults = [];
    $defaults['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
    $defaults['Referer'] = $this->baseUrl . '/';
    $defaults['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8';
    $defaults['Connection'] = 'keep-alive';

    return $defaults;
}
32+
33+
/**
 * Requests a page using the default headers, optionally overriding the Referer.
 *
 * @param string $url     Address to request.
 * @param string $referer Referer to send instead of the default; ignored when empty.
 * @return mixed Whatever fetch() returns (fetch() is not defined in this class).
 */
private function fetchHtml($url, $referer = "")
{
    $requestHeaders = $this->getHeader();

    if ($referer) {
        $requestHeaders['Referer'] = $referer;
    }

    return $this->fetch($url, ['headers' => $requestHeaders]);
}
42+
43+
/**
 * Returns the category list shown on the home screen.
 *
 * The catalogue below contains visual separator rows (type_id "ignore");
 * those rows are dropped before the list is returned.
 *
 * NOTE(review): the project readme recommends emitting empty maps as
 * (object)[] so they serialise to {}; 'filters' is returned as [] here —
 * confirm whether the base class normalises it before changing.
 *
 * @param mixed $filter Not read by this implementation.
 * @return array ['class' => list of ['type_name', 'type_id'], 'filters' => []]
 */
public function homeContent($filter)
{
    $catalogue = [
        ['type_name' => '=== 写真 ===', 'type_id' => 'ignore'],
        ['type_name' => '秀人网', 'type_id' => 'xiurenwang'],
        ['type_name' => '语画界', 'type_id' => 'yuhuajie'],
        ['type_name' => '花漾', 'type_id' => 'huayang'],
        ['type_name' => '星颜社', 'type_id' => 'xingyanshe'],
        ['type_name' => '嗲囡囡', 'type_id' => 'feilin'],
        ['type_name' => '爱蜜社', 'type_id' => 'aimishe'],
        ['type_name' => '波萝社', 'type_id' => 'boluoshe'],
        ['type_name' => '尤物馆', 'type_id' => 'youwuguan'],
        ['type_name' => '蜜桃社', 'type_id' => 'miitao'],
        ['type_name' => '=== 漫画 ===', 'type_id' => 'ignore'],
        ['type_name' => '日本漫画', 'type_id' => 'comic/category/jp'],
        ['type_name' => '韩国漫画', 'type_id' => 'comic/category/kr'],
        ['type_name' => '=== 小说 ===', 'type_id' => 'ignore'],
        ['type_name' => '都市', 'type_id' => 'novel/category/Urban'],
        ['type_name' => '乱伦', 'type_id' => 'novel/category/Incestuous'],
        ['type_name' => '玄幻', 'type_id' => 'novel/category/Xuanhuan'],
        ['type_name' => '武侠', 'type_id' => 'novel/category/Wuxia'],
    ];

    // Keep only real categories; array_values() restores a 0-based list after filtering.
    $selectable = array_values(array_filter($catalogue, function ($entry) {
        return $entry['type_id'] !== 'ignore';
    }));

    return ['class' => $selectable, 'filters' => []];
}
73+
74+
/**
 * Returns the recommended items for the home screen; this source provides none.
 *
 * @return array ['list' => []]
 */
public function homeVideoContent()
{
    $recommended = [];

    return ['list' => $recommended];
}
77+
78+
/**
 * Lists the entries of one category page.
 *
 * @param string     $tid    Category path as returned by homeContent() (e.g. "comic/category/jp").
 * @param int|string $pg     Page number, appended as "/page/{pg}".
 * @param array      $filter Not read by this implementation.
 * @param array      $extend Not read by this implementation.
 * @return array Result of getPostList() for the category URL.
 */
public function categoryContent($tid, $pg = 1, $filter = [], $extend = [])
{
    $listingUrl = $this->baseUrl . '/' . $tid . '/page/' . $pg;

    return $this->getPostList($listingUrl, $pg);
}
82+
83+
/**
 * Fetches a listing page and converts every usable <li> entry into a list item.
 *
 * The page is narrowed to the main list container when one is recognised;
 * otherwise the whole document is scanned. For each <li> the first href is
 * the item id, the cover comes from data-original (preferred) or src, and the
 * name comes from a title attribute or, failing that, the first line of the
 * entry's text.
 *
 * Both the link and the cover are made absolute, so protocol-relative and
 * root-relative cover URLs no longer reach the client unresolved.
 *
 * @param string     $url Listing page URL.
 * @param int|string $pg  Current page number, echoed back in the result.
 * @return array ['list', 'page', 'pagecount', 'limit', 'total']; pagecount is
 *               $pg + 1 when at least 15 items were found, otherwise $pg.
 */
private function getPostList($url, $pg)
{
    $html = $this->fetchHtml($url);
    $vlist = [];

    // Resolves protocol-relative ("//host/x") and root-relative ("/x") links against the site.
    $absolute = function ($link) {
        if (strpos($link, '//') === 0) {
            return 'https:' . $link;
        }
        if (strpos($link, '/') === 0) {
            return $this->baseUrl . $link;
        }
        return $link;
    };

    if ($html) {
        $listBlock = $html;
        if (preg_match('/(?:id="index_ajax_list"|class="site-main")[^>]*>(.*?)<(?:footer|aside)/s', $html, $match)) {
            $listBlock = $match[1];
        }

        if (preg_match_all('/<li[^>]*>(.*?)<\/li>/s', $listBlock, $items)) {
            foreach ($items[1] as $item) {
                if (!preg_match('/href=["\']([^"\']+)["\']/', $item, $hrefMatch)) {
                    continue;
                }
                $href = $hrefMatch[1];

                // Skip <li> entries whose first link is a static asset rather than a post.
                if (strpos($href, '.css') !== false
                    || strpos($href, '.js') !== false
                    || strpos($href, 'templates/') !== false
                    || strpos($href, 'wp-includes') !== false
                ) {
                    continue;
                }

                // Lazy-loaded covers keep the real image in data-original; src may be a placeholder.
                $pic = "";
                if (preg_match('/data-original=["\']([^"\']+)["\']/', $item, $imgMatch)) {
                    $pic = $imgMatch[1];
                } elseif (preg_match('/src=["\']([^"\']+)["\']/', $item, $imgMatch)) {
                    $pic = $imgMatch[1];
                }

                if (!$pic) {
                    // Default cover, built from the configured site root rather than a hard-coded host.
                    $pic = $this->baseUrl . '/static/images/cover.png';
                } else {
                    $pic = $absolute($pic);
                }

                $name = "";
                if (preg_match('/title=["\']([^"\']+)["\']/', $item, $titleMatch)) {
                    $name = $titleMatch[1];
                } else {
                    $name = trim(strip_tags($item));
                    $name = explode("\n", $name)[0];
                }

                // Discard entries whose "name" looks like CSS/JS residue or is implausibly long (byte length).
                if (strpos($name, '.') === 0 || strpos($name, '{') !== false || strlen($name) > 300) {
                    continue;
                }

                $vlist[] = [
                    'vod_id' => $absolute($href),
                    'vod_name' => $name,
                    'vod_pic' => $pic,
                    'vod_remarks' => '点击查看',
                    'style' => ["type" => "rect", "ratio" => 1.33]
                ];
            }
        }
    }

    // The site's total page count is not parsed; a reasonably full page is taken to mean one more page exists.
    $pageCount = (count($vlist) >= 15) ? $pg + 1 : $pg;
    return ['list' => $vlist, 'page' => $pg, 'pagecount' => $pageCount, 'limit' => 20, 'total' => 9999];
}
136+
137+
/**
 * Searches the site for a keyword and returns the matching entries.
 *
 * The keyword is percent-encoded before it is placed in the URL path, so
 * non-ASCII keywords (e.g. Chinese), spaces and reserved characters such as
 * "/" or "?" produce a valid request. The request goes through
 * getPostList(), which sends the default headers from getHeader().
 *
 * @param string     $key   Raw (unencoded) search keyword.
 * @param bool       $quick Not read by this implementation.
 * @param int|string $pg    Page number; pages after the first append "/page/{pg}".
 * @return array Result of getPostList() for the search URL.
 */
public function searchContent($key, $quick = false, $pg = 1)
{
    // rawurlencode() (not urlencode()) because the keyword is a path segment: spaces must become %20, not "+".
    $url = $this->baseUrl . '/search/' . rawurlencode((string)$key);

    if ($pg > 1) {
        $url .= '/page/' . $pg;
    }

    return $this->getPostList($url, $pg);
}
158+
159+
/**
 * Builds the detail record (title, cover, summary, chapter list) for one entry.
 *
 * Changes from the earlier version:
 *  - an empty or missing id returns an empty list without issuing a request;
 *  - the <h1> title and chapter anchors are matched across line breaks and
 *    have inner markup stripped;
 *  - "$" and "#" are removed from chapter names because they are the field
 *    and item separators of vod_play_url.
 *
 * @param array $ids List whose first element is the detail page URL.
 * @return array ['list' => [vod]] on success, ['list' => []] when the id is
 *               empty or the page could not be fetched.
 */
public function detailContent($ids)
{
    $url = (string)($ids[0] ?? '');
    if ($url === '') {
        return ['list' => []];
    }

    $html = $this->fetchHtml($url);
    if (!$html) {
        return ['list' => []];
    }

    // Resolves protocol-relative ("//host/x") and root-relative ("/x") links against the site.
    $absolute = function ($link) {
        if (strpos($link, '//') === 0) {
            return 'https:' . $link;
        }
        if (strpos($link, '/') === 0) {
            return $this->baseUrl . $link;
        }
        return $link;
    };

    $vod = [
        'vod_id' => $url,
        'vod_name' => '',
        'vod_pic' => '',
        'type_name' => '漫画',
        'vod_content' => '',
        'vod_play_from' => '74P漫画',
        'vod_play_url' => ''
    ];

    // "s" lets the title span several lines; strip_tags() drops nested <a>/<span> markup.
    if (preg_match('/<h1[^>]*>(.*?)<\/h1>/s', $html, $h1)) {
        $vod['vod_name'] = trim(strip_tags($h1[1]));
    }

    // Summary and cover are taken from the main content block when it can be located.
    if (preg_match('/(?:id="content"|class="entry-content"|class="single-content")[^>]*>(.*?)<(?:div class="related|footer|aside|section)/s', $html, $match)) {
        $contentHtml = $match[1];
        $vod['vod_content'] = mb_substr(trim(strip_tags($contentHtml)), 0, 200);

        if (preg_match('/<img[^>]+src=["\']([^"\']+)["\']/', $contentHtml, $imgMatch)) {
            $vod['vod_pic'] = $absolute($imgMatch[1]);
        }
    }

    // If no cover was found above, fall back to the first image on the page that is not a logo/icon/avatar/gif.
    if (empty($vod['vod_pic']) && preg_match_all('/<img[^>]+src=["\']([^"\']+)["\']/', $html, $matches)) {
        foreach ($matches[1] as $src) {
            if (preg_match('/(logo|icon|avatar|\.gif)/i', $src)) {
                continue;
            }
            $vod['vod_pic'] = $absolute($src);
            break;
        }
    }

    $playList = [];

    // 1. Look for a chapter list (links into /comic/chapter/ or /novel/chapter/).
    if (preg_match_all('/<a[^>]+href=["\']([^"\']*\/(?:comic|novel)\/chapter\/[^"\']+)["\'][^>]*>(.*?)<\/a>/s', $html, $links, PREG_SET_ORDER)) {
        foreach ($links as $link) {
            // "$" separates name from URL and "#" separates chapters, so neither may appear in a name.
            $name = trim(str_replace(['$', '#'], ' ', strip_tags($link[2])));
            if ($name === '') {
                // Anchor had no text (e.g. image-only link): fall back to its position in the list.
                $name = (string)(count($playList) + 1);
            }

            $playList[] = $name . '$' . $absolute($link[1]);
        }
    } else {
        // 2. No chapter index: treat the detail page itself as the single chapter.
        $playList[] = '在线观看$' . $url;
    }

    $vod['vod_play_url'] = implode("#", $playList);
    return ['list' => [$vod]];
}
225+
226+
/**
 * Resolves a chapter into a picture playlist.
 *
 * All images found by scrapeAllImages() are joined with "&&" and returned
 * behind the "pics://" scheme.
 *
 * @param string $flag     Play source flag; not read by this implementation.
 * @param string $id       Chapter page URL.
 * @param array  $vipFlags Not read by this implementation.
 * @return array ['parse' => 0, 'playUrl' => '', 'url' => 'pics://…', 'header' => '']
 */
public function playerContent($flag, $id, $vipFlags = [])
{
    $pageImages = $this->scrapeAllImages($id);

    $response = [];
    $response['parse'] = 0;
    $response['playUrl'] = '';
    $response['url'] = 'pics://' . implode('&&', $pageImages);
    $response['header'] = '';

    return $response;
}
237+
238+
/**
 * Collects the image URLs of a chapter, following pagination for up to 50 pages.
 *
 * For each page the main content block is located (falling back to the whole
 * document), every <img> tag is inspected, and the image URL is taken from
 * data-original, then data-src, then src. Lazy-load attributes come first so
 * a placeholder in src does not hide the real picture. URLs containing
 * ".gif", ".svg", "logo", "avatar" or "icon" are skipped and duplicates are
 * dropped while keeping first-seen order.
 *
 * The next page is found from a "下一页"/"Next"/"»" link or a link carrying a
 * "next" class. If neither exists, the URL is not a /comic/chapter/ URL, it
 * contains "page" and it ends in a number, the next page is guessed by
 * incrementing that number. Crawling of guessed pages stops as soon as one
 * contributes no new image.
 *
 * @param string $url First page of the chapter.
 * @return array Ordered list of unique absolute image URLs (possibly empty).
 */
private function scrapeAllImages($url)
{
    $images = [];          // result, in first-seen order
    $seenImages = [];      // URL => true, for O(1) duplicate checks
    $visited = [];         // page URL => true, guards against pagination loops
    $currentUrl = (string)$url;
    $currentIsGuess = false;
    $maxPages = 50;

    // Resolves protocol-relative ("//host/x") and root-relative ("/x") links against the site.
    $absolute = function ($link) {
        if (strpos($link, '//') === 0) {
            return 'https:' . $link;
        }
        if (strpos($link, '/') === 0) {
            return $this->baseUrl . $link;
        }
        return $link;
    };

    for ($page = 1; $page <= $maxPages; $page++) {
        if (isset($visited[$currentUrl])) {
            break;
        }
        $visited[$currentUrl] = true;

        $html = $this->fetchHtml($currentUrl);
        if (!$html) {
            break;
        }

        $contentHtml = $html;
        if (preg_match('/(?:id="content"|class="entry-content"|class="single-content")[^>]*>(.*?)<(?:div class="related|footer|section)/s', $html, $match)) {
            $contentHtml = $match[1];
        }

        $foundNew = false;
        if (preg_match_all('/<img\s[^>]*>/i', $contentHtml, $tags)) {
            foreach ($tags[0] as $tag) {
                $src = '';
                foreach (['data-original', 'data-src', 'src'] as $attr) {
                    // The look-behind stops "src" from matching the tail of "data-src".
                    if (preg_match('/(?<![\w-])' . $attr . '\s*=\s*["\']([^"\']+)["\']/i', $tag, $attrMatch)) {
                        $src = $attrMatch[1];
                        break;
                    }
                }
                if ($src === '') {
                    continue;
                }

                $lowerSrc = strtolower($src);
                if (strpos($lowerSrc, '.gif') !== false
                    || strpos($lowerSrc, '.svg') !== false
                    || strpos($lowerSrc, 'logo') !== false
                    || strpos($lowerSrc, 'avatar') !== false
                    || strpos($lowerSrc, 'icon') !== false
                ) {
                    continue;
                }

                $src = $absolute($src);
                if (!isset($seenImages[$src])) {
                    $seenImages[$src] = true;
                    $images[] = $src;
                    $foundNew = true;
                }
            }
        }

        // A guessed page with nothing new means we ran past the last real page.
        if ($currentIsGuess && !$foundNew) {
            break;
        }

        $nextUrl = null;
        $nextIsGuess = false;
        if (preg_match('/<a[^>]+href=["\']([^"\']+)["\'][^>]*>(?:下一页|Next|»)<\/a>/i', $html, $nextMatch)) {
            $nextUrl = $nextMatch[1];
        } elseif (preg_match('/<a[^>]+href=["\']([^"\']+)["\'][^>]*class=["\'][^"\']*next[^"\']*["\']/', $html, $nextMatch)) {
            $nextUrl = $nextMatch[1];
        }

        if (!$nextUrl && strpos($currentUrl, '/comic/chapter/') === false && strpos($currentUrl, 'page') !== false) {
            // No explicit link: if the URL ends in a page number, try that number + 1.
            // The trailing slash is stripped first so ".../page/2/" yields ".../page/3"
            // (not ".../page/2/3"), and the number comes from the URL itself so a
            // crawl that starts on page N continues with N + 1.
            $trimmed = rtrim($currentUrl, '/');
            $slashPos = strrpos($trimmed, '/');
            if ($slashPos !== false) {
                $lastPart = substr($trimmed, $slashPos + 1);
                if (preg_match('/^\d+$/', $lastPart)) {
                    $nextUrl = substr($trimmed, 0, $slashPos) . '/' . ((int)$lastPart + 1);
                    $nextIsGuess = true;
                }
            }
        }

        if (!$nextUrl) {
            break;
        }

        $currentUrl = $absolute($nextUrl);
        $currentIsGuess = $nextIsGuess;
    }

    return $images;
}
301+
}
302+
303+
// Script entry point: instantiate the spider and hand control to run().
// NOTE(review): run() is not defined in the Spider class above — presumably inherited from BaseSpider; confirm.
(new Spider())->run();

spider/php/PHP写源(道长).pdf

98 KB
Binary file not shown.

spider/php/index.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?php
2+
/**
 * PHP service status check - Android version.
 *
 * Responds with a JSON document reporting that the service is up, together
 * with the PHP version, the current time and the loaded extensions.
 */
header('Content-Type: application/json; charset=utf-8');

$statusReport = [];
$statusReport['status'] = 'ok';
$statusReport['message'] = 'PHP 服务运行正常';
$statusReport['version'] = PHP_VERSION;
$statusReport['platform'] = 'Android';
$statusReport['time'] = date('Y-m-d H:i:s');
$statusReport['extensions'] = get_loaded_extensions();

// Keep non-ASCII text readable and indent the output for humans.
$encodeFlags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT;

echo json_encode($statusReport, $encodeFlags);
15+
16+

spider/php/readme.md

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,4 +302,71 @@ curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
302302
2. **空 ID 容错**:在 `detailContent` 和 `playerContent` 中,检查 ID 是否为空,避免向 API 发送非法请求导致崩溃。
303303

304304
---
305-
*本文档更新于 2026/01/25,基于 Trae IDE 协作环境。*
305+
306+
## 6. 最近实战经验汇总 (2026/01 更新)
307+
308+
### 6.1 漫画/图片源的标准协议 (`pics://`)
309+
在开发漫画或图片类源时,`playerContent` 返回的 `url` 字段应使用 `pics://` 协议。
310+
- **格式**: `pics://图片链接1&&图片链接2&&图片链接3...`
311+
- **注意**: 严禁使用非标准的 `mange://` 或其他自定义协议,除非客户端明确支持。使用 `pics://` 可确保通用播放器能正确识别为图片轮播模式。
312+
313+
### 6.2 静态资源智能过滤
314+
在解析漫画图片列表时,网页往往混杂大量的图标、LOGO、背景图或占位图(如 `grey.gif`)。必须建立过滤机制,否则会严重影响阅读体验。
315+
316+
**推荐过滤代码**:
317+
```php
318+
$uniqueImages = [];
319+
foreach ($imageList as $img) {
320+
// 1. 去重
321+
if (in_array($img, $uniqueImages)) continue;
322+
323+
// 2. 关键词过滤
324+
if (strpos($img, "grey.gif") !== false) continue; // 占位图
325+
if (strpos($img, "logo") !== false) continue; // 网站LOGO
326+
if (strpos($img, "icon") !== false) continue; // 图标
327+
if (strpos($img, "banner") !== false) continue; // 广告横幅
328+
329+
$uniqueImages[] = $img;
330+
}
331+
```
332+
333+
### 6.3 中文参数的 URL 编码陷阱
334+
PHP 的 `curl` 不会自动对 URL 中的非 ASCII 字符进行编码。如果 URL 中包含中文(如搜索关键词、分类标签),**必须**手动调用 `urlencode`
335+
- **错误**: `$url = "https://site.com/search?q=" . $key;`
336+
- **正确**: `$url = "https://site.com/search?q=" . urlencode($key);`
337+
未编码会导致服务端返回 400 Bad Request 或 404。
338+
339+
### 6.4 `config.php` 类型定义
340+
在 `config.php` 中注册源时,请注意字段命名。
341+
- **正确**: `"类型": "小说"` 或 `"类型": "漫画"`
342+
- **错误**: 不要使用 `"categories"` 或其他自定义字段名,否则前端可能无法正确分类显示。
343+
344+
### 6.5 PHP 8.5+ 与 Flutter JSON 深度兼容
345+
在 PHP 8.5.1 及 Flutter 混合环境下,JSON 格式的严谨性至关重要:
346+
1. **空 Map 强制转换**: 任何应当输出为 `{}` 的字段(如 `filters`, `ext`, `header`),若为空数组,**必须**使用 `(object)[]` 或 `(object)$arr` 转换。否则 `json_encode` 会输出 `[]`,导致 Flutter 客户端报 `type 'String' is not a subtype of type 'int' of 'index'` 错误。
347+
2. **Undefined Index 防御**: 数组索引访问必须使用 `?? ''` 或 `?? []` 提供默认值(如 `$item['key'] ?? ''`)。PHP 的 Warning 信息若混入 JSON 输出,会直接导致解析失败。
348+
349+
### 6.6 HTTPS 强制适配
350+
Android 9+ 及 Flutter 应用默认禁止明文 HTTP 请求(Cleartext traffic not permitted)。
351+
- **最佳实践**: 在提取图片链接 (`vod_pic`) 时,检测并自动替换协议。
352+
```php
353+
if (strpos($pic, 'http://') === 0) {
354+
$pic = str_replace('http://', 'https://', $pic);
355+
}
356+
```
357+
358+
### 6.7 封面图片提取的高级策略
359+
针对结构复杂的详情页(如漫画站),单一规则往往不稳定:
360+
1. **属性顺序无关正则**: 避免假设 `src` 在 `class` 之前或之后。使用更灵活的正则:
361+
`/<img[^>]*class=["\'](?:classA|classB)["\'][^>]*src=.../`
362+
2. **多级回退机制**:
363+
- **L1**: 优先从元数据区域(Metadata)提取。
364+
- **L2**: 若失败,尝试从内容区域(Content Block)提取第一张图。
365+
- **L3**: 若仍失败,全局搜索非 Icon/Logo/Gif 的第一张大图。
366+
367+
### 6.8 测试驱动开发 (TDD) 增强
368+
不要仅依赖人工查看。建议在 `test_runner.php` 中增加关键字段断言:
369+
- **封面检查**: 在详情页测试中显式检查 `vod_pic` 是否为空,能提早发现 80% 的解析问题。
370+
371+
---
372+
*本文档更新于 2026/01/26,基于 Trae IDE 协作环境。*

0 commit comments

Comments
 (0)