|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * B站视频爬虫 - PHP 适配版 (道长重构) |
| 4 | + * 按照 BaseSpider 结构重写 |
| 5 | + */ |
| 6 | + |
| 7 | +require_once __DIR__ . '/lib/spider.php'; |
| 8 | + |
| 9 | +class Spider extends BaseSpider { |
| 10 | + |
| 11 | + private $cookie = []; |
| 12 | + |
| 13 | + public function init($extend = '') { |
| 14 | + $this->headers['Referer'] = "https://www.bilibili.com"; |
| 15 | + // 配置初始 Cookie |
| 16 | + // 实际使用时,建议通过 ext 传入 cookie |
| 17 | + $configCookie = 'buvid3=xxxx; SESSDATA=xxxx;'; |
| 18 | + |
| 19 | + // 尝试从 extend 获取 cookie (假设 extend 是 JSON 字符串或直接是 cookie 字符串) |
| 20 | + // 这里简化处理:如果 extend 包含 SESSDATA,则认为是 cookie |
| 21 | + if (!empty($extend)) { |
| 22 | + if (strpos($extend, 'SESSDATA') !== false) { |
| 23 | + $configCookie = $extend; |
| 24 | + } elseif (is_array($extend) && isset($extend['cookie'])) { |
| 25 | + $configCookie = $extend['cookie']; |
| 26 | + } else { |
| 27 | + // 尝试解析 json |
| 28 | + $json = json_decode($extend, true); |
| 29 | + if (isset($json['cookie'])) { |
| 30 | + $configCookie = $json['cookie']; |
| 31 | + } |
| 32 | + } |
| 33 | + } |
| 34 | + |
| 35 | + $this->cookie = $this->parseCookie($configCookie); |
| 36 | + } |
| 37 | + |
| 38 | + private function parseCookie($cookieStr) { |
| 39 | + if (empty($cookieStr)) return []; |
| 40 | + $cookies = []; |
| 41 | + $pairs = explode(';', $cookieStr); |
| 42 | + foreach ($pairs as $pair) { |
| 43 | + $pair = trim($pair); |
| 44 | + if (strpos($pair, '=') !== false) { |
| 45 | + list($name, $value) = explode('=', $pair, 2); |
| 46 | + $cookies[trim($name)] = trim($value); |
| 47 | + } |
| 48 | + } |
| 49 | + return $cookies; |
| 50 | + } |
| 51 | + |
| 52 | + private function buildCookieString() { |
| 53 | + $pairs = []; |
| 54 | + foreach ($this->cookie as $name => $value) { |
| 55 | + $pairs[] = $name . '=' . $value; |
| 56 | + } |
| 57 | + return implode('; ', $pairs); |
| 58 | + } |
| 59 | + |
| 60 | + // 覆盖父类 fetch 以自动添加 cookie |
| 61 | + protected function fetch($url, $options = [], $headers = []) { |
| 62 | + if (!isset($options['cookie'])) { |
| 63 | + $cookieStr = $this->buildCookieString(); |
| 64 | + if (!empty($cookieStr)) { |
| 65 | + $options['cookie'] = $cookieStr; |
| 66 | + } |
| 67 | + } |
| 68 | + return parent::fetch($url, $options, $headers); |
| 69 | + } |
| 70 | + |
| 71 | + public function homeContent($filter = []) { |
| 72 | + $classes = [ |
| 73 | + ["type_id" => "沙雕仙逆", "type_name" => "傻屌仙逆"], |
| 74 | + ["type_id" => "沙雕动画", "type_name" => "沙雕动画"], |
| 75 | + ["type_id" => "纪录片超清", "type_name" => "纪录片"], |
| 76 | + ["type_id" => "演唱会超清", "type_name" => "演唱会"], |
| 77 | + ["type_id" => "音乐超清", "type_name" => "流行音乐"], |
| 78 | + ["type_id" => "美食超清", "type_name" => "美食"], |
| 79 | + ["type_id" => "食谱", "type_name" => "食谱"], |
| 80 | + ["type_id" => "体育超清", "type_name" => "体育"], |
| 81 | + ["type_id" => "球星", "type_name" => "球星"], |
| 82 | + ["type_id" => "中小学教育", "type_name" => "教育"], |
| 83 | + ["type_id" => "幼儿教育", "type_name" => "幼儿教育"], |
| 84 | + ["type_id" => "旅游", "type_name" => "旅游"], |
| 85 | + ["type_id" => "风景4K", "type_name" => "风景"], |
| 86 | + ["type_id" => "说案", "type_name" => "说案"], |
| 87 | + ["type_id" => "知名UP主", "type_name" => "知名UP主"], |
| 88 | + ["type_id" => "探索发现超清", "type_name" => "探索发现"], |
| 89 | + ["type_id" => "鬼畜", "type_name" => "鬼畜"], |
| 90 | + ["type_id" => "搞笑超清", "type_name" => "搞笑"], |
| 91 | + ["type_id" => "儿童超清", "type_name" => "儿童"], |
| 92 | + ["type_id" => "动物世界超清", "type_name" => "动物世界"], |
| 93 | + ["type_id" => "相声小品超清", "type_name" => "相声小品"], |
| 94 | + ["type_id" => "戏曲", "type_name" => "戏曲"], |
| 95 | + ["type_id" => "解说", "type_name" => "解说"], |
| 96 | + ["type_id" => "演讲", "type_name" => "演讲"], |
| 97 | + ["type_id" => "小姐姐超清", "type_name" => "小姐姐"], |
| 98 | + ["type_id" => "荒野求生超清", "type_name" => "荒野求生"], |
| 99 | + ["type_id" => "健身", "type_name" => "健身"], |
| 100 | + ["type_id" => "帕梅拉", "type_name" => "帕梅拉"], |
| 101 | + ["type_id" => "太极拳", "type_name" => "太极拳"], |
| 102 | + ["type_id" => "广场舞", "type_name" => "广场舞"], |
| 103 | + ["type_id" => "舞蹈", "type_name" => "舞蹈"], |
| 104 | + ["type_id" => "音乐", "type_name" => "音乐"], |
| 105 | + ["type_id" => "歌曲", "type_name" => "歌曲"], |
| 106 | + ["type_id" => "MV4K", "type_name" => "MV"], |
| 107 | + ["type_id" => "舞曲超清", "type_name" => "舞曲"], |
| 108 | + ["type_id" => "4K", "type_name" => "4K"], |
| 109 | + ["type_id" => "电影", "type_name" => "电影"], |
| 110 | + ["type_id" => "电视剧", "type_name" => "电视剧"], |
| 111 | + ["type_id" => "白噪音超清", "type_name" => "白噪音"], |
| 112 | + ["type_id" => "考公考证", "type_name" => "考公考证"], |
| 113 | + ["type_id" => "平面设计教学", "type_name" => "平面设计教学"], |
| 114 | + ["type_id" => "软件教程", "type_name" => "软件教程"], |
| 115 | + ["type_id" => "Windows", "type_name" => "Windows"] |
| 116 | + ]; |
| 117 | + return ['class' => $classes]; |
| 118 | + } |
| 119 | + |
| 120 | + public function homeVideoContent() { |
| 121 | + $url = 'https://api.bilibili.com/x/web-interface/popular?ps=20&pn=1'; |
| 122 | + $data = json_decode($this->fetch($url), true); |
| 123 | + |
| 124 | + $videos = []; |
| 125 | + if (isset($data['data']['list'])) { |
| 126 | + foreach ($data['data']['list'] as $item) { |
| 127 | + $videos[] = [ |
| 128 | + 'vod_id' => $item['aid'], |
| 129 | + 'vod_name' => strip_tags($item['title']), |
| 130 | + 'vod_pic' => $item['pic'], |
| 131 | + 'vod_remarks' => $this->formatDuration($item['duration']) |
| 132 | + ]; |
| 133 | + } |
| 134 | + } |
| 135 | + return ['list' => $videos]; |
| 136 | + } |
| 137 | + |
| 138 | + public function categoryContent($tid, $pg = 1, $filter = [], $extend = []) { |
| 139 | + $page = max(1, intval($pg)); |
| 140 | + |
| 141 | + $url = 'https://api.bilibili.com/x/web-interface/search/type'; |
| 142 | + $params = [ |
| 143 | + 'search_type' => 'video', |
| 144 | + 'keyword' => $tid, |
| 145 | + 'page' => $page |
| 146 | + ]; |
| 147 | + $url .= '?' . http_build_query($params); |
| 148 | + |
| 149 | + $data = json_decode($this->fetch($url), true); |
| 150 | + |
| 151 | + $videos = []; |
| 152 | + if (isset($data['data']['result'])) { |
| 153 | + foreach ($data['data']['result'] as $item) { |
| 154 | + if ($item['type'] !== 'video') continue; |
| 155 | + |
| 156 | + $videos[] = [ |
| 157 | + 'vod_id' => $item['aid'], |
| 158 | + 'vod_name' => strip_tags($item['title']), |
| 159 | + 'vod_pic' => 'https:' . $item['pic'], |
| 160 | + 'vod_remarks' => $this->formatSearchDuration($item['duration']) |
| 161 | + ]; |
| 162 | + } |
| 163 | + } |
| 164 | + |
| 165 | + $pageCount = $data['data']['numPages'] ?? 1; |
| 166 | + $total = $data['data']['numResults'] ?? count($videos); |
| 167 | + |
| 168 | + return $this->pageResult($videos, $page, $total, 20); |
| 169 | + } |
| 170 | + |
| 171 | + public function searchContent($key, $quick = false, $pg = 1) { |
| 172 | + return $this->categoryContent($key, $pg); |
| 173 | + } |
| 174 | + |
| 175 | + public function detailContent($ids) { |
| 176 | + if (empty($ids)) return ['list' => []]; |
| 177 | + $vid = $ids[0]; |
| 178 | + |
| 179 | + $url = 'https://api.bilibili.com/x/web-interface/view?aid=' . $vid; |
| 180 | + $data = json_decode($this->fetch($url), true); |
| 181 | + |
| 182 | + if (!isset($data['data'])) { |
| 183 | + return ['list' => []]; |
| 184 | + } |
| 185 | + |
| 186 | + $video = $data['data']; |
| 187 | + |
| 188 | + // 构建播放列表 |
| 189 | + $playUrl = ''; |
| 190 | + foreach ($video['pages'] as $index => $page) { |
| 191 | + $part = $page['part'] ?: '第' . ($index + 1) . '集'; |
| 192 | + // 构造 playId: avid_cid |
| 193 | + $playUrl .= "{$part}\${$vid}_{$page['cid']}#"; |
| 194 | + } |
| 195 | + |
| 196 | + $vod = [ |
| 197 | + "vod_id" => $vid, |
| 198 | + "vod_name" => strip_tags($video['title']), |
| 199 | + "vod_pic" => $video['pic'], |
| 200 | + "vod_content" => $video['desc'], |
| 201 | + "vod_play_from" => "B站视频", |
| 202 | + "vod_play_url" => rtrim($playUrl, '#') |
| 203 | + ]; |
| 204 | + |
| 205 | + return ['list' => [$vod]]; |
| 206 | + } |
| 207 | + |
| 208 | + public function playerContent($flag, $id, $vipFlags = []) { |
| 209 | + if (strpos($id, '_') !== false) { |
| 210 | + list($avid, $cid) = explode('_', $id); |
| 211 | + } else { |
| 212 | + return ['parse' => 0, 'url' => '', 'error' => '无效的视频ID格式']; |
| 213 | + } |
| 214 | + |
| 215 | + $url = 'https://api.bilibili.com/x/player/playurl'; |
| 216 | + $params = [ |
| 217 | + 'avid' => $avid, |
| 218 | + 'cid' => $cid, |
| 219 | + 'qn' => 112, // 原画质量 |
| 220 | + 'fnval' => 0, |
| 221 | + ]; |
| 222 | + $url .= '?' . http_build_query($params); |
| 223 | + |
| 224 | + $data = json_decode($this->fetch($url), true); |
| 225 | + |
| 226 | + if (!isset($data['data']) || $data['code'] !== 0) { |
| 227 | + return ['parse' => 0, 'url' => '', 'error' => '获取播放地址失败']; |
| 228 | + } |
| 229 | + |
| 230 | + // 直接返回第一个播放地址 |
| 231 | + if (isset($data['data']['durl'][0]['url'])) { |
| 232 | + $playUrl = $data['data']['durl'][0]['url']; |
| 233 | + |
| 234 | + $headers = $this->headers; |
| 235 | + $headers['Referer'] = 'https://www.bilibili.com/video/av' . $avid; |
| 236 | + $headers['Origin'] = 'https://www.bilibili.com'; |
| 237 | + |
| 238 | + return [ |
| 239 | + 'parse' => 0, |
| 240 | + 'url' => $playUrl, |
| 241 | + 'header' => $headers, |
| 242 | + 'danmaku' => "https://api.bilibili.com/x/v1/dm/list.so?oid={$cid}" |
| 243 | + ]; |
| 244 | + } |
| 245 | + |
| 246 | + return ['parse' => 0, 'url' => '', 'error' => '无法获取播放地址']; |
| 247 | + } |
| 248 | + |
| 249 | + // 工具函数 |
| 250 | + private function formatDuration($seconds) { |
| 251 | + if ($seconds <= 0) return '00:00'; |
| 252 | + $minutes = floor($seconds / 60); |
| 253 | + $secs = $seconds % 60; |
| 254 | + return sprintf('%02d:%02d', $minutes, $secs); |
| 255 | + } |
| 256 | + |
| 257 | + private function formatSearchDuration($duration) { |
| 258 | + $parts = explode(':', $duration); |
| 259 | + if (count($parts) === 2) { |
| 260 | + return $duration; |
| 261 | + } |
| 262 | + return '00:00'; |
| 263 | + } |
| 264 | +} |
| 265 | + |
| 266 | +(new Spider())->run(); |
0 commit comments