import requests
import re
import subprocess
import os
import urllib.parse
import jsbeautifier
import time
import random
# ================= ⚙️ Configuration =================
# 1. Target page (the starting page; the script pages forward from here automatically)
# Examples: "https://missav.ws/zh/genres/uncensored-leak" or "https://missav.ws/dm143/genres/潮吹"
TARGET_PAGE = "https://missav.ws/dm143/genres/潮吹"
# 2. Rate limits (set to "0" to disable)
# Download limit (e.g. "10M" = 10 MB/s, "500K" = 500 KB/s)
DOWNLOAD_LIMIT = "10M"
# Upload limit (e.g. "5M")
UPLOAD_LIMIT = "0"
# 3. Rclone settings (must match the remote Name from `rclone config`)
RCLONE_REMOTE = "onedrive"
REMOTE_DIR = "Videos/MissAV"
# 4. Paging limit (0 means unlimited; keep paging until the last page)
MAX_PAGES = 0
# 5. Paths (usually no need to change)
WORK_DIR = "/root"
ARCHIVE_FILE = f"{WORK_DIR}/downloaded_history.txt"
YTDLP_PATH = "/root/dl_env/bin/yt-dlp"
FLARESOLVERR_URL = "http://localhost:8191/v1"
# ====================================================
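
# Quick sanity checks before the first run (standard rclone / pip / yt-dlp
# invocations; adjust the virtualenv path if yours differs):
#
#   rclone lsd onedrive:                                 # remote must match RCLONE_REMOTE
#   /root/dl_env/bin/pip install requests jsbeautifier   # script dependencies
#   /root/dl_env/bin/yt-dlp --version                    # confirm YTDLP_PATH works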
def get_page_via_flaresolverr(url):
    """Fetch a page through FlareSolverr so Cloudflare challenges are solved."""
    print(f">>> [Monitor] Scanning page: {url}")
    headers = {"Content-Type": "application/json"}
    payload = {
        "cmd": "request.get",
        "url": url,
        "maxTimeout": 60000
    }
    try:
        # timeout slightly above maxTimeout so requests cannot hang forever
        response = requests.post(FLARESOLVERR_URL, json=payload, headers=headers, timeout=90)
        data = response.json()
        if data.get("status") == "ok":
            return data["solution"]
        print(f">>> FlareSolverr error: {data.get('message')}")
        return None
    except Exception as e:
        print(f">>> Failed to connect to FlareSolverr: {e}")
        return None
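
# For reference, a successful FlareSolverr v1 response has roughly this shape
# (field names as documented upstream; verify against your installed version):
#
#   {
#     "status": "ok",
#     "message": "...",
#     "solution": {
#       "url": "...",
#       "status": 200,
#       "response": "<html>...</html>",        # fully rendered page source
#       "cookies": [{"name": "...", "value": "...", "domain": "...", ...}],
#       "userAgent": "Mozilla/5.0 ..."
#     }
#   }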
def extract_next_page(html_source):
    """Extract the next-page link, if any."""
    match = re.search(r'<a[^>]+href="([^"]+)"[^>]*rel="next"', html_source)
    if not match:
        match = re.search(r'<a[^>]+rel="next"[^>]*href="([^"]+)"', html_source)
    if match:
        next_url = match.group(1)
        if not next_url.startswith("http"):
            if next_url.startswith("/"):
                return f"https://missav.ws{next_url}"
            return f"https://missav.ws/{next_url}"
        return next_url
    return None
def extract_video_links(html_source):
    """Extract video detail-page links from the listing HTML."""
    links = set()
    # Video pages end in a code-style slug such as "sone-123"
    pattern = r'href="([^"]*?/[\w]+-[\d]+)"'
    matches = re.findall(pattern, html_source)
    for link in matches:
        if not link.startswith("http"):
            full_link = f"https://missav.ws{link}" if link.startswith("/") else f"https://missav.ws/{link}"
        else:
            full_link = link
        # Skip navigation/category pages that happen to match the pattern
        if any(x in full_link for x in ["/genres/", "/makers/", "/actresses/", "/search", "/login", "label", "series"]):
            continue
        links.add(full_link)
    return list(links)
def is_downloaded(url):
    if not os.path.exists(ARCHIVE_FILE):
        return False
    video_id = url.rstrip('/').split("/")[-1]
    with open(ARCHIVE_FILE, 'r') as f:
        return video_id in f.read()
def mark_as_downloaded(url):
    video_id = url.rstrip('/').split("/")[-1]
    with open(ARCHIVE_FILE, 'a') as f:
        f.write(f"{video_id}\n")
def clean_url(raw_url):
    """Undo JSON escaping and percent-encoding in an extracted URL."""
    url = raw_url.replace(r'\/', '/').replace('%3A', ':').replace('%2F', '/')
    return urllib.parse.unquote(url).replace('\\', '')
def extract_m3u8(html_source):
    """Unpack the obfuscated player script and extract the M3U8 URL."""
    scripts = re.findall(r'<script[^>]*>(.*?)</script>', html_source, re.DOTALL | re.IGNORECASE)
    for script in scripts:
        if "eval(function(p,a,c,k,e,d)" in script:
            try:
                unpacked = jsbeautifier.beautify(script)
                match = re.search(r'(https?://[^"\';\s]+\.m3u8[^"\';\s]*)', unpacked)
                if match:
                    return clean_url(match.group(1))
                match = re.search(r'(https?://[^"\';\s]*surrit\.com[^"\';\s]*)', unpacked)
                if match:
                    return clean_url(match.group(1))
            except Exception:
                pass
    # Fallback: look for a CDN URL in the raw page source
    match = re.search(r'(https?://[^\s"\'<>]*(?:surrit\.com|sixyik\.com|missav)[^\s"\'<>]*)', html_source)
    if match and "m3u8" in match.group(1):
        return clean_url(match.group(1))
    return None
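
# For context: the player config is wrapped in a Dean Edwards-style packer,
# whose signature is the "eval(function(p,a,c,k,e,d)" prefix matched above.
# An illustrative (made-up) payload looks like:
#
#   eval(function(p,a,c,k,e,d){...}('0 1=\'2://3.4/5.6\';',7,7,
#       'var|source|https|surrit|com|playlist|m3u8'.split('|'),0,{}))
#
# jsbeautifier detects this packer and returns the decoded JavaScript, in
# which the m3u8 URL appears as plain text and can be pulled out with a regex.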
def save_cookies(cookies):
    cookie_file = "monitor_cookies.txt"
    with open(cookie_file, 'w') as f:
        f.write("# Netscape HTTP Cookie File\n")
        for cookie in cookies:
            domain = cookie.get('domain', '')
            if not domain.startswith('.') and re.search(r'[a-zA-Z]', domain):
                domain = '.' + domain
            f.write(f"{domain}\tTRUE\t{cookie.get('path', '/')}\t"
                    f"{'TRUE' if cookie.get('secure') else 'FALSE'}\t"
                    f"{cookie.get('expiry', 0)}\t{cookie.get('name', '')}\t{cookie.get('value', '')}\n")
    return cookie_file
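
# The file written above follows the 7-field, tab-separated Netscape cookie
# format that yt-dlp's --cookies option expects:
#
#   domain  include-subdomains  path  secure  expiry  name  value
#
# Example line (values are made up):
#   .missav.ws\tTRUE\t/\tTRUE\t1767225600\tcf_clearance\tabc123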
def process_video(url):
    print(f"\n>>> [Processing] {url}")
    solution = get_page_via_flaresolverr(url)
    if not solution:
        return
    m3u8_url = extract_m3u8(solution["response"])
    if not m3u8_url:
        print(">>> ❌ Skipping: failed to extract M3U8")
        return
    video_id = url.rstrip('/').split("/")[-1]
    filename = f"{video_id}.mp4"
    cookie_file = save_cookies(solution["cookies"])
    cmd = [
        YTDLP_PATH, m3u8_url,
        "--output", filename,
        "--no-part",
        "--user-agent", solution["userAgent"],
        "--cookies", cookie_file
    ]
    if DOWNLOAD_LIMIT != "0":
        cmd.extend(["--limit-rate", DOWNLOAD_LIMIT])
    print(f">>> 🚀 Starting download (rate limit: {DOWNLOAD_LIMIT}): {filename}")
    try:
        subprocess.run(cmd, check=True)
        print(">>> ✅ Download finished, uploading to OneDrive...")
        rclone_cmd = ["rclone", "move", filename, f"{RCLONE_REMOTE}:{REMOTE_DIR}", "--progress"]
        if UPLOAD_LIMIT != "0":
            rclone_cmd.extend(["--bwlimit", UPLOAD_LIMIT])
        subprocess.run(rclone_cmd, check=True)
        mark_as_downloaded(url)
        print(">>> ✨ Done!")
    except subprocess.CalledProcessError:
        print(">>> ❌ Download or upload failed")
    finally:
        if os.path.exists(cookie_file):
            os.remove(cookie_file)
if __name__ == "__main__":
    current_url = TARGET_PAGE
    page_count = 0
    while current_url:
        page_count += 1
        print(f"\n{'=' * 20} Processing page {page_count} {'=' * 20}")
        page_solution = get_page_via_flaresolverr(current_url)
        if not page_solution:
            print(">>> Failed to fetch the page; stopping.")
            break
        html = page_solution["response"]
        video_links = extract_video_links(html)
        print(f">>> Found {len(video_links)} videos on this page")
        new_downloads = 0
        for link in video_links:
            if is_downloaded(link):
                print(f">>> [Skipped] Already in download history: {link}")
                continue
            process_video(link)
            new_downloads += 1
            time.sleep(5)
        print(f">>> Page {page_count} done; {new_downloads} new videos downloaded.")
        if MAX_PAGES > 0 and page_count >= MAX_PAGES:
            print(f">>> Reached the page limit ({MAX_PAGES}); stopping.")
            break
        next_page = extract_next_page(html)
        if next_page:
            print(f">>> 🔎 Found next page: {next_page}")
            current_url = next_page
            wait_time = random.randint(10, 20)
            print(f">>> Sleeping {wait_time} seconds before continuing...")
            time.sleep(wait_time)
        else:
            print(">>> ✅ No next page found; all pages have been crawled!")
            current_url = None
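
# A typical way to run this (script name and log path are illustrative;
# FlareSolverr must already be listening on FLARESOLVERR_URL, e.g. via its
# official Docker image):
#
#   nohup /root/dl_env/bin/python missav_monitor.py > monitor.log 2>&1 &
#
# nohup keeps the crawl alive after the SSH session closes; follow progress
# with `tail -f monitor.log`.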