分享个weibo python下载脚本

yhzc2023 · 发表于 2024-12-30 22:11:50

import requests
import re
import json
import os
import time

# Absolute path of the directory containing this script; used as the base
# directory for saved files.
script_dir = os.path.dirname(os.path.abspath(__file__))

class WeiboVideoDownloader:
    """Resolve the video URLs embedded in a Weibo detail page and download them.

    Typical use: call ``parase_data(url)`` to obtain ``(video_urls, title)``,
    then pass both to ``download_video``. Files are written to a ``video``
    directory next to this script.
    """

    # Seconds to wait on connect / between received bytes before giving up.
    REQUEST_TIMEOUT = 30
    # Bytes per chunk when streaming a video body to disk.
    CHUNK_SIZE = 1 << 20
    # Captures the JSON array assigned to ``$render_data`` in the page source.
    _RENDER_DATA_RE = re.compile(
        r"var \$render_data \= (.*?)\[0\] \|\| \{\}", re.DOTALL
    )
    # Characters that are invalid in file names on common file systems.
    _UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self):
        """Build the browser-like header sets used for page and media requests."""
        self.headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "i",
            "sec-ch-ua": '"Google Chrome";v="131", "Chromium";v="131", "Not:A-Brand";v="24"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        }

        # Same headers, but with an HTML-oriented "accept" for page requests.
        self.weibo_headers = {
            **self.headers,
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        }

    @staticmethod
    def _safe_filename(title):
        """Return *title* with file-system-hostile characters replaced by ``_``."""
        cleaned = WeiboVideoDownloader._UNSAFE_FILENAME_CHARS.sub("_", str(title))
        cleaned = cleaned.strip(" .")
        return cleaned or "video"

    def _download_one(self, url, headers, output_filename):
        """Stream *url* to *output_filename*; return the path, or None on failure."""
        # Write to a temporary name first so an interrupted transfer never
        # leaves a truncated file under the final name.
        partial_filename = output_filename + ".part"
        try:
            with requests.get(
                url, headers=headers, stream=True, timeout=self.REQUEST_TIMEOUT
            ) as response:
                # 206 is the expected answer to the Range header; 200 means the
                # server ignored the range and sent the whole body, which is
                # just as usable.
                if response.status_code not in (200, 206):
                    print(f"请求失败，状态码：{response.status_code}")
                    return None
                with open(partial_filename, "wb") as f:
                    for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                        if chunk:
                            f.write(chunk)
            os.replace(partial_filename, output_filename)
        except (requests.RequestException, OSError) as e:
            print(f"下载视频时发生错误: {str(e)}")
            try:
                if os.path.exists(partial_filename):
                    os.remove(partial_filename)
            except OSError:
                # Best-effort cleanup only; the download error was already reported.
                pass
            return None

        print(f"视频已下载到: {output_filename}")
        return output_filename

    def download_video(self, urls, title, delay=10):
        """Download every URL in *urls* and return the list of saved file paths.

        Args:
            urls: Video URLs to fetch, in order.
            title: Base name for the files. The first file is ``<title>.mp4``,
                later ones ``<title>_<index>.mp4``. Characters that are not
                valid in file names are replaced with ``_``.
            delay: Seconds to sleep between consecutive downloads.

        Returns:
            Paths of the files that were written successfully. Failed
            downloads are reported on stdout and skipped.
        """
        if not urls:
            return []

        headers = {
            **self.headers,
            "range": "bytes=0-",
            "referer": "https://weibo.com/",
            "sec-fetch-dest": "video",
        }

        video_dir = os.path.join(script_dir, "video")
        try:
            # The target directory does not exist on a fresh checkout.
            os.makedirs(video_dir, exist_ok=True)
        except OSError as e:
            print(f"下载视频时发生错误: {str(e)}")
            return []

        base_name = self._safe_filename(title)
        total = len(urls)
        save_files = []

        for index, url in enumerate(urls):
            print(f"正在下载: {index + 1 }/{total}")
            name = base_name if index == 0 else f"{base_name}_{index}"
            saved = self._download_one(
                url, headers, os.path.join(video_dir, f"{name}.mp4")
            )
            if saved:
                save_files.append(saved)
            # Pause between requests, but not after the last one.
            if index < total - 1:
                print(f"已下载{index + 1}/{total}\n等待{delay}秒....")
                time.sleep(delay)
        return save_files

    def parse_render_data(self, html):
        """Extract ``(video_urls, title)`` from the HTML of a detail page.

        Args:
            html: Decoded page source containing a ``var $render_data = ...``
                assignment.

        Returns:
            A ``(list_of_urls, title)`` tuple, or ``(None, None)`` when the
            render data is absent, malformed, or does not describe a video.
        """
        match = self._RENDER_DATA_RE.search(html)
        if match is None:
            print("获取视频URL失败: 未找到 $render_data")
            return None, None

        try:
            status_data = json.loads(match.group(1))[0]["status"]
            page_info = status_data["page_info"]
            title = page_info["title"]
            if page_info["type"] != "video":
                print("网页不是视频类型")
                return None, None

            pics = status_data.get("pics") or []
            # Entries in "pics" are not guaranteed to carry a "type" key, so
            # read it defensively.
            video_urls = [
                pic["videoSrc"]
                for pic in pics
                if pic.get("type") == "video" and pic.get("videoSrc")
            ]
            if not video_urls:
                # No per-picture videos: use the first URL listed in page_info.
                video_urls = list(page_info["urls"].values())[:1]
            return video_urls, title
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            # ValueError also covers json.JSONDecodeError.
            print(f"获取视频URL失败: {str(e)}")
            return None, None

    def parase_data(self, url):
        """Fetch the detail page at *url* and return ``(video_urls, title)``.

        Always returns a 2-tuple; both items are ``None`` when the request
        fails, the status is not 200, or no video data can be extracted.
        """
        try:
            response = requests.get(
                url, headers=self.weibo_headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as e:
            print(f"获取视频URL失败: {str(e)}")
            return None, None

        if response.status_code != 200:
            print(f"请求失败，状态码: {response.status_code}")
            return None, None

        try:
            html = response.content.decode("utf-8")
        except UnicodeDecodeError as e:
            print(f"获取视频URL失败: {str(e)}")
            return None, None

        return self.parse_render_data(html)

# Usage example
if __name__ == "__main__":
    downloader = WeiboVideoDownloader()
    weibo_url = "https://m.weibo.cn/detail/5116883378900540"
    # Guard the unpacking: tolerate a bare None result as well as a
    # (urls, title) pair, so a page without video data does not crash here.
    result = downloader.parase_data(weibo_url)
    video_urls, title = result if result else (None, None)
    if video_urls:
        save_files = downloader.download_video(video_urls, title)
        print(save_files)

爬格子PA · 发表于 2025-1-15 09:29:52

感谢分享！！！谢谢

分享个weibo python下载脚本

发表回复

挑战者 lv2