import requests
import json
import csv
from typing import Dict, Any, List, Optional
from datetime import datetime
class DouyinDataParser:
    """Parse captured Douyin API responses and save them as structured data."""

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return *value* when it is a dict, otherwise an empty dict.

        Captured JSON may carry ``null`` where an object is expected; treating
        that as "no data" keeps one odd field from discarding a whole video.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return *value* as stripped text, mapping ``None`` to ``""``."""
        return "" if value is None else str(value).strip()

    @staticmethod
    def _first_url(container: Dict[str, Any]) -> Optional[str]:
        """Return the first entry of ``container["url_list"]``, or ``None``.

        ``None`` is returned when the key is missing, is not a list, or the
        list is empty.
        """
        url_list = container.get("url_list")
        if isinstance(url_list, list) and url_list:
            return url_list[0]
        return None

    @staticmethod
    def extract_user_posts(
        response_data: Dict[str, Any],
        only_save_public: bool = True
    ) -> List[Dict[str, Any]]:
        """Extract structured rows from a "user posted videos" API response.

        :param response_data: the captured JSON response decoded into a dict
        :param only_save_public: when true, skip videos that are neither
            pinned (``is_top == 1``) nor shareable
            (``status.allow_share == 1``)
        :return: one dict per kept video, keyed by the CSV column names;
            an empty list when ``aweme_list`` is missing, empty or not a list
        """
        as_dict = DouyinDataParser._as_dict
        clean_text = DouyinDataParser._clean_text
        first_url = DouyinDataParser._first_url

        raw_aweme_list = as_dict(response_data).get("aweme_list")
        if not isinstance(raw_aweme_list, list) or not raw_aweme_list:
            print("未找到aweme_list字段,请检查是否传入了正确的接口响应!")
            return []

        structured_videos = []
        for aweme in raw_aweme_list:
            try:
                is_top = aweme.get("is_top", 0) == 1
                status = as_dict(aweme.get("status"))
                # NOTE(review): visibility is approximated via `allow_share`,
                # and pinned videos bypass the filter entirely; confirm that
                # this matches the intended meaning of "public".
                if only_save_public and not is_top and status.get("allow_share", 1) != 1:
                    continue

                # A missing or null timestamp falls back to the epoch;
                # fromtimestamp() renders it in the machine's local timezone.
                created_at = datetime.fromtimestamp(
                    aweme.get("create_time") or 0
                ).strftime("%Y-%m-%d %H:%M:%S")

                author = as_dict(aweme.get("author"))
                video = as_dict(aweme.get("video"))
                statistics = as_dict(aweme.get("statistics"))

                structured_videos.append({
                    "视频ID": aweme.get("aweme_id"),
                    "视频标题": clean_text(aweme.get("desc")).replace("\n", " "),
                    "发布时间": created_at,
                    "作者UID": author.get("uid"),
                    "作者昵称": clean_text(author.get("nickname")),
                    "作者抖音号": clean_text(author.get("unique_id")),
                    "视频播放地址(CDN链接,可能失效快)": first_url(as_dict(video.get("play_addr"))),
                    "视频封面地址": first_url(as_dict(video.get("cover"))),
                    "点赞数": statistics.get("digg_count", 0),
                    "评论数": statistics.get("comment_count", 0),
                    "转发数": statistics.get("share_count", 0),
                    "播放数": statistics.get("play_count", 0),
                    "是否置顶": "是" if is_top else "否",
                })
            except (AttributeError, LookupError, TypeError, ValueError,
                    OverflowError, OSError) as e:
                # Best effort: a malformed entry (e.g. a non-dict item or an
                # unusable timestamp) is reported and skipped, not fatal.
                print(f"解析单个视频数据失败,跳过:{e}")
                continue
        return structured_videos

    @staticmethod
    def save_to_csv(data: List[Dict[str, Any]], filename: Optional[str] = None) -> None:
        """Write structured rows to a CSV file.

        The header is the union of all row keys in first-seen order, so rows
        with differing keys are still written (missing cells stay empty).
        Failures are reported on stdout instead of being raised.

        :param data: the list returned by an ``extract_*`` function
        :param filename: output path; defaults to a timestamped
            ``douyin_posts_*.csv`` name in the current directory
        """
        if not data:
            print("没有可保存的数据!")
            return
        if not filename:
            filename = f"douyin_posts_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        try:
            fieldnames = list(dict.fromkeys(key for row in data for key in row))
            # UTF-8 with BOM so that Excel detects the encoding correctly.
            with open(filename, mode="w", newline="", encoding="utf-8-sig") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
        except (OSError, csv.Error, TypeError, ValueError) as e:
            print(f"保存CSV失败:{e}")
        else:
            print(f"数据保存成功!文件路径:{filename}")
def main() -> None:
    """Run the command-line workflow: parse ``response.json`` and export a CSV.

    Prints the usage banner, loads ``response.json`` from the current working
    directory, extracts the video list and, when at least one video was
    parsed, saves it to a CSV file. A missing or unreadable input file is
    reported on stdout and ends the run early.
    """
    separator = "=" * 60
    print(separator)
    print("抖音抓包后API响应解析工具(仅用于学习API结构)")
    print(separator)
    print("\n使用步骤:")
    print("1. 完成前置抓包,拿到「用户发布视频列表」接口的完整响应JSON;")
    print("2. 把JSON复制到当前目录下的「response.json」文件中;")
    print("3. 运行本工具即可自动解析并保存为CSV。")
    print(separator + "\n")

    # Load the captured response from the local JSON file.
    try:
        # utf-8-sig strips a leading BOM (written by some editors), which
        # json.load would otherwise reject; BOM-less UTF-8 reads the same.
        with open("response.json", mode="r", encoding="utf-8-sig") as f:
            raw_response = json.load(f)
    except FileNotFoundError:
        print("❌ 未找到「response.json」文件,请按使用步骤操作!")
        return
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A file that is not UTF-8 text cannot be valid JSON input either.
        print("❌ 「response.json」格式错误,请检查是否是有效的JSON!")
        return

    # Both parser entry points are static methods, so no instance is needed.
    posts = DouyinDataParser.extract_user_posts(raw_response)
    print(f"\n✅ 成功解析 {len(posts)} 条视频数据!")

    # Only write a CSV when there is something to save.
    if posts:
        DouyinDataParser.save_to_csv(posts)
# Run the CLI workflow only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()