feat: 支持下载 cc 字幕 (#234)

This commit is contained in:
ᴀᴍᴛᴏᴀᴇʀ
2025-01-30 01:20:53 +08:00
committed by GitHub
parent 802565e4f6
commit cc7f773300
5 changed files with 177 additions and 6 deletions

View File

@@ -25,6 +25,7 @@ mod danmaku;
mod error;
mod favorite_list;
mod submission;
mod subtitle;
mod video;
mod watch_later;
@@ -197,4 +198,26 @@ mod tests {
assert!(videos.iter().all(|v| matches!(v, VideoInfo::Submission { .. })));
assert!(videos.iter().rev().is_sorted_by_key(|v| v.release_datetime()));
}
#[ignore = "only for manual test"]
#[tokio::test]
async fn test_subtitle_parse() -> Result<()> {
let bili_client = BiliClient::new();
let Ok(Some(mixin_key)) = bili_client.wbi_img().await.map(|wbi_img| wbi_img.into()) else {
panic!("获取 mixin key 失败");
};
set_global_mixin_key(mixin_key);
let video = Video::new(&bili_client, "BV1gLfnY8E6D".to_string());
let pages = video.get_pages().await?;
println!("pages: {:?}", pages);
let subtitles = video.get_subtitles(&pages[0]).await?;
for subtitle in subtitles {
println!(
"{}: {}",
subtitle.lan,
subtitle.body.to_string().chars().take(200).collect::<String>()
);
}
Ok(())
}
}

View File

@@ -0,0 +1,75 @@
use std::fmt::Display;
#[derive(Debug, serde::Deserialize)]
pub struct SubTitlesInfo {
pub subtitles: Vec<SubTitleInfo>,
}
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleInfo {
pub lan: String,
pub subtitle_url: String,
}
pub struct SubTitle {
pub lan: String,
pub body: SubTitleBody,
}
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleBody(pub Vec<SubTitleItem>);
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleItem {
from: f64,
to: f64,
content: String,
}
impl SubTitleInfo {
pub fn is_ai_sub(&self) -> bool {
// ai aisubtitle.hdslb.com/bfs/ai_subtitle/xxxx
// 非 ai aisubtitle.hdslb.com/bfs/subtitle/xxxx
self.subtitle_url.contains("ai_subtitle")
}
}
impl Display for SubTitleBody {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (idx, item) in self.0.iter().enumerate() {
writeln!(f, "{}", idx)?;
writeln!(f, "{} --> {}", format_time(item.from), format_time(item.to))?;
writeln!(f, "{}", item.content)?;
writeln!(f)?;
}
Ok(())
}
}
fn format_time(time: f64) -> String {
let (second, millisecond) = (time.trunc(), (time.fract() * 1e3) as u32);
let (hour, minute, second) = (
(second / 3600.0) as u32,
((second % 3600.0) / 60.0) as u32,
(second % 60.0) as u32,
);
format!("{:02}:{:02}:{:02},{:03}", hour, minute, second, millisecond)
}
#[cfg(test)]
mod tests {
#[test]
fn test_format_time() {
// float 解析会有精度问题,但误差几毫秒应该不太关键
// 想再健壮一点就得手写 serde_json 解析拆分秒和毫秒,然后分别处理了
let testcases = [
(0.0, "00:00:00,000"),
(1.5, "00:00:01,500"),
(206.45, "00:03:26,449"),
(360001.23, "100:00:01,229"),
];
for (time, expect) in testcases.iter() {
assert_eq!(super::format_time(*time), *expect);
}
}
}

View File

@@ -8,6 +8,7 @@ use crate::bilibili::analyzer::PageAnalyzer;
use crate::bilibili::client::BiliClient;
use crate::bilibili::credential::encoded_query;
use crate::bilibili::danmaku::{DanmakuElem, DanmakuWriter, DmSegMobileReply};
use crate::bilibili::subtitle::{SubTitle, SubTitleBody, SubTitleInfo, SubTitlesInfo};
use crate::bilibili::{Validate, VideoInfo, MIXIN_KEY};
static MASK_CODE: u64 = 2251799813685247;
@@ -164,6 +165,44 @@ impl<'a> Video<'a> {
.validate()?;
Ok(PageAnalyzer::new(res["data"].take()))
}
pub async fn get_subtitles(&self, page: &PageInfo) -> Result<Vec<SubTitle>> {
let mut res = self
.client
.request(Method::GET, "https://api.bilibili.com/x/player/wbi/v2")
.await
.query(&encoded_query(
vec![("cid", &page.cid.to_string()), ("bvid", &self.bvid), ("aid", &self.aid)],
MIXIN_KEY.load().as_deref(),
))
.send()
.await?
.json::<serde_json::Value>()
.await?
.validate()?;
// 接口返回的信息,包含了一系列的字幕,每个字幕包含了字幕的语言和 json 下载地址
let subtitles_info: SubTitlesInfo = serde_json::from_value(res["data"]["subtitle"].take())?;
let tasks = subtitles_info
.subtitles
.into_iter()
.filter(|v| !v.is_ai_sub())
.map(|v| self.get_subtitle(v))
.collect::<FuturesUnordered<_>>();
tasks.try_collect().await
}
async fn get_subtitle(&self, info: SubTitleInfo) -> Result<SubTitle> {
let mut res = self
.client
.client // 这里可以直接使用 inner_client因为该请求不需要鉴权
.request(Method::GET, format!("https:{}", &info.subtitle_url).as_str(), None)
.send()
.await?
.json::<serde_json::Value>()
.await?;
let body: SubTitleBody = serde_json::from_value(res["body"].take())?;
Ok(SubTitle { lan: info.lan, body })
}
}
fn bvid_to_aid(bvid: &str) -> u64 {

View File

@@ -114,7 +114,7 @@ impl From<VideoStatus> for u32 {
}
}
/// 包含个子任务,从前到后分别是:视频封面、视频内容、视频信息、视频弹幕
/// 包含个子任务,从前到后分别是:视频封面、视频内容、视频信息、视频弹幕、视频字幕
#[derive(Clone)]
pub struct PageStatus(Status);
@@ -124,11 +124,11 @@ impl PageStatus {
}
pub fn should_run(&self) -> Vec<bool> {
self.0.should_run(4)
self.0.should_run(5)
}
pub fn update_status(&mut self, result: &[Result<()>]) {
assert!(result.len() == 4, "PageStatus should have 4 status");
assert!(result.len() == 5, "PageStatus should have 5 status");
self.0.update_status(result)
}

View File

@@ -5,7 +5,7 @@ use std::pin::Pin;
use anyhow::{anyhow, bail, Context, Result};
use bili_sync_entity::*;
use futures::stream::{FuturesOrdered, FuturesUnordered};
use futures::{Future, Stream, StreamExt};
use futures::{Future, Stream, StreamExt, TryStreamExt};
use sea_orm::entity::prelude::*;
use sea_orm::ActiveValue::Set;
use sea_orm::TransactionTrait;
@@ -369,13 +369,14 @@ pub async fn download_page(
let seprate_status = status.should_run();
let is_single_page = video_model.single_page.context("single_page is null")?;
let base_name = TEMPLATE.path_safe_render("page", &page_format_args(video_model, &page_model))?;
let (poster_path, video_path, nfo_path, danmaku_path, fanart_path) = if is_single_page {
let (poster_path, video_path, nfo_path, danmaku_path, fanart_path, subtitle_path) = if is_single_page {
(
base_path.join(format!("{}-poster.jpg", &base_name)),
base_path.join(format!("{}.mp4", &base_name)),
base_path.join(format!("{}.nfo", &base_name)),
base_path.join(format!("{}.zh-CN.default.ass", &base_name)),
Some(base_path.join(format!("{}-fanart.jpg", &base_name))),
base_path.join(format!("{}.srt", &base_name)),
)
} else {
(
@@ -393,6 +394,9 @@ pub async fn download_page(
.join(format!("{} - S01E{:0>2}.zh-CN.default.ass", &base_name, page_model.pid)),
// 对于多页视频,会在上一步 fetch_video_poster 中获取剧集的 fanart无需在此处下载单集的
None,
base_path
.join("Season 1")
.join(format!("{} - S01E{:0>2}.srt", &base_name, page_model.pid)),
)
};
let dimension = match (page_model.width, page_model.height) {
@@ -434,13 +438,20 @@ pub async fn download_page(
&page_info,
danmaku_path,
)),
Box::pin(fetch_page_subtitle(
seprate_status[4],
bili_client,
video_model,
&page_info,
&subtitle_path,
)),
];
let tasks: FuturesOrdered<_> = tasks.into_iter().collect();
let results: Vec<Result<()>> = tasks.collect().await;
status.update_status(&results);
results
.iter()
.zip(["封面", "视频", "详情", "弹幕"])
.zip(["封面", "视频", "详情", "弹幕", "字幕"])
.for_each(|(res, task_name)| match res {
Ok(_) => info!(
"处理视频「{}」第 {} 页{}成功",
@@ -553,6 +564,29 @@ pub async fn fetch_page_danmaku(
.await
}
pub async fn fetch_page_subtitle(
should_run: bool,
bili_client: &BiliClient,
video_model: &video::Model,
page_info: &PageInfo,
subtitle_path: &Path,
) -> Result<()> {
if !should_run {
return Ok(());
}
let bili_video = Video::new(bili_client, video_model.bvid.clone());
let subtitles = bili_video.get_subtitles(page_info).await?;
let tasks = subtitles
.into_iter()
.map(|subtitle| async move {
let path = subtitle_path.with_extension(format!("{}.srt", subtitle.lan));
tokio::fs::write(path, subtitle.body.to_string()).await
})
.collect::<FuturesUnordered<_>>();
tasks.try_collect::<Vec<()>>().await?;
Ok(())
}
pub async fn generate_page_nfo(
should_run: bool,
video_model: &video::Model,