diff --git a/crates/bili_sync/src/bilibili/mod.rs b/crates/bili_sync/src/bilibili/mod.rs
index bc31c19..3cd81c5 100644
--- a/crates/bili_sync/src/bilibili/mod.rs
+++ b/crates/bili_sync/src/bilibili/mod.rs
@@ -25,6 +25,7 @@ mod danmaku;
 mod error;
 mod favorite_list;
 mod submission;
+mod subtitle;
 mod video;
 mod watch_later;
 
@@ -197,4 +198,26 @@ mod tests {
         assert!(videos.iter().all(|v| matches!(v, VideoInfo::Submission { .. })));
         assert!(videos.iter().rev().is_sorted_by_key(|v| v.release_datetime()));
     }
+
+    #[ignore = "only for manual test"]
+    #[tokio::test]
+    async fn test_subtitle_parse() -> Result<()> {
+        let bili_client = BiliClient::new();
+        let Ok(Some(mixin_key)) = bili_client.wbi_img().await.map(|wbi_img| wbi_img.into()) else {
+            panic!("获取 mixin key 失败");
+        };
+        set_global_mixin_key(mixin_key);
+        let video = Video::new(&bili_client, "BV1gLfnY8E6D".to_string());
+        let pages = video.get_pages().await?;
+        println!("pages: {:?}", pages);
+        let subtitles = video.get_subtitles(&pages[0]).await?;
+        for subtitle in subtitles {
+            println!(
+                "{}: {}",
+                subtitle.lan,
+                subtitle.body.to_string().chars().take(200).collect::<String>()
+            );
+        }
+        Ok(())
+    }
 }
diff --git a/crates/bili_sync/src/bilibili/subtitle.rs b/crates/bili_sync/src/bilibili/subtitle.rs
new file mode 100644
index 0000000..b553f7f
--- /dev/null
+++ b/crates/bili_sync/src/bilibili/subtitle.rs
@@ -0,0 +1,84 @@
+use std::fmt::Display;
+
+/// Subtitle listing deserialized from the `subtitle` object of the player API response.
+#[derive(Debug, serde::Deserialize)]
+pub struct SubTitlesInfo {
+    pub subtitles: Vec<SubTitleInfo>,
+}
+
+/// One entry of the listing: a language code plus the URL of the subtitle JSON.
+#[derive(Debug, serde::Deserialize)]
+pub struct SubTitleInfo {
+    pub lan: String,
+    pub subtitle_url: String,
+}
+
+/// A downloaded subtitle track.
+pub struct SubTitle {
+    pub lan: String,
+    pub body: SubTitleBody,
+}
+
+/// The cues of one track; its `Display` impl renders them as SubRip (`.srt`) text.
+#[derive(Debug, serde::Deserialize)]
+pub struct SubTitleBody(pub Vec<SubTitleItem>);
+
+/// A single cue; `from` and `to` are formatted as offsets in seconds.
+#[derive(Debug, serde::Deserialize)]
+pub struct SubTitleItem {
+    from: f64,
+    to: f64,
+    content: String,
+}
+
+impl SubTitleInfo {
+    /// Returns `true` when the subtitle URL contains `ai_subtitle`.
+    pub fn is_ai_sub(&self) -> bool {
+        // ai: aisubtitle.hdslb.com/bfs/ai_subtitle/xxxx
+        // non-ai: aisubtitle.hdslb.com/bfs/subtitle/xxxx
+        self.subtitle_url.contains("ai_subtitle")
+    }
+}
+
+impl Display for SubTitleBody {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for (idx, item) in self.0.iter().enumerate() {
+            // SubRip cue numbers start at 1, not 0.
+            writeln!(f, "{}", idx + 1)?;
+            writeln!(f, "{} --> {}", format_time(item.from), format_time(item.to))?;
+            writeln!(f, "{}", item.content)?;
+            writeln!(f)?;
+        }
+        Ok(())
+    }
+}
+
+/// Formats a time offset in seconds as an SRT timestamp (`HH:MM:SS,mmm`).
+fn format_time(time: f64) -> String {
+    // Round to whole milliseconds first: truncating the fractional part turned
+    // 206.45 into `...,449` and could never carry into the seconds field.
+    let total_ms = (time * 1e3).round() as u64;
+    let (hour, minute, second, millisecond) = (
+        total_ms / 3_600_000,
+        total_ms / 60_000 % 60,
+        total_ms / 1_000 % 60,
+        total_ms % 1_000,
+    );
+    format!("{:02}:{:02}:{:02},{:03}", hour, minute, second, millisecond)
+}
+
+#[cfg(test)]
+mod tests {
+    #[test]
+    fn test_format_time() {
+        let testcases = [
+            (0.0, "00:00:00,000"),
+            (1.5, "00:00:01,500"),
+            (206.45, "00:03:26,450"),
+            (360001.23, "100:00:01,230"),
+        ];
+        for (time, expect) in testcases.iter() {
+            assert_eq!(super::format_time(*time), *expect);
+        }
+    }
+}
diff --git a/crates/bili_sync/src/bilibili/video.rs b/crates/bili_sync/src/bilibili/video.rs
index d4ec0e6..ca5bfe0 100644
--- a/crates/bili_sync/src/bilibili/video.rs
+++ b/crates/bili_sync/src/bilibili/video.rs
@@ -8,6 +8,7 @@ use crate::bilibili::analyzer::PageAnalyzer;
 use crate::bilibili::client::BiliClient;
 use crate::bilibili::credential::encoded_query;
 use crate::bilibili::danmaku::{DanmakuElem, DanmakuWriter, DmSegMobileReply};
+use crate::bilibili::subtitle::{SubTitle, SubTitleBody, SubTitleInfo, SubTitlesInfo};
 use crate::bilibili::{Validate, VideoInfo, MIXIN_KEY};
 
 static MASK_CODE: u64 = 2251799813685247;
@@ -164,6 +165,46 @@ impl<'a> Video<'a> {
             .validate()?;
         Ok(PageAnalyzer::new(res["data"].take()))
     }
+
+    /// Fetches every non-AI subtitle track of `page` via the player API.
+    pub async fn get_subtitles(&self, page: &PageInfo) -> Result<Vec<SubTitle>> {
+        let mut res = self
+            .client
+            .request(Method::GET, "https://api.bilibili.com/x/player/wbi/v2")
+            .await
+            .query(&encoded_query(
+                vec![("cid", &page.cid.to_string()), ("bvid", &self.bvid), ("aid", &self.aid)],
+                MIXIN_KEY.load().as_deref(),
+            ))
+            .send()
+            .await?
+            .json::<serde_json::Value>()
+            .await?
+            .validate()?;
+        // The response lists the available subtitles; each entry carries its language and the URL of its JSON file.
+        let subtitles_info: SubTitlesInfo = serde_json::from_value(res["data"]["subtitle"].take())?;
+        let tasks = subtitles_info
+            .subtitles
+            .into_iter()
+            .filter(|v| !v.is_ai_sub())
+            .map(|v| self.get_subtitle(v))
+            .collect::<FuturesUnordered<_>>();
+        tasks.try_collect().await
+    }
+
+    /// Downloads the subtitle JSON referenced by `info` and parses its `body` array.
+    async fn get_subtitle(&self, info: SubTitleInfo) -> Result<SubTitle> {
+        let mut res = self
+            .client
+            .client // the inner client is used directly because this request needs no authentication
+            .request(Method::GET, format!("https:{}", &info.subtitle_url).as_str(), None)
+            .send()
+            .await?
+            .json::<serde_json::Value>()
+            .await?;
+        let body: SubTitleBody = serde_json::from_value(res["body"].take())?;
+        Ok(SubTitle { lan: info.lan, body })
+    }
 }
 
 fn bvid_to_aid(bvid: &str) -> u64 {
diff --git a/crates/bili_sync/src/utils/status.rs b/crates/bili_sync/src/utils/status.rs
index 01cc81a..4ab1ba5 100644
--- a/crates/bili_sync/src/utils/status.rs
+++ b/crates/bili_sync/src/utils/status.rs
@@ -114,7 +114,7 @@ impl From for u32 {
     }
 }
 
-/// 包含四个子任务,从前到后分别是:视频封面、视频内容、视频信息、视频弹幕
+/// Holds five subtasks, in order: video poster, video content, video info, video danmaku, video subtitles
 #[derive(Clone)]
 pub struct PageStatus(Status);
 
@@ -124,11 +124,11 @@ impl PageStatus {
     }
 
     pub fn should_run(&self) -> Vec<bool> {
-        self.0.should_run(4)
+        self.0.should_run(5)
     }
 
     pub fn update_status(&mut self, result: &[Result<()>]) {
-        assert!(result.len() == 4, "PageStatus should have 4 status");
+        assert!(result.len() == 5, "PageStatus should have 5 status");
         self.0.update_status(result)
     }
 
diff --git a/crates/bili_sync/src/workflow.rs b/crates/bili_sync/src/workflow.rs
index 7022f3f..d631b7d 100644
--- a/crates/bili_sync/src/workflow.rs
+++ b/crates/bili_sync/src/workflow.rs
@@ -5,7 +5,7 @@ use std::pin::Pin;
 use anyhow::{anyhow, bail, Context, Result};
 use bili_sync_entity::*;
 use futures::stream::{FuturesOrdered, FuturesUnordered};
-use futures::{Future, Stream, StreamExt};
+use futures::{Future, Stream, StreamExt, TryStreamExt};
 use sea_orm::entity::prelude::*;
 use sea_orm::ActiveValue::Set;
 use sea_orm::TransactionTrait;
@@ -369,13 +369,14 @@ pub async fn download_page(
     let seprate_status = status.should_run();
     let is_single_page = video_model.single_page.context("single_page is null")?;
     let base_name = TEMPLATE.path_safe_render("page", &page_format_args(video_model, &page_model))?;
-    let (poster_path, video_path, nfo_path, danmaku_path, fanart_path) = if is_single_page {
+    let (poster_path, video_path, nfo_path, danmaku_path, fanart_path, subtitle_path) = if is_single_page {
         (
             base_path.join(format!("{}-poster.jpg", &base_name)),
             base_path.join(format!("{}.mp4", &base_name)),
             base_path.join(format!("{}.nfo", &base_name)),
             base_path.join(format!("{}.zh-CN.default.ass", &base_name)),
             Some(base_path.join(format!("{}-fanart.jpg", &base_name))),
+            base_path.join(format!("{}.srt", &base_name)),
         )
     } else {
         (
@@ -393,6 +394,9 @@ pub async fn download_page(
                 .join(format!("{} - S01E{:0>2}.zh-CN.default.ass", &base_name, page_model.pid)),
             // 对于多页视频,会在上一步 fetch_video_poster 中获取剧集的 fanart,无需在此处下载单集的
             None,
+            base_path
+                .join("Season 1")
+                .join(format!("{} - S01E{:0>2}.srt", &base_name, page_model.pid)),
         )
     };
     let dimension = match (page_model.width, page_model.height) {
@@ -434,13 +438,20 @@ pub async fn download_page(
             &page_info,
             danmaku_path,
         )),
+        Box::pin(fetch_page_subtitle(
+            seprate_status[4],
+            bili_client,
+            video_model,
+            &page_info,
+            &subtitle_path,
+        )),
     ];
     let tasks: FuturesOrdered<_> = tasks.into_iter().collect();
     let results: Vec<Result<()>> = tasks.collect().await;
     status.update_status(&results);
     results
         .iter()
-        .zip(["封面", "视频", "详情", "弹幕"])
+        .zip(["封面", "视频", "详情", "弹幕", "字幕"])
         .for_each(|(res, task_name)| match res {
             Ok(_) => info!(
                 "处理视频「{}」第 {} 页{}成功",
@@ -553,6 +564,31 @@ pub async fn fetch_page_danmaku(
         .await
 }
 
+/// Fetches the subtitles of one page and writes each track to `subtitle_path` with its
+/// extension replaced by `<lan>.srt`; returns immediately when `should_run` is false.
+pub async fn fetch_page_subtitle(
+    should_run: bool,
+    bili_client: &BiliClient,
+    video_model: &video::Model,
+    page_info: &PageInfo,
+    subtitle_path: &Path,
+) -> Result<()> {
+    if !should_run {
+        return Ok(());
+    }
+    let bili_video = Video::new(bili_client, video_model.bvid.clone());
+    let subtitles = bili_video.get_subtitles(page_info).await?;
+    let tasks = subtitles
+        .into_iter()
+        .map(|subtitle| async move {
+            let path = subtitle_path.with_extension(format!("{}.srt", subtitle.lan));
+            tokio::fs::write(path, subtitle.body.to_string()).await
+        })
+        .collect::<FuturesUnordered<_>>();
+    tasks.try_collect::<Vec<()>>().await?;
+    Ok(())
+}
+
 pub async fn generate_page_nfo(
     should_run: bool,
     video_model: &video::Model,