feat: 支持下载 cc 字幕 (#234)
This commit is contained in:
@@ -25,6 +25,7 @@ mod danmaku;
|
||||
mod error;
|
||||
mod favorite_list;
|
||||
mod submission;
|
||||
mod subtitle;
|
||||
mod video;
|
||||
mod watch_later;
|
||||
|
||||
@@ -197,4 +198,26 @@ mod tests {
|
||||
assert!(videos.iter().all(|v| matches!(v, VideoInfo::Submission { .. })));
|
||||
assert!(videos.iter().rev().is_sorted_by_key(|v| v.release_datetime()));
|
||||
}
|
||||
|
||||
#[ignore = "only for manual test"]
|
||||
#[tokio::test]
|
||||
async fn test_subtitle_parse() -> Result<()> {
|
||||
let bili_client = BiliClient::new();
|
||||
let Ok(Some(mixin_key)) = bili_client.wbi_img().await.map(|wbi_img| wbi_img.into()) else {
|
||||
panic!("获取 mixin key 失败");
|
||||
};
|
||||
set_global_mixin_key(mixin_key);
|
||||
let video = Video::new(&bili_client, "BV1gLfnY8E6D".to_string());
|
||||
let pages = video.get_pages().await?;
|
||||
println!("pages: {:?}", pages);
|
||||
let subtitles = video.get_subtitles(&pages[0]).await?;
|
||||
for subtitle in subtitles {
|
||||
println!(
|
||||
"{}: {}",
|
||||
subtitle.lan,
|
||||
subtitle.body.to_string().chars().take(200).collect::<String>()
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
75
crates/bili_sync/src/bilibili/subtitle.rs
Normal file
75
crates/bili_sync/src/bilibili/subtitle.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
/// Subtitle listing for one page, deserialized from the `data.subtitle` field
/// of the player API response.
#[derive(Debug, serde::Deserialize)]
pub struct SubTitlesInfo {
    /// One entry per available subtitle track.
    pub subtitles: Vec<SubTitleInfo>,
}
|
||||
|
||||
/// Metadata of a single subtitle track: its language and where to download it.
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleInfo {
    /// Language tag of the track; it is also used in the subtitle file name.
    pub lan: String,
    /// Download address of the subtitle JSON. The downloader prepends `https:`,
    /// so the value is expected to come without a scheme.
    pub subtitle_url: String,
}
|
||||
|
||||
pub struct SubTitle {
|
||||
pub lan: String,
|
||||
pub body: SubTitleBody,
|
||||
}
|
||||
|
||||
/// Ordered list of subtitle cues, deserialized from the `body` array of the
/// downloaded subtitle JSON.
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleBody(pub Vec<SubTitleItem>);
|
||||
|
||||
/// A single subtitle cue.
#[derive(Debug, serde::Deserialize)]
pub struct SubTitleItem {
    /// Start of the cue; formatted as seconds by `format_time`.
    from: f64,
    /// End of the cue; formatted as seconds by `format_time`.
    to: f64,
    /// Text shown while the cue is active.
    content: String,
}
|
||||
|
||||
impl SubTitleInfo {
|
||||
pub fn is_ai_sub(&self) -> bool {
|
||||
// ai: aisubtitle.hdslb.com/bfs/ai_subtitle/xxxx
|
||||
// 非 ai: aisubtitle.hdslb.com/bfs/subtitle/xxxx
|
||||
self.subtitle_url.contains("ai_subtitle")
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SubTitleBody {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
for (idx, item) in self.0.iter().enumerate() {
|
||||
writeln!(f, "{}", idx)?;
|
||||
writeln!(f, "{} --> {}", format_time(item.from), format_time(item.to))?;
|
||||
writeln!(f, "{}", item.content)?;
|
||||
writeln!(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Formats a timestamp given in seconds as an SRT time code `HH:MM:SS,mmm`.
///
/// The value is rounded to the nearest millisecond before it is split into
/// fields, so decimal inputs that are not exactly representable as `f64`
/// (such as `206.45`) keep their intended millisecond part, and a fraction
/// that rounds up to a full second carries into the seconds field.
/// The hour field is not capped and may use more than two digits.
fn format_time(time: f64) -> String {
    // Float-to-integer `as` casts saturate: negative or NaN input becomes 0.
    let total_millis = (time * 1e3).round() as u64;
    let (total_seconds, millisecond) = (total_millis / 1000, total_millis % 1000);
    let (hour, minute, second) = (
        total_seconds / 3600,
        (total_seconds % 3600) / 60,
        total_seconds % 60,
    );
    format!("{:02}:{:02}:{:02},{:03}", hour, minute, second, millisecond)
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_format_time() {
        // Timestamps are rounded to the nearest millisecond, so float
        // representation error no longer leaks into the formatted output.
        let testcases = [
            (0.0, "00:00:00,000"),
            (1.5, "00:00:01,500"),
            (206.45, "00:03:26,450"),
            (360001.23, "100:00:01,230"),
            (59.9996, "00:01:00,000"),
        ];
        for (time, expect) in testcases.iter() {
            assert_eq!(super::format_time(*time), *expect);
        }
    }
}
|
||||
@@ -8,6 +8,7 @@ use crate::bilibili::analyzer::PageAnalyzer;
|
||||
use crate::bilibili::client::BiliClient;
|
||||
use crate::bilibili::credential::encoded_query;
|
||||
use crate::bilibili::danmaku::{DanmakuElem, DanmakuWriter, DmSegMobileReply};
|
||||
use crate::bilibili::subtitle::{SubTitle, SubTitleBody, SubTitleInfo, SubTitlesInfo};
|
||||
use crate::bilibili::{Validate, VideoInfo, MIXIN_KEY};
|
||||
|
||||
static MASK_CODE: u64 = 2251799813685247;
|
||||
@@ -164,6 +165,44 @@ impl<'a> Video<'a> {
|
||||
.validate()?;
|
||||
Ok(PageAnalyzer::new(res["data"].take()))
|
||||
}
|
||||
|
||||
pub async fn get_subtitles(&self, page: &PageInfo) -> Result<Vec<SubTitle>> {
|
||||
let mut res = self
|
||||
.client
|
||||
.request(Method::GET, "https://api.bilibili.com/x/player/wbi/v2")
|
||||
.await
|
||||
.query(&encoded_query(
|
||||
vec![("cid", &page.cid.to_string()), ("bvid", &self.bvid), ("aid", &self.aid)],
|
||||
MIXIN_KEY.load().as_deref(),
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.json::<serde_json::Value>()
|
||||
.await?
|
||||
.validate()?;
|
||||
// 接口返回的信息,包含了一系列的字幕,每个字幕包含了字幕的语言和 json 下载地址
|
||||
let subtitles_info: SubTitlesInfo = serde_json::from_value(res["data"]["subtitle"].take())?;
|
||||
let tasks = subtitles_info
|
||||
.subtitles
|
||||
.into_iter()
|
||||
.filter(|v| !v.is_ai_sub())
|
||||
.map(|v| self.get_subtitle(v))
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
tasks.try_collect().await
|
||||
}
|
||||
|
||||
async fn get_subtitle(&self, info: SubTitleInfo) -> Result<SubTitle> {
|
||||
let mut res = self
|
||||
.client
|
||||
.client // 这里可以直接使用 inner_client,因为该请求不需要鉴权
|
||||
.request(Method::GET, format!("https:{}", &info.subtitle_url).as_str(), None)
|
||||
.send()
|
||||
.await?
|
||||
.json::<serde_json::Value>()
|
||||
.await?;
|
||||
let body: SubTitleBody = serde_json::from_value(res["body"].take())?;
|
||||
Ok(SubTitle { lan: info.lan, body })
|
||||
}
|
||||
}
|
||||
|
||||
fn bvid_to_aid(bvid: &str) -> u64 {
|
||||
|
||||
@@ -114,7 +114,7 @@ impl From<VideoStatus> for u32 {
|
||||
}
|
||||
}
|
||||
|
||||
/// 包含四个子任务,从前到后分别是:视频封面、视频内容、视频信息、视频弹幕
|
||||
/// 包含五个子任务,从前到后分别是:视频封面、视频内容、视频信息、视频弹幕、视频字幕
|
||||
#[derive(Clone)]
|
||||
pub struct PageStatus(Status);
|
||||
|
||||
@@ -124,11 +124,11 @@ impl PageStatus {
|
||||
}
|
||||
|
||||
pub fn should_run(&self) -> Vec<bool> {
|
||||
self.0.should_run(4)
|
||||
self.0.should_run(5)
|
||||
}
|
||||
|
||||
pub fn update_status(&mut self, result: &[Result<()>]) {
|
||||
assert!(result.len() == 4, "PageStatus should have 4 status");
|
||||
assert!(result.len() == 5, "PageStatus should have 5 status");
|
||||
self.0.update_status(result)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::pin::Pin;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use bili_sync_entity::*;
|
||||
use futures::stream::{FuturesOrdered, FuturesUnordered};
|
||||
use futures::{Future, Stream, StreamExt};
|
||||
use futures::{Future, Stream, StreamExt, TryStreamExt};
|
||||
use sea_orm::entity::prelude::*;
|
||||
use sea_orm::ActiveValue::Set;
|
||||
use sea_orm::TransactionTrait;
|
||||
@@ -369,13 +369,14 @@ pub async fn download_page(
|
||||
let seprate_status = status.should_run();
|
||||
let is_single_page = video_model.single_page.context("single_page is null")?;
|
||||
let base_name = TEMPLATE.path_safe_render("page", &page_format_args(video_model, &page_model))?;
|
||||
let (poster_path, video_path, nfo_path, danmaku_path, fanart_path) = if is_single_page {
|
||||
let (poster_path, video_path, nfo_path, danmaku_path, fanart_path, subtitle_path) = if is_single_page {
|
||||
(
|
||||
base_path.join(format!("{}-poster.jpg", &base_name)),
|
||||
base_path.join(format!("{}.mp4", &base_name)),
|
||||
base_path.join(format!("{}.nfo", &base_name)),
|
||||
base_path.join(format!("{}.zh-CN.default.ass", &base_name)),
|
||||
Some(base_path.join(format!("{}-fanart.jpg", &base_name))),
|
||||
base_path.join(format!("{}.srt", &base_name)),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
@@ -393,6 +394,9 @@ pub async fn download_page(
|
||||
.join(format!("{} - S01E{:0>2}.zh-CN.default.ass", &base_name, page_model.pid)),
|
||||
// 对于多页视频,会在上一步 fetch_video_poster 中获取剧集的 fanart,无需在此处下载单集的
|
||||
None,
|
||||
base_path
|
||||
.join("Season 1")
|
||||
.join(format!("{} - S01E{:0>2}.srt", &base_name, page_model.pid)),
|
||||
)
|
||||
};
|
||||
let dimension = match (page_model.width, page_model.height) {
|
||||
@@ -434,13 +438,20 @@ pub async fn download_page(
|
||||
&page_info,
|
||||
danmaku_path,
|
||||
)),
|
||||
Box::pin(fetch_page_subtitle(
|
||||
seprate_status[4],
|
||||
bili_client,
|
||||
video_model,
|
||||
&page_info,
|
||||
&subtitle_path,
|
||||
)),
|
||||
];
|
||||
let tasks: FuturesOrdered<_> = tasks.into_iter().collect();
|
||||
let results: Vec<Result<()>> = tasks.collect().await;
|
||||
status.update_status(&results);
|
||||
results
|
||||
.iter()
|
||||
.zip(["封面", "视频", "详情", "弹幕"])
|
||||
.zip(["封面", "视频", "详情", "弹幕", "字幕"])
|
||||
.for_each(|(res, task_name)| match res {
|
||||
Ok(_) => info!(
|
||||
"处理视频「{}」第 {} 页{}成功",
|
||||
@@ -553,6 +564,29 @@ pub async fn fetch_page_danmaku(
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn fetch_page_subtitle(
|
||||
should_run: bool,
|
||||
bili_client: &BiliClient,
|
||||
video_model: &video::Model,
|
||||
page_info: &PageInfo,
|
||||
subtitle_path: &Path,
|
||||
) -> Result<()> {
|
||||
if !should_run {
|
||||
return Ok(());
|
||||
}
|
||||
let bili_video = Video::new(bili_client, video_model.bvid.clone());
|
||||
let subtitles = bili_video.get_subtitles(page_info).await?;
|
||||
let tasks = subtitles
|
||||
.into_iter()
|
||||
.map(|subtitle| async move {
|
||||
let path = subtitle_path.with_extension(format!("{}.srt", subtitle.lan));
|
||||
tokio::fs::write(path, subtitle.body.to_string()).await
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
tasks.try_collect::<Vec<()>>().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn generate_page_nfo(
|
||||
should_run: bool,
|
||||
video_model: &video::Model,
|
||||
|
||||
Reference in New Issue
Block a user