feat: 大范围重构,支持视频合集下载 (#97)

This commit is contained in:
ᴀᴍᴛᴏᴀᴇʀ
2024-07-03 18:57:12 +08:00
committed by GitHub
parent 097f885050
commit 4c9ad2318c
27 changed files with 1545 additions and 446 deletions

View File

@@ -0,0 +1,131 @@
use sea_orm::ActiveValue::NotSet;
use sea_orm::{IntoActiveModel, Set};
use serde_json::json;
use crate::bilibili::VideoInfo;
use crate::utils::id_time_key;
impl VideoInfo {
/// 将 VideoInfo 转换为 ActiveModel
pub fn to_model(&self, base_model: Option<bili_sync_entity::video::Model>) -> bili_sync_entity::video::ActiveModel {
let base_model = match base_model {
Some(base_model) => base_model.into_active_model(),
None => {
let mut tmp_model = bili_sync_entity::video::Model::default().into_active_model();
// 注意此处要把 id 设置成 NotSet否则 id 会是 Unchanged(0)
tmp_model.id = NotSet;
tmp_model
}
};
match self {
VideoInfo::Simple {
bvid,
cover,
ctime,
pubtime,
} => bili_sync_entity::video::ActiveModel {
bvid: Set(bvid.clone()),
cover: Set(cover.clone()),
ctime: Set(ctime.naive_utc()),
pubtime: Set(pubtime.naive_utc()),
category: Set(2), // 视频合集里的内容类型肯定是视频
valid: Set(true),
..base_model
},
VideoInfo::Detail {
title,
vtype,
bvid,
intro,
cover,
upper,
ctime,
fav_time,
pubtime,
attr,
} => bili_sync_entity::video::ActiveModel {
bvid: Set(bvid.clone()),
name: Set(title.clone()),
category: Set(*vtype),
intro: Set(intro.clone()),
cover: Set(cover.clone()),
ctime: Set(ctime.naive_utc()),
pubtime: Set(pubtime.naive_utc()),
favtime: Set(fav_time.naive_utc()),
download_status: Set(0),
valid: Set(*attr == 0),
tags: Set(None),
single_page: Set(None),
upper_id: Set(upper.mid),
upper_name: Set(upper.name.clone()),
upper_face: Set(upper.face.clone()),
..base_model
},
VideoInfo::View {
title,
bvid,
intro,
cover,
upper,
ctime,
pubtime,
state,
..
} => bili_sync_entity::video::ActiveModel {
bvid: Set(bvid.clone()),
name: Set(title.clone()),
category: Set(2), // 视频合集里的内容类型肯定是视频
intro: Set(intro.clone()),
cover: Set(cover.clone()),
ctime: Set(ctime.naive_utc()),
pubtime: Set(pubtime.naive_utc()),
favtime: Set(pubtime.naive_utc()), // 合集不包括 fav_time使用发布时间代替
download_status: Set(0),
valid: Set(*state == 0),
tags: Set(None),
single_page: Set(None),
upper_id: Set(upper.mid),
upper_name: Set(upper.name.clone()),
upper_face: Set(upper.face.clone()),
..base_model
},
}
}
pub fn to_fmt_args(&self) -> Option<serde_json::Value> {
match self {
VideoInfo::Simple { .. } => None, // 不能从简单的视频信息中构造格式化参数
VideoInfo::Detail { title, bvid, upper, .. } => Some(json!({
"bvid": &bvid,
"title": &title,
"upper_name": &upper.name,
"upper_mid": &upper.mid,
})),
VideoInfo::View { title, bvid, upper, .. } => Some(json!({
"bvid": &bvid,
"title": &title,
"upper_name": &upper.name,
"upper_mid": &upper.mid,
})),
}
}
pub fn video_key(&self) -> String {
match self {
// 对于合集没有 fav_time只能用 pubtime 代替
VideoInfo::Simple { bvid, pubtime, .. } => id_time_key(bvid, pubtime),
VideoInfo::Detail { bvid, fav_time, .. } => id_time_key(bvid, fav_time),
// 详情接口返回的数据仅用于填充详情,不会被作为 video_key
_ => unreachable!(),
}
}
pub fn bvid(&self) -> &str {
match self {
VideoInfo::Simple { bvid, .. } => bvid,
VideoInfo::Detail { bvid, .. } => bvid,
// 同上
_ => unreachable!(),
}
}
}

View File

@@ -0,0 +1,23 @@
pub mod convert;
pub mod model;
pub mod nfo;
pub mod status;
use chrono::{DateTime, Utc};
use tracing_subscriber::util::SubscriberInitExt;
pub fn init_logger(log_level: &str) {
tracing_subscriber::fmt::Subscriber::builder()
.with_env_filter(tracing_subscriber::EnvFilter::builder().parse_lossy(log_level))
.with_timer(tracing_subscriber::fmt::time::ChronoLocal::new(
"%Y-%m-%d %H:%M:%S%.3f".to_owned(),
))
.finish()
.try_init()
.expect("初始化日志失败");
}
/// 生成视频的唯一标记,均由 bvid 和时间戳构成
pub fn id_time_key(bvid: &String, time: &DateTime<Utc>) -> String {
format!("{}-{}", bvid, time.timestamp())
}

View File

@@ -0,0 +1,95 @@
use anyhow::Result;
use bili_sync_entity::*;
use bili_sync_migration::OnConflict;
use sea_orm::entity::prelude::*;
use sea_orm::ActiveValue::Set;
use crate::adapter::{unique_video_columns, VideoListModel};
use crate::bilibili::{PageInfo, VideoInfo};
/// 尝试创建 Video Model如果发生冲突则忽略
pub async fn create_videos(
videos_info: &[VideoInfo],
video_list_model: &dyn VideoListModel,
connection: &DatabaseConnection,
) -> Result<()> {
let video_models = videos_info
.iter()
.map(|v| video_list_model.video_model_by_info(v, None))
.collect::<Vec<_>>();
video::Entity::insert_many(video_models)
.on_conflict(OnConflict::columns(unique_video_columns()).do_nothing().to_owned())
.do_nothing()
.exec(connection)
.await?;
Ok(())
}
/// 创建视频的所有分 P
pub async fn create_video_pages(
pages_info: &[PageInfo],
video_model: &video::Model,
connection: &impl ConnectionTrait,
) -> Result<()> {
let page_models = pages_info
.iter()
.map(move |p| {
let (width, height) = match &p.dimension {
Some(d) => {
if d.rotate == 0 {
(Some(d.width), Some(d.height))
} else {
(Some(d.height), Some(d.width))
}
}
None => (None, None),
};
page::ActiveModel {
video_id: Set(video_model.id),
cid: Set(p.cid),
pid: Set(p.page),
name: Set(p.name.clone()),
width: Set(width),
height: Set(height),
duration: Set(p.duration),
image: Set(p.first_frame.clone()),
download_status: Set(0),
..Default::default()
}
})
.collect::<Vec<page::ActiveModel>>();
page::Entity::insert_many(page_models)
.on_conflict(
OnConflict::columns([page::Column::VideoId, page::Column::Pid])
.do_nothing()
.to_owned(),
)
.do_nothing()
.exec(connection)
.await?;
Ok(())
}
/// 更新视频 model 的下载状态
pub async fn update_videos_model(videos: Vec<video::ActiveModel>, connection: &DatabaseConnection) -> Result<()> {
video::Entity::insert_many(videos)
.on_conflict(
OnConflict::column(video::Column::Id)
.update_column(video::Column::DownloadStatus)
.to_owned(),
)
.exec(connection)
.await?;
Ok(())
}
/// 更新视频页 model 的下载状态
pub async fn update_pages_model(pages: Vec<page::ActiveModel>, connection: &DatabaseConnection) -> Result<()> {
let query = page::Entity::insert_many(pages).on_conflict(
OnConflict::column(page::Column::Id)
.update_columns([page::Column::DownloadStatus, page::Column::Path])
.to_owned(),
);
query.exec(connection).await?;
Ok(())
}

View File

@@ -0,0 +1,336 @@
use anyhow::Result;
use bili_sync_entity::*;
use quick_xml::events::{BytesCData, BytesText};
use quick_xml::writer::Writer;
use quick_xml::Error;
use tokio::io::AsyncWriteExt;
use crate::config::NFOTimeType;
#[allow(clippy::upper_case_acronyms)]
pub enum NFOMode {
MOVIE,
TVSHOW,
EPOSODE,
UPPER,
}
pub enum ModelWrapper<'a> {
Video(&'a video::Model),
Page(&'a page::Model),
}
pub struct NFOSerializer<'a>(pub ModelWrapper<'a>, pub NFOMode);
/// serde xml 似乎不太好用,先这么裸着写
/// (真是又臭又长啊
impl<'a> NFOSerializer<'a> {
pub async fn generate_nfo(self, nfo_time_type: &NFOTimeType) -> Result<String> {
let mut buffer = r#"<?xml version="1.0" encoding="utf-8" standalone="yes"?>
"#
.as_bytes()
.to_vec();
let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer);
let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4);
match self {
NFOSerializer(ModelWrapper::Video(v), NFOMode::MOVIE) => {
let nfo_time = match nfo_time_type {
NFOTimeType::FavTime => v.favtime,
NFOTimeType::PubTime => v.pubtime,
};
writer
.create_element("movie")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer
.create_element("plot")
.write_cdata_content_async(BytesCData::new(&v.intro))
.await
.unwrap();
writer.create_element("outline").write_empty_async().await.unwrap();
writer
.create_element("title")
.write_text_content_async(BytesText::new(&v.name))
.await
.unwrap();
writer
.create_element("actor")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer
.create_element("name")
.write_text_content_async(BytesText::new(&v.upper_id.to_string()))
.await
.unwrap();
writer
.create_element("role")
.write_text_content_async(BytesText::new(&v.upper_name))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
writer
.create_element("year")
.write_text_content_async(BytesText::new(&nfo_time.format("%Y").to_string()))
.await
.unwrap();
if let Some(tags) = &v.tags {
let tags: Vec<String> = serde_json::from_value(tags.clone()).unwrap();
for tag in tags {
writer
.create_element("genre")
.write_text_content_async(BytesText::new(&tag))
.await
.unwrap();
}
}
writer
.create_element("uniqueid")
.with_attribute(("type", "bilibili"))
.write_text_content_async(BytesText::new(&v.bvid))
.await
.unwrap();
writer
.create_element("aired")
.write_text_content_async(BytesText::new(&nfo_time.format("%Y-%m-%d").to_string()))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
}
NFOSerializer(ModelWrapper::Video(v), NFOMode::TVSHOW) => {
let nfo_time = match nfo_time_type {
NFOTimeType::FavTime => v.favtime,
NFOTimeType::PubTime => v.pubtime,
};
writer
.create_element("tvshow")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer
.create_element("plot")
.write_cdata_content_async(BytesCData::new(&v.intro))
.await
.unwrap();
writer.create_element("outline").write_empty_async().await.unwrap();
writer
.create_element("title")
.write_text_content_async(BytesText::new(&v.name))
.await
.unwrap();
writer
.create_element("actor")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer
.create_element("name")
.write_text_content_async(BytesText::new(&v.upper_id.to_string()))
.await
.unwrap();
writer
.create_element("role")
.write_text_content_async(BytesText::new(&v.upper_name))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
writer
.create_element("year")
.write_text_content_async(BytesText::new(&nfo_time.format("%Y").to_string()))
.await
.unwrap();
if let Some(tags) = &v.tags {
let tags: Vec<String> = serde_json::from_value(tags.clone()).unwrap();
for tag in tags {
writer
.create_element("genre")
.write_text_content_async(BytesText::new(&tag))
.await
.unwrap();
}
}
writer
.create_element("uniqueid")
.with_attribute(("type", "bilibili"))
.write_text_content_async(BytesText::new(&v.bvid))
.await
.unwrap();
writer
.create_element("aired")
.write_text_content_async(BytesText::new(&nfo_time.format("%Y-%m-%d").to_string()))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
}
NFOSerializer(ModelWrapper::Video(v), NFOMode::UPPER) => {
writer
.create_element("person")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer.create_element("plot").write_empty_async().await.unwrap();
writer.create_element("outline").write_empty_async().await.unwrap();
writer
.create_element("lockdata")
.write_text_content_async(BytesText::new("false"))
.await
.unwrap();
writer
.create_element("dateadded")
.write_text_content_async(BytesText::new(
&v.pubtime.format("%Y-%m-%d %H:%M:%S").to_string(),
))
.await
.unwrap();
writer
.create_element("title")
.write_text_content_async(BytesText::new(&v.upper_id.to_string()))
.await
.unwrap();
writer
.create_element("sorttitle")
.write_text_content_async(BytesText::new(&v.upper_id.to_string()))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
}
NFOSerializer(ModelWrapper::Page(p), NFOMode::EPOSODE) => {
writer
.create_element("episodedetails")
.write_inner_content_async::<_, _, Error>(|writer| async move {
writer.create_element("plot").write_empty_async().await.unwrap();
writer.create_element("outline").write_empty_async().await.unwrap();
writer
.create_element("title")
.write_text_content_async(BytesText::new(&p.name))
.await
.unwrap();
writer
.create_element("season")
.write_text_content_async(BytesText::new("1"))
.await
.unwrap();
writer
.create_element("episode")
.write_text_content_async(BytesText::new(&p.pid.to_string()))
.await
.unwrap();
Ok(writer)
})
.await
.unwrap();
}
_ => unreachable!(),
}
tokio_buffer.flush().await?;
Ok(std::str::from_utf8(&buffer).unwrap().to_owned())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_generate_nfo() {
let video = video::Model {
intro: "intro".to_string(),
name: "name".to_string(),
upper_id: 1,
upper_name: "upper_name".to_string(),
favtime: chrono::NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(2022, 2, 2).unwrap(),
chrono::NaiveTime::from_hms_opt(2, 2, 2).unwrap(),
),
pubtime: chrono::NaiveDateTime::new(
chrono::NaiveDate::from_ymd_opt(2033, 3, 3).unwrap(),
chrono::NaiveTime::from_hms_opt(3, 3, 3).unwrap(),
),
bvid: "bvid".to_string(),
tags: Some(serde_json::json!(["tag1", "tag2"])),
..Default::default()
};
assert_eq!(
NFOSerializer(ModelWrapper::Video(&video), NFOMode::MOVIE)
.generate_nfo(&NFOTimeType::PubTime)
.await
.unwrap(),
r#"<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<movie>
<plot><![CDATA[intro]]></plot>
<outline/>
<title>name</title>
<actor>
<name>1</name>
<role>upper_name</role>
</actor>
<year>2033</year>
<genre>tag1</genre>
<genre>tag2</genre>
<uniqueid type="bilibili">bvid</uniqueid>
<aired>2033-03-03</aired>
</movie>"#,
);
assert_eq!(
NFOSerializer(ModelWrapper::Video(&video), NFOMode::TVSHOW)
.generate_nfo(&NFOTimeType::FavTime)
.await
.unwrap(),
r#"<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<tvshow>
<plot><![CDATA[intro]]></plot>
<outline/>
<title>name</title>
<actor>
<name>1</name>
<role>upper_name</role>
</actor>
<year>2022</year>
<genre>tag1</genre>
<genre>tag2</genre>
<uniqueid type="bilibili">bvid</uniqueid>
<aired>2022-02-02</aired>
</tvshow>"#,
);
assert_eq!(
NFOSerializer(ModelWrapper::Video(&video), NFOMode::UPPER)
.generate_nfo(&NFOTimeType::FavTime)
.await
.unwrap(),
r#"<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<person>
<plot/>
<outline/>
<lockdata>false</lockdata>
<dateadded>2033-03-03 03:03:03</dateadded>
<title>1</title>
<sorttitle>1</sorttitle>
</person>"#,
);
let page = page::Model {
name: "name".to_string(),
pid: 3,
..Default::default()
};
assert_eq!(
NFOSerializer(ModelWrapper::Page(&page), NFOMode::EPOSODE)
.generate_nfo(&NFOTimeType::FavTime)
.await
.unwrap(),
r#"<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<episodedetails>
<plot/>
<outline/>
<title>name</title>
<season>1</season>
<episode>3</episode>
</episodedetails>"#,
);
}
}

View File

@@ -0,0 +1,182 @@
use anyhow::Result;
static STATUS_MAX_RETRY: u32 = 0b100;
static STATUS_OK: u32 = 0b111;
/// 用来表示下载的状态,不想写太多列了,所以仅使用一个 u32 表示。
/// 从低位开始,固定每三位表示一种数据的状态,从 0b000 开始,每失败一次加一,最多 0b100即重试 4 次),
/// 如果成功,将对应的三位设置为 0b111。
/// 当所有任务都成功或者由于尝试次数过多失败,为 status 最高位打上标记 1将来不再继续尝试。
#[derive(Clone)]
pub struct Status(u32);
impl Status {
/// 如果 status 整体大于等于 1 << 31则表示任务已经被处理过不再需要重试。
/// 数据库可以使用 status < Status::handled() 来筛选需要处理的内容。
pub fn handled() -> u32 {
1 << 31
}
fn new(status: u32) -> Self {
Self(status)
}
/// 一般仅需要被内部调用,用来设置最高位的标记
fn set_flag(&mut self, handled: bool) {
if handled {
self.0 |= 1 << 31;
} else {
self.0 &= !(1 << 31);
}
}
/// 从低到高检查状态,如果该位置的任务应该继续尝试执行,则返回 true否则返回 false
fn should_run(&self, size: usize) -> Vec<bool> {
assert!(size < 10, "u32 can only store 10 status");
(0..size).map(|x| self.check_continue(x)).collect()
}
/// 如果任务的执行次数小于 STATUS_MAX_RETRY说明可以继续运行
fn check_continue(&self, offset: usize) -> bool {
assert!(offset < 10, "u32 can only store 10 status");
self.get_status(offset) < STATUS_MAX_RETRY
}
/// 根据任务结果更新状态,如果任务成功,设置为 STATUS_OK否则加一
fn update_status(&mut self, result: &[Result<()>]) {
assert!(result.len() < 10, "u32 can only store 10 status");
for (i, res) in result.iter().enumerate() {
self.set_result(res, i);
}
if self.should_run(result.len()).iter().all(|x| !x) {
// 所有任务都成功或者由于尝试次数过多失败,为 status 最高位打上标记,将来不再重试
self.set_flag(true)
}
}
fn set_result(&mut self, result: &Result<()>, offset: usize) {
if result.is_ok() {
// 如果任务已经执行到最大次数,那么此时 Result 也是 Ok此时不应该更新状态
if self.get_status(offset) < STATUS_MAX_RETRY {
self.set_ok(offset);
}
} else {
self.plus_one(offset);
}
}
/// 根据 mask 设置状态,如果 mask 为 false则清除对应的状态
fn set_mask(&mut self, mask: &[bool]) {
assert!(mask.len() < 10, "u32 can only store 10 status");
for (i, &m) in mask.iter().enumerate() {
if !m {
self.clear(i);
self.set_flag(false);
}
}
}
fn plus_one(&mut self, offset: usize) {
self.0 += 1 << (3 * offset);
}
fn set_ok(&mut self, offset: usize) {
self.0 |= STATUS_OK << (3 * offset);
}
fn clear(&mut self, offset: usize) {
self.0 &= !(STATUS_OK << (3 * offset));
}
fn get_status(&self, offset: usize) -> u32 {
let helper = !0u32;
(self.0 & (helper << (offset * 3)) & (helper >> (32 - 3 * offset - 3))) >> (offset * 3)
}
}
impl From<Status> for u32 {
fn from(status: Status) -> Self {
status.0
}
}
/// 从前到后分别表示视频封面、视频信息、Up 主头像、Up 主信息、分 P 下载
#[derive(Clone)]
pub struct VideoStatus(Status);
impl VideoStatus {
pub fn new(status: u32) -> Self {
Self(Status::new(status))
}
pub fn set_mask(&mut self, clear: &[bool]) {
assert!(clear.len() == 5, "VideoStatus should have 5 status");
self.0.set_mask(clear)
}
pub fn should_run(&self) -> Vec<bool> {
self.0.should_run(5)
}
pub fn update_status(&mut self, result: &[Result<()>]) {
assert!(result.len() == 5, "VideoStatus should have 5 status");
self.0.update_status(result)
}
}
impl From<VideoStatus> for u32 {
fn from(status: VideoStatus) -> Self {
status.0.into()
}
}
/// 从前到后分别表示:视频封面、视频内容、视频信息
#[derive(Clone)]
pub struct PageStatus(Status);
impl PageStatus {
pub fn new(status: u32) -> Self {
Self(Status::new(status))
}
pub fn set_mask(&mut self, clear: &[bool]) {
assert!(clear.len() == 4, "PageStatus should have 4 status");
self.0.set_mask(clear)
}
pub fn should_run(&self) -> Vec<bool> {
self.0.should_run(4)
}
pub fn update_status(&mut self, result: &[Result<()>]) {
assert!(result.len() == 4, "PageStatus should have 4 status");
self.0.update_status(result)
}
}
impl From<PageStatus> for u32 {
fn from(status: PageStatus) -> Self {
status.0.into()
}
}
#[cfg(test)]
mod test {
use anyhow::anyhow;
use super::*;
#[test]
fn test_status() {
let mut status = Status::new(0);
assert_eq!(status.should_run(3), vec![true, true, true]);
for count in 1..=3 {
status.update_status(&[Err(anyhow!("")), Ok(()), Ok(())]);
assert_eq!(status.should_run(3), vec![true, false, false]);
assert_eq!(u32::from(status.clone()), 0b111_111_000 + count);
}
status.update_status(&[Err(anyhow!("")), Ok(()), Ok(())]);
assert_eq!(status.should_run(3), vec![false, false, false]);
assert_eq!(u32::from(status), 0b111_111_100 | Status::handled());
}
}