feat: 支持设置 video 和 page 的下载并发 (#157)

This commit is contained in:
ᴀᴍᴛᴏᴀᴇʀ
2024-07-28 02:32:02 +08:00
committed by GitHub
parent 401fcdc630
commit 8a4a95e343
6 changed files with 362 additions and 316 deletions

View File

@@ -1,308 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::Result;
use arc_swap::ArcSwapOption;
use handlebars::handlebars_helper;
use once_cell::sync::Lazy;
use serde::de::{Deserializer, MapAccess, Visitor};
use serde::ser::SerializeMap;
use serde::{Deserialize, Serialize};
use crate::bilibili::{CollectionItem, CollectionType, Credential, DanmakuOption, FilterOption};
pub static TEMPLATE: Lazy<handlebars::Handlebars> = Lazy::new(|| {
let mut handlebars = handlebars::Handlebars::new();
handlebars_helper!(truncate: |s: String, len: usize| {
if s.chars().count() > len {
s.chars().take(len).collect::<String>()
} else {
s.to_string()
}
});
handlebars.register_helper("truncate", Box::new(truncate));
handlebars
.register_template_string("video", &CONFIG.video_name)
.unwrap();
handlebars.register_template_string("page", &CONFIG.page_name).unwrap();
handlebars
});
pub static CONFIG: Lazy<Config> = Lazy::new(|| {
let config = Config::load().unwrap_or_else(|err| {
if err
.downcast_ref::<std::io::Error>()
.map_or(true, |e| e.kind() != std::io::ErrorKind::NotFound)
{
panic!("加载配置文件失败,错误为: {err}");
}
warn!("配置文件不存在,使用默认配置...");
Config::default()
});
// 放到外面,确保新的配置项被保存
info!("配置加载完毕,覆盖刷新原有配置");
config.save().unwrap();
// 检查配置文件内容
info!("校验配置文件内容...");
config.check();
config
});
pub static ARGS: Lazy<Args> = Lazy::new(Args::parse);
pub static CONFIG_DIR: Lazy<PathBuf> =
Lazy::new(|| dirs::config_dir().expect("No config path found").join("bili-sync"));
#[derive(Serialize, Deserialize)]
pub struct Config {
pub credential: ArcSwapOption<Credential>,
pub filter_option: FilterOption,
#[serde(default)]
pub danmaku_option: DanmakuOption,
pub favorite_list: HashMap<String, PathBuf>,
#[serde(
default,
serialize_with = "serialize_collection_list",
deserialize_with = "deserialize_collection_list"
)]
pub collection_list: HashMap<CollectionItem, PathBuf>,
#[serde(default)]
pub submission_list: HashMap<String, PathBuf>,
#[serde(default)]
pub watch_later: WatchLaterConfig,
pub video_name: Cow<'static, str>,
pub page_name: Cow<'static, str>,
pub interval: u64,
pub upper_path: PathBuf,
#[serde(default)]
pub nfo_time_type: NFOTimeType,
#[serde(default)]
pub delay: DelayConfig,
#[serde(default = "default_time_format")]
pub time_format: String,
}
fn default_time_format() -> String {
"%Y-%m-%d".to_string()
}
#[derive(Serialize, Deserialize, Default)]
pub struct WatchLaterConfig {
pub enabled: bool,
pub path: PathBuf,
}
#[derive(Serialize, Deserialize, Default)]
pub struct DelayConfig {
pub refresh_video_list: Option<Delay>,
pub fetch_video_detail: Option<Delay>,
pub download_video: Option<Delay>,
pub download_page: Option<Delay>,
}
#[derive(Serialize, Deserialize)]
#[serde(untagged, rename_all = "lowercase")]
pub enum Delay {
Random { min: u64, max: u64 },
Fixed(u64),
}
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum NFOTimeType {
#[default]
FavTime,
PubTime,
}
impl Default for Config {
fn default() -> Self {
Self {
credential: ArcSwapOption::from(Some(Arc::new(Credential::default()))),
filter_option: FilterOption::default(),
danmaku_option: DanmakuOption::default(),
favorite_list: HashMap::new(),
collection_list: HashMap::new(),
submission_list: HashMap::new(),
watch_later: Default::default(),
video_name: Cow::Borrowed("{{title}}"),
page_name: Cow::Borrowed("{{bvid}}"),
interval: 1200,
upper_path: CONFIG_DIR.join("upper_face"),
nfo_time_type: NFOTimeType::FavTime,
delay: Default::default(),
time_format: default_time_format(),
}
}
}
impl Config {
/// 简单的预检查
pub fn check(&self) {
let mut ok = true;
if self.favorite_list.is_empty() && self.collection_list.is_empty() && !self.watch_later.enabled {
ok = false;
error!("没有配置任何需要扫描的内容,程序空转没有意义");
}
if self.watch_later.enabled && !self.watch_later.path.is_absolute() {
error!(
"稍后再看保存的路径应为绝对路径,检测到:{}",
self.watch_later.path.display()
);
}
for path in self.favorite_list.values() {
if !path.is_absolute() {
ok = false;
error!("收藏夹保存的路径应为绝对路径,检测到: {}", path.display());
}
}
if !self.upper_path.is_absolute() {
ok = false;
error!("up 主头像保存的路径应为绝对路径");
}
if self.video_name.is_empty() {
ok = false;
error!("未设置 video_name 模板");
}
if self.page_name.is_empty() {
ok = false;
error!("未设置 page_name 模板");
}
let credential = self.credential.load();
match credential.as_deref() {
Some(credential) => {
if credential.sessdata.is_empty()
|| credential.bili_jct.is_empty()
|| credential.buvid3.is_empty()
|| credential.dedeuserid.is_empty()
|| credential.ac_time_value.is_empty()
{
ok = false;
error!("Credential 信息不完整,请确保填写完整");
}
}
None => {
ok = false;
error!("未设置 Credential 信息");
}
}
for delay_config in [
&self.delay.refresh_video_list,
&self.delay.fetch_video_detail,
&self.delay.download_video,
&self.delay.download_page,
]
.iter()
.filter_map(|x| x.as_ref())
{
if let Delay::Random { min, max } = delay_config {
if min >= max {
ok = false;
error!("随机延迟的最小值应小于最大值");
}
}
}
if !ok {
panic!(
"位于 {} 的配置文件不合法,请参考提示信息修复后继续运行",
CONFIG_DIR.join("config.toml").display()
);
}
}
fn load() -> Result<Self> {
let config_path = CONFIG_DIR.join("config.toml");
let config_content = std::fs::read_to_string(config_path)?;
Ok(toml::from_str(&config_content)?)
}
pub fn save(&self) -> Result<()> {
let config_path = CONFIG_DIR.join("config.toml");
std::fs::create_dir_all(&*CONFIG_DIR)?;
std::fs::write(config_path, toml::to_string_pretty(self)?)?;
Ok(())
}
}
fn serialize_collection_list<S>(
collection_list: &HashMap<CollectionItem, PathBuf>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let mut map = serializer.serialize_map(Some(collection_list.len()))?;
for (k, v) in collection_list {
let prefix = match k.collection_type {
CollectionType::Series => "series",
CollectionType::Season => "season",
};
map.serialize_entry(&[prefix, &k.mid, &k.sid].join(":"), v)?;
}
map.end()
}
fn deserialize_collection_list<'de, D>(deserializer: D) -> Result<HashMap<CollectionItem, PathBuf>, D::Error>
where
D: Deserializer<'de>,
{
struct CollectionListVisitor;
impl<'de> Visitor<'de> for CollectionListVisitor {
type Value = HashMap<CollectionItem, PathBuf>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a map of collection list")
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
A: MapAccess<'de>,
{
let mut collection_list = HashMap::new();
while let Some((key, value)) = map.next_entry::<String, PathBuf>()? {
let collection_item = match key.split(':').collect::<Vec<&str>>().as_slice() {
[prefix, mid, sid] => {
let collection_type = match *prefix {
"series" => CollectionType::Series,
"season" => CollectionType::Season,
_ => {
return Err(serde::de::Error::custom(
"invalid collection type, should be series or season",
))
}
};
CollectionItem {
mid: mid.to_string(),
sid: sid.to_string(),
collection_type,
}
}
_ => {
return Err(serde::de::Error::custom(
"invalid collection key, should be series:mid:sid or season:mid:sid",
))
}
};
collection_list.insert(collection_item, value);
}
Ok(collection_list)
}
}
deserializer.deserialize_map(CollectionListVisitor)
}
use clap::Parser;
#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Args {
#[arg(short, long, env = "SCAN_ONLY")]
pub scan_only: bool,
#[arg(short, long, default_value = "None,bili_sync=info", env = "RUST_LOG")]
pub log_level: String,
}

View File

@@ -0,0 +1,11 @@
use clap::Parser;
#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Args {
#[arg(short, long, env = "SCAN_ONLY")]
pub scan_only: bool,
#[arg(short, long, default_value = "None,bili_sync=info", env = "RUST_LOG")]
pub log_level: String,
}

View File

@@ -0,0 +1,54 @@
use std::path::PathBuf;
use clap::Parser;
use handlebars::handlebars_helper;
use once_cell::sync::Lazy;
use crate::config::clap::Args;
use crate::config::Config;
/// 全局的 CONFIG可以从中读取配置信息
pub static CONFIG: Lazy<Config> = Lazy::new(|| {
let config = Config::load().unwrap_or_else(|err| {
if err
.downcast_ref::<std::io::Error>()
.map_or(true, |e| e.kind() != std::io::ErrorKind::NotFound)
{
panic!("加载配置文件失败,错误为: {err}");
}
warn!("配置文件不存在,使用默认配置...");
Config::default()
});
// 放到外面,确保新的配置项被保存
info!("配置加载完毕,覆盖刷新原有配置");
config.save().unwrap();
// 检查配置文件内容
info!("校验配置文件内容...");
config.check();
config
});
/// 全局的 TEMPLATE用来渲染 video_name 和 page_name 模板
pub static TEMPLATE: Lazy<handlebars::Handlebars> = Lazy::new(|| {
let mut handlebars = handlebars::Handlebars::new();
handlebars_helper!(truncate: |s: String, len: usize| {
if s.chars().count() > len {
s.chars().take(len).collect::<String>()
} else {
s.to_string()
}
});
handlebars.register_helper("truncate", Box::new(truncate));
handlebars
.register_template_string("video", &CONFIG.video_name)
.unwrap();
handlebars.register_template_string("page", &CONFIG.page_name).unwrap();
handlebars
});
/// 全局的 ARGS用来解析命令行参数
pub static ARGS: Lazy<Args> = Lazy::new(Args::parse);
/// 全局的 CONFIG_DIR表示配置文件夹的路径
pub static CONFIG_DIR: Lazy<PathBuf> =
Lazy::new(|| dirs::config_dir().expect("No config path found").join("bili-sync"));

View File

@@ -0,0 +1,129 @@
use std::collections::HashMap;
use std::path::PathBuf;
use serde::de::{Deserializer, MapAccess, Visitor};
use serde::ser::SerializeMap;
use serde::{Deserialize, Serialize};
use crate::bilibili::{CollectionItem, CollectionType};
/// 稍后再看的配置
#[derive(Serialize, Deserialize, Default)]
pub struct WatchLaterConfig {
pub enabled: bool,
pub path: PathBuf,
}
/// 每次执行操作后的延迟配置
#[derive(Serialize, Deserialize, Default)]
pub struct DelayConfig {
pub refresh_video_list: Option<Delay>,
pub fetch_video_detail: Option<Delay>,
pub download_video: Option<Delay>,
pub download_page: Option<Delay>,
}
/// 延迟的定义,支持固定时间和随机时间
#[derive(Serialize, Deserialize)]
#[serde(untagged, rename_all = "lowercase")]
pub enum Delay {
Random { min: u64, max: u64 },
Fixed(u64),
}
/// NFO 文件使用的时间类型
#[derive(Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum NFOTimeType {
#[default]
FavTime,
PubTime,
}
/// 并发下载相关的配置
#[derive(Serialize, Deserialize)]
pub struct ConcurrentLimit {
pub video: usize,
pub page: usize,
pub delay: DelayConfig,
}
impl Default for ConcurrentLimit {
fn default() -> Self {
Self {
video: 3,
page: 2,
delay: DelayConfig::default(),
}
}
}
/* 后面是用于自定义 Collection 的序列化、反序列化的样板代码 */
pub(super) fn serialize_collection_list<S>(
collection_list: &HashMap<CollectionItem, PathBuf>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let mut map = serializer.serialize_map(Some(collection_list.len()))?;
for (k, v) in collection_list {
let prefix = match k.collection_type {
CollectionType::Series => "series",
CollectionType::Season => "season",
};
map.serialize_entry(&[prefix, &k.mid, &k.sid].join(":"), v)?;
}
map.end()
}
pub(super) fn deserialize_collection_list<'de, D>(deserializer: D) -> Result<HashMap<CollectionItem, PathBuf>, D::Error>
where
D: Deserializer<'de>,
{
struct CollectionListVisitor;
impl<'de> Visitor<'de> for CollectionListVisitor {
type Value = HashMap<CollectionItem, PathBuf>;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a map of collection list")
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
A: MapAccess<'de>,
{
let mut collection_list = HashMap::new();
while let Some((key, value)) = map.next_entry::<String, PathBuf>()? {
let collection_item = match key.split(':').collect::<Vec<&str>>().as_slice() {
[prefix, mid, sid] => {
let collection_type = match *prefix {
"series" => CollectionType::Series,
"season" => CollectionType::Season,
_ => {
return Err(serde::de::Error::custom(
"invalid collection type, should be series or season",
))
}
};
CollectionItem {
mid: mid.to_string(),
sid: sid.to_string(),
collection_type,
}
}
_ => {
return Err(serde::de::Error::custom(
"invalid collection key, should be series:mid:sid or season:mid:sid",
))
}
};
collection_list.insert(collection_item, value);
}
Ok(collection_list)
}
}
deserializer.deserialize_map(CollectionListVisitor)
}

View File

@@ -0,0 +1,162 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::Result;
use arc_swap::ArcSwapOption;
use serde::{Deserialize, Serialize};
mod clap;
mod global;
mod item;
use crate::bilibili::{CollectionItem, Credential, DanmakuOption, FilterOption};
pub use crate::config::global::{ARGS, CONFIG, CONFIG_DIR, TEMPLATE};
use crate::config::item::{deserialize_collection_list, serialize_collection_list, ConcurrentLimit};
pub use crate::config::item::{Delay, NFOTimeType, WatchLaterConfig};
fn default_time_format() -> String {
"%Y-%m-%d".to_string()
}
#[derive(Serialize, Deserialize)]
pub struct Config {
pub credential: ArcSwapOption<Credential>,
pub filter_option: FilterOption,
#[serde(default)]
pub danmaku_option: DanmakuOption,
pub favorite_list: HashMap<String, PathBuf>,
#[serde(
default,
serialize_with = "serialize_collection_list",
deserialize_with = "deserialize_collection_list"
)]
pub collection_list: HashMap<CollectionItem, PathBuf>,
#[serde(default)]
pub submission_list: HashMap<String, PathBuf>,
#[serde(default)]
pub watch_later: WatchLaterConfig,
pub video_name: Cow<'static, str>,
pub page_name: Cow<'static, str>,
pub interval: u64,
pub upper_path: PathBuf,
#[serde(default)]
pub nfo_time_type: NFOTimeType,
#[serde(default)]
pub concurrent_limit: ConcurrentLimit,
#[serde(default = "default_time_format")]
pub time_format: String,
}
impl Default for Config {
fn default() -> Self {
Self {
credential: ArcSwapOption::from(Some(Arc::new(Credential::default()))),
filter_option: FilterOption::default(),
danmaku_option: DanmakuOption::default(),
favorite_list: HashMap::new(),
collection_list: HashMap::new(),
submission_list: HashMap::new(),
watch_later: Default::default(),
video_name: Cow::Borrowed("{{title}}"),
page_name: Cow::Borrowed("{{bvid}}"),
interval: 1200,
upper_path: CONFIG_DIR.join("upper_face"),
nfo_time_type: NFOTimeType::FavTime,
concurrent_limit: ConcurrentLimit::default(),
time_format: default_time_format(),
}
}
}
impl Config {
fn load() -> Result<Self> {
let config_path = CONFIG_DIR.join("config.toml");
let config_content = std::fs::read_to_string(config_path)?;
Ok(toml::from_str(&config_content)?)
}
pub fn save(&self) -> Result<()> {
let config_path = CONFIG_DIR.join("config.toml");
std::fs::create_dir_all(&*CONFIG_DIR)?;
std::fs::write(config_path, toml::to_string_pretty(self)?)?;
Ok(())
}
pub fn check(&self) {
let mut ok = true;
if self.favorite_list.is_empty() && self.collection_list.is_empty() && !self.watch_later.enabled {
ok = false;
error!("没有配置任何需要扫描的内容,程序空转没有意义");
}
if self.watch_later.enabled && !self.watch_later.path.is_absolute() {
error!(
"稍后再看保存的路径应为绝对路径,检测到:{}",
self.watch_later.path.display()
);
}
for path in self.favorite_list.values() {
if !path.is_absolute() {
ok = false;
error!("收藏夹保存的路径应为绝对路径,检测到: {}", path.display());
}
}
if !self.upper_path.is_absolute() {
ok = false;
error!("up 主头像保存的路径应为绝对路径");
}
if self.video_name.is_empty() {
ok = false;
error!("未设置 video_name 模板");
}
if self.page_name.is_empty() {
ok = false;
error!("未设置 page_name 模板");
}
let credential = self.credential.load();
match credential.as_deref() {
Some(credential) => {
if credential.sessdata.is_empty()
|| credential.bili_jct.is_empty()
|| credential.buvid3.is_empty()
|| credential.dedeuserid.is_empty()
|| credential.ac_time_value.is_empty()
{
ok = false;
error!("Credential 信息不完整,请确保填写完整");
}
}
None => {
ok = false;
error!("未设置 Credential 信息");
}
}
if !(self.concurrent_limit.video > 0 && self.concurrent_limit.page > 0) {
ok = false;
error!("允许的并发数必须大于 0");
}
for delay_config in [
&self.concurrent_limit.delay.refresh_video_list,
&self.concurrent_limit.delay.fetch_video_detail,
&self.concurrent_limit.delay.download_video,
&self.concurrent_limit.delay.download_page,
]
.iter()
.filter_map(|x| x.as_ref())
{
if let Delay::Random { min, max } = delay_config {
if min >= max {
ok = false;
error!("随机延迟的最小值应小于最大值");
}
}
}
if !ok {
panic!(
"位于 {} 的配置文件不合法,请参考提示信息修复后继续运行",
CONFIG_DIR.join("config.toml").display()
);
}
}
}

View File

@@ -60,7 +60,7 @@ pub async fn refresh_video_list<'a>(
info!("到达上一次处理的位置,提前中止");
break;
}
delay(CONFIG.delay.refresh_video_list.as_ref()).await;
delay(CONFIG.concurrent_limit.delay.refresh_video_list.as_ref()).await;
}
new_count = video_list_model.video_count(connection).await? - new_count;
video_list_model.log_refresh_video_end(got_count, new_count);
@@ -80,7 +80,7 @@ pub async fn fetch_video_details(
video_list_model
.fetch_videos_detail(video, video_model, connection)
.await?;
delay(CONFIG.delay.fetch_video_detail.as_ref()).await;
delay(CONFIG.concurrent_limit.delay.fetch_video_detail.as_ref()).await;
}
video_list_model.log_fetch_video_end();
Ok(video_list_model)
@@ -94,8 +94,7 @@ pub async fn download_unprocessed_videos(
) -> Result<()> {
video_list_model.log_download_video_start();
let unhandled_videos_pages = video_list_model.unhandled_video_pages(connection).await?;
// 对于视频,允许三个同时下载(视频内还有分页、不同分页还有多种下载任务)
let semaphore = Semaphore::new(3);
let semaphore = Semaphore::new(CONFIG.concurrent_limit.video);
let downloader = Downloader::new(bili_client.client.clone());
let mut uppers_mutex: HashMap<i64, (Mutex<()>, Mutex<()>)> = HashMap::new();
for (video_model, _) in &unhandled_videos_pages {
@@ -232,7 +231,7 @@ pub async fn download_video_pages(
}
let mut video_active_model: video::ActiveModel = video_model.into();
video_active_model.download_status = Set(status.into());
delay(CONFIG.delay.download_video.as_ref()).await;
delay(CONFIG.concurrent_limit.delay.download_video.as_ref()).await;
Ok(video_active_model)
}
@@ -247,8 +246,7 @@ pub async fn dispatch_download_page(
if !should_run {
return Ok(());
}
// 对于视频的分页,允许两个同时下载(绝大部分是单页视频)
let child_semaphore = Semaphore::new(2);
let child_semaphore = Semaphore::new(CONFIG.concurrent_limit.page);
let mut tasks = pages
.into_iter()
.map(|page_model| download_page(bili_client, video_model, page_model, &child_semaphore, downloader))
@@ -420,7 +418,7 @@ pub async fn download_page(
let mut page_active_model: page::ActiveModel = page_model.into();
page_active_model.download_status = Set(status.into());
page_active_model.path = Set(Some(video_path.to_str().unwrap().to_string()));
delay(CONFIG.delay.download_page.as_ref()).await;
delay(CONFIG.concurrent_limit.delay.download_page.as_ref()).await;
Ok(page_active_model)
}