refactor: 尝试将任务处理部分重构为 stream 写法,增补注释

This commit is contained in:
amtoaer
2025-01-23 17:13:51 +08:00
parent b4177d4ffc
commit aa88f97eff
6 changed files with 127 additions and 119 deletions

View File

@@ -12,7 +12,7 @@ use sea_orm::{DatabaseConnection, TransactionTrait, Unchanged};
use crate::adapter::{helper, VideoListModel};
use crate::bilibili::{self, BiliClient, Collection, CollectionItem, CollectionType, VideoInfo};
use crate::utils::status::Status;
use crate::utils::status::STATUS_COMPLETED;
#[async_trait]
impl VideoListModel for collection::Model {
@@ -38,7 +38,7 @@ impl VideoListModel for collection::Model {
video::Column::CollectionId
.eq(self.id)
.and(video::Column::Valid.eq(true))
.and(video::Column::DownloadStatus.lt(Status::handled()))
.and(video::Column::DownloadStatus.lt(STATUS_COMPLETED))
.and(video::Column::Category.eq(2))
.and(video::Column::SinglePage.is_not_null())
.into_condition(),

View File

@@ -12,7 +12,7 @@ use sea_orm::{DatabaseConnection, TransactionTrait, Unchanged};
use crate::adapter::{helper, VideoListModel};
use crate::bilibili::{self, BiliClient, FavoriteList, VideoInfo};
use crate::utils::status::Status;
use crate::utils::status::STATUS_COMPLETED;
#[async_trait]
impl VideoListModel for favorite::Model {
@@ -38,7 +38,7 @@ impl VideoListModel for favorite::Model {
video::Column::FavoriteId
.eq(self.id)
.and(video::Column::Valid.eq(true))
.and(video::Column::DownloadStatus.lt(Status::handled()))
.and(video::Column::DownloadStatus.lt(STATUS_COMPLETED))
.and(video::Column::Category.eq(2))
.and(video::Column::SinglePage.is_not_null())
.into_condition(),

View File

@@ -13,7 +13,7 @@ use sea_orm::{DatabaseConnection, TransactionTrait, Unchanged};
use crate::adapter::helper::video_with_path;
use crate::adapter::{helper, VideoListModel};
use crate::bilibili::{self, BiliClient, Submission, VideoInfo};
use crate::utils::status::Status;
use crate::utils::status::STATUS_COMPLETED;
#[async_trait]
impl VideoListModel for submission::Model {
@@ -39,7 +39,7 @@ impl VideoListModel for submission::Model {
video::Column::SubmissionId
.eq(self.id)
.and(video::Column::Valid.eq(true))
.and(video::Column::DownloadStatus.lt(Status::handled()))
.and(video::Column::DownloadStatus.lt(STATUS_COMPLETED))
.and(video::Column::Category.eq(2))
.and(video::Column::SinglePage.is_not_null())
.into_condition(),

View File

@@ -13,7 +13,7 @@ use sea_orm::{DatabaseConnection, TransactionTrait, Unchanged};
use crate::adapter::helper::video_with_path;
use crate::adapter::{helper, VideoListModel};
use crate::bilibili::{self, BiliClient, VideoInfo, WatchLater};
use crate::utils::status::Status;
use crate::utils::status::STATUS_COMPLETED;
#[async_trait]
impl VideoListModel for watch_later::Model {
@@ -39,7 +39,7 @@ impl VideoListModel for watch_later::Model {
video::Column::WatchLaterId
.eq(self.id)
.and(video::Column::Valid.eq(true))
.and(video::Column::DownloadStatus.lt(Status::handled()))
.and(video::Column::DownloadStatus.lt(STATUS_COMPLETED))
.and(video::Column::Category.eq(2))
.and(video::Column::SinglePage.is_not_null())
.into_condition(),

View File

@@ -2,78 +2,85 @@ use anyhow::Result;
static STATUS_MAX_RETRY: u32 = 0b100;
static STATUS_OK: u32 = 0b111;
pub static STATUS_COMPLETED: u32 = 1 << 31;
/// 用来表示下载的状态,不想写太多列了,所以仅使用一个 u32 表示。
/// 从低位开始,固定每三位表示一种数据的状态,从 0b000 开始,每失败一次加一,最多 0b100即重试 4 次),
/// 如果成功,将对应的三位设置为 0b111
/// 当所有任务都成功或者由于尝试次数过多失败,为 status 最高位打上标记 1将来不再继续尝试
/// 从低位开始,固定每三位表示一种子任务的状态。
/// 子任务状态从 0b000 开始,每执行失败一次将状态加一,最多 0b100即允许重试 4 次),该值定义为 STATUS_MAX_RETRY
/// 如果子任务执行成功,将状态设置为 0b111该值定义为 STATUS_OK
/// 子任务达到最大失败次数或者执行成功时,认为该子任务已经完成。
/// 当所有子任务都已经完成时,为最高位打上标记 1表示整个下载任务已经完成。
#[derive(Clone)]
pub struct Status(u32);
impl Status {
/// 如果 status 整体大于等于 1 << 31则表示任务已经被处理过不再需要重试。
/// 数据库可以使用 status < Status::handled() 来筛选需要处理的内容。
pub const fn handled() -> u32 {
1 << 31
}
fn new(status: u32) -> Self {
Self(status)
}
/// 一般仅需要被内部调用,用来设置最高位的标记
fn set_flag(&mut self, handled: bool) {
if handled {
/// 设置最高位的完成标记
fn set_completed(&mut self, completed: bool) {
if completed {
self.0 |= 1 << 31;
} else {
self.0 &= !(1 << 31);
}
}
/// 从低到高检查状态,如果该位置的任务应该继续尝试执行,则返回 true否则返回 false
fn should_run(&self, size: usize) -> Vec<bool> {
(0..size).map(|x| self.check_continue(x)).collect()
// 获取最高位的完成标记
fn get_completed(&self) -> bool {
self.0 >> 31 == 1
}
/// 如果任务的执行次数小于 STATUS_MAX_RETRY说明可以继续运行
/// 获取某个子任务的状态
fn get_status(&self, offset: usize) -> u32 {
let helper = !0u32;
(self.0 & (helper << (offset * 3)) & (helper >> (32 - 3 * offset - 3))) >> (offset * 3)
}
// 将某个子任务的状态加一(在任务失败时使用)
fn plus_one(&mut self, offset: usize) {
self.0 += 1 << (3 * offset);
}
// 设置某个子任务的状态为 STATUS_OK在任务成功时使用
fn set_ok(&mut self, offset: usize) {
self.0 |= STATUS_OK << (3 * offset);
}
/// 检查某个子任务是否还应该继续执行,实际是检查该子任务的状态是否小于 STATUS_MAX_RETRY
fn check_continue(&self, offset: usize) -> bool {
self.get_status(offset) < STATUS_MAX_RETRY
}
/// 根据任务结果更新状态,如果任务成功,设置为 STATUS_OK否则加一
/// 依次检查所有子任务是否还应该继续执行,返回一个 bool 数组
fn should_run(&self, size: usize) -> Vec<bool> {
(0..size).map(|x| self.check_continue(x)).collect()
}
/// 根据子任务执行结果更新子任务的状态
/// 如果 Result 是 Ok那么认为任务执行成功将状态设置为 STATUS_OK
/// 如果 Result 是 Err那么认为任务执行失败将状态加一
fn set_result(&mut self, result: &Result<()>, offset: usize) {
if self.get_status(offset) < STATUS_MAX_RETRY {
match result {
Ok(_) => self.set_ok(offset),
Err(_) => self.plus_one(offset),
}
}
}
/// 根据任务结果更新状态,任务结果是一个 Result 数组,需要与子任务一一对应
/// 如果所有子任务都已经完成,那么打上最高位的完成标记
fn update_status(&mut self, result: &[Result<()>]) {
for (i, res) in result.iter().enumerate() {
self.set_result(res, i);
}
if self.should_run(result.len()).iter().all(|x| !x) {
// 所有任务都成功或者由于尝试次数过多失败,为 status 最高位打上标记,将来不再重试
self.set_flag(true)
self.set_completed(true)
}
}
fn set_result(&mut self, result: &Result<()>, offset: usize) {
if result.is_ok() {
// 如果任务已经执行到最大次数,那么此时 Result 也是 Ok此时不应该更新状态
if self.get_status(offset) < STATUS_MAX_RETRY {
self.set_ok(offset);
}
} else {
self.plus_one(offset);
}
}
fn plus_one(&mut self, offset: usize) {
self.0 += 1 << (3 * offset);
}
fn set_ok(&mut self, offset: usize) {
self.0 |= STATUS_OK << (3 * offset);
}
fn get_status(&self, offset: usize) -> u32 {
let helper = !0u32;
(self.0 & (helper << (offset * 3)) & (helper >> (32 - 3 * offset - 3))) >> (offset * 3)
}
}
impl From<Status> for u32 {
@@ -82,7 +89,7 @@ impl From<Status> for u32 {
}
}
/// 从前到后分别表示视频封面、视频信息、Up 主头像、Up 主信息、分 P 下载
/// 包含五个子任务,从前到后依次是视频封面、视频信息、Up 主头像、Up 主信息、分 P 下载
#[derive(Clone)]
pub struct VideoStatus(Status);
@@ -107,7 +114,7 @@ impl From<VideoStatus> for u32 {
}
}
/// 从前到后分别表示:视频封面、视频内容、视频信息
/// 包含四个子任务,从前到后分别:视频封面、视频内容、视频信息、视频弹幕
#[derive(Clone)]
pub struct PageStatus(Status);
@@ -124,6 +131,10 @@ impl PageStatus {
assert!(result.len() == 4, "PageStatus should have 4 status");
self.0.update_status(result)
}
pub fn get_completed(&self) -> bool {
self.0.get_completed()
}
}
impl From<PageStatus> for u32 {
@@ -149,6 +160,6 @@ mod test {
}
status.update_status(&[Err(anyhow!("")), Ok(()), Ok(())]);
assert_eq!(status.should_run(3), vec![false, false, false]);
assert_eq!(u32::from(status), 0b111_111_100 | Status::handled());
assert_eq!(u32::from(status), 0b111_111_100 | STATUS_COMPLETED);
}
}

View File

@@ -102,16 +102,16 @@ pub async fn download_unprocessed_videos(
connection: &DatabaseConnection,
) -> Result<()> {
video_list_model.log_download_video_start();
let unhandled_videos_pages = video_list_model.unhandled_video_pages(connection).await?;
let semaphore = Semaphore::new(CONFIG.concurrent_limit.video);
let downloader = Downloader::new(bili_client.client.clone());
let mut uppers_mutex: HashMap<i64, (Mutex<()>, Mutex<()>)> = HashMap::new();
let unhandled_videos_pages = video_list_model.unhandled_video_pages(connection).await?;
for (video_model, _) in &unhandled_videos_pages {
uppers_mutex
.entry(video_model.upper_id)
.or_insert_with(|| (Mutex::new(()), Mutex::new(())));
}
let mut tasks = unhandled_videos_pages
let tasks = unhandled_videos_pages
.into_iter()
.map(|(video_model, pages_model)| {
let upper_mutex = uppers_mutex.get(&video_model.upper_id).expect("upper mutex not found");
@@ -127,32 +127,32 @@ pub async fn download_unprocessed_videos(
)
})
.collect::<FuturesUnordered<_>>();
let mut models = Vec::with_capacity(10);
while let Some(res) = tasks.next().await {
match res {
Ok(model) => {
models.push(model);
let mut download_aborted = false;
let mut stream = tasks
// 触发风控时设置 download_aborted 标记并终止流
.take_while(|res| {
if res
.as_ref()
.is_err_and(|e| e.downcast_ref::<DownloadAbortError>().is_some())
{
download_aborted = true;
}
Err(e) => {
if e.downcast_ref::<DownloadAbortError>().is_some() {
error!("下载视频时触发风控,将终止收藏夹下所有下载任务,等待下一轮执行");
break;
}
}
}
// 满十个就写入数据库
if models.len() == 10 {
update_videos_model(std::mem::replace(&mut models, Vec::with_capacity(10)), connection).await?;
}
}
if !models.is_empty() {
futures::future::ready(!download_aborted)
})
// 过滤掉没有触发风控的普通 Err只保留正确返回的 Model
.filter_map(|res| futures::future::ready(res.ok()))
// 将成功返回的 Model 按十个一组合并
.chunks(10);
while let Some(models) = stream.next().await {
update_videos_model(models, connection).await?;
}
if download_aborted {
error!("下载视频时触发风控,终止收藏夹下所有下载任务,等待下一轮执行");
}
video_list_model.log_download_video_end();
Ok(())
}
/// 暂时这样做,后面提取成上下文
#[allow(clippy::too_many_arguments)]
pub async fn download_video_pages(
bili_client: &BiliClient,
@@ -232,7 +232,7 @@ pub async fn download_video_pages(
&video_model.bvid, &video_model.name, task_name, e
),
});
if let Err(e) = results.into_iter().nth(4).expect("not enough results") {
if let Err(e) = results.into_iter().nth(4).context("page download result not found")? {
if e.downcast_ref::<DownloadAbortError>().is_some() {
return Err(e);
}
@@ -242,6 +242,7 @@ pub async fn download_video_pages(
Ok(video_active_model)
}
/// 分发并执行分页下载任务,当且仅当所有分页成功下载或达到最大重试次数时返回 Ok否则根据失败原因返回对应的错误
pub async fn dispatch_download_page(
should_run: bool,
bili_client: &BiliClient,
@@ -254,57 +255,56 @@ pub async fn dispatch_download_page(
return Ok(());
}
let child_semaphore = Semaphore::new(CONFIG.concurrent_limit.page);
let mut tasks = pages
let tasks = pages
.into_iter()
.map(|page_model| download_page(bili_client, video_model, page_model, &child_semaphore, downloader))
.collect::<FuturesUnordered<_>>();
let mut models = Vec::with_capacity(10);
let (mut should_error, mut is_break) = (false, false);
while let Some(res) = tasks.next().await {
match res {
Ok(model) => {
if let Set(status) = model.download_status {
let status = PageStatus::new(status);
if status.should_run().iter().any(|v| *v) {
// 有一个分页没变成终止状态(即下载成功或者重试次数达到限制),就应该向上层传递 Error
should_error = true;
let (mut download_aborted, mut error_occurred) = (false, false);
let mut stream = tasks
.take_while(|res| {
match res {
Ok(model) => {
// 当前函数返回的是所有分页的下载状态,只要有任何一个分页返回新的下载状态标识位是 false当前函数就应该认为是失败的
if model
.download_status
.try_as_ref()
.is_none_or(|status| !PageStatus::new(*status).get_completed())
{
error_occurred = true;
}
}
models.push(model);
}
Err(e) => {
if e.downcast_ref::<DownloadAbortError>().is_some() {
should_error = true;
is_break = true;
break;
Err(e) => {
if e.downcast_ref::<DownloadAbortError>().is_some() {
download_aborted = true;
}
}
}
}
if models.len() == 10 {
update_pages_model(std::mem::replace(&mut models, Vec::with_capacity(10)), connection).await?;
}
}
if !models.is_empty() {
// 仅在发生风控时终止流,其它情况继续执行
futures::future::ready(!download_aborted)
})
.filter_map(|res| futures::future::ready(res.ok()))
.chunks(10);
while let Some(models) = stream.next().await {
update_pages_model(models, connection).await?;
}
if should_error {
if is_break {
error!(
"下载视频 {} - {} 的分页时触发风控,将异常向上传递...",
&video_model.bvid, &video_model.name
);
bail!(DownloadAbortError());
} else {
error!(
"下载视频 {} - {} 的分页时出现了错误,将在下一轮尝试重新处理",
&video_model.bvid, &video_model.name
);
bail!(ProcessPageError());
}
if download_aborted {
error!(
"下载视频 {} - {} 的分页时触发风控,将异常向上传递...",
&video_model.bvid, &video_model.name
);
bail!(DownloadAbortError());
}
if error_occurred {
error!(
"下载视频 {} - {} 的分页时出现了错误,将在下一轮尝试重新处理",
&video_model.bvid, &video_model.name
);
bail!(ProcessPageError());
}
Ok(())
}
/// 下载某个分页,未发生风控且正常运行时返回 Ok(Page::ActiveModel),其中 status 字段存储了新的下载状态,发生风控时返回 DownloadAbortError
pub async fn download_page(
bili_client: &BiliClient,
video_model: &video::Model,
@@ -312,10 +312,7 @@ pub async fn download_page(
semaphore: &Semaphore,
downloader: &Downloader,
) -> Result<page::ActiveModel> {
let permit = semaphore.acquire().await;
if let Err(e) = permit {
return Err(e.into());
}
let _permit = semaphore.acquire().await.context("acquire semaphore failed")?;
let mut status = PageStatus::new(page_model.download_status);
let seprate_status = status.should_run();
let is_single_page = video_model.single_page.context("single_page is null")?;
@@ -415,8 +412,8 @@ pub async fn download_page(
&video_model.bvid, &video_model.name, page_model.pid, task_name, e
),
});
// 查看下载视频的状态,该状态会影响上层是否 break
if let Err(e) = results.into_iter().nth(1).expect("not enough results") {
// 如果下载视频时触发风控,直接返回 DownloadAbortError
if let Err(e) = results.into_iter().nth(1).context("video download result not found")? {
if let Ok(BiliError::RiskControlOccurred) = e.downcast::<BiliError>() {
bail!(DownloadAbortError());
}