diff --git a/Cargo.lock b/Cargo.lock index 2452f10..b482057 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,8 +8,10 @@ version = "1.2.0" dependencies = [ "anyhow", "clap", + "encoding_rs", "futures", "kdam", + "mailparse", "regex", "reqwest", "scraper", @@ -177,6 +179,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "charset" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1f927b07c74ba84c7e5fe4db2baeb3e996ab2688992e39ac68ce3220a677c7e" +dependencies = [ + "base64", + "encoding_rs", +] + [[package]] name = "clap" version = "4.5.45" @@ -262,6 +274,12 @@ dependencies = [ "syn", ] +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "derive_more" version = "2.0.1" @@ -925,6 +943,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mailparse" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60819a97ddcb831a5614eb3b0174f3620e793e97e09195a395bfa948fd68ed2f" +dependencies = [ + "charset", + "data-encoding", + "quoted_printable", +] + [[package]] name = "markup5ever" version = "0.35.0" @@ -1227,6 +1256,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "quoted_printable" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "640c9bd8497b02465aeef5375144c26062e0dcd5939dfcbb0f5db76cb8c17c73" + [[package]] name = "rand" version = "0.8.5" diff --git a/Cargo.toml b/Cargo.toml index 848c6fb..671242c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,8 @@ kdam = { version = "0.6.1", features = ["spinner", "template"] } futures = "0.3.31" scraper = "0.24.0" anyhow = "1.0.98" +mailparse = "0.16.1" +encoding_rs = "0.8" [[bin]] name = "Course-order-assistant" diff --git a/src/core.rs b/src/core.rs index 4e4c0bf..b36a4a4 100644 --- a/src/core.rs +++ b/src/core.rs @@ -1,5 +1,7 @@ use anyhow::{anyhow, Result}; +use encoding_rs::Encoding; use futures::{stream::FuturesUnordered, StreamExt}; +use mailparse::{parse_mail, MailHeaderMap, ParsedMail}; use regex::Regex; use reqwest::Client; use scraper::{Html, Selector}; @@ -26,17 +28,60 @@ pub struct Course { pub course_name: String, #[serde(default)] #[tabled(rename = "選上機率(%)")] - pub sucess_rate: f32, + pub success_rate: f32, #[serde(default)] #[tabled(rename = "選課比例")] pub choice_rate: f32, } +#[derive(Debug, Clone)] +pub struct StudentIdentity { + pub program_type: String, // 四技、二專等 + pub department: String, // 系所 + pub grade: String, // 年級 +} + +#[derive(Debug, Deserialize)] +pub struct CourseDetailResponse { + #[serde(alias = "Display")] + pub display: String, + #[serde(alias = "Result")] + pub result: Vec, +} + +#[derive(Debug, Deserialize)] +pub struct CourseDetail { + #[serde(alias = "EducationCode")] + pub education_code: String, + #[serde(alias = "DepartmentAliase")] + pub department_aliase: String, + #[serde(alias = "Restrict")] + pub restrict: String, + #[serde(alias = "Persons")] + pub persons: i32, +} + pub fn round_digits(num: f32, digits: i32) -> f32 { let base = 10.0_f32.powi(digits); return (num * base).round() / base; } +/// Build a department search string based on student identity, +/// For example, StudentIdentity { program_type: "四技", department: "資訊工程系", grade: "二年級", class: "甲班" } +/// Will generate "四技資訊工程系二" +pub fn build_department_search_string(identity: &StudentIdentity) -> String { + let grade_number = identity + .grade + .chars() + .find(|c| matches!(c, '一' | '二' | '三' | '四')) + .expect("年級資訊不完整"); + + format!( + "{}{}{}", + identity.program_type, identity.department, grade_number + ) +} + pub async fn get_course_info(client: &Client, semester: &str, course_id: String) -> Result { let url = "https://querycourse.ntust.edu.tw/querycourse/api/courses"; let body = json!({ @@ -57,29 +102,99 @@ pub async fn get_course_info(client: &Client, semester: &str, course_id: String) // .wrap_or_exit("人數上限轉換失敗"); data.choice_rate = round_digits(raw_choice_rate, 2); - data.sucess_rate = 100.0; + data.success_rate = 100.0; if data.choice_rate > 0.0 { - data.sucess_rate = 100.0 / data.choice_rate; - if data.sucess_rate > 100.0 { - data.sucess_rate = 100.0; + data.success_rate = 100.0 / data.choice_rate; + if data.success_rate > 100.0 { + data.success_rate = 100.0; } - data.sucess_rate = round_digits(data.sucess_rate, 2); + data.success_rate = round_digits(data.success_rate, 2); } Ok(data) } +/// Use a new probability calculation method for physical education courses +/// Find corresponding enrollment limits based on the user's department information +pub async fn get_pe_course_info_with_identity( + client: &Client, + semester: &str, + course_id: String, + student_identity: &StudentIdentity, +) -> Result { + let mut course = get_course_info(client, semester, course_id.clone()).await?; + + if course_id.contains("PE") { + match get_course_limit_detail(client, semester, &course_id).await { + Ok(limit_response) => { + let search_string = build_department_search_string(student_identity); + + if let Some(dept_limit) = limit_response + .result + .iter() + .find(|d| d.department_aliase == search_string) + { + let restrict_num = dept_limit.restrict.parse().unwrap_or(1.0); + let persons = dept_limit.persons as f32; + + if restrict_num <= 0.0 { + return Ok(course); + } + + course.choice_rate = round_digits(persons / restrict_num, 2); + course.success_rate = if course.choice_rate > 0.0 { + round_digits(100.0_f32.min(100.0 / course.choice_rate), 2) + } else { + 100.0 + }; + } + } + Err(_) => return Ok(course), + } + } + + Ok(course) +} + pub async fn get_semester(client: &Client) -> Result { let url = "https://querycourse.ntust.edu.tw/querycourse/api/semestersinfo"; let data = client.get(url).send().await?.json::().await?; let body = data[0]["Semester"].as_str().unwrap_or_default().to_string(); Ok(body) } +/// Get enrollment limit information for courses +/// Used for probability and ratio calculation of physical education courses +pub async fn get_course_limit_detail( + client: &Client, + semester: &str, + course_no: &str, +) -> Result { + let url = "https://querycourse.ntust.edu.tw/querycourse/api/LimitOnTheNumber"; + let params = [ + ("semester", semester), + ("courseNo", course_no), + ("mylanguage", "zh"), + ]; + + let res = client.get(url).query(¶ms).send().await?; + let response = res.json::().await?; + Ok(response) +} pub async fn fetch_all_courses( course_ids: Vec, client: &Client, semester: &str, callback: impl FnMut(), +) -> (Vec, Vec, Vec) { + fetch_all_courses_with_identity(course_ids, client, semester, None, callback).await +} + +pub async fn fetch_all_courses_with_identity( + course_ids: Vec, + client: &Client, + semester: &str, + student_identity: Option<&StudentIdentity>, + callback: impl FnMut(), ) -> (Vec, Vec, Vec) { let mut unsafe_courses: Vec = Vec::new(); let mut safe_courses = Vec::new(); @@ -89,7 +204,17 @@ pub async fn fetch_all_courses( for course in course_ids.into_iter() { let client = client.clone(); let semester = semester.to_string(); - futures.push(async move { get_course_info(&client, &semester, course).await }); + let identity = student_identity.cloned(); + + futures.push(async move { + if course.contains("PE") { + if let Some(id) = identity.as_ref() { + return get_pe_course_info_with_identity(&client, &semester, course, id).await; + } + } + + get_course_info(&client, &semester, course).await + }); } let mut what = callback; @@ -101,7 +226,7 @@ pub async fn fetch_all_courses( } match result { Ok(course_info) => { - if course_info.sucess_rate == 100.0 { + if course_info.success_rate == 100.0 { safe_courses.push(course_info); } else { unsafe_courses.push(course_info); @@ -116,21 +241,219 @@ pub async fn fetch_all_courses( (safe_courses, unsafe_courses, unknown_courses) } +/// Detect if the content is MHTML format by checking for multipart boundaries +pub fn is_mhtml_format(content: &str) -> bool { + let lower = content.to_ascii_lowercase(); + let first_tag = lower.find('<').unwrap_or(usize::MAX); + let sniff = &lower[..first_tag.min(4096).min(lower.len())]; + + let has_mime = sniff.contains("mime-version:"); + let has_multipart = sniff.contains("content-type: multipart/"); + let has_boundary = sniff.contains("boundary="); + + (has_mime || has_multipart) && has_boundary +} + +/// Extract and decode HTML content from MHTML format +/// Walk through MIME tree to collect candidate HTML parts with metadata +fn decode_part_to_utf8( + part: &ParsedMail<'_>, +) -> Result<(String, usize, Option, Option)> { + let raw = part.get_body_raw()?; + let raw_len = raw.len(); + + let charset = part + .ctype + .params + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("charset")) + .map(|(_, v)| v.as_str()) + .unwrap_or("utf-8"); + + let enc = Encoding::for_label(charset.as_bytes()).unwrap_or(encoding_rs::UTF_8); + let (cow, _, _) = enc.decode(&raw); + let html = cow.into_owned(); + + let content_location = part.get_headers().get_first_value("Content-Location"); + let content_id = part + .get_headers() + .get_first_value("Content-ID") + .map(|mut s| { + s.retain(|ch| ch != '<' && ch != '>'); + s + }); + + Ok((html, raw_len, content_location, content_id)) +} + +fn collect_html_parts<'a>( + part: &'a ParsedMail<'a>, + acc: &mut Vec<(String, usize, Option, Option)>, +) -> Result<()> { + if part.ctype.mimetype.eq_ignore_ascii_case("text/html") { + acc.push(decode_part_to_utf8(part)?); + } + for sp in &part.subparts { + collect_html_parts(sp, acc)?; + } + Ok(()) +} + +fn pick_best_html( + mut candidates: Vec<(String, usize, Option, Option)>, + root_cid: Option<&str>, +) -> Option { + if candidates.is_empty() { + return None; + } + + if let Some(cid) = root_cid { + if let Some((html, _, _, _)) = candidates + .iter() + .find(|(_, _, _, c)| { + if let Some(id) = c { + id.eq_ignore_ascii_case(cid) + } else { + false + } + }) + .cloned() + { + return Some(html); + } + } + + candidates.sort_by(|a, b| { + let score = |loc: &Option, len: usize, html: &str| -> (i32, i64) { + let mut s = 0; + if let Some(l) = loc { + let ll = l.to_ascii_lowercase(); + if ll.ends_with(".html") || ll.ends_with(".htm") { + s += 2; + } + let p = ll.split(&['?', '#'][..]).next().unwrap_or(&ll); + let depth = p.matches('/').count(); + if depth <= 3 { + s += 1; + } + } + let h = html.to_ascii_lowercase(); + if h.contains(" anyhow::Result { + let mail = parse_mail(content.as_bytes())?; + + let root_cid = mail + .ctype + .params + .iter() + .find(|(k, _)| k.eq_ignore_ascii_case("start")) + .map(|(_, v)| v.trim_matches(['<', '>'].as_ref()).to_string()); + + let mut candidates = Vec::new(); + collect_html_parts(&mail, &mut candidates)?; + pick_best_html(candidates, root_cid.as_deref()) + .ok_or_else(|| anyhow::anyhow!("在 MHTML 中找不到 text/html part")) +} + +/// If it looks like MHTML, parse and return decoded UTF-8 HTML. +/// Otherwise, return the original content. +pub fn preprocess_file_content(file_content: &str) -> String { + if is_mhtml_format(file_content) { + if let Ok(html) = extract_html_from_mhtml(file_content) { + return html; + } + } + + file_content.to_string() +} + pub fn extract_course_ids(file_content: &str) -> Vec { + let processed = preprocess_file_content(file_content); + let re = Regex::new(r"[A-Z]{2}[G|1-9]{1}[AB|0-9]{3}[0|1|3|5|7]{1}[0-9]{2}") .expect("Regex 模板創建失敗"); - let document = Html::parse_document(file_content); + let document = Html::parse_document(&processed); let selector = Selector::parse("#cartTable").expect("無法解析選擇器"); - if let Some(table_element) = document.select(&selector).next() { - let table_html = table_element.inner_html(); - re.find_iter(&table_html) + if let Some(node) = document.select(&selector).next() { + let sub_html = node.html(); + re.find_iter(&sub_html) .map(|m| m.as_str().to_string()) .collect() } else { - re.find_iter(file_content) + re.find_iter(&processed) .map(|m| m.as_str().to_string()) .collect() } } + +/// Extract student identity information from HTML content +/// Looks for student identity in the format: "四技 資訊工程系 二年級 甲班" +pub fn extract_student_identity(html_content: &str) -> Result { + let processed_content = preprocess_file_content(html_content); + let document = Html::parse_document(&processed_content); + let selector = Selector::parse("span").expect("無法解析選擇器"); + + // Find the span element containing grade information + for element in document.select(&selector) { + let text_content = element.inner_html(); + let text = text_content.trim(); + + // Skip elements that don't contain grade info + if !text.contains("年級") { + continue; + } + + // Found grade info - attempt to parse it using string splitting + // Format: [學制] [系所] [年級] [班級], separated by spaces, fixed order + let parts: Vec<&str> = text.split_whitespace().collect(); + + // Check if we have enough parts (at least 4) + if parts.len() < 3 { + continue; + } + + // Find the grade position + let grade_pos = match parts.iter().position(|&part| part.contains("年級")) { + Some(pos) => pos, + None => continue, + }; + + // Validate grade position constraints + if grade_pos < 2 || grade_pos >= parts.len() { + continue; + } + + // Extract components - all validations passed + let program_type = parts[grade_pos - 2].to_string(); + let department = parts[grade_pos - 1].to_string(); + let grade = parts[grade_pos].to_string(); + + return Ok(StudentIdentity { + program_type, + department, + grade, + }); + } + + // No grade information found in any span element + Err(anyhow!( + "未找到包含年級資訊的文字內容。請確認 HTML 內容是否正確,或將此錯誤訊息回報給開發者。" + )) +} diff --git a/src/main.rs b/src/main.rs index 5c3946f..33b087a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,7 @@ use tabled::{ Table, }; pub mod core; -use core::{extract_course_ids, fetch_all_courses, get_semester}; +use core::{extract_course_ids, extract_student_identity, fetch_all_courses, fetch_all_courses_with_identity, get_semester}; #[derive(Parser, Debug)] #[command(author, about = "台灣科技大學\n選課志願序小幫手", long_about)] @@ -92,16 +92,33 @@ async fn main() { let semester = get_semester(&client).await.wrap_or_exit("無法取得學期資訊"); + // Try to extract student identity from the HTML file for enhanced PE course calculations + let student_identity = match extract_student_identity(&file_content) { + Ok(identity) => { + println!("✓ 成功提取學生身份資訊: {} {} {}", + identity.program_type, identity.department, identity.grade); + Some(identity) + } + Err(err) => { + println!("⚠ 無法提取學生身份資訊,將使用一般計算方式計算體育課人數"); + println!("錯誤詳細: {}", err); + + None + } + }; + let mut pb = get_process_bar(course_ids.len()); let callback = || { let _ = pb.update(1); }; - // let callback = || { - // }; - - let (mut safe_courses, mut unsafe_courses, unknown_courses) = - fetch_all_courses(course_ids, &client, &semester, callback).await; + // Use enhanced course fetching with student identity if available + let (mut safe_courses, mut unsafe_courses, unknown_courses) = + if let Some(ref identity) = student_identity { + fetch_all_courses_with_identity(course_ids, &client, &semester, Some(identity), callback).await + } else { + fetch_all_courses(course_ids, &client, &semester, callback).await + }; for course in unknown_courses { eprint!("\n警告: 查無課程資料,課程代碼: {}", course); }