diff --git a/core/src/machine_learning/face_extractor.rs b/core/src/machine_learning/face_extractor.rs
index b464cb7a..ff0fc761 100644
--- a/core/src/machine_learning/face_extractor.rs
+++ b/core/src/machine_learning/face_extractor.rs
@@ -5,12 +5,15 @@
 use crate::photo::model::PictureId;
 use anyhow::*;
+use super::nms::Nms;
+use image::ImageReader;
+use std::io::Cursor;
 use std::path::{Path, PathBuf};
 use std::result::Result::Ok;
 
 use rust_faces::{
     BlazeFaceParams, Face as DetectedFace, FaceDetection, FaceDetectorBuilder, InferParams,
-    MtCnnParams, Provider, ToArray3,
+    Provider, ToArray3,
 };
 
 use gdk4::prelude::TextureExt;
@@ -82,26 +85,14 @@ impl Face {
 pub struct FaceExtractor {
     base_path: PathBuf,
 
-    /// I think this is the "back model" trained on
-    /// photos taken by the back camera of phones.
-    blaze_face_640_model: Box<dyn FaceDetector>,
+    /// BlazeFace model configured to match large to huge faces, like selfies.
+    blaze_face_huge: Box<dyn FaceDetector>,
 
-    /// I think this is the "front model" trained on
-    /// photos taken by the selfie camera of phones.
-    blaze_face_320_model: Box<dyn FaceDetector>,
+    /// BlazeFace model configured to match medium to large faces.
+    blaze_face_big: Box<dyn FaceDetector>,
 
-    /// An alternative model with good results, but much slower than BlazeFace.
-    mtcnn_model: Box<dyn FaceDetector>,
-}
-
-/// What kind of face extraction model to use.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub enum ExtractMode {
-    /// A fast but less accurate model suitable for mobile devices.
-    Lightweight,
-
-    /// A slow but more accurate model suitable for desktop devices.
-    Heavyweight,
+    /// BlazeFace model configured to match small to medium faces.
+    blaze_face_small: Box<dyn FaceDetector>,
 }
 
 impl FaceExtractor {
@@ -109,49 +100,66 @@
         let base_path = PathBuf::from(base_path).join("photo_faces");
         std::fs::create_dir_all(&base_path)?;
 
-        let bz_params = BlazeFaceParams {
-            score_threshold: 0.95, // confidence match is a face
+        // Tweaking the target size seems to affect which faces are detected.
+        // Testing against my library, it looks like smaller numbers match bigger
+        // faces, and bigger numbers match smaller faces.
+        //
+        // 1280. Default. Misses larger faces.
+        // 960. Three quarters. Matches a mix of some larger, some smaller.
+        // 640. Half default. Misses a mix of some larger, some smaller.
+        // 320. Quarter default. Matches only very big faces.
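+        //
+        // In sketch form, the detection pass further down boils down to this
+        // (eliding the per-model label that is stored alongside each face):
+        //
+        //   let mut faces = vec![];
+        //   for detector in [&blaze_face_big, &blaze_face_small, &blaze_face_huge] {
+        //       faces.extend(detector.detect(image.view().into_dyn())?);
+        //   }
+        //   let faces = Nms::default().suppress_non_maxima(faces);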
+
+        let bz_params_huge = BlazeFaceParams {
+            score_threshold: 0.95,
+            target_size: 160,
             ..BlazeFaceParams::default()
         };
 
-        let blaze_face_640_model =
-            FaceDetectorBuilder::new(FaceDetection::BlazeFace640(bz_params.clone()))
-                .download()
-                .infer_params(InferParams {
-                    provider: Provider::OrtCpu,
-                    intra_threads: Some(5),
-                    ..Default::default()
-                })
-                .build()?;
-
-        let blaze_face_320_model = FaceDetectorBuilder::new(FaceDetection::BlazeFace320(bz_params))
+        let blaze_face_huge = FaceDetectorBuilder::new(FaceDetection::BlazeFace640(bz_params_huge))
             .download()
             .infer_params(InferParams {
                 provider: Provider::OrtCpu,
-                //intra_threads: Some(5),
+                intra_threads: Some(5),
                 ..Default::default()
             })
             .build()?;
 
-        let mtcnn_params = MtCnnParams {
-            //thresholds: [0.6, 0.7, 0.7],
-            ..MtCnnParams::default()
+        let bz_params_big = BlazeFaceParams {
+            score_threshold: 0.95,
+            target_size: 640,
+            ..BlazeFaceParams::default()
         };
 
-        let mtcnn_model = FaceDetectorBuilder::new(FaceDetection::MtCnn(mtcnn_params))
+        let blaze_face_big = FaceDetectorBuilder::new(FaceDetection::BlazeFace640(bz_params_big))
            .download()
            .infer_params(InferParams {
                provider: Provider::OrtCpu,
-                //intra_threads: Some(5),
+                intra_threads: Some(5),
                ..Default::default()
            })
            .build()?;
 
+        let bz_params_small = BlazeFaceParams {
+            score_threshold: 0.95,
+            target_size: 1280,
+            ..BlazeFaceParams::default()
+        };
+
+        let blaze_face_small =
+            FaceDetectorBuilder::new(FaceDetection::BlazeFace640(bz_params_small))
+                .download()
+                .infer_params(InferParams {
+                    provider: Provider::OrtCpu,
+                    //intra_threads: Some(5),
+                    ..Default::default()
+                })
+                .build()?;
+
         Ok(FaceExtractor {
             base_path,
-            blaze_face_640_model,
-            blaze_face_320_model,
-            mtcnn_model,
+            blaze_face_huge,
+            blaze_face_big,
+            blaze_face_small,
         })
     }
 
@@ -160,12 +168,8 @@
         &self,
         picture_id: &PictureId,
         picture_path: &Path,
-        extract_mode: ExtractMode,
     ) -> Result<Vec<Face>> {
-        info!(
-            "Detecting faces in {:?} using {:?} model",
-            picture_path, extract_mode
-        );
+        info!("Detecting faces in {:?}", picture_path);
 
         let original_image = Self::open_image(picture_path).await?;
 
@@ -173,40 +178,40 @@
         let mut faces: Vec<(DetectedFace, String)> = vec![];
 
-        if extract_mode == ExtractMode::Lightweight || extract_mode == ExtractMode::Heavyweight {
-            let result = self.blaze_face_640_model.detect(image.view().into_dyn());
-            if let Ok(detected_faces) = result {
-                let detected_faces = Self::remove_duplicates(detected_faces, &faces);
-                for f in detected_faces {
-                    faces.push((f, "blaze_face_640".into()));
-                }
-            } else {
-                error!("Failed extracting faces with blaze_face_640: {:?}", result);
+        let result = self.blaze_face_big.detect(image.view().into_dyn());
+        if let Ok(detected_faces) = result {
+            for f in detected_faces {
+                faces.push((f, "blaze_face_big".into()));
             }
+        } else {
+            error!("Failed extracting faces with blaze_face_big: {:?}", result);
         }
 
-        let result = self.blaze_face_320_model.detect(image.view().into_dyn());
-        if let Ok(detected_faces) = result {
-            let detected_faces = Self::remove_duplicates(detected_faces, &faces);
-            for f in detected_faces {
-                faces.push((f, "blaze_face_320".into()));
-            }
-        } else {
-            error!("Failed extracting faces with blaze_face_320: {:?}", result);
+        let result = self.blaze_face_small.detect(image.view().into_dyn());
+        if let Ok(detected_faces) = result {
+            for f in detected_faces {
+                faces.push((f, "blaze_face_small".into()));
            }
+        } else {
+            error!(
+                "Failed extracting faces with blaze_face_small: {:?}",
+                result
+            );
         }
 
-        if extract_mode == ExtractMode::Heavyweight {
-            let result = self.mtcnn_model.detect(image.view().into_dyn());
-            if let Ok(detected_faces) = result {
-                let detected_faces = Self::remove_duplicates(detected_faces, &faces);
-                for f in detected_faces {
-                    faces.push((f, "mtcnn".into()));
-                }
-            } else {
-                error!("Failed extracting faces with MTCNN model: {:?}", result);
+        let result = self.blaze_face_huge.detect(image.view().into_dyn());
+        if let Ok(detected_faces) = result {
+            for f in detected_faces {
+                faces.push((f, "blaze_face_huge".into()));
             }
+        } else {
+            error!("Failed extracting faces with blaze_face_huge: {:?}", result);
         }
 
+        // Use "non-maxima suppression" to remove duplicate matches.
+        let nms = Nms::default();
+        let mut faces = nms.suppress_non_maxima(faces);
+
         debug!(
             "Picture {} has {} faces. Found: {:?}",
             picture_id,
@@ -327,65 +334,6 @@ impl FaceExtractor {
         Ok(faces)
     }
 
-    /// Remove any duplicates where being a duplicate is determined by
-    /// the distance between centres being below a certain threshold
-    fn remove_duplicates(
-        detected_faces: Vec<DetectedFace>,
-        existing_faces: &[(DetectedFace, String)],
-    ) -> Vec<DetectedFace> {
-        detected_faces
-            .into_iter()
-            .filter(|f| f.confidence >= 0.95)
-            .filter(|f1| {
-                let nearest = existing_faces
-                    .iter()
-                    .min_by_key(|f2| Self::nose_distance(f1, &f2.0) as u32);
-
-                nearest.is_none()
-                    || nearest.is_some_and(|f2| {
-                        Self::distance(Self::centre(f1), Self::centre(&f2.0)) > 150.0
-                    })
-            })
-            .collect()
-    }
-
-    /// Computes Euclidean distance between two points
-    fn distance(coord1: (f32, f32), coord2: (f32, f32)) -> f32 {
-        let (x1, y1) = coord1;
-        let (x2, y2) = coord2;
-
-        let x = x1 - x2;
-        let x = x * x;
-
-        let y = y1 - y2;
-        let y = y * y;
-
-        f32::sqrt(x + y)
-    }
-
-    /// Distance between the nose landmarks of two faces.
-    /// Will fallback to centre of face bounds if no landmarks.
-    fn nose_distance(face1: &DetectedFace, face2: &DetectedFace) -> f32 {
-        if let (Some(face1_landmarks), Some(face2_landmarks)) = (&face1.landmarks, &face2.landmarks)
-        {
-            // If we have landmarks, then the first two are the right and left eyes.
-            // Use the midpoint between the eyes as the centre of the thumbnail.
-            let coord1 = (face1_landmarks[2].0, face1_landmarks[2].1);
-            let coord2 = (face2_landmarks[2].0, face2_landmarks[2].1);
-            Self::distance(coord1, coord2)
-        } else {
-            let coord1 = (
-                face1.rect.x + (face1.rect.width / 2.0),
-                face1.rect.y + (face1.rect.height / 2.0),
-            );
-            let coord2 = (
-                face2.rect.x + (face2.rect.width / 2.0),
-                face2.rect.y + (face2.rect.height / 2.0),
-            );
-            Self::distance(coord1, coord2)
-        }
-    }
-
     /// Computes the centre of a face.
     fn centre(f: &DetectedFace) -> (f32, f32) {
         if let Some(ref landmarks) = f.landmarks {
@@ -407,16 +355,12 @@
         let mut loader = glycin::Loader::new(file);
         loader.sandbox_selector(glycin::SandboxSelector::FlatpakSpawn);
         let image = loader.load().await?;
-
         let frame = image.next_frame().await?;
+        let bytes = frame.texture().save_to_png_bytes();
+        let image =
+            ImageReader::with_format(Cursor::new(bytes), image::ImageFormat::Png).decode()?;
 
-        let png_file = tempfile::Builder::new().suffix(".png").tempfile()?;
-
-        // FIXME can we avoid this step of saving to the file system and just
-        // load the image from memory?
-        frame.texture().save_to_png(png_file.path())?;
-
-        Ok(image::open(png_file.path())?)
+        Ok(image)
     }
 }
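An aside on `open_image`: the new in-memory path resolves the old FIXME, but it still encodes each frame to PNG and decodes it again. A possible future shortcut, sketched here purely as an assumption (it presumes the frame is plain 8-bit RGBA, which gdk does not guarantee, and that is presumably why the patch plays it safe with PNG):

```rust
use gdk4::prelude::TextureExt;

// Hypothetical alternative to the PNG round-trip: download the texture's
// raw pixels directly and wrap them in an image buffer.
fn texture_to_image(texture: &gdk4::Texture) -> Option<image::DynamicImage> {
    let (w, h) = (texture.width() as usize, texture.height() as usize);
    let mut buf = vec![0u8; 4 * w * h];
    texture.download(&mut buf, 4 * w); // stride = bytes per row
    image::RgbaImage::from_raw(w as u32, h as u32, buf).map(image::DynamicImage::ImageRgba8)
}
```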
diff --git a/core/src/machine_learning/mod.rs b/core/src/machine_learning/mod.rs
index cc5e89ed..e1dd378f 100644
--- a/core/src/machine_learning/mod.rs
+++ b/core/src/machine_learning/mod.rs
@@ -5,4 +5,5 @@
 //pub mod blaze_face;
 pub mod face_extractor;
 pub mod face_recognizer;
+pub mod nms;
 //pub mod yolov8;
diff --git a/core/src/machine_learning/nms.rs b/core/src/machine_learning/nms.rs
new file mode 100644
index 00000000..0354660d
--- /dev/null
+++ b/core/src/machine_learning/nms.rs
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: © 2024 David Bliss
+// SPDX-FileCopyrightText: © 2023 Rusty Builder Indies
+//
+// SPDX-License-Identifier: MIT
+
+use std::collections::HashMap;
+
+use rust_faces::Face;
+
+/// Non-maximum suppression.
+#[derive(Copy, Clone, Debug)]
+pub struct Nms {
+    pub iou_threshold: f32,
+}
+
+impl Default for Nms {
+    fn default() -> Self {
+        Self { iou_threshold: 0.3 }
+    }
+}
+
+impl Nms {
+    /// Suppress non-maxima faces.
+    ///
+    /// # Arguments
+    ///
+    /// * `faces` - Faces to suppress.
+    ///
+    /// # Returns
+    ///
+    /// * `Vec<(Face, String)>` - Suppressed faces.
+    ///
+    /// This method is lifted from the rust-faces project and modified so that
+    /// each face is a tuple that carries a model name.
+    pub fn suppress_non_maxima(&self, mut faces: Vec<(Face, String)>) -> Vec<(Face, String)> {
+        faces.sort_by(|a, b| a.0.confidence.partial_cmp(&b.0.confidence).unwrap());
+
+        let mut faces_map = HashMap::new();
+        faces.iter().rev().enumerate().for_each(|(i, face)| {
+            faces_map.insert(i, face);
+        });
+
+        let mut nms_faces = Vec::with_capacity(faces.len());
+        let mut count = 0;
+        while !faces_map.is_empty() {
+            if let Some((_, face)) = faces_map.remove_entry(&count) {
+                nms_faces.push(face.clone());
+                faces_map.retain(|_, face2| face.0.rect.iou(&face2.0.rect) < self.iou_threshold);
+            }
+            count += 1;
+        }
+
+        nms_faces
+    }
+}
diff --git a/i18n/en-US/fotema.ftl b/i18n/en-US/fotema.ftl
index c37ded49..db167c99 100644
--- a/i18n/en-US/fotema.ftl
+++ b/i18n/en-US/fotema.ftl
@@ -42,17 +42,15 @@ people-page = People
 # Status page shown for people album when face detection is disabled.
 people-page-status-off =
     .title = Enable face detection?
-    .description = { -app-name } can automatically detect faces and recognize people. Do you want to enable this feature?
+    .description = { -app-name } can automatically detect faces and recognize people, but this is a time-consuming process. Do you want to enable this feature?
     .notice = { -app-name } must download about 45 megabytes of data to recognize faces and people.
-    .enable-mobile = Enable for mobile
-    .enable-desktop = Enable for desktop
+    .enable = Enable
 
 # Status page shown for people album when no people are found.
 people-page-status-no-people =
     .title = No people found
     .description = { -app-name } will look for faces in new photos when launched.
-        If you've told { -app-name } that a face is for a person you know, then { -app-name }
-        will also look for new photos of that person.
+        Name the people in your photos so { -app-name } can make an album for each person.
 
 ## Thumbnail decorations
@@ -238,10 +236,6 @@ prefs-views-selfies = Selfies
 
-# Set face detection mode. Off, lightweight mobile model, or heavyweight
-# desktop model
+# Switch to enable or disable face detection.
 prefs-views-faces = Face Detection
-    .subtitle = Enable face detection.
-    .off = Off
-    .enable-mobile = Mobile
-    .enable-desktop = Desktop
+    .subtitle = Enable face detection when { -app-name } launches. This is a time-consuming process.
 
 ## Progress bar for background tasks
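A quick sanity check of the 0.3 threshold in `Nms::default()`, using two hypothetical 200×200 detections of the same face offset by 50 pixels in each axis. This is an editor's worked example, not part of the patch:

```rust
// A = (x 100, y 100, w 200, h 200), B = (x 150, y 150, w 200, h 200).
// The boxes overlap by 150 pixels in each axis.
let intersection = 150.0_f32 * 150.0;             // 22_500
let union = 2.0 * (200.0 * 200.0) - intersection; // 57_500
let iou = intersection / union;                   // ≈ 0.39
assert!(iou > 0.3); // above the threshold, so the lower-confidence box is dropped
```

So two detectors reporting roughly the same face will collapse to the single highest-confidence match, while boxes with less than about a third of their area in common are kept as distinct faces.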
diff --git a/src/app.rs b/src/app.rs
index c60342d2..b658a36f 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -97,11 +97,8 @@ pub enum FaceDetectionMode {
     #[default]
     Off,
 
-    /// Enable with lightweight face detection model suitable for mobile.
-    Mobile,
-
-    /// Enable with heavyweight face detection model suitable for desktop.
-    Desktop,
+    /// Enable face detection.
+    On,
 }
 
 /// Settings the user can change in the preferences dialog.
diff --git a/src/app/background/bootstrap.rs b/src/app/background/bootstrap.rs
index 214b2ccb..a6f975c4 100644
--- a/src/app/background/bootstrap.rs
+++ b/src/app/background/bootstrap.rs
@@ -16,7 +16,6 @@ use fotema_core::video;
 use fotema_core::visual;
 use fotema_core::people;
 use fotema_core::PictureId;
-use fotema_core::machine_learning::face_extractor::ExtractMode;
 
 use std::sync::{Arc, Mutex};
 use std::time::Instant;
@@ -181,14 +180,13 @@ impl Bootstrap {
     fn add_task_photo_detect_faces(&mut self) {
         let sender = self.photo_detect_faces.sender().clone();
-        let mode = match self.settings_state.read().face_detection_mode {
-            FaceDetectionMode::Off => None,
-            FaceDetectionMode::Mobile => Some(ExtractMode::Lightweight),
-            FaceDetectionMode::Desktop => Some(ExtractMode::Heavyweight),
+        let mode = self.settings_state.read().face_detection_mode;
+        match mode {
+            FaceDetectionMode::Off => {},
+            FaceDetectionMode::On => {
+                self.enqueue(Box::new(move || sender.emit(PhotoDetectFacesInput::DetectForAllPictures)));
+            },
         };
-        if let Some(mode) = mode {
-            self.enqueue(Box::new(move || sender.emit(PhotoDetectFacesInput::DetectForAllPictures(mode))));
-        }
     }
 
     fn add_task_photo_detect_faces_for_one(&mut self, picture_id: PictureId) {
diff --git a/src/app/background/load_library.rs b/src/app/background/load_library.rs
index 431bc4c4..e9d1f715 100644
--- a/src/app/background/load_library.rs
+++ b/src/app/background/load_library.rs
@@ -9,7 +9,7 @@
 use fotema_core::visual::Repository;
 use fotema_core::Visual;
 use std::sync::Arc;
 use anyhow::*;
-use tracing::error;
+use tracing::{error, info};
 
 #[derive(Debug)]
 pub enum LoadLibraryInput {
@@ -52,6 +52,8 @@ impl LoadLibrary {
             .map(Arc::new)
             .collect::<Vec<Arc<Visual>>>();
 
+        info!("Loaded {} visual items", all.len());
+
         let mut index = self.state.write();
         index.clear();
         index.append(&mut all);
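For context on the `rayon::spawn` + `block_on` pairing that photo_detect_faces.rs relies on below: calling `block_on` on the component executor's own thread can panic the async runtime (or stall the UI), so the blocking work is first pushed onto a rayon worker thread, which has no runtime attached. A minimal sketch of the pattern, with illustrative names:

```rust
use futures::executor::block_on;

// `make_future` stands in for the async face-extraction call.
fn run_detection<F, Fut>(make_future: F)
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: std::future::Future<Output = ()>,
{
    // A rayon thread is a plain OS thread, so blocking it on a future is safe.
    rayon::spawn(move || block_on(make_future()));
}
```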
diff --git a/src/app/background/photo_detect_faces.rs b/src/app/background/photo_detect_faces.rs
index ecb77767..1a7a1f31 100644
--- a/src/app/background/photo_detect_faces.rs
+++ b/src/app/background/photo_detect_faces.rs
@@ -14,7 +14,6 @@
 use tracing::{error, info};
 use futures::executor::block_on;
 
 use fotema_core::machine_learning::face_extractor::FaceExtractor;
-use fotema_core::machine_learning::face_extractor::ExtractMode;
 use fotema_core::people;
 use fotema_core::photo::PictureId;
@@ -27,7 +26,7 @@
 
 #[derive(Debug)]
 pub enum PhotoDetectFacesInput {
-    DetectForAllPictures(ExtractMode),
+    DetectForAllPictures,
     DetectForOnePicture(PictureId),
 }
@@ -59,23 +58,23 @@ impl PhotoDetectFaces {
         let result = self.repo.get_file_to_scan(picture_id)?;
         if let Some(picture_path) = result {
             let unprocessed = vec![(picture_id, picture_path)];
-            self.detect(sender, ExtractMode::Heavyweight, unprocessed)
+            self.detect(sender, unprocessed)
         } else {
             Err(anyhow!("No file to scan"))
         }
     }
 
-    fn detect_for_all(&self, sender: ComponentSender<Self>, extract_mode: ExtractMode) -> Result<()> {
+    fn detect_for_all(&self, sender: ComponentSender<Self>) -> Result<()> {
         let unprocessed: Vec<(PictureId, PathBuf)> = self.repo
             .find_need_face_scan()?
             .into_iter()
             .filter(|(_, path)| path.exists())
             .collect();
 
-        self.detect(sender, extract_mode, unprocessed)
+        self.detect(sender, unprocessed)
     }
 
-    fn detect(&self, sender: ComponentSender<Self>, extract_mode: ExtractMode, unprocessed: Vec<(PictureId, PathBuf)>) -> Result<()> {
+    fn detect(&self, sender: ComponentSender<Self>, unprocessed: Vec<(PictureId, PathBuf)>) -> Result<()> {
         let start = std::time::Instant::now();
 
         let count = unprocessed.len();
@@ -108,7 +107,7 @@
             // Careful! panic::catch_unwind returns Ok(Err) if the evaluated expression returns
             // an error but doesn't panic.
             let result = block_on(async {
-                extractor.extract_faces(picture_id, path, extract_mode).await
+                extractor.extract_faces(picture_id, path).await
             }).and_then(|faces| repo.clone().add_face_scans(picture_id, &faces));
 
             if result.is_err() {
@@ -144,13 +143,13 @@ impl Worker for PhotoDetectFaces {
     fn update(&mut self, msg: Self::Input, sender: ComponentSender<Self>) {
         match msg {
-            PhotoDetectFacesInput::DetectForAllPictures(extract_mode) => {
+            PhotoDetectFacesInput::DetectForAllPictures => {
                 info!("Extracting faces for all pictures...");
                 let this = self.clone();
 
                 // Avoid runtime panic from calling block_on
                 rayon::spawn(move || {
-                    if let Err(e) = this.detect_for_all(sender, extract_mode) {
+                    if let Err(e) = this.detect_for_all(sender) {
                         error!("Failed to extract photo faces: {}", e);
                     }
                 });
diff --git a/src/app/components/albums/album.rs b/src/app/components/albums/album.rs
index 545a008c..e740aa37 100644
--- a/src/app/components/albums/album.rs
+++ b/src/app/components/albums/album.rs
@@ -359,6 +359,8 @@ impl Album {
         //self.photo_grid.add_filter(move |item| (self.photo_grid_filter)(&item.picture));
         self.photo_grid.extend_from_iter(all);
 
+        info!("{} items added to album", self.photo_grid.len());
+
         self.go_to_last();
     }
diff --git a/src/app/components/albums/people_album.rs b/src/app/components/albums/people_album.rs
index a509e8cc..b82f2e61 100644
--- a/src/app/components/albums/people_album.rs
+++ b/src/app/components/albums/people_album.rs
@@ -58,9 +58,7 @@ pub enum PeopleAlbumInput {
     SettingsChanged,
 
-    EnableForMobile,
-
-    EnableForDesktop,
+    EnableFaceDetection,
 }
 
 #[derive(Debug)]
@@ -184,17 +182,10 @@ impl SimpleComponent for PeopleAlbum {
                     set_orientation: gtk::Orientation::Vertical,
 
                     gtk::Button {
-                        set_label: &fl!("people-page-status-off", "enable-mobile"),
-                        //add_css_class: "suggested-action",
-                        add_css_class: "pill",
-                        connect_clicked => PeopleAlbumInput::EnableForMobile,
-                    },
-
-                    gtk::Button {
-                        set_label: &fl!("people-page-status-off", "enable-desktop"),
+                        set_label: &fl!("people-page-status-off", "enable"),
                         //add_css_class: "suggested-action",
                         add_css_class: "pill",
-                        connect_clicked => PeopleAlbumInput::EnableForDesktop,
+                        connect_clicked => PeopleAlbumInput::EnableFaceDetection,
                     },
                 }
             }
@@ -262,22 +253,13 @@
             PeopleAlbumInput::SettingsChanged => {
                 self.refresh();
             },
-            PeopleAlbumInput::EnableForMobile => {
+            PeopleAlbumInput::EnableFaceDetection => {
                 let mut settings = self.settings_state.read().clone();
-                settings.face_detection_mode = FaceDetectionMode::Mobile;
+                settings.face_detection_mode = FaceDetectionMode::On;
                 *self.settings_state.write() = settings;
                 self.refresh();
                 let _ = sender.output(PeopleAlbumOutput::EnableFaceDetection);
             },
-
-            PeopleAlbumInput::EnableForDesktop => {
-                let mut settings = self.settings_state.read().clone();
-                settings.face_detection_mode = FaceDetectionMode::Desktop;
-                *self.settings_state.write() = settings;
-                self.refresh();
-                let _ = sender.output(PeopleAlbumOutput::EnableFaceDetection);
-            },
-
         }
     }
 }
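One detail worth noting in the handler above: settings are read and cloned before the write lock is taken. Assuming `SettingsState` wraps an `RwLock`-style shared state (as relm4's `SharedState` does), holding a read guard while acquiring the write lock would deadlock, hence the clone-then-write shape:

```rust
// Sketch of the clone-then-write update used above.
let mut settings = settings_state.read().clone(); // temporary read guard ends with this statement
settings.face_detection_mode = FaceDetectionMode::On;
*settings_state.write() = settings; // no read guard still held, so no deadlock
```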
diff --git a/src/app/components/preferences.rs b/src/app/components/preferences.rs
index d39aa4cd..b3975be9 100644
--- a/src/app/components/preferences.rs
+++ b/src/app/components/preferences.rs
@@ -4,7 +4,6 @@
 use relm4::{adw, ComponentParts, ComponentSender, SimpleComponent};
 use relm4::adw::prelude::*;
-use relm4::gtk;
 
 use tracing::info;
 
@@ -14,7 +13,6 @@ pub struct PreferencesDialog {
     parent: adw::ApplicationWindow,
-    face_detection_mode_row: adw::ComboRow,
     dialog: adw::PreferencesDialog,
 
     settings_state: SettingsState,
@@ -22,6 +20,12 @@
     settings: Settings,
 }
 
+impl PreferencesDialog {
+    pub fn is_face_detection_active(&self) -> bool {
+        self.settings.face_detection_mode == FaceDetectionMode::On
+    }
+}
+
 #[derive(Debug)]
 pub enum PreferencesInput {
     /// Show the preferences dialog.
@@ -63,11 +67,19 @@
                 },
 
                 #[local_ref]
-                face_detection_mode_row -> adw::ComboRow {
+                face_detection_mode_row -> adw::SwitchRow {
                     set_title: &fl!("prefs-views-faces"),
                     set_subtitle: &fl!("prefs-views-faces", "subtitle"),
-                    connect_selected_item_notify[sender] => move |row| {
-                        let mode = FaceDetectionMode::from_repr(row.selected()).unwrap_or_default();
+
+                    #[watch]
+                    set_active: model.is_face_detection_active(),
+
+                    connect_active_notify[sender] => move |switch| {
+                        let mode = if switch.is_active() {
+                            FaceDetectionMode::On
+                        } else {
+                            FaceDetectionMode::Off
+                        };
                         let _ = sender.input_sender().send(PreferencesInput::UpdateFaceDetectionMode(mode));
                     },
                 }
@@ -85,17 +97,12 @@
         settings_state.subscribe(sender.input_sender(), |settings| PreferencesInput::SettingsChanged(settings.clone()));
 
-        let face_detection_mode_row = adw::ComboRow::new();
-        let list = gtk::StringList::new(&[
-            &fl!("prefs-views-faces", "off"),
-            &fl!("prefs-views-faces", "enable-mobile"),
-            &fl!("prefs-views-faces", "enable-desktop"),
-        ]);
-        face_detection_mode_row.set_model(Some(&list));
+        let face_detection_mode_row = adw::SwitchRow::builder()
+            .active(settings_state.read().face_detection_mode == FaceDetectionMode::On)
+            .build();
 
         let model = Self {
             settings_state: settings_state.clone(),
-            face_detection_mode_row: face_detection_mode_row.clone(),
             parent,
             dialog: dialog.clone(),
             settings: settings_state.read().clone(),
@@ -117,12 +124,6 @@
             PreferencesInput::SettingsChanged(settings) => {
                 info!("Received update from settings shared state");
                 self.settings = settings;
-                let index = match self.settings.face_detection_mode {
-                    FaceDetectionMode::Off => 0,
-                    FaceDetectionMode::Mobile => 1,
-                    FaceDetectionMode::Desktop => 2,
-                };
-                self.face_detection_mode_row.set_selected(index);
             },
             PreferencesInput::UpdateShowSelfies(show_selfies) => {
                 info!("Update show selfies: {}", show_selfies);
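For reviewers less familiar with relm4's `view!` macro, the SwitchRow wiring above is roughly equivalent to this plain gtk-rs sketch. Names are illustrative, and it assumes GTK/libadwaita are already initialized:

```rust
use adw::prelude::*;

// Build the row with its initial state taken from settings, mirroring
// the builder call in init() above.
let row = adw::SwitchRow::builder()
    .title("Face Detection")
    .active(settings.face_detection_mode == FaceDetectionMode::On)
    .build();

row.connect_active_notify(move |switch| {
    // Map the two-state switch back onto the remaining enum variants.
    let mode = if switch.is_active() {
        FaceDetectionMode::On
    } else {
        FaceDetectionMode::Off
    };
    // ...send PreferencesInput::UpdateFaceDetectionMode(mode) here...
});
```

Because the row is now a plain boolean control, the dialog no longer needs to keep a `face_detection_mode_row` field in sync by hand; the `#[watch]` on `set_active` re-derives the switch state from settings whenever they change.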