
Commit

Merge pull request #14 from adrianreber/mmap
Fixes for Fedora
adrianreber authored May 30, 2021
2 parents 4140e37 + b8b7c4a commit 4b17e10
Showing 6 changed files with 86 additions and 62 deletions.
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -24,3 +24,4 @@ sha-1 = "0.9.6"
sha2 = "0.9.5"
walkdir = "2"
glob = "0.3.0"
memmap = "0.7.0"
25 changes: 16 additions & 9 deletions src/db/functions.rs
@@ -121,16 +121,23 @@ pub fn get_categories(c: &PgConnection) -> Vec<Category> {
///
/// ctime is the most important parameter to detect if
/// something has changed on the primary mirror.
pub fn get_directories(c: &PgConnection) -> Vec<Directory> {
pub fn get_directories(c: &PgConnection, cat_id: i32) -> Vec<Directory> {
use crate::db::schema::category_directory;
use crate::db::schema::directory;

let query = directory::dsl::directory.select((
directory::dsl::id,
directory::dsl::name,
directory::dsl::files,
directory::dsl::readable,
directory::dsl::ctime,
));
let subselect = category_directory::dsl::category_directory
.select(category_directory::dsl::directory_id)
.filter(category_directory::dsl::category_id.eq(cat_id));

let query = directory::dsl::directory
.select((
directory::dsl::id,
directory::dsl::name,
directory::dsl::files,
directory::dsl::readable,
directory::dsl::ctime,
))
.filter(directory::dsl::id.eq_any(subselect));
let debug = diesel::debug_query::<diesel::pg::Pg, _>(&query);
print_step(debug.to_string());
query
@@ -139,7 +146,7 @@ pub fn get_directories(c: &PgConnection) -> Vec<Directory> {
}

/// This retrieves the list of directories that belong to a given category.
pub fn get_category_directories(c: &PgConnection, cat_id: i32) -> Vec<CategoryDirectory> {
pub fn _get_category_directories(c: &PgConnection, cat_id: i32) -> Vec<CategoryDirectory> {
use crate::db::schema::category_directory;

let query = category_directory::dsl::category_directory
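The reworked get_directories narrows its result to one category by feeding a subselect on category_directory into eq_any, which Diesel renders as an IN (SELECT ...) clause. A minimal, self-contained sketch of that pattern, assuming Diesel 1.x with the postgres feature; the trimmed table! declarations, key choices and the directory_ids_in_category helper are illustrative, not copied from this repository:

#[macro_use]
extern crate diesel;

use diesel::pg::PgConnection;
use diesel::prelude::*;

table! {
    directory (id) {
        id -> Integer,
        name -> Text,
    }
}

table! {
    category_directory (category_id, directory_id) {
        category_id -> Integer,
        directory_id -> Integer,
    }
}

// Both tables must be allowed to appear in the same query
// (see allow_tables_to_appear_in_same_query! in schema.rs).
allow_tables_to_appear_in_same_query!(category_directory, directory);

fn directory_ids_in_category(c: &PgConnection, cat_id: i32) -> QueryResult<Vec<i32>> {
    // Subselect: all directory ids that belong to the given category ...
    let subselect = category_directory::table
        .select(category_directory::directory_id)
        .filter(category_directory::category_id.eq(cat_id));

    // ... used as an IN (SELECT ...) filter on the directory table.
    directory::table
        .select(directory::id)
        .filter(directory::id.eq_any(subselect))
        .load(c)
}

fn main() {}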
8 changes: 8 additions & 0 deletions src/db/schema.rs
@@ -71,6 +71,14 @@ table! {
}
}

table! {
host_category_dir (id) {
id -> Integer,
directory_id -> Integer,
}
}

joinable!(category -> directory (topdir_id));

allow_tables_to_appear_in_same_query!(category, directory);
allow_tables_to_appear_in_same_query!(category_directory, directory);
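The new table! block maps only the two host_category_dir columns the cleanup path needs; Diesel accepts such a partial column declaration as long as the omitted columns are never referenced. A short hypothetical helper, assuming the same schema module path used elsewhere in this repository; the function itself and the count query are illustrative, not part of this commit:

use diesel::pg::PgConnection;
use diesel::prelude::*;

/// Hypothetical helper: count how many host_category_dir rows still
/// reference a directory, e.g. before deciding whether to remove it.
fn host_category_dir_refs(c: &PgConnection, dir_id: i32) -> QueryResult<i64> {
    use crate::db::schema::host_category_dir::dsl::*;

    host_category_dir
        .filter(directory_id.eq(dir_id))
        .count()
        .get_result(c)
}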
97 changes: 47 additions & 50 deletions src/main.rs
@@ -382,37 +382,16 @@ fn check_for_repo(repos: &[db::models::Repository], prefix: String, arch_id: i32
/// Remove non-existing directories from the database
///
/// If a directory has been deleted on the file system it will still exist in the database. This
/// function will get the list of directories from the table `category_directory` for the current
/// directory (as specified in `cat_id`) and compare it with the file system. Every directory that
/// does not exist on the file system will be removed from `category_directory`, `directory` and
/// `file_detail`.
/// function goes through the list of directories in the database; every directory that does not
/// exist on the file system is removed from `category_directory`, `host_category_dir`,
/// `directory`, `repository` and `file_detail`.
fn cleanup_database(
c: &PgConnection,
cds: &HashMap<String, CategoryDirectory>,
dirs: &[db::models::Directory],
topdir: String,
cat_id: i32,
) -> Result<usize, diesel::result::Error> {
debug::STEPS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
let cgs = db::functions::get_category_directories(c, cat_id);

for d in dirs {
let mut wrong_category = true;
for cg in &cgs {
// If the directory is not part of the table category_directory
// we will not delete it from the database. Another possible
// to cleanup the database would be to compare by name.
//
// cgs only contains directories for the current category id
if cg.directory_id == d.id {
wrong_category = false;
}
}

if wrong_category {
continue;
}

let mut dir_gone_from_fs = true;

for k in cds.keys() {
@@ -428,7 +407,7 @@ fn cleanup_database(
}

if dir_gone_from_fs {
debug::STEPS.fetch_add(3, std::sync::atomic::Ordering::SeqCst);
debug::STEPS.fetch_add(5, std::sync::atomic::Ordering::SeqCst);
// Delete from CategoryDirectory (Is it possible to delete multiple entries at once???)
// Something like 'DELETE FROM category_directory where directory_id = 10 or directory_id = 20'.
let delete_cd = diesel::delete(
@@ -439,14 +418,23 @@
debug::print_step(debug_cd.to_string());
delete_cd.execute(c)?;

// Delete from Directory
let delete_dir = diesel::delete(
db::schema::directory::dsl::directory
.filter(db::schema::directory::dsl::id.eq(d.id)),
// Delete from HostCategoryDir
let delete_host_category_dir = diesel::delete(
db::schema::host_category_dir::dsl::host_category_dir
.filter(db::schema::host_category_dir::dsl::directory_id.eq(d.id)),
);
let debug_dir = diesel::debug_query::<diesel::pg::Pg, _>(&delete_dir);
debug::print_step(debug_dir.to_string());
delete_dir.execute(c)?;
let debug_host_category_dir = diesel::debug_query::<diesel::pg::Pg, _>(&delete_host_category_dir);
debug::print_step(debug_host_category_dir.to_string());
delete_host_category_dir.execute(c)?;

// Delete from Repository
let delete_repository = diesel::delete(
db::schema::repository::dsl::repository
.filter(db::schema::repository::dsl::directory_id.eq(d.id)),
);
let debug_repository = diesel::debug_query::<diesel::pg::Pg, _>(&delete_repository);
debug::print_step(debug_repository.to_string());
delete_repository.execute(c)?;

// And remove it from FileDetail
let delete_fd = diesel::delete(
@@ -456,6 +444,15 @@
let debug_fd = diesel::debug_query::<diesel::pg::Pg, _>(&delete_fd);
debug::print_step(debug_fd.to_string());
delete_fd.execute(c)?;

// Delete from Directory
let delete_dir = diesel::delete(
db::schema::directory::dsl::directory
.filter(db::schema::directory::dsl::id.eq(d.id)),
);
let debug_dir = diesel::debug_query::<diesel::pg::Pg, _>(&delete_dir);
debug::print_step(debug_dir.to_string());
delete_dir.execute(c)?;
}
}
Ok(0)
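On the "delete multiple entries at once" question in the comment above: Diesel's eq_any() renders as an SQL IN (...) clause, so the stale directory ids could be collected first and each table cleared with one statement. A minimal sketch of that variant for category_directory, assuming the ids have already been gathered; the helper name and signature are illustrative, not part of this commit:

use diesel::pg::PgConnection;
use diesel::prelude::*;

/// Delete all category_directory rows whose directory id is in `stale_ids`
/// with a single statement, roughly:
/// DELETE FROM category_directory WHERE directory_id IN (...)
fn delete_stale_category_directories(
    c: &PgConnection,
    stale_ids: Vec<i32>,
) -> Result<usize, diesel::result::Error> {
    use crate::db::schema::category_directory::dsl::*;

    diesel::delete(category_directory.filter(directory_id.eq_any(stale_ids))).execute(c)
}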
@@ -1153,26 +1150,26 @@ fn scan_local_directory(
};

if !fullfiletimelist.is_empty() && !skip_fftl {
use std::fs;

debug::print_step(format!(
"Local directory ({}) scan using {}",
url, fullfiletimelist
));
let data = fs::read_to_string(fullfiletimelist)?;
let pattern = Regex::new(r"([0-9]*)\t(.*)\t([0-9]*)\t(.*)")?;
data.lines()
.filter_map(|line| pattern.captures(line))
.map(|info| FileInfo {
is_directory: info[2].starts_with('d'),
is_readable: !info[2].contains('-'),
size: info[3].parse().unwrap(),
timestamp: info[1].parse().unwrap(),
name: Some(info[4].to_string()),
})
.for_each(|x| {
add_entry_to_category_directories(x, cds, excludes, topdir);
});
let file = std::fs::File::open(fullfiletimelist)?;
let data = unsafe { memmap::MmapOptions::new().map(&file)? };
for line in data.split(|elem| elem == &b'\n') {
let v: Vec<&str> = std::str::from_utf8(line)?.split('\t').collect();
if v.len() < 4 {
continue;
}
let info = FileInfo {
is_directory: v[1].starts_with('d'),
is_readable: !v[1].contains('-'),
size: v[2].parse()?,
timestamp: v[0].parse()?,
name: Some(v[3].to_string()),
};
add_entry_to_category_directories(info, cds, excludes, topdir);
}

return Ok(());
}
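The memory-mapped scan parses fullfiletimelist directly: every data line carries four tab-separated fields, read here as ctime, type/permission flags (a leading 'd' marks a directory, a '-' marks an entry that is not world-readable), size, and name; shorter lines such as section headers are skipped. A standalone sketch of that per-line parsing, with illustrative field types and a made-up sample line:

/// Parse one fullfiletimelist data line of the form
/// "<ctime>\t<flags>\t<size>\t<name>", e.g. "1622368800\td\t4096\tpub/fedora".
/// Returns None for lines with fewer than four fields (headers, blanks).
fn parse_fftl_line(line: &str) -> Option<(i64, bool, bool, u64, String)> {
    let v: Vec<&str> = line.split('\t').collect();
    if v.len() < 4 {
        return None;
    }
    Some((
        v[0].parse().ok()?,    // timestamp (ctime)
        v[1].starts_with('d'), // is_directory
        !v[1].contains('-'),   // is_readable
        v[2].parse().ok()?,    // size
        v[3].to_string(),      // name
    ))
}

fn main() {
    assert_eq!(
        parse_fftl_line("1622368800\td\t4096\tpub/fedora"),
        Some((1622368800, true, true, 4096, "pub/fedora".to_string()))
    );
}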
@@ -1481,7 +1478,7 @@ fn main() {

handle_unreadable(&mut cds);

let mut d = db::functions::get_directories(&connection);
let mut d = db::functions::get_directories(&connection, category.id);

if let Err(e) =
sync_category_directories(&connection, topdir.clone(), category.id, &mut d, &mut cds)
@@ -1573,7 +1570,7 @@ fn main() {
}

if params.delete_directories {
if let Err(e) = cleanup_database(&connection, &cds, &d, topdir, category.id) {
if let Err(e) = cleanup_database(&connection, &cds, &d, topdir) {
println!("Database cleanup failed {}", e);
process::exit(1);
}
6 changes: 3 additions & 3 deletions src/scan_primary_mirror_test.rs
@@ -317,7 +317,7 @@ fn sync_category_directories_test() {
.is_err());

// this is empty
let mut dirs = db::functions::get_directories(&c);
let mut dirs = db::functions::get_directories(&c, 37);
let mut cds: HashMap<String, CategoryDirectory> = HashMap::new();

let mut cd1 = CategoryDirectory {
@@ -339,7 +339,7 @@
assert_eq!(dirs[0].readable, true);
assert_eq!(dirs[0].name, "topdir/directory1".to_string());

dirs = db::functions::get_directories(&c);
dirs = db::functions::get_directories(&c, 37);
// test after reading from database
assert_eq!(dirs.len(), 1);
assert_eq!(dirs[0].ctime, 1000);
@@ -352,7 +352,7 @@
assert!(
!sync_category_directories(&c, "topdir/".to_string(), 37, &mut dirs, &mut cds).is_err()
);
dirs = db::functions::get_directories(&c);
dirs = db::functions::get_directories(&c, 37);
assert_eq!(dirs.len(), 1);
// this should have been updated
assert_eq!(dirs[0].ctime, 2000);
