Skip to content

Commit

Permalink
Allow logging to internal disk as a workaround
Browse files Browse the repository at this point in the history
The recommended way to persist logs during the takeover process is
to provide an external disk or internal disk that's not the target for
balenaOS. This PR adds a mechanism that keeps the log in RAM and then
dumps the logs to the disk running the os. This can be helpful when
running a takeover remotely.

- Adds a new CLI option `--log-to-balenaos`. This is not compatible with
  other logging options takeover provides
- Dumps the partial logs captured so far if the takeover process fails at any
  point during the different stages

This mechanism is being added to allow running as part of a balenaOS to
balenaOS update whereby the disk partition layout changes across OS versions.

Change-type: minor
Signed-off-by: Rahul Thakoor <[email protected]>
  • Loading branch information
rahul-thakoor committed Nov 21, 2024
1 parent beb21a1 commit 733d127
Show file tree
Hide file tree
Showing 7 changed files with 444 additions and 48 deletions.
19 changes: 17 additions & 2 deletions src/common.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
use finder::Finder;
use mod_logger::Logger;
use std::cmp::min;
use std::ffi::{CStr, CString, OsString};
use std::fs::{read_to_string, OpenOptions};
use std::io::Write;
use std::mem::MaybeUninit;
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Stdio};
use std::process::{exit, Command, ExitStatus, Stdio};
use std::thread::sleep;
use std::time::Duration;

use log::{debug, error, trace, warn};
use libc::LINUX_REBOOT_CMD_RESTART;
use log::{debug, error, info, trace, warn};

use regex::Regex;

pub(crate) mod stage2_config;

pub(crate) mod defs;

pub(crate) mod logging;
pub(crate) mod system;
use system::{is_dir, stat};

Expand Down Expand Up @@ -410,6 +415,16 @@ pub(crate) fn log(text: &str) {
}
}

pub(crate) fn reboot() -> ! {
trace!("reboot entered");
Logger::flush();
sync();
sleep(Duration::from_secs(3));
info!("rebooting");
let _res = unsafe { libc::reboot(LINUX_REBOOT_CMD_RESTART) };
exit(1);
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
272 changes: 272 additions & 0 deletions src/common/logging.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
use log::{error, info};
use std::fs;
use std::{
fs::copy,
path::{Path, PathBuf},
};

use crate::common::{path_append, ToError};
use crate::{
common::{
debug,
defs::{BALENA_DATA_FSTYPE, NIX_NONE},
disk_util::DEF_BLOCK_SIZE,
error::Result,
loop_device::LoopDevice,
},
stage2::get_partition_infos,
};

use nix::{
mount::{mount, umount, MsFlags},
unistd::sync,
};

use self::stage2_config::Stage2Config;

use super::{
defs::{BALENA_PART_MP, OLD_ROOT_MP},
reboot, stage2_config,
};

/// Directory holding the per-stage log files; `get_stage_tmpfs_logfile_path`
/// joins it with one of the file names below.
pub const LOG_TMPFS_DESTINATION: &str = "/tmp";
/// File name of the stage 1 log.
pub const LOG_STAGE_1: &str = "stage1.log";
/// File name of the stage2-init log.
pub const LOG_STAGE_2_INIT: &str = "stage2-init.log";
/// File name of the stage 2 log.
pub const LOG_STAGE_2: &str = "stage2.log";
/// Directory the logs are copied to by the handlers that run before the
/// partitions are unmounted. Handlers running after `pivot_root` prefix it
/// with `OLD_ROOT_MP`.
pub const LOG_PRE_UNMOUNT_DATA_PART_DEST: &str = "/mnt/data/balenahup/takeover";

/// Takeover stage whose log file is being referred to; see
/// `get_stage_tmpfs_logfile_path` for the file each variant maps to.
pub enum Stage {
// Stage 1 -> `LOG_STAGE_1`
S1,
// Stage2-init -> `LOG_STAGE_2_INIT`
S2Init,
// Stage 2 -> `LOG_STAGE_2`
Stage2,
}

fn create_dir_if_not_exist(path: &str) -> Result<()> {
let dir_path = Path::new(path);
if !dir_path.is_dir() {
println!("Directory does not exist. Creating: {}", dir_path.display());
fs::create_dir_all(dir_path).upstream_with_context(
format!("Failed to create directory {}", dir_path.display()).as_str(),
)?;
println!("Directory created successfully.");
} else {
println!("Directory already exists: {}", dir_path.display());
}

Ok(())
}

/// Returns the path of the log file used for `stage`: the file name of that
/// stage joined onto `LOG_TMPFS_DESTINATION` with a `/`.
pub fn get_stage_tmpfs_logfile_path(stage: Stage) -> String {
    let file_name = match stage {
        Stage::S1 => LOG_STAGE_1,
        Stage::S2Init => LOG_STAGE_2_INIT,
        Stage::Stage2 => LOG_STAGE_2,
    };

    [LOG_TMPFS_DESTINATION, file_name].join("/")
}

// Helper function to handle dumping log from tmpfs to data partition
// at different stages in the takeover process
pub fn copy_tmpfs_log_to_data_partition(source_tmp_log_path: &str, dest_dir_path: &str) {
info!(
"copy_tmpfs_log_to_data_partition entered! Copying {} from tmpfs to {}",
source_tmp_log_path, dest_dir_path
);
// check if target destination exists, if not create
match create_dir_if_not_exist(dest_dir_path) {
Ok(_) => (),
Err(_) => reboot(),
}

let source_tmp_log = Path::new(source_tmp_log_path);
if source_tmp_log.exists() && source_tmp_log.is_file() {
let file_name = source_tmp_log
.file_name()
.map(|name| name.to_string_lossy().to_string())
.expect("Failed to extract file name from path");

match copy(
PathBuf::from(source_tmp_log),
path_append(dest_dir_path, format!("/{}", file_name)),
) {
Ok(_) => info!(
"Copied {} from {} to {} on data partition",
file_name,
&source_tmp_log.display(),
&dest_dir_path
),
Err(why) => error!(
"Could not copy {} from {} to {}: {:?}",
file_name,
&source_tmp_log.display(),
&dest_dir_path,
why
),
}
} else {
info!(
"File {} does not exist or is not a regular file.",
source_tmp_log_path
);
}
}

/// Dumps the stage 1 and stage2-init logs to `LOG_PRE_UNMOUNT_DATA_PART_DEST`.
///
/// Caters for the state before `pivot_root` has been called, so both the log
/// paths and the destination are used as-is.
pub fn stage2_init_pre_pivot_root_tmpfs_log_handler() {
    let logfiles = [
        get_stage_tmpfs_logfile_path(Stage::S1),
        get_stage_tmpfs_logfile_path(Stage::S2Init),
    ];

    // Copy each log to the data partition, stage 1 first.
    for logfile in &logfiles {
        copy_tmpfs_log_to_data_partition(logfile, LOG_PRE_UNMOUNT_DATA_PART_DEST);
    }
}

// Error handling if an error occurs in stage2-init prior to calling pivot_root
pub fn stage2_init_pre_pivot_root_err_handler(log_to_balenaos: bool) -> ! {
if log_to_balenaos {
stage2_init_pre_pivot_root_tmpfs_log_handler();
}

reboot();
}

/// Dumps the stage 1 and stage2-init logs to the data partition.
///
/// Caters for the state after `pivot_root` has been called: the stage 1 log
/// and the destination directory are addressed relative to `OLD_ROOT_MP`,
/// while the stage2-init log is read from its plain path.
pub fn stage2_init_post_pivot_root_tmpfs_log_handler() {
    // Re-root a path under the old root mountpoint.
    let under_old_root = |path: &str| format!("{}/{}", OLD_ROOT_MP, path);

    let logfiles = [
        under_old_root(&get_stage_tmpfs_logfile_path(Stage::S1)),
        get_stage_tmpfs_logfile_path(Stage::S2Init),
    ];
    let destination = under_old_root(LOG_PRE_UNMOUNT_DATA_PART_DEST);

    // Copy each log to the data partition, stage 1 first.
    for logfile in &logfiles {
        copy_tmpfs_log_to_data_partition(logfile, &destination);
    }
}

// Error handling if an error occurs in stage2-init after calling pivot_root
pub fn stage2_init_post_pivot_root_err_handler() -> ! {
stage2_init_post_pivot_root_tmpfs_log_handler();
reboot();
}

/// Dumps the stage 1, stage2-init and stage 2 logs to the data partition.
///
/// Caters for the state in the stage2 worker before the partitions are
/// unmounted: the stage 1 log and the destination directory are addressed
/// relative to `OLD_ROOT_MP`, the other two logs by their plain paths.
pub fn stage2_pre_unmount_tmpfs_log_handler() {
    // Re-root a path under the old root mountpoint.
    let under_old_root = |path: &str| format!("{}/{}", OLD_ROOT_MP, path);

    let logfiles = [
        under_old_root(&get_stage_tmpfs_logfile_path(Stage::S1)),
        get_stage_tmpfs_logfile_path(Stage::S2Init),
        get_stage_tmpfs_logfile_path(Stage::Stage2),
    ];
    let destination = under_old_root(LOG_PRE_UNMOUNT_DATA_PART_DEST);

    // Copy each log to the data partition in stage order.
    for logfile in &logfiles {
        copy_tmpfs_log_to_data_partition(logfile, &destination);
    }
}
// Error handling if an error occurs in stage2 worker process before unmounting partitions
pub fn stage2_pre_unmount_err_handler(log_to_balenaos: bool) -> ! {
if log_to_balenaos {
stage2_pre_unmount_tmpfs_log_handler();
}

reboot();
}

/// Dumps the stage 1, stage2-init and stage 2 logs to the data partition of
/// `s2_config.flash_dev`.
///
/// Caters for the state in the stage2 worker after the partitions have been
/// unmounted: the data partition is attached to a free loop device, mounted
/// at `BALENA_PART_MP`, the logs are copied, and the partition is unmounted
/// and the loop device released again.
///
/// # Errors
/// Returns an error if the partition layout cannot be read, or if setting up
/// the loop device, mounting, unmounting or releasing the loop device fails.
/// Failures are propagated instead of panicking so the caller can still
/// reboot the machine.
pub fn stage2_post_unmount_tmpfs_log_handler(s2_config: &Stage2Config) -> Result<()> {
    let stage1_logfile = format!(
        "{}/{}",
        OLD_ROOT_MP,
        get_stage_tmpfs_logfile_path(Stage::S1)
    );
    let stage2_init_logfile = get_stage_tmpfs_logfile_path(Stage::S2Init);
    let stage2_logfile = get_stage_tmpfs_logfile_path(Stage::Stage2);

    // destination will be relative to partition mountpoint
    let destination = format!("{}/{}", BALENA_PART_MP, "/balenahup/takeover");

    // Mount raw data partition
    let device = &s2_config.flash_dev;

    let (_boot_part, _root_a_part, data_part) = get_partition_infos(device)?;

    let mut loop_device = LoopDevice::get_free(true)?;
    info!("Create loop device: '{}'", loop_device.get_path().display());
    // Restrict the loop device to the byte range of the data partition.
    let byte_offset = data_part.start_lba * DEF_BLOCK_SIZE as u64;
    let size_limit = data_part.num_sectors * DEF_BLOCK_SIZE as u64;

    debug!(
        "Setting up device '{}' with offset {}, sizelimit {} on '{}'",
        device.display(),
        byte_offset,
        size_limit,
        loop_device.get_path().display()
    );

    // Propagate the failure; a panic here would keep the caller from rebooting.
    loop_device.setup(device, Some(byte_offset), Some(size_limit))?;
    info!(
        "Setup device '{}' with offset {}, sizelimit {} on '{}'",
        device.display(),
        byte_offset,
        size_limit,
        loop_device.get_path().display()
    );

    mount(
        Some(loop_device.get_path()),
        BALENA_PART_MP,
        Some(BALENA_DATA_FSTYPE.as_bytes()),
        MsFlags::empty(),
        NIX_NONE,
    )
    .upstream_with_context(&format!(
        "Failed to mount '{}' to '{}'",
        loop_device.get_path().display(),
        BALENA_PART_MP,
    ))?;

    info!(
        "Mounted data partition as {} on {}",
        loop_device.get_path().display(),
        BALENA_PART_MP
    );

    // copy files to data partition
    copy_tmpfs_log_to_data_partition(stage1_logfile.as_str(), destination.as_str());
    copy_tmpfs_log_to_data_partition(stage2_init_logfile.as_str(), destination.as_str());
    copy_tmpfs_log_to_data_partition(stage2_logfile.as_str(), destination.as_str());

    // Flush the copied logs to disk before the partition is unmounted.
    sync();
    umount(BALENA_PART_MP)
        .upstream_with_context(&format!("Failed to unmount '{}'", BALENA_PART_MP))?;
    info!("Unmounted data partition from {}", BALENA_PART_MP);

    loop_device.unset()?;
    Ok(())
}

// Error handling if an error occurs in stage2 worker process after unmounting partitions
pub fn stage2_post_unmount_err_handler(s2_config: &Stage2Config) -> ! {
// if --log-to-balenaos was not passed, we simply reboot
if s2_config.log_to_balenaos {
match stage2_post_unmount_tmpfs_log_handler(s2_config) {
Ok(_) => (),
Err(_) => reboot(),
}
}
reboot();
}
9 changes: 9 additions & 0 deletions src/common/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ pub struct Options {
nwmgr_cfg: Option<Vec<PathBuf>>,
#[clap(long, value_name = "DT_SLUG", help = "Device Type slug to change to")]
change_dt_to: Option<String>,
#[clap(
long,
help = "Logs to RAM and then dumps logs to balenaOS disk after flashing"
)]
log_to_balenaos: bool,
}

impl Options {
Expand Down Expand Up @@ -285,4 +290,8 @@ impl Options {
/// Returns the `change_dt_to` option (the device type slug to change to),
/// if one was given.
pub fn change_dt_to(&self) -> &Option<String> {
&self.change_dt_to
}

/// Returns whether the `log_to_balenaos` option was set, i.e. whether logs
/// are kept in RAM and dumped to the balenaOS disk after flashing.
pub fn log_to_balenaos(&self) -> bool {
self.log_to_balenaos
}
}
1 change: 1 addition & 0 deletions src/common/stage2_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub(crate) struct LogDevice {
pub(crate) struct Stage2Config {
pub log_dev: Option<LogDevice>,
pub log_level: String,
pub log_to_balenaos: bool,
pub flash_dev: PathBuf,
pub pretend: bool,
pub umount_parts: Vec<UmountPart>,
Expand Down
Loading

0 comments on commit 733d127

Please sign in to comment.