diff --git a/CHANGELOG.md b/CHANGELOG.md index 65ef73a1e1f..313abc61386 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ and this project adheres to unnecessary fields (`max_connections` and `max_pending_resets`) from the snapshot format, bumping the snapshot version to 5.0.0. Users need to regenerate snapshots. +- [#4926](https://github.com/firecracker-microvm/firecracker/pull/4926): Replace the + in-house implementation underlying seccompiler with `libseccomp`, which + produces smaller and more optimized BPF code. ### Deprecated diff --git a/Cargo.lock b/Cargo.lock index d499d3f16f8..8e657305fba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,7 +210,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -601,7 +601,6 @@ dependencies = [ name = "firecracker" version = "1.11.0-dev" dependencies = [ - "bincode", "cargo_toml", "displaydoc", "event-manager", @@ -776,6 +775,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -1185,14 +1193,13 @@ name = "seccompiler" version = "1.11.0-dev" dependencies = [ "bincode", + "clap", "displaydoc", "libc", - "log-instrument", "serde", "serde_json", "thiserror 2.0.7", - "utils", - "vmm-sys-util", + "zerocopy 0.8.13", ] [[package]] @@ -1576,7 +1583,6 @@ dependencies = [ "memfd", "micro_http", "proptest", - "seccompiler", "semver", "serde", "serde_json", diff --git a/src/cpu-template-helper/src/utils/mod.rs b/src/cpu-template-helper/src/utils/mod.rs index bd570840fc5..b6d3465efd5 100644 --- a/src/cpu-template-helper/src/utils/mod.rs +++ b/src/cpu-template-helper/src/utils/mod.rs @@ -12,7 +12,7 @@ use std::sync::{Arc, Mutex}; use
vmm::builder::{build_microvm_for_boot, StartMicrovmError}; use vmm::cpu_config::templates::{CustomCpuTemplate, Numeric}; use vmm::resources::VmResources; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; use vmm::{EventManager, Vmm, HTTP_MAX_PAYLOAD_SIZE}; use vmm_sys_util::tempfile::TempFile; diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index cbd9ffe5e2d..92167f32b07 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -22,7 +22,6 @@ libc = "0.2.168" log-instrument = { path = "../log-instrument", optional = true } micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } -seccompiler = { path = "../seccompiler" } serde = { version = "1.0.216", features = ["derive"] } serde_derive = "1.0.136" serde_json = "1.0.133" @@ -42,13 +41,12 @@ serde = { version = "1.0.216", features = ["derive"] } userfaultfd = "0.8.1" [build-dependencies] -bincode = "1.2.1" seccompiler = { path = "../seccompiler" } serde = { version = "1.0.216" } serde_json = "1.0.133" [features] -tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"] +tracing = ["log-instrument", "utils/tracing", "vmm/tracing"] gdb = ["vmm/gdb"] [lints] diff --git a/src/firecracker/build.rs b/src/firecracker/build.rs index b20e1cd4e1e..87710b54fc4 100644 --- a/src/firecracker/build.rs +++ b/src/firecracker/build.rs @@ -1,13 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -use std::collections::BTreeMap; -use std::fs::File; use std::path::Path; -use seccompiler::common::BpfProgram; -use seccompiler::compiler::{Compiler, JsonFile}; - const ADVANCED_BINARY_FILTER_FILE_NAME: &str = "seccomp_filter.bpf"; const JSON_DIR: &str = "../../resources/seccomp"; @@ -44,19 +39,7 @@ fn main() { // Also retrigger the build script on any seccompiler source code change. 
println!("cargo:rerun-if-changed={}", SECCOMPILER_SRC_DIR); - let input = std::fs::read_to_string(seccomp_json_path).expect("Correct input file"); - let filters: JsonFile = serde_json::from_str(&input).expect("Input read"); - - let arch = target_arch.as_str().try_into().expect("Target"); - let compiler = Compiler::new(arch); - - // transform the IR into a Map of BPFPrograms - let bpf_data: BTreeMap = compiler - .compile_blob(filters.0, false) - .expect("Successfull compilation"); - - // serialize the BPF programs & output them to a file let out_path = format!("{}/{}", out_dir, ADVANCED_BINARY_FILTER_FILE_NAME); - let output_file = File::create(out_path).expect("Create seccompiler output path"); - bincode::serialize_into(output_file, &bpf_data).expect("Seccompiler serialization"); + seccompiler::compile_bpf(&seccomp_json_path, &target_arch, &out_path, false) + .expect("Cannot compile seccomp filters"); } diff --git a/src/firecracker/examples/seccomp/jailer.rs b/src/firecracker/examples/seccomp/jailer.rs index f82e3f5e249..47f4a667749 100644 --- a/src/firecracker/examples/seccomp/jailer.rs +++ b/src/firecracker/examples/seccomp/jailer.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::os::unix::process::CommandExt; use std::process::{Command, Stdio}; -use seccompiler::{apply_filter, deserialize_binary}; +use vmm::seccomp::{apply_filter, deserialize_binary}; fn main() { let args: Vec = args().collect(); diff --git a/src/firecracker/examples/seccomp/panic.rs b/src/firecracker/examples/seccomp/panic.rs index 7998552a4d1..315899872f4 100644 --- a/src/firecracker/examples/seccomp/panic.rs +++ b/src/firecracker/examples/seccomp/panic.rs @@ -3,7 +3,7 @@ use std::env::args; use std::fs::File; -use seccompiler::{apply_filter, deserialize_binary}; +use vmm::seccomp::{apply_filter, deserialize_binary}; fn main() { let args: Vec = args().collect(); diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 6ac2955af8f..a2edce205cd 100644 --- 
a/src/firecracker/src/api_server/mod.rs +++ b/src/firecracker/src/api_server/mod.rs @@ -14,13 +14,13 @@ use std::sync::mpsc; pub use micro_http::{Body, HttpServer, Request, Response, ServerError, StatusCode, Version}; use parsed_request::{ParsedRequest, RequestAction}; -use seccompiler::BpfProgramRef; use serde_json::json; use utils::time::{get_time_us, ClockType}; use vmm::logger::{ debug, error, info, update_metric_with_elapsed_time, warn, ProcessTimeReporter, METRICS, }; use vmm::rpc_interface::{ApiRequest, ApiResponse, VmmAction}; +use vmm::seccomp::BpfProgramRef; use vmm::vmm_config::snapshot::SnapshotType; use vmm_sys_util::eventfd::EventFd; @@ -78,7 +78,7 @@ impl ApiServer { // Load seccomp filters on the API thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. - if let Err(err) = seccompiler::apply_filter(seccomp_filter) { + if let Err(err) = vmm::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on the API thread: {}", err @@ -208,7 +208,7 @@ mod tests { use vmm::builder::StartMicrovmError; use vmm::logger::StoreMetric; use vmm::rpc_interface::{VmmActionError, VmmData}; - use vmm::seccomp_filters::get_empty_filters; + use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::vmm_config::snapshot::CreateSnapshotParams; use vmm_sys_util::tempfile::TempFile; diff --git a/src/firecracker/src/api_server_adapter.rs b/src/firecracker/src/api_server_adapter.rs index ffc4732025d..776d03a4daa 100644 --- a/src/firecracker/src/api_server_adapter.rs +++ b/src/firecracker/src/api_server_adapter.rs @@ -8,13 +8,13 @@ use std::sync::{Arc, Mutex}; use std::thread; use event_manager::{EventOps, Events, MutEventSubscriber, SubscriberOps}; -use seccompiler::BpfThreadMap; use vmm::logger::{error, warn, ProcessTimeReporter}; use vmm::resources::VmResources; use vmm::rpc_interface::{ ApiRequest, ApiResponse, 
BuildMicrovmFromRequestsError, PrebootApiController, RuntimeApiController, VmmAction, }; +use vmm::seccomp::BpfThreadMap; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::{EventManager, FcExitCode, Vmm}; use vmm_sys_util::epoll::EventSet; diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 8fb5392afcf..de60c476be3 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -17,7 +17,6 @@ use std::{io, panic}; use api_server_adapter::ApiServerError; use event_manager::SubscriberOps; use seccomp::FilterError; -use seccompiler::BpfThreadMap; use utils::arg_parser::{ArgParser, Argument}; use utils::validators::validate_instance_id; use vmm::builder::StartMicrovmError; @@ -26,6 +25,7 @@ use vmm::logger::{ }; use vmm::persist::SNAPSHOT_VERSION; use vmm::resources::VmResources; +use vmm::seccomp::BpfThreadMap; use vmm::signal_handler::register_signal_handlers; use vmm::snapshot::{Snapshot, SnapshotError}; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; diff --git a/src/firecracker/src/seccomp.rs b/src/firecracker/src/seccomp.rs index 5794d6498a8..2c7b3ddecd8 100644 --- a/src/firecracker/src/seccomp.rs +++ b/src/firecracker/src/seccomp.rs @@ -5,8 +5,7 @@ use std::fs::File; use std::io::{BufReader, Read}; use std::path::Path; -use seccompiler::{deserialize_binary, BpfThreadMap, DeserializationError}; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::{deserialize_binary, get_empty_filters, BpfThreadMap, DeserializationError}; const THREAD_CATEGORIES: [&str; 3] = ["vmm", "api", "vcpu"]; @@ -118,7 +117,7 @@ fn filter_thread_categories(map: BpfThreadMap) -> Result Deserialize<'de> for Comment { - fn deserialize(_deserializer: D) -> std::result::Result - where - D: Deserializer<'de>, - { - String::deserialize(_deserializer)?; - - Ok(Comment {}) - } -} - -/// Seccomp filter errors. 
-#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum FilterError { - /// The seccomp rules vector is empty. - EmptyRulesVector, - /// The seccomp filter contains too many BPF instructions. - FilterTooLarge, - /// The seccomp rule contains an invalid argument number. - InvalidArgumentNumber, - /// {0} - Arch(TargetArchError), - /// Syscall {0} has conflicting rules. - ConflictingRules(i64), -} - -/// Supported target architectures. -#[allow(non_camel_case_types)] -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum TargetArch { - /// x86_64 arch - x86_64, - /// aarch64 arch - aarch64, -} - -/// Errors related to target arch. -#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum TargetArchError { - /// Invalid target arch string: {0} - InvalidString(String), -} - -impl TargetArch { - /// Get the arch audit value. - fn get_audit_value(self) -> u32 { - match self { - TargetArch::x86_64 => AUDIT_ARCH_X86_64, - TargetArch::aarch64 => AUDIT_ARCH_AARCH64, - } - } - - /// Get the string representation. - fn to_string(self) -> &'static str { - match self { - TargetArch::x86_64 => "x86_64", - TargetArch::aarch64 => "aarch64", - } - } -} - -impl TryInto for &str { - type Error = TargetArchError; - fn try_into(self) -> std::result::Result { - match self.to_lowercase().as_str() { - "x86_64" => Ok(TargetArch::x86_64), - "aarch64" => Ok(TargetArch::aarch64), - _ => Err(TargetArchError::InvalidString(self.to_string())), - } - } -} - -impl From for &str { - fn from(target_arch: TargetArch) -> Self { - target_arch.to_string() - } -} - -/// Comparison to perform when matching a condition. -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum SeccompCmpOp { - /// Argument value is equal to the specified value. - Eq, - /// Argument value is greater than or equal to the specified value. - Ge, - /// Argument value is greater than specified value. 
- Gt, - /// Argument value is less than or equal to the specified value. - Le, - /// Argument value is less than specified value. - Lt, - /// Masked bits of argument value are equal to masked bits of specified value. - MaskedEq(u64), - /// Argument value is not equal to specified value. - Ne, -} - -/// Seccomp argument value length. -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum SeccompCmpArgLen { - /// Argument value length is 4 bytes. - Dword, - /// Argument value length is 8 bytes. - Qword, -} - -/// Condition that syscall must match in order to satisfy a rule. -#[derive(Clone, Debug, PartialEq, Deserialize)] -#[serde(deny_unknown_fields)] -pub struct SeccompCondition { - /// Index of the argument that is to be compared. - #[serde(rename = "index")] - arg_number: u8, - /// Length of the argument value that is to be compared. - #[serde(rename = "type")] - arg_len: SeccompCmpArgLen, - /// Comparison to perform. - #[serde(rename = "op")] - operator: SeccompCmpOp, - /// The value that will be compared with the argument value. - #[serde(rename = "val")] - value: u64, - /// Optional empty value, represents a `comment` property in the JSON file. - comment: Option, -} - -/// Actions that `seccomp` can apply to process calling a syscall. -#[derive(Clone, Debug, PartialEq, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SeccompAction { - /// Allows syscall. - Allow, - /// Returns from syscall with specified error number. - Errno(u32), - /// Kills calling thread. - KillThread, - /// Kills calling process. - KillProcess, - /// Same as allow but logs call. - Log, - /// Notifies tracing process of the caller with respective number. - Trace(u32), - /// Sends `SIGSYS` to the calling process. - Trap, -} - -/// Rule that `seccomp` attempts to match for a syscall. -/// -/// If all conditions match then rule gets matched. -/// The action of the first rule that matches will be applied to the calling process. 
-/// If no rule matches the default action is applied. -#[derive(Clone, Debug, PartialEq)] -pub struct SeccompRule { - /// Conditions of rule that need to match in order for the rule to get matched. - conditions: Vec, - /// Action applied to calling process if rule gets matched. - action: SeccompAction, -} - -/// Type that associates the syscall number to its SeccompRules. -pub type SeccompRuleMap = BTreeMap>; - -/// Filter containing rules assigned to syscall numbers. -#[derive(Clone, Debug, PartialEq)] -pub struct SeccompFilter { - /// Map of syscall numbers and corresponding rule chains. - rules: SeccompRuleMap, - /// Default action to apply to syscall numbers that do not exist in the hash map. - default_action: SeccompAction, - /// Target architecture of the generated BPF filter. - target_arch: TargetArch, -} - -impl SeccompCondition { - /// Validates the SeccompCondition data - pub fn validate(&self) -> Result<(), FilterError> { - // Checks that the given argument number is valid. - if self.arg_number > ARG_NUMBER_MAX { - return Err(FilterError::InvalidArgumentNumber); - } - - Ok(()) - } - - /// Splits the [`SeccompCondition`] into 32 bit chunks and offsets. - /// - /// Returns most significant half, least significant half of the `value` field of - /// [`SeccompCondition`], as well as the offsets of the most significant and least significant - /// half of the argument specified by `arg_number` relative to `struct seccomp_data` passed to - /// the BPF program by the kernel. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn value_segments(&self) -> (u32, u32, u8, u8) { - // Splits the specified value into its most significant and least significant halves. - let (msb, lsb) = ((self.value >> 32) as u32, (self.value & 0xFFFFFFFF) as u32); - - // Offset to the argument specified by `arg_number`. - // Cannot overflow because the value will be at most 16 + 6 * 8 = 64. 
- let arg_offset = SECCOMP_DATA_ARGS_OFFSET + self.arg_number * SECCOMP_DATA_ARG_SIZE; - - // Extracts offsets of most significant and least significant halves of argument. - // Addition cannot overflow because it's at most `arg_offset` + 4 = 68. - let (msb_offset, lsb_offset) = { (arg_offset + SECCOMP_DATA_ARG_SIZE / 2, arg_offset) }; - - (msb, lsb, msb_offset, lsb_offset) - } - - /// Translates the `eq` (equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - /// - /// The jump is performed if the condition fails and thus the current rule does not match so - /// `seccomp` tries to match the next rule by jumping out of the current rule. - /// - /// In case the condition is part of the last rule, the jump offset is to the default action of - /// respective filter. - /// - /// The most significant and least significant halves of the argument value are compared - /// separately since the BPF operand and accumulator are 4 bytes whereas an argument value is 8. - fn into_eq_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ge` (greater than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_ge_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `gt` (greater than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_gt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `le` (less than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_le_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `lt` (less than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_lt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `masked_eq` (masked equal) condition into BPF statements. - /// - /// The `masked_eq` condition is `true` if the result of logical `AND` between the given value - /// and the mask is the value being compared against. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_masked_eq_bpf(self, offset: u8, mask: u64) -> Vec { - let (_, _, msb_offset, lsb_offset) = self.value_segments(); - let masked_value = self.value & mask; - let (msb, lsb) = ( - (masked_value >> 32) as u32, - (masked_value & 0xFFFFFFFF) as u32, - ); - let (mask_msb, mask_lsb) = ((mask >> 32) as u32, (mask & 0xFFFFFFFF) as u32); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_msb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 3), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_lsb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ne` (not equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_ne_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the [`SeccompCondition`] into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn into_bpf(self, offset: u8) -> Vec { - let result = match self.operator { - SeccompCmpOp::Eq => self.into_eq_bpf(offset), - SeccompCmpOp::Ge => self.into_ge_bpf(offset), - SeccompCmpOp::Gt => self.into_gt_bpf(offset), - SeccompCmpOp::Le => self.into_le_bpf(offset), - SeccompCmpOp::Lt => self.into_lt_bpf(offset), - SeccompCmpOp::MaskedEq(mask) => self.into_masked_eq_bpf(offset, mask), - SeccompCmpOp::Ne => self.into_ne_bpf(offset), - }; - - // Verifies that the `CONDITION_MAX_LEN` constant was properly updated. - assert!(result.len() <= CONDITION_MAX_LEN as usize); - - result - } -} - -impl From for u32 { - /// Return codes of the BPF program for each action. - /// - /// # Arguments - /// - /// * `action` - The [`SeccompAction`] that the kernel will take. - /// - /// [`SeccompAction`]: struct.SeccompAction.html - fn from(action: SeccompAction) -> Self { - match action { - SeccompAction::Allow => SECCOMP_RET_ALLOW, - SeccompAction::Errno(x) => SECCOMP_RET_ERRNO | (x & SECCOMP_RET_MASK), - SeccompAction::KillThread => SECCOMP_RET_KILL_THREAD, - SeccompAction::KillProcess => SECCOMP_RET_KILL_PROCESS, - SeccompAction::Log => SECCOMP_RET_LOG, - SeccompAction::Trace(x) => SECCOMP_RET_TRACE | (x & SECCOMP_RET_MASK), - SeccompAction::Trap => SECCOMP_RET_TRAP, - } - } -} - -impl SeccompRule { - /// Creates a new rule. Rules with 0 conditions always match. - /// - /// # Arguments - /// - /// * `conditions` - Vector of [`SeccompCondition`] that the syscall must match. - /// * `action` - Action taken if the syscall matches the conditions. See [`SeccompAction`]. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - /// [`SeccompAction`]: struct.SeccompAction.html - pub fn new(conditions: Vec, action: SeccompAction) -> Self { - Self { conditions, action } - } - - /// Appends a condition of the rule to an accumulator. - /// - /// The length of the rule and offset to the next rule are updated. 
- /// - /// # Arguments - /// - /// * `condition` - The condition added to the rule. - /// * `accumulator` - Accumulator of BPF statements that compose the BPF program. - /// * `rule_len` - Number of conditions in the rule. - /// * `offset` - Offset (in number of BPF statements) to the next rule. - fn append_condition( - condition: SeccompCondition, - accumulator: &mut Vec>, - rule_len: &mut usize, - offset: &mut u8, - ) { - // Tries to detect whether prepending the current condition will produce an unjumpable - // offset (since BPF jumps are a maximum of 255 instructions, which is u8::MAX). - if offset.checked_add(CONDITION_MAX_LEN + 1).is_none() { - // If that is the case, three additional helper jumps are prepended and the offset - // is reset to 1. - // - // - The first jump continues the evaluation of the condition chain by jumping to the - // next condition or the action of the rule if the last condition was matched. - // - The second, jumps out of the rule, to the next rule or the default action of the - // filter in case of the last rule in the rule chain of a syscall. - // - The third jumps out of the rule chain of the syscall, to the rule chain of the next - // syscall number to be checked or the default action of the filter in the case of the - // last rule chain. - let helper_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 2), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - ]; - *rule_len += helper_jumps.len(); - accumulator.push(helper_jumps); - *offset = 1; - } - - let condition = condition.into_bpf(*offset); - *rule_len += condition.len(); - // Safe to unwrap since we checked that condition length is less than `CONDITION_MAX_LEN`. - *offset += u8::try_from(condition.len()).unwrap(); - accumulator.push(condition); - } -} - -impl From for BpfProgram { - /// Translates a rule into BPF statements. 
- /// - /// Each rule starts with 2 jump statements: - /// * The first jump enters the rule, attempting a match. - /// * The second jump points to the end of the rule chain for one syscall, into the rule chain - /// for the next syscall or the default action if the current syscall is the last one. It - /// essentially jumps out of the current rule chain. - fn from(rule: SeccompRule) -> Self { - // Rule is built backwards, last statement is the action of the rule. - // The offset to the next rule is 1. - let mut accumulator = - Vec::with_capacity(rule.conditions.len() * CONDITION_MAX_LEN as usize); - let mut rule_len = 1; - let mut offset = 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, u32::from(rule.action))]); - - // Conditions are translated into BPF statements and prepended to the rule. - rule.conditions.into_iter().for_each(|condition| { - SeccompRule::append_condition(condition, &mut accumulator, &mut rule_len, &mut offset) - }); - - // The two initial jump statements are prepended to the rule. - let rule_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(offset) + 1), - ]; - rule_len += rule_jumps.len(); - accumulator.push(rule_jumps); - - // Finally, builds the translated rule by consuming the accumulator. - let mut result = Vec::with_capacity(rule_len); - accumulator - .into_iter() - .rev() - .for_each(|mut instructions| result.append(&mut instructions)); - - result - } -} - -impl SeccompFilter { - /// Creates a new filter with a set of rules and a default action. - /// - /// # Arguments - /// - /// * `rules` - Map of syscall numbers and the rules that will be applied to each of them. - /// * `default_action` - Action taken for all syscalls that do not match any rule. - /// * `target_arch` - Target architecture of the generated BPF filter. 
- pub fn new( - rules: SeccompRuleMap, - default_action: SeccompAction, - target_arch: &str, - ) -> Result { - let instance = Self { - rules, - default_action, - target_arch: target_arch.try_into().map_err(FilterError::Arch)?, - }; - - instance.validate()?; - - Ok(instance) - } - - /// Performs semantic checks on the SeccompFilter. - fn validate(&self) -> Result<(), FilterError> { - for (syscall_number, syscall_rules) in self.rules.iter() { - // All inserted syscalls must have at least one rule, otherwise BPF code will break. - if syscall_rules.is_empty() { - return Err(FilterError::EmptyRulesVector); - } - - // Now check for conflicting rules. - // Match on the number of empty rules for the given syscall. - // An `empty rule` is a rule that doesn't have any argument checks. - match syscall_rules - .iter() - .filter(|rule| rule.conditions.is_empty()) - .count() - { - // If the syscall has an empty rule, it may only have that rule. - 1 if syscall_rules.len() > 1 => { - return Err(FilterError::ConflictingRules(*syscall_number)); - } - // This syscall only has the one rule, so is valid. - 1 if syscall_rules.len() <= 1 => {} - // The syscall has no empty rules. - 0 => {} - // For a greater than 1 number of empty rules, error out. - _ => { - return Err(FilterError::ConflictingRules(*syscall_number)); - } - } - } - - Ok(()) - } - - /// Appends a chain of rules to an accumulator, updating the length of the filter. - /// - /// # Arguments - /// - /// * `syscall_number` - The syscall to which the rules apply. - /// * `chain` - The chain of rules for the specified syscall. - /// * `default_action` - The action to be taken in none of the rules apply. - /// * `accumulator` - The expanding BPF program. - /// * `filter_len` - The size (in number of BPF statements) of the BPF program. This is limited - /// to 4096. If the limit is exceeded, the filter is invalidated. 
- fn append_syscall_chain( - syscall_number: i64, - chain: Vec, - default_action: u32, - accumulator: &mut Vec>, - filter_len: &mut usize, - ) -> Result<(), FilterError> { - // The rules of the chain are translated into BPF statements. - let chain: Vec<_> = chain.into_iter().map(SeccompRule::into).collect(); - let chain_len: usize = chain.iter().map(std::vec::Vec::len).sum(); - - // The chain starts with a comparison checking the loaded syscall number against the - // syscall number of the chain. - let mut built_syscall = Vec::with_capacity(1 + chain_len + 1); - built_syscall.push(BPF_JUMP( - BPF_JMP + BPF_JEQ + BPF_K, - u32::try_from(syscall_number).unwrap(), - 0, - 1, - )); - - // The rules of the chain are appended. - chain - .into_iter() - .for_each(|mut rule| built_syscall.append(&mut rule)); - - // The default action is appended, if the syscall number comparison matched and then all - // rules fail to match, the default action is reached. - built_syscall.push(BPF_STMT(BPF_RET + BPF_K, default_action)); - - // The chain is appended to the result. - *filter_len += built_syscall.len(); - accumulator.push(built_syscall); - - // BPF programs are limited to 4096 statements. - if *filter_len >= usize::from(BPF_MAX_LEN) { - return Err(FilterError::FilterTooLarge); - } - - Ok(()) - } -} - -impl TryInto for SeccompFilter { - type Error = FilterError; - fn try_into(self) -> Result { - // Initialize the result with the precursory architecture check. - let mut result = VALIDATE_ARCHITECTURE(self.target_arch); - - // If no rules are set up, the filter will always return the default action, - // so let's short-circuit the function. - if self.rules.is_empty() { - result.extend(vec![BPF_STMT( - BPF_RET + BPF_K, - u32::from(self.default_action), - )]); - - return Ok(result); - } - - // The called syscall number is loaded. 
- let mut accumulator = Vec::with_capacity(1); - let mut filter_len = 1; - accumulator.push(EXAMINE_SYSCALL()); - - // Orders syscalls by priority, the highest number represents the highest priority. - let mut iter = self.rules.into_iter(); - - // For each syscall adds its rule chain to the filter. - let default_action = u32::from(self.default_action); - iter.try_for_each(|(syscall_number, chain)| { - SeccompFilter::append_syscall_chain( - syscall_number, - chain, - default_action, - &mut accumulator, - &mut filter_len, - ) - })?; - - // The default action is once again appended, it is reached if all syscall number - // comparisons fail. - filter_len += 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, default_action)]); - - // Finally, builds the translated filter by consuming the accumulator. - result.reserve(filter_len); - accumulator - .into_iter() - .for_each(|mut instructions| result.append(&mut instructions)); - - if result.len() >= usize::from(BPF_MAX_LEN) { - return Err(FilterError::FilterTooLarge); - } - - Ok(result) - } -} - -/// Builds a `jump` BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `jt` - The jump offset in case the operation returns `true`. -/// * `jf` - The jump offset in case the operation returns `false`. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_JUMP(code: u16, k: u32, jt: u8, jf: u8) -> sock_filter { - sock_filter { code, jt, jf, k } -} - -/// Builds a "statement" BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_STMT(code: u16, k: u32) -> sock_filter { - sock_filter { - code, - jt: 0, - jf: 0, - k, - } -} - -/// Builds a sequence of BPF instructions that validate the underlying architecture. 
-#[allow(non_snake_case)] -#[inline(always)] -fn VALIDATE_ARCHITECTURE(target_arch: TargetArch) -> Vec { - let audit_arch_value = target_arch.get_audit_value(); - vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 4), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, audit_arch_value, 1, 0), - BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS), - ] -} - -/// Builds a sequence of BPF instructions that are followed by syscall examination. -#[allow(non_snake_case)] -#[inline(always)] -fn EXAMINE_SYSCALL() -> Vec { - vec![BPF_STMT( - BPF_LD + BPF_W + BPF_ABS, - u32::from(SECCOMP_DATA_NR_OFFSET), - )] -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - use std::env::consts::ARCH; - use std::thread; - - use super::SeccompCmpOp::*; - use super::{SeccompCmpArgLen as ArgLen, SeccompCondition as Cond, *}; - - // BPF structure definition for filter array. - // See /usr/include/linux/filter.h . - #[repr(C)] - struct sock_fprog { - pub len: ::std::os::raw::c_ushort, - pub filter: *const sock_filter, - } - - // Builds the (syscall, rules) tuple for allowing a syscall with certain arguments. - fn allow_syscall_if(syscall_number: i64, rules: Vec) -> (i64, Vec) { - (syscall_number, rules) - } - - impl SeccompCondition { - /// Creates a new `SeccompCondition`. - pub fn new( - arg_number: u8, - arg_len: SeccompCmpArgLen, - operator: SeccompCmpOp, - value: u64, - ) -> Result { - let instance = Self { - arg_number, - arg_len, - operator, - value, - comment: None, - }; - - instance.validate().map(|_| Ok(instance))? - } - } - - // The type of the `req` parameter is different for the `musl` library. This will enable - // successful build for other non-musl libraries. - #[cfg(target_env = "musl")] - type IoctlRequest = i32; - #[cfg(not(target_env = "musl"))] - type IoctlRequest = u64; - - // We use KVM_GET_PIT2 as the second parameter for ioctl syscalls in some unit tests - // because it's a corner case. 
More details - // [here](https://github.com/firecracker-microvm/firecracker/issues/1206) - const KVM_GET_PIT2: u64 = 0x8070_ae9f; - const KVM_GET_PIT2_MSB: u64 = 0x0000_ae9f; - const KVM_GET_PIT2_LSB: u64 = 0x8070_0000; - - const EXTRA_SYSCALLS: [i64; 6] = [ - libc::SYS_rt_sigprocmask, - libc::SYS_sigaltstack, - libc::SYS_munmap, - libc::SYS_exit, - libc::SYS_rt_sigreturn, - libc::SYS_futex, - ]; - - fn install_filter(bpf_filter: BpfProgram) { - unsafe { - { - let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - assert_eq!(rc, 0); - } - let bpf_prog = sock_fprog { - len: u16::try_from(bpf_filter.len()).unwrap(), - filter: bpf_filter.as_ptr(), - }; - let bpf_prog_ptr = &bpf_prog as *const sock_fprog; - { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, - bpf_prog_ptr, - ); - assert_eq!(rc, 0); - } - } - } - - fn validate_seccomp_filter( - rules: Vec<(i64, Vec)>, - validation_fn: fn(), - should_fail: bool, - ) { - let failure_code: i32 = 1000; - - let mut rule_map: SeccompRuleMap = rules.into_iter().collect(); - - for syscall in EXTRA_SYSCALLS.iter() { - rule_map - .entry(*syscall) - .or_default() - .append(&mut vec![SeccompRule::new(vec![], SeccompAction::Allow)]); - } - - // Build seccomp filter. - let filter = SeccompFilter::new( - rule_map, - SeccompAction::Errno(u32::try_from(failure_code).unwrap()), - ARCH, - ) - .unwrap(); - - // We need to run the validation inside another thread in order to avoid setting - // the seccomp filter for the entire unit tests process. - let errno = thread::spawn(move || { - // Install the filter. - install_filter(filter.try_into().unwrap()); - - // Call the validation fn. - validation_fn(); - - // Return errno. 
- std::io::Error::last_os_error().raw_os_error().unwrap() - }) - .join() - .unwrap(); - - // In case of a seccomp denial `errno` should be `failure_code` - if should_fail { - assert_eq!(errno, failure_code); - } else { - assert_ne!(errno, failure_code); - } - } - - #[test] - fn test_eq_operator() { - // check use cases for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Eq, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0); - }, - true, - ); - - // check use cases for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Eq, u64::MAX).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::MAX); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 0); - }, - true, - ); - } - - #[test] - fn test_ge_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Ge, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - 
libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Ge, u64::from(u32::MAX)).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX)); - libc::ioctl(0, 0, u64::from(u32::MAX) + 1); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 1); - }, - true, - ); - } - - #[test] - fn test_gt_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Gt, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Gt, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 11); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - }, - true, - ); - } - - #[test] - fn test_le_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = 
vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Le, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Le, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - libc::ioctl(0, 0, u64::from(u32::MAX) + 9); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 11); - }, - true, - ); - } - - #[test] - fn test_lt_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Lt, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, 
SeccompCmpArgLen::Qword, Lt, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 9); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - }, - true, - ); - } - - #[test] - fn test_masked_eq_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new( - 1, - SeccompCmpArgLen::Dword, - MaskedEq(KVM_GET_PIT2_MSB), - KVM_GET_PIT2, - ) - .unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, KVM_GET_PIT2_MSB as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2_LSB as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new( - 2, - SeccompCmpArgLen::Qword, - MaskedEq(u64::from(u32::MAX)), - u64::MAX, - ) - .unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX)); - libc::ioctl(0, 0, u64::MAX); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 0); - }, - true, - ); - } - - #[test] - fn test_ne_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Ne, 
KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Ne, u64::MAX).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, 0); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::MAX); - }, - true, - ); - } - - // Checks that rule gets translated correctly into BPF statements. - #[test] - fn test_rule_bpf_output() { - Cond::new(6, ArgLen::Qword, Eq, 1).unwrap_err(); - - // Builds rule. - let rule = SeccompRule::new( - vec![ - Cond::new(0, ArgLen::Dword, Eq, 1).unwrap(), - Cond::new(2, ArgLen::Qword, MaskedEq(0b1010), 14).unwrap(), - ], - SeccompAction::Allow, - ); - - let (msb_offset, lsb_offset) = { (4, 0) }; - - // Builds hardcoded BPF instructions. - let instructions = vec![ - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 10), - BPF_STMT(0x20, 32 + msb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, 6), - BPF_STMT(0x20, 32 + lsb_offset), - BPF_STMT(0x54, 0b1010), - BPF_JUMP(0x15, 14 & 0b1010, 0, 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - ]; - - // Compares translated rule with hardcoded BPF instructions. - let bpfprog: BpfProgram = rule.into(); - assert_eq!(bpfprog, instructions); - } - - // Checks that rule with too many conditions gets translated correctly into BPF statements - // using three helper jumps. 
- #[test] - fn test_rule_many_conditions_bpf_output() { - // Builds rule. - let mut conditions = Vec::with_capacity(43); - for _ in 0..42 { - conditions.push(Cond::new(0, ArgLen::Qword, MaskedEq(0), 0).unwrap()); - } - conditions.push(Cond::new(0, ArgLen::Qword, Eq, 0).unwrap()); - let rule = SeccompRule::new(conditions, SeccompAction::Allow); - - let (msb_offset, lsb_offset) = { (4, 0) }; - - // Builds hardcoded BPF instructions. - let mut instructions = vec![ - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 16 + msb_offset), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_JUMP(0x15, 0, 0, 1), - BPF_STMT(0x05, 2), - BPF_STMT(0x05, 254), - BPF_STMT(0x05, 254), - ]; - let mut offset = 253; - for _ in 0..42 { - offset -= 6; - instructions.append(&mut vec![ - BPF_STMT(0x20, 16 + msb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, offset + 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, offset), - ]); - } - instructions.push(BPF_STMT(0x06, 0x7fff_0000)); - - // Compares translated rule with hardcoded BPF instructions. 
- let bpfprog: BpfProgram = rule.into(); - assert_eq!(bpfprog, instructions); - } - - fn create_test_bpf_filter(arg_len: ArgLen) -> SeccompFilter { - SeccompFilter::new( - vec![ - allow_syscall_if( - 1, - vec![ - SeccompRule::new( - vec![ - Cond::new(2, arg_len.clone(), Le, 14).unwrap(), - Cond::new(2, arg_len.clone(), Ne, 10).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![ - Cond::new(2, arg_len.clone(), Gt, 20).unwrap(), - Cond::new(2, arg_len.clone(), Lt, 30).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![Cond::new(2, arg_len.clone(), Ge, 42).unwrap()], - SeccompAction::Allow, - ), - ], - ), - allow_syscall_if( - 9, - vec![SeccompRule::new( - vec![Cond::new(1, arg_len, MaskedEq(0b100), 36).unwrap()], - SeccompAction::Allow, - )], - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap() - } - - #[test] - fn test_filter_bpf_output_dword() { - // Compares translated filter with hardcoded BPF program. - { - let mut empty_rule_map = BTreeMap::new(); - empty_rule_map.insert(1, vec![]); - SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap_err(); - } - - let filter = create_test_bpf_filter(ArgLen::Dword); - - let mut instructions = Vec::new(); - instructions.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - instructions.extend(vec![ - BPF_STMT(0x20, 0), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 32), - BPF_JUMP(0x15, 10, 3, 0), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 14, 1, 0), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 30, 3, 0), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 20, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 4), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 42, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_JUMP(0x15, 9, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 5), - BPF_STMT(0x20, 24), - 
BPF_STMT(0x54, 0b100), - BPF_JUMP(0x15, 36 & 0b100, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_STMT(0x06, 0x0003_0000), - ]); - - let bpfprog: BpfProgram = filter.try_into().unwrap(); - assert_eq!(bpfprog, instructions); - } - - #[test] - fn test_filter_bpf_output_qword() { - // Compares translated filter with hardcoded BPF program. - { - let mut empty_rule_map = BTreeMap::new(); - empty_rule_map.insert(1, vec![]); - SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap_err(); - } - - let filter = create_test_bpf_filter(ArgLen::Qword); - - let mut instructions = Vec::new(); - instructions.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - instructions.extend(vec![ - BPF_STMT(0x20, 0), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 11), - BPF_STMT(0x20, 36), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x15, 10, 6, 0), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 4, 0), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 14, 1, 0), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 12), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 9, 0), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 30, 6, 0), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 3, 0), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 20, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 7), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 3, 0), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 42, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_JUMP(0x15, 9, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 8), - BPF_STMT(0x20, 28), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, 4), - BPF_STMT(0x20, 24), - BPF_STMT(0x54, 0b100), - BPF_JUMP(0x15, 36 & 0b100, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_STMT(0x06, 0x0003_0000), - ]); - - let bpfprog: BpfProgram = 
filter.try_into().unwrap(); - assert_eq!(bpfprog, instructions); - } - - #[test] - fn test_bpf_expanding_functions() { - // Compares the output of the BPF instruction generating functions to hardcoded - // instructions. - assert_eq!( - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 16), - sock_filter { - code: 0x20, - jt: 0, - jf: 0, - k: 16, - } - ); - assert_eq!( - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 10, 2, 5), - sock_filter { - code: 0x15, - jt: 2, - jf: 5, - k: 10, - } - ); - } - - #[test] - fn test_bpf_functions() { - { - let ret = VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap()); - let instructions = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - #[cfg(target_arch = "x86_64")] - k: AUDIT_ARCH_X86_64, - #[cfg(target_arch = "aarch64")] - k: AUDIT_ARCH_AARCH64, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: SECCOMP_RET_KILL_PROCESS, - }, - ]; - assert_eq!(ret, instructions); - } - - { - let ret = EXAMINE_SYSCALL(); - let instructions = vec![sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }]; - assert_eq!(ret, instructions); - } - } - - #[test] - fn test_empty_filter() { - // An empty filter should always return the default action. - // For example, for an empty allowlist, it should always trap/kill, - // for an empty denylist, it should allow allow all system calls. - - let mut expected_program = Vec::new(); - expected_program.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - expected_program.extend(vec![BPF_STMT(0x06, 0x7fff_0000)]); - - let empty_rule_map = BTreeMap::new(); - let filter = SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap(); - let prog: BpfProgram = filter.try_into().unwrap(); - - assert_eq!(expected_program, prog); - - // This should allow any system calls. - let pid = thread::spawn(move || { - // Install the filter. 
- install_filter(prog); - - unsafe { libc::getpid() } - }) - .join() - .unwrap(); - - // Check that the getpid syscall returned successfully. - assert!(pid > 0); - } - - #[test] - fn test_error_messages() { - assert_eq!( - format!("{}", FilterError::EmptyRulesVector), - "The seccomp rules vector is empty." - ); - assert_eq!( - format!("{}", FilterError::FilterTooLarge), - "The seccomp filter contains too many BPF instructions." - ); - assert_eq!( - format!("{}", FilterError::InvalidArgumentNumber), - "The seccomp rule contains an invalid argument number." - ); - assert_eq!( - format!( - "{}", - FilterError::Arch(TargetArchError::InvalidString("lala".to_string())) - ), - format!("{0}", TargetArchError::InvalidString("lala".to_string())) - ); - } - - #[test] - fn test_from_seccomp_action() { - assert_eq!(0x7fff_0000, u32::from(SeccompAction::Allow)); - assert_eq!(0x0005_002a, u32::from(SeccompAction::Errno(42))); - assert_eq!(0x0000_0000, u32::from(SeccompAction::KillThread)); - assert_eq!(0x8000_0000, u32::from(SeccompAction::KillProcess)); - assert_eq!(0x7ffc_0000, u32::from(SeccompAction::Log)); - assert_eq!(0x7ff0_002a, u32::from(SeccompAction::Trace(42))); - assert_eq!(0x0003_0000, u32::from(SeccompAction::Trap)); - } - - #[test] - fn test_validate_condition() { - // Invalid argument number - assert_eq!( - Cond::new(90, ArgLen::Dword, Eq, 65), - Err(FilterError::InvalidArgumentNumber) - ); - - // Valid argument number - Cond::new(0, ArgLen::Dword, Eq, 65).unwrap(); - } - - #[test] - fn test_seccomp_filter_validate() { - // Failure cases. - { - // Syscall has no rules. - assert_eq!( - SeccompFilter::new( - vec![(1, vec![]),].into_iter().collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::EmptyRulesVector - ); - // Syscall has multiple empty rules. 
- assert_eq!( - SeccompFilter::new( - vec![( - 1, - vec![ - SeccompRule::new(vec![], SeccompAction::Allow), - SeccompRule::new(vec![], SeccompAction::Allow) - ] - ),] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::ConflictingRules(1) - ); - - // Syscall has both empty rules condition-based rules. - assert_eq!( - SeccompFilter::new( - vec![( - 1, - vec![ - SeccompRule::new(vec![], SeccompAction::Allow), - SeccompRule::new( - vec![ - Cond::new(2, ArgLen::Dword, Le, 14).unwrap(), - Cond::new(1, ArgLen::Dword, Ne, 10).unwrap(), - ], - SeccompAction::Allow, - ), - ] - ),] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::ConflictingRules(1) - ); - } - } -} diff --git a/src/seccompiler/src/bin.rs b/src/seccompiler/src/bin.rs new file mode 100644 index 00000000000..8fb9d0fd511 --- /dev/null +++ b/src/seccompiler/src/bin.rs @@ -0,0 +1,40 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use clap::Parser; +use seccompiler::{compile_bpf, CompilationError}; + +const DEFAULT_OUTPUT_FILENAME: &str = "seccomp_binary_filter.out"; + +#[derive(Debug, Parser)] +#[command(version = format!("v{}", env!("CARGO_PKG_VERSION")))] +struct Cli { + #[arg( + short, + long, + help = "The computer architecture where the BPF program runs. Supported architectures: \ + x86_64, aarch64." + )] + target_arch: String, + #[arg(short, long, help = "File path of the JSON input.")] + input_file: String, + #[arg(short, long, help = "Optional path of the output file.", default_value = DEFAULT_OUTPUT_FILENAME)] + output_file: String, + #[arg( + short, + long, + help = "Deprecated! Transforms the filters into basic filters. Drops all argument checks \ + and rule-level actions. Not recommended." 
+ )] + basic: bool, +} + +fn main() -> Result<(), CompilationError> { + let cli = Cli::parse(); + compile_bpf( + &cli.input_file, + &cli.target_arch, + &cli.output_file, + cli.basic, + ) +} diff --git a/src/seccompiler/src/bindings.rs b/src/seccompiler/src/bindings.rs new file mode 100644 index 00000000000..969ea91cd1c --- /dev/null +++ b/src/seccompiler/src/bindings.rs @@ -0,0 +1,171 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2021 Sony Group Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +//! Raw FFI bindings for libseccomp library + +use std::os::raw::*; + +pub const MINUS_EEXIST: i32 = -libc::EEXIST; + +/// Filter context/handle (`*mut`) +pub type scmp_filter_ctx = *mut c_void; +/// Filter context/handle (`*const`) +pub type const_scmp_filter_ctx = *const c_void; + +/// Comparison operators +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(C)] +pub enum scmp_compare { + _SCMP_CMP_MIN = 0, + /// not equal + SCMP_CMP_NE = 1, + /// less than + SCMP_CMP_LT = 2, + /// less than or equal + SCMP_CMP_LE = 3, + /// equal + SCMP_CMP_EQ = 4, + /// greater than or equal + SCMP_CMP_GE = 5, + /// greater than + SCMP_CMP_GT = 6, + /// masked equality + SCMP_CMP_MASKED_EQ = 7, + _SCMP_CMP_MAX, +} + +/// Argument datum +pub type scmp_datum_t = u64; + +/// Argument / Value comparison definition +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(C)] +pub struct scmp_arg_cmp { + /// argument number, starting at 0 + pub arg: c_uint, + /// the comparison op, e.g. 
`SCMP_CMP_*` + pub op: scmp_compare, + pub datum_a: scmp_datum_t, + pub datum_b: scmp_datum_t, +} + +pub const SCMP_ARCH_X86_64: u32 = 0xc000003e; +pub const SCMP_ARCH_AARCH64: u32 = 0xc00000b7; +/// Kill the process +pub const SCMP_ACT_KILL_PROCESS: u32 = 0x80000000; +/// Kill the thread +pub const SCMP_ACT_KILL_THREAD: u32 = 0x00000000; +/// Throw a `SIGSYS` signal +pub const SCMP_ACT_TRAP: u32 = 0x00030000; +/// Base value of the errno action; [`SCMP_ACT_ERRNO`] ORs the error code into it +pub const SCMP_ACT_ERRNO_MASK: u32 = 0x00050000; +/// Return the specified error code +#[must_use] +pub const fn SCMP_ACT_ERRNO(x: u16) -> u32 { + SCMP_ACT_ERRNO_MASK | x as u32 +} +pub const SCMP_ACT_TRACE_MASK: u32 = 0x7ff00000; +/// Notify a tracing process with the specified value +#[must_use] +pub const fn SCMP_ACT_TRACE(x: u16) -> u32 { + SCMP_ACT_TRACE_MASK | x as u32 +} +/// Allow the syscall to be executed after the action has been logged +pub const SCMP_ACT_LOG: u32 = 0x7ffc0000; +/// Allow the syscall to be executed +pub const SCMP_ACT_ALLOW: u32 = 0x7fff0000; + +#[link(name = "seccomp")] +unsafe extern "C" { + /// Initialize the filter state + /// + /// - `def_action`: the default filter action + /// + /// This function initializes the internal seccomp filter state and should + /// be called before any other functions in this library to ensure the filter + /// state is initialized. Returns a filter context on success, `ptr::null()` on failure. + pub safe fn seccomp_init(def_action: u32) -> scmp_filter_ctx; + + /// Adds an architecture to the filter + /// + /// - `ctx`: the filter context + /// - `arch_token`: the architecture token, e.g. `SCMP_ARCH_*` + /// + /// This function adds a new architecture to the given seccomp filter context. + /// Any new rules added after this function successfully returns will be added + /// to this architecture but existing rules will not be added to this + /// architecture. If the architecture token is [`SCMP_ARCH_NATIVE`] then the native + /// architecture will be assumed.
Returns zero on success, `-libc::EEXIST` if + /// specified architecture is already present, other negative values on failure. + pub fn seccomp_arch_add(ctx: scmp_filter_ctx, arch_token: u32) -> c_int; + + /// Resolve a syscall name to a number + /// + /// - `name`: the syscall name + /// + /// Resolve the given syscall name to the syscall number. Returns the syscall + /// number on success, including negative pseudo syscall numbers (e.g. `__PNR_*`); + /// returns [`__NR_SCMP_ERROR`] on failure. + pub fn seccomp_syscall_resolve_name(name: *const c_char) -> c_int; + + /// Add a new rule to the filter + /// + /// - `ctx`: the filter context + /// - `action`: the filter action + /// - `syscall`: the syscall number + /// - `arg_cnt`: the number of argument filters in the argument filter chain + /// - `...`: [`scmp_arg_cmp`] structs + /// + /// This function adds a series of new argument/value checks to the seccomp + /// filter for the given syscall; multiple argument/value checks can be + /// specified and they will be chained together (AND'd together) in the filter. + /// If the specified rule needs to be adjusted due to architecture specifics it + /// will be adjusted without notification. Returns zero on success, negative + /// values on failure. + pub fn seccomp_rule_add( + ctx: scmp_filter_ctx, + action: u32, + syscall: c_int, + arg_cnt: c_uint, + ... + ) -> c_int; + + /// Add a new rule to the filter + /// + /// - `ctx`: the filter context + /// - `action`: the filter action + /// - `syscall`: the syscall number + /// - `arg_cnt`: the number of elements in the arg_array parameter + /// - `arg_array`: array of [`scmp_arg_cmp`] structs + /// + /// This function adds a series of new argument/value checks to the seccomp + /// filter for the given syscall; multiple argument/value checks can be + /// specified and they will be chained together (AND'd together) in the filter. 
+ /// If the specified rule needs to be adjusted due to architecture specifics it + /// will be adjusted without notification. Returns zero on success, negative + /// values on failure. + pub fn seccomp_rule_add_array( + ctx: scmp_filter_ctx, + action: u32, + syscall: c_int, + arg_cnt: c_uint, + arg_array: *const scmp_arg_cmp, + ) -> c_int; + + /// Generate seccomp Berkeley Packet Filter (BPF) code and export it to a file + /// + /// - `ctx`: the filter context + /// - `fd`: the destination fd + /// + /// This function generates seccomp Berkeley Packer Filter (BPF) code and writes + /// it to the given fd. Returns zero on success, negative values on failure. + pub fn seccomp_export_bpf(ctx: const_scmp_filter_ctx, fd: c_int) -> c_int; +} + +/// Negative pseudo syscall number returned by some functions in case of an error +pub const __NR_SCMP_ERROR: c_int = -1; diff --git a/src/seccompiler/src/common.rs b/src/seccompiler/src/common.rs deleted file mode 100644 index 80ff96ad9f3..00000000000 --- a/src/seccompiler/src/common.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Module that defines common data structures used by both the library crate -//! and seccompiler-bin. - -use serde::{Deserialize, Serialize}; - -/// The maximum seccomp-BPF program length allowed by the linux kernel. -pub const BPF_MAX_LEN: u16 = 4096; - -/// BPF instruction structure definition. -/// See /usr/include/linux/filter.h . -#[repr(C)] -#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] -#[doc(hidden)] -pub struct sock_filter { - pub code: ::std::os::raw::c_ushort, - pub jt: ::std::os::raw::c_uchar, - pub jf: ::std::os::raw::c_uchar, - pub k: ::std::os::raw::c_uint, -} - -/// Program made up of a sequence of BPF instructions. 
-pub type BpfProgram = Vec; diff --git a/src/seccompiler/src/compiler.rs b/src/seccompiler/src/compiler.rs deleted file mode 100644 index 9194bc7e5bd..00000000000 --- a/src/seccompiler/src/compiler.rs +++ /dev/null @@ -1,540 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Module defining the logic for compiling the deserialized filter objects into the IR. -//! Used by seccompiler-bin. -//! -//! Via the `Compiler::compile_blob()` method, it also drives the entire JSON -> BLOB -//! transformation process. -//! -//! It also defines some of the objects that a JSON seccomp filter is deserialized into: -//! [`Filter`](struct.Filter.html), -//! [`SyscallRule`](struct.SyscallRule.html). -// -//! The rest of objects are deserialized directly into the IR (intermediate representation): -//! [`SeccompCondition`](../backend/struct.SeccompCondition.html), -//! [`SeccompAction`](../backend/enum.SeccompAction.html), -//! [`SeccompCmpOp`](../backend/enum.SeccompCmpOp.html), -//! [`SeccompCmpArgLen`](../backend/enum.SeccompCmpArgLen.html). - -use std::collections::BTreeMap; -use std::convert::{Into, TryInto}; -use std::{fmt, result}; - -use serde::de::{self, Error as _, MapAccess, Visitor}; -use serde::Deserialize; - -use crate::backend::{ - Comment, FilterError, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule, - SeccompRuleMap, TargetArch, -}; -use crate::common::BpfProgram; -use crate::syscall_table::SyscallTable; - -/// Errors compiling Filters into BPF. -#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum CompilationError { - /// `filter_action` and `default_action` are equal. - IdenticalActions, - /// {0} - Filter(#[from] FilterError), - /// Invalid syscall name: {0} for given arch: {1:?}. - SyscallName(String, TargetArch), -} - -/// Deserializable object that represents the Json filter file. 
-#[derive(Debug)] -pub struct JsonFile(pub BTreeMap); - -// Implement a custom deserializer, that returns an error for duplicate thread keys. -impl<'de> Deserialize<'de> for JsonFile { - fn deserialize(deserializer: D) -> result::Result - where - D: de::Deserializer<'de>, - { - #[derive(Debug)] - struct JsonFileVisitor; - - impl<'d> Visitor<'d> for JsonFileVisitor { - type Value = BTreeMap; - - fn expecting(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> { - f.write_str("a map of filters") - } - - fn visit_map(self, mut access: M) -> result::Result - where - M: MapAccess<'d>, - { - let mut values = Self::Value::new(); - - while let Some((key, value)) = access.next_entry()? { - if values.insert(key, value).is_some() { - return Err(M::Error::custom("duplicate filter key")); - }; - } - - Ok(values) - } - } - Ok(JsonFile(deserializer.deserialize_map(JsonFileVisitor)?)) - } -} - -/// Deserializable object representing a syscall rule. -#[derive(Debug, Deserialize, PartialEq, Clone)] -#[serde(deny_unknown_fields)] -pub struct SyscallRule { - /// Name of the syscall. - syscall: String, - /// Rule conditions. - #[serde(rename = "args")] - conditions: Option>, - /// Optional empty value, represents a `comment` property in the JSON file. - comment: Option, -} - -impl SyscallRule { - /// Perform semantic checks after deserialization. - fn validate(&self) -> Result<(), CompilationError> { - // Validate all `SeccompCondition`s. - if let Some(conditions) = self.conditions.as_ref() { - return conditions - .iter() - .filter_map(|cond| cond.validate().err()) - .next() - .map_or(Ok(()), |err| Err(CompilationError::Filter(err))); - } - - Ok(()) - } -} - -/// Deserializable seccomp filter. Refers to one thread category. -#[derive(Deserialize, PartialEq, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct Filter { - /// Default action if no rules match. e.g. `Kill` for an AllowList. - default_action: SeccompAction, - /// Default action if a rule matches. e.g. 
`Allow` for an AllowList. - filter_action: SeccompAction, - /// The collection of `SyscallRule`s. - filter: Vec, -} - -impl Filter { - /// Perform semantic checks after deserialization. - fn validate(&self) -> Result<(), CompilationError> { - // Doesn't make sense to have equal default and on-match actions. - if self.default_action == self.filter_action { - return Err(CompilationError::IdenticalActions); - } - - // Validate all `SyscallRule`s. - self.filter - .iter() - .filter_map(|syscall_rule| syscall_rule.validate().err()) - .next() - .map_or(Ok(()), Err) - } -} - -/// Object responsible for compiling [`Filter`](struct.Filter.html)s into -/// [`BpfProgram`](../common/type.BpfProgram.html)s. -/// Uses the [`SeccompFilter`](../backend/struct.SeccompFilter.html) interface as an IR language. -#[derive(Debug)] -pub struct Compiler { - /// Target architecture. Can be different from the current `target_arch`. - arch: TargetArch, - /// Target-specific syscall table. - syscall_table: SyscallTable, -} - -impl Compiler { - /// Create a new `Compiler` instance, for the given target architecture. - pub fn new(arch: TargetArch) -> Self { - Self { - arch, - syscall_table: SyscallTable::new(arch), - } - } - - /// Perform semantic checks after deserialization. - fn validate_filters(&self, filters: &BTreeMap) -> Result<(), CompilationError> { - // Validate all `Filter`s. - filters - .iter() - .filter_map(|(_, filter)| filter.validate().err()) - .next() - .map_or(Ok(()), Err) - } - - /// Main compilation function. 
- pub fn compile_blob( - &self, - filters: BTreeMap, - is_basic: bool, - ) -> Result, CompilationError> { - self.validate_filters(&filters)?; - let mut bpf_map: BTreeMap = BTreeMap::new(); - - for (thread_name, filter) in filters.into_iter() { - if is_basic { - bpf_map.insert( - thread_name, - self.make_basic_seccomp_filter(filter)?.try_into()?, - ); - } else { - bpf_map.insert(thread_name, self.make_seccomp_filter(filter)?.try_into()?); - } - } - Ok(bpf_map) - } - - /// Transforms the deserialized `Filter` into a `SeccompFilter` (IR language). - fn make_seccomp_filter(&self, filter: Filter) -> Result { - let mut rule_map: SeccompRuleMap = SeccompRuleMap::new(); - let filter_action = &filter.filter_action; - - for syscall_rule in filter.filter { - let syscall_name = syscall_rule.syscall; - let action = filter_action.clone(); - let syscall_nr = self - .syscall_table - .get_syscall_nr(&syscall_name) - .ok_or_else(|| CompilationError::SyscallName(syscall_name.clone(), self.arch))?; - let rule_accumulator = rule_map.entry(syscall_nr).or_default(); - - match syscall_rule.conditions { - Some(conditions) => rule_accumulator.push(SeccompRule::new(conditions, action)), - None => rule_accumulator.push(SeccompRule::new(vec![], action)), - }; - } - - SeccompFilter::new(rule_map, filter.default_action, self.arch.into()) - .map_err(CompilationError::Filter) - } - - /// Transforms the deserialized `Filter` into a basic `SeccompFilter` (IR language). - /// This filter will drop any argument checks and any rule-level action. - /// All rules will trigger the filter-level `filter_action`. - fn make_basic_seccomp_filter(&self, filter: Filter) -> Result { - let mut rule_map: SeccompRuleMap = SeccompRuleMap::new(); - let filter_action = &filter.filter_action; - - for syscall_rule in filter.filter { - let syscall_name = syscall_rule.syscall; - // Basic filters bypass the rule-level action and use the filter_action. 
- let action = filter_action.clone(); - let syscall_nr = self - .syscall_table - .get_syscall_nr(&syscall_name) - .ok_or_else(|| CompilationError::SyscallName(syscall_name.clone(), self.arch))?; - - // If there is already an entry for this syscall, do nothing. - // Otherwise, insert an empty rule that triggers the filter_action. - rule_map - .entry(syscall_nr) - .or_insert_with(|| vec![SeccompRule::new(vec![], action)]); - } - - SeccompFilter::new(rule_map, filter.default_action, self.arch.into()) - .map_err(CompilationError::Filter) - } -} - -#[cfg(test)] -mod tests { - use std::collections::BTreeMap; - use std::convert::TryInto; - use std::env::consts::ARCH; - - use super::{CompilationError, Compiler, Filter, SyscallRule}; - use crate::backend::SeccompCmpArgLen::*; - use crate::backend::SeccompCmpOp::*; - use crate::backend::{ - FilterError, SeccompAction, SeccompCondition as Cond, SeccompFilter, SeccompRule, - TargetArch, - }; - - impl Filter { - fn new( - default_action: SeccompAction, - filter_action: SeccompAction, - filter: Vec, - ) -> Filter { - Filter { - default_action, - filter_action, - filter, - } - } - } - - impl SyscallRule { - fn new(syscall: String, conditions: Option>) -> SyscallRule { - SyscallRule { - syscall, - conditions, - comment: None, - } - } - } - - fn match_syscall(syscall_number: i64, action: SeccompAction) -> (i64, Vec) { - (syscall_number, vec![SeccompRule::new(vec![], action)]) - } - - fn match_syscall_if(syscall_number: i64, rules: Vec) -> (i64, Vec) { - (syscall_number, rules) - } - - #[test] - // Test the transformation of Filter objects into SeccompFilter objects. - // We test this private method because we are interested in seeing that the - // Filter -> SeccompFilter transformation is done correctly. - fn test_make_seccomp_filter() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test a well-formed filter. Malformed filters are tested in test_compile_blob(). 
- let filter = Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![Cond::new(3, Qword, Ge, 65).unwrap()]), - ), - SyscallRule::new( - "ioctl".to_string(), - Some(vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()]), - ), - ], - ); - - // The expected IR. - let seccomp_filter = SeccompFilter::new( - vec![ - match_syscall( - compiler.syscall_table.get_syscall_nr("read").unwrap(), - SeccompAction::Allow, - ), - match_syscall_if( - compiler.syscall_table.get_syscall_nr("futex").unwrap(), - vec![ - SeccompRule::new( - vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![Cond::new(3, Qword, Ge, 65).unwrap()], - SeccompAction::Allow, - ), - ], - ), - match_syscall_if( - compiler.syscall_table.get_syscall_nr("ioctl").unwrap(), - vec![SeccompRule::new( - vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()], - SeccompAction::Allow, - )], - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap(); - - assert_eq!( - compiler.make_seccomp_filter(filter).unwrap(), - seccomp_filter - ); - } - - #[test] - // Test the transformation of Filter objects into SeccompFilter objects. - // This `basic` alternative version of the make_seccomp_filter method drops argument checks. - fn test_make_basic_seccomp_filter() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test a well-formed filter. 
Malformed filters are tested in test_compile_blob(). - let filter = Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![Cond::new(3, Qword, Ge, 65).unwrap()]), - ), - SyscallRule::new( - "ioctl".to_string(), - Some(vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()]), - ), - ], - ); - - // The expected IR. - let seccomp_filter = SeccompFilter::new( - vec![ - match_syscall( - compiler.syscall_table.get_syscall_nr("read").unwrap(), - SeccompAction::Allow, - ), - match_syscall( - compiler.syscall_table.get_syscall_nr("futex").unwrap(), - SeccompAction::Allow, - ), - match_syscall( - compiler.syscall_table.get_syscall_nr("ioctl").unwrap(), - SeccompAction::Allow, - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap(); - - assert_eq!( - compiler.make_basic_seccomp_filter(filter).unwrap(), - seccomp_filter - ); - } - - #[test] - fn test_compile_blob() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test with malformed filters. 
- - let mut wrong_syscall_name_filters = BTreeMap::new(); - wrong_syscall_name_filters.insert( - "T1".to_string(), - Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![SyscallRule::new("wrong_syscall".to_string(), None)], - ), - ); - - assert_eq!( - compiler.compile_blob(wrong_syscall_name_filters, false), - Err(CompilationError::SyscallName( - "wrong_syscall".to_string(), - compiler.arch - )) - ); - - let mut identical_action_filters = BTreeMap::new(); - identical_action_filters.insert( - "T1".to_string(), - Filter::new(SeccompAction::Allow, SeccompAction::Allow, vec![]), - ); - - assert_eq!( - compiler.compile_blob(identical_action_filters, false), - Err(CompilationError::IdenticalActions) - ); - - // Test with correct filters. - let mut correct_filters = BTreeMap::new(); - correct_filters.insert( - "Thread1".to_string(), - Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(1, Dword, Eq, 65).unwrap(), - Cond::new(2, Qword, Le, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Dword, Eq, 65).unwrap(), - Cond::new(2, Qword, Le, 80).unwrap(), - ]), - ), - ], - ), - ); - - // We don't test the BPF compilation in this module. - // This is done in the seccomp/lib.rs module. - // Here, we only test the (Filter -> SeccompFilter) transformations. (High-level -> IR) - compiler - .compile_blob(correct_filters.clone(), false) - .unwrap(); - // Also test with basic filtering on. - compiler.compile_blob(correct_filters, true).unwrap(); - } - - #[test] - fn test_error_messages() { - assert_eq!( - format!("{}", CompilationError::IdenticalActions), - "`filter_action` and `default_action` are equal." - ); - assert_eq!( - format!( - "{}", - CompilationError::Filter(FilterError::InvalidArgumentNumber) - ), - "The seccomp rule contains an invalid argument number." 
- ); - assert_eq!( - format!( - "{}", - CompilationError::SyscallName("asdsad".to_string(), TargetArch::x86_64) - ), - format!( - "Invalid syscall name: {} for given arch: {}.", - "asdsad", "x86_64" - ) - ); - } -} diff --git a/src/seccompiler/src/lib.rs b/src/seccompiler/src/lib.rs index cc3e4756996..3fd62106275 100644 --- a/src/seccompiler/src/lib.rs +++ b/src/seccompiler/src/lib.rs @@ -1,270 +1,181 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#![warn(missing_docs)] - -//! The library crate that defines common helper functions that are generally used in -//! conjunction with seccompiler-bin. - -pub mod backend; -pub mod common; -pub mod compiler; -/// Syscall tables -pub mod syscall_table; - use std::collections::HashMap; -use std::fmt::Debug; -use std::io::Read; -use std::sync::Arc; +use std::fs::File; +use std::io::{Read, Seek}; +use std::os::fd::{AsRawFd, FromRawFd}; +use std::os::unix::fs::MetadataExt; +use std::str::FromStr; -use bincode::{DefaultOptions, Error as BincodeError, Options}; -use common::BPF_MAX_LEN; -// Re-export the data types needed for calling the helper functions. -pub use common::{sock_filter, BpfProgram}; +use bincode::Error as BincodeError; -/// Type that associates a thread category to a BPF program. -pub type BpfThreadMap = HashMap>; +mod bindings; +use bindings::*; -// BPF structure definition for filter array. -// See /usr/include/linux/filter.h . -#[repr(C)] -struct sock_fprog { - pub len: ::std::os::raw::c_ushort, - pub filter: *const sock_filter, -} - -/// Reference to program made up of a sequence of BPF instructions. -pub type BpfProgramRef<'a> = &'a [sock_filter]; +pub mod types; +pub use types::*; +use zerocopy::IntoBytes; -/// Binary filter deserialization errors. +/// Binary filter compilation errors. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum DeserializationError { - /// Bincode deserialization failed: {0} - Bincode(BincodeError), +pub enum CompilationError { + /// Cannot open input file: {0} + IntputOpen(std::io::Error), + /// Cannot read input file: {0} + InputRead(std::io::Error), + /// Cannot deserialize json: {0} + JsonDeserialize(serde_json::Error), + /// Cannot parse arch: {0} + ArchParse(String), + /// Cannot create libseccomp context + LibSeccompContext, + /// Cannot add libseccomp arch + LibSeccompArch, + /// Cannot add libseccomp syscall + LibSeccompSycall, + /// Cannot add libseccomp syscall rule + LibSeccompRule, + /// Cannot export libseccomp bpf + LibSeccompExport, + /// Cannot create memfd: {0} + MemfdCreate(std::io::Error), + /// Cannot rewind memfd: {0} + MemfdRewind(std::io::Error), + /// Cannot read from memfd: {0} + MemfdRead(std::io::Error), + /// Cannot create output file: {0} + OutputCreate(std::io::Error), + /// Cannot serialize bfp: {0} + BincodeSerialize(BincodeError), } -/// Filter installation errors. -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] -pub enum InstallationError { - /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions - FilterTooLarge, - /// prctl` syscall failed with error code: {0} - Prctl(i32), -} - -/// Deserialize a BPF file into a collection of usable BPF filters. -/// Has an optional `bytes_limit` that is passed to bincode to constrain the maximum amount of -/// memory that we can allocate while performing the deserialization. -/// It's recommended that the integrator of the library uses this to prevent memory allocations -/// DOS-es. -pub fn deserialize_binary( - reader: R, - bytes_limit: Option, -) -> std::result::Result { - let result = match bytes_limit { - // Also add the default options. 
These are not part of the `DefaultOptions` as per - // this issue: https://github.com/servo/bincode/issues/333 - Some(limit) => DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes() - .with_limit(limit) - .deserialize_from::>(reader), - // No limit is the default. - None => bincode::deserialize_from::>(reader), - }; - - Ok(result - .map_err(DeserializationError::Bincode)? - .into_iter() - .map(|(k, v)| (k.to_lowercase(), Arc::new(v))) - .collect()) -} - -/// Helper function for installing a BPF filter. -pub fn apply_filter(bpf_filter: BpfProgramRef) -> std::result::Result<(), InstallationError> { - // If the program is empty, don't install the filter. - if bpf_filter.is_empty() { - return Ok(()); - } +pub fn compile_bpf( + input_path: &str, + arch: &str, + out_path: &str, + basic: bool, +) -> Result<(), CompilationError> { + let mut file_content = String::new(); + File::open(input_path) + .map_err(CompilationError::IntputOpen)? + .read_to_string(&mut file_content) + .map_err(CompilationError::InputRead)?; + let bpf_map_json: BpfJson = + serde_json::from_str(&file_content).map_err(CompilationError::JsonDeserialize)?; + + let arch = TargetArch::from_str(arch).map_err(CompilationError::ArchParse)?; - // If the program length is greater than the limit allowed by the kernel, - // fail quickly. Otherwise, `prctl` will give a more cryptic error code. - let bpf_filter_len = - u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?; - if bpf_filter_len > BPF_MAX_LEN { - return Err(InstallationError::FilterTooLarge); + // SAFETY: Safe because the parameters are valid. + let memfd_fd = unsafe { libc::memfd_create(c"bpf".as_ptr().cast(), 0) }; + if memfd_fd < 0 { + return Err(CompilationError::MemfdCreate( + std::io::Error::last_os_error(), + )); } // SAFETY: Safe because the parameters are valid. 
- unsafe { - { - let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); + let mut memfd = unsafe { File::from_raw_fd(memfd_fd) }; + + let mut bpf_map: HashMap> = HashMap::new(); + for (name, filter) in bpf_map_json.0.iter() { + let default_action = filter.default_action.to_scmp_type(); + let filter_action = filter.filter_action.to_scmp_type(); + + // SAFETY: Safe as all args are correct. + let bpf_filter = { + let r = seccomp_init(default_action); + if r.is_null() { + return Err(CompilationError::LibSeccompContext); } - } - - let bpf_prog = sock_fprog { - len: bpf_filter_len, - filter: bpf_filter.as_ptr(), + r }; - let bpf_prog_ptr = &bpf_prog as *const sock_fprog; - { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, - bpf_prog_ptr, - ); - if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); - } - } - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - - use std::collections::HashMap; - use std::sync::Arc; - use std::thread; - use super::*; - use crate::common::BpfProgram; - - #[test] - fn test_deserialize_binary() { - // Malformed bincode binary. - { - let data = "adassafvc".to_string(); - deserialize_binary(data.as_bytes(), None).unwrap_err(); + // SAFETY: Safe as all args are correct. + unsafe { + let r = seccomp_arch_add(bpf_filter, arch.to_scmp_type()); + if r != 0 && r != MINUS_EEXIST { + return Err(CompilationError::LibSeccompArch); + } } - // Test that the binary deserialization is correct, and that the thread keys - // have been lowercased. 
- { - let bpf_prog = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - ]; - let mut filter_map: HashMap = HashMap::new(); - filter_map.insert("VcpU".to_string(), bpf_prog.clone()); - let bytes = bincode::serialize(&filter_map).unwrap(); - - let mut expected_res = BpfThreadMap::new(); - expected_res.insert("vcpu".to_string(), Arc::new(bpf_prog)); - assert_eq!(deserialize_binary(&bytes[..], None).unwrap(), expected_res); + for rule in filter.filter.iter() { + // SAFETY: Safe as all args are correct. + let syscall = unsafe { + let r = seccomp_syscall_resolve_name(rule.syscall.as_ptr()); + if r == __NR_SCMP_ERROR { + return Err(CompilationError::LibSeccompSycall); + } + r + }; + + // TODO remove when we drop deprecated "basic" arg from cli. + // "basic" bpf means it ignores condition checks. + if basic { + // SAFETY: Safe as all args are correct. + unsafe { + if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { + return Err(CompilationError::LibSeccompRule); + } + } + } else if let Some(rules) = &rule.args { + let comparators = rules + .iter() + .map(|rule| rule.to_scmp_type()) + .collect::>(); + + // SAFETY: Safe as all args are correct. + // We can assume no one will define u32::MAX + // filters for a syscall. + #[allow(clippy::cast_possible_truncation)] + unsafe { + if seccomp_rule_add_array( + bpf_filter, + filter_action, + syscall, + comparators.len() as u32, + comparators.as_ptr(), + ) != 0 + { + return Err(CompilationError::LibSeccompRule); + } + } + } else { + // SAFETY: Safe as all args are correct. + unsafe { + if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { + return Err(CompilationError::LibSeccompRule); + } + } + } } - // Test deserialization with binary_limit. 
- { - let bpf_prog = vec![sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }]; - - let mut filter_map: HashMap = HashMap::new(); - filter_map.insert("t1".to_string(), bpf_prog.clone()); - - let bytes = bincode::serialize(&filter_map).unwrap(); - - // Binary limit too low. - assert!(matches!( - deserialize_binary(&bytes[..], Some(20)).unwrap_err(), - DeserializationError::Bincode(error) - if error.to_string() == "the size limit has been reached" - )); - - let mut expected_res = BpfThreadMap::new(); - expected_res.insert("t1".to_string(), Arc::new(bpf_prog)); - - // Correct binary limit. - assert_eq!( - deserialize_binary(&bytes[..], Some(50)).unwrap(), - expected_res - ); + // SAFETY: Safe as all args are correct. + unsafe { + if seccomp_export_bpf(bpf_filter, memfd.as_raw_fd()) != 0 { + return Err(CompilationError::LibSeccompExport); + } } + memfd.rewind().map_err(CompilationError::MemfdRewind)?; + + // Cast is safe because usize == u64 + #[allow(clippy::cast_possible_truncation)] + let size = memfd.metadata().unwrap().size() as usize; + // Bpf instructions are 8 byte values and 4 byte alignment. + // We use u64 to satisfy these requirements. + let instructions = size / std::mem::size_of::(); + let mut bpf = vec![0_u64; instructions]; + + memfd + .read_exact(bpf.as_mut_bytes()) + .map_err(CompilationError::MemfdRead)?; + memfd.rewind().map_err(CompilationError::MemfdRewind)?; + + bpf_map.insert(name.clone(), bpf); } - #[test] - fn test_filter_apply() { - // Test filter too large. - thread::spawn(|| { - let filter: BpfProgram = vec![ - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }; - 5000 // Limit is 4096 - ]; - - // Apply seccomp filter. - assert_eq!( - apply_filter(&filter).unwrap_err(), - InstallationError::FilterTooLarge - ); - }) - .join() - .unwrap(); + let output_file = File::create(out_path).map_err(CompilationError::OutputCreate)?; - // Test empty filter.
- thread::spawn(|| { - let filter: BpfProgram = vec![]; - - assert_eq!(filter.len(), 0); - - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - - apply_filter(&filter).unwrap(); - - // test that seccomp level remains 0 on failure. - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - }) - .join() - .unwrap(); - - // Test invalid BPF code. - thread::spawn(|| { - let filter = vec![sock_filter { - // invalid opcode - code: 9999, - jt: 0, - jf: 0, - k: 0, - }]; - - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - - assert_eq!( - apply_filter(&filter).unwrap_err(), - InstallationError::Prctl(22) - ); - - // test that seccomp level remains 0 on failure. - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - }) - .join() - .unwrap(); - } + bincode::serialize_into(output_file, &bpf_map).map_err(CompilationError::BincodeSerialize)?; + Ok(()) } diff --git a/src/seccompiler/src/seccompiler_bin.rs b/src/seccompiler/src/seccompiler_bin.rs deleted file mode 100644 index 890a2a3ecdb..00000000000 --- a/src/seccompiler/src/seccompiler_bin.rs +++ /dev/null @@ -1,578 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! seccompiler-bin is a program that compiles multi-threaded seccomp-bpf filters expressed as JSON -//! into raw BPF programs, serializing them and outputting them to a file. -//! -//! Used in conjunction with the provided library crate, one can deserialize the binary filters -//! and easily install them on a per-thread basis, in order to achieve a quick and robust -//! seccomp-based jailing solution. -//! -//! See the documentation on github for more information. -//! -//! ```text -//! The compilation goes through a couple of steps, from JSON to BPF: -//! -//! JSON -//! | -//! (via serde_json) -//! 
| -//! V -//! collection of `Filter` objects -//! | -//! (via Compiler.compile_blob(...)) -//! | -//! V -//! collection of `SeccompFilter` objects -//! (IR - intermediate representation) -//! | -//! (via SeccompFilter.try_into::(...)) -//! | -//! V -//! collection of `BpfProgram` objects -//! ``` - -use std::collections::BTreeMap; -use std::convert::TryInto; -use std::fs::File; -use std::io::BufReader; -use std::path::PathBuf; - -mod backend; -mod common; -mod compiler; -mod syscall_table; - -use backend::{TargetArch, TargetArchError}; -use bincode::Error as BincodeError; -use common::BpfProgram; -use compiler::{CompilationError, Compiler, JsonFile}; -use serde_json::error::Error as JSONError; -use utils::arg_parser::{ - ArgParser, Argument, Arguments as ArgumentsBag, UtilsArgParserError as ArgParserError, -}; - -const SECCOMPILER_VERSION: &str = env!("CARGO_PKG_VERSION"); -const DEFAULT_OUTPUT_FILENAME: &str = "seccomp_binary_filter.out"; - -#[derive(Debug, thiserror::Error)] -enum SeccompError { - #[error("Bincode (de)serialization failed: {0}")] - Bincode(BincodeError), - #[error("{0}")] - Compilation(CompilationError), - #[error("{}", format!("Failed to open file {:?}: {1}", .0, .1).replace('\"', ""))] - FileOpen(PathBuf, std::io::Error), - #[error("Error parsing JSON: {0}")] - Json(JSONError), - #[error("Missing input file.")] - MissingInputFile, - #[error("Missing target arch.")] - MissingTargetArch, - #[error("{0}")] - Arch(#[from] TargetArchError), -} - -#[derive(Debug, PartialEq)] -struct Arguments { - input_file: String, - output_file: String, - target_arch: TargetArch, - is_basic: bool, -} - -fn build_arg_parser() -> ArgParser<'static> { - ArgParser::new() - .arg( - Argument::new("input-file") - .required(true) - .takes_value(true) - .help("File path of the JSON input."), - ) - .arg( - Argument::new("output-file") - .required(false) - .takes_value(true) - .default_value(DEFAULT_OUTPUT_FILENAME) - .help("Optional path of the output file."), - ) - .arg( - 
Argument::new("target-arch") - .required(true) - .takes_value(true) - .help( - "The computer architecture where the BPF program runs. Supported \ - architectures: x86_64, aarch64.", - ), - ) - .arg(Argument::new("basic").takes_value(false).help( - "Deprecated! Transforms the filters into basic filters. Drops all argument checks and \ - rule-level actions. Not recommended.", - )) -} - -fn get_argument_values(arguments: &ArgumentsBag) -> Result { - let Some(arch_string) = arguments.single_value("target-arch") else { - return Err(SeccompError::MissingTargetArch); - }; - let target_arch: TargetArch = arch_string.as_str().try_into()?; - - let Some(input_file) = arguments.single_value("input-file") else { - return Err(SeccompError::MissingInputFile); - }; - - let is_basic = arguments.flag_present("basic"); - if is_basic { - println!( - "Warning! You are using a deprecated parameter: --basic, that will be removed in a \ - future version.\n" - ); - } - - Ok(Arguments { - target_arch, - input_file: input_file.to_owned(), - // Safe to unwrap because it has a default value - output_file: arguments.single_value("output-file").unwrap().to_owned(), - is_basic, - }) -} - -fn compile(args: &Arguments) -> Result<(), SeccompError> { - let input_file = File::open(&args.input_file) - .map_err(|err| SeccompError::FileOpen(PathBuf::from(&args.input_file), err))?; - let mut input_reader = BufReader::new(input_file); - let filters = - serde_json::from_reader::<_, JsonFile>(&mut input_reader).map_err(SeccompError::Json)?; - let compiler = Compiler::new(args.target_arch); - - // transform the IR into a Map of BPFPrograms - let bpf_data: BTreeMap = compiler - .compile_blob(filters.0, args.is_basic) - .map_err(SeccompError::Compilation)?; - - // serialize the BPF programs & output them to a file - let output_file = File::create(&args.output_file) - .map_err(|err| SeccompError::FileOpen(PathBuf::from(&args.output_file), err))?; - bincode::serialize_into(output_file, 
&bpf_data).map_err(SeccompError::Bincode)?; - - Ok(()) -} - -#[derive(Debug, thiserror::Error, displaydoc::Display)] -enum SeccompilerError { - /// Argument Parsing Error: {0} - ArgParsing(ArgParserError), - /// {0} \n\nFor more information try --help. - InvalidArgumentValue(SeccompError), - /// {0} - Error(SeccompError), -} - -fn main() -> core::result::Result<(), SeccompilerError> { - let result = main_exec(); - if let Err(e) = result { - eprintln!("{}", e); - Err(e) - } else { - Ok(()) - } -} - -fn main_exec() -> core::result::Result<(), SeccompilerError> { - let mut arg_parser = build_arg_parser(); - - arg_parser - .parse_from_cmdline() - .map_err(SeccompilerError::ArgParsing)?; - - if arg_parser.arguments().flag_present("help") { - println!("Seccompiler-bin v{}\n", SECCOMPILER_VERSION); - println!("{}", arg_parser.formatted_help()); - return Ok(()); - } - if arg_parser.arguments().flag_present("version") { - println!("Seccompiler-bin v{}\n", SECCOMPILER_VERSION); - return Ok(()); - } - - let args = get_argument_values(arg_parser.arguments()) - .map_err(SeccompilerError::InvalidArgumentValue)?; - - compile(&args).map_err(SeccompilerError::Error)?; - - println!("Filter successfully compiled into: {}", args.output_file); - Ok(()) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - - use std::io; - use std::io::Write; - use std::path::PathBuf; - - use bincode::Error as BincodeError; - use vmm_sys_util::tempfile::TempFile; - - use super::compiler::CompilationError as FilterFormatError; - use super::{ - build_arg_parser, compile, get_argument_values, Arguments, SeccompError, - DEFAULT_OUTPUT_FILENAME, - }; - use crate::backend::{TargetArch, TargetArchError}; - - // Correct JSON input data - static CORRECT_JSON_INPUT: &str = r#" - { - "thread_1": { - "default_action": { - "errno": 12 - }, - "filter_action": "allow", - "filter": [ - { - "syscall": "open" - }, - { - "syscall": "close" - }, - { - "syscall": "stat" - }, - { - "syscall": 
"futex", - "args": [ - { - "index": 2, - "type": "dword", - "op": "le", - "val": 65 - }, - { - "index": 1, - "type": "qword", - "op": "ne", - "val": 80 - } - ] - }, - { - "syscall": "futex", - "args": [ - { - "index": 3, - "type": "qword", - "op": "gt", - "val": 65 - }, - { - "index": 1, - "type": "qword", - "op": "lt", - "val": 80 - } - ] - }, - { - "syscall": "futex", - "args": [ - { - "index": 3, - "type": "qword", - "op": "ge", - "val": 65 - } - ] - }, - { - "syscall": "ioctl", - "args": [ - { - "index": 3, - "type": "dword", - "op": { - "masked_eq": 100 - }, - "val": 65 - } - ] - } - ] - }, - "thread_2": { - "default_action": "trap", - "filter_action": "allow", - "filter": [ - { - "syscall": "ioctl", - "args": [ - { - "index": 3, - "type": "dword", - "op": "eq", - "val": 65 - } - ] - } - ] - } - } - "#; - - #[test] - fn test_error_messages() { - let path = PathBuf::from("/path"); - assert_eq!( - format!( - "{}", - SeccompError::Bincode(BincodeError::new(bincode::ErrorKind::SizeLimit)) - ), - format!( - "Bincode (de)serialization failed: {}", - BincodeError::new(bincode::ErrorKind::SizeLimit) - ) - ); - assert_eq!( - format!( - "{}", - SeccompError::Compilation(FilterFormatError::SyscallName( - "dsaa".to_string(), - TargetArch::aarch64 - )) - ), - format!( - "{}", - FilterFormatError::SyscallName("dsaa".to_string(), TargetArch::aarch64) - ) - ); - assert_eq!( - format!( - "{}", - SeccompError::FileOpen(path.clone(), io::Error::from_raw_os_error(2)) - ), - format!( - "Failed to open file {:?}: {}", - path, - io::Error::from_raw_os_error(2) - ) - .replace('\"', "") - ); - assert_eq!( - format!( - "{}", - SeccompError::Json(serde_json::from_str::("").unwrap_err()) - ), - format!( - "Error parsing JSON: {}", - serde_json::from_str::("").unwrap_err() - ) - ); - assert_eq!( - format!("{}", SeccompError::MissingInputFile), - "Missing input file." - ); - assert_eq!( - format!("{}", SeccompError::MissingTargetArch), - "Missing target arch." 
- ); - assert_eq!( - format!( - "{}", - SeccompError::Arch(TargetArchError::InvalidString("lala".to_string())) - ), - format!("{}", TargetArchError::InvalidString("lala".to_string())) - ); - } - - #[test] - fn test_get_argument_values() { - let arg_parser = build_arg_parser(); - // correct arguments - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - assert_eq!( - get_argument_values(arguments).unwrap(), - Arguments { - input_file: "foo.txt".to_string(), - output_file: DEFAULT_OUTPUT_FILENAME.to_string(), - target_arch: TargetArch::x86_64, - is_basic: false, - } - ); - - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - "--output-file", - "/path.to/file.txt", - "--basic", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - assert_eq!( - get_argument_values(arguments).unwrap(), - Arguments { - input_file: "foo.txt".to_string(), - output_file: "/path.to/file.txt".to_string(), - target_arch: TargetArch::x86_64, - is_basic: true - } - ); - - // no args - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - - // missing --target-arch - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin", "--input-file", "foo.txt"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - - // missing --input-file - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin", "--target-arch", "x86_64"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) 
- .is_err()); - - // invalid --target-arch - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64das", - "--output-file", - "/path.to/file.txt", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - get_argument_values(arguments).unwrap_err(); - - // invalid value supplied to --basic - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - "--basic", - "invalid", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - } - - #[allow(clippy::useless_asref)] - #[test] - fn test_compile() { - // --input-file was deleted - { - let mut in_file = TempFile::new().unwrap(); - in_file.remove().unwrap(); - let args = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - output_file: "bpf.out".to_string(), - is_basic: false, - }; - - match compile(&args).unwrap_err() { - SeccompError::FileOpen(buf, _) => assert_eq!(buf, PathBuf::from(in_file.as_path())), - _ => panic!("Expected FileOpen error."), - } - } - - // test a successful compilation - { - let in_file = TempFile::new().unwrap(); - let out_file = TempFile::new().unwrap(); - - in_file - .as_file() - .write_all(CORRECT_JSON_INPUT.as_bytes()) - .unwrap(); - - let arguments = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - output_file: out_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - is_basic: false, - }; - - // do the compilation & check for errors - compile(&arguments).unwrap(); - - // also check with is_basic: true - let arguments = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - output_file: out_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - 
is_basic: true, - }; - - // do the compilation & check for errors - compile(&arguments).unwrap(); - } - } -} diff --git a/src/seccompiler/src/syscall_table/aarch64.rs b/src/seccompiler/src/syscall_table/aarch64.rs deleted file mode 100644 index 386d09b78d3..00000000000 --- a/src/seccompiler/src/syscall_table/aarch64.rs +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by `tools/devtool generate_syscall_tables`. -// Do NOT manually edit! -// Generated at: Mon 15 Nov 11:41:50 UTC 2021 -// Kernel version: 5.10 - -use std::collections::HashMap; - -pub fn make_syscall_table(map: &mut HashMap) { - map.insert("accept4".to_string(), 242); - map.insert("accept".to_string(), 202); - map.insert("acct".to_string(), 89); - map.insert("add_key".to_string(), 217); - map.insert("adjtimex".to_string(), 171); - map.insert("bind".to_string(), 200); - map.insert("bpf".to_string(), 280); - map.insert("brk".to_string(), 214); - map.insert("capget".to_string(), 90); - map.insert("capset".to_string(), 91); - map.insert("chdir".to_string(), 49); - map.insert("chroot".to_string(), 51); - map.insert("clock_adjtime".to_string(), 266); - map.insert("clock_getres".to_string(), 114); - map.insert("clock_gettime".to_string(), 113); - map.insert("clock_nanosleep".to_string(), 115); - map.insert("clock_settime".to_string(), 112); - map.insert("clone3".to_string(), 435); - map.insert("clone".to_string(), 220); - map.insert("close_range".to_string(), 436); - map.insert("close".to_string(), 57); - map.insert("connect".to_string(), 203); - map.insert("copy_file_range".to_string(), 285); - map.insert("delete_module".to_string(), 106); - map.insert("dup3".to_string(), 24); - map.insert("dup".to_string(), 23); - map.insert("epoll_create1".to_string(), 20); - map.insert("epoll_ctl".to_string(), 21); - map.insert("epoll_pwait".to_string(), 22); - 
map.insert("eventfd2".to_string(), 19); - map.insert("execveat".to_string(), 281); - map.insert("execve".to_string(), 221); - map.insert("exit_group".to_string(), 94); - map.insert("exit".to_string(), 93); - map.insert("faccessat2".to_string(), 439); - map.insert("faccessat".to_string(), 48); - map.insert("fadvise64".to_string(), 223); - map.insert("fallocate".to_string(), 47); - map.insert("fanotify_init".to_string(), 262); - map.insert("fanotify_mark".to_string(), 263); - map.insert("fchdir".to_string(), 50); - map.insert("fchmodat".to_string(), 53); - map.insert("fchmod".to_string(), 52); - map.insert("fchownat".to_string(), 54); - map.insert("fchown".to_string(), 55); - map.insert("fcntl".to_string(), 25); - map.insert("fdatasync".to_string(), 83); - map.insert("fgetxattr".to_string(), 10); - map.insert("finit_module".to_string(), 273); - map.insert("flistxattr".to_string(), 13); - map.insert("flock".to_string(), 32); - map.insert("fremovexattr".to_string(), 16); - map.insert("fsconfig".to_string(), 431); - map.insert("fsetxattr".to_string(), 7); - map.insert("fsmount".to_string(), 432); - map.insert("fsopen".to_string(), 430); - map.insert("fspick".to_string(), 433); - map.insert("fstatfs".to_string(), 44); - map.insert("fstat".to_string(), 80); - map.insert("fsync".to_string(), 82); - map.insert("ftruncate".to_string(), 46); - map.insert("futex".to_string(), 98); - map.insert("getcpu".to_string(), 168); - map.insert("getcwd".to_string(), 17); - map.insert("getdents64".to_string(), 61); - map.insert("getegid".to_string(), 177); - map.insert("geteuid".to_string(), 175); - map.insert("getgid".to_string(), 176); - map.insert("getgroups".to_string(), 158); - map.insert("getitimer".to_string(), 102); - map.insert("get_mempolicy".to_string(), 236); - map.insert("getpeername".to_string(), 205); - map.insert("getpgid".to_string(), 155); - map.insert("getpid".to_string(), 172); - map.insert("getppid".to_string(), 173); - map.insert("getpriority".to_string(), 141); - 
map.insert("getrandom".to_string(), 278); - map.insert("getresgid".to_string(), 150); - map.insert("getresuid".to_string(), 148); - map.insert("getrlimit".to_string(), 163); - map.insert("get_robust_list".to_string(), 100); - map.insert("getrusage".to_string(), 165); - map.insert("getsid".to_string(), 156); - map.insert("getsockname".to_string(), 204); - map.insert("getsockopt".to_string(), 209); - map.insert("gettid".to_string(), 178); - map.insert("gettimeofday".to_string(), 169); - map.insert("getuid".to_string(), 174); - map.insert("getxattr".to_string(), 8); - map.insert("init_module".to_string(), 105); - map.insert("inotify_add_watch".to_string(), 27); - map.insert("inotify_init1".to_string(), 26); - map.insert("inotify_rm_watch".to_string(), 28); - map.insert("io_cancel".to_string(), 3); - map.insert("ioctl".to_string(), 29); - map.insert("io_destroy".to_string(), 1); - map.insert("io_getevents".to_string(), 4); - map.insert("io_pgetevents".to_string(), 292); - map.insert("ioprio_get".to_string(), 31); - map.insert("ioprio_set".to_string(), 30); - map.insert("io_setup".to_string(), 0); - map.insert("io_submit".to_string(), 2); - map.insert("io_uring_enter".to_string(), 426); - map.insert("io_uring_register".to_string(), 427); - map.insert("io_uring_setup".to_string(), 425); - map.insert("kcmp".to_string(), 272); - map.insert("kexec_file_load".to_string(), 294); - map.insert("kexec_load".to_string(), 104); - map.insert("keyctl".to_string(), 219); - map.insert("kill".to_string(), 129); - map.insert("lgetxattr".to_string(), 9); - map.insert("linkat".to_string(), 37); - map.insert("listen".to_string(), 201); - map.insert("listxattr".to_string(), 11); - map.insert("llistxattr".to_string(), 12); - map.insert("lookup_dcookie".to_string(), 18); - map.insert("lremovexattr".to_string(), 15); - map.insert("lseek".to_string(), 62); - map.insert("lsetxattr".to_string(), 6); - map.insert("madvise".to_string(), 233); - map.insert("mbind".to_string(), 235); - 
map.insert("membarrier".to_string(), 283); - map.insert("memfd_create".to_string(), 279); - map.insert("migrate_pages".to_string(), 238); - map.insert("mincore".to_string(), 232); - map.insert("mkdirat".to_string(), 34); - map.insert("mknodat".to_string(), 33); - map.insert("mlock2".to_string(), 284); - map.insert("mlockall".to_string(), 230); - map.insert("mlock".to_string(), 228); - map.insert("mmap".to_string(), 222); - map.insert("mount".to_string(), 40); - map.insert("move_mount".to_string(), 429); - map.insert("move_pages".to_string(), 239); - map.insert("mprotect".to_string(), 226); - map.insert("mq_getsetattr".to_string(), 185); - map.insert("mq_notify".to_string(), 184); - map.insert("mq_open".to_string(), 180); - map.insert("mq_timedreceive".to_string(), 183); - map.insert("mq_timedsend".to_string(), 182); - map.insert("mq_unlink".to_string(), 181); - map.insert("mremap".to_string(), 216); - map.insert("msgctl".to_string(), 187); - map.insert("msgget".to_string(), 186); - map.insert("msgrcv".to_string(), 188); - map.insert("msgsnd".to_string(), 189); - map.insert("msync".to_string(), 227); - map.insert("munlockall".to_string(), 231); - map.insert("munlock".to_string(), 229); - map.insert("munmap".to_string(), 215); - map.insert("name_to_handle_at".to_string(), 264); - map.insert("nanosleep".to_string(), 101); - map.insert("newfstatat".to_string(), 79); - map.insert("nfsservctl".to_string(), 42); - map.insert("openat2".to_string(), 437); - map.insert("openat".to_string(), 56); - map.insert("open_by_handle_at".to_string(), 265); - map.insert("open_tree".to_string(), 428); - map.insert("perf_event_open".to_string(), 241); - map.insert("personality".to_string(), 92); - map.insert("pidfd_getfd".to_string(), 438); - map.insert("pidfd_open".to_string(), 434); - map.insert("pidfd_send_signal".to_string(), 424); - map.insert("pipe2".to_string(), 59); - map.insert("pivot_root".to_string(), 41); - map.insert("pkey_alloc".to_string(), 289); - 
map.insert("pkey_free".to_string(), 290); - map.insert("pkey_mprotect".to_string(), 288); - map.insert("ppoll".to_string(), 73); - map.insert("prctl".to_string(), 167); - map.insert("pread64".to_string(), 67); - map.insert("preadv2".to_string(), 286); - map.insert("preadv".to_string(), 69); - map.insert("prlimit64".to_string(), 261); - map.insert("process_madvise".to_string(), 440); - map.insert("process_vm_readv".to_string(), 270); - map.insert("process_vm_writev".to_string(), 271); - map.insert("pselect6".to_string(), 72); - map.insert("ptrace".to_string(), 117); - map.insert("pwrite64".to_string(), 68); - map.insert("pwritev2".to_string(), 287); - map.insert("pwritev".to_string(), 70); - map.insert("quotactl".to_string(), 60); - map.insert("readahead".to_string(), 213); - map.insert("readlinkat".to_string(), 78); - map.insert("read".to_string(), 63); - map.insert("readv".to_string(), 65); - map.insert("reboot".to_string(), 142); - map.insert("recvfrom".to_string(), 207); - map.insert("recvmmsg".to_string(), 243); - map.insert("recvmsg".to_string(), 212); - map.insert("remap_file_pages".to_string(), 234); - map.insert("removexattr".to_string(), 14); - map.insert("renameat2".to_string(), 276); - map.insert("renameat".to_string(), 38); - map.insert("request_key".to_string(), 218); - map.insert("restart_syscall".to_string(), 128); - map.insert("rseq".to_string(), 293); - map.insert("rt_sigaction".to_string(), 134); - map.insert("rt_sigpending".to_string(), 136); - map.insert("rt_sigprocmask".to_string(), 135); - map.insert("rt_sigqueueinfo".to_string(), 138); - map.insert("rt_sigreturn".to_string(), 139); - map.insert("rt_sigsuspend".to_string(), 133); - map.insert("rt_sigtimedwait".to_string(), 137); - map.insert("rt_tgsigqueueinfo".to_string(), 240); - map.insert("sched_getaffinity".to_string(), 123); - map.insert("sched_getattr".to_string(), 275); - map.insert("sched_getparam".to_string(), 121); - map.insert("sched_get_priority_max".to_string(), 125); - 
map.insert("sched_get_priority_min".to_string(), 126); - map.insert("sched_getscheduler".to_string(), 120); - map.insert("sched_rr_get_interval".to_string(), 127); - map.insert("sched_setaffinity".to_string(), 122); - map.insert("sched_setattr".to_string(), 274); - map.insert("sched_setparam".to_string(), 118); - map.insert("sched_setscheduler".to_string(), 119); - map.insert("sched_yield".to_string(), 124); - map.insert("seccomp".to_string(), 277); - map.insert("semctl".to_string(), 191); - map.insert("semget".to_string(), 190); - map.insert("semop".to_string(), 193); - map.insert("semtimedop".to_string(), 192); - map.insert("sendfile".to_string(), 71); - map.insert("sendmmsg".to_string(), 269); - map.insert("sendmsg".to_string(), 211); - map.insert("sendto".to_string(), 206); - map.insert("setdomainname".to_string(), 162); - map.insert("setfsgid".to_string(), 152); - map.insert("setfsuid".to_string(), 151); - map.insert("setgid".to_string(), 144); - map.insert("setgroups".to_string(), 159); - map.insert("sethostname".to_string(), 161); - map.insert("setitimer".to_string(), 103); - map.insert("set_mempolicy".to_string(), 237); - map.insert("setns".to_string(), 268); - map.insert("setpgid".to_string(), 154); - map.insert("setpriority".to_string(), 140); - map.insert("setregid".to_string(), 143); - map.insert("setresgid".to_string(), 149); - map.insert("setresuid".to_string(), 147); - map.insert("setreuid".to_string(), 145); - map.insert("setrlimit".to_string(), 164); - map.insert("set_robust_list".to_string(), 99); - map.insert("setsid".to_string(), 157); - map.insert("setsockopt".to_string(), 208); - map.insert("set_tid_address".to_string(), 96); - map.insert("settimeofday".to_string(), 170); - map.insert("setuid".to_string(), 146); - map.insert("setxattr".to_string(), 5); - map.insert("shmat".to_string(), 196); - map.insert("shmctl".to_string(), 195); - map.insert("shmdt".to_string(), 197); - map.insert("shmget".to_string(), 194); - 
map.insert("shutdown".to_string(), 210); - map.insert("sigaltstack".to_string(), 132); - map.insert("signalfd4".to_string(), 74); - map.insert("socketpair".to_string(), 199); - map.insert("socket".to_string(), 198); - map.insert("splice".to_string(), 76); - map.insert("statfs".to_string(), 43); - map.insert("statx".to_string(), 291); - map.insert("swapoff".to_string(), 225); - map.insert("swapon".to_string(), 224); - map.insert("symlinkat".to_string(), 36); - map.insert("sync_file_range".to_string(), 84); - map.insert("syncfs".to_string(), 267); - map.insert("sync".to_string(), 81); - map.insert("sysinfo".to_string(), 179); - map.insert("syslog".to_string(), 116); - map.insert("tee".to_string(), 77); - map.insert("tgkill".to_string(), 131); - map.insert("timer_create".to_string(), 107); - map.insert("timer_delete".to_string(), 111); - map.insert("timerfd_create".to_string(), 85); - map.insert("timerfd_gettime".to_string(), 87); - map.insert("timerfd_settime".to_string(), 86); - map.insert("timer_getoverrun".to_string(), 109); - map.insert("timer_gettime".to_string(), 108); - map.insert("timer_settime".to_string(), 110); - map.insert("times".to_string(), 153); - map.insert("tkill".to_string(), 130); - map.insert("truncate".to_string(), 45); - map.insert("umask".to_string(), 166); - map.insert("umount2".to_string(), 39); - map.insert("uname".to_string(), 160); - map.insert("unlinkat".to_string(), 35); - map.insert("unshare".to_string(), 97); - map.insert("userfaultfd".to_string(), 282); - map.insert("utimensat".to_string(), 88); - map.insert("vhangup".to_string(), 58); - map.insert("vmsplice".to_string(), 75); - map.insert("wait4".to_string(), 260); - map.insert("waitid".to_string(), 95); - map.insert("write".to_string(), 64); - map.insert("writev".to_string(), 66); -} diff --git a/src/seccompiler/src/syscall_table/mod.rs b/src/seccompiler/src/syscall_table/mod.rs deleted file mode 100644 index 3dca50c748d..00000000000 --- a/src/seccompiler/src/syscall_table/mod.rs 
+++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -mod aarch64; -mod x86_64; - -use std::collections::HashMap; - -use crate::backend::TargetArch; - -/// Creates and owns a mapping from the arch-specific syscall name to the right number. -#[derive(Debug)] -pub struct SyscallTable { - map: HashMap, - arch: TargetArch, -} - -/// Number of syscalls for x86_64 (rough upper bound). -const MAP_CAPACITY: usize = 351; - -impl SyscallTable { - /// Create new syscall table - pub fn new(arch: TargetArch) -> Self { - let mut instance = Self { - arch, - map: HashMap::with_capacity(MAP_CAPACITY), - }; - - instance.populate_map(); - - instance - } - - /// Returns the arch-specific syscall number based on the given name. - pub fn get_syscall_nr(&self, sys_name: &str) -> Option { - self.map.get(sys_name).copied() - } - - /// Populates the arch-specific syscall map. - fn populate_map(&mut self) { - match self.arch { - TargetArch::aarch64 => aarch64::make_syscall_table(&mut self.map), - TargetArch::x86_64 => x86_64::make_syscall_table(&mut self.map), - } - } -} - -#[cfg(test)] -mod tests { - use super::SyscallTable; - use crate::backend::TargetArch; - - #[test] - fn test_get_syscall_nr() { - // get number for a valid syscall - let instance_x86_64 = SyscallTable::new(TargetArch::x86_64); - let instance_aarch64 = SyscallTable::new(TargetArch::aarch64); - - assert_eq!(instance_x86_64.get_syscall_nr("close").unwrap(), 3); - assert_eq!(instance_aarch64.get_syscall_nr("close").unwrap(), 57); - - // invalid syscall name - assert!(instance_x86_64.get_syscall_nr("nosyscall").is_none()); - assert!(instance_aarch64.get_syscall_nr("nosyscall").is_none()); - } -} diff --git a/src/seccompiler/src/syscall_table/x86_64.rs b/src/seccompiler/src/syscall_table/x86_64.rs deleted file mode 100644 index 9350bd5ce57..00000000000 --- a/src/seccompiler/src/syscall_table/x86_64.rs +++ /dev/null @@ -1,364 +0,0 @@ 
-// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by `tools/devtool generate_syscall_tables`. -// Do NOT manually edit! -// Generated at: Mon 15 Nov 11:41:50 UTC 2021 -// Kernel version: 5.10 - -use std::collections::HashMap; - -pub fn make_syscall_table(map: &mut HashMap) { - map.insert("accept4".to_string(), 288); - map.insert("accept".to_string(), 43); - map.insert("access".to_string(), 21); - map.insert("acct".to_string(), 163); - map.insert("add_key".to_string(), 248); - map.insert("adjtimex".to_string(), 159); - map.insert("afs_syscall".to_string(), 183); - map.insert("alarm".to_string(), 37); - map.insert("arch_prctl".to_string(), 158); - map.insert("bind".to_string(), 49); - map.insert("bpf".to_string(), 321); - map.insert("brk".to_string(), 12); - map.insert("capget".to_string(), 125); - map.insert("capset".to_string(), 126); - map.insert("chdir".to_string(), 80); - map.insert("chmod".to_string(), 90); - map.insert("chown".to_string(), 92); - map.insert("chroot".to_string(), 161); - map.insert("clock_adjtime".to_string(), 305); - map.insert("clock_getres".to_string(), 229); - map.insert("clock_gettime".to_string(), 228); - map.insert("clock_nanosleep".to_string(), 230); - map.insert("clock_settime".to_string(), 227); - map.insert("clone3".to_string(), 435); - map.insert("clone".to_string(), 56); - map.insert("close_range".to_string(), 436); - map.insert("close".to_string(), 3); - map.insert("connect".to_string(), 42); - map.insert("copy_file_range".to_string(), 326); - map.insert("create_module".to_string(), 174); - map.insert("creat".to_string(), 85); - map.insert("delete_module".to_string(), 176); - map.insert("dup2".to_string(), 33); - map.insert("dup3".to_string(), 292); - map.insert("dup".to_string(), 32); - map.insert("epoll_create1".to_string(), 291); - map.insert("epoll_create".to_string(), 213); - map.insert("epoll_ctl_old".to_string(), 214); - 
map.insert("epoll_ctl".to_string(), 233); - map.insert("epoll_pwait".to_string(), 281); - map.insert("epoll_wait_old".to_string(), 215); - map.insert("epoll_wait".to_string(), 232); - map.insert("eventfd2".to_string(), 290); - map.insert("eventfd".to_string(), 284); - map.insert("execveat".to_string(), 322); - map.insert("execve".to_string(), 59); - map.insert("exit_group".to_string(), 231); - map.insert("exit".to_string(), 60); - map.insert("faccessat2".to_string(), 439); - map.insert("faccessat".to_string(), 269); - map.insert("fadvise64".to_string(), 221); - map.insert("fallocate".to_string(), 285); - map.insert("fanotify_init".to_string(), 300); - map.insert("fanotify_mark".to_string(), 301); - map.insert("fchdir".to_string(), 81); - map.insert("fchmodat".to_string(), 268); - map.insert("fchmod".to_string(), 91); - map.insert("fchownat".to_string(), 260); - map.insert("fchown".to_string(), 93); - map.insert("fcntl".to_string(), 72); - map.insert("fdatasync".to_string(), 75); - map.insert("fgetxattr".to_string(), 193); - map.insert("finit_module".to_string(), 313); - map.insert("flistxattr".to_string(), 196); - map.insert("flock".to_string(), 73); - map.insert("fork".to_string(), 57); - map.insert("fremovexattr".to_string(), 199); - map.insert("fsconfig".to_string(), 431); - map.insert("fsetxattr".to_string(), 190); - map.insert("fsmount".to_string(), 432); - map.insert("fsopen".to_string(), 430); - map.insert("fspick".to_string(), 433); - map.insert("fstatfs".to_string(), 138); - map.insert("fstat".to_string(), 5); - map.insert("fsync".to_string(), 74); - map.insert("ftruncate".to_string(), 77); - map.insert("futex".to_string(), 202); - map.insert("futimesat".to_string(), 261); - map.insert("getcpu".to_string(), 309); - map.insert("getcwd".to_string(), 79); - map.insert("getdents64".to_string(), 217); - map.insert("getdents".to_string(), 78); - map.insert("getegid".to_string(), 108); - map.insert("geteuid".to_string(), 107); - map.insert("getgid".to_string(), 
104); - map.insert("getgroups".to_string(), 115); - map.insert("getitimer".to_string(), 36); - map.insert("get_kernel_syms".to_string(), 177); - map.insert("get_mempolicy".to_string(), 239); - map.insert("getpeername".to_string(), 52); - map.insert("getpgid".to_string(), 121); - map.insert("getpgrp".to_string(), 111); - map.insert("getpid".to_string(), 39); - map.insert("getpmsg".to_string(), 181); - map.insert("getppid".to_string(), 110); - map.insert("getpriority".to_string(), 140); - map.insert("getrandom".to_string(), 318); - map.insert("getresgid".to_string(), 120); - map.insert("getresuid".to_string(), 118); - map.insert("getrlimit".to_string(), 97); - map.insert("get_robust_list".to_string(), 274); - map.insert("getrusage".to_string(), 98); - map.insert("getsid".to_string(), 124); - map.insert("getsockname".to_string(), 51); - map.insert("getsockopt".to_string(), 55); - map.insert("get_thread_area".to_string(), 211); - map.insert("gettid".to_string(), 186); - map.insert("gettimeofday".to_string(), 96); - map.insert("getuid".to_string(), 102); - map.insert("getxattr".to_string(), 191); - map.insert("init_module".to_string(), 175); - map.insert("inotify_add_watch".to_string(), 254); - map.insert("inotify_init1".to_string(), 294); - map.insert("inotify_init".to_string(), 253); - map.insert("inotify_rm_watch".to_string(), 255); - map.insert("io_cancel".to_string(), 210); - map.insert("ioctl".to_string(), 16); - map.insert("io_destroy".to_string(), 207); - map.insert("io_getevents".to_string(), 208); - map.insert("ioperm".to_string(), 173); - map.insert("io_pgetevents".to_string(), 333); - map.insert("iopl".to_string(), 172); - map.insert("ioprio_get".to_string(), 252); - map.insert("ioprio_set".to_string(), 251); - map.insert("io_setup".to_string(), 206); - map.insert("io_submit".to_string(), 209); - map.insert("io_uring_enter".to_string(), 426); - map.insert("io_uring_register".to_string(), 427); - map.insert("io_uring_setup".to_string(), 425); - 
map.insert("kcmp".to_string(), 312); - map.insert("kexec_file_load".to_string(), 320); - map.insert("kexec_load".to_string(), 246); - map.insert("keyctl".to_string(), 250); - map.insert("kill".to_string(), 62); - map.insert("lchown".to_string(), 94); - map.insert("lgetxattr".to_string(), 192); - map.insert("linkat".to_string(), 265); - map.insert("link".to_string(), 86); - map.insert("listen".to_string(), 50); - map.insert("listxattr".to_string(), 194); - map.insert("llistxattr".to_string(), 195); - map.insert("lookup_dcookie".to_string(), 212); - map.insert("lremovexattr".to_string(), 198); - map.insert("lseek".to_string(), 8); - map.insert("lsetxattr".to_string(), 189); - map.insert("lstat".to_string(), 6); - map.insert("madvise".to_string(), 28); - map.insert("mbind".to_string(), 237); - map.insert("membarrier".to_string(), 324); - map.insert("memfd_create".to_string(), 319); - map.insert("migrate_pages".to_string(), 256); - map.insert("mincore".to_string(), 27); - map.insert("mkdirat".to_string(), 258); - map.insert("mkdir".to_string(), 83); - map.insert("mknodat".to_string(), 259); - map.insert("mknod".to_string(), 133); - map.insert("mlock2".to_string(), 325); - map.insert("mlockall".to_string(), 151); - map.insert("mlock".to_string(), 149); - map.insert("mmap".to_string(), 9); - map.insert("modify_ldt".to_string(), 154); - map.insert("mount".to_string(), 165); - map.insert("move_mount".to_string(), 429); - map.insert("move_pages".to_string(), 279); - map.insert("mprotect".to_string(), 10); - map.insert("mq_getsetattr".to_string(), 245); - map.insert("mq_notify".to_string(), 244); - map.insert("mq_open".to_string(), 240); - map.insert("mq_timedreceive".to_string(), 243); - map.insert("mq_timedsend".to_string(), 242); - map.insert("mq_unlink".to_string(), 241); - map.insert("mremap".to_string(), 25); - map.insert("msgctl".to_string(), 71); - map.insert("msgget".to_string(), 68); - map.insert("msgrcv".to_string(), 70); - map.insert("msgsnd".to_string(), 69); - 
map.insert("msync".to_string(), 26); - map.insert("munlockall".to_string(), 152); - map.insert("munlock".to_string(), 150); - map.insert("munmap".to_string(), 11); - map.insert("name_to_handle_at".to_string(), 303); - map.insert("nanosleep".to_string(), 35); - map.insert("newfstatat".to_string(), 262); - map.insert("nfsservctl".to_string(), 180); - map.insert("openat2".to_string(), 437); - map.insert("openat".to_string(), 257); - map.insert("open_by_handle_at".to_string(), 304); - map.insert("open".to_string(), 2); - map.insert("open_tree".to_string(), 428); - map.insert("pause".to_string(), 34); - map.insert("perf_event_open".to_string(), 298); - map.insert("personality".to_string(), 135); - map.insert("pidfd_getfd".to_string(), 438); - map.insert("pidfd_open".to_string(), 434); - map.insert("pidfd_send_signal".to_string(), 424); - map.insert("pipe2".to_string(), 293); - map.insert("pipe".to_string(), 22); - map.insert("pivot_root".to_string(), 155); - map.insert("pkey_alloc".to_string(), 330); - map.insert("pkey_free".to_string(), 331); - map.insert("pkey_mprotect".to_string(), 329); - map.insert("poll".to_string(), 7); - map.insert("ppoll".to_string(), 271); - map.insert("prctl".to_string(), 157); - map.insert("pread64".to_string(), 17); - map.insert("preadv2".to_string(), 327); - map.insert("preadv".to_string(), 295); - map.insert("prlimit64".to_string(), 302); - map.insert("process_madvise".to_string(), 440); - map.insert("process_vm_readv".to_string(), 310); - map.insert("process_vm_writev".to_string(), 311); - map.insert("pselect6".to_string(), 270); - map.insert("ptrace".to_string(), 101); - map.insert("putpmsg".to_string(), 182); - map.insert("pwrite64".to_string(), 18); - map.insert("pwritev2".to_string(), 328); - map.insert("pwritev".to_string(), 296); - map.insert("query_module".to_string(), 178); - map.insert("quotactl".to_string(), 179); - map.insert("readahead".to_string(), 187); - map.insert("readlinkat".to_string(), 267); - 
map.insert("readlink".to_string(), 89); - map.insert("read".to_string(), 0); - map.insert("readv".to_string(), 19); - map.insert("reboot".to_string(), 169); - map.insert("recvfrom".to_string(), 45); - map.insert("recvmmsg".to_string(), 299); - map.insert("recvmsg".to_string(), 47); - map.insert("remap_file_pages".to_string(), 216); - map.insert("removexattr".to_string(), 197); - map.insert("renameat2".to_string(), 316); - map.insert("renameat".to_string(), 264); - map.insert("rename".to_string(), 82); - map.insert("request_key".to_string(), 249); - map.insert("restart_syscall".to_string(), 219); - map.insert("rmdir".to_string(), 84); - map.insert("rseq".to_string(), 334); - map.insert("rt_sigaction".to_string(), 13); - map.insert("rt_sigpending".to_string(), 127); - map.insert("rt_sigprocmask".to_string(), 14); - map.insert("rt_sigqueueinfo".to_string(), 129); - map.insert("rt_sigreturn".to_string(), 15); - map.insert("rt_sigsuspend".to_string(), 130); - map.insert("rt_sigtimedwait".to_string(), 128); - map.insert("rt_tgsigqueueinfo".to_string(), 297); - map.insert("sched_getaffinity".to_string(), 204); - map.insert("sched_getattr".to_string(), 315); - map.insert("sched_getparam".to_string(), 143); - map.insert("sched_get_priority_max".to_string(), 146); - map.insert("sched_get_priority_min".to_string(), 147); - map.insert("sched_getscheduler".to_string(), 145); - map.insert("sched_rr_get_interval".to_string(), 148); - map.insert("sched_setaffinity".to_string(), 203); - map.insert("sched_setattr".to_string(), 314); - map.insert("sched_setparam".to_string(), 142); - map.insert("sched_setscheduler".to_string(), 144); - map.insert("sched_yield".to_string(), 24); - map.insert("seccomp".to_string(), 317); - map.insert("security".to_string(), 185); - map.insert("select".to_string(), 23); - map.insert("semctl".to_string(), 66); - map.insert("semget".to_string(), 64); - map.insert("semop".to_string(), 65); - map.insert("semtimedop".to_string(), 220); - 
map.insert("sendfile".to_string(), 40); - map.insert("sendmmsg".to_string(), 307); - map.insert("sendmsg".to_string(), 46); - map.insert("sendto".to_string(), 44); - map.insert("setdomainname".to_string(), 171); - map.insert("setfsgid".to_string(), 123); - map.insert("setfsuid".to_string(), 122); - map.insert("setgid".to_string(), 106); - map.insert("setgroups".to_string(), 116); - map.insert("sethostname".to_string(), 170); - map.insert("setitimer".to_string(), 38); - map.insert("set_mempolicy".to_string(), 238); - map.insert("setns".to_string(), 308); - map.insert("setpgid".to_string(), 109); - map.insert("setpriority".to_string(), 141); - map.insert("setregid".to_string(), 114); - map.insert("setresgid".to_string(), 119); - map.insert("setresuid".to_string(), 117); - map.insert("setreuid".to_string(), 113); - map.insert("setrlimit".to_string(), 160); - map.insert("set_robust_list".to_string(), 273); - map.insert("setsid".to_string(), 112); - map.insert("setsockopt".to_string(), 54); - map.insert("set_thread_area".to_string(), 205); - map.insert("set_tid_address".to_string(), 218); - map.insert("settimeofday".to_string(), 164); - map.insert("setuid".to_string(), 105); - map.insert("setxattr".to_string(), 188); - map.insert("shmat".to_string(), 30); - map.insert("shmctl".to_string(), 31); - map.insert("shmdt".to_string(), 67); - map.insert("shmget".to_string(), 29); - map.insert("shutdown".to_string(), 48); - map.insert("sigaltstack".to_string(), 131); - map.insert("signalfd4".to_string(), 289); - map.insert("signalfd".to_string(), 282); - map.insert("socketpair".to_string(), 53); - map.insert("socket".to_string(), 41); - map.insert("splice".to_string(), 275); - map.insert("statfs".to_string(), 137); - map.insert("stat".to_string(), 4); - map.insert("statx".to_string(), 332); - map.insert("swapoff".to_string(), 168); - map.insert("swapon".to_string(), 167); - map.insert("symlinkat".to_string(), 266); - map.insert("symlink".to_string(), 88); - 
map.insert("sync_file_range".to_string(), 277); - map.insert("syncfs".to_string(), 306); - map.insert("sync".to_string(), 162); - map.insert("_sysctl".to_string(), 156); - map.insert("sysfs".to_string(), 139); - map.insert("sysinfo".to_string(), 99); - map.insert("syslog".to_string(), 103); - map.insert("tee".to_string(), 276); - map.insert("tgkill".to_string(), 234); - map.insert("timer_create".to_string(), 222); - map.insert("timer_delete".to_string(), 226); - map.insert("timerfd_create".to_string(), 283); - map.insert("timerfd_gettime".to_string(), 287); - map.insert("timerfd_settime".to_string(), 286); - map.insert("timer_getoverrun".to_string(), 225); - map.insert("timer_gettime".to_string(), 224); - map.insert("timer_settime".to_string(), 223); - map.insert("times".to_string(), 100); - map.insert("time".to_string(), 201); - map.insert("tkill".to_string(), 200); - map.insert("truncate".to_string(), 76); - map.insert("tuxcall".to_string(), 184); - map.insert("umask".to_string(), 95); - map.insert("umount2".to_string(), 166); - map.insert("uname".to_string(), 63); - map.insert("unlinkat".to_string(), 263); - map.insert("unlink".to_string(), 87); - map.insert("unshare".to_string(), 272); - map.insert("uselib".to_string(), 134); - map.insert("userfaultfd".to_string(), 323); - map.insert("ustat".to_string(), 136); - map.insert("utimensat".to_string(), 280); - map.insert("utimes".to_string(), 235); - map.insert("utime".to_string(), 132); - map.insert("vfork".to_string(), 58); - map.insert("vhangup".to_string(), 153); - map.insert("vmsplice".to_string(), 278); - map.insert("vserver".to_string(), 236); - map.insert("wait4".to_string(), 61); - map.insert("waitid".to_string(), 247); - map.insert("write".to_string(), 1); - map.insert("writev".to_string(), 20); -} diff --git a/src/seccompiler/src/types.rs b/src/seccompiler/src/types.rs new file mode 100644 index 00000000000..2035f8b8ea4 --- /dev/null +++ b/src/seccompiler/src/types.rs @@ -0,0 +1,192 @@ +// Copyright 2024 
Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::BTreeMap; +use std::ffi::CString; +use std::str::FromStr; + +use serde::*; + +// use libseccomp::{ScmpAction, ScmpArch, ScmpCompareOp}; +use crate::bindings::*; + +/// Comparison to perform when matching a condition. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SeccompCmpOp { + Eq, + Ge, + Gt, + Le, + Lt, + MaskedEq(u64), + Ne, +} + +/// Seccomp argument value length. +#[derive(Clone, Debug, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SeccompCmpArgLen { + /// Argument value length is 4 bytes. + Dword, + /// Argument value length is 8 bytes. + Qword, +} + +/// Condition that syscall must match in order to satisfy a rule. +#[derive(Debug, Deserialize)] +pub struct SeccompCondition { + pub index: u8, + pub op: SeccompCmpOp, + pub val: u64, + #[serde(rename = "type")] + pub val_len: SeccompCmpArgLen, +} + +impl SeccompCondition { + pub fn to_scmp_type(&self) -> scmp_arg_cmp { + match self.op { + SeccompCmpOp::Eq => { + // When using EQ libseccomp compares the whole 64 bits. In + // general this is not a problem, but for example we have + // observed musl `ioctl` to leave garbage in the upper bits of + // the `request` argument. There is a GH issue to allow 32bit + // comparisons (see + // https://github.com/seccomp/libseccomp/issues/383) but it is not + // merged yet. Until that is available, do a masked comparison + // with the upper 32bits set to 0, so we will compare that `hi32 + // & 0x0 == 0`, which is always true. This costs one additional + // instruction, but will likely be optimized away by the BPF + // JIT.
+ match self.val_len { + SeccompCmpArgLen::Dword => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_MASKED_EQ, + datum_a: 0x00000000FFFFFFFF, + datum_b: self.val, + }, + SeccompCmpArgLen::Qword => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_EQ, + datum_a: self.val, + datum_b: 0, + }, + } + } + SeccompCmpOp::Ge => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_GE, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Gt => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_GT, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Le => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_LE, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Lt => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_LT, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Ne => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_NE, + datum_a: self.val, + datum_b: 0, + }, + + SeccompCmpOp::MaskedEq(m) => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_MASKED_EQ, + datum_a: m, + datum_b: self.val, + }, + } + } +} + +/// Actions that `seccomp` can apply to process calling a syscall. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SeccompAction { + Allow, + Errno(u16), + KillThread, + KillProcess, + Log, + Trace(u16), + Trap, +} + +impl SeccompAction { + pub fn to_scmp_type(&self) -> u32 { + match self { + SeccompAction::Allow => SCMP_ACT_ALLOW, + SeccompAction::Errno(e) => SCMP_ACT_ERRNO(*e), + SeccompAction::KillThread => SCMP_ACT_KILL_THREAD, + SeccompAction::KillProcess => SCMP_ACT_KILL_PROCESS, + SeccompAction::Log => SCMP_ACT_LOG, + SeccompAction::Trace(t) => SCMP_ACT_TRACE(*t), + SeccompAction::Trap => SCMP_ACT_TRAP, + } + } +} + +/// Rule that `seccomp` attempts to match for a syscall. +/// +/// If all conditions match then rule gets matched. 
+/// The action of the first rule that matches will be applied to the calling process. +/// If no rule matches the default action is applied. +#[derive(Debug, Deserialize)] +pub struct SyscallRule { + pub syscall: CString, + pub args: Option>, +} + +/// Filter containing rules assigned to syscall numbers. +#[derive(Debug, Deserialize)] +pub struct Filter { + pub default_action: SeccompAction, + pub filter_action: SeccompAction, + pub filter: Vec, +} + +/// Deserializable object that represents the Json filter file. +#[derive(Debug, Deserialize)] +pub struct BpfJson(pub BTreeMap); + +/// Supported target architectures. +#[derive(Debug)] +pub enum TargetArch { + X86_64, + Aarch64, +} + +impl TargetArch { + pub fn to_scmp_type(&self) -> u32 { + match self { + TargetArch::X86_64 => SCMP_ARCH_X86_64, + TargetArch::Aarch64 => SCMP_ARCH_AARCH64, + } + } +} + +impl FromStr for TargetArch { + type Err = String; + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "x86_64" => Ok(TargetArch::X86_64), + "aarch64" => Ok(TargetArch::Aarch64), + _ => Err(s.to_string()), + } + } +} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 043fc11528c..4d5e323ecbd 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -31,7 +31,6 @@ log-instrument = { path = "../log-instrument", optional = true } memfd = "0.6.3" micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } -seccompiler = { path = "../seccompiler" } semver = { version = "1.0.24", features = ["serde"] } serde = { version = "1.0.216", features = ["derive", "rc"] } serde_json = "1.0.133" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8594da9f077..a9561479123 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -19,7 +19,6 @@ use linux_loader::loader::elf::Elf as Loader; #[cfg(target_arch = "aarch64")] use linux_loader::loader::pe::PE as Loader; use linux_loader::loader::KernelLoader; -use seccompiler::BpfThreadMap; use userfaultfd::Uffd; use 
utils::time::TimestampUs; use vm_memory::ReadVolatile; @@ -63,6 +62,7 @@ use crate::gdb; use crate::logger::{debug, error}; use crate::persist::{MicrovmState, MicrovmStateError}; use crate::resources::VmResources; +use crate::seccomp::BpfThreadMap; use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootConfig; @@ -372,7 +372,7 @@ pub fn build_microvm_for_boot( // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. // Keep this as the last step before resuming vcpus. - seccompiler::apply_filter( + crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or_else(|| MissingSeccompFilters("vmm".to_string()))?, @@ -443,7 +443,7 @@ pub enum BuildMicrovmFromSnapshotError { /// Failed to apply VMM secccomp filter as none found. MissingVmmSeccompFilters, /// Failed to apply VMM secccomp filter: {0} - SeccompFiltersInternal(#[from] seccompiler::InstallationError), + SeccompFiltersInternal(#[from] crate::seccomp::InstallationError), /// Failed to restore ACPI device manager: {0} ACPIDeviManager(#[from] ACPIDeviceManagerRestoreError), /// VMGenID update failed: {0} @@ -559,7 +559,7 @@ pub fn build_microvm_from_snapshot( // Load seccomp filters for the VMM thread. // Keep this as the last step of the building process. - seccompiler::apply_filter( + crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or(BuildMicrovmFromSnapshotError::MissingVmmSeccompFilters)?, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 6a28e14f26b..77c0018c55a 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -97,7 +97,7 @@ pub mod resources; /// microVM RPC API adapters. pub mod rpc_interface; /// Seccomp filter utilities. -pub mod seccomp_filters; +pub mod seccomp; /// Signal handling utilities. 
pub mod signal_handler; /// Serialization and deserialization facilities @@ -122,7 +122,7 @@ use device_manager::acpi::ACPIDeviceManager; use device_manager::resources::ResourceAllocator; use devices::acpi::vmgenid::VmGenIdError; use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber}; -use seccompiler::BpfProgram; +use seccomp::BpfProgram; use userfaultfd::Uffd; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; @@ -225,7 +225,7 @@ pub enum VmmError { /// Cannot add a device to the MMIO Bus. {0} RegisterMMIODevice(device_manager::mmio::MmioError), /// Cannot install seccomp filters: {0} - SeccompFilters(seccompiler::InstallationError), + SeccompFilters(seccomp::InstallationError), /// Error writing to the serial console: {0} Serial(io::Error), /// Error creating timer fd: {0} diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 3479e0b6309..1feef41ec30 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -11,7 +11,6 @@ use std::os::unix::net::UnixStream; use std::path::Path; use std::sync::{Arc, Mutex}; -use seccompiler::BpfThreadMap; use semver::Version; use serde::{Deserialize, Serialize}; use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; @@ -28,6 +27,7 @@ use crate::cpu_config::x86_64::cpuid::CpuidTrait; use crate::device_manager::persist::{ACPIDeviceManagerState, DevicePersistError, DeviceStates}; use crate::logger::{info, warn}; use crate::resources::VmResources; +use crate::seccomp::BpfThreadMap; use crate::snapshot::Snapshot; use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootSourceConfig; diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index b0c95529f46..60a046f7e89 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -4,7 +4,6 @@ use std::fmt::{self, Debug}; use std::sync::{Arc, Mutex, MutexGuard}; -use seccompiler::BpfThreadMap; use serde_json::Value; use utils::time::{get_time_us, ClockType}; @@ 
-18,6 +17,7 @@ use crate::logger::{info, warn, LoggerConfig, *}; use crate::mmds::data_store::{self, Mmds}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; use crate::resources::VmmConfig; +use crate::seccomp::BpfThreadMap; use crate::vmm_config::balloon::{ BalloonConfigError, BalloonDeviceConfig, BalloonStats, BalloonUpdateConfig, BalloonUpdateStatsConfig, @@ -852,12 +852,11 @@ impl RuntimeApiController { mod tests { use std::path::PathBuf; - use seccompiler::BpfThreadMap; - use super::*; use crate::builder::tests::default_vmm; use crate::devices::virtio::block::CacheType; use crate::mmds::data_store::MmdsVersion; + use crate::seccomp::BpfThreadMap; use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; use crate::HTTP_MAX_PAYLOAD_SIZE; diff --git a/src/vmm/src/seccomp.rs b/src/vmm/src/seccomp.rs new file mode 100644 index 00000000000..3da974e6027 --- /dev/null +++ b/src/vmm/src/seccomp.rs @@ -0,0 +1,234 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::io::Read; +use std::sync::Arc; + +use bincode::{DefaultOptions, Options}; + +/// Each BPF instruction is 8 bytes long and 4 byte aligned. +/// This alignment needs to be satisfied in order for a BPF code to be accepted +/// by the syscalls. Using u64 here is safe as it has the same size and an even bigger alignment. +pub type BpfInstruction = u64; + +/// Program made up of a sequence of BPF instructions. +pub type BpfProgram = Vec; + +/// Reference to program made up of a sequence of BPF instructions. +pub type BpfProgramRef<'a> = &'a [BpfInstruction]; + +/// Type that associates a thread category to a BPF program. +pub type BpfThreadMap = HashMap>; + +/// Binary filter deserialization errors. +pub type DeserializationError = bincode::Error; + +/// Retrieve empty seccomp filters.
+pub fn get_empty_filters() -> BpfThreadMap { + let mut map = BpfThreadMap::new(); + map.insert("vmm".to_string(), Arc::new(vec![])); + map.insert("api".to_string(), Arc::new(vec![])); + map.insert("vcpu".to_string(), Arc::new(vec![])); + map +} + +/// Deserialize binary with bpf filters +pub fn deserialize_binary( + reader: R, + bytes_limit: Option, +) -> Result { + let result = match bytes_limit { + Some(limit) => DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes() + .with_limit(limit) + .deserialize_from::>(reader), + // No limit is the default. + None => bincode::deserialize_from::>(reader), + }?; + + Ok(result + .into_iter() + .map(|(k, v)| (k.to_lowercase(), Arc::new(v))) + .collect()) +} + +/// Filter installation errors. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum InstallationError { + /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions + FilterTooLarge, + /// prctl` syscall failed with error code: {0} + Prctl(std::io::Error), +} + +/// The maximum seccomp-BPF program length allowed by the linux kernel. +pub const BPF_MAX_LEN: usize = 4096; + +/// BPF structure definition for filter array. +/// See /usr/include/linux/filter.h . +#[repr(C)] +#[derive(Debug)] +struct SockFprog { + len: u16, + filter: *const BpfInstruction, +} + +/// Apply bpf filter. +pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> { + // If the program is empty, don't install the filter. + if bpf_filter.is_empty() { + return Ok(()); + } + + // If the program length is greater than the limit allowed by the kernel, + // fail quickly. Otherwise, `prctl` will give a more cryptic error code. + if BPF_MAX_LEN < bpf_filter.len() { + return Err(InstallationError::FilterTooLarge); + } + + let bpf_filter_len = + u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?; + + // SAFETY: Safe because the parameters are valid. 
+ unsafe { + { + let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if rc != 0 { + return Err(InstallationError::Prctl(std::io::Error::last_os_error())); + } + } + + let bpf_prog = SockFprog { + len: bpf_filter_len, + filter: bpf_filter.as_ptr(), + }; + let bpf_prog_ptr = &bpf_prog as *const SockFprog; + { + let rc = libc::syscall( + libc::SYS_seccomp, + libc::SECCOMP_SET_MODE_FILTER, + 0, + bpf_prog_ptr, + ); + if rc != 0 { + return Err(InstallationError::Prctl(std::io::Error::last_os_error())); + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + + use std::collections::HashMap; + use std::sync::Arc; + use std::thread; + + use super::*; + + #[test] + fn test_deserialize_binary() { + // Malformed bincode binary. + { + let data = "adassafvc".to_string(); + deserialize_binary(data.as_bytes(), None).unwrap_err(); + } + + // Test that the binary deserialization is correct, and that the thread keys + // have been lowercased. + { + let bpf_prog = vec![0; 2]; + let mut filter_map: HashMap = HashMap::new(); + filter_map.insert("VcpU".to_string(), bpf_prog.clone()); + let bytes = bincode::serialize(&filter_map).unwrap(); + + let mut expected_res = BpfThreadMap::new(); + expected_res.insert("vcpu".to_string(), Arc::new(bpf_prog)); + assert_eq!(deserialize_binary(&bytes[..], None).unwrap(), expected_res); + } + + // Test deserialization with binary_limit. + { + let bpf_prog = vec![0; 2]; + + let mut filter_map: HashMap = HashMap::new(); + filter_map.insert("t1".to_string(), bpf_prog.clone()); + + let bytes = bincode::serialize(&filter_map).unwrap(); + + // Binary limit too low. + assert!(matches!( + deserialize_binary(&bytes[..], Some(20)).unwrap_err(), + error + if error.to_string() == "the size limit has been reached" + )); + + let mut expected_res = BpfThreadMap::new(); + expected_res.insert("t1".to_string(), Arc::new(bpf_prog)); + + // Correct binary limit. 
+ assert_eq!( + deserialize_binary(&bytes[..], Some(50)).unwrap(), + expected_res + ); + } + } + + #[test] + fn test_filter_apply() { + // Test filter too large. + thread::spawn(|| { + let filter: BpfProgram = vec![0; 5000]; + + // Apply seccomp filter. + assert!(matches!( + apply_filter(&filter).unwrap_err(), + InstallationError::FilterTooLarge + )); + }) + .join() + .unwrap(); + + // Test empty filter. + thread::spawn(|| { + let filter: BpfProgram = vec![]; + + assert_eq!(filter.len(), 0); + + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + + apply_filter(&filter).unwrap(); + + // Test that seccomp level remains 0, as an empty filter is never installed. + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + }) + .join() + .unwrap(); + + // Test invalid BPF code. + thread::spawn(|| { + let filter = vec![0xFF; 1]; + + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + + assert!(matches!( + apply_filter(&filter).unwrap_err(), + InstallationError::Prctl(_) + )); + + // test that seccomp level remains 0 on failure. + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + }) + .join() + .unwrap(); + } +} diff --git a/src/vmm/src/seccomp_filters.rs b/src/vmm/src/seccomp_filters.rs deleted file mode 100644 index aabdc1ef2c1..00000000000 --- a/src/vmm/src/seccomp_filters.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; - -use seccompiler::BpfThreadMap; - -/// Retrieve empty seccomp filters.
-pub fn get_empty_filters() -> BpfThreadMap { - let mut map = BpfThreadMap::new(); - map.insert("vmm".to_string(), Arc::new(vec![])); - map.insert("api".to_string(), Arc::new(vec![])); - map.insert("vcpu".to_string(), Arc::new(vec![])); - map -} diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 5bcfd41fd06..ac4befcb3d1 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -177,7 +177,6 @@ mod tests { use std::{process, thread}; use libc::syscall; - use seccompiler::sock_filter; use super::*; @@ -186,11 +185,6 @@ mod tests { let child = thread::spawn(move || { register_signal_handlers().unwrap(); - let filter = make_test_seccomp_bpf_filter(); - - seccompiler::apply_filter(&filter).unwrap(); - assert_eq!(METRICS.seccomp.num_faults.fetch(), 0); - // Call the forbidden `SYS_mkdirat`. unsafe { libc::syscall(libc::SYS_mkdirat, "/foo/bar\0") }; @@ -238,7 +232,6 @@ mod tests { }); child.join().unwrap(); - assert!(METRICS.seccomp.num_faults.fetch() >= 1); assert!(METRICS.signals.sigbus.fetch() >= 1); assert!(METRICS.signals.sigsegv.fetch() >= 1); assert!(METRICS.signals.sigxfsz.fetch() >= 1); @@ -247,141 +240,4 @@ mod tests { assert!(METRICS.signals.sighup.fetch() >= 1); assert!(METRICS.signals.sigill.fetch() >= 1); } - - fn make_test_seccomp_bpf_filter() -> Vec { - // Create seccomp filter that allows all syscalls, except for `SYS_mkdirat`. - // For some reason, directly calling `SYS_kill` with SIGSYS, like we do with the - // other signals, results in an error. Probably because of the way `cargo test` is - // handling signals. 
- #[cfg(target_arch = "aarch64")] - #[allow(clippy::unreadable_literal)] - let bpf_filter = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - k: 3221225655, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 21, - jt: 0, - jf: 1, - k: 34, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 1, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 2, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 196608, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - ]; - #[cfg(target_arch = "x86_64")] - #[allow(clippy::unreadable_literal)] - let bpf_filter = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - k: 3221225534, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 21, - jt: 0, - jf: 1, - k: 258, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 1, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 2, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 196608, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - ]; - - bpf_filter - } } diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index f90ba7fbd0c..1ba79a55231 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -10,7 +10,7 @@ use vmm_sys_util::tempdir::TempDir; use crate::builder::build_microvm_for_boot; use crate::resources::VmResources; -use crate::seccomp_filters::get_empty_filters; +use crate::seccomp::get_empty_filters; use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; use 
crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 73779fbd928..ddfeda21b4c 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -19,7 +19,6 @@ use kvm_ioctls::VcpuExit; use kvm_ioctls::VcpuFd; use libc::{c_int, c_void, siginfo_t}; use log::{error, info, warn}; -use seccompiler::{BpfProgram, BpfProgramRef}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -27,6 +26,7 @@ use crate::cpu_config::templates::{CpuConfiguration, GuestConfigError}; #[cfg(feature = "gdb")] use crate::gdb::target::{get_raw_tid, GdbTargetError}; use crate::logger::{IncMetric, METRICS}; +use crate::seccomp::{BpfProgram, BpfProgramRef}; use crate::utils::signal::{register_signal_handler, sigrtmin, Killable}; use crate::utils::sm::StateMachine; use crate::vstate::vm::Vm; @@ -288,7 +288,7 @@ impl Vcpu { // Load seccomp filters for this vCPU thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. 
- if let Err(err) = seccompiler::apply_filter(seccomp_filter) { + if let Err(err) = crate::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on vCPU {}: Error: {}", self.kvm_vcpu.index, err @@ -773,7 +773,7 @@ pub(crate) mod tests { use crate::builder::StartMicrovmError; use crate::devices::bus::DummyDevice; use crate::devices::BusDevice; - use crate::seccomp_filters::get_empty_filters; + use crate::seccomp::get_empty_filters; use crate::utils::signal::validate_signal_num; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 4312c6345db..40eab05c4a4 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -12,7 +12,7 @@ use vmm::resources::VmResources; use vmm::rpc_interface::{ LoadSnapshotError, PrebootApiController, RuntimeApiController, VmmAction, VmmActionError, }; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::get_empty_filters; use vmm::snapshot::Snapshot; #[cfg(target_arch = "x86_64")] use vmm::test_utils::dirty_tracking_vmm; diff --git a/tools/devctr/Dockerfile b/tools/devctr/Dockerfile index 9340c4c9c98..78e0f52e9e8 100644 --- a/tools/devctr/Dockerfile +++ b/tools/devctr/Dockerfile @@ -149,6 +149,10 @@ RUN cd /usr/include/$ARCH-linux-musl \ && ln -s ../asm-generic asm-generic # Install static version of libseccomp +# We need to compile from source because +# libseccomp provided by the distribution is not +# compiled with musl-gcc and we need this +# for our musl builds. # RUN apt-get update \ && apt-get -y install \ diff --git a/tools/devtool b/tools/devtool index 7d41229b2e7..6e4d65e488c 100755 --- a/tools/devtool +++ b/tools/devtool @@ -375,12 +375,6 @@ cmd_help() { echo " This should be used as the last step in every commit, to ensure that the" echo " Rust style tests pass." 
echo "" - echo " generate_syscall_tables " - echo " Generates the syscall tables for seccompiler, according to a given kernel version." - echo " Release candidate (rc) linux versions are not allowed." - echo " Outputs a rust file for each supported arch: src/seccompiler/src/syscall_table/{arch}.rs" - echo " Supported architectures: x86_64 and aarch64." - echo "" echo " install [-p|--path] [--debug|--release]" echo " Install firecracker, jailer and seccomp binaries to /usr/local/bin or a given path." echo " Only the musl linked binaries are supported." @@ -1037,137 +1031,6 @@ cmd_checkenv() { check_vulns } -generate_syscall_table_x86_64() { - path_to_rust_file="$FC_ROOT_DIR/src/seccompiler/src/syscall_table/x86_64.rs" - - echo "$header" > $path_to_rust_file - - # the table for x86_64 is nicely formatted here: linux/arch/x86/entry/syscalls/syscall_64.tbl - cat linux/arch/x86/entry/syscalls/syscall_64.tbl | grep -v "^#" | grep -v -e '^$' |\ - awk '{print $2,$3,$1}' | grep -v "^x32" |\ - awk '{print " map.insert(\""$2"\".to_string(), "$3");"}' | sort >> $path_to_rust_file - - echo "$footer" >> $path_to_rust_file - - say "Generated at: $path_to_rust_file" -} - -generate_syscall_table_aarch64() { - path_to_rust_file="$FC_ROOT_DIR/src/seccompiler/src/syscall_table/aarch64.rs" - - # filter for substituting `#define`s that point to other macros; - # values taken from linux/include/uapi/asm-generic/unistd.h - replace+='s/__NR3264_fadvise64/223/;' - replace+='s/__NR3264_fcntl/25/;' - replace+='s/__NR3264_fstatat/79/;' - replace+='s/__NR3264_fstatfs/44/;' - replace+='s/__NR3264_fstat/80/;' - replace+='s/__NR3264_ftruncate/46/;' - replace+='s/__NR3264_lseek/62/;' - replace+='s/__NR3264_sendfile/71/;' - replace+='s/__NR3264_statfs/43/;' - replace+='s/__NR3264_truncate/45/;' - replace+='s/__NR3264_mmap/222/;' - - echo "$header" > $path_to_rust_file - - # run the gcc command in the Docker container (to make sure that we have gcc installed) - # the aarch64 syscall table is not 
located in a .tbl file, like x86; we run gcc's - # pre-processor to extract the numeric constants from header files. - run_devctr \ - --user "$(id -u):$(id -g)" \ - --workdir "$CTR_KERNEL_DIR" \ - -- \ - gcc -Ilinux/include/uapi -E -dM -D__ARCH_WANT_RENAMEAT\ - -D__BITS_PER_LONG=64\ - linux/arch/arm64/include/uapi/asm/unistd.h |\ - grep "#define __NR_" | grep -v "__NR_syscalls" |\ - grep -v "__NR_arch_specific_syscall" |\ - awk -F '__NR_' '{print $2}' |\ - sed $replace |\ - awk '{ print " map.insert(\""$1"\".to_string(), "$2");" }' |\ - sort -d >> $path_to_rust_file - ret=$? - - [ $ret -ne 0 ] && return $ret - - echo "$footer" >> $path_to_rust_file - - say "Generated at: $path_to_rust_file" -} - -cmd_generate_syscall_tables() { - # Parse any command line args. - while [ $# -gt 0 ]; do - case "$1" in - "-h"|"--help") { cmd_help; exit 1; } ;; - *) { kernel_version="$1"; break; } ;; - esac - shift - done - - validate_kernel_version "$kernel_version" - - kernel_major=v$(echo ${kernel_version} | cut -d . -f 1).x - kernel_baseurl=https://www.kernel.org/pub/linux/kernel/${kernel_major} - kernel_archive=linux-${kernel_version}.tar.xz - - ensure_devctr - - # Create the kernel clone directory - rm -rf "$KERNEL_DIR" - create_dir "$KERNEL_DIR" - cd "$KERNEL_DIR" - - say "Fetching linux kernel..." - - # Get sha256 checksum. - curl -fsSLO ${kernel_baseurl}/sha256sums.asc && \ - kernel_sha256=$(grep ${kernel_archive} sha256sums.asc | cut -d ' ' -f 1) - # Get kernel archive. - curl -fsSLO "$kernel_baseurl/$kernel_archive" && \ - # Verify checksum. - echo "${kernel_sha256} ${kernel_archive}" | sha256sum -c - && \ - # Decompress the kernel source. - xz -d "${kernel_archive}" && \ - cat linux-${kernel_version}.tar | tar -x && mv linux-${kernel_version} linux - - ret=$? - [ $ret -ne 0 ] && return $ret - - # rust file header - read -r -d '' header << EOM -// Copyright $(date +"%Y") Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by \`tools/devtool generate_syscall_tables\`. -// Do NOT manually edit! -// Generated at: $(date) -// Kernel version: $kernel_version - -use std::collections::HashMap; - -pub(crate) fn make_syscall_table(map: &mut HashMap) { -EOM - - # rust file footer - read -r -d '' footer << EOM -} - -EOM - - # generate syscall table for x86_64 - say "Generating table for x86_64..." - generate_syscall_table_x86_64 $header $footer - - # generate syscall table for aarch64 - say "Generating table for aarch64..." - generate_syscall_table_aarch64 $header $footer - - ret=$? - [ $ret -ne 0 ] && return $ret -} - cmd_install() { # By default we install release/musl binaries. profile="release"