#include <stdbool.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#define CMD_SET_FAN_STR "--set"
#define CMD_GET_FAN_STR "--get"
#define ALL_FAN_NUM     0xFF
#define SENSOR_FAIL_RECORD_DIR "/tmp/sensorfail_record"
#define FAN_FAIL_RECORD_DIR "/tmp/fanfail_record"
#define SENSOR_FAIL_FILE "/tmp/cache_store/sensor_fail_boost"
#define FAN_FAIL_FILE "/tmp/cache_store/fan_fail_boost"
#define FAN_MODE_FILE "/tmp/cache_store/fan_mode"
/* Substring that identifies the fscd daemon in a process command line. */
#define FSCD_CMDLINE_TOKEN "/usr/bin/fscd.py"

enum {
  CMD_SET_FAN = 0,
  CMD_GET_FAN,
};

/* Fan modes as stored (as a single ASCII digit) in FAN_MODE_FILE. */
enum {
  NORMAL = 0,
  TRANSIT = 1,
  BOOST = 2,
};

/*
 * Print the failure records of one category ("Sensor Fail" / "Fan Fail").
 *
 * label      - text printed before the colon.
 * flag_file  - marker file; records are only listed when it exists.
 * record_dir - directory holding one entry per failed item.
 * manual     - true when no fscd is running; the records are then not
 *              meaningful and a fixed notice is printed instead.
 *
 * Output is "<label>: None" when there is nothing to report, otherwise
 * "<label>: " followed by a newline and one entry name per line.
 */
static void
print_fail_records(const char *label, const char *flag_file,
                   const char *record_dir, bool manual) {
  DIR *dir;
  struct dirent *ent;
  int cnt = 0;

  if (manual) {
    printf("%s: Not support in manual mode(No fscd running)\n", label);
    return;
  }

  if (access(flag_file, F_OK) != 0) {
    printf("%s: None\n", label);
    return;
  }

  dir = opendir(record_dir);
  if (dir == NULL) {
    printf("%s: None\n", label);
    return;
  }

  printf("%s: ", label);
  while ((ent = readdir(dir)) != NULL) {
    if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
      continue;
    }
    /* Terminate the header line before the first entry only. */
    if (cnt++ == 0) {
      printf("\n");
    }
    printf("%s\n", ent->d_name);
  }
  closedir(dir);

  if (cnt == 0) {
    printf("None\n");
  }
}

/* Report which sensors are currently recorded as failed. */
static void
sensor_fail_check(bool status) {
  print_fail_records("Sensor Fail", SENSOR_FAIL_FILE,
                     SENSOR_FAIL_RECORD_DIR, status);
}

/* Report which fans are currently recorded as failed. */
static void
fan_fail_check(bool status) {
  print_fail_records("Fan Fail", FAN_FAIL_FILE, FAN_FAIL_RECORD_DIR, status);
}

/*
 * Map the ASCII digit read from FAN_MODE_FILE to its display name.
 * Anything other than '0', '1' or '2' yields "Unknown".
 */
static const char *
fan_mode_name(unsigned char mode_char) {
  switch ((int)mode_char - '0') {
    case NORMAL:
      return "Normal";
    case TRANSIT:
      return "Transitional";
    case BOOST:
      return "Boost";
    default:
      return "Unknown";
  }
}

/*
 * Check whether any process has FSCD_CMDLINE_TOKEN in its command line.
 *
 * Scans /proc/<pid>/cmdline directly instead of spawning a
 * "ps | grep | wc -l" pipeline, whose line count also includes the
 * pipeline's own shell and grep processes.
 *
 * Returns 1 if found, 0 if not found, -1 if /proc cannot be read.
 */
static int
fscd_is_running(void) {
  DIR *proc;
  struct dirent *ent;
  int found = 0;

  proc = opendir("/proc");
  if (proc == NULL) {
    return -1;
  }

  while (!found && (ent = readdir(proc)) != NULL) {
    char path[300];
    char cmdline[256];
    FILE *fp;
    size_t len;
    size_t i;
    int n;

    /* Only the numeric entries of /proc are processes. */
    if (ent->d_name[0] < '0' || ent->d_name[0] > '9') {
      continue;
    }

    n = snprintf(path, sizeof(path), "/proc/%s/cmdline", ent->d_name);
    if (n < 0 || (size_t)n >= sizeof(path)) {
      continue;
    }

    fp = fopen(path, "r");
    if (fp == NULL) {
      continue; /* process exited between readdir() and fopen() */
    }
    len = fread(cmdline, 1, sizeof(cmdline) - 1, fp);
    fclose(fp);

    /* Arguments are NUL-separated; join them so strstr() sees them all. */
    for (i = 0; i < len; i++) {
      if (cmdline[i] == '\0') {
        cmdline[i] = ' ';
      }
    }
    cmdline[len] = '\0';

    if (strstr(cmdline, FSCD_CMDLINE_TOKEN) != NULL) {
      found = 1;
    }
  }
  closedir(proc);

  return found;
}

/*
 * Print the current fan mode.
 *
 * Returns true when the fans are in manual mode (no fscd running), false
 * otherwise - including every case where the mode cannot be determined.
 */
static bool
fan_mode_check(void) {
  int running;
  int fd;
  unsigned char mode = 0;
  ssize_t cnt;

  running = fscd_is_running();
  if (running < 0) {
    printf("Fan Mode: Unknown\n");
    return false;
  }
  if (running == 0) {
    printf("Fan Mode: Manual(No fscd running)\n");
    return true;
  }

  fd = open(FAN_MODE_FILE, O_RDONLY);
  if (fd < 0) {
    printf("Fan Mode: Unknown\n");
    return false;
  }

  cnt = read(fd, &mode, sizeof(mode));
  close(fd);

  /* An empty file (e.g. caught mid-rewrite by fscd) reads as Unknown. */
  printf("Fan Mode: %s\n", (cnt > 0) ? fan_mode_name(mode) : "Unknown");
  return false;
}
def get_set_fan_mode(self, mode, action):
    """Read or persist the current fan mode in RECORD_DIR + 'fan_mode'.

    The file holds the mode as a single ASCII digit (see ``fan_mode``).

    :param mode: mode to store when ``action == 'write'``; ignored for
        ``'read'``. May be an int or its string form.
    :param action: ``'read'`` or ``'write'``. Any other value is a no-op.
    :return: for ``'read'``, the stored mode as an int, or
        ``fan_mode['normal_mode']`` when the file is missing, unreadable
        or does not hold a digit; ``None`` otherwise.
    """
    fan_mode_path = RECORD_DIR + 'fan_mode'

    # Compare with ==: the previous `action in 'read'` was a substring
    # test, so '' or 'r' would also have been accepted.
    if action == 'read':
        try:
            with open(fan_mode_path, 'r') as f:
                stored = f.read(1)
        except (IOError, OSError):
            return fan_mode['normal_mode']
        try:
            return int(stored)
        except ValueError:
            # Empty or corrupt file, e.g. caught while being rewritten.
            return fan_mode['normal_mode']

    if action == 'write':
        new_mode = str(mode)
        current = None
        if os.path.isfile(fan_mode_path):
            with open(fan_mode_path, 'r') as f:
                current = f.read(1)
        # Compare as strings so an int mode matches the stored digit and
        # the file is only rewritten when the mode really changed.
        if new_mode != current:
            with open(fan_mode_path, 'w') as f:
                f.write(new_mode)
    return None
(self.sensor_valid_cur[sensor_index] != -1): if re.match(r'.+_C[2-4]_[0-3]_NVME_.+', sensor.name) != None: Logger.warn("%s Fail" % v) outmin = max(outmin, self.boost) + cause_boost_count += 1 elif re.match(r'SSD', sensor.name) != None or re.match(r'(.*)nvme(.*)', sname) != None: fail_ssd_count = fail_ssd_count + 1 else: Logger.warn("%s Fail" % v) outmin = max(outmin, self.boost) + cause_boost_count += 1 + if not os.path.isfile(sensor_fail_record_path): + sensor_fail_record = open(sensor_fail_record_path, 'w') + sensor_fail_record.close() + if outmin == self.boost: + mode = fan_mode['boost_mode'] + else: + if os.path.isfile(sensor_fail_record_path): + os.remove(sensor_fail_record_path) else: if (not self.missing_sensor_assert_flag[sensor_index]) and (self.missing_sensor_assert_retry[sensor_index] >= 2): Logger.crit('ASSERT: Zone%d Missing sensors: %s' % (self.counter, v)) @@ -146,6 +191,8 @@ def run(self, sensors, dt): Logger.crit('ASSERT: Zone%d No sane fan speed could be \ calculated! Using transitional speed.' 
% (self.counter)) exprout = self.transitional + mode = fan_mode['trans_mode'] + no_sane_flag = 1 self.transitional_assert_flag = True else: if self.transitional_assert_flag: @@ -164,13 +211,31 @@ def run(self, sensors, dt): list_index = list_index + 1 if fail_ssd_count <= i[0]: exprout = exprout + i[1] + no_sane_flag = 0 break else: if list_index == len(self.ssd_progressive_algorithm['offset_algorithm']): outmin = max(outmin, self.boost) + cause_boost_count += 1 + if outmin == self.boost: + mode = fan_mode['boost_mode'] + + boost_record_path = RECORD_DIR + 'sensor_fail_boost' + if cause_boost_count != 0: + if not os.path.isfile(boost_record_path): + sensor_fail_boost_record = open(boost_record_path, 'w') + sensor_fail_boost_record.close() + else: + if os.path.isfile(boost_record_path): + os.remove(boost_record_path) + if not exprout: exprout = 0 if exprout < outmin: exprout = outmin + else: + if no_sane_flag != 1: + mode = fan_mode['normal_mode'] + self.get_set_fan_mode(mode, action='write') exprout = clamp(exprout, 0, 100) return exprout diff --git a/common/recipes-core/fscd3/fscd/fscd.py b/common/recipes-core/fscd3/fscd/fscd.py index 93238dd5b463..e265cc26e9ce 100644 --- a/common/recipes-core/fscd3/fscd/fscd.py +++ b/common/recipes-core/fscd3/fscd/fscd.py @@ -33,6 +33,9 @@ from fsc_board import board_fan_actions, board_host_actions, board_callout from fsc_sensor import FscSensorSourceUtil +RECORD_DIR = '/tmp/cache_store/' +SENSOR_FAIL_RECORD_DIR = '/tmp/sensorfail_record/' +FAN_FAIL_RECORD_DIR = '/tmp/fanfail_record/' RAMFS_CONFIG = '/etc/fsc-config.json' CONFIG_DIR = '/etc/fsc' # Enable the following for testing only @@ -42,6 +45,12 @@ DEFAULT_INIT_TRANSITIONAL = 70 WDTCLI_CMD = '/usr/local/bin/wdtcli' +fan_mode = { + 'normal_mode': 0, + 'trans_mode': 1, + 'boost_mode': 2 +} + def kick_watchdog(): """kick the watchdog device. 
""" @@ -64,6 +73,7 @@ def stop_watchdog(): if len(err) != 0: Logger.error("failed to kick watchdog device") + class Fscd(object): DEFAULT_BOOST = 100 @@ -280,6 +290,10 @@ def update_dead_fans(self, dead_fans): Logger.crit("%s dead, %d RPM" % (dead_fan.label, speeds[dead_fan])) Logger.usbdbg("%s fail" % (dead_fan.label)) + fan_fail_record_path = FAN_FAIL_RECORD_DIR + '%s' % (dead_fan.label) + if not os.path.isfile(fan_fail_record_path): + fan_fail_record = open(fan_fail_record_path, 'w') + fan_fail_record.close() for fan in recovered_fans: if self.fanpower: Logger.warn("%s has recovered" % (fan.label,)) @@ -287,6 +301,9 @@ def update_dead_fans(self, dead_fans): Logger.crit("%s has recovered" % (fan.label,)) Logger.usbdbg("%s recovered" % (fan.label)) self.fsc_fan_action(fan, action='recover') + fan_fail_record_path = FAN_FAIL_RECORD_DIR + '%s' % (fan.label) + if os.path.isfile(fan_fail_record_path): + os.remove(fan_fail_record_path) return dead_fans def update_zones(self, dead_fans, time_difference): @@ -310,7 +327,7 @@ def update_zones(self, dead_fans, time_difference): self.fsc_safe_guards(sensors_tuples) for zone in self.zones: Logger.info("PWM: %s" % (json.dumps(zone.pwm_output))) - + mode = 0 chassis_intrusion_boost_flag = 0 if self.chassis_intrusion: self_tray_pull_out = board_callout( @@ -320,10 +337,13 @@ def update_zones(self, dead_fans, time_difference): if chassis_intrusion_boost_flag == 0: pwmval = zone.run(sensors=sensors_tuples, dt=time_difference) + mode = zone.get_set_fan_mode(mode, action='read') else: pwmval = self.boost + mode = fan_mode['boost_mode'] if self.fan_fail: + boost_record_path = RECORD_DIR + 'fan_fail_boost' if self.boost_type == 'progressive' and self.fan_dead_boost: # Cases where we want to progressively bump PWMs dead = len(dead_fans) @@ -333,15 +353,32 @@ def update_zones(self, dead_fans, time_difference): for fan_count, rate in self.fan_dead_boost["data"]: if dead <= fan_count: pwmval = clamp(pwmval + (dead * rate), 0, 100) + mode 
def fail_record_dir(self):
    '''
    Create the directories that record which sensors and fans failed.

    Missing parent directories are created as well, and a directory that
    already exists is left untouched. Any other failure (e.g. the path
    exists but is not a directory) is raised as OSError.
    '''
    for record_dir in (SENSOR_FAIL_RECORD_DIR, FAN_FAIL_RECORD_DIR):
        try:
            os.makedirs(record_dir)
        except OSError:
            # Create first and check afterwards: this avoids the race
            # between a separate isdir() test and mkdir().
            if not os.path.isdir(record_dir):
                raise
/*
 * Weak default for the platform-specific fan check hook.
 *
 * This default does nothing; being a weak symbol, it is replaced by any
 * non-weak definition of the same name linked into the program.
 *
 * status - flag supplied by the caller; unused here.
 */
void __attribute__((weak))
pal_specific_plat_fan_check(bool status)
{
  (void)status;
}