Skip to content

Commit

Permalink
Common: fan-util: display the current fan mode and fan boost cause (#…
Browse files Browse the repository at this point in the history
…730)

Summary:
1. As the title says, display the current fan mode and the cause of fan boost (e.g. failed sensors or failed fans).
For example, the "fan-util --get" command now shows the current fan mode and the sensors or fans that caused boost mode.
2. Show the fan mode as manual when fscd is not running.
Pull Request resolved: facebookexternal/openbmc.wiwynn#730

Test Plan:
Build and test pass on Y2/BC/TP/LTN system.

On YV2 system:
I. Sensor fail occurred:

root@bmc-oob:/tmp/cache_store# fan-util --get
Fan 0 Speed: 9408 RPM (100%)
Fan 1 Speed: 9359 RPM (100%)
Fan Mode: Boost
Sensor Fail:
slot2:soc_therm_margin
nic:mezz_sensor_temp
spb:sp_inlet_temp
slot4:nvme2_ctemp
slot4:nvme1_ctemp
slot2:nvme2_ctemp
slot2:nvme1_ctemp
slot3:nvme2_ctemp
slot3:nvme1_ctemp
slot1:nvme2_ctemp
slot1:nvme1_ctemp
Fan Fail: None
Sled Fan Latch Open: False

II. Fan fail occurred:

root@bmc-oob:/tmp/cache_store# fan-util --get
Fan 0 Speed: 9416 RPM (100%)
Fan 1 Speed: 9359 RPM (100%)
Fan Mode: Boost
Sensor Fail: None
Fan Fail:
Fan 1
Sled Fan Latch Open: False

III. Trigger transitional mode:

root@bmc-oob:/tmp/cache_store# fan-util --get
Fan 0 Speed: 6835 RPM (70%)
Fan 1 Speed: 6749 RPM (70%)
Fan Mode: Transitional
Sensor Fail: None
Fan Fail: None
Sled Fan Latch Open: False

IV. Stop fscd to enter manual mode:
root@bmc-oob:~# sv stop fscd
ok: down: fscd: 0s, normally up

root@bmc-oob:~# fan-util --get
Fan 0 Speed: 6807 RPM (70%)
Fan 1 Speed: 6763 RPM (70%)
Fan Mode: Manual(No fscd running)
Sensor Fail: Not support in manual mode(No fscd running)
Fan Fail: Not support in manual mode(No fscd running)
Sled Fan Latch Open: False

Reviewed By: benwei13

fbshipit-source-id: 1b83eee5d
  • Loading branch information
benwei13 authored and facebook-github-bot committed Oct 26, 2018
1 parent 6cc8067 commit 6346f9b
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 2 deletions.
154 changes: 154 additions & 0 deletions common/recipes-core/fan-util/files/fan-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,28 @@
#include <fcntl.h>
#include <openbmc/pal.h>
#include <string.h>
#include <dirent.h>

#define CMD_SET_FAN_STR "--set"
#define CMD_GET_FAN_STR "--get"
#define ALL_FAN_NUM 0xFF
/* Directories whose entry names are listed as the failed sensors / failed fans. */
#define SENSOR_FAIL_RECORD_DIR "/tmp/sensorfail_record"
#define FAN_FAIL_RECORD_DIR "/tmp/fanfail_record"
/* Marker files: the matching record directory is only listed when its marker exists. */
#define SENSOR_FAIL_FILE "/tmp/cache_store/sensor_fail_boost"
#define FAN_FAIL_FILE "/tmp/cache_store/fan_fail_boost"
/* File whose first byte is read as the ASCII digit of the current fan mode. */
#define FAN_MODE_FILE "/tmp/cache_store/fan_mode"

/* Sub-command identifiers; main() compares its `cmd` value against these. */
enum {
CMD_SET_FAN = 0,
CMD_GET_FAN,
};

/*
 * Fan mode codes. fan_mode_check() reads one ASCII digit from FAN_MODE_FILE,
 * subtracts '0' and matches the result against these values.
 */
enum {
NORMAL = 0,
TRANSIT = 1,
BOOST = 2,
};

static void
print_usage(void) {
printf("Usage: fan-util --set <[0..100] %%> < Fan# [%s] >\n", pal_pwm_list);
Expand Down Expand Up @@ -67,6 +79,140 @@ parse_fan(char *str, uint8_t fan_cnt, uint8_t *fan) {
return 0;
}

/*
 * Print the "Sensor Fail:" section of the "--get" report.
 *
 * status: true when the caller determined that fscd is not running (manual
 *         mode); in that case only a "not supported" line is printed.
 *
 * Otherwise, when SENSOR_FAIL_FILE exists and SENSOR_FAIL_RECORD_DIR can be
 * opened, every directory entry except "." and ".." is printed on its own
 * line. "None" is printed when there is nothing to list.
 */
static void
sensor_fail_check(bool status) {
  DIR *record_dir = NULL;
  struct dirent *entry;
  bool any_listed = false;

  if (status) {
    printf("Sensor Fail: Not support in manual mode(No fscd running)\n");
    return;
  }

  /* The record directory is only consulted when the marker file exists. */
  if (access(SENSOR_FAIL_FILE, F_OK) == 0) {
    record_dir = opendir(SENSOR_FAIL_RECORD_DIR);
  }

  if (record_dir == NULL) {
    printf("Sensor Fail: None\n");
    return;
  }

  printf("Sensor Fail: ");
  for (entry = readdir(record_dir); entry != NULL; entry = readdir(record_dir)) {
    const char *name = entry->d_name;

    if (!strcmp(name, ".") || !strcmp(name, "..")) {
      continue;
    }

    /* Finish the header line before the first listed entry. */
    if (!any_listed) {
      printf("\n");
      any_listed = true;
    }
    printf("%s\n", name);
  }
  closedir(record_dir);

  if (!any_listed) {
    printf("None\n");
  }
}

/*
 * Print the "Fan Fail:" section of the "--get" report.
 *
 * status: true when the caller determined that fscd is not running (manual
 *         mode); in that case only a "not supported" line is printed.
 *
 * Otherwise, when FAN_FAIL_FILE exists and FAN_FAIL_RECORD_DIR can be opened,
 * every directory entry except "." and ".." is printed on its own line.
 * "None" is printed when there is nothing to list.
 */
static void
fan_fail_check(bool status) {
  DIR *record_dir = NULL;
  struct dirent *entry;
  bool any_listed = false;

  if (status) {
    printf("Fan Fail: Not support in manual mode(No fscd running)\n");
    return;
  }

  /* The record directory is only consulted when the marker file exists. */
  if (access(FAN_FAIL_FILE, F_OK) == 0) {
    record_dir = opendir(FAN_FAIL_RECORD_DIR);
  }

  if (record_dir == NULL) {
    printf("Fan Fail: None\n");
    return;
  }

  printf("Fan Fail: ");
  for (entry = readdir(record_dir); entry != NULL; entry = readdir(record_dir)) {
    const char *name = entry->d_name;

    if (!strcmp(name, ".") || !strcmp(name, "..")) {
      continue;
    }

    /* Finish the header line before the first listed entry. */
    if (!any_listed) {
      printf("\n");
      any_listed = true;
    }
    printf("%s\n", name);
  }
  closedir(record_dir);

  if (!any_listed) {
    printf("None\n");
  }
}

static bool
fan_mode_check(void) {
FILE* fp;
char cmd[128];
char buf[32];
int res;
int fd;
uint8_t mode;
int cnt;

sprintf(cmd, "ps | grep /usr/bin/fscd.py | wc -l");
if((fp = popen(cmd, "r")) == NULL) {
printf("Fan Mode: Unknown\n");
return false;
}

if(fgets(buf, sizeof(buf), fp) != NULL) {
res = atoi(buf);
if(res <= 2) {
printf("Fan Mode: Manual(No fscd running)\n");
pclose(fp);
return true;
}
}
pclose(fp);

fd = open(FAN_MODE_FILE, O_RDONLY);
if (fd < 0) {
printf("Fan Mode: Unknown\n");
return false;
}

cnt = read(fd, &mode, sizeof(uint8_t));

if (cnt <= 0) {
printf("Fan Mode: Unknown\n");
close(fd);
return false;
}

mode = mode - '0';
switch(mode) {
case NORMAL:
printf("Fan Mode: Normal\n");
break;
case TRANSIT:
printf("Fan Mode: Transitional\n");
break;
case BOOST:
printf("Fan Mode: Boost\n");
break;
default:
printf("Fan Mode: Unknown\n");
break;
}

close(fd);
return false;
}

int
main(int argc, char **argv) {

Expand All @@ -77,6 +223,7 @@ main(int argc, char **argv) {
int ret;
int rpm = 0;
char fan_name[32];
bool manu_flag = false;

if (argc < 2 || argc > 4) {
print_usage();
Expand Down Expand Up @@ -162,5 +309,12 @@ main(int argc, char **argv) {
}
}

if ((cmd == CMD_GET_FAN) && (argc == 2)) {
manu_flag = fan_mode_check();
sensor_fail_check(manu_flag);
fan_fail_check(manu_flag);
pal_specific_plat_fan_check(manu_flag);
}

return 0;
}
67 changes: 66 additions & 1 deletion common/recipes-core/fscd3/fscd/fsc_zone.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@
#
import sys
import re
import os.path
from fsc_util import Logger, clamp
from fsc_sensor import FscSensorSourceSysfs, FscSensorSourceUtil
import fsc_board


verbose = "-v" in sys.argv

# Directory holding the 'fan_mode' and 'sensor_fail_boost' state files.
RECORD_DIR = '/tmp/cache_store/'
# Directory that receives one empty file per failed sensor, named after the
# expression variable the sensor is bound to.
SENSOR_FAIL_RECORD_DIR = '/tmp/sensorfail_record/'
# Fan mode codes; str() of the value is what gets written to the 'fan_mode' file.
fan_mode = {
'normal_mode': 0,
'trans_mode': 1,
'boost_mode': 2
}

class Fan(object):
def __init__(self, fan_name, pTable):
Expand Down Expand Up @@ -78,11 +85,36 @@ def __init__(self, pwm_output, expr, expr_meta, transitional, counter, boost, se
self.sensor_valid_pre = ([0] * len(self.expr_meta['ext_vars']))
self.sensor_valid_cur = ([0] * len(self.expr_meta['ext_vars']))

def get_set_fan_mode(self, mode, action):
    """Read or persist the current fan mode in RECORD_DIR + 'fan_mode'.

    Args:
        mode: numeric fan mode (a value of the ``fan_mode`` dict). Only used
            when ``action`` is 'write'.
        action: 'read' or 'write'. Any other value is ignored.

    Returns:
        For 'read': the first character stored in the file (a one-character
        string), or ``fan_mode['normal_mode']`` (an int) when the file does
        not exist. For 'write' and for unknown actions: None.
    """
    fan_mode_path = RECORD_DIR + 'fan_mode'
    # Exact comparison: a substring test such as `action in 'read'` would
    # also accept '' or 'r'.
    if action == 'read':
        if not os.path.isfile(fan_mode_path):
            return fan_mode['normal_mode']
        with open(fan_mode_path, 'r') as f:
            return f.read(1)
    elif action == 'write':
        new_value = str(mode)
        if os.path.isfile(fan_mode_path):
            with open(fan_mode_path, 'r') as f:
                # Compare as text: the file stores str(mode), so comparing the
                # int directly would never match and would rewrite the file
                # on every call.
                if f.read(1) == new_value:
                    return
        with open(fan_mode_path, 'w') as f:
            f.write(new_value)

def run(self, sensors, dt):
ctx = {'dt': dt}
outmin = 0
fail_ssd_count = 0
sensor_index = 0
cause_boost_count = 0
no_sane_flag = 0
mode = 0

for v in self.expr_meta['ext_vars']:
sensor_valid_flag = 1
Expand Down Expand Up @@ -110,17 +142,30 @@ def run(self, sensors, dt):
if sensor.status in ['ucr']:
Logger.warn('Sensor %s reporting status %s' % (sensor.name, sensor.status))
outmin = max(outmin, self.transitional)
if outmin == self.transitional:
mode = fan_mode['trans_mode']
else:
if self.sensor_fail == True:
sensor_fail_record_path = SENSOR_FAIL_RECORD_DIR + v
if (sensor.status in ['na']) and (self.sensor_valid_cur[sensor_index] != -1):
if re.match(r'.+_C[2-4]_[0-3]_NVME_.+', sensor.name) != None:
Logger.warn("%s Fail" % v)
outmin = max(outmin, self.boost)
cause_boost_count += 1
elif re.match(r'SSD', sensor.name) != None or re.match(r'(.*)nvme(.*)', sname) != None:
fail_ssd_count = fail_ssd_count + 1
else:
Logger.warn("%s Fail" % v)
outmin = max(outmin, self.boost)
cause_boost_count += 1
if not os.path.isfile(sensor_fail_record_path):
sensor_fail_record = open(sensor_fail_record_path, 'w')
sensor_fail_record.close()
if outmin == self.boost:
mode = fan_mode['boost_mode']
else:
if os.path.isfile(sensor_fail_record_path):
os.remove(sensor_fail_record_path)
else:
if (not self.missing_sensor_assert_flag[sensor_index]) and (self.missing_sensor_assert_retry[sensor_index] >= 2):
Logger.crit('ASSERT: Zone%d Missing sensors: %s' % (self.counter, v))
Expand All @@ -146,6 +191,8 @@ def run(self, sensors, dt):
Logger.crit('ASSERT: Zone%d No sane fan speed could be \
calculated! Using transitional speed.' % (self.counter))
exprout = self.transitional
mode = fan_mode['trans_mode']
no_sane_flag = 1
self.transitional_assert_flag = True
else:
if self.transitional_assert_flag:
Expand All @@ -164,13 +211,31 @@ def run(self, sensors, dt):
list_index = list_index + 1
if fail_ssd_count <= i[0]:
exprout = exprout + i[1]
no_sane_flag = 0
break
else:
if list_index == len(self.ssd_progressive_algorithm['offset_algorithm']):
outmin = max(outmin, self.boost)
cause_boost_count += 1
if outmin == self.boost:
mode = fan_mode['boost_mode']

boost_record_path = RECORD_DIR + 'sensor_fail_boost'
if cause_boost_count != 0:
if not os.path.isfile(boost_record_path):
sensor_fail_boost_record = open(boost_record_path, 'w')
sensor_fail_boost_record.close()
else:
if os.path.isfile(boost_record_path):
os.remove(boost_record_path)

if not exprout:
exprout = 0
if exprout < outmin:
exprout = outmin
else:
if no_sane_flag != 1:
mode = fan_mode['normal_mode']
self.get_set_fan_mode(mode, action='write')
exprout = clamp(exprout, 0, 100)
return exprout
Loading

0 comments on commit 6346f9b

Please sign in to comment.