Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

linux: support for monitoring syscall #1486

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,27 @@ case "$enable_capabilities" in
esac


dnl --enable-syscall: build the Linux SYSCALL process column.
dnl   yes   - always enable
dnl   no    - always disable
dnl   check - default: enable when cross compiling, otherwise probe the
dnl           build host for a usable /proc/self/syscall
AC_ARG_ENABLE([syscall],
              [AS_HELP_STRING([--enable-syscall],
                              [enable 'syscall' monitoring. @<:@default=check@:>@])],
              [],
              [enable_syscall=check]
)

if test "x$enable_syscall" = xcheck; then
   dnl The comparison must go through 'test'. Without it the shell executes
   dnl the value of cross_compiling as a command, and on a cross build that
   dnl means running the never-ending 'yes' utility.
   if test "x$cross_compiling" != xno; then
      dnl The build host says nothing about the target, so assume support.
      enable_syscall=yes
   dnl procfs entries report a size of zero, so 'test -s' can never succeed
   dnl on them. Read the file instead and check that something came back.
   elif test -r /proc/self/syscall && test -n "`cat /proc/self/syscall 2>/dev/null`"; then
      enable_syscall=yes
   else
      enable_syscall=no
   fi
fi
if test "x$enable_syscall" = xyes; then
   AC_DEFINE([HAVE_SYSCALL], [1], [Define if syscall monitoring enabled.])
fi


AC_ARG_ENABLE([delayacct],
[AS_HELP_STRING([--enable-delayacct],
[enable Linux delay accounting support; requires pkg-config, libnl-3 and libnl-genl-3 @<:@default=check@:>@])],
Expand Down Expand Up @@ -859,6 +880,7 @@ AC_MSG_RESULT([
(Linux) delay accounting: $enable_delayacct
(Linux) sensors: $enable_sensors
(Linux) capabilities: $enable_capabilities
(Linux) syscall: $enable_syscall
unicode: $enable_unicode
affinity: $enable_affinity
unwind: $enable_unwind
Expand Down
30 changes: 30 additions & 0 deletions linux/LinuxProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ const ProcessFieldData Process_fields[LAST_PROCESSFIELD] = {
#endif
[GPU_TIME] = { .name = "GPU_TIME", .title = "GPU_TIME ", .description = "Total GPU time", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
[GPU_PERCENT] = { .name = "GPU_PERCENT", .title = " GPU% ", .description = "Percentage of the GPU time the process used in the last sampling", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, },
#ifdef HAVE_SYSCALL
[SYSCALL] = { .name = "SYSCALL", .title = "SYSCALL", .description = "Current syscall of the process", .flags = PROCESS_FLAG_LINUX_SYSCALL, .autoWidth = true, },
#endif
};

Process* LinuxProcess_new(const Machine* host) {
Expand Down Expand Up @@ -362,6 +365,24 @@ static void LinuxProcess_rowWriteField(const Row* super, RichString* str, Proces
xSnprintf(buffer, n, "N/A ");
}
break;
#ifdef HAVE_SYSCALL
case SYSCALL: {
switch (lp->syscall_state) {
case SYSCALL_STATE_CALLING:
xSnprintf(buffer, n, "%*d ", Row_fieldWidths[SYSCALL], lp->syscall_num);
break;
case SYSCALL_STATE_RUNNING:
attr = CRT_colors[PROCESS_RUN_STATE];
xSnprintf(buffer, n, "%-*s ", Row_fieldWidths[SYSCALL], "running");
break;
default:
attr = CRT_colors[PROCESS_SHADOW];
xSnprintf(buffer, n, "%-*s ", Row_fieldWidths[SYSCALL], "N/A");
}
RichString_appendWide(str, attr, buffer);
return;
}
#endif
default:
Process_writeField(this, str, field);
return;
Expand Down Expand Up @@ -466,6 +487,15 @@ static int LinuxProcess_compareByKey(const Process* v1, const Process* v2, Proce
return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time);
case ISCONTAINER:
return SPACESHIP_NUMBER(v1->isRunningInContainer, v2->isRunningInContainer);
#ifdef HAVE_SYSCALL
case SYSCALL: {
int r = SPACESHIP_NUMBER(p1->syscall_state, p2->syscall_state);
if (r)
return r;

return SPACESHIP_NUMBER(p1->syscall_num, p2->syscall_num);
}
#endif
default:
return Process_compareByKey_Base(v1, v2, key);
}
Expand Down
12 changes: 12 additions & 0 deletions linux/LinuxProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ in the source distribution for its full text.
#define PROCESS_FLAG_LINUX_AUTOGROUP 0x00080000
#define PROCESS_FLAG_LINUX_GPU 0x00100000
#define PROCESS_FLAG_LINUX_CONTAINER 0x00200000
#define PROCESS_FLAG_LINUX_SYSCALL 0x00400000

/*
 * Classification of what was read from /proc/<pid>/syscall.
 *
 * The numeric values are significant: they are the primary sort key of the
 * SYSCALL column, so the enumerators must keep this order.
 */
typedef enum SyscallState_ {
   /* The file content starts with the literal text "running". */
   SYSCALL_STATE_RUNNING = 0,
   /* A syscall number was parsed; it is stored alongside this state. */
   SYSCALL_STATE_CALLING = 1,
   /* The file could not be read or its content was not recognised. */
   SYSCALL_STATE_NA = 2,
} SyscallState;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you are doing something wrong here. Normally a process won't be waiting for a syscall, so the presence of a syscall number would be more of an exception than the rule.

I won't say you can't use an enum here. But the better way is to merge this status with the syscall number, so that you don't need two member variables for recording the same thing.

Here is what I mean:

typedef enum LinuxSyscallState_ {
   SYSCALL_STATE_NO_DATA = INT_MIN,
   SYSCALL_STATE_RUNNING = INT_MIN + 1,
   SYSCALL_STATE_BLOCKED = -1
} LinuxSyscallState;

By the way, Linux syscall table for x86-64 is in syscall_64.tbl. The file is located in different directories in Linux source code throughout Linux versions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also know that the x32 system call starts from 0x40000000, and the system call allows negative numbers to exist. Using one variable is always unsafe, but using two can completely eliminate this kind of thing and is beneficial for sorting

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also know that the x32 system call starts from 0x40000000, and the system call allows negative numbers to exist. Using one variable is always unsafe, but using two can completely eliminate this kind of thing and is beneficial for sorting

Nah. 0x40000000 is positive. The negative should always be unused or otherwise they won't reserve -1 for the documented reason.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Firstly, 0x40000000 is certainly not a negative number, but it is a special case. With one special case, there may be more. Also, if you do this, it will cause all N/A to be ranked at the top, which is not user-friendly. 😄

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JohnSanpe With a mapping table implemented, you are not likely to order the syscalls by number, but by the mapped name instead.
Linux assigns syscall number in a rather arbitrary way, so, with the table implemented, ordering by name would make more sense.

Copy link

@ffashion ffashion Jun 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Explorer09 @JohnSanpe
First, I think using a negative number to represent a syscall state is unsafe, even though Linux may not currently use negative numbers as syscall IDs.
Second, the state of a syscall and the syscall ID are actually two different things, and merging them would reduce readability.
Third, I think we can use a struct to wrap the syscall ID and the syscall state, like

typedef enum LinuxSyscallState_ {
   SYSCALL_STATE_ID,
   SYSCALL_STATE_RUNNING,
   SYSCALL_STATE_NA
} LinuxSyscallState;

typedef struct LinuxSyscall_ {
  LinuxSyscallState state;
  int  syscall_id;
} LinuxSyscall;


typedef struct LinuxProcess_ {
Process super;
Expand Down Expand Up @@ -118,6 +125,11 @@ typedef struct LinuxProcess_ {
/* Autogroup scheduling (CFS) information */
long int autogroup_id;
int autogroup_nice;

#ifdef HAVE_SYSCALL
SyscallState syscall_state;
int syscall_num;
#endif
} LinuxProcess;

extern int pageSize;
Expand Down
38 changes: 38 additions & 0 deletions linux/LinuxProcessTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,38 @@ static void LinuxProcessTable_readCwd(LinuxProcess* process, openat_arg_t procFd
free_and_xStrdup(&process->super.procCwd, pathBuffer);
}

/*
 * Read /proc/<pid>/syscall (thread-specific data)
 *
 * Updates process->syscall_state and process->syscall_num and widens the
 * SYSCALL column to fit the text that will be shown:
 *   - "<number> ..."  -> SYSCALL_STATE_CALLING, syscall_num = <number>
 *   - "running..."    -> SYSCALL_STATE_RUNNING
 *   - anything else, or a failed/empty read -> SYSCALL_STATE_NA
 *
 * syscall_num is reset to 0 whenever the state is not CALLING. The sort
 * comparator falls back to syscall_num when two rows share a state, so a
 * stale number left over from an earlier scan must not leak into the
 * ordering of "running" or "N/A" rows.
 */
#ifdef HAVE_SYSCALL
static void LinuxProcessTable_readSyscall(LinuxProcess* process, openat_arg_t procFd) {
   char buffer[1024];

   ssize_t r = xReadfileat(procFd, "syscall", buffer, sizeof(buffer));
   if (r > 0) {
      /* NOTE(review): assumes fast_strtoull_dec() advances numPtr past the
       * digits it consumed and leaves it untouched when there are none --
       * confirm against its definition. */
      char* numPtr = buffer;
      unsigned long long num = fast_strtoull_dec(&numPtr, r);

      /* Require at least one digit so a leading blank is not mistaken for
       * syscall number 0. */
      if (numPtr != buffer && *numPtr == ' ') {
         process->syscall_state = SYSCALL_STATE_CALLING;
         process->syscall_num = (int)num;
         Row_updateFieldWidth(SYSCALL, snprintf(NULL, 0, "%d", process->syscall_num));
         return;
      }

      if (String_startsWith(buffer, "running")) {
         process->syscall_state = SYSCALL_STATE_RUNNING;
         process->syscall_num = 0;
         Row_updateFieldWidth(SYSCALL, strlen("running"));
         return;
      }
   }

   /* Unreadable, empty or unrecognised content. */
   process->syscall_state = SYSCALL_STATE_NA;
   process->syscall_num = 0;
   Row_updateFieldWidth(SYSCALL, strlen("N/A"));
}
#endif /* HAVE_SYSCALL */

/*
* Read /proc/<pid>/exe (process-shared data)
*/
Expand Down Expand Up @@ -1694,6 +1726,12 @@ static bool LinuxProcessTable_recurseProcTree(LinuxProcessTable* this, openat_ar
LinuxProcessTable_readCwd(lp, procFd, mainTask);
}

#ifdef HAVE_SYSCALL
if (ss->flags & PROCESS_FLAG_LINUX_SYSCALL) {
LinuxProcessTable_readSyscall(lp, procFd);
}
#endif

if ((ss->flags & PROCESS_FLAG_LINUX_AUTOGROUP) && this->haveAutogroup) {
LinuxProcessTable_readAutogroup(lp, procFd, mainTask);
}
Expand Down
1 change: 1 addition & 0 deletions linux/ProcessField.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ in the source distribution for its full text.
GPU_TIME = 132, \
GPU_PERCENT = 133, \
ISCONTAINER = 134, \
SYSCALL = 135, \
// End of list


Expand Down
Loading