Skip to content

Commit

Permalink
Now we only have a single operation size field (1 byte encoding).
Browse files Browse the repository at this point in the history
Register operands are back to 1 byte.
Encoding, decoding, tests, and doc updated accordingly.
  • Loading branch information
edeiana committed Mar 26, 2024
1 parent bc22a59 commit 2856ab0
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 110 deletions.
46 changes: 23 additions & 23 deletions core/ir/synthetic/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,29 +48,25 @@
byte *
decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
{
/* Clear the instruction.
*/
memset((void *)instr, 0, sizeof(instr_t));

/* Interpret the first 4 bytes of encoded_instr (which are always present) as a uint
* for easier retrieving of category, eflags, #src, and #dst values.
* We can do this safely because encoded_instr is 4 bytes aligned.
*/
uint encoding = *((uint *)&encoded_instr[0]);
uint encoding_header = *((uint *)&encoded_instr[0]);

/* Decode number of register destination operands.
*/
uint num_dsts = encoding & DST_OPND_MASK;
uint num_dsts = encoding_header & DST_OPND_MASK;

/* Decode number of register source operands.
*/
uint num_srcs = (encoding & SRC_OPND_MASK) >> SRC_OPND_SHIFT;
uint num_srcs = (encoding_header & SRC_OPND_MASK) >> SRC_OPND_SHIFT;

instr_set_num_opnds(dcontext, instr, num_dsts, num_srcs);

/* Decode arithmetic flags.
*/
uint eflags = (encoding & FLAGS_MASK) >> FLAGS_SHIFT;
uint eflags = (encoding_header & FLAGS_MASK) >> FLAGS_SHIFT;
uint eflags_instr = 0;
if (TESTANY(SYNTHETIC_INSTR_WRITES_ARITH, eflags))
eflags_instr |= EFLAGS_WRITE_ARITH;
Expand All @@ -85,29 +81,31 @@ decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)

/* Decode synthetic opcode as instruction category.
*/
uint category = (encoding & CATEGORY_MASK) >> CATEGORY_SHIFT;
uint category = (encoding_header & CATEGORY_MASK) >> CATEGORY_SHIFT;
instr_set_category(instr, category);

/* Decode register destination operands and their sizes, if present.
/* Decode register operand size, if there are any operands.
*/
uint num_opnds = num_dsts + num_srcs;
opnd_size_t max_reg_size = OPSZ_NA;
if (num_opnds > 0)
max_reg_size = (opnd_size_t)encoded_instr[HEADER_BYTES];

/* Decode register destination operands, if present.
*/
uint num_dst_bytes = num_dsts * OPERAND_BYTES;
for (uint i = 0; i < num_dst_bytes; i += OPERAND_BYTES) {
reg_id_t dst = (reg_id_t)encoded_instr[i + HEADER_BYTES];
opnd_size_t dst_size = (opnd_size_t)encoded_instr[i + 1 + HEADER_BYTES];
for (uint i = 0; i < num_dsts; ++i) {
reg_id_t dst = (reg_id_t)encoded_instr[i + HEADER_BYTES + 1];
opnd_t dst_opnd = opnd_create_reg(dst);
opnd_set_size(&dst_opnd, dst_size);
opnd_set_size(&dst_opnd, max_reg_size);
instr_set_dst(instr, i, dst_opnd);
}

/* Decode register source operands and their sizes, if present.
/* Decode register source operands, if present.
*/
uint num_src_bytes = num_srcs * OPERAND_BYTES;
for (uint i = 0; i < num_src_bytes; i += OPERAND_BYTES) {
reg_id_t src = (reg_id_t)encoded_instr[i + HEADER_BYTES + num_dst_bytes];
opnd_size_t src_size =
(opnd_size_t)encoded_instr[i + 1 + HEADER_BYTES + num_dst_bytes];
for (uint i = 0; i < num_srcs; ++i) {
reg_id_t src = (reg_id_t)encoded_instr[i + HEADER_BYTES + 1 + num_dsts];
opnd_t src_opnd = opnd_create_reg(src);
opnd_set_size(&src_opnd, src_size);
opnd_set_size(&src_opnd, max_reg_size);
instr_set_src(instr, i, src_opnd);
}

Expand All @@ -116,8 +114,10 @@ decode_from_synth(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr)
instr_set_operands_valid(instr, true);

/* Compute instruction length including bytes for padding to reach 4 bytes alignment.
* Account for 1 additional byte containing max register operand size, if there are
* any operands.
*/
uint num_opnd_bytes = num_src_bytes + num_dst_bytes;
uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0;
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, HEADER_BYTES);
instr->length = instr_length;

Expand Down
94 changes: 57 additions & 37 deletions core/ir/synthetic/encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,26 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
/* Use a local uint variable for easier setting of category, eflags, #src, and #dst
* values.
*/
uint encoding = 0;
uint encoding_header = 0;

/* Encode number of register destination operands.
* Note that a destination operand that is a memory reference should have its
* registers (if any) counted as source operands, since they are being read.
* We use [src|dst]_reg_to_size to keep track of registers we've seen and avoid
* duplicates, and also to record their size, which we encode later.
* We use [src|dst]_reg_used to keep track of registers we've seen and avoid
* duplicates.
* We use max_reg_size to keep track of the largest containing register.
* max_reg_size is a uint instead of opnd_size_t to avoid relying on OPSZ_ enum
* values.
* We convert max_reg_size to OPSZ_ only before encoding.
*/
opnd_size_t src_reg_to_size[MAX_NUM_REGS];
memset((void *)src_reg_to_size, 0, sizeof(src_reg_to_size));
bool src_reg_used[MAX_NUM_REGS];
memset((void *)src_reg_used, 0, sizeof(src_reg_used));
uint num_srcs = 0;
opnd_size_t dst_reg_to_size[MAX_NUM_REGS];
memset((void *)dst_reg_to_size, 0, sizeof(dst_reg_to_size));
bool dst_reg_used[MAX_NUM_REGS];
memset((void *)dst_reg_used, 0, sizeof(dst_reg_used));
uint num_dsts = 0;
uint original_num_dsts = (uint)instr_num_dsts(instr);
uint max_reg_size = 0;
for (uint dst_index = 0; dst_index < original_num_dsts; ++dst_index) {
opnd_t dst_opnd = instr_get_dst(instr, dst_index);
uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd);
Expand All @@ -73,10 +78,13 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
/* Map sub-registers to their containing register.
*/
reg_id_t reg_canonical = reg_to_pointer_sized(reg);
opnd_size_t reg_size = reg_get_size(reg);
if (src_reg_to_size[reg_canonical] != 0) {
opnd_size_t reg_size = reg_get_size(reg_canonical);
uint reg_size_in_bytes = opnd_size_in_bytes(reg_size);
if (!src_reg_used[reg_canonical]) {
++num_srcs;
src_reg_to_size[reg_canonical] = reg_size;
src_reg_used[reg_canonical] = true;
if (reg_size_in_bytes > max_reg_size)
max_reg_size = reg_size_in_bytes;
}
}
} else {
Expand All @@ -85,15 +93,18 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
/* Map sub-registers to their containing register.
*/
reg_id_t reg_canonical = reg_to_pointer_sized(reg);
opnd_size_t reg_size = reg_get_size(reg);
if (dst_reg_to_size[reg_canonical] != 0) {
opnd_size_t reg_size = reg_get_size(reg_canonical);
uint reg_size_in_bytes = opnd_size_in_bytes(reg_size);
if (!dst_reg_used[reg_canonical]) {
++num_dsts;
dst_reg_to_size[reg_canonical] = reg_size;
dst_reg_used[reg_canonical] = true;
if (reg_size_in_bytes > max_reg_size)
max_reg_size = reg_size_in_bytes;
}
}
}
}
encoding |= num_dsts;
encoding_header |= num_dsts;

/* Encode number of register source operands, adding on top of already existing ones.
*/
Expand All @@ -106,14 +117,17 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
/* Map sub-registers to their containing register.
*/
reg_id_t reg_canonical = reg_to_pointer_sized(reg);
opnd_size_t reg_size = reg_get_size(reg);
if (src_reg_to_size[reg_canonical] != 0) {
opnd_size_t reg_size = reg_get_size(reg_canonical);
uint reg_size_in_bytes = opnd_size_in_bytes(reg_size);
if (!src_reg_used[reg_canonical]) {
++num_srcs;
src_reg_to_size[reg_canonical] = reg_size;
src_reg_used[reg_canonical] = true;
if (reg_size_in_bytes > max_reg_size)
max_reg_size = reg_size_in_bytes;
}
}
}
encoding |= (num_srcs << SRC_OPND_SHIFT);
encoding_header |= (num_srcs << SRC_OPND_SHIFT);

/* Encode arithmetic flags.
*/
Expand All @@ -123,55 +137,61 @@ encode_to_synth(dcontext_t *dcontext, instr_t *instr, byte *encoded_instr)
eflags |= SYNTHETIC_INSTR_WRITES_ARITH;
if (TESTANY(EFLAGS_READ_ARITH, eflags_instr))
eflags |= SYNTHETIC_INSTR_READS_ARITH;
encoding |= (eflags << FLAGS_SHIFT);
encoding_header |= (eflags << FLAGS_SHIFT);

/* Encode category as synthetic opcode.
*/
uint category = instr_get_category(instr);
encoding |= (category << CATEGORY_SHIFT);
encoding_header |= (category << CATEGORY_SHIFT);

/* Copy encoding back into encoded_instr output.
*/
*((uint *)&encoded_instr[0]) = encoding;
*((uint *)&encoded_instr[0]) = encoding_header;

/* Encode largest register size, if there is at least one operand.
*/
uint num_opnds = num_dsts + num_srcs;
if (num_opnds > 0) {
CLIENT_ASSERT(max_reg_size != 0,
"instructions with register operands cannot have operand size 0");
encoded_instr[HEADER_BYTES] = opnd_size_from_bytes(max_reg_size);
}

/* Encode register destination operands and their sizes, if present.
/* Encode register destination operands, if present.
*/
uint dst_reg_counter = 0;
if (num_dsts > 0) {
uint reg_counter = 0;
for (uint reg = 0; reg < MAX_NUM_REGS; ++reg) {
if (dst_reg_to_size[reg] != 0) {
if (dst_reg_used[reg]) {
/* XXX i#6662: we might want to consider doing some kind of register
* shuffling.
*/
encoded_instr[dst_reg_counter + HEADER_BYTES] = (byte)reg;
encoded_instr[dst_reg_counter + 1 + HEADER_BYTES] =
(byte)dst_reg_to_size[reg];
dst_reg_counter += OPERAND_BYTES;
encoded_instr[reg_counter + HEADER_BYTES + 1] = (byte)reg;
++reg_counter;
}
}
}

/* Encode register source operands and their sizes, if present.
/* Encode register source operands, if present.
*/
uint src_reg_counter = 0;
if (num_srcs > 0) {
uint reg_counter = 0;
for (uint reg = 0; reg < MAX_NUM_REGS; ++reg) {
if (src_reg_to_size[reg] != 0) {
if (src_reg_used[reg]) {
/* XXX i#6662: we might want to consider doing some kind of register
* shuffling.
*/
encoded_instr[src_reg_counter + HEADER_BYTES + num_dsts * OPERAND_BYTES] =
(byte)reg;
encoded_instr[src_reg_counter + 1 + HEADER_BYTES +
num_dsts * OPERAND_BYTES] = (byte)src_reg_to_size[reg];
src_reg_counter += OPERAND_BYTES;
encoded_instr[reg_counter + HEADER_BYTES + 1 + num_dsts] = (byte)reg;
++reg_counter;
}
}
}

/* Compute instruction length including bytes for padding to reach 4 bytes alignment.
* Account for 1 additional byte containing max register operand size, if there are
* any operands.
*/
uint num_opnd_bytes = dst_reg_counter + src_reg_counter;
uint num_opnd_bytes = num_opnds > 0 ? num_opnds + 1 : 0;
uint instr_length = ALIGN_FORWARD(HEADER_BYTES + num_opnd_bytes, HEADER_BYTES);

/* Compute next instruction's PC as: current PC + instruction length.
Expand Down
41 changes: 21 additions & 20 deletions core/ir/synthetic/encoding_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@
* For this reason the majority of operations that would normally work on instructions
* coming from an actual ISA are not supported.
* The operations we currently support are instr_encode_to_copy() to encode an #instr_t
* of an actual ISA (e.g., x86) and decode() to obtain a synthetic #instr_t.
* of an actual ISA (e.g., x86) and decode_common() to obtain a synthetic #instr_t.
* Synthetic #instr_t can only return the encoded information: categories
* (from #dr_category_t) to indicate the type of operation performed (e.g., a load, a math
* operation, floating point math operation, a branch, etc.), arithmetic flags, number of
* source and destination register operands, register operands and their size, instruction
* length, and ISA mode (which is DR_ISA_SYNTHETIC).
* source and destination register operands, operation size, register operand IDs,
* instruction length, and ISA mode (which is DR_ISA_SYNTHETIC).
* Querying additional information (e.g., the instruction opcode) will return the default
* value generated by instr_create() (i.e., zero).
*/
Expand Down Expand Up @@ -81,29 +81,31 @@
* Note that we are only interested in register dependencies, hence operands that are
* not registers, such as immediates or memory references, are not present.
*
* Following the 4 header bytes are the bytes for encoding register operands.
* Each operand is 2 bytes.
* The first byte encodes the register id (#reg_id_t).
* The second byte encodes the register size in bytes following #opnd_size_t
* (e.g., OPSZ_4 = 4 bytes = 32 bits register).
* Following the 4 header bytes are the bytes for the operation size and for encoding
* register operands.
* The first byte contains the operation size encoded as an OPSZ_ enum value.
* Following the operation size are the register operand IDs.
* Each register operand is 1 byte.
* The destination operands go first, followed by the source operands.
* An instruction can have up to 8 operands (sources + destinations).
* Note that, because of 4 byte alignment, instructions with 1 or 2 operands will have a
* size of 8 bytes (4 header bytes + 4 operand-related bytes), instructions with 3 or 4
* operands will have a size of 12 bytes, and so on.
* Instructions with no operands only have the 4 header bytes.
* For example, an instruction with 3 operands (1 dst, 2 src) has 8 additional bytes that
* are encoded following this scheme:
* 1st chunk:
* Note that, because of 4 byte alignment, instructions with 1 to 3 operands will have a
* size of 8 bytes (4 header bytes + 1 byte for the operands' size + 3 operand-related
* bytes), instructions with 4 to 7 operands will have a size of 12 bytes, while
* instructions with 8 operands will have the maximum size of 16 bytes.
* Instructions with no operands have only the 4 header bytes (no size-related byte nor
* operand-related bytes).
* For example, an instruction with 4 operands (1 dst, 3 src) has 8 additional bytes (on
* top of the 4 header bytes) that are encoded following this scheme:
* 1st chunk of 4 bytes:
* |--------| |--------| |--------| |--------|
* 31.. ..24 23.. ..16 15.. ..8 7.. ..0
* src_op0_sz src_op0 dst_op0_sz dst_op0
* 2nd chunk:
* src_op1 src_op0 dst_op0 op_size
* 2nd chunk of 4 bytes:
* |--------| |--------| |--------| |--------|
* 31.. ..24 23.. ..16 15.. ..8 7.. ..0
* padding src_op1_sz src_op1
* padding padding padding src_op2
*
* Because of 4 byte alignment, the last 2 bytes [31.. ..16] of the 2nd chunk are padding
* Because of 4 byte alignment, the last 3 bytes [31.. ..8] of the 2nd chunk are padding
* and are undefined (i.e., it cannot be assumed that they have been zeroed-out or contain
* any meaningful value).
*/
Expand All @@ -125,7 +127,6 @@
#define SYNTHETIC_INSTR_READS_ARITH 0x2

#define HEADER_BYTES 4
#define OPERAND_BYTES 2

/* Defines the maximum number of non-overlapping registers for any architecture we
* currently support.
Expand Down
Loading

0 comments on commit 2856ab0

Please sign in to comment.