Hi All.
我这边使用一些简单的代码验证MounRiver将C语言编译成汇编的时候,发现编译器在处理字节和半字加载的时候经常还需要添加左移和右移命令,按照RISCV的spec描述支持lh和hu,lb和lbu指令,应该是直接一条指令就能实现加载和扩展,实际应用中编译中无论是否是有无符号数加载都采用无符号加载然后再左移右移处理(参考下面问题1和问题2),明显效率比较低,请问是否有配置可以改善,以及为什么编译器默认会采用这些比较复杂的汇编?配置不同优化等级结果还是一样。谢谢!
测试代码:
typedef struct
{
__IO uint16_t Data_U16;
__IO int16_t Data_S16;
__IO uint8_t Data_U8;
__IO int8_t Data_S8;
} REG_TypeDef;
#define MCU_REG_BASE (0x10000000)
#define MCU_REG ((REG_TypeDef *) MCU_REG_BASE)
volatile uint32_t DataLoad_U32;
volatile int32_t DataLoad_S32;
int main(void)
{
DataLoad_U32 = MCU_REG->Data_U16;
DataLoad_U32 = MCU_REG->Data_U8;
DataLoad_U32 = MCU_REG->Data_S16;
DataLoad_U32 = MCU_REG->Data_S8;
DataLoad_S32 = MCU_REG->Data_U16;
DataLoad_S32 = MCU_REG->Data_U8;
DataLoad_S32 = MCU_REG->Data_S16;
DataLoad_S32 = MCU_REG->Data_S8;
}
编译结果
int main(void)
{
DataLoad_U32 = MCU_REG->Data_U16;
18:100007b7 luia5,0x10000
1c:2396 lhua3,0(a5)
1e:20000737 luia4,0x20000
22:00470713 addia4,a4,4 # 20000004
26:06c2 sllia3,a3,0x10
28:82c1 srlia3,a3,0x10
2a:c314 swa3,0(a4)
DataLoad_U32 = MCU_REG->Data_U8;
2c:23d4 lbua3,4(a5)
DataLoad_S32 = MCU_REG->Data_U8;
DataLoad_S32 = MCU_REG->Data_S16;
DataLoad_S32 = MCU_REG->Data_S8;
}
2e:4501 lia0,0
DataLoad_U32 = MCU_REG->Data_U8;
30:0ff6f693 andia3,a3,255
34:c314 swa3,0(a4)
DataLoad_U32 = MCU_REG->Data_S16;
36:23b6 lhua3,2(a5)
38:06c2 sllia3,a3,0x10
3a:86c1 sraia3,a3,0x10
3c:c314 swa3,0(a4)
DataLoad_U32 = MCU_REG->Data_S8;
3e:33d4 lbua3,5(a5)
40:06e2 sllia3,a3,0x18
42:86e1 sraia3,a3,0x18
44:c314 swa3,0(a4)
DataLoad_S32 = MCU_REG->Data_U16;
46:2396 lhua3,0(a5)
48:20000737 luia4,0x20000
4c:00070713 mva4,a4
50:06c2 sllia3,a3,0x10 #疑问点1,前面采用lhu,16位无符号数会自动扩展为32位,应该不需要左右移
52:82c1 srlia3,a3,0x10
54:c314 swa3,0(a4)
DataLoad_S32 = MCU_REG->Data_U8;
56:23d4 lbua3,4(a5)
58:0ff6f693 andia3,a3,255
5c:c314 swa3,0(a4)
DataLoad_S32 = MCU_REG->Data_S16;
5e:23b6 lhua3,2(a5) #疑问点2,这里应采用lh而不是lhu
60:06c2 sllia3,a3,0x10
62:86c1 sraia3,a3,0x10
64:c314 swa3,0(a4)
DataLoad_S32 = MCU_REG->Data_S8;
66:33dc lbua5,5(a5)
68:07e2 sllia5,a5,0x18
6a:87e1 sraia5,a5,0x18
6c:c31c swa5,0(a4)
}