2021-09-10 21:15:08 星期五
这是csapp里面关于callee-saved registers怎么用的讲解。当时看得还是不明就里,现在已经做了2个汇编相关的lab,才明白它确实讲得非常清楚。
题外话,图床改从这里传的了,现在看到图片的网址是https://s.pc.qq.com/tousu/img/20210910/8986371_1631279936.jpg
我的内心:@&¥%……&@#¥……&*(qq投诉emmmmmmmmm希望不会挂掉……)
part A
基础不牢,地动山摇。今晚在用Y86-64汇编语言实现以下c代码的时候,
/* rsum_list - recursively add up the val fields of a linked list.
 * An empty (null) list sums to 0. */
long rsum_list(list_ptr ls)
{
    if (ls) {
        long head = ls->val;                 /* value stored in this node */
        long tail = rsum_list(ls->next);     /* sum of the remaining nodes */
        return head + tail;
    }
    return 0;
}
因为搞不明白callee saved register怎么用,还有push和pop还有一点模糊的地方,到最后还是参考了别人的代码才能写对,唉。
# long rsum_list(list_ptr ls) -- recursive sum of a linked list (Y86-64)
# In:  %rdi = ls (node layout used here: val at offset 0, next at offset 8)
# Out: %rax = sum of all val fields, 0 for an empty list
# %rbp is saved on entry and restored on exit; it holds ls->val across
# the recursive call.
rsum_list:
        pushq   %rbp                    # save caller's %rbp
        xorq    %rax,%rax               # result = 0 (returned as-is when ls is null)
        andq    %rdi,%rdi               # set ZF if ls == 0
        je      rsum_done               # empty list: return 0
        mrmovq  (%rdi),%rbp             # %rbp = ls->val
        mrmovq  8(%rdi),%rdi            # %rdi = ls->next (argument of the recursion)
        call    rsum_list               # %rax = rsum_list(ls->next)
        addq    %rbp,%rax               # %rax = val + rest
rsum_done:
        popq    %rbp                    # restore caller's %rbp
        ret
再次题外话,别人做这个lab是
我趁着考完期中的空闲时间,花了近一天时间啃了下第四章,并顺便做了个下这个 Architecture Lab。
而我光是配环境就是2个小时,到现在6.5h了才做完part A(一共3个part)的前两个函数(一共3个函数),还全是抄的自己完全做不出来,真是差距巨大啊orz
2021-09-11 12:13:35 星期六
终于自己写出来了一次……
/* copy_block - copy len words from src to dest and return the XOR of
 * every word copied. Nothing is copied and 0 is returned when len <= 0. */
long copy_block(long *src, long *dest, long len)
{
    long checksum = 0;
    long i;

    for (i = 0; i < len; i++) {
        long word = src[i];
        dest[i] = word;
        checksum ^= word;
    }
    return checksum;
}
# Execution begins at address 0
# Startup stub: set up the stack pointer, run main, then halt.
.pos 0
irmovq stack,%rsp # %rsp = address of the `stack` label
call main # run the test program
halt # stop the simulator once main returns
.align 8
# Source block
# Three 8-byte words that copy_block reads.
src:
.quad 0x00a
.quad 0x0b0
.quad 0xc00
# Destination block
# Three 8-byte words with placeholder values that copy_block overwrites.
dest:
.quad 0x111
.quad 0x222
.quad 0x333
# main: calls copy_block(src, dest, 3) and returns its result unchanged in %rax.
main: irmovq src,%rdi # 1st argument: address of the source block
irmovq dest,%rsi # 2nd argument: address of the destination block
irmovq $3,%rdx # 3rd argument: len = 3 words
call copy_block
ret
# long copy_block(long *src, long *dest, long len)
# Copies len 8-byte words from src to dest and returns the XOR of all
# copied words.
# In:    %rdi = src, %rsi = dest, %rdx = len
# Out:   %rax = XOR checksum (0 when len <= 0)
# Uses:  %rcx = current word, %r8 = constant 8, %r9 = constant 1
#        (%r8/%r9 are saved and restored; %rcx, %rdi, %rsi, %rdx are modified)
copy_block:
        pushq   %r8                     # save %r8
        pushq   %r9                     # save %r9
        irmovq  $0,%rax                 # result = 0
        irmovq  $8,%r8                  # word size, used to advance the pointers
        irmovq  $1,%r9                  # decrement for len
        andq    %rdx,%rdx               # set CC from len for the first test
        jmp     loopjudge
loop:
        mrmovq  (%rdi),%rcx             # val = *src
        addq    %r8,%rdi                # src++
        rmmovq  %rcx,(%rsi)             # *dest = val
        addq    %r8,%rsi                # dest++
        xorq    %rcx,%rax               # result ^= val
        subq    %r9,%rdx                # len--, sets CC for the test below
loopjudge:
        jg      loop                    # continue while len > 0
        # The stack is LIFO: restore in the reverse order of the pushes.
        # Popping %r8 first would hand the caller back swapped values.
        popq    %r9
        popq    %r8
        ret
.pos 0x100
stack:
root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/misc# ./yas a-copy.ys
root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/misc# ./yis a-copy.yo
Stopped in 40 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000cba
%rcx: 0x0000000000000000 0x0000000000000c00
%rsp: 0x0000000000000000 0x0000000000000100
%rsi: 0x0000000000000000 0x0000000000000048
%rdi: 0x0000000000000000 0x0000000000000030
Changes to memory:
0x0030: 0x0000000000000111 0x000000000000000a
0x0038: 0x0000000000000222 0x00000000000000b0
0x0040: 0x0000000000000333 0x0000000000000c00
0x00f0: 0x0000000000000000 0x000000000000006f
0x00f8: 0x0000000000000000 0x0000000000000013
当然这也是参考过的,第一次不知道len>0的判断具体怎么写所以用了jne。
我用了%rcx来存val的值,看到其他人的代码用的是callee saved register,我不知道我这样是不是不太好……
part B
2021-09-11 13:15:56 星期六
1个小时大概看了一遍书上内容,完全看不懂。lab更是就算参考了其他做过的人的博客也是连头绪都没有,已经不想做了。
2021-09-13 12:38:00 星期一
##################################################################
#-----------------header comment------------------
#-----------------name: cutesnake
##Fetch
# icode:ifun <- M1[PC]
# rA:rB <- M1[PC + 1]
# valC <- M8[PC + 2]
# valP <- PC + 10
#
##Decode
# #valA <- R[rA]
# valB <- R[rB]
#
##Execute
# valE <- valB + valC
# Set CC
#
##Memory
#
##Write back
# R[rB] <- valE
#
##PC update
# PC <- valP
#
#/* $begin seq-all-hcl */
####################################################################
# HCL Description of Control for Single Cycle Y86-64 Processor SEQ #
# Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010 #
####################################################################
## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work
####################################################################
# C Include's. Don't alter these #
####################################################################
quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote ' {plusmode=0;return sim_main(argc,argv);}'
####################################################################
# Declarations. Do not change/remove/delete any of these #
####################################################################
##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP 'I_NOP'
wordsig IHALT 'I_HALT'
wordsig IRRMOVQ 'I_RRMOVQ'
wordsig IIRMOVQ 'I_IRMOVQ'
wordsig IRMMOVQ 'I_RMMOVQ'
wordsig IMRMOVQ 'I_MRMOVQ'
wordsig IOPQ 'I_ALU'
wordsig IJXX 'I_JMP'
wordsig ICALL 'I_CALL'
wordsig IRET 'I_RET'
wordsig IPUSHQ 'I_PUSHQ'
wordsig IPOPQ 'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ 'I_IADDQ'
##### Symbolic represenations of Y86-64 function codes #####
wordsig FNONE 'F_NONE' # Default function code
##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP 'REG_RSP' # Stack Pointer
wordsig RNONE 'REG_NONE' # Special value indicating "no register"
##### ALU Functions referenced explicitly #####
wordsig ALUADD 'A_ADD' # ALU should add its arguments
##### Possible instruction status values #####
wordsig SAOK 'STAT_AOK' # Normal execution
wordsig SADR 'STAT_ADR' # Invalid memory address
wordsig SINS 'STAT_INS' # Invalid instruction
wordsig SHLT 'STAT_HLT' # Halt instruction encountered
##### Signals that can be referenced by control logic ####################
##### Fetch stage inputs #####
wordsig pc 'pc' # Program counter
##### Fetch stage computations #####
wordsig imem_icode 'imem_icode' # icode field from instruction memory
wordsig imem_ifun 'imem_ifun' # ifun field from instruction memory
wordsig icode 'icode' # Instruction control code
wordsig ifun 'ifun' # Instruction function
wordsig rA 'ra' # rA field from instruction
wordsig rB 'rb' # rB field from instruction
wordsig valC 'valc' # Constant from instruction
wordsig valP 'valp' # Address of following instruction
boolsig imem_error 'imem_error' # Error signal from instruction memory
boolsig instr_valid 'instr_valid' # Is fetched instruction valid?
##### Decode stage computations #####
wordsig valA 'vala' # Value from register A port
wordsig valB 'valb' # Value from register B port
##### Execute stage computations #####
wordsig valE 'vale' # Value computed by ALU
boolsig Cnd 'cond' # Branch test
##### Memory stage computations #####
wordsig valM 'valm' # Value read from memory
boolsig dmem_error 'dmem_error' # Error signal from data memory
####################################################################
# Control Signal Definitions. #
####################################################################
################ Fetch Stage ###################################
# Determine instruction code
# An instruction-memory error is turned into a nop.
word icode = [
imem_error: INOP;
1: imem_icode; # Default: get from instruction memory
];
# Determine instruction function
# An instruction-memory error yields the default function code.
word ifun = [
imem_error: FNONE;
1: imem_ifun; # Default: get from instruction memory
];
# Is the fetched icode one of the known instructions?
# IIADDQ is listed so that iaddq is accepted as a valid instruction.
bool instr_valid = icode in
{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };
# Does fetched instruction require a regid byte?
# iaddq does: its rA:rB byte is read from M1[PC + 1].
bool need_regids =
icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ,
IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };
# Does fetched instruction require a constant word?
# iaddq does: its constant valC is read from M8[PC + 2].
bool need_valC =
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };
################ Decode Stage ###################################
## What register should be used as the A source?
## iaddq is not listed here, so it falls through to RNONE (it reads no valA).
word srcA = [
icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ } : rA;
icode in { IPOPQ, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the B source?
## iaddq reads rB: valB <- R[rB].
word srcB = [
icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
## What register should be used as the E destination?
## IRRMOVQ writes rB only when Cnd holds.
## iaddq writes its result back to rB: R[rB] <- valE.
word dstE = [
icode in { IRRMOVQ } && Cnd : rB;
icode in { IIRMOVQ, IOPQ, IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't write any register
];
## What register should be used as the M destination?
## iaddq is not listed here, so it writes no register from memory.
word dstM = [
icode in { IMRMOVQ, IPOPQ } : rA;
1 : RNONE; # Don't write any register
];
################ Execute Stage ###################################
## Select input A to ALU
## For iaddq the A input is the instruction constant valC.
word aluA = [
icode in { IRRMOVQ, IOPQ } : valA;
icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
icode in { ICALL, IPUSHQ } : -8;
icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
];
## Select input B to ALU
## For iaddq the B input is valB, giving valE <- valB + valC.
word aluB = [
icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
## Set the ALU function
## Only IOPQ selects the operation via ifun; everything else, iaddq included, adds.
word alufun = [
icode == IOPQ : ifun;
1 : ALUADD;
];
## Should the condition codes be updated?
## iaddq updates the condition codes, as IOPQ does.
bool set_cc = icode in { IOPQ, IIADDQ };
################ Memory Stage ###################################
## iaddq appears in none of the memory signals below: it neither reads nor writes memory.
## Set read control signal
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };
## Set write control signal
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };
## Select memory address
## Some instructions address memory with the ALU result valE, others with valA.
word mem_addr = [
icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
icode in { IPOPQ, IRET } : valA;
# Other instructions don't need address
];
## Select memory input data
word mem_data = [
# Value from register
icode in { IRMMOVQ, IPUSHQ } : valA;
# Return PC
icode == ICALL : valP;
# Default: Don't write anything
];
## Determine instruction status
## Cases are checked in order: address error, invalid instruction, halt, then OK.
word Stat = [
imem_error || dmem_error : SADR;
!instr_valid: SINS;
icode == IHALT : SHLT;
1 : SAOK;
];
################ Program Counter Update ############################
## What address should instruction be fetched at
## iaddq matches none of the special cases, so it takes the default: PC <- valP.
word new_pc = [
# Call. Use instruction constant
icode == ICALL : valC;
# Taken branch. Use instruction constant
icode == IJXX && Cnd : valC;
# Completion of RET instruction. Use value from stack
icode == IRET : valM;
# Default: Use incremented PC
1 : valP;
];
#/* $end seq-all-hcl */
运行结果:
root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/seq# (cd ../ptest; make SIM=../seq/ssim TFLAGS=-i)
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
All 756 ISA Checks Succeed
实在不会,只能抄答案了。(其实看了书做了一点笔记已经大概能明白这是在干嘛了:对于指令IIADDQ,是否需要寄存器,是否需要常数,从哪来到哪去,哪里用到就往哪填,基本就是这么个套路。)
编译时候还是遇到很多小问题,还是参考了其他人的博客才能解决。但最后编译的时候还是报了一堆类似
ssim.c:880:2: warning: 'result' is deprecated: use Tcl_GetStringResult/Tcl_SetResult [-Wdeprecated-declarations]
interp->result = "No arguments allowed";
这样的警告(是warning,不是error),我当时完全不明所以(从警告内容看,是新版Tcl已经弃用了直接访问interp->result的写法,建议改用Tcl_GetStringResult/Tcl_SetResult),还好最后不影响判题程序。
这个lab给我的感觉就是环境上问题多多;而且有关内容cmu的老师上课也不讲,应该属于不怎么重要那种lab吧=。=
2021-09-14 07:48:26 星期二
时隔一天回来自己添上注释,原来都是比对着书上照猫画虎就行,感觉已经不那么难了。看了一下之前版本的作业还有加上ILEAVE的指令,这一版给删掉了,应该是已经降了难度了吧=。=
2021-09-14 09:41:44 星期二
partC发现%r10用得非常浪费,用iaddq指令替换了在寄存器里移来移去的指令。性能提高了,但还是零分。看了看博客发现要细看第五章的内容,而且partc里用到的主要都是编译器帮我们完成的,现在的编译器已经非常先进了,恐怕用到这些知识的机会不多,就先过了吧。
../misc/yas ncopy.ys 生成ncopy.yo
./check-len.pl < ncopy.yo 检测长度
make drivers 生成ncopy.ys的测试程序
make psim VERSION=full 生成新的psim
./psim -t sdriver.yo 测试small 4-element array
./psim -t ldriver.yo 测试larger 63-element array
./correctness.pl 测试不同Block length下code range是否符合
./benchmark.pl 评分
————————————————
版权声明:本文为CSDN博主「热爱学习的贾克斯」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_42234461/article/details/108720264
2021-09-15 19:11:53 星期三
记一下题,方便以后回顾。
要求:优化以下ncopy函数的汇编代码,使得运行用时尽量短
#include <stdio.h>
/* Element type of the arrays below.
 * The original `typedef word_t word_t;` referred to a type that is not
 * declared anywhere in this file, so it could not compile.
 * NOTE(review): `long` is assumed to match the 8-byte words the Y86-64
 * version of ncopy copies -- confirm for the host compiler. */
typedef long word_t;
/* Source and destination buffers used by main(). */
word_t src[8], dst[8];
/* $begin ncopy */
/*
 * ncopy - copy len words from src to dst and return how many of the
 * copied words are strictly positive. Copies nothing and returns 0
 * when len <= 0.
 */
word_t ncopy(word_t *src, word_t *dst, word_t len)
{
    word_t positives = 0;
    word_t i;

    for (i = 0; i < len; i++) {
        word_t val = src[i];
        dst[i] = val;
        if (val > 0)
            positives++;
    }
    return positives;
}
/* $end ncopy */
/* Fill src with 1..8, copy it to dst with ncopy, and print the number of
 * positive words that ncopy reports. */
int main()
{
    word_t i, count;

    for (i = 0; i < 8; i++)
        src[i] = i + 1;
    count = ncopy(src, dst, 8);
    /* count is a word_t, not an int: cast it so the conversion specifier
     * always matches the argument that is passed. */
    printf("count=%ld\n", (long) count);
    /* Returning from main ends the program like exit(0) did, without
     * needing <stdlib.h>, which this file does not include. */
    return 0;
}
我的代码,avg12.70(原始15.18)
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
# ncopy: copies len words from src to dst, one word per iteration, and
# returns in %rax the number of copied words that are > 0.
# In: %rdi = src, %rsi = dst, %rdx = len. Uses %r10 for the current word.
ncopy:
##################################################################
# You can modify this portion
# Loop header
xorq %rax,%rax # count = 0;
andq %rdx,%rdx # len <= 0?
jle Done # if so, goto Done:
Loop: mrmovq (%rdi), %r10 # read val from src...
rmmovq %r10, (%rsi) # ...and store it to dst
andq %r10, %r10 # val <= 0?
jle Npos # if so, goto Npos:
iaddq $1, %rax # count++
Npos:
iaddq $-1, %rdx # len--
iaddq $8, %rdi # src++ (one 8-byte word)
iaddq $8, %rsi # dst++ (one 8-byte word)
andq %rdx,%rdx # len > 0? (re-tested because the two pointer iaddq's above overwrote the condition codes set by len--)
jg Loop # if so, goto Loop:
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
满分60分答案,avg CPE7.49
#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
# ncopy (10-way unrolled): copies len words from src to dst and adds 1 to
# %rax for every copied word that is > 0.
# In: %rdi = src, %rsi = dst, %rdx = len. Uses %r10/%r11 for the words in flight.
# NOTE(review): unlike the baseline, this version never clears %rax, so the
# returned count is only right if %rax is 0 on entry -- confirm that the
# driver/simulator guarantees this.
ncopy:
##################################################################
# You can modify this portion
# Loop header
iaddq $-10,%rdx # len -= 10; negative means fewer than 10 words remain
jl Root # if so, skip the unrolled loop and handle the remainder
# Unrolled body: 10 words per pass. %r10 and %r11 alternate so that the next
# word is already being loaded while the current one is stored and tested.
Loop1: mrmovq (%rdi), %r10 # load word 0
mrmovq 8(%rdi), %r11 # load word 1 ahead of time
rmmovq %r10, (%rsi) # store word 0
andq %r10, %r10 # word 0 <= 0?
jle Loop2 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop2: mrmovq 16(%rdi), %r10 # load word 2 ahead of time
rmmovq %r11, 8(%rsi) # store word 1
andq %r11, %r11 # word 1 <= 0?
jle Loop3 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop3: mrmovq 24(%rdi), %r11 # load word 3 ahead of time
rmmovq %r10, 16(%rsi) # store word 2
andq %r10, %r10 # word 2 <= 0?
jle Loop4 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop4: mrmovq 32(%rdi), %r10 # load word 4 ahead of time
rmmovq %r11, 24(%rsi) # store word 3
andq %r11, %r11 # word 3 <= 0?
jle Loop5 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop5: mrmovq 40(%rdi), %r11 # load word 5 ahead of time
rmmovq %r10, 32(%rsi) # store word 4
andq %r10, %r10 # word 4 <= 0?
jle Loop6 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop6: mrmovq 48(%rdi), %r10 # load word 6 ahead of time
rmmovq %r11, 40(%rsi) # store word 5
andq %r11, %r11 # word 5 <= 0?
jle Loop7 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop7: mrmovq 56(%rdi), %r11 # load word 7 ahead of time
rmmovq %r10, 48(%rsi) # store word 6
andq %r10, %r10 # word 6 <= 0?
jle Loop8 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop8: mrmovq 64(%rdi), %r10 # load word 8 ahead of time
rmmovq %r11, 56(%rsi) # store word 7
andq %r11, %r11 # word 7 <= 0?
jle Loop9 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop9: mrmovq 72(%rdi), %r11 # load word 9 ahead of time
rmmovq %r10, 64(%rsi) # store word 8
andq %r10, %r10 # word 8 <= 0?
jle Loop10 # if so, skip the increment
iaddq $0x1, %rax # count++
Loop10: # last word of the group: nothing further to load
rmmovq %r11, 72(%rsi) # store word 9
andq %r11, %r11 # word 9 <= 0?
jle Loop # if so, skip the increment
iaddq $0x1, %rax # count++
Loop:
iaddq $0x50, %rdi # src += 10 words (0x50 = 80 bytes)
iaddq $0x50, %rsi # dst += 10 words
iaddq $-10,%rdx # len -= 10; still >= 0 means another full group of 10
jge Loop1 # if so, run the unrolled body again
# Remainder dispatch. On entry %rdx = r - 10, where r (0..9) is the number of
# words still to copy. The compares below branch to RemainN with N = r.
Root:
iaddq $7,%rdx # %rdx = r - 3
jl Left # r < 3
jg Right # r > 3
je Remain3 # r == 3
Left:
iaddq $2,%rdx # %rdx = r - 1
je Remain1 # r == 1
iaddq $-1,%rdx # %rdx = r - 2
je Remain2 # r == 2
ret # r == 0: nothing left to copy
Right:
iaddq $-3,%rdx # %rdx = r - 6
jg RightRight # r > 6
je Remain6 # r == 6
iaddq $1,%rdx # %rdx = r - 5
je Remain5 # r == 5
jmp Remain4 # r == 4 (%rdx is -1 here, so the flags read "negative")
RightRight:
iaddq $-2,%rdx # %rdx = r - 8
jl Remain7 # r == 7
je Remain8 # r == 8; otherwise r == 9 falls through
# Remainder chain: RemainN copies the word at byte offset 8*(N-1) and falls
# through to Remain(N-1). The jle after each RemainN label tests condition
# codes set earlier (mrmovq leaves them alone):
#  - on fall-through they come from the andq on the word copied just before,
#    so the jle/iaddq pair counts that previous word;
#  - on a jump from the dispatch code they are zero or negative, so the jle
#    is taken and nothing is counted.
Remain9:
mrmovq 64(%rdi), %r11 # load word 8
rmmovq %r11, 64(%rsi) # store word 8
andq %r11, %r11 # set CC from word 8
Remain8:
mrmovq 56(%rdi), %r11 # load word 7
jle Remain82 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain82:
rmmovq %r11, 56(%rsi) # store word 7
andq %r11, %r11 # set CC from word 7
Remain7:
mrmovq 48(%rdi), %r11 # load word 6
jle Remain72 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain72:
rmmovq %r11, 48(%rsi) # store word 6
andq %r11, %r11 # set CC from word 6
Remain6:
mrmovq 40(%rdi), %r11 # load word 5
jle Remain62 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain62:
rmmovq %r11, 40(%rsi) # store word 5
andq %r11, %r11 # set CC from word 5
Remain5:
mrmovq 32(%rdi), %r11 # load word 4
jle Remain52 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain52:
rmmovq %r11, 32(%rsi) # store word 4
andq %r11, %r11 # set CC from word 4
Remain4:
mrmovq 24(%rdi), %r11 # load word 3
jle Remain42 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain42:
rmmovq %r11, 24(%rsi) # store word 3
andq %r11, %r11 # set CC from word 3
Remain3:
mrmovq 16(%rdi), %r11 # load word 2
jle Remain32 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain32:
rmmovq %r11, 16(%rsi) # store word 2
andq %r11, %r11 # set CC from word 2
Remain2:
mrmovq 8(%rdi), %r11 # load word 1
jle Remain22 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain22:
rmmovq %r11, 8(%rsi) # store word 1
andq %r11, %r11 # set CC from word 1
Remain1:
mrmovq (%rdi), %r11 # load word 0
jle Remain12 # skip count++ unless the previous word was > 0
iaddq $0x1, %rax # count++
Remain12:
rmmovq %r11, (%rsi) # store word 0
andq %r11, %r11 # word 0 <= 0?
jle Done # if so, nothing more to count
iaddq $0x1, %rax # count++
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
#https://zhuanlan.zhihu.com/p/77072339
思路参考:
https://zhuanlan.zhihu.com/p/33751460
https://zhuanlan.zhihu.com/p/77072339
csapp 5.8节