2021-09-10 21:15:08 星期五

这是csapp里面关于callee-saved registers怎么用的讲解。当时看得还是不明就里,现在已经做了2个汇编相关的lab,才明白它确实讲得非常清楚。

题外话,图床改从这里传的了,现在看到图片的网址是https://s.pc.qq.com/tousu/img/20210910/8986371_1631279936.jpg
我的内心:@&¥%……&@#¥……&*(qq投诉emmmmmmmmm希望不会挂掉……)

part A

基础不牢,地动山摇。今晚在用Y86-64汇编语言实现以下c代码的时候,

/* rsum_list - Recursively add up the val field of every node in a list. */
long rsum_list(list_ptr ls)
{
    long head;
    long tail_sum;

    /* Base case: an empty list sums to zero. */
    if (!ls)
        return 0;

    /* Read this node's value first, then sum everything after it. */
    head = ls->val;
    tail_sum = rsum_list(ls->next);
    return head + tail_sum;
}

因为搞不明白callee saved register怎么用,还有push和pop还有一点模糊的地方,到最后还是参考了别人的代码才能写对,唉。

# long rsum_list(list_ptr ls)  -- Y86-64
# In:   %rdi = ls (node layout used below: val at offset 0, next at offset 8)
# Out:  %rax = sum of val over the whole list, 0 for an empty list
# %rbp carries ls->val across the recursive call, so the caller's %rbp is
# saved on entry and restored on every exit path.
rsum_list:
        pushq %rbp              # save caller's %rbp before reusing it
        xorq %rax,%rax          # result = 0 (covers the empty-list case)
        andq %rdi,%rdi          # set CC from ls
        je rsum_done            # ls == NULL: return 0
        mrmovq (%rdi),%rbp      # %rbp = ls->val, must survive the call
        mrmovq 8(%rdi),%rdi     # %rdi = ls->next, argument for the recursion
        call rsum_list          # %rax = rsum_list(ls->next)
        addq %rbp,%rax          # %rax = val + rest
rsum_done:
        popq %rbp               # restore caller's %rbp (pairs with the pushq)
        ret

再次题外话,别人做这个lab是这样的(引用原话):

我趁着考完期中的空闲时间,花了近一天时间啃了下第四章,并顺便做了个下这个 Architecture Lab。

而我光是配环境就是2个小时,到现在6.5h了才做完part A(一共3个part)的前两个函数(一共3个函数),还全是抄的自己完全做不出来,真是差距巨大啊orz

2021-09-11 12:13:35 星期六
终于自己写出来了一次……

/*
 * copy_block - Copy len words from src to dest.
 * Returns the xor of all copied words; 0 when len <= 0 (nothing is copied).
 */
long copy_block(long *src, long *dest, long len)
{
    long checksum = 0;

    for (; len > 0; len--) {
        long word = *src++;     /* fetch the next source word */

        *dest++ = word;         /* copy it */
        checksum ^= word;       /* fold it into the checksum */
    }
    return checksum;
}
# Execution begins at address 0
# Startup stub: point %rsp at the `stack` label, call main, then halt.

        .pos 0
        irmovq stack,%rsp       # initialise the stack pointer
        call main               # run the test driver below
        halt                    # stop once main returns

.align 8
# Source block
# Three words that copy_block reads.
src:
.quad 0x00a
.quad 0x0b0
.quad 0xc00
# Destination block
# Three placeholder words that copy_block overwrites.
dest:
.quad 0x111
.quad 0x222
.quad 0x333

# main: call copy_block(src, dest, 3); the result is left in %rax.
main:    irmovq src,%rdi
        irmovq dest,%rsi
        irmovq $3,%rdx
        call copy_block
        ret

# long copy_block(long *src, long *dest, long len)  -- Y86-64
# In:   %rdi = src, %rsi = dest, %rdx = len (in 8-byte words)
# Out:  %rax = xor of every word copied (0 when len <= 0)
# Uses: %rcx as scratch for the current word.
#       %r8 / %r9 hold the constants 8 and 1; both are saved on entry and
#       restored on exit.
copy_block:
        pushq %r8               # save %r8 (pushed first)
        pushq %r9               # save %r9 (pushed second)
        irmovq $0,%rax          # result = 0
        irmovq $8,%r8           # word size, used to advance the pointers
        irmovq $1,%r9           # loop decrement
        andq %rdx,%rdx          # set CC from len for the first test
        jmp loopjudge
loop:   mrmovq (%rdi),%rcx      # val = *src
        addq %r8,%rdi           # src++
        rmmovq %rcx,(%rsi)      # *dest = val
        addq %r8,%rsi           # dest++
        xorq %rcx,%rax          # result ^= val
        subq %r9,%rdx           # len--, sets CC for the test below
loopjudge:
        jg loop                 # keep going while len > 0
        popq %r9                # restore in reverse push order: %r9 first...
        popq %r8                # ...then %r8 (same-order pops would swap them)
        ret

        .pos 0x100
stack:
root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/misc# ./yas a-copy.ys
root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/misc# ./yis a-copy.yo
Stopped in 40 steps at PC = 0x13.  Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax:   0x0000000000000000      0x0000000000000cba
%rcx:   0x0000000000000000      0x0000000000000c00
%rsp:   0x0000000000000000      0x0000000000000100
%rsi:   0x0000000000000000      0x0000000000000048
%rdi:   0x0000000000000000      0x0000000000000030

Changes to memory:
0x0030: 0x0000000000000111      0x000000000000000a
0x0038: 0x0000000000000222      0x00000000000000b0
0x0040: 0x0000000000000333      0x0000000000000c00
0x00f0: 0x0000000000000000      0x000000000000006f
0x00f8: 0x0000000000000000      0x0000000000000013

当然这也是参考过的,第一次不知道len>0的判断具体怎么写所以用了jne。
我用了%rcx来存val的值,看到其他人的代码用的是callee saved register,我不知道我这样是不是不太好……

part B

2021-09-11 13:15:56 星期六
1个小时大概看了一遍书上内容,完全看不懂。lab更是就算参考了其他做过的人的博客也是连头绪都没有,已经不想做了。

2021-09-13 12:38:00 星期一

##################################################################
#-----------------header comment------------------
#-----------------name: cutesnake
##Fetch
#        icode:ifun <- M1[PC]
#        rA:rB <- M1[PC + 1]
#        valC <- M8[PC + 2]
#        valP <- PC + 10
#
##Decode
#        #valA <- R[rA]
#        valB <- R[rB]
#
##Execute
#        valE <- valB + valC
#
##Memory
#
##Write back
#        R[rB] <- valE
#
##PC update
#        PC <- valP
#
#/* $begin seq-all-hcl */
####################################################################
#  HCL Description of Control for Single Cycle Y86-64 Processor SEQ   #
#  Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010       #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote '  {plusmode=0;return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       #
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP     'I_NOP'
wordsig IHALT    'I_HALT'
wordsig IRRMOVQ    'I_RRMOVQ'
wordsig IIRMOVQ    'I_IRMOVQ'
wordsig IRMMOVQ    'I_RMMOVQ'
wordsig IMRMOVQ    'I_MRMOVQ'
wordsig IOPQ    'I_ALU'
wordsig IJXX    'I_JMP'
wordsig ICALL    'I_CALL'
wordsig IRET    'I_RET'
wordsig IPUSHQ    'I_PUSHQ'
wordsig IPOPQ    'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ    'I_IADDQ'

##### Symbolic representations of Y86-64 function codes                 #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP     'REG_RSP'        # Stack Pointer
wordsig RNONE    'REG_NONE'       # Special value indicating "no register"

##### ALU Functions referenced explicitly                            #####
wordsig ALUADD    'A_ADD'        # ALU should add its arguments

##### Possible instruction status values                             #####
wordsig SAOK    'STAT_AOK'    # Normal execution
wordsig SADR    'STAT_ADR'    # Invalid memory address
wordsig SINS    'STAT_INS'    # Invalid instruction
wordsig SHLT    'STAT_HLT'    # Halt instruction encountered

##### Signals that can be referenced by control logic ####################

##### Fetch stage inputs        #####
wordsig pc 'pc'                # Program counter
##### Fetch stage computations        #####
wordsig imem_icode 'imem_icode'        # icode field from instruction memory
wordsig imem_ifun  'imem_ifun'         # ifun field from instruction memory
wordsig icode      'icode'        # Instruction control code
wordsig ifun      'ifun'        # Instruction function
wordsig rA      'ra'            # rA field from instruction
wordsig rB      'rb'            # rB field from instruction
wordsig valC      'valc'        # Constant from instruction
wordsig valP      'valp'        # Address of following instruction
boolsig imem_error 'imem_error'        # Error signal from instruction memory
boolsig instr_valid 'instr_valid'    # Is fetched instruction valid?

##### Decode stage computations        #####
wordsig valA    'vala'            # Value from register A port
wordsig valB    'valb'            # Value from register B port

##### Execute stage computations    #####
wordsig valE    'vale'            # Value computed by ALU
boolsig Cnd    'cond'            # Branch test

##### Memory stage computations        #####
wordsig valM    'valm'            # Value read from memory
boolsig dmem_error 'dmem_error'        # Error signal from data memory


####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################

# Determine instruction code
# If the instruction fetch itself failed, substitute INOP instead of using
# whatever instruction memory returned.
word icode = [
    imem_error: INOP;
    1: imem_icode;        # Default: get from instruction memory
];

# Determine instruction function
# Same rule as icode: on a fetch error use the default function code FNONE.
word ifun = [
    imem_error: FNONE;
    1: imem_ifun;        # Default: get from instruction memory
];

# Is the fetched icode a legal instruction?
# Stat reports SINS when this is false, so IIADDQ has to be listed here
# for iaddq to be accepted.
bool instr_valid = icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
           IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

# Does fetched instruction require a regid byte?
# iaddq does: its rB field names the register that is read and written back.
bool need_regids =
    icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
             IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };

# Does fetched instruction require a constant word?
# iaddq does: the immediate to be added is fetched as valC.
bool need_valC =
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };

################ Decode Stage    ###################################

## What register should be used as the A source?
## IIADDQ is deliberately absent: iaddq takes its first operand from valC,
## so it falls through to RNONE here.
word srcA = [
    icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
    icode in { IPOPQ, IRET } : RRSP;
    1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## iaddq reads rB: the immediate is added to that register's current value.
word srcB = [
    icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## IRRMOVQ only writes rB when Cnd holds; the other listed instructions
## always write. iaddq stores its ALU result (valE) back into rB.
word dstE = [
    icode in { IRRMOVQ } && Cnd : rB;
    icode in { IIRMOVQ, IOPQ, IIADDQ } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
## Only instructions that load from memory write through this port;
## iaddq is not one of them and gets RNONE.
word dstM = [
    icode in { IMRMOVQ, IPOPQ } : rA;
    1 : RNONE;  # Don't write any register
];

################ Execute Stage   ###################################

## Select input A to ALU
## iaddq feeds its immediate (valC) to the ALU, like the other
## constant-carrying instructions on the same line.
word aluA = [
    icode in { IRRMOVQ, IOPQ } : valA;
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
    icode in { ICALL, IPUSHQ } : -8;
    icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
];

## Select input B to ALU
## iaddq uses valB (the current contents of rB) as the second operand,
## so the ALU computes valB + valC.
word aluB = [
    icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
              IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
    icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
];

## Set the ALU function
## Only IOPQ takes its operation from ifun. Everything else, iaddq
## included, uses the default ALUADD, so no extra case is needed for it.
word alufun = [
    icode == IOPQ : ifun;
    1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq updates them just like the IOPQ arithmetic instructions, so a
## conditional jump can directly follow an iaddq.
bool set_cc = icode in { IOPQ, IIADDQ };

################ Memory Stage    ###################################

## Set read control signal
## iaddq is not listed: it performs no memory read.
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
## iaddq is not listed: it performs no memory write.
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };

## Select memory address
## Stores, pushes, calls and loads address memory with the ALU result
## (valE); pop and ret use valA instead, which srcA takes from RRSP.
word mem_addr = [
    icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
    icode in { IPOPQ, IRET } : valA;
    # Other instructions don't need address
];

## Select memory input data
## Covers exactly the three instructions enabled by mem_write.
word mem_data = [
    # Value from register
    icode in { IRMMOVQ, IPUSHQ } : valA;
    # Return PC
    icode == ICALL : valP;
    # Default: Don't write anything
];

## Determine instruction status
## Cases are tested top to bottom: a memory error wins over an invalid
## instruction, which wins over halt; otherwise execution is normal.
word Stat = [
    imem_error || dmem_error : SADR;
    !instr_valid: SINS;
    icode == IHALT : SHLT;
    1 : SAOK;
];

################ Program Counter Update ############################

## What address should instruction be fetched at
## iaddq matches none of the special cases below, so it continues at valP.

word new_pc = [
    # Call.  Use instruction constant
    icode == ICALL : valC;
    # Taken branch.  Use instruction constant
    icode == IJXX && Cnd : valC;
    # Completion of RET instruction.  Use value from stack
    icode == IRET : valM;
    # Default: Use incremented PC
    1 : valP;
];
#/* $end seq-all-hcl */

运行结果:

root@bb0b19885c0d:/cmu15-213/archlab-handout/sim/seq# (cd ../ptest; make SIM=../seq/ssim TFLAGS=-i)
./optest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 58 ISA Checks Succeed
./jtest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 96 ISA Checks Succeed
./ctest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 22 ISA Checks Succeed
./htest.pl -s ../seq/ssim -i
Simulating with ../seq/ssim
  All 756 ISA Checks Succeed

实在不会,只能抄答案了。(其实看了书做了一点笔记已经大概能明白这是在干嘛了:对于指令IIADDQ,是否需要寄存器,是否需要常数,从哪来到哪去,哪里用到就往哪填,基本就是这么个套路。)
编译时候还是遇到很多小问题,还是参考了其他人的博客才能解决。但最后编译的时候还是报了一堆类似

ssim.c:880:2: warning: 'result' is deprecated: use Tcl_GetStringResult/Tcl_SetResult [-Wdeprecated-declarations]
  interp->result = "No arguments allowed";

这样的警告(注意是warning,不是error),我完全不明所以(从提示看,是模拟器代码里用到的Tcl接口`interp->result`在新版本库里被标记为deprecated,建议改用Tcl_GetStringResult/Tcl_SetResult),还好最后不影响判题程序。
这个lab给我的感觉就是环境上问题多多;而且有关内容cmu的老师上课也不讲,应该属于不怎么重要那种lab吧=。=
2021-09-14 07:48:26 星期二
时隔一天回来自己添上注释,原来都是比对着书上照猫画虎就行,感觉已经不那么难了。看了一下之前版本的作业还有加上ILEAVE的指令,这一版给删掉了,应该是已经降了难度了吧=。=

2021-09-14 09:41:44 星期二
partC发现%r10用得非常浪费,用iaddq指令替换了在寄存器里移来移去的指令。性能提高了,但还是零分。看了看博客发现要细看第五章的内容,而且partc里用到的主要都是编译器帮我们完成的,现在的编译器已经非常先进了,恐怕用到这些知识的机会不多,就先过了吧。

../misc/yas ncopy.ys 生成ncopy.yo

./check-len.pl < ncopy.yo 检测长度

make drivers 生成ncopy.ys的测试程序

make psim VERSION=full 生成新的psim

./psim -t sdriver.yo 测试small 4-element array

./psim -t ldriver.yo 测试larger 63-element array

./correctness.pl 测试不同Block length下code range是否符合

./benchmark.pl 评分
————————————————
版权声明:本文为CSDN博主「热爱学习的贾克斯」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_42234461/article/details/108720264

2021-09-15 19:11:53 星期三
记一下题,方便以后回顾。
要求:优化以下ncopy函数的汇编代码,使得运行用时尽量短

#include <stdio.h>

/*
 * Machine word used throughout this file. The original line
 * `typedef word_t word_t;` referred to itself and could not compile.
 * `long` matches the operand type used by the other C snippets in these notes.
 */
typedef long word_t;

/* Source and destination arrays used by main(). */
word_t src[8], dst[8];

/* $begin ncopy */
/*
 * ncopy - Copy the first len words of src into dst.
 * Returns how many of the copied words were strictly positive.
 * Nothing is copied and 0 is returned when len <= 0.
 */
word_t ncopy(word_t *src, word_t *dst, word_t len)
{
    word_t positives = 0;
    word_t i;

    for (i = 0; i < len; i++) {
        word_t cur = src[i];

        dst[i] = cur;
        if (cur > 0)
            positives++;
    }
    return positives;
}
/* $end ncopy */

/*
 * Small driver: fill src with 1..8, copy it with ncopy and print the
 * number of positive elements that ncopy reports.
 */
int main()
{
    word_t i, count;

    for (i = 0; i < 8; i++)
        src[i] = i + 1;
    count = ncopy(src, dst, 8);
    /* word_t is wider than int: print it as a long rather than with %d. */
    printf("count=%ld\n", (long) count);
    /* Return normally; exit() would need <stdlib.h>, which is not included. */
    return 0;
}


我的代码,avg12.70(原始15.18)

#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
# ncopy: one element per iteration, using iaddq immediates for count, len
# and the pointer bumps.
# In:  %rdi = src, %rsi = dst, %rdx = len     Out: %rax = count of words > 0
ncopy:

##################################################################
# You can modify this portion
    # Loop header
    xorq %rax,%rax        # count = 0;
    andq %rdx,%rdx        # len <= 0?
    jle Done        # if so, goto Done:

Loop:    mrmovq (%rdi), %r10    # read val from src...
    rmmovq %r10, (%rsi)    # ...and store it to dst
    andq %r10, %r10        # val <= 0?
    jle Npos        # if so, goto Npos:
    iaddq $1, %rax        # count++
Npos:    
    iaddq $-1, %rdx        # len--
    iaddq $8, %rdi        # src++ (one 8-byte word)
    iaddq $8, %rsi        # dst++ (one 8-byte word)
    # NOTE(review): the two pointer bumps above overwrite the condition codes
    # left by len--, which is why len is tested again here. If len-- were the
    # last iaddq before the branch, this andq would presumably be unnecessary
    # -- confirm with the benchmark before changing.
    andq %rdx,%rdx        # len > 0?
    jg Loop            # if so, goto Loop:
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
    ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */

满分60分答案,avg CPE7.49

#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len words to dst.
# Return the number of positive words (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
#
##################################################################
# Do not modify this portion
# Function prologue.
# %rdi = src, %rsi = dst, %rdx = len
# ncopy: 10-way unrolled copy loop plus a jump-in remainder chain.
# In:  %rdi = src, %rsi = dst, %rdx = len     Out: %rax = count of words > 0
# Layout:
#   Loop1..Loop10  copy 10 words per pass, alternating %r10/%r11 so that each
#                  word is loaded one step before it is stored and tested.
#   Root/Left/Right/RightRight  branch tree that picks the number of leftover
#                  words (0-9) and jumps into the chain below.
#   Remain9..Remain1  fall-through chain; entering at RemainK copies words
#                  K-1 down to 0.
# NOTE(review): nothing here zeroes %rax before the first count++, so the
# result is only correct if %rax is 0 on entry -- confirm the caller
# guarantees that.
ncopy:

##################################################################
# You can modify this portion
    # Loop header
    iaddq $-10,%rdx        # %rdx = len - 10; negative means len < 10
    jl Root            # if so, skip the unrolled loop


Loop1:    mrmovq (%rdi), %r10    # %r10 = src[0]
    mrmovq 8(%rdi), %r11    # %r11 = src[1], loaded ahead of its use
    rmmovq %r10, (%rsi)    # dst[0] = src[0]
    andq %r10, %r10        # src[0] <= 0?
    jle Loop2        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop2:    mrmovq 16(%rdi), %r10    # %r10 = src[2]
    rmmovq %r11, 8(%rsi)    # dst[1] = src[1]
    andq %r11, %r11        # src[1] <= 0?
    jle Loop3        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop3:    mrmovq 24(%rdi), %r11    # %r11 = src[3]
    rmmovq %r10, 16(%rsi)    # dst[2] = src[2]
    andq %r10, %r10        # src[2] <= 0?
    jle Loop4        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop4:    mrmovq 32(%rdi), %r10    # %r10 = src[4]
    rmmovq %r11, 24(%rsi)    # dst[3] = src[3]
    andq %r11, %r11        # src[3] <= 0?
    jle Loop5        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop5:    mrmovq 40(%rdi), %r11    # %r11 = src[5]
    rmmovq %r10, 32(%rsi)    # dst[4] = src[4]
    andq %r10, %r10        # src[4] <= 0?
    jle Loop6        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop6:    mrmovq 48(%rdi), %r10    # %r10 = src[6]
    rmmovq %r11, 40(%rsi)    # dst[5] = src[5]
    andq %r11, %r11        # src[5] <= 0?
    jle Loop7        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop7:    mrmovq 56(%rdi), %r11    # %r11 = src[7]
    rmmovq %r10, 48(%rsi)    # dst[6] = src[6]
    andq %r10, %r10        # src[6] <= 0?
    jle Loop8        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop8:    mrmovq 64(%rdi), %r10    # %r10 = src[8]
    rmmovq %r11, 56(%rsi)    # dst[7] = src[7]
    andq %r11, %r11        # src[7] <= 0?
    jle Loop9        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop9:    mrmovq 72(%rdi), %r11    # %r11 = src[9]
    rmmovq %r10, 64(%rsi)    # dst[8] = src[8]
    andq %r10, %r10        # src[8] <= 0?
    jle Loop10        # if so, skip count++
    iaddq $0x1, %rax        # count++
Loop10:    # last slot of the pass: no further word to load ahead
    rmmovq %r11, 72(%rsi)    # dst[9] = src[9]
    andq %r11, %r11        # src[9] <= 0?
    jle Loop        # if so, skip count++
    iaddq $0x1, %rax        # count++



Loop:
    iaddq $0x50, %rdi    # src += 10 words (0x50 bytes)
    iaddq $0x50, %rsi    # dst += 10 words (0x50 bytes)
    iaddq $-10,%rdx        # at least 10 more words left?
    jge Loop1        # if so, run another unrolled pass
Root:
    # Here %rdx = rem - 10, where rem (0..9 for len >= 0) is the number of
    # words still to copy. Each iaddq below re-biases %rdx and the following
    # jumps test its sign.
    iaddq    $7,%rdx        # %rdx = rem - 3
    jl    Left            # rem < 3
    jg    Right            # rem > 3
    je    Remain3        # rem == 3
    

Left:
    iaddq    $2,%rdx        # %rdx = rem - 1
    je    Remain1        # rem == 1
    iaddq    $-1,%rdx    # %rdx = rem - 2
    je    Remain2        # rem == 2
    ret            # rem == 0: nothing left to copy
Right:
    iaddq    $-3,%rdx    # %rdx = rem - 6
    jg    RightRight    # rem > 6
    je    Remain6        # rem == 6
    iaddq    $1,%rdx        # %rdx = rem - 5
    je    Remain5        # rem == 5
    jmp    Remain4        # rem == 4
    
RightRight:
    iaddq    $-2,%rdx    # %rdx = rem - 8
    jl    Remain7        # rem == 7
    je    Remain8        # rem == 8
                # otherwise rem == 9: fall into Remain9

    # Remainder chain. Each RemainK first loads word K-1, then executes a jle
    # that tests condition codes set BEFORE that load:
    #   - when falling through from Remain(K+1), they come from the andq on
    #     word K, so the jle decides whether word K is counted;
    #   - when entered by a jump from the branch tree above, they come from
    #     the dispatch iaddq (zero or negative on every jump-in path), so the
    #     jle is taken and nothing is counted for a word that was not copied.
Remain9:
    mrmovq 64(%rdi), %r11    # %r11 = src[8]
    rmmovq %r11, 64(%rsi)    # dst[8] = src[8]
    andq %r11, %r11        # CC from src[8], consumed by the jle in Remain8

Remain8:
    mrmovq 56(%rdi), %r11    # %r11 = src[7]
    jle Remain82        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word

Remain82:
    
    rmmovq %r11, 56(%rsi)    # dst[7] = src[7]
    andq %r11, %r11        # CC from src[7], consumed by the jle in Remain7
Remain7:
    mrmovq 48(%rdi), %r11    # %r11 = src[6]
    jle Remain72        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain72:
        
    rmmovq %r11, 48(%rsi)    # dst[6] = src[6]
    andq %r11, %r11        # CC from src[6], consumed by the jle in Remain6

Remain6:
    mrmovq 40(%rdi), %r11    # %r11 = src[5]
    jle Remain62        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain62:
        
    rmmovq %r11, 40(%rsi)    # dst[5] = src[5]
    andq %r11, %r11        # CC from src[5], consumed by the jle in Remain5
Remain5:
    mrmovq 32(%rdi), %r11    # %r11 = src[4]
    jle Remain52        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain52:
        
    rmmovq %r11, 32(%rsi)    # dst[4] = src[4]
    andq %r11, %r11        # CC from src[4], consumed by the jle in Remain4
Remain4:
    mrmovq 24(%rdi), %r11    # %r11 = src[3]
    jle Remain42    # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain42:

    rmmovq %r11, 24(%rsi)    # dst[3] = src[3]
    andq %r11, %r11        # CC from src[3], consumed by the jle in Remain3
Remain3:
    mrmovq 16(%rdi), %r11    # %r11 = src[2]
    jle Remain32        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain32:

    rmmovq %r11, 16(%rsi)    # dst[2] = src[2]
    andq %r11, %r11        # CC from src[2], consumed by the jle in Remain2
Remain2:
    mrmovq 8(%rdi), %r11    # %r11 = src[1]
    jle Remain22        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain22:

    rmmovq %r11, 8(%rsi)    # dst[1] = src[1]
    andq %r11, %r11        # CC from src[1], consumed by the jle in Remain1
Remain1:
    mrmovq (%rdi), %r11    # %r11 = src[0]
    jle Remain12        # previous word <= 0 (or jumped in): skip count++
    iaddq $0x1, %rax        # count++ for the previous word
Remain12:
    
    rmmovq %r11, (%rsi)    # dst[0] = src[0]
    andq %r11, %r11        # src[0] <= 0?
    jle Done        # if so, return without counting it
    iaddq $0x1, %rax        # count++ for src[0], then fall into Done


    





##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
    ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */
#https://zhuanlan.zhihu.com/p/77072339

思路参考:
https://zhuanlan.zhihu.com/p/33751460
https://zhuanlan.zhihu.com/p/77072339
csapp 5.8节