root/arch/x86/lib/csum-copy_64.S

/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *      
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On an exception, *src_err_ptr or *dst_err_ptr is set to -EFAULT;
 * zeroing the destination is left to the wrapper.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int *)
 * r9   dst_err_ptr (int *)
 *
 * Output
 * eax  32bit checksum, folded from the 64bit accumulator.
 *      Undefined in case of an exception.
 *
 * Wrappers need to take care of returning a valid sum on exceptions
 * and of zeroing. They should also align source or destination to
 * 8 bytes.
 */
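
/*
 * Rough C view of this entry point (a sketch; the real prototype
 * lives in asm/checksum_64.h):
 *
 *      __wsum csum_partial_copy_generic(const void *src, const void *dst,
 *                                       int len, __wsum sum,
 *                                       int *src_err_ptr, int *dst_err_ptr);
 *
 * Wrappers such as csum_partial_copy_from_user() pass a real error
 * pointer only for the side that can fault and NULL for the other.
 */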

        .macro source
10:
        .section __ex_table,"a"
        .align 8
        .quad 10b,.Lbad_source
        .previous
        .endm
                
        .macro dest
20:
        .section __ex_table,"a"
        .align 8
        .quad 20b,.Lbad_dest
        .previous
        .endm
                        
        .macro ignore L=.Lignore
30:
        .section __ex_table,"a"
        .align 8
        .quad 30b,\L
        .previous
        .endm
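
/*
 * Each macro above plants a local label on the following instruction
 * and records an (address, fixup) pair in the __ex_table section,
 * roughly matching (a sketch):
 *
 *      struct exception_table_entry { unsigned long insn, fixup; };
 *
 * When the instruction at "insn" faults, the page fault handler looks
 * up the pair and resumes execution at "fixup" instead of oopsing.
 */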
        
                                
ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        cmpl     $3*64,%edx
        jle      .Lignore       /* vestigial: execution reaches .Lignore either way */

.Lignore:               
        subq  $7*8,%rsp
        CFI_ADJUST_CFA_OFFSET 7*8
        movq  %rbx,2*8(%rsp)
        CFI_REL_OFFSET rbx, 2*8
        movq  %r12,3*8(%rsp)
        CFI_REL_OFFSET r12, 3*8
        movq  %r14,4*8(%rsp)
        CFI_REL_OFFSET r14, 4*8
        movq  %r13,5*8(%rsp)
        CFI_REL_OFFSET r13, 5*8
        movq  %rbp,6*8(%rsp)
        CFI_REL_OFFSET rbp, 6*8

        /* save the error pointers where .Lbad_source/.Lbad_dest
           expect them: src_err_ptr at (%rsp), dst_err_ptr at 8(%rsp) */
        movq  %r8,(%rsp)
        movq  %r9,1*8(%rsp)
        
        movl  %ecx,%eax
        movl  %edx,%ecx

        xorl  %r9d,%r9d
        movq  %rcx,%r12

        shrq  $6,%r12
        jz    .Lhandle_tail       /* < 64 */

        clc
        
        /* main loop: checksum and copy in 64 byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12: loopcnt */
        /* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
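        /*
         * The adcq chain below folds each 64bit word into rax with an
         * end-around carry; per word this is roughly (a sketch):
         *
         *      sum += w; if (sum < w) sum++;
         *
         * The carry flag stays live across the stores, which is why the
         * pointer updates use lea and the loop counter uses decl
         * (neither touches CF).
         */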
        .p2align 4
.Lloop:
        source
        movq  (%rdi),%rbx
        source
        movq  8(%rdi),%r8
        source
        movq  16(%rdi),%r11
        source
        movq  24(%rdi),%rdx

        source
        movq  32(%rdi),%r10
        source
        movq  40(%rdi),%rbp
        source
        movq  48(%rdi),%r14
        source
        movq  56(%rdi),%r13
                
        ignore 2f               /* if the prefetch faults, just resume at 2: */
        prefetcht0 5*64(%rdi)
2:                                                      
        adcq  %rbx,%rax
        adcq  %r8,%rax
        adcq  %r11,%rax
        adcq  %rdx,%rax
        adcq  %r10,%rax
        adcq  %rbp,%rax
        adcq  %r14,%rax
        adcq  %r13,%rax

        decl %r12d
        
        dest
        movq %rbx,(%rsi)
        dest
        movq %r8,8(%rsi)
        dest
        movq %r11,16(%rsi)
        dest
        movq %rdx,24(%rsi)

        dest
        movq %r10,32(%rsi)
        dest
        movq %rbp,40(%rsi)
        dest
        movq %r14,48(%rsi)
        dest
        movq %r13,56(%rsi)
        
3:
        
        leaq 64(%rdi),%rdi
        leaq 64(%rsi),%rsi

        jnz   .Lloop

        adcq  %r9,%rax

        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count */
        movl %ecx,%r10d
        andl $63,%ecx
        shrl $3,%ecx
        jz       .Lfold
        clc
        .p2align 4
.Lloop_8:       
        source
        movq (%rdi),%rbx
        adcq %rbx,%rax
        decl %ecx
        dest
        movq %rbx,(%rsi)
        leaq 8(%rsi),%rsi /* preserve carry */
        leaq 8(%rdi),%rdi
        jnz     .Lloop_8
        adcq %r9,%rax   /* add in carry */

.Lfold:
        /* reduce checksum to 32bits */
        movl %eax,%ebx
        shrq $32,%rax
        addl %ebx,%eax
        adcl %r9d,%eax
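        /*
         * C equivalent of the fold above (a sketch):
         *
         *      u32 lo = sum, hi = sum >> 32;
         *      lo += hi;
         *      lo += (lo < hi);        // end-around carry
         *
         * leaving the folded result in eax.
         */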

        /* do last up to 6 bytes */
.Lhandle_7:
        movl %r10d,%ecx
        andl $7,%ecx
        shrl $1,%ecx
        jz   .Lhandle_1
        movl $2,%edx
        xorl %ebx,%ebx
        clc  
        .p2align 4
.Lloop_1:       
        source
        movw (%rdi),%bx
        adcl %ebx,%eax
        decl %ecx
        dest
        movw %bx,(%rsi)
        leaq 2(%rdi),%rdi
        leaq 2(%rsi),%rsi
        jnz .Lloop_1
        adcl %r9d,%eax  /* add in carry */
        
        /* handle last odd byte */
.Lhandle_1:
        testl $1,%r10d
        jz    .Lende
        xorl  %ebx,%ebx
        source
        movb (%rdi),%bl
        dest
        movb %bl,(%rsi)
        addl %ebx,%eax
        adcl %r9d,%eax          /* carry */
                        
        CFI_REMEMBER_STATE
.Lende:
        movq 2*8(%rsp),%rbx
        CFI_RESTORE rbx
        movq 3*8(%rsp),%r12
        CFI_RESTORE r12
        movq 4*8(%rsp),%r14
        CFI_RESTORE r14
        movq 5*8(%rsp),%r13
        CFI_RESTORE r13
        movq 6*8(%rsp),%rbp
        CFI_RESTORE rbp
        addq $7*8,%rsp
        CFI_ADJUST_CFA_OFFSET -7*8
        ret
        CFI_RESTORE_STATE

        /* Exception handlers. Very simple; zeroing is done in the wrappers. */
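/*
 * In C terms each handler below is roughly (a sketch):
 *
 *      if (err_ptr)
 *              *err_ptr = -EFAULT;
 *
 * eax is left undefined, as the header comment warns.
 */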
.Lbad_source:
        movq (%rsp),%rax
        testq %rax,%rax
        jz   .Lende
        movl $-EFAULT,(%rax)
        jmp  .Lende
        
.Lbad_dest:
        movq 8(%rsp),%rax
        testq %rax,%rax
        jz   .Lende     
        movl $-EFAULT,(%rax)
        jmp .Lende
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
