# sha1-x86_64.s
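# sha1_block_data_order_avx (AVX code path), continued: the five SHA-1 working
# variables rotate through %eax, %ebx, %ecx, %edx and %ebp, %esi/%edi hold the
# round-function value, and the precomputed W[t]+K words are read back from the
# stack frame at n(%rsp).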
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%esi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	vpor	%xmm8,%xmm5,%xmm5
	addl	28(%rsp),%eax
	xorl	%ebp,%edi
	vmovdqa	%xmm5,%xmm9
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%edi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
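# Vectorized message-schedule expansion: each vpalignr/vpxor/vpsrld/vpslld/vpor
# group below derives four new schedule words at once (the SHA-1 W[] recurrence
# in its rotate-left-by-2 form), interleaved with the scalar round updates; the
# vpaddd/vmovdqa pairs stage W[t]+K on the stack for the rounds that follow.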
	vpalignr	$8,%xmm4,%xmm5,%xmm9
	vpxor	%xmm2,%xmm6,%xmm6
	movl	%ecx,%edi
	xorl	%edx,%ecx
	addl	32(%rsp),%ebp
	andl	%edx,%edi
	vpxor	%xmm7,%xmm6,%xmm6
	andl	%ecx,%esi
	shrdl	$7,%ebx,%ebx
	vmovdqa	%xmm10,%xmm8
	vpaddd	%xmm5,%xmm10,%xmm10
	addl	%edi,%ebp
	movl	%eax,%edi
	vpxor	%xmm9,%xmm6,%xmm6
	shldl	$5,%eax,%eax
	addl	%esi,%ebp
	xorl	%edx,%ecx
	addl	%eax,%ebp
	vpsrld	$30,%xmm6,%xmm9
	vmovdqa	%xmm10,16(%rsp)
	movl	%ebx,%esi
	xorl	%ecx,%ebx
	addl	36(%rsp),%edx
	andl	%ecx,%esi
	vpslld	$2,%xmm6,%xmm6
	andl	%ebx,%edi
	shrdl	$7,%eax,%eax
	addl	%esi,%edx
	movl	%ebp,%esi
	shldl	$5,%ebp,%ebp
	addl	%edi,%edx
	xorl	%ecx,%ebx
	addl	%ebp,%edx
	vpor	%xmm9,%xmm6,%xmm6
	movl	%eax,%edi
	xorl	%ebx,%eax
	vmovdqa	%xmm6,%xmm10
	addl	40(%rsp),%ecx
	andl	%ebx,%edi
	andl	%eax,%esi
	shrdl	$7,%ebp,%ebp
	addl	%edi,%ecx
	movl	%edx,%edi
	shldl	$5,%edx,%edx
	addl	%esi,%ecx
	xorl	%ebx,%eax
	addl	%edx,%ecx
	movl	%ebp,%esi
	xorl	%eax,%ebp
	addl	44(%rsp),%ebx
	andl	%eax,%esi
	andl	%ebp,%edi
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	movl	%ecx,%esi
	shldl	$5,%ecx,%ecx
	addl	%edi,%ebx
	xorl	%eax,%ebp
	addl	%ecx,%ebx
	vpalignr	$8,%xmm5,%xmm6,%xmm10
	vpxor	%xmm3,%xmm7,%xmm7
	movl	%edx,%edi
	xorl	%ebp,%edx
	addl	48(%rsp),%eax
	andl	%ebp,%edi
	vpxor	%xmm0,%xmm7,%xmm7
	andl	%edx,%esi
	shrdl	$7,%ecx,%ecx
	vmovdqa	48(%r11),%xmm9
	vpaddd	%xmm6,%xmm8,%xmm8
	addl	%edi,%eax
	movl	%ebx,%edi
	vpxor	%xmm10,%xmm7,%xmm7
	shldl	$5,%ebx,%ebx
	addl	%esi,%eax
	xorl	%ebp,%edx
	addl	%ebx,%eax
	vpsrld	$30,%xmm7,%xmm10
	vmovdqa	%xmm8,32(%rsp)
	movl	%ecx,%esi
	xorl	%edx,%ecx
	addl	52(%rsp),%ebp
	andl	%edx,%esi
	vpslld	$2,%xmm7,%xmm7
	andl	%ecx,%edi
	shrdl	$7,%ebx,%ebx
	addl	%esi,%ebp
	movl	%eax,%esi
	shldl	$5,%eax,%eax
	addl	%edi,%ebp
	xorl	%edx,%ecx
	addl	%eax,%ebp
	vpor	%xmm10,%xmm7,%xmm7
	movl	%ebx,%edi
	xorl	%ecx,%ebx
	vmovdqa	%xmm7,%xmm8
	addl	56(%rsp),%edx
	andl	%ecx,%edi
	andl	%ebx,%esi
	shrdl	$7,%eax,%eax
	addl	%edi,%edx
	movl	%ebp,%edi
	shldl	$5,%ebp,%ebp
	addl	%esi,%edx
	xorl	%ecx,%ebx
	addl	%ebp,%edx
	movl	%eax,%esi
	xorl	%ebx,%eax
	addl	60(%rsp),%ecx
	andl	%ebx,%esi
	andl	%eax,%edi
	shrdl	$7,%ebp,%ebp
	addl	%esi,%ecx
	movl	%edx,%esi
	shldl	$5,%edx,%edx
	addl	%edi,%ecx
	xorl	%ebx,%eax
	addl	%edx,%ecx
	vpalignr	$8,%xmm6,%xmm7,%xmm8
	vpxor	%xmm4,%xmm0,%xmm0
	movl	%ebp,%edi
	xorl	%eax,%ebp
	addl	0(%rsp),%ebx
	andl	%eax,%edi
	vpxor	%xmm1,%xmm0,%xmm0
	andl	%ebp,%esi
	shrdl	$7,%edx,%edx
	vmovdqa	%xmm9,%xmm10
	vpaddd	%xmm7,%xmm9,%xmm9
	addl	%edi,%ebx
	movl	%ecx,%edi
	vpxor	%xmm8,%xmm0,%xmm0
	shldl	$5,%ecx,%ecx
	addl	%esi,%ebx
	xorl	%eax,%ebp
	addl	%ecx,%ebx
	vpsrld	$30,%xmm0,%xmm8
	vmovdqa	%xmm9,48(%rsp)
	movl	%edx,%esi
	xorl	%ebp,%edx
	addl	4(%rsp),%eax
	andl	%ebp,%esi
	vpslld	$2,%xmm0,%xmm0
	andl	%edx,%edi
	shrdl	$7,%ecx,%ecx
	addl	%esi,%eax
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	addl	%edi,%eax
	xorl	%ebp,%edx
	addl	%ebx,%eax
	vpor	%xmm8,%xmm0,%xmm0
	movl	%ecx,%edi
	xorl	%edx,%ecx
	vmovdqa	%xmm0,%xmm9
	addl	8(%rsp),%ebp
	andl	%edx,%edi
	andl	%ecx,%esi
	shrdl	$7,%ebx,%ebx
	addl	%edi,%ebp
	movl	%eax,%edi
	shldl	$5,%eax,%eax
	addl	%esi,%ebp
	xorl	%edx,%ecx
	addl	%eax,%ebp
	movl	%ebx,%esi
	xorl	%ecx,%ebx
	addl	12(%rsp),%edx
	andl	%ecx,%esi
	andl	%ebx,%edi
	shrdl	$7,%eax,%eax
	addl	%esi,%edx
	movl	%ebp,%esi
	shldl	$5,%ebp,%ebp
	addl	%edi,%edx
	xorl	%ecx,%ebx
	addl	%ebp,%edx
	vpalignr	$8,%xmm7,%xmm0,%xmm9
	vpxor	%xmm5,%xmm1,%xmm1
	movl	%eax,%edi
	xorl	%ebx,%eax
	addl	16(%rsp),%ecx
	andl	%ebx,%edi
	vpxor	%xmm2,%xmm1,%xmm1
	andl	%eax,%esi
	shrdl	$7,%ebp,%ebp
	vmovdqa	%xmm10,%xmm8
	vpaddd	%xmm0,%xmm10,%xmm10
	addl	%edi,%ecx
	movl	%edx,%edi
	vpxor	%xmm9,%xmm1,%xmm1
	shldl	$5,%edx,%edx
	addl	%esi,%ecx
	xorl	%ebx,%eax
	addl	%edx,%ecx
	vpsrld	$30,%xmm1,%xmm9
	vmovdqa	%xmm10,0(%rsp)
	movl	%ebp,%esi
	xorl	%eax,%ebp
	addl	20(%rsp),%ebx
	andl	%eax,%esi
	vpslld	$2,%xmm1,%xmm1
	andl	%ebp,%edi
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	movl	%ecx,%esi
	shldl	$5,%ecx,%ecx
	addl	%edi,%ebx
	xorl	%eax,%ebp
	addl	%ecx,%ebx
	vpor	%xmm9,%xmm1,%xmm1
	movl	%edx,%edi
	xorl	%ebp,%edx
	vmovdqa	%xmm1,%xmm10
	addl	24(%rsp),%eax
	andl	%ebp,%edi
	andl	%edx,%esi
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
	movl	%ebx,%edi
	shldl	$5,%ebx,%ebx
	addl	%esi,%eax
	xorl	%ebp,%edx
	addl	%ebx,%eax
	movl	%ecx,%esi
	xorl	%edx,%ecx
	addl	28(%rsp),%ebp
	andl	%edx,%esi
	andl	%ecx,%edi
	shrdl	$7,%ebx,%ebx
	addl	%esi,%ebp
	movl	%eax,%esi
	shldl	$5,%eax,%eax
	addl	%edi,%ebp
	xorl	%edx,%ecx
	addl	%eax,%ebp
	vpalignr	$8,%xmm0,%xmm1,%xmm10
	vpxor	%xmm6,%xmm2,%xmm2
	movl	%ebx,%edi
	xorl	%ecx,%ebx
	addl	32(%rsp),%edx
	andl	%ecx,%edi
	vpxor	%xmm3,%xmm2,%xmm2
	andl	%ebx,%esi
	shrdl	$7,%eax,%eax
	vmovdqa	%xmm8,%xmm9
	vpaddd	%xmm1,%xmm8,%xmm8
	addl	%edi,%edx
	movl	%ebp,%edi
	vpxor	%xmm10,%xmm2,%xmm2
	shldl	$5,%ebp,%ebp
	addl	%esi,%edx
	xorl	%ecx,%ebx
	addl	%ebp,%edx
	vpsrld	$30,%xmm2,%xmm10
	vmovdqa	%xmm8,16(%rsp)
	movl	%eax,%esi
	xorl	%ebx,%eax
	addl	36(%rsp),%ecx
	andl	%ebx,%esi
	vpslld	$2,%xmm2,%xmm2
	andl	%eax,%edi
	shrdl	$7,%ebp,%ebp
	addl	%esi,%ecx
	movl	%edx,%esi
	shldl	$5,%edx,%edx
	addl	%edi,%ecx
	xorl	%ebx,%eax
	addl	%edx,%ecx
	vpor	%xmm10,%xmm2,%xmm2
	movl	%ebp,%edi
	xorl	%eax,%ebp
	vmovdqa	%xmm2,%xmm8
	addl	40(%rsp),%ebx
	andl	%eax,%edi
	andl	%ebp,%esi
	shrdl	$7,%edx,%edx
	addl	%edi,%ebx
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	addl	%esi,%ebx
	xorl	%eax,%ebp
	addl	%ecx,%ebx
	movl	%edx,%esi
	xorl	%ebp,%edx
	addl	44(%rsp),%eax
	andl	%ebp,%esi
	andl	%edx,%edi
	shrdl	$7,%ecx,%ecx
	addl	%esi,%eax
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	addl	%edi,%eax
	xorl	%ebp,%edx
	addl	%ebx,%eax
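# In the rounds above, the andl/andl pattern implements the SHA-1 majority
# function Maj(b,c,d); from here on the xorl/xorl pattern implements the
# parity function b^c^d used for the last group of rounds.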
	vpalignr	$8,%xmm1,%xmm2,%xmm8
	vpxor	%xmm7,%xmm3,%xmm3
	addl	48(%rsp),%ebp
	xorl	%edx,%esi
	movl	%eax,%edi
	shldl	$5,%eax,%eax
	vpxor	%xmm4,%xmm3,%xmm3
	xorl	%ecx,%esi
	addl	%eax,%ebp
	vmovdqa	%xmm9,%xmm10
	vpaddd	%xmm2,%xmm9,%xmm9
	shrdl	$7,%ebx,%ebx
	addl	%esi,%ebp
	vpxor	%xmm8,%xmm3,%xmm3
	addl	52(%rsp),%edx
	xorl	%ecx,%edi
	movl	%ebp,%esi
	shldl	$5,%ebp,%ebp
	vpsrld	$30,%xmm3,%xmm8
	vmovdqa	%xmm9,32(%rsp)
	xorl	%ebx,%edi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%edi,%edx
	vpslld	$2,%xmm3,%xmm3
	addl	56(%rsp),%ecx
	xorl	%ebx,%esi
	movl	%edx,%edi
	shldl	$5,%edx,%edx
	xorl	%eax,%esi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%esi,%ecx
	vpor	%xmm8,%xmm3,%xmm3
	addl	60(%rsp),%ebx
	xorl	%eax,%edi
	movl	%ecx,%esi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%edi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%edi,%ebx
	addl	0(%rsp),%eax
	vpaddd	%xmm3,%xmm10,%xmm10
	xorl	%ebp,%esi
	movl	%ebx,%edi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%esi
	movdqa	%xmm10,48(%rsp)
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%esi,%eax
	addl	4(%rsp),%ebp
	xorl	%edx,%edi
	movl	%eax,%esi
	shldl	$5,%eax,%eax
	xorl	%ecx,%edi
	addl	%eax,%ebp
	shrdl	$7,%ebx,%ebx
	addl	%edi,%ebp
	addl	8(%rsp),%edx
	xorl	%ecx,%esi
	movl	%ebp,%edi
	shldl	$5,%ebp,%ebp
	xorl	%ebx,%esi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%esi,%edx
	addl	12(%rsp),%ecx
	xorl	%ebx,%edi
	movl	%edx,%esi
	shldl	$5,%edx,%edx
	xorl	%eax,%edi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%edi,%ecx
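# All 16 message words of this block have been scheduled; check whether the
# input pointer (%r9) has reached the end-of-data limit (%r10).  If so, branch
# to .Ldone_avx to finish the last block; otherwise load the next 64-byte block
# below while the remaining rounds of this one complete.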
	cmpq	%r10,%r9
	je	.Ldone_avx
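# More input remains: reload the byte-swap mask (64(%r11)) and K_00_19
# (0(%r11)) from the K_XX_XX table, load the next 64-byte block from (%r9),
# byte-swap it with vpshufb, and start precomputing W[t]+K for the new block
# on the stack, interleaved with the final rounds of the current block.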
	vmovdqa	64(%r11),%xmm6
	vmovdqa	0(%r11),%xmm9
	vmovdqu	0(%r9),%xmm0
	vmovdqu	16(%r9),%xmm1
	vmovdqu	32(%r9),%xmm2
	vmovdqu	48(%r9),%xmm3
	vpshufb	%xmm6,%xmm0,%xmm0
	addq	$64,%r9
	addl	16(%rsp),%ebx
	xorl	%eax,%esi
	vpshufb	%xmm6,%xmm1,%xmm1
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	vpaddd	%xmm9,%xmm0,%xmm4
	xorl	%ebp,%esi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	vmovdqa	%xmm4,0(%rsp)
	addl	20(%rsp),%eax
	xorl	%ebp,%edi
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%edi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
	addl	24(%rsp),%ebp
	xorl	%edx,%esi
	movl	%eax,%edi
	shldl	$5,%eax,%eax
	xorl	%ecx,%esi
	addl	%eax,%ebp
	shrdl	$7,%ebx,%ebx
	addl	%esi,%ebp
	addl	28(%rsp),%edx
	xorl	%ecx,%edi
	movl	%ebp,%esi
	shldl	$5,%ebp,%ebp
	xorl	%ebx,%edi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%edi,%edx
	addl	32(%rsp),%ecx
	xorl	%ebx,%esi
	vpshufb	%xmm6,%xmm2,%xmm2
	movl	%edx,%edi
	shldl	$5,%edx,%edx
	vpaddd	%xmm9,%xmm1,%xmm5
	xorl	%eax,%esi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%esi,%ecx
	vmovdqa	%xmm5,16(%rsp)
	addl	36(%rsp),%ebx
	xorl	%eax,%edi
	movl	%ecx,%esi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%edi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%edi,%ebx
	addl	40(%rsp),%eax
	xorl	%ebp,%esi
	movl	%ebx,%edi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%esi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%esi,%eax
	addl	44(%rsp),%ebp
	xorl	%edx,%edi
	movl	%eax,%esi
	shldl	$5,%eax,%eax
	xorl	%ecx,%edi
	addl	%eax,%ebp
	shrdl	$7,%ebx,%ebx
	addl	%edi,%ebp
	addl	48(%rsp),%edx
	xorl	%ecx,%esi
	vpshufb	%xmm6,%xmm3,%xmm3
	movl	%ebp,%edi
	shldl	$5,%ebp,%ebp
	vpaddd	%xmm9,%xmm2,%xmm6
	xorl	%ebx,%esi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%esi,%edx
	vmovdqa	%xmm6,32(%rsp)
	addl	52(%rsp),%ecx
	xorl	%ebx,%edi
	movl	%edx,%esi
	shldl	$5,%edx,%edx
	xorl	%eax,%edi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%edi,%ecx
	addl	56(%rsp),%ebx
	xorl	%eax,%esi
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%esi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	addl	60(%rsp),%eax
	xorl	%ebp,%edi
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%edi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
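# End of this block's rounds: add the working variables back into the five
# 32-bit chaining values of the SHA-1 state at (%r8), store the updated state,
# and loop back for the next block.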
	addl	0(%r8),%eax
	addl	4(%r8),%esi
	addl	8(%r8),%ecx
	addl	12(%r8),%edx
	movl	%eax,0(%r8)
	addl	16(%r8),%ebp
	movl	%esi,4(%r8)
	movl	%esi,%ebx
	movl	%ecx,8(%r8)
	movl	%edx,12(%r8)
	movl	%ebp,16(%r8)
	jmp	.Loop_avx

.align	16
.Ldone_avx:
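# Final block: finish the remaining rounds as above, but without loading or
# scheduling a following block.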
	addl	16(%rsp),%ebx
	xorl	%eax,%esi
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%esi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	addl	20(%rsp),%eax
	xorl	%ebp,%edi
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%edi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
	addl	24(%rsp),%ebp
	xorl	%edx,%esi
	movl	%eax,%edi
	shldl	$5,%eax,%eax
	xorl	%ecx,%esi
	addl	%eax,%ebp
	shrdl	$7,%ebx,%ebx
	addl	%esi,%ebp
	addl	28(%rsp),%edx
	xorl	%ecx,%edi
	movl	%ebp,%esi
	shldl	$5,%ebp,%ebp
	xorl	%ebx,%edi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%edi,%edx
	addl	32(%rsp),%ecx
	xorl	%ebx,%esi
	movl	%edx,%edi
	shldl	$5,%edx,%edx
	xorl	%eax,%esi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%esi,%ecx
	addl	36(%rsp),%ebx
	xorl	%eax,%edi
	movl	%ecx,%esi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%edi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%edi,%ebx
	addl	40(%rsp),%eax
	xorl	%ebp,%esi
	movl	%ebx,%edi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%esi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%esi,%eax
	addl	44(%rsp),%ebp
	xorl	%edx,%edi
	movl	%eax,%esi
	shldl	$5,%eax,%eax
	xorl	%ecx,%edi
	addl	%eax,%ebp
	shrdl	$7,%ebx,%ebx
	addl	%edi,%ebp
	addl	48(%rsp),%edx
	xorl	%ecx,%esi
	movl	%ebp,%edi
	shldl	$5,%ebp,%ebp
	xorl	%ebx,%esi
	addl	%ebp,%edx
	shrdl	$7,%eax,%eax
	addl	%esi,%edx
	addl	52(%rsp),%ecx
	xorl	%ebx,%edi
	movl	%edx,%esi
	shldl	$5,%edx,%edx
	xorl	%eax,%edi
	addl	%edx,%ecx
	shrdl	$7,%ebp,%ebp
	addl	%edi,%ecx
	addl	56(%rsp),%ebx
	xorl	%eax,%esi
	movl	%ecx,%edi
	shldl	$5,%ecx,%ecx
	xorl	%ebp,%esi
	addl	%ecx,%ebx
	shrdl	$7,%edx,%edx
	addl	%esi,%ebx
	addl	60(%rsp),%eax
	xorl	%ebp,%edi
	movl	%ebx,%esi
	shldl	$5,%ebx,%ebx
	xorl	%edx,%edi
	addl	%ebx,%eax
	shrdl	$7,%ecx,%ecx
	addl	%edi,%eax
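# Clear the upper halves of the ymm registers (avoiding AVX->SSE transition
# penalties in the caller), then fold the final block's working variables into
# the SHA-1 chaining state at (%r8).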
	vzeroupper

	addl	0(%r8),%eax
	addl	4(%r8),%esi
	addl	8(%r8),%ecx
	movl	%eax,0(%r8)
	addl	12(%r8),%edx
	movl	%esi,4(%r8)
	addl	16(%r8),%ebp
	movl	%ecx,8(%r8)
	movl	%edx,12(%r8)
	movl	%ebp,16(%r8)
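# Restore %r12, %rbp and %rbx saved by the prologue and unwind the stack frame.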
	leaq	64(%rsp),%rsi
	movq	0(%rsi),%r12
	movq	8(%rsi),%rbp
	movq	16(%rsi),%rbx
	leaq	24(%rsi),%rsp
.Lepilogue_avx:
	.byte	0xf3,0xc3			# rep ret
.size	sha1_block_data_order_avx,.-sha1_block_data_order_avx
.align	64
K_XX_XX:
.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	# K_00_19
.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	# K_20_39
.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	# K_40_59
.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	# K_60_79
.long	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	# pbswap mask
.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.align	64