/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

 .syntax unified
/*
 * BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
 *
 * For each of the num limbs: rp[i] = low32(ap[i] * w + rp[i] + carry), with the
 * high word becoming the carry into the next limb.
 *
 * In:    r0 = rp, r1 = ap, r2 = num, r3 = w
 * Out:   r0 = final carry word (0 when num <= 0, and nothing is stored then)
 * Regs:  r4-r11 and lr are pushed on entry and restored on every exit path;
 *        ip is used as the rp cursor and is not preserved.
 */
.section ".text.nndetailCryptoBignumMulAddWords"
.global nndetailCryptoBignumMulAddWords
nndetailCryptoBignumMulAddWords:
    push    {r4-r11, lr}

    cmp     r2, #0                      // num <= 0: return 0 immediately
    movle   r0, #0
    pople   {r4-r11, pc}

    mov     ip, r0                      // ip = write/read cursor into rp
    lsrs    lr, r2, #2                  // lr = number of 4-limb groups
    mov     r0, #0                      // r0 = running carry word
    beq     .LMulAdd_tail               // fewer than 4 limbs in total

    // Prime the pipeline with the first two limbs of ap and rp.
    ldrd    r4, r5, [r1], #8
    ldrd    r8, r9, [ip]
.LMulAdd_quad:
    umaal   r8, r0, r3, r4              // r0:r8 = w * r4 + r8 + r0
    ldrd    r6, r7, [r1], #8            // ap limbs 2 and 3 of this group
    umaal   r9, r0, r3, r5
    ldrd    r10, r11, [ip, #8]          // rp limbs 2 and 3 of this group
    subs    lr, lr, #1                  // flags stay live until the bgt below
    strd    r8, r9, [ip], #8
    umaal   r10, r0, r3, r6
    ldrdgt  r4, r5, [r1], #8            // fetch the next group only if one remains
    umaal   r11, r0, r3, r7
    ldrdgt  r8, r9, [ip, #8]
    strd    r10, r11, [ip], #8
    bgt     .LMulAdd_quad

.LMulAdd_tail:
    ands    r2, r2, #3                  // 0..3 limbs left over
    popeq   {r4-r11, pc}

    ldr     r8, [ip]
    ldr     r4, [r1], #4
.LMulAdd_single:
    // Two limbs per trip, alternating between the r8/r4 and r9/r5 pairs so the
    // next limb can be loaded while the current one is being stored.
    subs    r2, r2, #1
    umaal   r8, r0, r3, r4
    ldrgt   r9, [ip, #4]
    ldrgt   r5, [r1], #4
    str     r8, [ip], #4
    popeq   {r4-r11, pc}                // that was the last limb
    subs    r2, r2, #1
    umaal   r9, r0, r3, r5
    ldrgt   r8, [ip, #4]
    ldrgt   r4, [r1], #4
    str     r9, [ip], #4
    bgt     .LMulAdd_single

    pop     {r4-r11, pc}
.size	nndetailCryptoBignumMulAddWords,.-nndetailCryptoBignumMulAddWords

/*
 * BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
 *
 * For each of the num limbs: rp[i] = low32(ap[i] * w + carry), with the high
 * word becoming the carry into the next limb. rp is only written, never read.
 *
 * In:    r0 = rp, r1 = ap, r2 = num, r3 = w
 * Out:   r0 = final carry word (0 when num <= 0, and nothing is stored then)
 * Regs:  r4-r11 and lr are pushed on entry and restored on every exit path;
 *        ip is used as the rp cursor and is not preserved.
 */
.section ".text.nndetailCryptoBignumMulWords"
.global nndetailCryptoBignumMulWords
nndetailCryptoBignumMulWords:
    push    {r4-r11, lr}

    cmp     r2, #0                      // num <= 0: return 0 immediately
    movle   r0, #0
    pople   {r4-r11, pc}

    mov     ip, r0                      // ip = write cursor into rp
    lsrs    lr, r2, #2                  // lr = number of 4-limb groups
    mov     r0, #0                      // r0 = running carry word
    beq     .LMul_tail                  // fewer than 4 limbs in total

    // The group loop needs lr as a pointer to a zero constant, so the group
    // count moves into r2 and the original num is parked on the stack.
    push    {r2}
    mov     r2, lr
    adr     lr, .LMul_zero

    ldrd    r4, r5, [r1], #8
    ldrd    r8, r9, [lr]                // r8 = r9 = 0 (umaal addend must be zero)
.LMul_quad:
    umaal   r8, r0, r3, r4              // r0:r8 = w * r4 + 0 + r0
    ldrd    r6, r7, [r1], #8
    umaal   r9, r0, r3, r5
    ldrd    r10, r11, [lr]              // r10 = r11 = 0
    subs    r2, r2, #1                  // flags stay live until the bgt below
    strd    r8, r9, [ip], #8
    umaal   r10, r0, r3, r6
    ldrdgt  r4, r5, [r1], #8            // fetch the next group only if one remains
    umaal   r11, r0, r3, r7
    ldrdgt  r8, r9, [lr]                // re-zero the first accumulator pair
    strd    r10, r11, [ip], #8
    bgt     .LMul_quad

    pop     {r2}                        // r2 = original num again

.LMul_tail:
    ands    r2, r2, #3                  // 0..3 limbs left over
    popeq   {r4-r11, pc}

    ldr     r4, [r1], #4
    mov     r8, #0
.LMul_single:
    // Two limbs per trip, alternating between the r8/r4 and r9/r5 pairs.
    subs    r2, r2, #1
    umaal   r8, r0, r3, r4
    movgt   r9, #0
    ldrgt   r5, [r1], #4
    str     r8, [ip], #4
    popeq   {r4-r11, pc}                // that was the last limb
    subs    r2, r2, #1
    umaal   r9, r0, r3, r5
    movgt   r8, #0
    ldrgt   r4, [r1], #4
    str     r9, [ip], #4
    bgt     .LMul_single

    pop     {r4-r11, pc}
.LMul_zero:
    // Zero words read by the ldrd instructions in the 4-limb loop.
    .int    0,0,0,0
.size	nndetailCryptoBignumMulWords,.-nndetailCryptoBignumMulWords


/*
 * BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 *
 * r[i] = a[i] + b[i] + carry for i in [0, n); returns the carry out of the
 * last limb (0 or 1).
 *
 * In:    r0 = r, r1 = a, r2 = b, r3 = n
 * Out:   r0 = 1 if the final addition carried, else 0 (0 when n <= 0)
 * Regs:  r4-r11 and lr are pushed on entry and restored on exit.
 *
 * The loop counters are updated with flag-setting instructions, so the carry
 * is parked in ip (a CPSR snapshot taken with mrs) across the bookkeeping and
 * written back to the flag bits with msr before the next adcs chain.
 */
.section ".text.nndetailCryptoBignumAddWords"
.global nndetailCryptoBignumAddWords
nndetailCryptoBignumAddWords:
    push    {r4-r11, lr}

    cmp     r3, #0                      // n <= 0: return 0 immediately
    movle   r0, #0
    pople   {r4-r11, pc}

    mov     ip, #0                      // saved flags: carry clear

    lsrs    lr, r3, #4                  // lr = number of 16-limb blocks
    beq     .LAdd_under16

.LAdd_block16:
    // First 4 limbs: restore the saved carry after the loads, then add.
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    msr     CPSR_f, ip
    adcs    r4, r4, r8
    adcs    r5, r5, r9
    adcs    r6, r6, r10
    adcs    r7, r7, r11
    stm     r0!, {r4-r7}

    // Remaining 12 limbs: the carry flows straight through the flags because
    // ldm/stm leave them untouched.
    .rept   3
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    adcs    r4, r4, r8
    adcs    r5, r5, r9
    adcs    r6, r6, r10
    adcs    r7, r7, r11
    stm     r0!, {r4-r7}
    .endr

    mrs     ip, CPSR                    // stash carry before subs clobbers it
    subs    lr, lr, #1
    bgt     .LAdd_block16

.LAdd_under16:
    ands    r3, r3, #15                 // 0..15 limbs remain
    beq     .LAdd_done

    lsrs    lr, r3, #2                  // lr = number of 4-limb groups
    beq     .LAdd_under4

.LAdd_block4:
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    msr     CPSR_f, ip
    adcs    r4, r4, r8
    adcs    r5, r5, r9
    adcs    r6, r6, r10
    adcs    r7, r7, r11
    stm     r0!, {r4-r7}
    mrs     ip, CPSR
    subs    lr, lr, #1
    bgt     .LAdd_block4

.LAdd_under4:
    ands    r3, r3, #3                  // 0..3 limbs remain
    beq     .LAdd_done

.LAdd_limb:
    ldr     r4, [r1], #4
    ldr     r8, [r2], #4
    msr     CPSR_f, ip
    adcs    r4, r4, r8
    str     r4, [r0], #4
    mrs     ip, CPSR
    subs    r3, r3, #1
    bgt     .LAdd_limb

.LAdd_done:
    msr     CPSR_f, ip                  // bring back the final carry
    movcs   r0, #1
    movcc   r0, #0
    pop     {r4-r11, pc}
.size nndetailCryptoBignumAddWords,.-nndetailCryptoBignumAddWords


/*
 * BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 *
 * r[i] = a[i] - b[i] - borrow for i in [0, n); returns the borrow out of the
 * last limb (0 or 1).
 *
 * In:    r0 = r, r1 = a, r2 = b, r3 = n
 * Out:   r0 = 1 if the final subtraction borrowed, else 0 (0 when n <= 0)
 * Regs:  r4-r11 and lr are pushed on entry and restored on exit.
 *
 * sbcs treats carry set as "no borrow", so the saved-flags word in ip starts
 * with the C bit (0x20000000) set. The loop counters are updated with
 * flag-setting instructions, so the flags are snapshotted with mrs after each
 * sbcs chain and written back with msr before the next one.
 */
.section ".text.nndetailCryptoBignumSubWords"
.global nndetailCryptoBignumSubWords
nndetailCryptoBignumSubWords:
    push    {r4-r11, lr}

    cmp     r3, #0                      // n <= 0: return 0 immediately
    movle   r0, #0
    pople   {r4-r11, pc}

    mov     ip, #0x20000000             // saved flags: carry set = no borrow

    lsrs    lr, r3, #4                  // lr = number of 16-limb blocks
    beq     .LSub_under16

.LSub_block16:
    // First 4 limbs: restore the saved carry after the loads, then subtract.
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    msr     CPSR_f, ip
    sbcs    r4, r4, r8
    sbcs    r5, r5, r9
    sbcs    r6, r6, r10
    sbcs    r7, r7, r11
    stm     r0!, {r4-r7}

    // Remaining 12 limbs: the borrow flows straight through the flags because
    // ldm/stm leave them untouched.
    .rept   3
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    sbcs    r4, r4, r8
    sbcs    r5, r5, r9
    sbcs    r6, r6, r10
    sbcs    r7, r7, r11
    stm     r0!, {r4-r7}
    .endr

    mrs     ip, CPSR                    // stash carry before subs clobbers it
    subs    lr, lr, #1
    bgt     .LSub_block16

.LSub_under16:
    ands    r3, r3, #15                 // 0..15 limbs remain
    beq     .LSub_done

    lsrs    lr, r3, #2                  // lr = number of 4-limb groups
    beq     .LSub_under4

.LSub_block4:
    ldm     r1!, {r4-r7}
    ldm     r2!, {r8-r11}
    msr     CPSR_f, ip
    sbcs    r4, r4, r8
    sbcs    r5, r5, r9
    sbcs    r6, r6, r10
    sbcs    r7, r7, r11
    stm     r0!, {r4-r7}
    mrs     ip, CPSR
    subs    lr, lr, #1
    bgt     .LSub_block4

.LSub_under4:
    ands    r3, r3, #3                  // 0..3 limbs remain
    beq     .LSub_done

.LSub_limb:
    ldr     r4, [r1], #4
    ldr     r8, [r2], #4
    msr     CPSR_f, ip
    sbcs    r4, r4, r8
    str     r4, [r0], #4
    mrs     ip, CPSR
    subs    r3, r3, #1
    bgt     .LSub_limb

.LSub_done:
    msr     CPSR_f, ip                  // bring back the final carry
    movcc   r0, #1                      // carry clear means a borrow occurred
    movcs   r0, #0
    pop     {r4-r11, pc}
.size nndetailCryptoBignumSubWords,.-nndetailCryptoBignumSubWords


/*
 * void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 *
 * 8x8-limb column-wise (Comba) multiplication: r[0..15] = a[0..7] * b[0..7].
 * Column k sums every a[i]*b[j] with i + j == k into a three-word accumulator,
 * stores the low word to r[k], and carries the other two words into column k+1.
 *
 * In:    r0 = r (16 words, written in ascending order), r1 = a, r2 = b
 * Regs:  r3/r4/r5 rotate through the accumulator roles low/mid/high;
 *        r6,r7 hold limbs of a and r8,r9 limbs of b (reloaded as needed);
 *        r10:r11 and ip:lr alternate as the 64-bit product being formed.
 *        r4-r11 and lr are pushed on entry and restored on exit.
 * Note:  limbs of a and b are re-read after words of r have been stored, so
 *        r is expected not to overlap a or b.
 *
 * Each multiply is issued one step ahead of the accumulation that consumes it;
 * the comment on every umull names the product, and the MULC8_ACC that adds it
 * is the second one following that umull.
 */
.macro MULC8_ACC acc0, acc1, acc2, plo, phi
    adds    \acc0, \acc0, \plo
    adcs    \acc1, \acc1, \phi
    adc     \acc2, \acc2, #0
.endm

.section ".text.nndetailCryptoBignum_mul_comba8"
.global nndetailCryptoBignum_mul_comba8
nndetailCryptoBignum_mul_comba8:
    push    {r4-r11, lr}

    ldrd    r6, r7, [r1]                // a0, a1
    ldrd    r8, r9, [r2]                // b0, b1

    mov     r3, #0
    mov     r4, #0
    mov     r5, #0

    // ---- column 0: a0*b0 ----
    umull   r10, r11, r6, r8            // a0*b0
    umull   ip, lr, r6, r9              // a0*b1
    MULC8_ACC r3, r4, r5, r10, r11
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 1: a0*b1 + a1*b0 ----
    umull   r10, r11, r7, r8            // a1*b0
    ldr     r6, [r1, #8]                // r6 = a2
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r6, r8              // a2*b0
    MULC8_ACC r4, r5, r3, r10, r11
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 2: a2*b0 + a1*b1 + a0*b2 ----
    umull   r10, r11, r7, r9            // a1*b1
    ldr     r6, [r1]                    // r6 = a0
    ldr     r8, [r2, #8]                // r8 = b2
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r8              // a0*b2
    ldr     r9, [r2, #12]               // r9 = b3
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r6, r9            // a0*b3
    MULC8_ACC r5, r3, r4, ip, lr
    str     r5, [r0], #4
    mov     r5, #0

    // ---- column 3: a0*b3 + a1*b2 + a2*b1 + a3*b0 ----
    umull   ip, lr, r7, r8              // a1*b2
    ldrd    r6, r7, [r1, #8]            // a2, a3
    ldrd    r8, r9, [r2]                // b0, b1
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r6, r9            // a2*b1
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r7, r8              // a3*b0
    ldr     r6, [r1, #16]               // r6 = a4
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r6, r8            // a4*b0
    MULC8_ACC r3, r4, r5, ip, lr
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 4: a4*b0 + a3*b1 + a2*b2 + a1*b3 + a0*b4 ----
    umull   ip, lr, r7, r9              // a3*b1
    ldrd    r6, r7, [r1, #4]            // a1, a2
    ldrd    r8, r9, [r2, #8]            // b2, b3
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r7, r8            // a2*b2
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r6, r9              // a1*b3
    ldr     r7, [r1]                    // r7 = a0
    ldr     r8, [r2, #16]               // r8 = b4
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r7, r8            // a0*b4
    ldr     r9, [r2, #20]               // r9 = b5
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r9              // a0*b5
    MULC8_ACC r4, r5, r3, r10, r11
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 5: a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0 ----
    umull   r10, r11, r6, r8            // a1*b4
    ldrd    r6, r7, [r1, #8]            // a2, a3
    ldrd    r8, r9, [r2, #8]            // b2, b3
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r9              // a2*b3
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r8            // a3*b2
    ldrd    r6, r7, [r1, #16]           // a4, a5
    ldrd    r8, r9, [r2]                // b0, b1
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r9              // a4*b1
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r8            // a5*b0
    ldr     r6, [r1, #24]               // r6 = a6
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r8              // a6*b0
    MULC8_ACC r5, r3, r4, r10, r11
    str     r5, [r0], #4
    mov     r5, #0

    // ---- column 6: a6*b0 + a5*b1 + a4*b2 + a3*b3 + a2*b4 + a1*b5 + a0*b6 ----
    umull   r10, r11, r7, r9            // a5*b1
    ldrd    r6, r7, [r1, #12]           // a3, a4
    ldrd    r8, r9, [r2, #8]            // b2, b3
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r7, r8              // a4*b2
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r6, r9            // a3*b3
    ldrd    r6, r7, [r1, #4]            // a1, a2
    ldrd    r8, r9, [r2, #16]           // b4, b5
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r7, r8              // a2*b4
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r6, r9            // a1*b5
    ldr     r7, [r1]                    // r7 = a0
    ldr     r8, [r2, #24]               // r8 = b6
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r7, r8              // a0*b6
    ldr     r9, [r2, #28]               // r9 = b7
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r7, r9            // a0*b7
    MULC8_ACC r3, r4, r5, ip, lr
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 7: a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0 ----
    umull   ip, lr, r6, r8              // a1*b6
    ldrd    r6, r7, [r1, #8]            // a2, a3
    ldrd    r8, r9, [r2, #16]           // b4, b5
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r6, r9            // a2*b5
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r8              // a3*b4
    ldrd    r6, r7, [r1, #16]           // a4, a5
    ldrd    r8, r9, [r2, #8]            // b2, b3
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r6, r9            // a4*b3
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r8              // a5*b2
    ldrd    r6, r7, [r1, #24]           // a6, a7
    ldrd    r8, r9, [r2]                // b0, b1
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r6, r9            // a6*b1
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r8              // a7*b0
    ldr     r8, [r2, #8]                // r8 = b2
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r7, r9            // a7*b1
    MULC8_ACC r4, r5, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 8: a7*b1 + a6*b2 + a5*b3 + a4*b4 + a3*b5 + a2*b6 + a1*b7 ----
    umull   ip, lr, r6, r8              // a6*b2
    ldrd    r6, r7, [r1, #16]           // a4, a5
    ldrd    r8, r9, [r2, #12]           // b3, b4
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r8            // a5*b3
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r9              // a4*b4
    ldrd    r6, r7, [r1, #8]            // a2, a3
    ldrd    r8, r9, [r2, #20]           // b5, b6
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r8            // a3*b5
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r9              // a2*b6
    ldr     r7, [r1, #4]                // r7 = a1
    ldr     r8, [r2, #28]               // r8 = b7
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r8            // a1*b7
    ldr     r7, [r1, #12]               // r7 = a3
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r6, r8              // a2*b7
    MULC8_ACC r5, r3, r4, r10, r11
    str     r5, [r0], #4
    mov     r5, #0

    // ---- column 9: a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 ----
    umull   r10, r11, r7, r9            // a3*b6
    ldrd    r6, r7, [r1, #16]           // a4, a5
    ldrd    r8, r9, [r2, #16]           // b4, b5
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r6, r9              // a4*b5
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r7, r8            // a5*b4
    ldrd    r6, r7, [r1, #24]           // a6, a7
    ldrd    r8, r9, [r2, #8]            // b2, b3
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r6, r9              // a6*b3
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r7, r8            // a7*b2
    ldr     r8, [r2, #16]               // r8 = b4
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r7, r9              // a7*b3
    MULC8_ACC r3, r4, r5, r10, r11
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 10: a7*b3 + a6*b4 + a5*b5 + a4*b6 + a3*b7 ----
    umull   r10, r11, r6, r8            // a6*b4
    ldrd    r6, r7, [r1, #16]           // a4, a5
    ldrd    r8, r9, [r2, #20]           // b5, b6
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r8              // a5*b5
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r6, r9            // a4*b6
    ldr     r7, [r1, #12]               // r7 = a3
    ldr     r8, [r2, #28]               // r8 = b7
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r8              // a3*b7
    ldr     r7, [r1, #20]               // r7 = a5
    MULC8_ACC r4, r5, r3, r10, r11
    umull   r10, r11, r6, r8            // a4*b7
    MULC8_ACC r4, r5, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 11: a4*b7 + a5*b6 + a6*b5 + a7*b4 ----
    umull   ip, lr, r7, r9              // a5*b6
    ldrd    r6, r7, [r1, #24]           // a6, a7
    ldrd    r8, r9, [r2, #16]           // b4, b5
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r6, r9            // a6*b5
    MULC8_ACC r5, r3, r4, ip, lr
    umull   ip, lr, r7, r8              // a7*b4
    ldr     r8, [r2, #24]               // r8 = b6
    MULC8_ACC r5, r3, r4, r10, r11
    umull   r10, r11, r7, r9            // a7*b5
    MULC8_ACC r5, r3, r4, ip, lr
    str     r5, [r0], #4
    mov     r5, #0

    // ---- column 12: a7*b5 + a6*b6 + a5*b7 ----
    umull   ip, lr, r6, r8              // a6*b6
    ldr     r6, [r1, #20]               // r6 = a5
    ldr     r9, [r2, #28]               // r9 = b7
    MULC8_ACC r3, r4, r5, r10, r11
    umull   r10, r11, r6, r9            // a5*b7
    ldr     r6, [r1, #24]               // r6 = a6
    MULC8_ACC r3, r4, r5, ip, lr
    umull   ip, lr, r6, r9              // a6*b7
    MULC8_ACC r3, r4, r5, r10, r11
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 13: a6*b7 + a7*b6 ----
    umull   r10, r11, r7, r8            // a7*b6
    MULC8_ACC r4, r5, r3, ip, lr
    umull   ip, lr, r7, r9              // a7*b7
    MULC8_ACC r4, r5, r3, r10, r11
    str     r4, [r0], #4

    // ---- columns 14 and 15: a7*b7 plus the carried words ----
    adds    r5, r5, ip
    str     r5, [r0], #4
    adcs    r3, r3, lr
    str     r3, [r0], #4

    pop     {r4-r11, pc}
.size nndetailCryptoBignum_mul_comba8,.-nndetailCryptoBignum_mul_comba8
.purgem MULC8_ACC
    

/*
 * BN_ULONG bn_sqr_comba8b(BN_ULONG *r,const BN_ULONG *a)
 *
 * 8-limb column-wise (Comba) squaring: r[0..15] = a[0..7] * a[0..7].
 * Column k sums a[i]*a[j] over i + j == k into a three-word accumulator.
 * Each cross product (i != j) is computed once and accumulated twice; each
 * square a[i]*a[i] is accumulated once.
 *
 * In:    r0 = r (16 words, written in ascending order), r1 = a
 * Out:   no result is placed in r0; it is left pointing just past r[15].
 * Regs:  r2/r3/r4 rotate through the accumulator roles low/mid/high;
 *        r5-r9 hold limbs of a (reloaded as needed);
 *        r10:r11 and ip:lr alternate as the 64-bit product being formed.
 *        r4-r11 and lr are pushed on entry and restored on exit.
 * Note:  limbs of a are re-read after words of r have been stored, so r is
 *        expected not to overlap a.
 *
 * Each multiply is issued one step ahead of the accumulation that consumes it;
 * the comment on every umull names the product, and the accumulation that adds
 * it follows the next umull.
 */
.macro SQRC8_ACC acc0, acc1, acc2, plo, phi
    adds    \acc0, \acc0, \plo
    adcs    \acc1, \acc1, \phi
    adc     \acc2, \acc2, #0
.endm

.macro SQRC8_ACC2 acc0, acc1, acc2, plo, phi
    adds    \acc0, \acc0, \plo
    adcs    \acc1, \acc1, \phi
    adc     \acc2, \acc2, #0
    adds    \acc0, \acc0, \plo
    adcs    \acc1, \acc1, \phi
    adc     \acc2, \acc2, #0
.endm

.section ".text.nndetailCryptoBignum_sqr_comba8"
.global nndetailCryptoBignum_sqr_comba8
nndetailCryptoBignum_sqr_comba8:
    push    {r4-r11, lr}

    ldm     r1, {r5-r9}                 // r5..r9 = a0..a4

    mov     r2, #0
    mov     r3, #0
    mov     r4, #0

    // ---- column 0: a0^2 ----
    umull   r10, r11, r5, r5            // a0*a0
    umull   ip, lr, r6, r5              // a1*a0
    SQRC8_ACC r2, r3, r4, r10, r11
    str     r2, [r0], #4
    mov     r2, #0

    // ---- column 1: 2*a1*a0 ----
    umull   r10, r11, r6, r6            // a1*a1
    SQRC8_ACC2 r3, r4, r2, ip, lr
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 2: a1^2 + 2*a2*a0 ----
    umull   ip, lr, r7, r5              // a2*a0
    SQRC8_ACC r4, r2, r3, r10, r11
    umull   r10, r11, r8, r5            // a3*a0
    SQRC8_ACC2 r4, r2, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 3: 2*a3*a0 + 2*a2*a1 ----
    umull   ip, lr, r7, r6              // a2*a1
    SQRC8_ACC2 r2, r3, r4, r10, r11
    umull   r10, r11, r7, r7            // a2*a2
    SQRC8_ACC2 r2, r3, r4, ip, lr
    str     r2, [r0], #4
    mov     r2, #0

    // ---- column 4: a2^2 + 2*a3*a1 + 2*a4*a0 ----
    umull   ip, lr, r8, r6              // a3*a1
    SQRC8_ACC r3, r4, r2, r10, r11
    umull   r10, r11, r9, r5            // a4*a0
    ldr     r8, [r1, #20]               // r8 = a5
    SQRC8_ACC2 r3, r4, r2, ip, lr
    umull   ip, lr, r8, r5              // a5*a0
    SQRC8_ACC2 r3, r4, r2, r10, r11
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 5: 2*a5*a0 + 2*a4*a1 + 2*a3*a2 ----
    umull   r10, r11, r9, r6            // a4*a1
    ldr     r8, [r1, #12]               // r8 = a3
    SQRC8_ACC2 r4, r2, r3, ip, lr
    umull   ip, lr, r8, r7              // a3*a2
    SQRC8_ACC2 r4, r2, r3, r10, r11
    umull   r10, r11, r8, r8            // a3*a3
    SQRC8_ACC2 r4, r2, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 6: a3^2 + 2*a4*a2 + 2*a5*a1 + 2*a6*a0 ----
    umull   ip, lr, r9, r7              // a4*a2
    ldrd    r8, r9, [r1, #20]           // a5, a6
    SQRC8_ACC r2, r3, r4, r10, r11
    umull   r10, r11, r8, r6            // a5*a1
    SQRC8_ACC2 r2, r3, r4, ip, lr
    umull   ip, lr, r9, r5              // a6*a0
    ldr     r7, [r1, #28]               // r7 = a7
    SQRC8_ACC2 r2, r3, r4, r10, r11
    umull   r10, r11, r7, r5            // a7*a0
    SQRC8_ACC2 r2, r3, r4, ip, lr
    str     r2, [r0], #4
    mov     r2, #0

    // ---- column 7: 2*a7*a0 + 2*a6*a1 + 2*a5*a2 + 2*a4*a3 ----
    umull   ip, lr, r9, r6              // a6*a1
    ldr     r5, [r1, #8]                // r5 = a2
    SQRC8_ACC r3, r4, r2, r10, r11
    ldrd    r6, r7, [r1, #12]           // a3, a4
    SQRC8_ACC r3, r4, r2, r10, r11
    umull   r10, r11, r8, r5            // a5*a2
    SQRC8_ACC2 r3, r4, r2, ip, lr
    umull   ip, lr, r7, r6              // a4*a3
    SQRC8_ACC2 r3, r4, r2, r10, r11
    umull   r10, r11, r7, r7            // a4*a4
    SQRC8_ACC2 r3, r4, r2, ip, lr
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 8: a4^2 + 2*a5*a3 + 2*a6*a2 + 2*a7*a1 ----
    umull   ip, lr, r8, r6              // a5*a3
    SQRC8_ACC r4, r2, r3, r10, r11
    umull   r10, r11, r9, r5            // a6*a2
    ldr     r8, [r1, #4]                // r8 = a1
    SQRC8_ACC r4, r2, r3, ip, lr
    ldr     r9, [r1, #28]               // r9 = a7
    SQRC8_ACC r4, r2, r3, ip, lr
    umull   ip, lr, r9, r8              // a7*a1
    SQRC8_ACC2 r4, r2, r3, r10, r11
    umull   r10, r11, r9, r5            // a7*a2
    ldrd    r8, r9, [r1, #20]           // a5, a6
    SQRC8_ACC2 r4, r2, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 9: 2*a7*a2 + 2*a6*a3 + 2*a5*a4 ----
    umull   ip, lr, r9, r6              // a6*a3
    SQRC8_ACC2 r2, r3, r4, r10, r11
    umull   r10, r11, r8, r7            // a5*a4
    SQRC8_ACC2 r2, r3, r4, ip, lr
    umull   ip, lr, r8, r8              // a5*a5
    SQRC8_ACC2 r2, r3, r4, r10, r11
    str     r2, [r0], #4
    mov     r2, #0

    // ---- column 10: a5^2 + 2*a6*a4 + 2*a7*a3 ----
    umull   r10, r11, r9, r7            // a6*a4
    ldr     r5, [r1, #28]               // r5 = a7
    SQRC8_ACC r3, r4, r2, ip, lr
    umull   ip, lr, r5, r6              // a7*a3
    SQRC8_ACC2 r3, r4, r2, r10, r11
    umull   r10, r11, r5, r7            // a7*a4
    SQRC8_ACC2 r3, r4, r2, ip, lr
    str     r3, [r0], #4
    mov     r3, #0

    // ---- column 11: 2*a7*a4 + 2*a6*a5 ----
    umull   ip, lr, r9, r8              // a6*a5
    SQRC8_ACC2 r4, r2, r3, r10, r11
    umull   r10, r11, r9, r9            // a6*a6
    SQRC8_ACC2 r4, r2, r3, ip, lr
    str     r4, [r0], #4
    mov     r4, #0

    // ---- column 12: a6^2 + 2*a7*a5 ----
    umull   ip, lr, r5, r8              // a7*a5
    SQRC8_ACC r2, r3, r4, r10, r11
    umull   r10, r11, r5, r9            // a7*a6
    SQRC8_ACC2 r2, r3, r4, ip, lr
    str     r2, [r0], #4
    mov     r2, #0

    // ---- column 13: 2*a7*a6 ----
    umull   ip, lr, r5, r5              // a7*a7
    SQRC8_ACC2 r3, r4, r2, r10, r11
    str     r3, [r0], #4

    // ---- columns 14 and 15: a7^2 plus the carried words ----
    adds    r4, r4, ip
    str     r4, [r0], #4
    adcs    r2, r2, lr
    str     r2, [r0], #4

    pop     {r4-r11, pc}
.size nndetailCryptoBignum_sqr_comba8,.-nndetailCryptoBignum_sqr_comba8
.purgem SQRC8_ACC
.purgem SQRC8_ACC2
