/*
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this file,
You can obtain one at http://mozilla.org/MPL/2.0/.

Copyright (c) 2007-2014, Marvell International Ltd.

Alternatively, this software may be distributed under the terms of the GNU
General Public License Version 2, and any use shall comply with the terms and
conditions of the GPL.  A copy of the GPL is available at
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
ARE EXPRESSLY DISCLAIMED.  The GPL license provides additional details about
this warranty disclaimer.
*/
 
#include <string.h>

/*
 * memset - fill the first len bytes at dest with the byte value of val.
 *
 * dest: start of the region to fill (any alignment).
 * val:  fill value; only its low 8 bits are used, because the C library
 *       contract converts the value to unsigned char before storing.
 * len:  number of bytes to fill; 0 stores nothing.
 *
 * Returns the original dest pointer.
 *
 * Implemented with 32-bit ARM inline assembly.  The pointer, the byte
 * counter and the fill byte are passed as asm operands, so the code does
 * not depend on where the compiler happens to keep the arguments.
 * r2-r9 are used as fixed scratch registers because stmia needs an
 * explicit register list; they are declared as clobbers.
 * NOTE(review): clobbering r7 may conflict with a Thumb frame pointer on
 * some toolchain configurations (the original code clobbered it too).
 */
void* memset(void *dest, int val, size_t len)
{
	unsigned char *dst = (unsigned char *)dest;	/* advancing store pointer */
	size_t cnt = len;							/* bytes still to be stored */
	unsigned int fill = (unsigned char)val;		/* fill byte, upper 24 bits clear */

	asm volatile
	(
/* Head: store single bytes until dst is word aligned or cnt hits 0. */
"1:									\n\t"
"	cmp	%[cnt], #0					\n\t"
"	beq	9f							\n\t" // nothing (left) to store
"	tst	%[dst], #3					\n\t"
"	beq	2f							\n\t" // dst is word aligned now
"	strb	%[fill], [%[dst]], #1	\n\t" // store 1 byte, bump dst
"	sub	%[cnt], %[cnt], #1			\n\t"
"	b	1b							\n\t"

/* Replicate the fill byte into all four bytes of r2, then copy to r3-r5. */
"2:									\n\t"
"	orr	r2, %[fill], %[fill], LSL #8	\n\t" // r2 = 0x0000VVVV
"	orr	r2, r2, r2, LSL #16			\n\t" // r2 = 0xVVVVVVVV
"	mov	r3, r2						\n\t"
"	mov	r4, r2						\n\t"
"	mov	r5, r2						\n\t"

"	cmp	%[cnt], #31					\n\t" // fewer than 8 words left?
"	bls	5f							\n\t" // yes: skip the 8-word loop

/* At least 32 bytes remain: one word store makes dst double-word aligned
   when it is only word aligned. */
"	tst	%[dst], #4					\n\t"
"	beq	3f							\n\t"
"	str	r2, [%[dst]], #4			\n\t"
"	sub	%[cnt], %[cnt], #4			\n\t"

"3:									\n\t"
"	mov	r6, r2						\n\t"
"	mov	r7, r2						\n\t"
"	mov	r8, r2						\n\t"
"	mov	r9, r2						\n\t"

/* 8-word loop: 32 bytes per iteration. */
"4:									\n\t"
"	cmp	%[cnt], #31					\n\t"
"	bls	5f							\n\t"
"	stmia	%[dst]!, {r2-r9}		\n\t" // store 8 words
"	sub	%[cnt], %[cnt], #32			\n\t"
"	b	4b							\n\t"

/* 4-word loop: 16 bytes per iteration. */
"5:									\n\t"
"	cmp	%[cnt], #15					\n\t"
"	bls	6f							\n\t"
"	stmia	%[dst]!, {r2-r5}		\n\t" // store 4 words
"	sub	%[cnt], %[cnt], #16			\n\t"
"	b	5b							\n\t"

/* Word loop: 4 bytes per iteration. */
"6:									\n\t"
"	cmp	%[cnt], #3					\n\t"
"	bls	7f							\n\t"
"	str	r2, [%[dst]], #4			\n\t" // store 1 word, bump dst
"	sub	%[cnt], %[cnt], #4			\n\t"
"	b	6b							\n\t"

/* Tail: the remaining 0-3 bytes. */
"7:									\n\t"
"	cmp	%[cnt], #0					\n\t"
"	beq	9f							\n\t"
"	strb	%[fill], [%[dst]], #1	\n\t" // store 1 byte, bump dst
"	sub	%[cnt], %[cnt], #1			\n\t"
"	b	7b							\n\t"

"9:									\n\t"

		: [dst] "+r" (dst), [cnt] "+r" (cnt)
		: [fill] "r" (fill)
		: "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
	);
	return dest;
}
/* Used by vim and some versions of vi: set tabstop=4 shiftwidth=4: */
