1 #ifndef _ASMMEMFUNCS_H_
2 #define _ASMMEMFUNCS_H_
/*
 * memset32(_dst, _c, _count): fill _count 32-bit words at _dst with the
 * value _c, using ARM inline assembly.  Small residues are handled with
 * conditionally-executed STR/STM (no branches needed for them); the main
 * loop stores four identical words per STM.
 * NOTE(review): this copy of the file is missing lines (the embedded
 * original line numbers jump: 8, 10, 13, 18, 21, 23, 30, 32-36 are
 * absent) — the local labels, branch instructions, the asm clobber
 * list and the closing "); })" are not visible here.  Recover the full
 * macro from the original source before use; code below is left
 * byte-identical.
 */
4 #define memset32(_dst, _c, _count) \
5 ({ uint32_t *dst = (_dst); uint32_t c = (_c); int count = (_count); uint32_t dummy0, dummy1, dummy2; \
6 __asm__ __volatile__ ( \
7 " cmp %[count], #4\n" \
9 " mov %[dummy0], %[c]\n" /* replicate c so STM can store two identical words */ \
11 " strne %[c], [%[dst]], #4\n" /* conditional on flags from a line missing in this copy (presumably an alignment TST) */ \
12 " subne %[count], %[count], #1\n" \
14 " stmneia %[dst]!, {%[dummy0], %[c]}\n" /* likewise conditional; stores a word pair */ \
15 " subne %[count], %[count], #2\n" \
16 " mov %[dummy1], %[c]\n" \
17 " mov %[dummy2], %[c]\n" /* now four registers hold c for the 4-word STM below */ \
19 " subs %[count], %[count], #4\n" /* presumably the main-loop body; its label/branch lines are missing here */ \
20 " stmgeia %[dst]!, {%[dummy0], %[dummy1], %[dummy2], %[c]}\n" \
22 " add %[count], %[count], #4\n" /* undo the final over-subtraction of 4 */ \
24 " subs %[count], %[count], #1\n" /* tail: store up to 3 remaining words, one conditional STR each */ \
25 " strge %[c], [%[dst]], #4\n" \
26 " subs %[count], %[count], #1\n" \
27 " strge %[c], [%[dst]], #4\n" \
28 " subs %[count], %[count], #1\n" \
29 " strge %[c], [%[dst]], #4\n" \
31 : [dst] "+&r" (dst), [count] "+&r" (count), [dummy0] "=&r" (dummy0), [dummy1] "=&r" (dummy1), [dummy2] "=&r" (dummy2), [c] "+&r" (c) \
/*
 * memset16(_dst, _c, _count): fill _count 16-bit halfwords at _dst with
 * the value _c.  Stores one halfword if needed to reach 32-bit alignment,
 * widens c to a 32-bit pattern (ORR with itself shifted left 16), then
 * stores four words (8 halfwords) per STM in the main loop, with
 * conditionally-executed tail stores for the remainder.
 * NOTE(review): lines are missing from this copy (embedded numbers 41,
 * 43, 50-51, 54, 63, 74, 76-80 are absent) — branch instructions, the
 * asm clobber list and the closing "); })" are not visible.  Code below
 * is left byte-identical.
 */
37 #define memset16(_dst, _c, _count) \
38 ({ uint16_t *dst = (_dst); uint16_t c = (_c); int count = (_count); uint32_t dummy0, dummy1, dummy2; \
39 __asm__ __volatile__ ( \
40 " cmp %[count], #2\n" \
42 /* Alignment is known to be at least 16-bit */ \
44 " strneh %[c], [%[dst]], #2\n" /* conditional on flags from a line missing in this copy (presumably a TST on dst) */ \
45 " subne %[count], %[count], #1\n" \
46 /* Now we are 32-bit aligned (need to upgrade 'c' to 32-bit )*/ \
47 " orr %[c], %[c], %[c], asl #16\n" /* duplicate the halfword into both halves of the register */ \
48 " mov %[dummy0], %[c]\n" \
49 " cmp %[count], #8\n" \
52 " strne %[c], [%[dst]], #4\n" \
53 " subne %[count], %[count], #2\n" \
55 " stmneia %[dst]!, {%[dummy0], %[c]}\n" \
56 " subne %[count], %[count], #4\n" \
57 /* Now we are 128-bit aligned */ \
58 " mov %[dummy1], %[c]\n" \
59 " mov %[dummy2], %[c]\n" \
60 "1:\n" /* Copy 4 32-bit values per loop iteration */ \
61 " subs %[count], %[count], #8\n" /* count is in halfwords: 8 halfwords = 4 words per pass */ \
62 " stmgeia %[dst]!, {%[dummy0], %[dummy1], %[dummy2], %[c]}\n" \
64 " add %[count], %[count], #8\n" /* undo the final over-subtraction; the loop back-branch line is missing here */ \
65 "2:\n" /* Copy up to 3 remaining 32-bit values */ \
66 " tst %[count], #4\n" \
67 " stmneia %[dst]!, {%[dummy0], %[c]}\n" \
68 " tst %[count], #2\n" \
69 " strne %[c], [%[dst]], #4\n" \
70 " and %[count], %[count], #1\n" \
71 "3:\n" /* Copy up to 1 remaining 16-bit value */ \
72 " subs %[count], %[count], #1\n" \
73 " strgeh %[c], [%[dst]], #2\n" \
75 : [dst] "+&r" (dst), [count] "+&r" (count), [dummy0] "=&r" (dummy0), [dummy1] "=&r" (dummy1), [dummy2] "=&r" (dummy2), [c] "+&r" (c) \
/*
 * memcpy32(_dst, _src, _count): copy _count 32-bit words from _src to
 * _dst (both assumed 32-bit aligned — TODO confirm against callers).
 * Uses LDM/STM bursts — four words per main-loop iteration, pairs and
 * single words for the residue.  Clobbers r4-r7 (declared in the
 * clobber list below).
 * NOTE(review): lines are missing from this copy (embedded numbers
 * 85-86, 90, 94, 98, 100, 107, 109, 111-113 are absent) — alignment
 * TSTs, labels, branches and the closing "); })" are not visible.
 * Code below is left byte-identical.
 */
81 #define memcpy32(_dst, _src, _count) \
82 ({ uint32_t *dst = (_dst); uint32_t *src = (_src); int count = (_count); \
83 __asm__ __volatile__ ( \
84 " cmp %[count], #4\n" \
87 " ldrne r4, [%[src]], #4\n" /* conditional on flags from a line missing in this copy */ \
88 " strne r4, [%[dst]], #4\n" \
89 " subne %[count], %[count], #1\n" \
91 " ldmneia %[src]!, {r4-r5}\n" /* word-pair copy, also conditional */ \
92 " stmneia %[dst]!, {r4-r5}\n" \
93 " subne %[count], %[count], #2\n" \
95 " subs %[count], %[count], #4\n" /* presumably the 4-word main loop; its label/branch lines are missing here */ \
96 " ldmgeia %[src]!, {r4-r7}\n" \
97 " stmgeia %[dst]!, {r4-r7}\n" \
99 " add %[count], %[count], #4\n" /* undo the final over-subtraction of 4 */ \
101 " tst %[count], #2\n" /* tail: up to 3 remaining words */ \
102 " ldmneia %[src]!, {r4-r5}\n" \
103 " stmneia %[dst]!, {r4-r5}\n" \
104 " tst %[count], #1\n" \
105 " ldrne r4, [%[src]], #4\n" \
106 " strne r4, [%[dst]], #4\n" \
108 : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count) \
110 : "r4", "r5", "r6", "r7", "cc", "memory" \
/*
 * memcpy16(_dst, _src, _count): copy _count 16-bit halfwords from _src
 * to _dst; both pointers are known to be at least 16-bit aligned (see
 * comment below).  The code first aligns the destination to 32 bits,
 * then takes one of two paths depending on whether the source is also
 * 32-bit aligned: a plain LDM/STM path (labels 1-2), or a realigning
 * path (labels 3-6) that loads halfwords/words and reassembles aligned
 * 32-bit values with ORR/shift pairs.  Clobbers r4-r7 plus the dummy0
 * operand (declared below).
 * NOTE(review): lines are missing from this copy (embedded numbers
 * 118, 127, 130, 143, 153, 156, 185, 193, 199, 201+ are absent) — the
 * branches that select between the aligned/unaligned paths, the loop
 * back-branches, the closing "); })" of this asm and the header's
 * #endif are not visible.  Code below is left byte-identical.
 */
114 #define memcpy16(_dst, _src, _count) \
115 ({ uint16_t *dst = (_dst); uint16_t *src = (_src); int count = (_count); uint32_t dummy0; \
116 __asm__ __volatile__ ( \
117 " cmp %[count], #2\n" \
119 /* Alignment is known to be at least 16-bit */ \
120 " tst %[dst], #2\n" \
121 " ldrneh r4, [%[src]], #2\n" /* copy one halfword to make dst 32-bit aligned */ \
122 " strneh r4, [%[dst]], #2\n" \
123 " subne %[count], %[count], #1\n" \
124 /* Now destination address is 32-bit aligned, still need to check whether */ \
125 /* source is 32-bit aligned or not */ \
126 " tst %[src], #2\n" /* the branch consuming this test (to label 3) is missing in this copy */ \
128 /* Both destination and source are 32-bit aligned */ \
129 " cmp %[count], #8\n" \
131 " tst %[dst], #4\n" \
132 " ldrne r4, [%[src]], #4\n" \
133 " strne r4, [%[dst]], #4\n" \
134 " subne %[count], %[count], #2\n" \
135 " tst %[dst], #8\n" \
136 " ldmneia %[src]!, {r4-r5}\n" \
137 " stmneia %[dst]!, {r4-r5}\n" \
138 " subne %[count], %[count], #4\n" \
139 /* Destination address is 128-bit aligned, source address is 32-bit aligned */ \
140 "1: subs %[count], %[count], #8\n" \
141 " ldmgeia %[src]!, {r4-r7}\n" \
142 " stmgeia %[dst]!, {r4-r7}\n" \
144 " add %[count], %[count], #8\n" /* loop back-branch line missing in this copy */ \
145 /* Copy up to 3 remaining aligned 32-bit values */ \
146 "2: tst %[count], #4\n" \
147 " ldmneia %[src]!, {r4-r5}\n" \
148 " stmneia %[dst]!, {r4-r5}\n" \
149 " tst %[count], #2\n" \
150 " ldrne r4, [%[src]], #4\n" \
151 " strne r4, [%[dst]], #4\n" \
152 " and %[count], %[count], #1\n" /* fall to the final halfword copy; branch to 6 missing here */ \
154 /* Destination is 32-bit aligned, but source is only 16-bit aligned */ \
155 "3: cmp %[count], #8\n" \
157 " tst %[dst], #4\n" \
158 " ldrneh r4, [%[src]], #2\n" /* fetch two halfwords, merge into one aligned word */ \
159 " ldrneh r5, [%[src]], #2\n" \
160 " orrne r4, r4, r5, asl #16\n" \
161 " strne r4, [%[dst]], #4\n" \
162 " subne %[count], %[count], #2\n" \
163 " tst %[dst], #8\n" \
164 " ldrneh r4, [%[src]], #2\n" \
165 " ldrne r5, [%[src]], #4\n" \
166 " ldrneh r6, [%[src]], #2\n" \
167 " orrne r4, r4, r5, asl #16\n" \
168 " movne r5, r5, lsr #16\n" \
169 " orrne r5, r5, r6, asl #16\n" \
170 " stmneia %[dst]!, {r4-r5}\n" \
171 " subne %[count], %[count], #4\n" \
172 /* Destination is 128-bit aligned, but source is only 16-bit aligned */ \
173 "4: subs %[count], %[count], #8\n" \
174 " ldrgeh r4, [%[src]], #2\n" /* leading halfword */ \
175 " ldmgeia %[src]!, {r5-r7}\n" /* three aligned words */ \
176 " ldrgeh %[dummy0], [%[src]], #2\n" /* trailing halfword */ \
177 " orrge r4, r4, r5, asl #16\n" /* shift/OR cascade realigns the 8 halfwords into r4-r7 */ \
178 " movge r5, r5, lsr #16\n" \
179 " orrge r5, r5, r6, asl #16\n" \
180 " movge r6, r6, lsr #16\n" \
181 " orrge r6, r6, r7, asl #16\n" \
182 " movge r7, r7, lsr #16\n" \
183 " orrge r7, r7, %[dummy0], asl #16\n" \
184 " stmgeia %[dst]!, {r4-r7}\n" \
186 " add %[count], %[count], #8\n" /* loop back-branch line missing in this copy */ \
187 /* Copy up to 6 remaining 16-bit values (to 32-bit aligned destination) */ \
188 "5: subs %[count], %[count], #2\n" \
189 " ldrgeh r4, [%[src]], #2\n" \
190 " ldrgeh r5, [%[src]], #2\n" \
191 " orrge r4, r4, r5, asl #16\n" \
192 " strge r4, [%[dst]], #4\n" \
194 " add %[count], %[count], #2\n" /* loop back-branch line missing in this copy */ \
195 /* Copy the last remaining 16-bit value if any */ \
196 "6: subs %[count], %[count], #1\n" \
197 " ldrgeh r4, [%[src]], #2\n" \
198 " strgeh r4, [%[dst]], #2\n" \
200 : [dst] "+&r" (dst), [src] "+&r" (src), [count] "+&r" (count), [dummy0] "=&r" (dummy0) \
202 : "r4", "r5", "r6", "r7", "cc", "memory" \