/*
 * Provenance: Linux kernel source tree, arch/sparc/lib/NGmemcpy.S
 * (master branch copy; web-viewer navigation header removed).
 */
/* SPDX-License-Identifier: GPL-2.0 */
/* NGmemcpy.S: Niagara optimized memcpy.
 *
 * Copyright (C) 2006, 2007 David S. Miller ([email protected])
 *
 * Configuration block.  Nearly every primitive below is wrapped in
 * #ifndef so that an including wrapper (e.g. a copy_{to,from}_user
 * variant) can override FUNC_NAME, EX_LD/EX_ST, the load/store forms
 * and the ASIs while reusing the same function body.
 */

#ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/thread_info.h>
/* Extra scratch global; the register chosen differs between the
 * kernel and userland builds.
 */
#define GLOBAL_SPARE	%g7
/* On exit, set %asi back to the value the environment expects:
 * in-kernel that is the user-space secondary ASI (ASI_AIUS).
 */
#define RESTORE_ASI(TMP) \
	wr	%g0, ASI_AIUS, %asi
#else
#define GLOBAL_SPARE	%g5
/* Userland build: restore a primary no-fault ASI instead. */
#define RESTORE_ASI(TMP) \
	wr	%g0, ASI_PNF, %asi
#endif

/* Register-window save area passed to `save`: 128 bytes for the
 * 64-bit V9 ABI, 64 bytes otherwise.
 */
#ifdef __sparc_v9__
#define SAVE_AMOUNT	128
#else
#define SAVE_AMOUNT	64
#endif

/* ASI written to %asi for the STORE_INIT block-initializing stores
 * (see the cache-line comment inside the block copy code below).
 */
#ifndef STORE_ASI
#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
#endif

/* EX_LD/EX_ST wrap every load/store that may fault.  By default the
 * fixup label argument `y` is simply dropped and the bare instruction
 * `x` is emitted; fault-aware wrappers redefine these to register `y`
 * (one of the NG_ret_* stubs) in the exception table.
 */
#ifndef EX_LD
#define EX_LD(x,y)	x
#endif

#ifndef EX_ST
#define EX_ST(x,y)	x
#endif

/* Basic load.  The MEMCPY_DEBUG variant forces the alternate-space
 * form with an explicit ASI of 0x80.
 */
#ifndef LOAD
#ifndef MEMCPY_DEBUG
#define LOAD(type,addr,dest)	type [addr], dest
#else
#define LOAD(type,addr,dest)	type##a [addr] 0x80, dest
#endif
#endif

/* 16-byte "twin" load: ldda with the quad-LDD ASI fills the register
 * pair starting at dest0; dest1 names the odd register of that pair
 * for documentation purposes only (it does not appear in the opcode).
 */
#ifndef LOAD_TWIN
#define LOAD_TWIN(addr_reg,dest0,dest1)	\
	ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
#endif

#ifndef STORE
#define STORE(type,src,addr)	type src, [addr]
#endif

/* Cache-line initializing store through %asi (loaded with STORE_ASI).
 * The SIMULATE_NIAGARA_ON_NON_NIAGARA build substitutes a plain stx
 * so the code runs on CPUs without the init-store ASI.
 */
#ifndef STORE_INIT
#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
#define STORE_INIT(src,addr)	stxa src, [addr] %asi
#else
#define STORE_INIT(src,addr)	stx src, [addr + 0x00]
#endif
#endif

/* Overridable symbol name so wrappers can emit this body under their
 * own entry point.
 */
#ifndef FUNC_NAME
#define FUNC_NAME	NGmemcpy
#endif

/* Hook for wrappers to insert setup code at function entry. */
#ifndef PREAMBLE
#define PREAMBLE
#endif

/* Condition-code register used by the 64-bit branches below. */
#ifndef XCC
#define XCC xcc
#endif

	/* Declare %g2/%g3 as scratch so the assembler does not warn
	 * about their use.
	 */
	.register	%g2,#scratch
	.register	%g3,#scratch
.text
79
#ifndef EX_RETVAL
80
#define EX_RETVAL(x) x
81
__restore_asi:
82
wr %g0, ASI_AIUS, %asi
83
ret
84
restore
85
ENTRY(NG_ret_i2_plus_i4_plus_1)
86
ba,pt %xcc, __restore_asi
87
add %i2, %i5, %i0
88
ENDPROC(NG_ret_i2_plus_i4_plus_1)
89
ENTRY(NG_ret_i2_plus_g1)
90
ba,pt %xcc, __restore_asi
91
add %i2, %g1, %i0
92
ENDPROC(NG_ret_i2_plus_g1)
93
ENTRY(NG_ret_i2_plus_g1_minus_8)
94
sub %g1, 8, %g1
95
ba,pt %xcc, __restore_asi
96
add %i2, %g1, %i0
97
ENDPROC(NG_ret_i2_plus_g1_minus_8)
98
ENTRY(NG_ret_i2_plus_g1_minus_16)
99
sub %g1, 16, %g1
100
ba,pt %xcc, __restore_asi
101
add %i2, %g1, %i0
102
ENDPROC(NG_ret_i2_plus_g1_minus_16)
103
ENTRY(NG_ret_i2_plus_g1_minus_24)
104
sub %g1, 24, %g1
105
ba,pt %xcc, __restore_asi
106
add %i2, %g1, %i0
107
ENDPROC(NG_ret_i2_plus_g1_minus_24)
108
ENTRY(NG_ret_i2_plus_g1_minus_32)
109
sub %g1, 32, %g1
110
ba,pt %xcc, __restore_asi
111
add %i2, %g1, %i0
112
ENDPROC(NG_ret_i2_plus_g1_minus_32)
113
ENTRY(NG_ret_i2_plus_g1_minus_40)
114
sub %g1, 40, %g1
115
ba,pt %xcc, __restore_asi
116
add %i2, %g1, %i0
117
ENDPROC(NG_ret_i2_plus_g1_minus_40)
118
ENTRY(NG_ret_i2_plus_g1_minus_48)
119
sub %g1, 48, %g1
120
ba,pt %xcc, __restore_asi
121
add %i2, %g1, %i0
122
ENDPROC(NG_ret_i2_plus_g1_minus_48)
123
ENTRY(NG_ret_i2_plus_g1_minus_56)
124
sub %g1, 56, %g1
125
ba,pt %xcc, __restore_asi
126
add %i2, %g1, %i0
127
ENDPROC(NG_ret_i2_plus_g1_minus_56)
128
ENTRY(NG_ret_i2_plus_i4_plus_16)
129
add %i4, 16, %i4
130
ba,pt %xcc, __restore_asi
131
add %i2, %i4, %i0
132
ENDPROC(NG_ret_i2_plus_i4_plus_16)
133
ENTRY(NG_ret_i2_plus_i4_plus_8)
134
add %i4, 8, %i4
135
ba,pt %xcc, __restore_asi
136
add %i2, %i4, %i0
137
ENDPROC(NG_ret_i2_plus_i4_plus_8)
138
ENTRY(NG_ret_i2_plus_8)
139
ba,pt %xcc, __restore_asi
140
add %i2, 8, %i0
141
ENDPROC(NG_ret_i2_plus_8)
142
ENTRY(NG_ret_i2_plus_4)
143
ba,pt %xcc, __restore_asi
144
add %i2, 4, %i0
145
ENDPROC(NG_ret_i2_plus_4)
146
ENTRY(NG_ret_i2_plus_1)
147
ba,pt %xcc, __restore_asi
148
add %i2, 1, %i0
149
ENDPROC(NG_ret_i2_plus_1)
150
ENTRY(NG_ret_i2_plus_g1_plus_1)
151
add %g1, 1, %g1
152
ba,pt %xcc, __restore_asi
153
add %i2, %g1, %i0
154
ENDPROC(NG_ret_i2_plus_g1_plus_1)
155
ENTRY(NG_ret_i2)
156
ba,pt %xcc, __restore_asi
157
mov %i2, %i0
158
ENDPROC(NG_ret_i2)
159
ENTRY(NG_ret_i2_and_7_plus_i4)
160
and %i2, 7, %i2
161
ba,pt %xcc, __restore_asi
162
add %i2, %i4, %i0
163
ENDPROC(NG_ret_i2_and_7_plus_i4)
164
ENTRY(NG_ret_i2_and_7_plus_i4_plus_8)
165
and %i2, 7, %i2
166
add %i4, 8, %i4
167
ba,pt %xcc, __restore_asi
168
add %i2, %i4, %i0
169
ENDPROC(NG_ret_i2_and_7_plus_i4)
170
#endif
171

	.align		64

	/* void *FUNC_NAME(void *dst <%i0>, const void *src <%i1>,
	 *                 size_t len <%i2>)
	 *
	 * Returns the original dst (EX_RETVAL(%i0)) in %o0 on the
	 * `restore`.  Dispatch: len == 0 returns immediately (85f);
	 * 0 < len < 16 takes the small-copy paths (80f/90f);
	 * 16 <= len < 128 takes 70f; len >= 128 aligns dst to 64 bytes
	 * and runs a 64-bytes-per-iteration init-store block loop whose
	 * inner body depends on the source alignment (50:, 10:, 8:/9:).
	 */
	.globl	FUNC_NAME
	.type	FUNC_NAME,#function
FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
	PREAMBLE
	save		%sp, -SAVE_AMOUNT, %sp
	srlx		%i2, 31, %g2	! %g2 = len bits 63:31
	cmp		%g2, 0
	tne		%xcc, 5		! trap on absurd (>= 2GB) length
	mov		%i0, %o0	! %o0 = dst; %i0 preserved for return
	cmp		%i2, 0
	be,pn		%XCC, 85f	! len == 0: nothing to copy
	 or		%o0, %i1, %i3	! %i3 = dst | src, joint alignment probe
	cmp		%i2, 16
	blu,a,pn	%XCC, 80f	! tiny copy; annulled slot folds len
	 or		%i3, %i2, %i3	! into the alignment probe only there

	/* 2 blocks (128 bytes) is the minimum we can do the block
	 * copy with.  We need to ensure that we'll iterate at least
	 * once in the block copy loop.  At worst we'll need to align
	 * the destination to a 64-byte boundary which can chew up
	 * to (64 - 1) bytes from the length before we perform the
	 * block copy loop.
	 */
	cmp		%i2, (2 * 64)
	blu,pt		%XCC, 70f	! 16 <= len < 128: medium path
	 andcc		%i3, 0x7, %g0	! (delay) test (dst|src) & 7 for 70f

	/* %o0: dst
	 * %i1: src
	 * %i2: len  (known to be >= 128)
	 *
	 * The block copy loops will use %i4/%i5,%g2/%g3 as
	 * temporaries while copying the data.
	 */

	LOAD(prefetch, %i1, #one_read)
	wr		%g0, STORE_ASI, %asi	! %asi = init-store ASI

	/* Align destination on 64-byte boundary.  */
	andcc		%o0, (64 - 1), %i4
	be,pt		%XCC, 2f
	 sub		%i4, 64, %i4
	sub		%g0, %i4, %i4	! bytes to align dst
	sub		%i2, %i4, %i2
1:	subcc		%i4, 1, %i4	! byte-copy until dst is 64-aligned
	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
	EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
	add		%i1, 1, %i1
	bne,pt		%XCC, 1b
	 add		%o0, 1, %o0

	/* If the source is on a 16-byte boundary we can do
	 * the direct block copy loop.  If it is 8-byte aligned
	 * we can do the 16-byte loads offset by -8 bytes and the
	 * init stores offset by one register.
	 *
	 * If the source is not even 8-byte aligned, we need to do
	 * shifting and masking (basically integer faligndata).
	 *
	 * The careful bit with init stores is that if we store
	 * to any part of the cache line we have to store the whole
	 * cacheline else we can end up with corrupt L2 cache line
	 * contents.  Since the loop works on 64-bytes of 64-byte
	 * aligned store data at a time, this is easy to ensure.
	 */
2:
	andcc		%i1, (16 - 1), %i4	! %i4 = src & 15
	andn		%i2, (64 - 1), %g1	! block copy loop iterator
	be,pt		%XCC, 50f		! src 16-aligned: direct loop
	 sub		%i2, %g1, %i2		! final sub-block copy bytes

	cmp		%i4, 8
	be,pt		%XCC, 10f		! src 8-aligned: offset loop
	 sub		%i1, %i4, %i1		! (delay) round src down

	/* Neither 8-byte nor 16-byte aligned, shift and mask.  */
	and		%i4, 0x7, GLOBAL_SPARE
	sll		GLOBAL_SPARE, 3, GLOBAL_SPARE	! POST shift = (src&7)*8 bits
	mov		64, %i5
	EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
	sub		%i5, GLOBAL_SPARE, %i5	! PRE shift = 64 - POST shift
	mov		16, %o4			! %o4/%o5/%o7/%i3 = twin-load
	mov		32, %o5			! offsets 16/32/48/64 within
	mov		48, %o7			! the current 64-byte chunk
	mov		64, %i3

	bg,pn		%XCC, 9f	! src & 15 > 8: phase-shifted variant
	 nop

	/* Software faligndata: funnel-shift three consecutive 64-bit
	 * words so WORD1/WORD2 become the two source-aligned output
	 * words, pulling the spilled low bits in from the next word.
	 */
#define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
	sllx		WORD1, POST_SHIFT, WORD1; \
	srlx		WORD2, PRE_SHIFT, TMP; \
	sllx		WORD2, POST_SHIFT, WORD2; \
	or		WORD1, TMP, WORD1; \
	srlx		WORD3, PRE_SHIFT, TMP; \
	or		WORD2, TMP, WORD2;

	/* 64 bytes per iteration: four twin loads, eight init stores,
	 * shifting each pair into destination alignment.
	 */
8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
	LOAD(prefetch, %i1 + %i3, #one_read)

	EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
	EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)

	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)

	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)

	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)

	subcc		%g1, 64, %g1
	bne,pt		%XCC, 8b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, %i4, %i1	! undo the earlier src round-down

	/* Same loop as 8: but phase-shifted one register, for
	 * (src & 15) > 8.
	 */
9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
	LOAD(prefetch, %i1 + %i3, #one_read)

	EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)

	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)

	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)

	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)

	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)

	subcc		%g1, 64, %g1
	bne,pt		%XCC, 9b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, %i4, %i1	! undo the earlier src round-down

10:	/* Destination is 64-byte aligned, source was only 8-byte
	 * aligned but it has been subtracted by 8 and we perform
	 * one twin load ahead, then add 8 back into source when
	 * we finish the loop.
	 */
	EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
	mov	16, %o7			! twin-load offsets 16/32/48/64
	mov	32, %g2
	mov	48, %g3
	mov	64, %o1
1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
	LOAD(prefetch, %i1 + %o1, #one_read)
	EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
	add		%i1, 64, %i1
	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
	subcc		%g1, 64, %g1
	bne,pt		%XCC, 1b
	 add		%o0, 64, %o0

	ba,pt		%XCC, 60f
	 add		%i1, 0x8, %i1	! undo the -8 src bias from 2:

50:	/* Destination is 64-byte aligned, and source is 16-byte
	 * aligned.
	 */
	mov	16, %o7			! twin-load offsets 16/32/48/64
	mov	32, %g2
	mov	48, %g3
	mov	64, %o1
1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
	LOAD(prefetch, %i1 + %o1, #one_read)
	EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
	EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
	add		%i1, 64, %i1
	EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
	EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
	subcc		%g1, 64, %g1
	bne,pt		%XCC, 1b
	 add		%o0, 64, %o0
	/* fall through */

60:
	membar		#Sync	! order the init stores before normal stores

	/* %i2 contains any final bytes still needed to be copied
	 * over. If anything is left, we copy it one byte at a time.
	 */
	RESTORE_ASI(%i3)
	brz,pt		%i2, 85f
	 sub		%o0, %i1, %i3	! %i3 = dst - src for the 90f loop
	ba,a,pt		%XCC, 90f
	 nop

	.align		64
70:	/* 16 <= len < 128.  (Previous comment claimed 16 < len <= 64,
	 * but this label is only reached after the blu 80f / blu 70f
	 * pair above.)  Condition codes still hold (dst|src) & 7.
	 */
	bne,pn		%XCC, 75f	! mixed 8-byte alignment: fix up first
	 sub		%o0, %i1, %i3	! %i3 = dst - src

72:	/* Both 8-byte aligned: copy 16 bytes per iteration with ldx
	 * pairs; (%i1 + %i3) addresses the destination.
	 */
	andn		%i2, 0xf, %i4	! %i4 = bytes to move in 16s
	and		%i2, 0xf, %i2	! %i2 = trailing bytes
1:	subcc		%i4, 0x10, %i4
	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4_plus_16)
	add		%i1, 0x08, %i1
	EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4_plus_16)
	sub		%i1, 0x08, %i1
	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4_plus_16)
	add		%i1, 0x8, %i1
	EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_plus_8)
	bgu,pt		%XCC, 1b
	 add		%i1, 0x8, %i1
73:	andcc		%i2, 0x8, %g0	! copy an 8-byte tail if present
	be,pt		%XCC, 1f
	 nop
	sub		%i2, 0x8, %i2
	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
	add		%i1, 0x8, %i1
1:	andcc		%i2, 0x4, %g0	! copy a 4-byte tail if present
	be,pt		%XCC, 1f
	 nop
	sub		%i2, 0x4, %i2
	EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
	EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
	add		%i1, 0x4, %i1
1:	cmp		%i2, 0		! remaining 1-3 bytes go via 90f
	be,pt		%XCC, 85f
	 nop
	ba,pt		%xcc, 90f
	 nop

75:	/* (dst|src) not 8-byte aligned.  Byte-copy until dst is
	 * 8-aligned, then either rejoin 72b/73b (if src became
	 * 8-aligned too) or run the shift-and-merge loop at 8f.
	 */
	andcc		%o0, 0x7, %g1
	sub		%g1, 0x8, %g1
	be,pn		%icc, 2f
	 sub		%g0, %g1, %g1	! %g1 = bytes needed to align dst
	sub		%i2, %g1, %i2

1:	subcc		%g1, 1, %g1
	EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
	EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
	bgu,pt		%icc, 1b
	 add		%i1, 1, %i1

2:	add		%i1, %i3, %o0	! recompute dst pointer
	andcc		%i1, 0x7, %g1	! src alignment after fixup
	bne,pt		%icc, 8f
	 sll		%g1, 3, %g1	! (delay) %g1 = misalignment in bits

	cmp		%i2, 16
	bgeu,pt		%icc, 72b
	 nop
	ba,a,pt		%xcc, 73b

8:	/* dst 8-aligned, src is not: merge successive aligned ldx
	 * words with shifts (%g1 = left shift, %i3 = 64 - %g1).
	 */
	mov		64, %i3
	andn		%i1, 0x7, %i1	! round src down to 8 bytes
	EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
	sub		%i3, %g1, %i3
	andn		%i2, 0x7, %i4	! %i4 = bytes to move in 8s
	sllx		%g2, %g1, %g2	! prime the carry word
1:	add		%i1, 0x8, %i1
	EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
	subcc		%i4, 0x8, %i4
	srlx		%g3, %i3, %i5
	or		%i5, %g2, %i5	! combine carry with new high bits
	EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4_plus_8)
	add		%o0, 0x8, %o0
	bgu,pt		%icc, 1b
	 sllx		%g3, %g1, %g2	! (delay) next carry word

	srl		%g1, 3, %g1	! back from bits to bytes
	andcc		%i2, 0x7, %i2	! leftover bytes
	be,pn		%icc, 85f
	 add		%i1, %g1, %i1	! restore true (unaligned) src
	ba,pt		%xcc, 90f
	 sub		%o0, %i1, %i3

	.align		64
80:	/* 0 < len < 16.  (Previous comment said <= 16; the blu above
	 * only reaches here for len < 16.)  %i3 = dst|src|len.
	 */
	andcc		%i3, 0x3, %g0
	bne,pn		%XCC, 90f	! not all 4-aligned: byte loop
	 sub		%o0, %i1, %i3

1:	/* Everything 4-aligned: copy one word per iteration. */
	subcc		%i2, 4, %i2
	EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
	EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
	bgu,pt		%XCC, 1b
	 add		%i1, 4, %i1

85:	ret			! return original dst
	 restore	EX_RETVAL(%i0), %g0, %o0

	.align		32
90:	/* Byte-at-a-time tail/fallback copy; %i3 = dst - src. */
	subcc		%i2, 1, %i2
	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
	EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
	bgu,pt		%XCC, 90b
	 add		%i1, 1, %i1
	ret			! return original dst
	 restore	EX_RETVAL(%i0), %g0, %o0

	.size		FUNC_NAME, .-FUNC_NAME