Contents
- String length
- String copy
- String copy (n-bytes)
- Reverse a string
- Sum of an integer array
- Bubble sort
- Distance formula with floats
- Min/max with floats
- Add element to singly linked list
- Calling a C function from assembly
- Binary search
- Vector x matrix
Most of these instructions do not use aliases (aka pseudo instructions) so that you can test your ISA emulator without the assembler changing instructions on you. However, some instructions you will see use aliases, such as la (load address) since we need the assembler to fill in the memory address of a given symbol.
We will also discuss the choices of instructions and flow of program in class. Make sure you follow each element, and then we will discuss why I chose not to do a 1-for-1 translation of C-to-assembly in some of these programs.
When testing your emulator, test the following by assembling using the given linker script org.lds.
String Length
Determine the length of a C-style string by adding 1 until we find the terminator ‘\0’.
int strlen(const char *str) {
int i;
for (i = 0;str[i] != '\0';i++);
return i;
}
.section .text
.global strlen
strlen:
# a0 = const char *str
li t0, 0 # i = 0
1: # Start of for loop
add t1, t0, a0 # Add the byte offset for str[i]
lb t1, 0(t1) # Dereference str[i]
beqz t1, 1f # if str[i] == 0, break for loop
addi t0, t0, 1 # Add 1 to our iterator
j 1b # Jump back to condition (1 backwards)
1: # End of for loop
mv a0, t0 # Move t0 into a0 to return
ret # Return back via the return address register
String Copy
Copy one C-style string into another. I expanded the actual implementation to make it easier to see each element in assembly.
void stringcopy(char *dst, const char *src) {
do {
// Copy the source's byte into the destination's byte
*dst = *src;
// We check '\0' here so that we copy the source's \0
// into the destination.
if (*src == '\0') break;
// Advance both the destination and source to the next byte.
dst++;
src++;
} while (true);
}
.section .text
.global stringcopy
stringcopy:
# a0 = destination
# a1 = source
1:
lb t0, 0(a1) # Load a char from the src
sb t0, 0(a0) # Store the value of the src
beqz t0, 1f # Check if it's 0
addi a0, a0, 1 # Advance destination one byte
addi a1, a1, 1 # Advance source one byte
j 1b # Go back to the start of the loop
1:
ret # Return back via the return address
String Copy (n bytes)
// from the strncpy man pages
char *strncpy(char *dst, const char *src, unsigned long n) {
unsigned long i;
for (i = 0; i < n && src[i] != '\0'; i++)
dst[i] = src[i];
for ( ; i < n; i++)
dst[i] = '\0';
return dst;
}
.section .text
.global strncpy
strncpy:
# a0 = char *dst
# a1 = const char *src
# a2 = unsigned long n
# t0 = i
li t0, 0 # i = 0
1: # first for loop
bge t0, a2, 1f # break if i >= n
add t1, a1, t0 # src + i
lb t1, 0(t1) # t1 = src[i]
beqz t1, 1f # break if src[i] == '\0'
add t2, a0, t0 # t2 = dst + i
sb t1, 0(t2) # dst[i] = src[i]
addi t0, t0, 1 # i++
j 1b # back to beginning of loop
1: # second for loop
bge t0, a2, 1f # break if i >= n
add t1, a0, t0 # t1 = dst + i
sb zero, 0(t1) # dst[i] = 0
addi t0, t0, 1 # i++
j 1b # back to beginning of loop
1:
# we don't have to move anything since
# a0 hasn't changed.
ret # return via return address register
Reverse a string
void strrev(char *str) {
int i;
int sz = strlen(str);
for (i = 0;i < sz / 2;i++) {
char c = str[i];
str[i] = str[sz - i - 1];
str[sz - i - 1] = c;
}
}
.section .text
.global strrev
strrev:
# s1 = str
# a0 = sz
# t0 = sz / 2
# t1 = i
# Enter stack frame
addi sp, sp, -16
sd ra, 0(sp)
sd s1, 8(sp)
# Get the size of the string
mv s1, a0
call strlen
srai t0, a0, 1 # Divide sz by 2
li t1, 0 # i = 0
1: # for loop
bge t1, t0, 1f
add t2, s1, t1 # str + i
sub t3, a0, t1 # sz - i
addi t3, t3, -1 # sz - i - 1
add t3, t3, s1 # str + sz - i - 1
lb t4, 0(t2) # str[i]
lb t5, 0(t3) # str[sz - i - 1]
sb t4, 0(t3) # swap
sb t5, 0(t2)
addi t1, t1, 1
j 1b
1:
# Leave stack frame
ld s1, 8(sp)
ld ra, 0(sp)
addi sp, sp, 16
ret
Sum of an Integer Array
int arraysum(int a[], int size) {
int ret = 0;
int i;
for (i = 0;i < size;i++) {
ret = ret + a[i];
}
return ret;
}
.section .text
.global arraysum
arraysum:
# a0 = int a[]
# a1 = int size
# t0 = ret
# t1 = i
li t0, 0 # ret = 0
li t1, 0 # i = 0
1: # For loop
bge t1, a1, 1f # if i >= size, break
slli t2, t1, 2 # Multiply i by 4 (1 << 2 = 4)
add t2, a0, t2 # Update memory address
lw t2, 0(t2) # Dereference address to get integer
add t0, t0, t2 # Add integer value to ret
addi t1, t1, 1 # Increment the iterator
j 1b # Jump back to start of loop (1 backwards)
1:
mv a0, t0 # Move t0 (ret) into a0
ret # Return via return address register
Bubble Sort
void bubsort(long *list, int size) {
bool swapped;
do {
swapped = false;
for (int i = 1;i < size;i++) {
if (list[i-1] > list[i]) {
swapped = true;
long tmp = list[i-1];
list[i-1] = list[i];
list[i] = tmp;
}
}
} while (swapped);
}
.section .text
.global bubsort
bubsort:
# a0 = long *list
# a1 = size
# t0 = swapped
# t1 = i
1: # do loop
li t0, 0 # swapped = false
li t1, 1 # i = 1
2: # for loop
bge t1, a1, 2f # break if i >= size
slli t3, t1, 3 # scale i by 8 (for long)
add t3, a0, t3 # new scaled memory address
ld t4, -8(t3) # load list[i-1] into t4
ld t5, 0(t3) # load list[i] into t5
ble t4, t5, 3f # if list[i-1] < list[i], it's in position
# if we get here, we need to swap
li t0, 1 # swapped = true
sd t4, 0(t3) # list[i] = list[i-1]
sd t5, -8(t3) # list[i-1] = list[i]
3: # bottom of for loop body
addi t1, t1, 1 # i++
j 2b # loop again
2: # bottom of do loop body
bnez t0, 1b # loop if swapped = true
ret # return via return address register
Distance Formula w/ 64-bit Floats
struct Coordinate {
double x;
double y;
};
double distance(const Coordinate &from, const Coordinate &to) {
double x = to.x - from.x;
double y = to.y - from.y;
return sqrt(x*x + y*y);
}
.section .text
.global distance
distance:
# a0 = Coordinate &from
# a1 = Coordinate &to
# Coordinate structure
# Name Offset Size (bytes)
# x 0 8
# y 8 8
# Total size = 16 bytes
fld ft0, 0(a0) # ft0 = from.x
fld ft1, 8(a0) # ft1 = from.y
fld ft2, 0(a1) # ft2 = to.x
fld ft3, 8(a1) # ft3 = to.y
fsub.d ft0, ft2, ft0 # ft0 = to.x - from.x
fsub.d ft1, ft3, ft1 # ft1 = to.y - from.y
fmul.d ft0, ft0, ft0 # ft0 = ft0 * ft0
fmul.d ft1, ft1, ft1 # ft1 = ft1 * ft1
fadd.d ft0, ft0, ft1 # ft0 = ft0 + ft1
fsqrt.d fa0, ft0 # fa0 = sqrt(ft0)
# Return value goes in fa0
ret # Return
Min Max w/ 32-bit Floats
void minmax(float a, float b, float c, float &mn, float &mx) {
mn = mx = a;
if (mn > b) mn = b;
else if (mx < b) mx = b;
if (mn > c) mn = c;
else if (mx < c) mx = c;
}
.section .text
.global minmax
minmax:
# fa0 = float a
# fa1 = float b
# fa2 = float c
# ft0 = min
# ft1 = max
# a0 = float &mn
# a1 = float &mx
# Set mn = mx = a
fmv.s ft0, fa0
fmv.s ft1, fa0
fgt.s t0, ft0, fa1 # if (mn > b)
beqz t0, 1f # skip if false
# if we get here, then mn is > b
fmv.s ft0, fa1 # ft0 is mn, set it to b
j 2f # jump past the else if statement
1:
flt.s t0, ft1, fa1 # else if (mx < b)
beqz t0, 2f # skip if false
# if we get here then mx < b
fmv.s ft1, fa1 # ft1 is mx, set it to b
2:
fgt.s t0, ft0, fa2 # if (mn > c)
beqz t0, 1f # skip if false
# if we get here then mn > c
fmv.s ft0, fa2 # ft0 is mn, set it to c
j 2f # skip the else if statement
1:
flt.s t0, ft1, fa2 # else if (mx < c)
beqz t0, 2f # skip if false
# If we get here then mx < c
fmv.s ft1, fa2 # ft1 is mx, set it to c
2:
fsw ft0, 0(a0) # store minimum into &mn
fsw ft1, 0(a1) # store maximum into &mx
ret # return via return address register
Add Element to Singly Linked List
LL *addll(LL *list, LL *element) {
element->next = list;
return element;
}
.section .text
.global addll
addll:
# a0 = list
# a1 = element
# LL structure
# Name Offset Size (bytes)
# data 0 2
# next 8 8
sd a0, 8(a1) # element->next = list
mv a0, a1 # set a0 to return element instead of list
ret # return via return address register
Calling a C Function from Assembly
.section .rodata
enter_prompt: .asciz "Enter a, b, and c: "
scan: .asciz "%d %d %d"
result_out: .asciz "Result = %d\n"
.section .text
.global main
main:
addi sp, sp, -32 # Allocate 32 bytes from the stack
sd ra, 0(sp) # Since we are making calls, we need the original ra
# Prompt the user first
la a0, enter_prompt
call printf
# We've printed the prompt, now wait for user input
la a0, scan
addi a1, sp, 8 # Address of a is sp + 8
addi a2, sp, 16 # Address of b is sp + 16
addi a3, sp, 24 # Address of c is sp + 24
call scanf
# Now all of the values are in memory, load them
# so we can jal ra, the c function.
lw a0, 8(sp)
lw a1, 16(sp)
lw a2, 24(sp)
call cfunc
# The result should be in a0, but that needs to be
# the second parameter to printf.
mv a1, a0
la a0, result_out
call printf
# Restore original RA and return
ld ra, 0(sp)
addi sp, sp, 32 # Always deallocate the stack!
ret
// extern "C" is required so we can link the name into assembly
// without knowing how C++ mangles it.
extern "C" {
int cfunc(int a, int b, int c);
}
// Simple function we're going to call from assembly.
int cfunc(int a, int b, int c) {
return a + b * c;
}
Binary Search
int binsearch(const int arr[], int needle, int size) {
int mid;
int left = 0;
int right = size -1;
while (left <= right) {
mid = (left + right) / 2;
// Needle is bigger than what we're looking at,
// only consider the upper half of the list.
if (needle > arr[mid]) {
left = mid + 1;
}
else if (needle < arr[mid]) {
right = mid - 1;
}
else {
return mid;
}
}
// We've gone through all elements and the needle wasn't found.
return -1;
}
.section .text
.global binsearch
binsearch:
# a0 = int arr[]
# a1 = int needle
# a2 = int size
# t0 = mid
# t1 = left
# t2 = right
li t1, 0 # left = 0
addi t2, a2, -1 # right = size - 1
1: # while loop
bgt t1, t2, 1f # left > right, break
add t0, t1, t2 # mid = left + right
srai t0, t0, 1 # mid = (left + right) / 2
# Get the element at the midpoint
slli t4, t0, 2 # Scale the midpoint by 4
add t4, a0, t4 # Get the memory address of arr[mid]
lw t4, 0(t4) # Dereference arr[mid]
# See if the needle (a1) > arr[mid] (t3)
ble a1, t4, 2f # if needle <= t3, we need to check the next condition
# If we get here, then the needle is > arr[mid]
addi t1, t0, 1 # left = mid + 1
j 1b
2:
bge a1, t4, 2f # skip if needle >= arr[mid]
# If we get here, then needle < arr[mid]
addi t2, t0, -1 # right = mid - 1
j 1b
2:
# If we get here, then needle == arr[mid]
mv a0, t0
1:
ret
Vector x Matrix
void matmul(float dst[3], float matrix[3][3], float vector[3]) {
int r;
int c;
float d;
for (r = 0;r < 3;r++) {
d = 0;
for (c = 0;c < 3;c++) {
d = d + matrix[r][c] * vector[c];
}
dst[r] = d;
}
}
We don’t always have to translate 100% from C++ into assembly. We can take shortcuts if we know how the data structure is set up. matrix[3][3] can be calculated by using row * width + column, and then scaling it by the data size. However, we can take a shortcut and just advance the matrix pointer by each integer for every iteration of the loop. Since our outer and inner loops both iterate 3 times, we will iterate a total of 9 times–exactly the number of elements in our matrix.
.section .text
.global matmul
matmul:
# a0 = dst[3]
# a1 = matrix[3][3]
# a2 = vector[3]
# t0 = r
# t1 = c
# t3 = 3
# ft0 = d
# Row for loop
li t0, 0
li t3, 3
1:
bge t0, t3, 1f # break when we are done
fmv.s.x fa0, zero # Set d = 0
# Column for loop
li t1, 0
2:
bge t1, t3, 2f
flw ft0, 0(a1) # Load matrix value
flw ft1, 0(a2) # Load vector value
fmul.s ft0, ft0, ft1 # ft0 = matrix[r][c] * vec[c]
fadd.s fa0, fa0, ft0 # d = d + ft0
addi t1, t1, 1
addi a1, a1, 4 # Move to the next matrix value
addi a2, a2, 4 # Move to the next vector value
j 2b
2:
addi a2, a2, -12 # Move the vector back to the top
fsw fa0, 0(a0) # dst[r] = d
addi t0, t0, 1
addi a0, a0, 4 # Move to next destination
j 1b
1:
ret