Example RISC-V AssemblyTable of ContentsWriting RISC-V Assembly

Contents

  1. Word wrapper
  2. Mandlebrot set
  3. Sudoku solver

Word Wrapper

This came from Rosetta Code, but to the date of me writing this, there is no RISC-V assembly section for this code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* nonsensical hyphens to make greedy wrapping method look bad */
const char *string = "In olden times when wishing still helped one, there lived a king "
    "whose daughters were all beautiful, but the youngest was so beautiful "
    "that the sun itself, which has seen so much, was astonished whenever "
    "it shone-in-her-face.  Close-by-the-king's castle lay a great dark "
    "forest, and under an old lime-tree in the forest was a well, and when "
    "the day was very warm, the king's child went out into the forest and "
    "sat down by the side of the cool-fountain, and when she was bored she "
    "took a golden ball, and threw it up on high and caught it, and this "
    "ball was her favorite plaything.";

/*  Each but the last of wrapped lines comes with some penalty as the square
    of the diff between line length and desired line length.  If the line
    is longer than desired length, the penalty is multiplied by 100.  This
    pretty much prohibits the wrapping routine from going over right margin.
    If is ok to exceed the margin just a little, something like 20 or 40 will
    do.

    Knuth uses a per-paragraph penalty for line-breaking in TeX, which is--
    unlike what I have here--probably bug-free.
*/

#define PENALTY_LONG    100
#define PENALTY_SHORT   1

typedef struct word_t {
    const char *s;
    int len;
} *word;

word make_word_list(const char *s, int *n)
{
    int max_n = 0;
    word words = 0;

    *n = 0;
    while (1) {
        while (*s && isspace(*s)) s++;
        if (!*s) break;

        if (*n >= max_n) {
            if (!(max_n *= 2)) max_n = 2;
            words = realloc(words, max_n * sizeof(*words));
        }
        words[*n].s = s;
        while (*s && !isspace(*s)) s++;
        words[*n].len = s - words[*n].s;
        (*n) ++;
    }

    return words;
}

int greedy_wrap(word words, int count, int cols, int *breaks)
{
    int score = 0, line, i, j, d;

    i = j = line = 0;
    while (1) {
        if (i == count) {
            breaks[j++] = i;
            break;
        }

        if (!line) {
            line = words[i++].len;
            continue;
        }

        if (line + words[i].len < cols) {
            line += words[i++].len + 1;
            continue;
        }

        breaks[j++] = i;
        if (i < count) {
            d = cols - line;
            if (d > 0)  score += PENALTY_SHORT * d * d;
            else if (d < 0) score += PENALTY_LONG * d * d;
        }

        line = 0;
    }
    breaks[j] = 0;

    return score;
}

void show_wrap(word list, int count, int *breaks)
{
    int i, j;
    for (i = j = 0; i < count && breaks[i]; i++) {
        while (j < breaks[i]) {
            printf("%.*s", list[j].len, list[j].s);
            if (j < breaks[i] - 1)
                putchar(' ');
            j++;
        }
        if (breaks[i]) putchar('\n');
    }
}

int main(void)
{
    int len, score, cols;
    word list = make_word_list(string, &len);
    int *breaks = malloc(sizeof(int) * (len + 1));

    cols = 80;
    score = greedy_wrap(list, len, cols, breaks);
    printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score);
    show_wrap(list, len, breaks);

    cols = 32;
    score = greedy_wrap(list, len, cols, breaks);
    printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score);
    show_wrap(list, len, breaks);

    return 0;
}
#define PENALTY_LONG 100
#define PENALTY_SHORT 1

.section .rodata
string: .asciz "In olden times when wishing still helped one, there lived a king whose daughters were all beautiful, but the youngest was so beautiful that the sun itself, which has seen so much, was astonished whenever it shone-in-her-face.  Close-by-the-king's castle lay a great dark forest, and under an old lime-tree in the forest was a well, and when the day was very warm, the king's child went out into the forest and sat down by the side of the cool-fountain, and when she was bored she took a golden ball, and threw it up on high and caught it, and this ball was her favorite plaything."

greedy_wrap_printf: .asciz "\n== greedy wrap at %d (score %d) ==\n\n"
show_wrap_printf: .asciz "%.*s"
# NOS Table for word structure
#  Name             Off    Size
# const char *s     0      8
# len               8      4
# Total size = 16

.section .text
# By default, C/C++ will make functions global unless they re declared "static"
.global make_word_list
make_word_list:
    # a0 = const char *s
    # a1 = int *n
    # Stack frame for make_word_list
    # ra (d): sp + 0
    # s1 (d): sp + 8   (const char *s)
    # s2 (d): sp + 16  (int *n)
    # s3 (d): sp + 24  (word words)
    # s4 (w): sp + 32  (int max_n)
    # Need 40 bytes, aligned to a multiple of 8 = 40

    # Enter stack frame
    addi    sp, sp, -40
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s2, 16(sp)
    sd      s3, 24(sp)
    sd      s4, 32(sp)

    # Copy parameters to saved registers
    mv      s1, a0       # Copy const char *s into s0
    mv      s2, a1       # Copy int *n into s1
    mv      s3, zero     # words = 0
    mv      s4, zero     # max_n = 0

    sw      zero, 0(s2)  # *n = 0
1:  # while 1
2:  # while (*s && isspace(*s)) s++;
    lb      a0, 0(s1)    # *s
    beqz    a0, 2f       # Is *s == 0?
    call    isspace
    beqz    a0, 2f       # Break if isspace(*s) == false
    addi    s1, s1, 1    # s++
    j       2b           # Loop again
2:
    lb      a0, 0(s1)    # *s
    beqz    a0, 1f       # if (!*s) break;

    lw      a0, 0(s2)    # *n
    blt     a0, s4, 2f   # if (*n >= max_n)
    bnez    s4, 3f       # if (!(max_n *= 2))
    li      s4, 2        # max_n = 2
    j       4f
3:
    slliw   s4, s4, 1    # max_n *= 2
4:
    mv      a0, s3       # words
    mv      a1, s4       # max_n
    slliw   a1, a1, 4    # max_n * 16
    call    realloc
    mv      s3, a0       # words = realloc(...)
2:
    lw      t0, 0(s2)    # *n
    slliw   t0, t0, 4    # *n * 16
    add     t0, t0, s3   # words[*n]
    sd      s1, 0(t0)    # words[*n].s = s
2:  # while (*s && !isspace(*s)) s++;
    lb      a0, 0(s1)    # *s
    beqz    a0, 2f       # while(*s)
    call    isspace
    bnez    a0, 2f       # && !isspace(*s)
    addi    s1, s1, 1    # s++
    j       2b           # loop again
2:
    lw      t2, 0(s2)    # *n
    slliw   t0, t2, 4    # *n * 16
    add     t0, s3, t0   # words[*n]
    ld      t1, 0(t0)    # words[*n].s
    sub     t1, s1, t1   # s - words[*n].s
    sw      t1, 8(t0)    # words[*n].len = s - words[*n].s
    addiw   t2, t2, 1    # (*n)++
    sw      t2, 0(s2)    # (*n)++
    j       1b           # loop again
1:
    # return words
    mv      a0, s3       # words is in s3 right now
    # Leave stack frame
    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s2, 16(sp)
    ld      s3, 24(sp)
    ld      s4, 32(sp)
    addi    sp, sp, 40
    ret

.global show_wrap
show_wrap:
    # a0 = word *list
    # a1 = int count
    # a2 = int *breaks

    # Stack frame for show_wrap
    # sp + 0 = ra
    # sp + 8 = s1   (word list)
    # sp + 16 = s2  (int count)
    # sp + 24 = s3  (int *breaks)
    # sp + 32 = s4  (i)
    # sp + 40 = s5  (j)
    # sp + 48 = s6  (breaks[i])
    # Needs 56 bytes

    # Enter stack frame
    addi    sp, sp, -56
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s2, 16(sp)
    sd      s3, 24(sp)
    sd      s4, 32(sp)
    sd      s5, 40(sp)
    sd      s6, 48(sp)

    mv      s1, a0
    mv      s2, a1
    mv      s3, a2
    li      s4, 0       # i = 0
    li      s5, 0       # j = 0
1: # for loop
    bge     s4, s2, 1f  # i < count
    slli    t0, s4, 2   # i * 4
    add     t1, s3, t0  # breaks[i]
    lw      s6, 0(t1)   # breaks[i]
    beqz    s6, 1f      # && breaks[i]
2: # while loop
    bge     s5, s6, 2f  # Break if j >= breaks[i]
    slli    t0, s5, 4   # j * 16
    add     t0, t0, s1  # list[j]
    la      a0, show_wrap_printf
    lw      a1, 8(t0)   # list[j].len
    ld      a2, 0(t0)   # list[j].s
    call    printf
    addi    t0, s6, -1  # breaks[i] - 1
    bge     s5, t0, 3f  # if (j < breaks[i] - 1)
    li      a0, ' '
    call    putchar
3:
    addi    s5, s5, 1   # j++
    j       2b          # loop again
2:
    beqz    s6, 3f      # if (breaks[i])
    li      a0, '\n'
    call    putchar     # putchar('\n')
3:
    addi    s4, s4, 1   # i++
    j       1b          # loop again
1:

    # Leave stack frame
    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s2, 16(sp)
    ld      s3, 24(sp)
    ld      s4, 32(sp)
    ld      s5, 40(sp)
    ld      s6, 48(sp)
    addi    sp, sp, 56
    ret

.global greedy_wrap
greedy_wrap:
    # a0 = word words
    # a1 = int count
    # a2 = int cols
    # a3 = int *breaks

    # There are no function calls within greedy_wrap, so no
    # stack frame is necessary

    # t0 = score
    # t1 = line
    # t2 = i
    # t3 = j
    # t4 = d
    li      t0, 0    # score = 0
    li      t1, 0    # line = 0
    li      t2, 0    # i = 0
    li      t3, 0    # j = 0
1:  # while (1)
    bne     t2, a1, 2f # if (i == count)
    slli    t5, t3, 2  # breaks[j]
    add     t5, t5, a3 # breaks[j]
    sw      t2, 0(t5)  # breaks[j] = i
    addi    t3, t3, 1  # j++
    j       1f         # break
2:
    bnez    t1, 2f     # if (!line)
    slli    t5, t2, 4  # i * 16
    add     t5, t5, a0 # words[i]
    lw      t1, 8(t5)  # line = words[i].len
    addi    t2, t2, 1  # i++
    j       1b         # continue
2:
    slli    t5, t2, 4  # i * 16
    add     t5, t5, a0 # words[i]
    lw      a5, 8(t5)  # words[i].len
    add     t6, t1, a5 # line + words[i].len
    bge     t6, a2, 2f # if (line + words[i].len < cols)
    addi    a5, a5, 1  # words[i].len + 1
    add     t1, t1, a5 # line += words[i].len + 1
    addi    t2, t2, 1  # i++
    j       1b         # continue
2:
    slli    t5, t3, 2  # breaks[j]
    add     t5, t5, a3 # breaks[j]
    sw      t2, 0(t5)  # breaks[j] = i
    addi    t3, t3, 1  # j++

    bge     t2, a1, 2f # if (i < count)
    sub     t4, a2, t1 # d = cols - line
    blez    t4, 3f     # if (d > 0)
    # score += PENALTY_SHORT * d * d
    li      a5, PENALTY_SHORT
    mul     t4, t4, t4  # d * d
    mul     t4, a5, t4  # PENALTY_SHORT * d * d
    add     t0, t0, t4  # score += PENALTY_SHORT * d * d
    j       2f
3:
    beqz    t4, 2f     # else if (d < 0)
    # score += PENALTY_LONG * d * d
    li      a5, PENALTY_LONG
    mul     t4, t4, t4  # d * d
    mul     t4, a5, t4  # PENALTY_SHORT * d * d
    add     t0, t0, t4  # score += PENALTY_SHORT * d * d
2:
    li      t1, 0      # line = 0
    j       1b         # loop again
1:
    slli    t5, t3, 2  # breaks[j]
    add     t5, t5, a3 # breaks[j]
    sw      zero, 0(t5) # breaks[j] = 0
    mv      a0, t0      # return score
    ret

.global main
main:
    # tail cmain
    # Stack frame for main
    # ra (d): sp + 0
    # s1 (d): sp + 8  (int len)
    # s2 (d): sp + 16 (int score)
    # s3 (d): sp + 24 (int cols)
    # s4 (d): sp + 32 (word list)
    # s5 (d): sp + 40 (int *breaks)
    # &len  : sp + 48
    # Need 56 bytes
    addi    sp, sp, -56
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s2, 16(sp)
    sd      s3, 24(sp)
    sd      s4, 32(sp)
    sd      s5, 40(sp)

    la      a0, string   # string
    addi    a1, sp, 48   # &len
    call    make_word_list
    lw      s1, 48(sp)   # get the value of len
    mv      s4, a0       # list = ...

    addi    a0, s1, 1    # len + 1
    slli    a0, a0, 2    # 4 * (len + 1)
    call    malloc
    mv      s5, a0       # int * breaks = malloc(...)

    # With cols = 80
    mv      a0, s4
    mv      a1, s1       # len
    li      a2, 80       # cols = 80
    mv      a3, s5       # breaks
    call    greedy_wrap

    mv      a2, a0       # score
    la      a0, greedy_wrap_printf
    li      a1, 80       # cols = 80
    call    printf

    mv      a0, s4       # list
    mv      a1, s1       # len
    mv      a2, s5       # breaks
    call    show_wrap

    # With cols = 32
    mv      a0, s4
    mv      a1, s1       # len
    li      a2, 32       # cols = 32
    mv      a3, s5       # breaks
    call    greedy_wrap

    mv      a2, a0       # score
    la      a0, greedy_wrap_printf
    li      a1, 32       # cols = 32
    call    printf

    mv      a0, s4       # list
    mv      a1, s1       # len
    mv      a2, s5       # breaks
    call    show_wrap

    # Leave stack frame
    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s2, 16(sp)
    ld      s3, 24(sp)
    ld      s4, 32(sp)
    ld      s5, 40(sp)
    addi    sp, sp, 56
    li      a0, 0  # return 0
    ret

Mandlebrot Set

 /* 
 c program:
 --------------------------------
  1. draws Mandelbrot set for Fc(z)=z*z +c
  using Mandelbrot algorithm ( boolean escape time )
 -------------------------------         
 2. technique of creating ppm file is  based on the code of Claudio Rocchini
 http://en.wikipedia.org/wiki/Image:Color_complex_plot.jpg
 create 24 bit color graphic file ,  portable pixmap file = PPM 
 see http://en.wikipedia.org/wiki/Portable_pixmap
 to see the file use external application ( graphic viewer)
  */
 #include <stdio.h>
 #include <math.h>
 int main()
 {
          /* screen ( integer) coordinate */
        int iX,iY;
        const int iXmax = 800; 
        const int iYmax = 800;
        /* world ( double) coordinate = parameter plane*/
        double Cx,Cy;
        const double CxMin=-2.5;
        const double CxMax=1.5;
        const double CyMin=-2.0;
        const double CyMax=2.0;
        /* */
        double PixelWidth=(CxMax-CxMin)/iXmax;
        double PixelHeight=(CyMax-CyMin)/iYmax;
        /* color component ( R or G or B) is coded from 0 to 255 */
        /* it is 24 bit color RGB file */
        const int MaxColorComponentValue=255; 
        FILE * fp;
        char *filename="new1.ppm";
        char *comment="# ";/* comment should start with # */
        static unsigned char color[3];
        /* Z=Zx+Zy*i  ;   Z0 = 0 */
        double Zx, Zy;
        double Zx2, Zy2; /* Zx2=Zx*Zx;  Zy2=Zy*Zy  */
        /*  */
        int Iteration;
        const int IterationMax=200;
        /* bail-out value , radius of circle ;  */
        const double EscapeRadius=2;
        double ER2=EscapeRadius*EscapeRadius;
        /*create new file,give it a name and open it in binary mode  */
        fp = fopen(filename,"wb"); /* b -  binary mode */
        /*write ASCII header to the file*/
        fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);
        /* compute and write image data bytes to the file*/
        for(iY=0;iY<iYmax;iY++)
        {
             Cy=CyMin + iY*PixelHeight;
             if (fabs(Cy)< PixelHeight/2) Cy=0.0; /* Main antenna */
             for(iX=0;iX<iXmax;iX++)
             {         
                        Cx=CxMin + iX*PixelWidth;
                        /* initial value of orbit = critical point Z= 0 */
                        Zx=0.0;
                        Zy=0.0;
                        Zx2=Zx*Zx;
                        Zy2=Zy*Zy;
                        /* */
                        for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
                        {
                            Zy=2*Zx*Zy + Cy;
                            Zx=Zx2-Zy2 +Cx;
                            Zx2=Zx*Zx;
                            Zy2=Zy*Zy;
                        };
                        /* compute  pixel color (24 bit = 3 bytes) */
                        if (Iteration==IterationMax)
                        { /*  interior of Mandelbrot set = black */
                           color[0]=0;
                           color[1]=0;
                           color[2]=0;                           
                        }
                     else 
                        { /* exterior of Mandelbrot set = white */
                             color[0]=255; /* Red*/
                             color[1]=255;  /* Green */ 
                             color[2]=255;/* Blue */
                        };
                        /*write color to the file*/
                        fwrite(color,1,3,fp);
                }
        }
        fclose(fp);
        return 0;
 }
.section .rodata
filename: .asciz "new1.ppm"
filemode: .asciz "wb"
comment: .asciz "# "
header: .asciz "P6\n %s\n %d\n %d\n %d\n"
color_out: .asciz "%d %d %d\n"

iXmax: .word 800
iYmax: .word 800
CxMin: .double -2.5
CxMax: .double 1.5
CyMin: .double -2.0
CyMax: .double 2.0
EscapeRadius: .double 2.0

MaxColorComponentValue: .word 255
IterationMax: .word 200

.section .text
.global main
main:
    # Since we make function calls, we need to save our state
    # sp + 0   ra
    # sp + 8   s1  (FILE *fp)
    # sp + 16  s2  (iX)
    # sp + 24  s3  (iY)
    # sp + 32  fs1 (Cx)
    # sp + 40  fs2 (Cy)
    # sp + 48  fs3 (Zx)
    # sp + 56  fs4 (Zy)
    # sp + 64  fs5 (Zx2)
    # sp + 72  fs6 (Zy2)
    # sp + 80  fs7 (PixelWidth)
    # sp + 88  fs8 (PixelHeight)
    # sp + 96  fs9 (ER2)
    # sp + 104 s4  (Iteration)
    # sp + 112 color[0:2] (3 bytes)

    # Enter stack frame
    addi    sp, sp, -120
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s1, 16(sp)
    sd      s1, 24(sp)
    fsd     fs1, 32(sp)
    fsd     fs2, 40(sp)
    fsd     fs3, 48(sp)
    fsd     fs4, 56(sp)
    fsd     fs5, 64(sp)
    fsd     fs6, 72(sp)
    fsd     fs7, 80(sp)
    fsd     fs8, 88(sp)
    fsd     fs9, 96(sp)
    sd      s4, 104(sp)

    # Tons of variables
    # Calculate PixelWidth (fs7)
    la      t0, CxMin
    la      t1, CxMax
    la      t2, iXmax
    fld     ft0, (t0)
    fld     ft1, (t1)
    lw      a0, (t2)
    fcvt.d.w ft2, a0         # Typecast iXmax
    fsub.d  fa0, ft1, ft0    # CxMax - CxMin
    fdiv.d  fs7, fa0, ft2    # (CxMax-CxMin)/iXmax

    # Calculate PixelHeight (fs8)
    la      t0, CyMin
    la      t1, CyMax
    la      t2, iYmax
    fld     ft0, (t0)
    fld     ft1, (t1)
    lw      a0, (t2)
    fcvt.d.w ft2, a0         # Typecast iYmax
    fsub.d  fa0, ft1, ft0    # CyMax - CyMin
    fdiv.d  fs8, fa0, ft2    # (CyMax-CyMin)/iYmax

    # Calculate ER2 (fs9)
    la      t0, EscapeRadius
    fld     ft0, (t0)
    fmul.d  fs9, ft0, ft0     # EscapeRadius^2

    # Open the file
    la      a0, filename
    la      a1, filemode
    call    fopen
    mv      s1, a0             # Copy fp into s1

    # Write the ASCII header
    #mv     a0, s1             # fp
    la      a1, header         # header
    la      a2, comment        # comment
    la      t3, iXmax
    lw      a3, (t3)           # iXmax
    la      t4, iYmax
    lw      a4, (t4)           # iYmax
    la      t5, MaxColorComponentValue
    lw      a5, (t5)           # MaxColorComponentValue
    call    fprintf

    li      s3, 0              # iY = 0
1:  # for (iY=0;iY<iYmax;iY++)
    #      s3   s3       s3
    la      t6, iYmax
    lw      t4, (t6)           # t4 = iYmax
    bge     s3, t4, 1f         # iY<iYmax

    # Cy  = CyMin + iY * PixelHeight
    # fs2 =  ft6  + s3 * fs8
    la      t5, CyMin
    fld     ft6, (t5)          # CyMin
    fcvt.d.w fs2, s3           # Typecast iY
    fmul.d   fs2, fs2, fs8     # iY * PixelHeight
    fadd.d   fs2, fs2, ft6     # CyMin + ...

    # fabs(Cy)
    fabs.d  fa0, fs2           # fabs(Cy)
    # fa0 = fabs(Cy)
    li      t0, 2
    fcvt.d.w ft0, t0           # 2 -> 2.0
    fdiv.d  fa1, fs8, ft0      # PixelHeight / 2.0
    flt.d   t0, fa0, fa1       # fabs(Cy) < PixelHeight/2
    beqz    t0, 2f
    fmv.d.x fs2, zero          # Cy = 0.0
2:  # for (iX = 0;iX<iXmax;iX++)
    #      s2
    li      s2, 0
2:
    la      t0, iXmax
    lw      t1, (t0)
    bge     s2, t1, 2f          # iX < iXmax

    # Cx = CxMin + iX * PixelWidth
    # fs1  ft1    ft0     fs7
    fcvt.d.w ft0, s2            # Typecast iX
    la      t1, CxMin
    fld     ft1, (t1)
    fmul.d  fs1, ft0, fs7       # iX * PixelWidth
    fadd.d  fs1, fs1, ft1       # CxMin + iX * PixelWidth

    fmv.d.x fs3, zero           # Zx = 0.0  (fs3)
    fmv.d   fs4, fs3            # Zy = 0.0  (fs4)
    fmv.d   fs5, fs3            # Zx2 = 0.0  (fs5)
    fmv.d   fs6, fs3            # Zy2 = 0.0  (fs6)

    li      s4, 0               # Iteration = 0
    la      t0, IterationMax
    lw      a0, (t0)
3: # for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
    bge     s4, a0, 3f          # Iteration < IterationMax
    fadd.d  ft0, fs5, fs6       # Zx2 + Zy2
    flt.d   t0, ft0, fs9        # (Zx2+Zy2)<ER2
    beqz    t0, 3f

    # Zy=2*Zx*Zy + Cy
    fmul.d  fs4, fs3, fs4       # Zx * Zy
    li      t1, 2
    fcvt.d.w    ft1, t1         # Typecast
    fmul.d  fs4, fs4, ft1       # 2 * Zx * Zy
    fadd.d  fs4, fs4, fs2       # 2 * Zx * Zy + Cy
    # Zx=Zx2-Zy2 +Cx
    fsub.d  fs3, fs5, fs6       # Zx2 - Zy2
    fadd.d  fs3, fs3, fs1       # Zx2 - Zy2 + Cx
    # Zx2=Zx*Zx
    fmul.d  fs5, fs3, fs3       # Zx * Zx
    # Zy2=Zy*Zy
    fmul.d  fs6, fs4, fs4       # Zy * Zy

    addi    s4, s4, 1           # Iteration++
    j       3b
3: # end for (Iteration=0;...;Iteration++)
    # This is already loaded above
    # la      t0, IterationMax
    # lw      a0, (t0)
    # (Iteration==IterationMax)
    bne     s4, a0, 3f          # Break if !=
    addi    t1, sp, 112         # color[0:2]
    sb      zero, 0(t1)         # color[0] = 0
    sb      zero, 1(t1)         # color[1] = 0
    sb      zero, 2(t1)         # color[2] = 0
    j       4f                  # Skip over else
3:  # else
    addi    t1, sp, 112         # color[0:2]
    li      t2, 255
    sb      t2, 0(t1)           # color[0] = 255
    sb      t2, 1(t1)           # color[1] = 255
    sb      t2, 2(t1)           # color[2] = 255
4:
    # fwrite(color, 1, 3, fp)
    addi    a0, sp, 112         # color
    li      a1, 1               # 1 byte size
    li      a2, 3               # 3 bytes members
    mv      a3, s1              # fp
    call    fwrite

    addi    s2, s2, 1           # iX++
    j       2b
2:
    addi    s3, s3, 1           # iY++
    j       1b
1:

    # Close the file
    mv      a0, s1      # s1 is fp
    call    fclose

    # Leave stack frame
    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s1, 16(sp)
    ld      s1, 24(sp)
    fld     fs1, 32(sp)
    fld     fs2, 40(sp)
    fld     fs3, 48(sp)
    fld     fs4, 56(sp)
    fld     fs5, 64(sp)
    fld     fs6, 72(sp)
    fld     fs7, 80(sp)
    fld     fs8, 88(sp)
    fld     fs9, 96(sp)
    ld      s4, 104(sp)
    addi    sp, sp, 120
    ret

When finished, you will see a new1.ppm file in the directory you executed the program.


Sudoku Solver

#include <stdio.h>
 
void show(int *x)
{
	int i, j;
	for (i = 0; i < 9; i++) {
		if (!(i % 3)) putchar('\n');
		for (j = 0; j < 9; j++)
			printf(j % 3 ? "%2d" : "%3d", *x++);
		putchar('\n');
	}
}
 
int trycell(int *x, int pos)
{
	int row = pos / 9;
	int col = pos % 9;
	int i, j, used = 0;
 
	if (pos == 81) return 1;
	if (x[pos]) return trycell(x, pos + 1);
 
	for (i = 0; i < 9; i++)
		used |= 1 << (x[i * 9 + col] - 1);
 
	for (j = 0; j < 9; j++)
		used |= 1 << (x[row * 9 + j] - 1);
 
	row = row / 3 * 3;
	col = col / 3 * 3;
	for (i = row; i < row + 3; i++)
		for (j = col; j < col + 3; j++)
			used |= 1 << (x[i * 9 + j] - 1);
 
	for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1)
		if (!(used & 1) && trycell(x, pos + 1)) return 1;
 
	x[pos] = 0;
	return 0;
}
 
void solve(const char *s)
{
	int i, x[81];
	for (i = 0; i < 81; i++)
		x[i] = s[i] >= '1' && s[i] <= '9' ? s[i] - '0' : 0;
 
	if (trycell(x, 0))
		show(x);
	else
		puts("no solution");
}
 
int main(void)
{
	solve(	"5x..7...."
		"6..195..."
		".98....6."
		"8...6...3"
		"4..8.3..1"
		"7...2...6"
		".6....28."
		"...419..5"
		"....8..79"	);
 
	return 0;
}
.section .rodata
twod: .asciz "%2d"
threed: .asciz "%3d"
nosoln: .asciz "no solution"
test_solve: .asciz "5x..7....6..195....98....6.8...6...34..8.3..17...2...6.6....28....419..5....8..79"

.section .text
.global show
show:
    # a0 = int *x
    addi    sp, sp, -32
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s2, 16(sp)
    sd      s3, 24(sp)

    # int i (s1)
    # int j (s2)
    # int *x (s3)
    mv      s3, a0
    li      s1, 0           # i = 0
1:  # for (i = 0; i < 9; i++)
    #      s1     s1     s1
    li       t0, 9
    bge      s1, t0, 1f      # i < 9
    li       t1, 3
    rem      t2, s1, t1      # i % 3
    bnez     t2, 2f
    li       a0, '\n'
    call     putchar        # putchar('\n')
2:
    li      s2, 0           # j = 0
2:
    li      t0, 9
    bge     s2, t0, 2f      # j < 9
    li      t1, 3
    rem     t2, s2, t1      # j % 3
    beqz    t2, 3f
    la      a0, twod
    j       4f              # skip past else
3:
    la      a0, threed
4:
    lw      a1, 0(s3)       # *x
    addi    s3, s3, 4       # x++ (pointer arithmetic)
    call    printf
    addi    s2, s2, 1       # j++
    j       2b
2:
    li      a0, '\n'
    call    putchar         # putchar('\n')
    addi    s1, s1, 1       # i++
    j       1b
1:


    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s2, 16(sp)
    ld      s3, 24(sp)
    addi    sp, sp, 32
    ret

.global trycell
trycell:
    # a0 = int *x
    # a1 = int pos
    # Stack frame
    # sp + 0: ra
    # sp + 8: int row  (s1)
    # sp + 16: int col (s2)
    # sp + 24: int i   (s3)
    # sp + 32: int j   (s4)
    # sp + 40: int used (s5)
    # sp + 48: int *x   (s6)
    # sp + 56: int pos  (s7)
    # Enter stack frame
    addi    sp, sp, -64
    sd      ra, 0(sp)
    sd      s1, 8(sp)
    sd      s2, 16(sp)
    sd      s3, 24(sp)
    sd      s4, 32(sp)
    sd      s5, 40(sp)
    sd      s6, 48(sp)
    sd      s7, 56(sp)

    mv      s7, a1              # pos
    mv      s6, a0              # int *x

    li      t0, 9
    div     s1, s7, t0          # pos / 9
    rem     s2, s7, t0          # pos % 9
    li      s5, 0               # used = 0

    # -if (pos == 81) return 1;
    li      t0, 81
    bne     s7, t0, 1f
    li      a0, 1               # return 1
    j       999f                # We still have to leave the stack frame
1:
    # -if (x[pos]) return trycell(x, pos + 1)
    slli    t0, s7, 2           # Scale pos by 4
    add     t0, t0, s6          # x + pos
    lw      t1, 0(t0)           # *(x + pos)
    beqz    t1, 1f              # if x[pos] == 0, dont execute if statement
    mv      a0, s6              # x
    addi    a1, s7, 1           # pos + 1
    call    trycell             # trycell(x, pos + 1)
    j       999f                # When we return leave the stack frame
1:

    li      s3, 0               # i = 0
1:
    li      t0, 9
    bge     s3, t0, 1f          # i < 9

    # used |= 1 << ( x[i * 9 + col] - 1)
    mul     t1, s3, t0          # t1 = i * 9
    add     t1, t1, s2          # t1 = i * 9 + col
    slli    t2, t1, 2           # Scale by 4
    add     t2, t2, s6          # x + i * 9 + col
    lw      t3, 0(t2)           # x[i * 9 + col]
    addi    t3, t3, -1          # x[i * 9 + col] - 1
    li      t4, 1
    sll     t4, t4, t3          # 1 << x[i * 9 + col] - 1
    or      s5, s5, t4          # used |= ...

    addi    s3, s3, 1           # i++
    j       1b
1:
    li      s4, 0               # j = 0
1:
    li      t0, 9
    bge     s4, t0, 1f          # j < 9

    # used |= 1 << (x[row * 9 + j] - 1)
    mul     t1, t0, s1          # row * 9
    add     t1, t1, s4          # row * 9 + j
    slli    t2, t1, 2           # scale by 4
    add     t3, t2, s6          # x + row * 9 + j
    lw      t4, 0(t3)           # x[row * 9 + j]
    addi    t4, t4, -1          # x[row * 9 + j] - 1
    li      t0, 1
    sll     t1, t0, t4          # 1 << (x[row*9+j] - 1)
    or      s5, s5, t1          # used |= ...

    addi    s4, s4, 1           # j++
    j       1b
1:
    li      t0, 3
    div     s1, s1, t0          # row / 3
    mul     s1, s1, t0          # row / 3 * 3
    div     s2, s2, t0          # col / 3
    mul     s2, s2, t0          # col * 3
    mv      s3, s1              # i = row
1: # for (i = row; i < row + 3;i++)
    addi    t0, s1, 3           # row + 3
    bge     s3, t0, 1f          # i < row + 3

    mv      s4, s2              # j = col
2: # for (j = col; j < col + 3; j++)
    addi    t0, s2, 3           # col + 3
    bge     s4, t0, 2f          # j < col + 3

    # used |= 1 << (x[i * 9 + j] - 1)
    li      t0, 9
    mul     t1, s3, t0          # i * 9
    add     t1, t1, s4          # i * 9 + j
    slli    t1, t1, 4           # scale by 4
    add     t1, t1, s6          # x + i * 9 + j
    lw      t0, 0(t1)           # x[i * 9 + j]
    addi    t0, t0, -1          # x[i * 9 + j] - 1
    li      t1, 1
    sll     t1, t1, t0          # 1 << (x[i * 9 + j] - 1)
    or      s5, s5, t1          # used |= ...

    addi    s4, s4, 1           # j++
    j       2b
2:

    addi    s3, s3, 1           # i++
    j       1b
1:
    slli    s3, s7, 2           # scale pos by 4
    add     s3, t0, s6          # x[pos]
    li      s4, 1
    sw      s4, 0(s3)           # x[pos] = 1
1: # for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1)
    li      t2, 9
    lw      s4, 0(s3)           # x[pos]
    bgt     s4, t2, 1f          # x[pos] <= 9

    andi    t0, s5, 1           # used & 1
    bnez    t0, 2f              # !(used & 1)
    mv      a0, s6              # x
    addi    a1, s7, 1           # pos + 1
    call    trycell             # trycell(x, pos + 1)
    beqz    a0, 2f
    li      a0, 1
    j       999f                # return 1
2:
    addi    s4, s4, 1           # x[pos]++
    sw      s4, 0(s3)
    srai    s5, s5, 1           # used >>= 1
    j       1b
1:
    # x[pos] = 0
    sw      zero, 0(s3)         # x[pos] = 0
    mv      a0, zero
999:
    # Leave stack frame
    ld      ra, 0(sp)
    ld      s1, 8(sp)
    ld      s2, 16(sp)
    ld      s3, 24(sp)
    ld      s4, 32(sp)
    ld      s5, 40(sp)
    ld      s6, 48(sp)
    ld      s7, 56(sp)
    addi    sp, sp, 64
    ret


.global solve
solve:
    # a0 = const char *s
    # Enter stack frame
    # sp + 0: ra
    # sp + 8: x[0]
    addi    sp, sp, -336
    sd      ra, 0(sp)

    li      t0, 0           # i = 0
1:
    li      t1, 81
    bge     t0, t1, 1f      # i < 81

    slli    t2, t0, 2       # Scale i by 4
    add     t6, a0, t2      # s[i]
    lw      t4, 0(t6)       # s[i]
    li      a1, '1'
    blt     t4, a1, 2f
    li      a1, '9'
    bgt     t4, a1, 2f
    li      t5, '0'
    sub     t5, t4, t5      # s[i] - '0'
    j       3f
2:
    li      t5, 0
3:
    addi    t6, sp, 8       # sp + 8 is x
    add     t6, t6, t2      # x[i]
    sw      t5, 0(t6)       # x[i] = ...

    addi    t0, t0, 1       # i++
    j       1b
1:
    #-if trycell(x, 0)
    addi    a0, sp, 8       # x
    li      a1, 0           # 0
    call    trycell
    beqz    a0, 1f
    addi    a0, sp, 8
    call    show
    j       2f
1:
    la      a0, nosoln
    call    puts
2:
    ld      ra, 0(sp)
    addi    sp, sp, 336
    ret


.global main
main:
    addi    sp, sp, -8
    sd      ra, 0(sp)
    la      a0, test_solve
    call    solve
    mv      a0, zero
    ld      ra, 0(sp)
    addi    sp, sp, 8
    ret
Example RISC-V AssemblyTable of ContentsWriting RISC-V Assembly