Contents
Word Wrapper
This came from Rosetta Code, but to the date of me writing this, there is no RISC-V assembly section for this code.
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> /* nonsensical hyphens to make greedy wrapping method look bad */ const char *string = "In olden times when wishing still helped one, there lived a king " "whose daughters were all beautiful, but the youngest was so beautiful " "that the sun itself, which has seen so much, was astonished whenever " "it shone-in-her-face. Close-by-the-king's castle lay a great dark " "forest, and under an old lime-tree in the forest was a well, and when " "the day was very warm, the king's child went out into the forest and " "sat down by the side of the cool-fountain, and when she was bored she " "took a golden ball, and threw it up on high and caught it, and this " "ball was her favorite plaything."; /* Each but the last of wrapped lines comes with some penalty as the square of the diff between line length and desired line length. If the line is longer than desired length, the penalty is multiplied by 100. This pretty much prohibits the wrapping routine from going over right margin. If is ok to exceed the margin just a little, something like 20 or 40 will do. Knuth uses a per-paragraph penalty for line-breaking in TeX, which is-- unlike what I have here--probably bug-free. */ #define PENALTY_LONG 100 #define PENALTY_SHORT 1 typedef struct word_t { const char *s; int len; } *word; word make_word_list(const char *s, int *n) { int max_n = 0; word words = 0; *n = 0; while (1) { while (*s && isspace(*s)) s++; if (!*s) break; if (*n >= max_n) { if (!(max_n *= 2)) max_n = 2; words = realloc(words, max_n * sizeof(*words)); } words[*n].s = s; while (*s && !isspace(*s)) s++; words[*n].len = s - words[*n].s; (*n) ++; } return words; } int greedy_wrap(word words, int count, int cols, int *breaks) { int score = 0, line, i, j, d; i = j = line = 0; while (1) { if (i == count) { breaks[j++] = i; break; } if (!line) { line = words[i++].len; continue; } if (line + words[i].len < cols) { line += words[i++].len + 1; continue; } breaks[j++] = i; if (i < count) { d = cols - line; if (d > 0) score += PENALTY_SHORT * d * d; else if (d < 0) score += PENALTY_LONG * d * d; } line = 0; } breaks[j] = 0; return score; } void show_wrap(word list, int count, int *breaks) { int i, j; for (i = j = 0; i < count && breaks[i]; i++) { while (j < breaks[i]) { printf("%.*s", list[j].len, list[j].s); if (j < breaks[i] - 1) putchar(' '); j++; } if (breaks[i]) putchar('\n'); } } int main(void) { int len, score, cols; word list = make_word_list(string, &len); int *breaks = malloc(sizeof(int) * (len + 1)); cols = 80; score = greedy_wrap(list, len, cols, breaks); printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score); show_wrap(list, len, breaks); cols = 32; score = greedy_wrap(list, len, cols, breaks); printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score); show_wrap(list, len, breaks); return 0; }
#define PENALTY_LONG 100 #define PENALTY_SHORT 1 .section .rodata string: .asciz "In olden times when wishing still helped one, there lived a king whose daughters were all beautiful, but the youngest was so beautiful that the sun itself, which has seen so much, was astonished whenever it shone-in-her-face. Close-by-the-king's castle lay a great dark forest, and under an old lime-tree in the forest was a well, and when the day was very warm, the king's child went out into the forest and sat down by the side of the cool-fountain, and when she was bored she took a golden ball, and threw it up on high and caught it, and this ball was her favorite plaything." greedy_wrap_printf: .asciz "\n== greedy wrap at %d (score %d) ==\n\n" show_wrap_printf: .asciz "%.*s" # NOS Table for word structure # Name Off Size # const char *s 0 8 # len 8 4 # Total size = 16 .section .text # By default, C/C++ will make functions global unless they re declared "static" .global make_word_list make_word_list: # a0 = const char *s # a1 = int *n # Stack frame for make_word_list # ra (d): sp + 0 # s1 (d): sp + 8 (const char *s) # s2 (d): sp + 16 (int *n) # s3 (d): sp + 24 (word words) # s4 (w): sp + 32 (int max_n) # Need 40 bytes, aligned to a multiple of 8 = 40 # Enter stack frame addi sp, sp, -40 sd ra, 0(sp) sd s1, 8(sp) sd s2, 16(sp) sd s3, 24(sp) sd s4, 32(sp) # Copy parameters to saved registers mv s1, a0 # Copy const char *s into s0 mv s2, a1 # Copy int *n into s1 mv s3, zero # words = 0 mv s4, zero # max_n = 0 sw zero, 0(s2) # *n = 0 1: # while 1 2: # while (*s && isspace(*s)) s++; lb a0, 0(s1) # *s beqz a0, 2f # Is *s == 0? call isspace beqz a0, 2f # Break if isspace(*s) == false addi s1, s1, 1 # s++ j 2b # Loop again 2: lb a0, 0(s1) # *s beqz a0, 1f # if (!*s) break; lw a0, 0(s2) # *n blt a0, s4, 2f # if (*n >= max_n) bnez s4, 3f # if (!(max_n *= 2)) li s4, 2 # max_n = 2 j 4f 3: slliw s4, s4, 1 # max_n *= 2 4: mv a0, s3 # words mv a1, s4 # max_n slliw a1, a1, 4 # max_n * 16 call realloc mv s3, a0 # words = realloc(...) 2: lw t0, 0(s2) # *n slliw t0, t0, 4 # *n * 16 add t0, t0, s3 # words[*n] sd s1, 0(t0) # words[*n].s = s 2: # while (*s && !isspace(*s)) s++; lb a0, 0(s1) # *s beqz a0, 2f # while(*s) call isspace bnez a0, 2f # && !isspace(*s) addi s1, s1, 1 # s++ j 2b # loop again 2: lw t2, 0(s2) # *n slliw t0, t2, 4 # *n * 16 add t0, s3, t0 # words[*n] ld t1, 0(t0) # words[*n].s sub t1, s1, t1 # s - words[*n].s sw t1, 8(t0) # words[*n].len = s - words[*n].s addiw t2, t2, 1 # (*n)++ sw t2, 0(s2) # (*n)++ j 1b # loop again 1: # return words mv a0, s3 # words is in s3 right now # Leave stack frame ld ra, 0(sp) ld s1, 8(sp) ld s2, 16(sp) ld s3, 24(sp) ld s4, 32(sp) addi sp, sp, 40 ret .global show_wrap show_wrap: # a0 = word *list # a1 = int count # a2 = int *breaks # Stack frame for show_wrap # sp + 0 = ra # sp + 8 = s1 (word list) # sp + 16 = s2 (int count) # sp + 24 = s3 (int *breaks) # sp + 32 = s4 (i) # sp + 40 = s5 (j) # sp + 48 = s6 (breaks[i]) # Needs 56 bytes # Enter stack frame addi sp, sp, -56 sd ra, 0(sp) sd s1, 8(sp) sd s2, 16(sp) sd s3, 24(sp) sd s4, 32(sp) sd s5, 40(sp) sd s6, 48(sp) mv s1, a0 mv s2, a1 mv s3, a2 li s4, 0 # i = 0 li s5, 0 # j = 0 1: # for loop bge s4, s2, 1f # i < count slli t0, s4, 2 # i * 4 add t1, s3, t0 # breaks[i] lw s6, 0(t1) # breaks[i] beqz s6, 1f # && breaks[i] 2: # while loop bge s5, s6, 2f # Break if j >= breaks[i] slli t0, s5, 4 # j * 16 add t0, t0, s1 # list[j] la a0, show_wrap_printf lw a1, 8(t0) # list[j].len ld a2, 0(t0) # list[j].s call printf addi t0, s6, -1 # breaks[i] - 1 bge s5, t0, 3f # if (j < breaks[i] - 1) li a0, ' ' call putchar 3: addi s5, s5, 1 # j++ j 2b # loop again 2: beqz s6, 3f # if (breaks[i]) li a0, '\n' call putchar # putchar('\n') 3: addi s4, s4, 1 # i++ j 1b # loop again 1: # Leave stack frame ld ra, 0(sp) ld s1, 8(sp) ld s2, 16(sp) ld s3, 24(sp) ld s4, 32(sp) ld s5, 40(sp) ld s6, 48(sp) addi sp, sp, 56 ret .global greedy_wrap greedy_wrap: # a0 = word words # a1 = int count # a2 = int cols # a3 = int *breaks # There are no function calls within greedy_wrap, so no # stack frame is necessary # t0 = score # t1 = line # t2 = i # t3 = j # t4 = d li t0, 0 # score = 0 li t1, 0 # line = 0 li t2, 0 # i = 0 li t3, 0 # j = 0 1: # while (1) bne t2, a1, 2f # if (i == count) slli t5, t3, 2 # breaks[j] add t5, t5, a3 # breaks[j] sw t2, 0(t5) # breaks[j] = i addi t3, t3, 1 # j++ j 1f # break 2: bnez t1, 2f # if (!line) slli t5, t2, 4 # i * 16 add t5, t5, a0 # words[i] lw t1, 8(t5) # line = words[i].len addi t2, t2, 1 # i++ j 1b # continue 2: slli t5, t2, 4 # i * 16 add t5, t5, a0 # words[i] lw a5, 8(t5) # words[i].len add t6, t1, a5 # line + words[i].len bge t6, a2, 2f # if (line + words[i].len < cols) addi a5, a5, 1 # words[i].len + 1 add t1, t1, a5 # line += words[i].len + 1 addi t2, t2, 1 # i++ j 1b # continue 2: slli t5, t3, 2 # breaks[j] add t5, t5, a3 # breaks[j] sw t2, 0(t5) # breaks[j] = i addi t3, t3, 1 # j++ bge t2, a1, 2f # if (i < count) sub t4, a2, t1 # d = cols - line blez t4, 3f # if (d > 0) # score += PENALTY_SHORT * d * d li a5, PENALTY_SHORT mul t4, t4, t4 # d * d mul t4, a5, t4 # PENALTY_SHORT * d * d add t0, t0, t4 # score += PENALTY_SHORT * d * d j 2f 3: beqz t4, 2f # else if (d < 0) # score += PENALTY_LONG * d * d li a5, PENALTY_LONG mul t4, t4, t4 # d * d mul t4, a5, t4 # PENALTY_SHORT * d * d add t0, t0, t4 # score += PENALTY_SHORT * d * d 2: li t1, 0 # line = 0 j 1b # loop again 1: slli t5, t3, 2 # breaks[j] add t5, t5, a3 # breaks[j] sw zero, 0(t5) # breaks[j] = 0 mv a0, t0 # return score ret .global main main: # tail cmain # Stack frame for main # ra (d): sp + 0 # s1 (d): sp + 8 (int len) # s2 (d): sp + 16 (int score) # s3 (d): sp + 24 (int cols) # s4 (d): sp + 32 (word list) # s5 (d): sp + 40 (int *breaks) # &len : sp + 48 # Need 56 bytes addi sp, sp, -56 sd ra, 0(sp) sd s1, 8(sp) sd s2, 16(sp) sd s3, 24(sp) sd s4, 32(sp) sd s5, 40(sp) la a0, string # string addi a1, sp, 48 # &len call make_word_list lw s1, 48(sp) # get the value of len mv s4, a0 # list = ... addi a0, s1, 1 # len + 1 slli a0, a0, 2 # 4 * (len + 1) call malloc mv s5, a0 # int * breaks = malloc(...) # With cols = 80 mv a0, s4 mv a1, s1 # len li a2, 80 # cols = 80 mv a3, s5 # breaks call greedy_wrap mv a2, a0 # score la a0, greedy_wrap_printf li a1, 80 # cols = 80 call printf mv a0, s4 # list mv a1, s1 # len mv a2, s5 # breaks call show_wrap # With cols = 32 mv a0, s4 mv a1, s1 # len li a2, 32 # cols = 32 mv a3, s5 # breaks call greedy_wrap mv a2, a0 # score la a0, greedy_wrap_printf li a1, 32 # cols = 32 call printf mv a0, s4 # list mv a1, s1 # len mv a2, s5 # breaks call show_wrap # Leave stack frame ld ra, 0(sp) ld s1, 8(sp) ld s2, 16(sp) ld s3, 24(sp) ld s4, 32(sp) ld s5, 40(sp) addi sp, sp, 56 li a0, 0 # return 0 ret
Mandlebrot Set
/* c program: -------------------------------- 1. draws Mandelbrot set for Fc(z)=z*z +c using Mandelbrot algorithm ( boolean escape time ) ------------------------------- 2. technique of creating ppm file is based on the code of Claudio Rocchini http://en.wikipedia.org/wiki/Image:Color_complex_plot.jpg create 24 bit color graphic file , portable pixmap file = PPM see http://en.wikipedia.org/wiki/Portable_pixmap to see the file use external application ( graphic viewer) */ #include <stdio.h> #include <math.h> int main() { /* screen ( integer) coordinate */ int iX,iY; const int iXmax = 800; const int iYmax = 800; /* world ( double) coordinate = parameter plane*/ double Cx,Cy; const double CxMin=-2.5; const double CxMax=1.5; const double CyMin=-2.0; const double CyMax=2.0; /* */ double PixelWidth=(CxMax-CxMin)/iXmax; double PixelHeight=(CyMax-CyMin)/iYmax; /* color component ( R or G or B) is coded from 0 to 255 */ /* it is 24 bit color RGB file */ const int MaxColorComponentValue=255; FILE * fp; char *filename="new1.ppm"; char *comment="# ";/* comment should start with # */ static unsigned char color[3]; /* Z=Zx+Zy*i ; Z0 = 0 */ double Zx, Zy; double Zx2, Zy2; /* Zx2=Zx*Zx; Zy2=Zy*Zy */ /* */ int Iteration; const int IterationMax=200; /* bail-out value , radius of circle ; */ const double EscapeRadius=2; double ER2=EscapeRadius*EscapeRadius; /*create new file,give it a name and open it in binary mode */ fp = fopen(filename,"wb"); /* b - binary mode */ /*write ASCII header to the file*/ fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue); /* compute and write image data bytes to the file*/ for(iY=0;iY<iYmax;iY++) { Cy=CyMin + iY*PixelHeight; if (fabs(Cy)< PixelHeight/2) Cy=0.0; /* Main antenna */ for(iX=0;iX<iXmax;iX++) { Cx=CxMin + iX*PixelWidth; /* initial value of orbit = critical point Z= 0 */ Zx=0.0; Zy=0.0; Zx2=Zx*Zx; Zy2=Zy*Zy; /* */ for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++) { Zy=2*Zx*Zy + Cy; Zx=Zx2-Zy2 +Cx; Zx2=Zx*Zx; Zy2=Zy*Zy; }; /* compute pixel color (24 bit = 3 bytes) */ if (Iteration==IterationMax) { /* interior of Mandelbrot set = black */ color[0]=0; color[1]=0; color[2]=0; } else { /* exterior of Mandelbrot set = white */ color[0]=255; /* Red*/ color[1]=255; /* Green */ color[2]=255;/* Blue */ }; /*write color to the file*/ fwrite(color,1,3,fp); } } fclose(fp); return 0; }
.section .rodata filename: .asciz "new1.ppm" filemode: .asciz "wb" comment: .asciz "# " header: .asciz "P6\n %s\n %d\n %d\n %d\n" color_out: .asciz "%d %d %d\n" iXmax: .word 800 iYmax: .word 800 CxMin: .double -2.5 CxMax: .double 1.5 CyMin: .double -2.0 CyMax: .double 2.0 EscapeRadius: .double 2.0 MaxColorComponentValue: .word 255 IterationMax: .word 200 .section .text .global main main: # Since we make function calls, we need to save our state # sp + 0 ra # sp + 8 s1 (FILE *fp) # sp + 16 s2 (iX) # sp + 24 s3 (iY) # sp + 32 fs1 (Cx) # sp + 40 fs2 (Cy) # sp + 48 fs3 (Zx) # sp + 56 fs4 (Zy) # sp + 64 fs5 (Zx2) # sp + 72 fs6 (Zy2) # sp + 80 fs7 (PixelWidth) # sp + 88 fs8 (PixelHeight) # sp + 96 fs9 (ER2) # sp + 104 s4 (Iteration) # sp + 112 color[0:2] (3 bytes) # Enter stack frame addi sp, sp, -120 sd ra, 0(sp) sd s1, 8(sp) sd s1, 16(sp) sd s1, 24(sp) fsd fs1, 32(sp) fsd fs2, 40(sp) fsd fs3, 48(sp) fsd fs4, 56(sp) fsd fs5, 64(sp) fsd fs6, 72(sp) fsd fs7, 80(sp) fsd fs8, 88(sp) fsd fs9, 96(sp) sd s4, 104(sp) # Tons of variables # Calculate PixelWidth (fs7) la t0, CxMin la t1, CxMax la t2, iXmax fld ft0, (t0) fld ft1, (t1) lw a0, (t2) fcvt.d.w ft2, a0 # Typecast iXmax fsub.d fa0, ft1, ft0 # CxMax - CxMin fdiv.d fs7, fa0, ft2 # (CxMax-CxMin)/iXmax # Calculate PixelHeight (fs8) la t0, CyMin la t1, CyMax la t2, iYmax fld ft0, (t0) fld ft1, (t1) lw a0, (t2) fcvt.d.w ft2, a0 # Typecast iYmax fsub.d fa0, ft1, ft0 # CyMax - CyMin fdiv.d fs8, fa0, ft2 # (CyMax-CyMin)/iYmax # Calculate ER2 (fs9) la t0, EscapeRadius fld ft0, (t0) fmul.d fs9, ft0, ft0 # EscapeRadius^2 # Open the file la a0, filename la a1, filemode call fopen mv s1, a0 # Copy fp into s1 # Write the ASCII header #mv a0, s1 # fp la a1, header # header la a2, comment # comment la t3, iXmax lw a3, (t3) # iXmax la t4, iYmax lw a4, (t4) # iYmax la t5, MaxColorComponentValue lw a5, (t5) # MaxColorComponentValue call fprintf li s3, 0 # iY = 0 1: # for (iY=0;iY<iYmax;iY++) # s3 s3 s3 la t6, iYmax lw t4, (t6) # t4 = iYmax bge s3, t4, 1f # iY<iYmax # Cy = CyMin + iY * PixelHeight # fs2 = ft6 + s3 * fs8 la t5, CyMin fld ft6, (t5) # CyMin fcvt.d.w fs2, s3 # Typecast iY fmul.d fs2, fs2, fs8 # iY * PixelHeight fadd.d fs2, fs2, ft6 # CyMin + ... # fabs(Cy) fabs.d fa0, fs2 # fabs(Cy) # fa0 = fabs(Cy) li t0, 2 fcvt.d.w ft0, t0 # 2 -> 2.0 fdiv.d fa1, fs8, ft0 # PixelHeight / 2.0 flt.d t0, fa0, fa1 # fabs(Cy) < PixelHeight/2 beqz t0, 2f fmv.d.x fs2, zero # Cy = 0.0 2: # for (iX = 0;iX<iXmax;iX++) # s2 li s2, 0 2: la t0, iXmax lw t1, (t0) bge s2, t1, 2f # iX < iXmax # Cx = CxMin + iX * PixelWidth # fs1 ft1 ft0 fs7 fcvt.d.w ft0, s2 # Typecast iX la t1, CxMin fld ft1, (t1) fmul.d fs1, ft0, fs7 # iX * PixelWidth fadd.d fs1, fs1, ft1 # CxMin + iX * PixelWidth fmv.d.x fs3, zero # Zx = 0.0 (fs3) fmv.d fs4, fs3 # Zy = 0.0 (fs4) fmv.d fs5, fs3 # Zx2 = 0.0 (fs5) fmv.d fs6, fs3 # Zy2 = 0.0 (fs6) li s4, 0 # Iteration = 0 la t0, IterationMax lw a0, (t0) 3: # for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++) bge s4, a0, 3f # Iteration < IterationMax fadd.d ft0, fs5, fs6 # Zx2 + Zy2 flt.d t0, ft0, fs9 # (Zx2+Zy2)<ER2 beqz t0, 3f # Zy=2*Zx*Zy + Cy fmul.d fs4, fs3, fs4 # Zx * Zy li t1, 2 fcvt.d.w ft1, t1 # Typecast fmul.d fs4, fs4, ft1 # 2 * Zx * Zy fadd.d fs4, fs4, fs2 # 2 * Zx * Zy + Cy # Zx=Zx2-Zy2 +Cx fsub.d fs3, fs5, fs6 # Zx2 - Zy2 fadd.d fs3, fs3, fs1 # Zx2 - Zy2 + Cx # Zx2=Zx*Zx fmul.d fs5, fs3, fs3 # Zx * Zx # Zy2=Zy*Zy fmul.d fs6, fs4, fs4 # Zy * Zy addi s4, s4, 1 # Iteration++ j 3b 3: # end for (Iteration=0;...;Iteration++) # This is already loaded above # la t0, IterationMax # lw a0, (t0) # (Iteration==IterationMax) bne s4, a0, 3f # Break if != addi t1, sp, 112 # color[0:2] sb zero, 0(t1) # color[0] = 0 sb zero, 1(t1) # color[1] = 0 sb zero, 2(t1) # color[2] = 0 j 4f # Skip over else 3: # else addi t1, sp, 112 # color[0:2] li t2, 255 sb t2, 0(t1) # color[0] = 255 sb t2, 1(t1) # color[1] = 255 sb t2, 2(t1) # color[2] = 255 4: # fwrite(color, 1, 3, fp) addi a0, sp, 112 # color li a1, 1 # 1 byte size li a2, 3 # 3 bytes members mv a3, s1 # fp call fwrite addi s2, s2, 1 # iX++ j 2b 2: addi s3, s3, 1 # iY++ j 1b 1: # Close the file mv a0, s1 # s1 is fp call fclose # Leave stack frame ld ra, 0(sp) ld s1, 8(sp) ld s1, 16(sp) ld s1, 24(sp) fld fs1, 32(sp) fld fs2, 40(sp) fld fs3, 48(sp) fld fs4, 56(sp) fld fs5, 64(sp) fld fs6, 72(sp) fld fs7, 80(sp) fld fs8, 88(sp) fld fs9, 96(sp) ld s4, 104(sp) addi sp, sp, 120 ret
When finished, you will see a new1.ppm file in the directory you executed the program.
Sudoku Solver
#include <stdio.h> void show(int *x) { int i, j; for (i = 0; i < 9; i++) { if (!(i % 3)) putchar('\n'); for (j = 0; j < 9; j++) printf(j % 3 ? "%2d" : "%3d", *x++); putchar('\n'); } } int trycell(int *x, int pos) { int row = pos / 9; int col = pos % 9; int i, j, used = 0; if (pos == 81) return 1; if (x[pos]) return trycell(x, pos + 1); for (i = 0; i < 9; i++) used |= 1 << (x[i * 9 + col] - 1); for (j = 0; j < 9; j++) used |= 1 << (x[row * 9 + j] - 1); row = row / 3 * 3; col = col / 3 * 3; for (i = row; i < row + 3; i++) for (j = col; j < col + 3; j++) used |= 1 << (x[i * 9 + j] - 1); for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1) if (!(used & 1) && trycell(x, pos + 1)) return 1; x[pos] = 0; return 0; } void solve(const char *s) { int i, x[81]; for (i = 0; i < 81; i++) x[i] = s[i] >= '1' && s[i] <= '9' ? s[i] - '0' : 0; if (trycell(x, 0)) show(x); else puts("no solution"); } int main(void) { solve( "5x..7...." "6..195..." ".98....6." "8...6...3" "4..8.3..1" "7...2...6" ".6....28." "...419..5" "....8..79" ); return 0; }
.section .rodata twod: .asciz "%2d" threed: .asciz "%3d" nosoln: .asciz "no solution" test_solve: .asciz "5x..7....6..195....98....6.8...6...34..8.3..17...2...6.6....28....419..5....8..79" .section .text .global show show: # a0 = int *x addi sp, sp, -32 sd ra, 0(sp) sd s1, 8(sp) sd s2, 16(sp) sd s3, 24(sp) # int i (s1) # int j (s2) # int *x (s3) mv s3, a0 li s1, 0 # i = 0 1: # for (i = 0; i < 9; i++) # s1 s1 s1 li t0, 9 bge s1, t0, 1f # i < 9 li t1, 3 rem t2, s1, t1 # i % 3 bnez t2, 2f li a0, '\n' call putchar # putchar('\n') 2: li s2, 0 # j = 0 2: li t0, 9 bge s2, t0, 2f # j < 9 li t1, 3 rem t2, s2, t1 # j % 3 beqz t2, 3f la a0, twod j 4f # skip past else 3: la a0, threed 4: lw a1, 0(s3) # *x addi s3, s3, 4 # x++ (pointer arithmetic) call printf addi s2, s2, 1 # j++ j 2b 2: li a0, '\n' call putchar # putchar('\n') addi s1, s1, 1 # i++ j 1b 1: ld ra, 0(sp) ld s1, 8(sp) ld s2, 16(sp) ld s3, 24(sp) addi sp, sp, 32 ret .global trycell trycell: # a0 = int *x # a1 = int pos # Stack frame # sp + 0: ra # sp + 8: int row (s1) # sp + 16: int col (s2) # sp + 24: int i (s3) # sp + 32: int j (s4) # sp + 40: int used (s5) # sp + 48: int *x (s6) # sp + 56: int pos (s7) # Enter stack frame addi sp, sp, -64 sd ra, 0(sp) sd s1, 8(sp) sd s2, 16(sp) sd s3, 24(sp) sd s4, 32(sp) sd s5, 40(sp) sd s6, 48(sp) sd s7, 56(sp) mv s7, a1 # pos mv s6, a0 # int *x li t0, 9 div s1, s7, t0 # pos / 9 rem s2, s7, t0 # pos % 9 li s5, 0 # used = 0 # -if (pos == 81) return 1; li t0, 81 bne s7, t0, 1f li a0, 1 # return 1 j 999f # We still have to leave the stack frame 1: # -if (x[pos]) return trycell(x, pos + 1) slli t0, s7, 2 # Scale pos by 4 add t0, t0, s6 # x + pos lw t1, 0(t0) # *(x + pos) beqz t1, 1f # if x[pos] == 0, dont execute if statement mv a0, s6 # x addi a1, s7, 1 # pos + 1 call trycell # trycell(x, pos + 1) j 999f # When we return leave the stack frame 1: li s3, 0 # i = 0 1: li t0, 9 bge s3, t0, 1f # i < 9 # used |= 1 << ( x[i * 9 + col] - 1) mul t1, s3, t0 # t1 = i * 9 add t1, t1, s2 # t1 = i * 9 + col slli t2, t1, 2 # Scale by 4 add t2, t2, s6 # x + i * 9 + col lw t3, 0(t2) # x[i * 9 + col] addi t3, t3, -1 # x[i * 9 + col] - 1 li t4, 1 sll t4, t4, t3 # 1 << x[i * 9 + col] - 1 or s5, s5, t4 # used |= ... addi s3, s3, 1 # i++ j 1b 1: li s4, 0 # j = 0 1: li t0, 9 bge s4, t0, 1f # j < 9 # used |= 1 << (x[row * 9 + j] - 1) mul t1, t0, s1 # row * 9 add t1, t1, s4 # row * 9 + j slli t2, t1, 2 # scale by 4 add t3, t2, s6 # x + row * 9 + j lw t4, 0(t3) # x[row * 9 + j] addi t4, t4, -1 # x[row * 9 + j] - 1 li t0, 1 sll t1, t0, t4 # 1 << (x[row*9+j] - 1) or s5, s5, t1 # used |= ... addi s4, s4, 1 # j++ j 1b 1: li t0, 3 div s1, s1, t0 # row / 3 mul s1, s1, t0 # row / 3 * 3 div s2, s2, t0 # col / 3 mul s2, s2, t0 # col * 3 mv s3, s1 # i = row 1: # for (i = row; i < row + 3;i++) addi t0, s1, 3 # row + 3 bge s3, t0, 1f # i < row + 3 mv s4, s2 # j = col 2: # for (j = col; j < col + 3; j++) addi t0, s2, 3 # col + 3 bge s4, t0, 2f # j < col + 3 # used |= 1 << (x[i * 9 + j] - 1) li t0, 9 mul t1, s3, t0 # i * 9 add t1, t1, s4 # i * 9 + j slli t1, t1, 4 # scale by 4 add t1, t1, s6 # x + i * 9 + j lw t0, 0(t1) # x[i * 9 + j] addi t0, t0, -1 # x[i * 9 + j] - 1 li t1, 1 sll t1, t1, t0 # 1 << (x[i * 9 + j] - 1) or s5, s5, t1 # used |= ... addi s4, s4, 1 # j++ j 2b 2: addi s3, s3, 1 # i++ j 1b 1: slli s3, s7, 2 # scale pos by 4 add s3, t0, s6 # x[pos] li s4, 1 sw s4, 0(s3) # x[pos] = 1 1: # for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1) li t2, 9 lw s4, 0(s3) # x[pos] bgt s4, t2, 1f # x[pos] <= 9 andi t0, s5, 1 # used & 1 bnez t0, 2f # !(used & 1) mv a0, s6 # x addi a1, s7, 1 # pos + 1 call trycell # trycell(x, pos + 1) beqz a0, 2f li a0, 1 j 999f # return 1 2: addi s4, s4, 1 # x[pos]++ sw s4, 0(s3) srai s5, s5, 1 # used >>= 1 j 1b 1: # x[pos] = 0 sw zero, 0(s3) # x[pos] = 0 mv a0, zero 999: # Leave stack frame ld ra, 0(sp) ld s1, 8(sp) ld s2, 16(sp) ld s3, 24(sp) ld s4, 32(sp) ld s5, 40(sp) ld s6, 48(sp) ld s7, 56(sp) addi sp, sp, 64 ret .global solve solve: # a0 = const char *s # Enter stack frame # sp + 0: ra # sp + 8: x[0] addi sp, sp, -336 sd ra, 0(sp) li t0, 0 # i = 0 1: li t1, 81 bge t0, t1, 1f # i < 81 slli t2, t0, 2 # Scale i by 4 add t6, a0, t2 # s[i] lw t4, 0(t6) # s[i] li a1, '1' blt t4, a1, 2f li a1, '9' bgt t4, a1, 2f li t5, '0' sub t5, t4, t5 # s[i] - '0' j 3f 2: li t5, 0 3: addi t6, sp, 8 # sp + 8 is x add t6, t6, t2 # x[i] sw t5, 0(t6) # x[i] = ... addi t0, t0, 1 # i++ j 1b 1: #-if trycell(x, 0) addi a0, sp, 8 # x li a1, 0 # 0 call trycell beqz a0, 1f addi a0, sp, 8 call show j 2f 1: la a0, nosoln call puts 2: ld ra, 0(sp) addi sp, sp, 336 ret .global main main: addi sp, sp, -8 sd ra, 0(sp) la a0, test_solve call solve mv a0, zero ld ra, 0(sp) addi sp, sp, 8 ret