Contents
Word Wrapper
This came from Rosetta Code, but to the date of me writing this, there is no RISC-V assembly section for this code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* nonsensical hyphens to make greedy wrapping method look bad */
const char *string = "In olden times when wishing still helped one, there lived a king "
"whose daughters were all beautiful, but the youngest was so beautiful "
"that the sun itself, which has seen so much, was astonished whenever "
"it shone-in-her-face. Close-by-the-king's castle lay a great dark "
"forest, and under an old lime-tree in the forest was a well, and when "
"the day was very warm, the king's child went out into the forest and "
"sat down by the side of the cool-fountain, and when she was bored she "
"took a golden ball, and threw it up on high and caught it, and this "
"ball was her favorite plaything.";
/* Each but the last of wrapped lines comes with some penalty as the square
of the diff between line length and desired line length. If the line
is longer than desired length, the penalty is multiplied by 100. This
pretty much prohibits the wrapping routine from going over right margin.
If is ok to exceed the margin just a little, something like 20 or 40 will
do.
Knuth uses a per-paragraph penalty for line-breaking in TeX, which is--
unlike what I have here--probably bug-free.
*/
#define PENALTY_LONG 100
#define PENALTY_SHORT 1
typedef struct word_t {
const char *s;
int len;
} *word;
word make_word_list(const char *s, int *n)
{
int max_n = 0;
word words = 0;
*n = 0;
while (1) {
while (*s && isspace(*s)) s++;
if (!*s) break;
if (*n >= max_n) {
if (!(max_n *= 2)) max_n = 2;
words = realloc(words, max_n * sizeof(*words));
}
words[*n].s = s;
while (*s && !isspace(*s)) s++;
words[*n].len = s - words[*n].s;
(*n) ++;
}
return words;
}
int greedy_wrap(word words, int count, int cols, int *breaks)
{
int score = 0, line, i, j, d;
i = j = line = 0;
while (1) {
if (i == count) {
breaks[j++] = i;
break;
}
if (!line) {
line = words[i++].len;
continue;
}
if (line + words[i].len < cols) {
line += words[i++].len + 1;
continue;
}
breaks[j++] = i;
if (i < count) {
d = cols - line;
if (d > 0) score += PENALTY_SHORT * d * d;
else if (d < 0) score += PENALTY_LONG * d * d;
}
line = 0;
}
breaks[j] = 0;
return score;
}
void show_wrap(word list, int count, int *breaks)
{
int i, j;
for (i = j = 0; i < count && breaks[i]; i++) {
while (j < breaks[i]) {
printf("%.*s", list[j].len, list[j].s);
if (j < breaks[i] - 1)
putchar(' ');
j++;
}
if (breaks[i]) putchar('\n');
}
}
int main(void)
{
int len, score, cols;
word list = make_word_list(string, &len);
int *breaks = malloc(sizeof(int) * (len + 1));
cols = 80;
score = greedy_wrap(list, len, cols, breaks);
printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score);
show_wrap(list, len, breaks);
cols = 32;
score = greedy_wrap(list, len, cols, breaks);
printf("\n== greedy wrap at %d (score %d) ==\n\n", cols, score);
show_wrap(list, len, breaks);
return 0;
}
#define PENALTY_LONG 100
#define PENALTY_SHORT 1
.section .rodata
string: .asciz "In olden times when wishing still helped one, there lived a king whose daughters were all beautiful, but the youngest was so beautiful that the sun itself, which has seen so much, was astonished whenever it shone-in-her-face. Close-by-the-king's castle lay a great dark forest, and under an old lime-tree in the forest was a well, and when the day was very warm, the king's child went out into the forest and sat down by the side of the cool-fountain, and when she was bored she took a golden ball, and threw it up on high and caught it, and this ball was her favorite plaything."
greedy_wrap_printf: .asciz "\n== greedy wrap at %d (score %d) ==\n\n"
show_wrap_printf: .asciz "%.*s"
# NOS Table for word structure
# Name Off Size
# const char *s 0 8
# len 8 4
# Total size = 16
.section .text
# By default, C/C++ will make functions global unless they re declared "static"
.global make_word_list
make_word_list:
# a0 = const char *s
# a1 = int *n
# Stack frame for make_word_list
# ra (d): sp + 0
# s1 (d): sp + 8 (const char *s)
# s2 (d): sp + 16 (int *n)
# s3 (d): sp + 24 (word words)
# s4 (w): sp + 32 (int max_n)
# Need 40 bytes, aligned to a multiple of 8 = 40
# Enter stack frame
addi sp, sp, -40
sd ra, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
# Copy parameters to saved registers
mv s1, a0 # Copy const char *s into s0
mv s2, a1 # Copy int *n into s1
mv s3, zero # words = 0
mv s4, zero # max_n = 0
sw zero, 0(s2) # *n = 0
1: # while 1
2: # while (*s && isspace(*s)) s++;
lb a0, 0(s1) # *s
beqz a0, 2f # Is *s == 0?
call isspace
beqz a0, 2f # Break if isspace(*s) == false
addi s1, s1, 1 # s++
j 2b # Loop again
2:
lb a0, 0(s1) # *s
beqz a0, 1f # if (!*s) break;
lw a0, 0(s2) # *n
blt a0, s4, 2f # if (*n >= max_n)
bnez s4, 3f # if (!(max_n *= 2))
li s4, 2 # max_n = 2
j 4f
3:
slliw s4, s4, 1 # max_n *= 2
4:
mv a0, s3 # words
mv a1, s4 # max_n
slliw a1, a1, 4 # max_n * 16
call realloc
mv s3, a0 # words = realloc(...)
2:
lw t0, 0(s2) # *n
slliw t0, t0, 4 # *n * 16
add t0, t0, s3 # words[*n]
sd s1, 0(t0) # words[*n].s = s
2: # while (*s && !isspace(*s)) s++;
lb a0, 0(s1) # *s
beqz a0, 2f # while(*s)
call isspace
bnez a0, 2f # && !isspace(*s)
addi s1, s1, 1 # s++
j 2b # loop again
2:
lw t2, 0(s2) # *n
slliw t0, t2, 4 # *n * 16
add t0, s3, t0 # words[*n]
ld t1, 0(t0) # words[*n].s
sub t1, s1, t1 # s - words[*n].s
sw t1, 8(t0) # words[*n].len = s - words[*n].s
addiw t2, t2, 1 # (*n)++
sw t2, 0(s2) # (*n)++
j 1b # loop again
1:
# return words
mv a0, s3 # words is in s3 right now
# Leave stack frame
ld ra, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
addi sp, sp, 40
ret
.global show_wrap
show_wrap:
# a0 = word *list
# a1 = int count
# a2 = int *breaks
# Stack frame for show_wrap
# sp + 0 = ra
# sp + 8 = s1 (word list)
# sp + 16 = s2 (int count)
# sp + 24 = s3 (int *breaks)
# sp + 32 = s4 (i)
# sp + 40 = s5 (j)
# sp + 48 = s6 (breaks[i])
# Needs 56 bytes
# Enter stack frame
addi sp, sp, -56
sd ra, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
sd s6, 48(sp)
mv s1, a0
mv s2, a1
mv s3, a2
li s4, 0 # i = 0
li s5, 0 # j = 0
1: # for loop
bge s4, s2, 1f # i < count
slli t0, s4, 2 # i * 4
add t1, s3, t0 # breaks[i]
lw s6, 0(t1) # breaks[i]
beqz s6, 1f # && breaks[i]
2: # while loop
bge s5, s6, 2f # Break if j >= breaks[i]
slli t0, s5, 4 # j * 16
add t0, t0, s1 # list[j]
la a0, show_wrap_printf
lw a1, 8(t0) # list[j].len
ld a2, 0(t0) # list[j].s
call printf
addi t0, s6, -1 # breaks[i] - 1
bge s5, t0, 3f # if (j < breaks[i] - 1)
li a0, ' '
call putchar
3:
addi s5, s5, 1 # j++
j 2b # loop again
2:
beqz s6, 3f # if (breaks[i])
li a0, '\n'
call putchar # putchar('\n')
3:
addi s4, s4, 1 # i++
j 1b # loop again
1:
# Leave stack frame
ld ra, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
ld s6, 48(sp)
addi sp, sp, 56
ret
.global greedy_wrap
greedy_wrap:
# a0 = word words
# a1 = int count
# a2 = int cols
# a3 = int *breaks
# There are no function calls within greedy_wrap, so no
# stack frame is necessary
# t0 = score
# t1 = line
# t2 = i
# t3 = j
# t4 = d
li t0, 0 # score = 0
li t1, 0 # line = 0
li t2, 0 # i = 0
li t3, 0 # j = 0
1: # while (1)
bne t2, a1, 2f # if (i == count)
slli t5, t3, 2 # breaks[j]
add t5, t5, a3 # breaks[j]
sw t2, 0(t5) # breaks[j] = i
addi t3, t3, 1 # j++
j 1f # break
2:
bnez t1, 2f # if (!line)
slli t5, t2, 4 # i * 16
add t5, t5, a0 # words[i]
lw t1, 8(t5) # line = words[i].len
addi t2, t2, 1 # i++
j 1b # continue
2:
slli t5, t2, 4 # i * 16
add t5, t5, a0 # words[i]
lw a5, 8(t5) # words[i].len
add t6, t1, a5 # line + words[i].len
bge t6, a2, 2f # if (line + words[i].len < cols)
addi a5, a5, 1 # words[i].len + 1
add t1, t1, a5 # line += words[i].len + 1
addi t2, t2, 1 # i++
j 1b # continue
2:
slli t5, t3, 2 # breaks[j]
add t5, t5, a3 # breaks[j]
sw t2, 0(t5) # breaks[j] = i
addi t3, t3, 1 # j++
bge t2, a1, 2f # if (i < count)
sub t4, a2, t1 # d = cols - line
blez t4, 3f # if (d > 0)
# score += PENALTY_SHORT * d * d
li a5, PENALTY_SHORT
mul t4, t4, t4 # d * d
mul t4, a5, t4 # PENALTY_SHORT * d * d
add t0, t0, t4 # score += PENALTY_SHORT * d * d
j 2f
3:
beqz t4, 2f # else if (d < 0)
# score += PENALTY_LONG * d * d
li a5, PENALTY_LONG
mul t4, t4, t4 # d * d
mul t4, a5, t4 # PENALTY_SHORT * d * d
add t0, t0, t4 # score += PENALTY_SHORT * d * d
2:
li t1, 0 # line = 0
j 1b # loop again
1:
slli t5, t3, 2 # breaks[j]
add t5, t5, a3 # breaks[j]
sw zero, 0(t5) # breaks[j] = 0
mv a0, t0 # return score
ret
.global main
main:
# tail cmain
# Stack frame for main
# ra (d): sp + 0
# s1 (d): sp + 8 (int len)
# s2 (d): sp + 16 (int score)
# s3 (d): sp + 24 (int cols)
# s4 (d): sp + 32 (word list)
# s5 (d): sp + 40 (int *breaks)
# &len : sp + 48
# Need 56 bytes
addi sp, sp, -56
sd ra, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
la a0, string # string
addi a1, sp, 48 # &len
call make_word_list
lw s1, 48(sp) # get the value of len
mv s4, a0 # list = ...
addi a0, s1, 1 # len + 1
slli a0, a0, 2 # 4 * (len + 1)
call malloc
mv s5, a0 # int * breaks = malloc(...)
# With cols = 80
mv a0, s4
mv a1, s1 # len
li a2, 80 # cols = 80
mv a3, s5 # breaks
call greedy_wrap
mv a2, a0 # score
la a0, greedy_wrap_printf
li a1, 80 # cols = 80
call printf
mv a0, s4 # list
mv a1, s1 # len
mv a2, s5 # breaks
call show_wrap
# With cols = 32
mv a0, s4
mv a1, s1 # len
li a2, 32 # cols = 32
mv a3, s5 # breaks
call greedy_wrap
mv a2, a0 # score
la a0, greedy_wrap_printf
li a1, 32 # cols = 32
call printf
mv a0, s4 # list
mv a1, s1 # len
mv a2, s5 # breaks
call show_wrap
# Leave stack frame
ld ra, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
addi sp, sp, 56
li a0, 0 # return 0
ret
Mandlebrot Set
/*
c program:
--------------------------------
1. draws Mandelbrot set for Fc(z)=z*z +c
using Mandelbrot algorithm ( boolean escape time )
-------------------------------
2. technique of creating ppm file is based on the code of Claudio Rocchini
http://en.wikipedia.org/wiki/Image:Color_complex_plot.jpg
create 24 bit color graphic file , portable pixmap file = PPM
see http://en.wikipedia.org/wiki/Portable_pixmap
to see the file use external application ( graphic viewer)
*/
#include <stdio.h>
#include <math.h>
int main()
{
/* screen ( integer) coordinate */
int iX,iY;
const int iXmax = 800;
const int iYmax = 800;
/* world ( double) coordinate = parameter plane*/
double Cx,Cy;
const double CxMin=-2.5;
const double CxMax=1.5;
const double CyMin=-2.0;
const double CyMax=2.0;
/* */
double PixelWidth=(CxMax-CxMin)/iXmax;
double PixelHeight=(CyMax-CyMin)/iYmax;
/* color component ( R or G or B) is coded from 0 to 255 */
/* it is 24 bit color RGB file */
const int MaxColorComponentValue=255;
FILE * fp;
char *filename="new1.ppm";
char *comment="# ";/* comment should start with # */
static unsigned char color[3];
/* Z=Zx+Zy*i ; Z0 = 0 */
double Zx, Zy;
double Zx2, Zy2; /* Zx2=Zx*Zx; Zy2=Zy*Zy */
/* */
int Iteration;
const int IterationMax=200;
/* bail-out value , radius of circle ; */
const double EscapeRadius=2;
double ER2=EscapeRadius*EscapeRadius;
/*create new file,give it a name and open it in binary mode */
fp = fopen(filename,"wb"); /* b - binary mode */
/*write ASCII header to the file*/
fprintf(fp,"P6\n %s\n %d\n %d\n %d\n",comment,iXmax,iYmax,MaxColorComponentValue);
/* compute and write image data bytes to the file*/
for(iY=0;iY<iYmax;iY++)
{
Cy=CyMin + iY*PixelHeight;
if (fabs(Cy)< PixelHeight/2) Cy=0.0; /* Main antenna */
for(iX=0;iX<iXmax;iX++)
{
Cx=CxMin + iX*PixelWidth;
/* initial value of orbit = critical point Z= 0 */
Zx=0.0;
Zy=0.0;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
/* */
for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
{
Zy=2*Zx*Zy + Cy;
Zx=Zx2-Zy2 +Cx;
Zx2=Zx*Zx;
Zy2=Zy*Zy;
};
/* compute pixel color (24 bit = 3 bytes) */
if (Iteration==IterationMax)
{ /* interior of Mandelbrot set = black */
color[0]=0;
color[1]=0;
color[2]=0;
}
else
{ /* exterior of Mandelbrot set = white */
color[0]=255; /* Red*/
color[1]=255; /* Green */
color[2]=255;/* Blue */
};
/*write color to the file*/
fwrite(color,1,3,fp);
}
}
fclose(fp);
return 0;
}
.section .rodata
filename: .asciz "new1.ppm"
filemode: .asciz "wb"
comment: .asciz "# "
header: .asciz "P6\n %s\n %d\n %d\n %d\n"
color_out: .asciz "%d %d %d\n"
iXmax: .word 800
iYmax: .word 800
CxMin: .double -2.5
CxMax: .double 1.5
CyMin: .double -2.0
CyMax: .double 2.0
EscapeRadius: .double 2.0
MaxColorComponentValue: .word 255
IterationMax: .word 200
.section .text
.global main
main:
# Since we make function calls, we need to save our state
# sp + 0 ra
# sp + 8 s1 (FILE *fp)
# sp + 16 s2 (iX)
# sp + 24 s3 (iY)
# sp + 32 fs1 (Cx)
# sp + 40 fs2 (Cy)
# sp + 48 fs3 (Zx)
# sp + 56 fs4 (Zy)
# sp + 64 fs5 (Zx2)
# sp + 72 fs6 (Zy2)
# sp + 80 fs7 (PixelWidth)
# sp + 88 fs8 (PixelHeight)
# sp + 96 fs9 (ER2)
# sp + 104 s4 (Iteration)
# sp + 112 color[0:2] (3 bytes)
# Enter stack frame
addi sp, sp, -120
sd ra, 0(sp)
sd s1, 8(sp)
sd s1, 16(sp)
sd s1, 24(sp)
fsd fs1, 32(sp)
fsd fs2, 40(sp)
fsd fs3, 48(sp)
fsd fs4, 56(sp)
fsd fs5, 64(sp)
fsd fs6, 72(sp)
fsd fs7, 80(sp)
fsd fs8, 88(sp)
fsd fs9, 96(sp)
sd s4, 104(sp)
# Tons of variables
# Calculate PixelWidth (fs7)
la t0, CxMin
la t1, CxMax
la t2, iXmax
fld ft0, (t0)
fld ft1, (t1)
lw a0, (t2)
fcvt.d.w ft2, a0 # Typecast iXmax
fsub.d fa0, ft1, ft0 # CxMax - CxMin
fdiv.d fs7, fa0, ft2 # (CxMax-CxMin)/iXmax
# Calculate PixelHeight (fs8)
la t0, CyMin
la t1, CyMax
la t2, iYmax
fld ft0, (t0)
fld ft1, (t1)
lw a0, (t2)
fcvt.d.w ft2, a0 # Typecast iYmax
fsub.d fa0, ft1, ft0 # CyMax - CyMin
fdiv.d fs8, fa0, ft2 # (CyMax-CyMin)/iYmax
# Calculate ER2 (fs9)
la t0, EscapeRadius
fld ft0, (t0)
fmul.d fs9, ft0, ft0 # EscapeRadius^2
# Open the file
la a0, filename
la a1, filemode
call fopen
mv s1, a0 # Copy fp into s1
# Write the ASCII header
#mv a0, s1 # fp
la a1, header # header
la a2, comment # comment
la t3, iXmax
lw a3, (t3) # iXmax
la t4, iYmax
lw a4, (t4) # iYmax
la t5, MaxColorComponentValue
lw a5, (t5) # MaxColorComponentValue
call fprintf
li s3, 0 # iY = 0
1: # for (iY=0;iY<iYmax;iY++)
# s3 s3 s3
la t6, iYmax
lw t4, (t6) # t4 = iYmax
bge s3, t4, 1f # iY<iYmax
# Cy = CyMin + iY * PixelHeight
# fs2 = ft6 + s3 * fs8
la t5, CyMin
fld ft6, (t5) # CyMin
fcvt.d.w fs2, s3 # Typecast iY
fmul.d fs2, fs2, fs8 # iY * PixelHeight
fadd.d fs2, fs2, ft6 # CyMin + ...
# fabs(Cy)
fabs.d fa0, fs2 # fabs(Cy)
# fa0 = fabs(Cy)
li t0, 2
fcvt.d.w ft0, t0 # 2 -> 2.0
fdiv.d fa1, fs8, ft0 # PixelHeight / 2.0
flt.d t0, fa0, fa1 # fabs(Cy) < PixelHeight/2
beqz t0, 2f
fmv.d.x fs2, zero # Cy = 0.0
2: # for (iX = 0;iX<iXmax;iX++)
# s2
li s2, 0
2:
la t0, iXmax
lw t1, (t0)
bge s2, t1, 2f # iX < iXmax
# Cx = CxMin + iX * PixelWidth
# fs1 ft1 ft0 fs7
fcvt.d.w ft0, s2 # Typecast iX
la t1, CxMin
fld ft1, (t1)
fmul.d fs1, ft0, fs7 # iX * PixelWidth
fadd.d fs1, fs1, ft1 # CxMin + iX * PixelWidth
fmv.d.x fs3, zero # Zx = 0.0 (fs3)
fmv.d fs4, fs3 # Zy = 0.0 (fs4)
fmv.d fs5, fs3 # Zx2 = 0.0 (fs5)
fmv.d fs6, fs3 # Zy2 = 0.0 (fs6)
li s4, 0 # Iteration = 0
la t0, IterationMax
lw a0, (t0)
3: # for (Iteration=0;Iteration<IterationMax && ((Zx2+Zy2)<ER2);Iteration++)
bge s4, a0, 3f # Iteration < IterationMax
fadd.d ft0, fs5, fs6 # Zx2 + Zy2
flt.d t0, ft0, fs9 # (Zx2+Zy2)<ER2
beqz t0, 3f
# Zy=2*Zx*Zy + Cy
fmul.d fs4, fs3, fs4 # Zx * Zy
li t1, 2
fcvt.d.w ft1, t1 # Typecast
fmul.d fs4, fs4, ft1 # 2 * Zx * Zy
fadd.d fs4, fs4, fs2 # 2 * Zx * Zy + Cy
# Zx=Zx2-Zy2 +Cx
fsub.d fs3, fs5, fs6 # Zx2 - Zy2
fadd.d fs3, fs3, fs1 # Zx2 - Zy2 + Cx
# Zx2=Zx*Zx
fmul.d fs5, fs3, fs3 # Zx * Zx
# Zy2=Zy*Zy
fmul.d fs6, fs4, fs4 # Zy * Zy
addi s4, s4, 1 # Iteration++
j 3b
3: # end for (Iteration=0;...;Iteration++)
# This is already loaded above
# la t0, IterationMax
# lw a0, (t0)
# (Iteration==IterationMax)
bne s4, a0, 3f # Break if !=
addi t1, sp, 112 # color[0:2]
sb zero, 0(t1) # color[0] = 0
sb zero, 1(t1) # color[1] = 0
sb zero, 2(t1) # color[2] = 0
j 4f # Skip over else
3: # else
addi t1, sp, 112 # color[0:2]
li t2, 255
sb t2, 0(t1) # color[0] = 255
sb t2, 1(t1) # color[1] = 255
sb t2, 2(t1) # color[2] = 255
4:
# fwrite(color, 1, 3, fp)
addi a0, sp, 112 # color
li a1, 1 # 1 byte size
li a2, 3 # 3 bytes members
mv a3, s1 # fp
call fwrite
addi s2, s2, 1 # iX++
j 2b
2:
addi s3, s3, 1 # iY++
j 1b
1:
# Close the file
mv a0, s1 # s1 is fp
call fclose
# Leave stack frame
ld ra, 0(sp)
ld s1, 8(sp)
ld s1, 16(sp)
ld s1, 24(sp)
fld fs1, 32(sp)
fld fs2, 40(sp)
fld fs3, 48(sp)
fld fs4, 56(sp)
fld fs5, 64(sp)
fld fs6, 72(sp)
fld fs7, 80(sp)
fld fs8, 88(sp)
fld fs9, 96(sp)
ld s4, 104(sp)
addi sp, sp, 120
ret
When finished, you will see a new1.ppm file in the directory you executed the program.
Sudoku Solver
#include <stdio.h>
void show(int *x)
{
int i, j;
for (i = 0; i < 9; i++) {
if (!(i % 3)) putchar('\n');
for (j = 0; j < 9; j++)
printf(j % 3 ? "%2d" : "%3d", *x++);
putchar('\n');
}
}
int trycell(int *x, int pos)
{
int row = pos / 9;
int col = pos % 9;
int i, j, used = 0;
if (pos == 81) return 1;
if (x[pos]) return trycell(x, pos + 1);
for (i = 0; i < 9; i++)
used |= 1 << (x[i * 9 + col] - 1);
for (j = 0; j < 9; j++)
used |= 1 << (x[row * 9 + j] - 1);
row = row / 3 * 3;
col = col / 3 * 3;
for (i = row; i < row + 3; i++)
for (j = col; j < col + 3; j++)
used |= 1 << (x[i * 9 + j] - 1);
for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1)
if (!(used & 1) && trycell(x, pos + 1)) return 1;
x[pos] = 0;
return 0;
}
void solve(const char *s)
{
int i, x[81];
for (i = 0; i < 81; i++)
x[i] = s[i] >= '1' && s[i] <= '9' ? s[i] - '0' : 0;
if (trycell(x, 0))
show(x);
else
puts("no solution");
}
int main(void)
{
solve( "5x..7...."
"6..195..."
".98....6."
"8...6...3"
"4..8.3..1"
"7...2...6"
".6....28."
"...419..5"
"....8..79" );
return 0;
}
.section .rodata
twod: .asciz "%2d"
threed: .asciz "%3d"
nosoln: .asciz "no solution"
test_solve: .asciz "5x..7....6..195....98....6.8...6...34..8.3..17...2...6.6....28....419..5....8..79"
.section .text
.global show
show:
# a0 = int *x
addi sp, sp, -32
sd ra, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
# int i (s1)
# int j (s2)
# int *x (s3)
mv s3, a0
li s1, 0 # i = 0
1: # for (i = 0; i < 9; i++)
# s1 s1 s1
li t0, 9
bge s1, t0, 1f # i < 9
li t1, 3
rem t2, s1, t1 # i % 3
bnez t2, 2f
li a0, '\n'
call putchar # putchar('\n')
2:
li s2, 0 # j = 0
2:
li t0, 9
bge s2, t0, 2f # j < 9
li t1, 3
rem t2, s2, t1 # j % 3
beqz t2, 3f
la a0, twod
j 4f # skip past else
3:
la a0, threed
4:
lw a1, 0(s3) # *x
addi s3, s3, 4 # x++ (pointer arithmetic)
call printf
addi s2, s2, 1 # j++
j 2b
2:
li a0, '\n'
call putchar # putchar('\n')
addi s1, s1, 1 # i++
j 1b
1:
ld ra, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
addi sp, sp, 32
ret
.global trycell
trycell:
# a0 = int *x
# a1 = int pos
# Stack frame
# sp + 0: ra
# sp + 8: int row (s1)
# sp + 16: int col (s2)
# sp + 24: int i (s3)
# sp + 32: int j (s4)
# sp + 40: int used (s5)
# sp + 48: int *x (s6)
# sp + 56: int pos (s7)
# Enter stack frame
addi sp, sp, -64
sd ra, 0(sp)
sd s1, 8(sp)
sd s2, 16(sp)
sd s3, 24(sp)
sd s4, 32(sp)
sd s5, 40(sp)
sd s6, 48(sp)
sd s7, 56(sp)
mv s7, a1 # pos
mv s6, a0 # int *x
li t0, 9
div s1, s7, t0 # pos / 9
rem s2, s7, t0 # pos % 9
li s5, 0 # used = 0
# -if (pos == 81) return 1;
li t0, 81
bne s7, t0, 1f
li a0, 1 # return 1
j 999f # We still have to leave the stack frame
1:
# -if (x[pos]) return trycell(x, pos + 1)
slli t0, s7, 2 # Scale pos by 4
add t0, t0, s6 # x + pos
lw t1, 0(t0) # *(x + pos)
beqz t1, 1f # if x[pos] == 0, dont execute if statement
mv a0, s6 # x
addi a1, s7, 1 # pos + 1
call trycell # trycell(x, pos + 1)
j 999f # When we return leave the stack frame
1:
li s3, 0 # i = 0
1:
li t0, 9
bge s3, t0, 1f # i < 9
# used |= 1 << ( x[i * 9 + col] - 1)
mul t1, s3, t0 # t1 = i * 9
add t1, t1, s2 # t1 = i * 9 + col
slli t2, t1, 2 # Scale by 4
add t2, t2, s6 # x + i * 9 + col
lw t3, 0(t2) # x[i * 9 + col]
addi t3, t3, -1 # x[i * 9 + col] - 1
li t4, 1
sll t4, t4, t3 # 1 << x[i * 9 + col] - 1
or s5, s5, t4 # used |= ...
addi s3, s3, 1 # i++
j 1b
1:
li s4, 0 # j = 0
1:
li t0, 9
bge s4, t0, 1f # j < 9
# used |= 1 << (x[row * 9 + j] - 1)
mul t1, t0, s1 # row * 9
add t1, t1, s4 # row * 9 + j
slli t2, t1, 2 # scale by 4
add t3, t2, s6 # x + row * 9 + j
lw t4, 0(t3) # x[row * 9 + j]
addi t4, t4, -1 # x[row * 9 + j] - 1
li t0, 1
sll t1, t0, t4 # 1 << (x[row*9+j] - 1)
or s5, s5, t1 # used |= ...
addi s4, s4, 1 # j++
j 1b
1:
li t0, 3
div s1, s1, t0 # row / 3
mul s1, s1, t0 # row / 3 * 3
div s2, s2, t0 # col / 3
mul s2, s2, t0 # col * 3
mv s3, s1 # i = row
1: # for (i = row; i < row + 3;i++)
addi t0, s1, 3 # row + 3
bge s3, t0, 1f # i < row + 3
mv s4, s2 # j = col
2: # for (j = col; j < col + 3; j++)
addi t0, s2, 3 # col + 3
bge s4, t0, 2f # j < col + 3
# used |= 1 << (x[i * 9 + j] - 1)
li t0, 9
mul t1, s3, t0 # i * 9
add t1, t1, s4 # i * 9 + j
slli t1, t1, 4 # scale by 4
add t1, t1, s6 # x + i * 9 + j
lw t0, 0(t1) # x[i * 9 + j]
addi t0, t0, -1 # x[i * 9 + j] - 1
li t1, 1
sll t1, t1, t0 # 1 << (x[i * 9 + j] - 1)
or s5, s5, t1 # used |= ...
addi s4, s4, 1 # j++
j 2b
2:
addi s3, s3, 1 # i++
j 1b
1:
slli s3, s7, 2 # scale pos by 4
add s3, t0, s6 # x[pos]
li s4, 1
sw s4, 0(s3) # x[pos] = 1
1: # for (x[pos] = 1; x[pos] <= 9; x[pos]++, used >>= 1)
li t2, 9
lw s4, 0(s3) # x[pos]
bgt s4, t2, 1f # x[pos] <= 9
andi t0, s5, 1 # used & 1
bnez t0, 2f # !(used & 1)
mv a0, s6 # x
addi a1, s7, 1 # pos + 1
call trycell # trycell(x, pos + 1)
beqz a0, 2f
li a0, 1
j 999f # return 1
2:
addi s4, s4, 1 # x[pos]++
sw s4, 0(s3)
srai s5, s5, 1 # used >>= 1
j 1b
1:
# x[pos] = 0
sw zero, 0(s3) # x[pos] = 0
mv a0, zero
999:
# Leave stack frame
ld ra, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
ld s3, 24(sp)
ld s4, 32(sp)
ld s5, 40(sp)
ld s6, 48(sp)
ld s7, 56(sp)
addi sp, sp, 64
ret
.global solve
solve:
# a0 = const char *s
# Enter stack frame
# sp + 0: ra
# sp + 8: x[0]
addi sp, sp, -336
sd ra, 0(sp)
li t0, 0 # i = 0
1:
li t1, 81
bge t0, t1, 1f # i < 81
slli t2, t0, 2 # Scale i by 4
add t6, a0, t2 # s[i]
lw t4, 0(t6) # s[i]
li a1, '1'
blt t4, a1, 2f
li a1, '9'
bgt t4, a1, 2f
li t5, '0'
sub t5, t4, t5 # s[i] - '0'
j 3f
2:
li t5, 0
3:
addi t6, sp, 8 # sp + 8 is x
add t6, t6, t2 # x[i]
sw t5, 0(t6) # x[i] = ...
addi t0, t0, 1 # i++
j 1b
1:
#-if trycell(x, 0)
addi a0, sp, 8 # x
li a1, 0 # 0
call trycell
beqz a0, 1f
addi a0, sp, 8
call show
j 2f
1:
la a0, nosoln
call puts
2:
ld ra, 0(sp)
addi sp, sp, 336
ret
.global main
main:
addi sp, sp, -8
sd ra, 0(sp)
la a0, test_solve
call solve
mv a0, zero
ld ra, 0(sp)
addi sp, sp, 8
ret