This post is about page 245 - 254 of The Unix Programming Environment.
Stage 3 is when we add variables and functions to the language and this adds a ton of complexity. I think I could rewrite this to be more clear for myself but damn it does get heavy. The logic is now spread out across 5 files which I always find makes things harder for me to understand. A single file is much easier for my brain to wrap itself around so I think that will be the next step that I do before I continue.
I need to merge everything into a single file, rewrite parts of it and do the exercises. But I did transcribe the code properly as I only had a single bug to fix to get everything to work.
The most interesting thing is that they use a linked list for the symbol table and its lovely to see it being used in the wild. I've used a hashmap for a symbol table but here they had to make the data structure themselves. A good mod might be to switch to using a hashmap, this way its not a search but a real look up. The other cool thing is how they've wrapped the functions in a way so that they can send out error messages. I never thought about doing that with built in functions. It's an elegant way of adding error checking while exposing the underlying functions.
I finally learned what those declarations after the function are, they are the types of the function parameters. It is an interesting way of declaring them that I haven't run into before.
Now for the code, this will be in 5 files and I'll also add the make file here.
hoc.h
typedef struct Symbol {
char *name;
short type;
union {
float val;
float (*ptr)();
} u;
struct Symbol *next;
} Symbol;
Symbol *install(), *lookup();
This is the symbol table, it keeps track of the name, type, value which can be a float or a function pointer, and a pointer to the next node.
The header file also exposes 2 functions, the install function adds things to the symbol table while lookup finds an existing symbol, otherwise it will return null.
symbol.c
#include "hoc.h"
#include "y.tab.h"
static Symbol *symlist = 0;
Symbol *lookup(s)
char *s;
{
Symbol *sp;
for (sp = symlist; sp != (Symbol*) 0; sp = sp->next) {
if (strcmp(sp->name, s) == 0) {
return sp;
}
}
return 0;
}
Symbol *install(s, t, d)
char *s;
int t;
float d;
{
Symbol *sp;
char *emalloc();
sp = (Symbol*) emalloc(sizeof(Symbol));
sp->name = emalloc(strlen(s) + 1);
strcpy(sp->name, s);
sp->type = t;
sp->u.val = d;
sp->next = symlist;
symlist = sp;
return sp;
}
char *emalloc(n)
unsigned n;
{
char *p, *malloc();
p = malloc(n);
if (p == 0) {
execerror("out of memory", (char*) 0);
}
return p;
}
This is the implementation of the symbol table. The install command is straightforward function to create a node and add it to the front of the linked list. The lookup function is also pretty straightforward, it is a traversal of the linked list.I really like emalloc. Here malloc is wrapped so that the error of running out of memory will be dealt with properly.
init.c
#include "hoc.h"
#include "y.tab.h"
#include <math.h>
extern float Log(), Log10(), Exp(), Sqrt(), integer();
static struct {
char *name;
float cval;
} consts[] = {
"PI", 3.14159265358979323846,
"E", 2.71828182845904523536,
"GAMMA", 0.57721566490153286060,
"DEG", 57.29577951308232087680,
"PHI", 1.61803398874989484820,
0, 0
};
static struct {
char *name;
float (*func)();
} builtins[] = {
"sin", sin,
"cos", cos,
"atan", atan,
"log", Log,
"log10", Log10,
"exp", Exp,
"sqrt", Sqrt,
"int", integer,
"abs", fabs,
0, 0
};
init()
{
int i;
Symbol *s;
for (i=0;consts[i].name; i++) {
install(consts[i].name, VAR, consts[i].cval);
}
for (i=0;builtins[i].name; i++) {
s = install(builtins[i].name, BLTN, 0.0);
s->u.ptr = builtins[i].func;
}
}
This file will add the constants and functions we want to expose to the symbol table right at the beginning of program.
math.c
#include <math.h>
#include <errno.h>
extern int errno;
float errcheck();
float Log(x)
float x;
{
return errcheck(log(x), "log");
}
float Log10(x)
float x;
{
return errcheck(log10(x), "log10");
}
float Exp(x)
float x;
{
return errcheck(exp(x), "exp");
}
float Sqrt(x)
float x;
{
return errcheck(sqrt(x), "sqrt");
}
float Pow(x, y)
float x, y;
{
return errcheck(pow(x,y), "exponentiation");
}
float integer(x)
float x;
{
return (float)(long)x;
}
float errcheck(d, s)
float d;
char *s;
{
if (errno == EDOM) {
errno = 0;
execerror(s, "argument out of domain");
} else if (errno == ERANGE) {
errno = 0;
execerror(s, "result out of range");
}
return d;
}
This is the implementation of the various builtins that could have errors. This way all of them will get checked for errors.
hoc.y
%{
#include "hoc.h"
extern float Pow();
%}
%union {
float val;
Symbol *sym;
}
%token <val> NUMBER
%token <sym> VAR BLTN UNDEF
%type <val> expr asgn
%right '='
%left '+' '-'
%left '*' '/'
%left '%'
%left UNARYMINUS
%right '^'
%%
list:
| list '\n'
| list ';'
| list asgn ';'
| list asgn '\n'
| list expr ';' { printf("s => \t%.8g\n",$2); }
| list expr '\n' { printf("s => \t%.8g\n",$2); }
| list error '\n' { yyerrok; }
;
asgn: VAR '=' expr { $$ = $1->u.val = $3; $1->type = VAR; }
;
expr: NUMBER { $$ = $1; }
| VAR {
if ($1->type == UNDEF) {
execerror("undefined variable", $1->name);
}
$$ = $1->u.val;
}
| asgn
| BLTN '(' expr ')' { $$ = (*($1->u.ptr))($3); }
| '+' expr %prec UNARYMINUS { $$ = $2; }
| '-' expr %prec UNARYMINUS { $$ = -$2; }
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
| expr '/' expr {
if ($3 == 0.0) execerror("Division by 0.", "");
$$ = $1 / $3;
}
| expr '%' expr { $$ = fmod($1,$3); }
| expr '^' expr { $$ = Pow($1,$3); }
| '(' expr ')' { $$ = $2; }
;
%%
#include <stdio.h>
#include <ctype.h>
#include <math.h>
#include <signal.h>
#include <setjmp.h>
jmp_buf begin;
char *progname;
int lineno = 1;
main(argc, argv)
char *argv[];
{
int fpecatch();
progname = argv[0];
init();
setjmp(begin);
signal(SIGFPE, fpecatch);
yyparse();
}
execerror(s,t)
char *s, *t;
{
warning (s, t);
longjmp(begin, 0);
}
fpecatch()
{
execerror("floating point exception", (char*)0);
}
yylex()
{
int c;
while ((c=getchar()) == ' ' || c == '\t');
if (c == EOF) return 0;
if (c == '.' || isdigit(c)) {
ungetc(c,stdin);
scanf("%f",&yylval.val);
return NUMBER;
}
if (isalpha(c)) {
Symbol *s;
char sbuf[100];
char *p = sbuf;
do {
*p++ = c;
} while ((c=getchar()) != EOF && isalnum(c));
ungetc(c, stdin);
*p = '\0';
if ((s=lookup(sbuf)) == 0) {
s = install(sbuf, UNDEF, 0.0);
}
yylval.sym = s;
return s->type == UNDEF ? VAR : s->type;
}
if (c == '\n')
{
lineno++;
}
return c;
}
yyerror(s)
char *s;
{
warning(s, (char *) 0);
}
warning(s,t)
char *s, *t;
{
fprintf(stderr, "%s: %s", progname, s);
if (t) fprintf(stderr, " %s", t);
fprintf(stderr, " near line %d\n", lineno);
}
This is the new and improved hoc.y. This file has 2 big areas where there are changes. The first is the grammer rules now take in assignment as an option and have a structure for it.
The second place is that now we read in alpha characters and create variable names out of them. If the variable doesn't exist, we add it to the symbol table, otherwise we get the symbol from the symbol table.
There is a lot of magic in that parser generation. I've written recursive descent parsers and this is definitely cool how that is abstracted away here.
Makefile
YFLAGS = -d
OBJS = hoc.o init.o math.o symbol.o
hoc: $(OBJS)
cc $(OBJS) -o hoc1 -lm
hoc.o: hoc.h
init.o symbol.o : hoc.h y.tab.h
pr:
$pr hoc.y hoc.h init.c math.c symbol.c makefile
clean:
rm -f $(OBJS) y.tab.[ch]
This is the makefile, significantly more involved than before.
Command to run:
$ make
$ ./hoc1
x = 1
y = 3
x*3
s => 3
x+3
s => 4
x+y
s => 4
sin(y)
s => 7.0730253e+27
Very fun to get this far.