C Declarations

I ordered myself a copy of “Expert C Programming” by Peter van der Linden after reading this article, a “Beginner’s Guide to Linkers and Loaders”. I’m trying to get to grips with C and how it goes from being C to being machine code, and apparently van der Linden’s book is an excellent resource for this topic :)

This post is my solution to his “The Piece of Code That Understandeth All Parsing”.

So I’m currently 85 pages into this book and the chapter is talking about the weird, wonderful and ambiguous-seeming world of C declarations. In it, he poses questions such as what does the following mean:

1
char *(*c[10])(int **p)

I didn’t have a clue. He says that the English translation is:

c is an array[0..9] of pointer to a function returning a pointer-to-char

… wat.

Then he sets the challenge of writing code to do this C to English translation for you, giving a diagram of how to do it (which is here) and some pseudo-code.

There is also a code solution to it a few pages ahead and I do have to admit that I looked up some parts of the answer. I’m no C expert yet and C declarations were confusing enough as it was :) However, I think the solution I got holds up well. It’s not perfect but it does an okay job.

"C Declaration Parser" (cdecl.c) download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

/* Size in bytes of the text buffer in struct token (terminating NUL included). */
#define MAX_TOKEN_LEN 256
/* Number of slots in the global token stack. */
#define MAX_TOKENS    256

/*
 * Minimal stack operations on the global `stack` array and its `top` index.
 *
 * None of these check bounds: the caller must not pop()/peek() when the
 * stack is empty (top == -1) and must not push() when it is full.
 *
 * Every expansion and the macro argument are parenthesized so that each
 * macro behaves as a single expression wherever it appears, e.g.
 * `push(x) + 1` stores x rather than x + 1.
 */
#define pop()   (stack[top--])        /* yield the top element, then drop it */
#define peek()  (stack[top])          /* yield the top element, keep it      */
#define push(s) (stack[++top] = (s))  /* make s the new top element          */

/*
 * Classification of a word token, as returned by classify_string().
 * The values are stored in the `char type` field of struct token next to
 * plain punctuation characters such as '*', '(' and '['.
 */
enum type_tag {
    IDENTIFIER = 0, /* any word that is not a known qualifier or type keyword */
    QUALIFIER  = 1, /* const, volatile */
    TYPE       = 2  /* void, char, int, struct, ... */
};

/* One lexical token of the declaration being translated. */
struct token {
    /*
     * For word tokens: the enum type_tag value from classify_string().
     * For everything else: the punctuation character itself ('*', '(', '[', ...).
     */
    char type;
    /*
     * Text to print for this token. gettoken() stores "pointer to" for '*',
     * and classify_string() replaces "const" with "read-only".
     */
    char string[MAX_TOKEN_LEN];
};

/*
 * Holds the tokens read up to and including the first identifier;
 * read_to_first_identifier() pops the identifier again once it has been
 * printed. Accessed through the push()/pop()/peek() macros.
 */
struct token stack[MAX_TOKENS];
/* Index of the top element of `stack`; -1 means the stack is empty. */
int top = -1;

/*
 * Holds the token most recently produced by gettoken().
 * Note: `this` is an ordinary identifier in C but a keyword in C++.
 */
struct token this;

/*
 * Classifies the word currently held in this.string.
 *
 * Returns QUALIFIER for "const" and "volatile", TYPE for the built-in type
 * keywords listed below, and IDENTIFIER for any other word.
 * Side effect: "const" is rewritten in place to "read-only" so that it
 * prints as English.
 */
enum type_tag classify_string() {
    static const char *const type_words[] = {
        "void", "char", "signed", "unsigned", "short", "int",
        "long", "float", "double", "struct", "union", "enum"
    };
    size_t i;

    if (strcmp(this.string, "const") == 0) {
        strcpy(this.string, "read-only");
        return QUALIFIER;
    }

    if (strcmp(this.string, "volatile") == 0) {
        return QUALIFIER;
    }

    for (i = 0; i < sizeof type_words / sizeof type_words[0]; i++) {
        if (strcmp(this.string, type_words[i]) == 0) {
            return TYPE;
        }
    }

    return IDENTIFIER;
}

void gettoken(void) {
    char *p = this.string;

    while((*p = getchar()) == ' ');

    if (isalnum(*p)) {
        while(isalnum(*++p = getchar()));
        ungetc(*p, stdin); /* push last non-alnum character back onto input */
        *p = '\0'; /* nul-terminate string */

        this.type = classify_string();
        return;
    }

    if (*p == '*') {
        strcpy(this.string, "pointer to");
        this.type = '*';
        return;
    }

    /*
     * Everything else that remains must be single character stuff, make it a
     * self describing type, nul-terminate it and return.
     */
    this.string[1] = '\0';
    this.type      = *p;
    return;
}

void read_to_first_identifier(void) {
    do {
        gettoken();
        push(this);
    } while(this.type != IDENTIFIER);

    printf("identifier '%s' is a ", this.string);
    pop();
    gettoken();
}

/*
 * Translates one or more consecutive array suffixes.
 *
 * Precondition: `this` holds the opening '['.
 * For "[N]" prints "size N array of "; for an unsized "[]" prints
 * "array of ". Repeats while another '[' follows directly, so "a[2][3]"
 * yields both dimensions. On return `this` holds the first token after the
 * last ']'.
 */
void deal_with_arrays(void) {
    do {
        gettoken(); /* the size, or ']' when no size is given */

        if (this.type == ']') {
            printf("array of ");
        } else {
            printf("size %s array of ", this.string);
            gettoken(); /* consume the closing ']' */
        }

        gettoken(); /* first token after the brackets */
    } while (this.type == '[');
}

void deal_with_function_args(void) {
    gettoken();
    printf("function that returns a ");
}

void deal_with_any_pointers(void) {
    do {
        printf("pointer to a ");
        pop();
    } while(peek().type == '*');
}

void deal_with_declarator(void) {
    if (this.type == '[') deal_with_arrays();
    if (this.type == '(') deal_with_function_args();
    if (this.type == '*') deal_with_any_pointers();

    struct token current;
    while(top > -1) {
        current = peek();

        if (current.type == '(') {
            pop();
            gettoken();
            deal_with_declarator();
        } else {
            pop();
            printf("%s ", current.string);
        }
    }
}

/*
 * Reads one C declaration from stdin and prints its English translation,
 * followed by a newline, on stdout. Command-line arguments are ignored.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    read_to_first_identifier();
    deal_with_declarator();
    putchar('\n');

    return 0;
}

So, classify_string and gettoken were more or less copied from the answer but the rest of it is my own doing. Sample compile and run:

$ gcc cdecl.c
$ ./a.out
char *(*c[10])(int **p);
identifier 'c' is a size 10 array of pointer to function that returns a pointer to char

The line that starts with char is the user input. The line immediately after is the output, the C to English translation. Could be worse :)

Really enjoying this book. Will try and post more gems as I come across them.

Comments