From cc159c2244770332385f9f1a3e893d4cb5498b7e Mon Sep 17 00:00:00 2001
From: Sara
Date: Mon, 11 Sep 2023 18:45:33 +0200
Subject: [PATCH] implemented kwil parsing

---
 src/kwil_parse.c | 334 +++++++++++++++++++++++++++++++++++++++++++++++
 src/kwil_parse.h |  17 +++
 2 files changed, 351 insertions(+)
 create mode 100644 src/kwil_parse.c
 create mode 100644 src/kwil_parse.h

diff --git a/src/kwil_parse.c b/src/kwil_parse.c
new file mode 100644
index 0000000..e39e2c4
--- /dev/null
+++ b/src/kwil_parse.c
@@ -0,0 +1,334 @@
+#include "kwil_parse.h"
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Return the next character of file without consuming it, or EOF when nothing is left.
+int fpeek(FILE* file) {
+    int get = fgetc(file);
+    if(get != EOF) {
+        ungetc(get, file);
+    }
+    return get;
+}
+
+// Read one line from file into out_buf, dropping every whitespace character.
+// At most max_size characters are stored, the rest of an over-long line is discarded.
+// A null terminator is only written when fewer than max_size characters were stored,
+// so callers keep a '\0' at out_buf[max_size].
+// Returns the number of characters stored.
+static
+int read_line_without_whitespace(FILE* file, char* out_buf, int max_size) {
+    // character read this loop
+    int c = 0;
+
+    // initialize writer and end of buffer pointer
+    char* out_writer = out_buf;
+    char* out_end = out_buf + max_size;
+
+    // EOF is tested on the character itself so it is never stored as text
+    while((c = fgetc(file)) != EOF && c != '\n') {
+        if(!isspace(c) && out_writer < out_end) {
+            *out_writer = (char)c;
+            ++out_writer;
+        }
+    }
+
+    if(out_writer < out_end) {
+        *out_writer = '\0';
+    }
+
+    return (int)(out_writer - out_buf);
+}
+
+// Skip whitespace and return the first character that is not whitespace, or EOF.
+static
+int next_non_whitespace(FILE* file) {
+    int c = 0;
+    do {
+        c = fgetc(file);
+    } while(isspace(c));
+
+    return c;
+}
+
+// Read the "struct <name>" or "enum <name>" declaration that follows a kwil marker.
+// The name is written to out_buf (at most max_size characters, null terminated only
+// when shorter than max_size) and the rest of the declaration line is consumed.
+// Returns the length of the name, or 0 with an empty out_buf when the next word
+// is neither "struct" nor "enum".
+static
+int read_typename(FILE* file, char* out_buf, int max_size) {
+    char* out_end = out_buf + max_size;
+
+    // buffer for the leading keyword, long enough for "struct"
+    char keyword[7];
+    int keyword_length = 0;
+
+    // read the keyword as a whole word instead of a fixed six characters,
+    // so indentation is skipped and "enum" does not swallow the start of the name
+    int c = next_non_whitespace(file);
+    while(isalnum(c) || c == '_') {
+        if(keyword_length < 6) {
+            keyword[keyword_length] = (char)c;
+        }
+        ++keyword_length;
+        c = fgetc(file);
+    }
+    keyword[keyword_length < 6 ? keyword_length : 6] = '\0';
+
+    // next line is not a compatible type declaration,
+    // write null terminator and return zero
+    if(keyword_length > 6 || (strcmp(keyword, "struct") != 0 && strcmp(keyword, "enum") != 0)) {
+        if(c != EOF) {
+            ungetc(c, file);
+        }
+        *out_buf = '\0';
+        return 0;
+    }
+
+    // ignore the whitespace between the keyword and the name
+    if(isspace(c)) {
+        c = next_non_whitespace(file);
+    }
+
+    // write all text until the next whitespace or '{' to the out buffer
+    char* out_writer = out_buf;
+    for(;(isalnum(c) || ispunct(c)) && c != '{' && out_writer < out_end; ++out_writer) {
+        *out_writer = (char)c;
+        c = fgetc(file);
+    }
+
+    if(out_writer < out_end) {
+        *out_writer = '\0';
+    }
+
+    // read until the end of the line or file,
+    // setting the file pointer up for parsing the struct
+    while(c != '\n' && c != EOF) {
+        c = fgetc(file);
+    }
+
+    return (int)(out_writer - out_buf);
+}
+
+// Scan the header named by self->file_name for KWIL_STRUCT() and KWIL_ENUM() markers
+// and register the type declared after each marker with self.
+// Reports an error on stderr and returns early when the file cannot be opened.
+void kwil_header_parse(struct kwil_header_t* self) {
+    printf("Reading top level type definitions for header \"%s\"\n", self->file_name);
+
+    // .h file associated with self
+    FILE* file = fopen(self->file_name, "r");
+    if(file == NULL) {
+        fprintf(stderr, "Failed to open header \"%s\"\n", self->file_name);
+        return;
+    }
+
+    // should be able to store
+    // KWIL_ENUM() or KWIL_STRUCT() or KWIL_FIELD()
+    char line_buffer[48];
+    line_buffer[47] = '\0';
+    struct kwil_struct_t kwil_struct;
+    struct kwil_enum_t kwil_enum;
+    struct kwil_type_t type;
+
+    // read the entire file, a read error ends the scan as well
+    while(!feof(file) && !ferror(file)) {
+        // search for a match with a kwil statement
+        read_line_without_whitespace(file, line_buffer, 47);
+
+        if(strncmp(line_buffer, "KWIL_STRUCT()", 48) == 0) {
+            // get the typename and update the user
+            read_typename(file, line_buffer, 47);
+            printf("\"%s\": {\n", line_buffer);
+
+            // line matches KWIL_STRUCT() instantiate a struct and pass control to struct parser
+            kwil_struct_parse(&kwil_struct, file);
+
+            // move struct definition into a type struct and store in self
+            kwil_type_from_struct(&type, &kwil_struct, line_buffer);
+            kwil_header_add_type(self, &type);
+        } else if(strncmp(line_buffer, "KWIL_ENUM()", 48) == 0) {
+            // read typename and update the user
+            read_typename(file, line_buffer, 47);
+            printf("\"%s\": [\n", line_buffer);
+
+            // line matches KWIL_ENUM() instantiate an enum and pass control to the enum parser
+            kwil_enum_parse(&kwil_enum, file);
+
+            // move enum definition into a type struct and register with header
+            kwil_type_from_enum(&type, &kwil_enum, line_buffer);
+            kwil_header_add_type(self, &type);
+        }
+    }
+
+    // close file after finishing
+    fclose(file);
+}
+
+// Read lines from file until one starts with '}' or the file ends,
+// adding the declaration that follows every KWIL_FIELD() marker to self.
+void kwil_struct_parse(struct kwil_struct_t *self, FILE *file) {
+    kwil_init_struct(self);
+    // create a line buffer with a null terminator
+    // this should always be treated as a 47-character-long array
+    // this way the string will always be valid
+    char line_buffer[48];
+    line_buffer[47] = '\0';
+
+    struct kwil_field_t field;
+
+    do {
+        // read the next line
+        read_line_without_whitespace(file, line_buffer, 47);
+        if(strncmp("KWIL_FIELD()", line_buffer, 12) == 0) {
+            kwil_field_parse(&field, file);
+            kwil_struct_add_field(self, &field);
+            printf("\t\"%s\": \"%s%s\",\n", field.name_str, field.type_str, field.array_length > 0 ? "*" : "");
+        }
+    } while(!feof(file) && !ferror(file) && line_buffer[0] != '}');
+
+    // print an end of line to separate structs when declaring fields
+    printf("}\n");
+}
+
+// Split the declaration up to the next ';' into null terminated words inside line_buffer.
+// Words are runs of alphanumerics, '_' and ']'. A '[' always starts a new word, so an
+// array size becomes its own "[N]" word. Every other character separates words.
+// o_words and o_word_lengths receive the start and length of at most max_words words,
+// o_word_count receives the number of words stored.
+// o_is_array is set to 1 when a '[' is read and to 2 when a '*' is read.
+// Words that do not fit in line_buffer or o_words are truncated or dropped.
+// The rest of the line after the ';' is consumed.
+// Returns the number of bytes of line_buffer that were used.
+static
+int kwil_field_parse_split(FILE* file, char* line_buffer, int buffer_size,
+        char** o_words, int* o_word_lengths, int max_words,
+        int* o_word_count, int* o_is_array) {
+    // last character read from file
+    int c = 0;
+    char* writer = line_buffer;
+    // the last byte is held back so the word being written can always be terminated
+    char* writer_end = line_buffer + buffer_size - 1;
+    int word_count = 0;
+    // the "start" of the line is outside of a word, this way leading whitespace is ignored
+    int in_word = 0;
+
+    do {
+        c = fgetc(file);
+        // EOF matches none of these, so it ends the current word like a separator
+        int is_word_char = isalnum(c) || c == '_' || c == ']';
+
+        if(c == '*') {
+            *o_is_array = 2;
+        } else if(c == '[') {
+            *o_is_array = 1;
+        }
+
+        if(in_word && !is_word_char) {
+            // write a null terminator at the end of the last word and store its length
+            *writer = '\0';
+            o_word_lengths[word_count] = (int)(writer - o_words[word_count]);
+            ++writer;
+            ++word_count;
+            in_word = 0;
+        }
+
+        if(is_word_char || c == '[') {
+            // start a new word when there is room left for one
+            if(!in_word && word_count < max_words && writer < writer_end) {
+                o_words[word_count] = writer;
+                in_word = 1;
+            }
+            if(in_word && writer < writer_end) {
+                *writer = (char)c;
+                ++writer;
+            }
+        }
+    } while(c != EOF && c != ';');
+
+    *o_word_count = word_count;
+
+    // consume the line to set up for the next field
+    while(c != '\n' && c != EOF) {
+        c = fgetc(file);
+    }
+
+    return (int)(writer - line_buffer);
+}
+
+// Parse the field declaration that follows a KWIL_FIELD() marker into self.
+// The first word of the declaration is the type (joined with the second word for
+// "struct" and "enum" types), the last word in front of any "[N]" is the name.
+void kwil_field_parse(struct kwil_field_t* self, FILE* file) {
+    // 47 character long temp buffer for reading from file, zeroed so it is always a valid string
+    char line_buffer[48] = {0};
+    // array of pointers into line_buffer that point at the starts of words
+    char* words[5] = {0};
+    // array of word lengths matched to words
+    int word_lengths[5] = {0};
+
+    // Set when this field is a pointer or array
+    // Both dynamic and static sized arrays are supported,
+    // though dynamic arrays will need to be marked with their minimum length
+    int is_array = 0;
+    int word_count = 0;
+    kwil_field_parse_split(file, line_buffer, (int)sizeof(line_buffer),
+            words, word_lengths, (int)(sizeof(words) / sizeof(words[0])),
+            &word_count, &is_array);
+
+    // scan for the length of static arrays
+    int array_len = 0;
+    if(is_array == 1 && word_count > 0 && words[word_count-1][0] == '[') {
+        sscanf(words[word_count-1], "[%d]", &array_len);
+        word_count -= 1;
+    }
+
+    // a declaration without any words is reported with an empty name and type
+    if(word_count < 1) {
+        line_buffer[0] = '\0';
+        words[0] = line_buffer;
+        word_lengths[0] = 0;
+        word_count = 1;
+    }
+
+    // get the typename from the split declaration
+    char* type = words[0];
+    if(word_count > 1 && (strncmp("struct", type, 7) == 0 || strncmp("enum", type, 5) == 0)) {
+        // put "struct"/"enum" and the tag that follows it back together
+        words[0][word_lengths[0]] = ' ';
+    }
+
+    kwil_init_field(self, words[word_count - 1], words[0]);
+    self->array_length = array_len;
+    self->array_dynamic = is_array == 2;
+}
+
+// Read enum values from file, one per line, until a line starts with '}' or the file ends,
+// and add them to self. Blank lines and a lone '{' are skipped.
+void kwil_enum_parse(struct kwil_enum_t* self, FILE* file) {
+    kwil_init_enum(self);
+
+    char value_buffer[24];
+    int length;
+    value_buffer[23] = '\0';
+
+    length = read_line_without_whitespace(file, value_buffer, 23);
+    while(!feof(file) && !ferror(file) && value_buffer[0] != '}') {
+        // strip the comma that separates values
+        if(length > 0 && value_buffer[length-1] == ',') {
+            value_buffer[length-1] = '\0';
+            --length;
+        }
+        // blank lines and the opening brace do not declare a value
+        if(length > 0 && strcmp(value_buffer, "{") != 0) {
+            kwil_enum_add_value(self, value_buffer);
+            printf("\t\"%s\",\n", self->enum_values[self->enum_values_len-1].name);
+        }
+        length = read_line_without_whitespace(file, value_buffer, 23);
+    }
+
+    printf("]\n\n");
+}
diff --git a/src/kwil_parse.h b/src/kwil_parse.h
new file mode 100644
index 0000000..ca223c4
--- /dev/null
+++ b/src/kwil_parse.h
@@ -0,0 +1,17 @@
+#ifndef _kwil_parse_H
+#define _kwil_parse_H
+
+#include <stdio.h>
+#include "kwil_def.h"
+
+// Open a header file and parse it for its kwil types
+extern void kwil_header_parse(struct kwil_header_t* self);
+// Read a file until the end of struct definition.
+extern void kwil_struct_parse(struct kwil_struct_t* self, FILE* file);
+// Parse a field without checking custom types.
+// Custom types will not be checked yet as they may not be known to kwil yet.
+extern void kwil_field_parse(struct kwil_field_t* self, FILE* file);
+// Parse an enum and its possible values.
+extern void kwil_enum_parse(struct kwil_enum_t* self, FILE* file);
+
+#endif // !_kwil_parse_H