From 3948e8aa82a778a4574a65e1d2d20fa9655e4171 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 23 May 2017 12:53:40 -0400 Subject: [PATCH 01/28] FIXME: add json.c and json.h gcc/ChangeLog: * Makefile.in (OBJS): Add json.o. * json.c: New file. * json.h: New file. * selftest-run-tests.c (selftest::run_tests): Call json_c_tests. * selftest.h (selftest::json_c_tests): New decl. --- gcc/Makefile.in | 1 + gcc/json.c | 1400 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/json.h | 202 +++++++ gcc/selftest-run-tests.c | 1 + gcc/selftest.h | 1 + 5 files changed, 1605 insertions(+) create mode 100644 gcc/json.c create mode 100644 gcc/json.h diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 6e0e55a..d8f61c0 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1366,6 +1366,7 @@ OBJS = \ ira-color.o \ ira-emit.o \ ira-lives.o \ + json.o \ jump.o \ langhooks.o \ lcm.o \ diff --git a/gcc/json.c b/gcc/json.c new file mode 100644 index 0000000..4d3e8fa --- /dev/null +++ b/gcc/json.c @@ -0,0 +1,1400 @@ +/* JSON parsing + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "json.h" +#include "pretty-print.h" +#include "selftest.h" + +using namespace json; + +/* class json::value. */ + +/* Generate a char * for this json::value tree. + The returned value must be freed by the caller. 
*/
+
+char *
+value::to_str () const
+{
+  /* Print into a scratch pretty_printer and return a heap-allocated
+     copy of the text it accumulated.  */
+  pretty_printer pp;
+  print (&pp);
+  return xstrdup (pp_formatted_text (&pp));
+}
+
+/* Dump this json::value tree to OUTF.
+   No formatting is done.  There are no guarantees about the order
+   in which the key/value pairs of json::objects are printed.  */
+
+void
+value::dump (FILE *outf) const
+{
+  pretty_printer pp;
+  /* Redirect the printer's buffer to OUTF so that pp_flush writes
+     there.  */
+  pp_buffer (&pp)->stream = outf;
+  print (&pp);
+  pp_flush (&pp);
+}
+
+/* If this json::value is a json::object, return it,
+   otherwise return NULL.  */
+
+const object *
+value::as_object () const
+{
+  if (get_kind () != JSON_OBJECT)
+    return NULL;
+  /* get_kind () has established the dynamic type.  */
+  return static_cast <const object *> (this);
+}
+
+/* If this json::value is a json::array, return it,
+   otherwise return NULL.  */
+
+const array *
+value::as_array () const
+{
+  if (get_kind () != JSON_ARRAY)
+    return NULL;
+  return static_cast <const array *> (this);
+}
+
+/* If this json::value is a json::number, return it,
+   otherwise return NULL.  */
+
+const number *
+value::as_number () const
+{
+  if (get_kind () != JSON_NUMBER)
+    return NULL;
+  return static_cast <const number *> (this);
+}
+
+/* If this json::value is a json::string, return it,
+   otherwise return NULL.  */
+
+const string *
+value::as_string () const
+{
+  if (get_kind () != JSON_STRING)
+    return NULL;
+  return static_cast <const string *> (this);
+}
+
+/* Attempt to look up the value of a key/value pair from this value
+   as if this value were an object.
+
+   To succeed, THIS must be a json::object, and it must have a key named
+   NAME.
+
+   On success, return true and write the value to OUT_VALUE.
+   On failure, return false and write an error message to OUT_ERR
+   (which must be freed by the caller).
*/
+
+bool
+value::get_value_by_key (const char *name, const value *&out_value,
+                         char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  const json::value *v = obj->get (name);
+  if (!v)
+    {
+      out_err = xasprintf ("missing attribute: \"%s\"", name);
+      return false;
+    }
+  out_value = v;
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a number;
+   if successful, write it as an int to OUT_VALUE.
+   The number is stored as a double; it is converted to int by the
+   implicit conversion in the assignment below.  */
+
+bool
+value::get_int_by_key (const char *name, int &out_value, char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::number *n = v->as_number ();
+  if (!n)
+    {
+      out_err = xasprintf ("not a number: \"%s\"", name);
+      return false;
+    }
+  out_value = n->get ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a string;
+   if successful, write it as const char * to OUT_VALUE.
+   The pointer written is the one returned by json::string::get_string;
+   no copy is made here.  */
+
+bool
+value::get_string_by_key (const char *name, const char *&out_value,
+                          char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::string *s = v->as_string ();
+  if (!s)
+    {
+      out_err = xasprintf ("not a string: \"%s\"", name);
+      return false;
+    }
+  out_value = s->get_string ();
+  return true;
+}
+
+/* class json::object, a subclass of json::value, representing
+   an unordered collection of key/value pairs.  */
+
+/* json::object's dtor.  Frees every key (each is a copy made by
+   object::set) and deletes every value.  */
+
+object::~object ()
+{
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      free (const_cast <char *> ((*it).first));
+      delete ((*it).second);
+    }
+}
+
+/* Implementation of json::value::print for json::object.  */
+
+void
+object::print (pretty_printer *pp) const
+{
+  /* Note that the order is not guaranteed.  */
+  pp_character (pp, '{');
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      if (it != m_map.begin ())
+        pp_string (pp, ", ");
+      const char *key = const_cast <char *> ((*it).first);
+      value *v = (*it).second;
+      /* FIXME: KEY is emitted verbatim; quotes and backslashes within
+         a key are not escaped.  */
+      pp_printf (pp, "\"%s\": ", key);
+      v->print (pp);
+    }
+  pp_character (pp, '}');
+}
+
+/* Implementation of json::value::clone for json::object.
+   Each value is cloned; object::set copies each key.  */
+
+value *
+object::clone () const
+{
+  object *other = new object ();
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      const char *key = const_cast <char *> ((*it).first);
+      value *v = (*it).second;
+      other->set (key, v->clone ());
+    }
+  return other;
+}
+
+/* Get the json::value * for KEY, or NULL if the key is not present.  */
+
+value *
+object::get (const char *key) const
+{
+  /* The lookup is invoked through a non-const pointer to this.  */
+  value **slot = const_cast <object *> (this)->m_map.get (key);
+  if (slot)
+    return *slot;
+  return NULL;
+}
+
+/* Set the json::value * for KEY, taking ownership of V
+   (and taking a copy of KEY).
+   If KEY is already present, the value previously stored for it is
+   deleted and replaced; the existing key copy is reused.  */
+
+void
+object::set (const char *key, value *v)
+{
+  value **slot = m_map.get (key);
+  if (slot)
+    {
+      /* Overwrite in place, so that neither the old value nor a second
+         copy of the key is leaked.  Guard against re-setting the very
+         same value.  */
+      if (*slot != v)
+        {
+          delete *slot;
+          *slot = v;
+        }
+    }
+  else
+    m_map.put (xstrdup (key), v);
+}
+
+/* class json::array, a subclass of json::value, representing
+   an ordered collection of values.  */
+
+/* json::array's dtor.  Deletes every element.  */
+
+array::~array ()
+{
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    delete v;
+}
+
+/* Implementation of json::value::print for json::array.  */
+
+void
+array::print (pretty_printer *pp) const
+{
+  pp_character (pp, '[');
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    {
+      /* Separator before every element except the first.  */
+      if (i)
+        pp_string (pp, ", ");
+      v->print (pp);
+    }
+  pp_character (pp, ']');
+}
+
+/* Implementation of json::value::clone for json::array.  */
+
+value *
+array::clone () const
+{
+  array *other = new array ();
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    other->append (v->clone ());
+  return other;
+}
+
+/* class json::number, a subclass of json::value, wrapping a double.
*/
+
+/* Implementation of json::value::print for json::number.  */
+
+void
+number::print (pretty_printer *pp) const
+{
+  /* FIXME: the value is converted to int before printing, so any
+     fractional part is dropped.  */
+  pp_printf (pp, "%i", (int)m_value);
+}
+
+/* Implementation of json::value::clone for json::number.  */
+
+value *
+number::clone () const
+{
+  return new number (m_value);
+}
+
+/* class json::string, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::string.
+   Print the string within double quotes, escaping embedded
+   backslashes and double quotes.  */
+
+void
+string::print (pretty_printer *pp) const
+{
+  pp_character (pp, '"');
+  for (const char *ptr = m_utf8; *ptr; ptr++)
+    {
+      char ch = *ptr;
+      switch (ch)
+        {
+        case '\\':
+          /* A backslash must be written as an escaped backslash.  */
+          pp_string (pp, "\\\\");
+          break;
+        case '"':
+          pp_string (pp, "\\\"");
+          break;
+
+        /* FIXME: handle other escapes (control characters are
+           currently printed verbatim).  */
+
+        default:
+          pp_character (pp, ch);
+        }
+    }
+  pp_character (pp, '"');
+}
+
+/* Implementation of json::value::clone for json::string.  */
+
+value *
+string::clone () const
+{
+  return new string (m_utf8);
+}
+
+/* class json::literal, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::literal.  */
+
+void
+literal::print (pretty_printer *pp) const
+{
+  switch (m_kind)
+    {
+    case JSON_TRUE:
+      pp_string (pp, "true");
+      break;
+    case JSON_FALSE:
+      pp_string (pp, "false");
+      break;
+    case JSON_NULL:
+      pp_string (pp, "null");
+      break;
+    default:
+      /* No other kind is expected for a literal.  */
+      gcc_unreachable ();
+    }
+}
+
+/* Implementation of json::value::clone for json::literal.  */
+
+value *
+literal::clone () const
+{
+  return new literal (m_kind);
+}
+
+
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.
+   The enumerators are used as indices into token_id_name, so the
+   two must be kept in the same order.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id.
*/ + +static const char *token_id_name[] = { + "error", + "EOF", + "'['", + "'{'", + "']'", + "'}'", + "':'", + "','", + "'true'", + "'false'", + "'null'", + "string", + "number" +}; + +/* Tokens within the JSON lexer. */ + +struct token +{ + /* The kind of token. */ + enum token_id id; + + /* The location of this token within the unicode + character stream. */ + int index; + + union + { + /* Value for TOK_STRING. */ + char *string; + + /* Value for TOK_NUMBER. */ + double number; + } u; +}; + +/* A class for lexing JSON. */ + +class lexer +{ + public: + lexer (); + void add_utf8 (size_t length, const char *utf8_buf); + + const token *peek (); + void consume (); + + private: + bool get_char (unichar &out); + void unget_char (); + static void dump_token (FILE *outf, const token *tok); + void lex_token (token *out); + void lex_string (token *out); + void lex_number (token *out, unichar first_char); + bool rest_of_literal_p (const char *suffix) const; + + private: + auto_vec m_buffer; + int m_next_char_idx; + + static const int MAX_TOKENS = 1; + token m_next_tokens[MAX_TOKENS]; + int m_num_next_tokens; +}; + +/* A class for parsing JSON. */ + +class parser +{ + public: + parser (char **err_out); + void add_utf8 (size_t length, const char *utf8_buf); + value *parse_value (); + object *parse_object (); + array *parse_array (); + + bool seen_error_p () const { return *m_err_out; } + + private: + void require (enum token_id tok_id); + void error_at (int, const char *, ...); + + private: + lexer m_lexer; + char **m_err_out; +}; + +} // anonymous namespace for parsing implementation + +/* Parser implementation. */ + +/* lexer's ctor. */ + +lexer::lexer () +: m_buffer (), m_next_char_idx (0), m_num_next_tokens (0) +{ +} + +/* Peek the next token. */ + +const token * +lexer::peek () +{ + if (m_num_next_tokens == 0) + { + lex_token (&m_next_tokens[0]); + m_num_next_tokens++; + } + return &m_next_tokens[0]; +} + +/* Consume the next token. 
*/ + +void +lexer::consume () +{ + if (m_num_next_tokens == 0) + peek (); + + gcc_assert (m_num_next_tokens > 0); + gcc_assert (m_num_next_tokens <= MAX_TOKENS); + + if (0) + { + fprintf (stderr, "consuming token: "); + dump_token (stderr, &m_next_tokens[0]); + fprintf (stderr, "\n"); + } + + if (m_next_tokens[0].id == TOK_STRING) + free (m_next_tokens[0].u.string); + + m_num_next_tokens--; + memmove (&m_next_tokens[0], &m_next_tokens[1], + sizeof (token) * m_num_next_tokens); +} + +/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer' + buffer. */ + +void +lexer::add_utf8 (size_t length, const char *utf8_buf) +{ + /* FIXME: this blithely ignores the niceties of UTF-8 and simply pushes the + bytes into the buffer. */ + for (size_t i = 0; i < length; i++) + m_buffer.safe_push (utf8_buf[i]); +} + +/* Attempt to get the next unicode character from this lexer's buffer. + If successful, write it to OUT and return true. + Otherwise, return false. */ + +bool +lexer::get_char (unichar &out) +{ + if (m_next_char_idx >= (int)m_buffer.length ()) + return false; + + out = m_buffer[m_next_char_idx++]; + return true; +} + +/* FIXME. */ + +void +lexer::unget_char () +{ + --m_next_char_idx; +} + +/* Print a textual representation of TOK to OUTF. + This is intended for debugging the lexer and parser, + rather than for user-facing output. 
*/
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  /* Spellings of the enumerators, indexed by enum token_id.  */
+  static const char * const names[] = {
+    "TOK_ERROR",
+    "TOK_EOF",
+    "TOK_OPEN_SQUARE",
+    "TOK_OPEN_CURLY",
+    "TOK_CLOSE_SQUARE",
+    "TOK_CLOSE_CURLY",
+    "TOK_COLON",
+    "TOK_COMMA",
+    "TOK_TRUE",
+    "TOK_FALSE",
+    "TOK_NULL",
+    "TOK_STRING",
+    "TOK_NUMBER"
+  };
+
+  /* An id outside the enum is a bug in the lexer.  */
+  gcc_assert ((size_t) tok->id < sizeof (names) / sizeof (names[0]));
+
+  switch (tok->id)
+    {
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      break;
+
+    case TOK_NUMBER:
+      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
+      break;
+
+    default:
+      /* Tokens without a payload: just the enumerator name.  */
+      fprintf (outf, "%s", names[tok->id]);
+      break;
+    }
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.
+   OUT->index is set to the position of the token's first character.  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  JSON whitespace is space,
+     horizontal tab, line feed and carriage return.  */
+  unichar next_char;
+  while (1)
+    {
+      out->index = m_next_char_idx;
+      if (!get_char (next_char))
+        {
+          out->id = TOK_EOF;
+          return;
+        }
+      if (next_char != ' '
+          && next_char != '\t'
+          && next_char != '\n'
+          && next_char != '\r')
+        break;
+    }
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  rest_of_literal_p only peeks at the
+         buffer, so the matched characters must be skipped here;
+         otherwise they would be lexed again as the next token.  */
+      if (!rest_of_literal_p ("rue"))
+        goto err;
+      m_next_char_idx += 3;
+      out->id = TOK_TRUE;
+      break;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (!rest_of_literal_p ("alse"))
+        goto err;
+      m_next_char_idx += 4;
+      out->id = TOK_FALSE;
+      break;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (!rest_of_literal_p ("ull"))
+        goto err;
+      m_next_char_idx += 3;
+      out->id = TOK_NULL;
+      break;
+
+    err:
+    default:
+      out->id = TOK_ERROR;
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR
+   if an error occurred).
+   On success OUT->u.string is a freshly-allocated buffer; lexer::consume
+   frees it when the token is consumed.  */
+
+void
+lexer::lex_string (token *out)
+{
+  auto_vec <unichar> content;
+  bool still_going = true;
+  while (still_going)
+    {
+      unichar uc;
+      if (!get_char (uc))
+        {
+          /* Input ended before the closing quote.  */
+          out->id = TOK_ERROR;
+          return;
+        }
+      switch (uc)
+        {
+        case '"':
+          still_going = false;
+          break;
+        case '\\':
+          {
+            unichar next_char;
+            if (!get_char (next_char))
+              {
+                out->id = TOK_ERROR;
+                return;
+              }
+            switch (next_char)
+              {
+              /* Escapes that stand for the escaped character itself.  */
+              case '\\':
+              case '"':
+              case '/':
+                content.safe_push (next_char);
+                break;
+
+              /* Escapes that stand for a control character.  */
+              case 'b':
+                content.safe_push ('\b');
+                break;
+              case 'f':
+                content.safe_push ('\f');
+                break;
+              case 'n':
+                content.safe_push ('\n');
+                break;
+              case 'r':
+                content.safe_push ('\r');
+                break;
+              case 't':
+                content.safe_push ('\t');
+                break;
+
+              /* FIXME: implement \uXXXX.  */
+
+              default:
+                out->id = TOK_ERROR;
+                return;
+              }
+          }
+          break;
+
+        default:
+          content.safe_push (uc);
+          break;
+        }
+    }
+
+  out->id = TOK_STRING;
+
+  /* FIXME: each element is narrowed to a single char here; this is
+     only right while the buffer holds one byte per element.  */
+  out->u.string = XNEWVEC (char, content.length () + 1);
+  for (unsigned i = 0; i < content.length (); i++)
+    out->u.string[i] = content[i];
+  out->u.string[content.length ()] = '\0';
+}
+
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+   the lexer's buffer, attempt to lex the rest of a JSON number, writing
+   the result to OUT (or TOK_ERROR if an error occurred).
*/
+
+void
+lexer::lex_number (token *out, unichar first_char)
+{
+  /* FIXME: proper lexing here; '.' and 'e'/'E' are not yet handled,
+     so only integers are recognized.  */
+
+  const bool negate = (first_char == '-');
+
+  /* A leading '-' contributes no digit; otherwise FIRST_CHAR is the
+     most significant digit.  */
+  double value = negate ? 0.0 : (double) (first_char - '0');
+  bool seen_digit = !negate;
+
+  while (1)
+    {
+      unichar uc;
+      if (!get_char (uc))
+        break;
+      if (!ISDIGIT (uc))
+        {
+          /* Not part of the number: leave it for the next token.  */
+          unget_char ();
+          break;
+        }
+      value *= 10;
+      value += uc - '0';
+      seen_digit = true;
+    }
+
+  /* A '-' with no digits after it is not a number.  */
+  if (!seen_digit)
+    {
+      out->id = TOK_ERROR;
+      return;
+    }
+
+  if (negate)
+    value = -value;
+
+  out->id = TOK_NUMBER;
+  out->u.number = value;
+}
+
+/* Determine if the next characters to be lexed match SUFFIX.
+   This only peeks at the buffer: the read position is not advanced.
+   Return false if the buffer ends before all of SUFFIX is matched.  */
+
+bool
+lexer::rest_of_literal_p (const char *suffix) const
+{
+  int suffix_idx = 0;
+  int buf_idx = m_next_char_idx;
+  while (1)
+    {
+      if (suffix[suffix_idx] == '\0')
+        return true;
+      /* Don't read beyond the end of the buffer on truncated input
+         such as "tr".  */
+      if (buf_idx >= (int)m_buffer.length ())
+        return false;
+      /* FIXME: this assumes ASCII.  */
+      if (m_buffer[buf_idx] != (unichar)suffix[suffix_idx])
+        return false;
+      buf_idx++;
+      suffix_idx++;
+    }
+}
+
+/* parser's ctor.  ERR_OUT must be non-NULL and must point to a NULL
+   pointer; the first error message, if any, is later stored there.  */
+
+parser::parser (char **err_out)
+: m_lexer (), m_err_out (err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+  *err_out = NULL;
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.  */
+
+void
+parser::add_utf8 (size_t length, const char *utf8_buf)
+{
+  m_lexer.add_utf8 (length, utf8_buf);
+}
+
+/* Parse a single JSON value of any kind, starting at the next token.
+   If the next token cannot start a value, record an error and
+   return NULL.
*/
+
+value *
+parser::parse_value ()
+{
+  /* Dispatch on the kind of the next token.  Objects and arrays are
+     handed to their own parsing routines, which consume their opening
+     bracket themselves; every other kind is a single token that is
+     consumed here.  */
+  const token *tok = m_lexer.peek ();
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object ();
+
+    case TOK_STRING:
+      {
+        /* json::string copies the text, so the token can be consumed
+           (and its buffer freed) afterwards.  */
+        string *result = new string (tok->u.string);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array ();
+
+    case TOK_NUMBER:
+      {
+        number *result = new number (tok->u.number);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_TRUE:
+      {
+        literal *result = new literal (JSON_TRUE);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_FALSE:
+      {
+        literal *result = new literal (JSON_FALSE);
+        m_lexer.consume ();
+        return result;
+      }
+
+    case TOK_NULL:
+      {
+        literal *result = new literal (JSON_NULL);
+        m_lexer.consume ();
+        return result;
+      }
+
+    default:
+      error_at (tok->index, "unexpected token: %s", token_id_name[tok->id]);
+      return NULL;
+    }
+}
+
+/* Parse a JSON object: '{', then zero or more comma-separated
+   "key": value pairs, then '}'.
+   A new json::object is always returned.  If an error is recorded
+   part-way through, the object holds only the pairs parsed up to
+   that point; callers should check seen_error_p.  */
+
+object *
+parser::parse_object ()
+{
+  require (TOK_OPEN_CURLY);
+
+  object *result = new object ();
+
+  const token *tok = m_lexer.peek ();
+  if (tok->id == TOK_CLOSE_CURLY)
+    {
+      /* Empty object.  */
+      require (TOK_CLOSE_CURLY);
+      return result;
+    }
+
+  while (!seen_error_p ())
+    {
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+        {
+          /* Only TOK_STRING tokens carry u.string; bail out rather
+             than reading it from any other kind of token.  */
+          error_at (tok->index, "expected string for object key");
+          return result;
+        }
+      /* Copy the key: consuming the token frees its string.  */
+      char *key = xstrdup (tok->u.string);
+      m_lexer.consume ();
+
+      require (TOK_COLON);
+
+      value *v = parse_value ();
+      if (!v)
+        {
+          free (key);
+          return result;
+        }
+      /* object::set takes its own copy of KEY.  */
+      result->set (key, v);
+      free (key);
+
+      tok = m_lexer.peek ();
+      if (tok->id == TOK_COMMA)
+        {
+          m_lexer.consume ();
+          continue;
+        }
+      else
+        {
+          require (TOK_CLOSE_CURLY);
+          break;
+        }
+    }
+  return result;
+}
+
+/* Parse a JSON array: '[', then zero or more comma-separated values,
+   then ']'.
+   A new json::array is always returned.  If an error is recorded
+   part-way through, the array holds only the elements parsed up to
+   that point.
*/ + +array * +parser::parse_array () +{ + require (TOK_OPEN_SQUARE); + + array *result = new array (); + + const token *tok = m_lexer.peek (); + if (tok->id == TOK_CLOSE_SQUARE) + { + m_lexer.consume (); + return result; + } + + while (!seen_error_p ()) + { + value *v = parse_value (); + if (!v) + return result; + + result->append (v); + + tok = m_lexer.peek (); + if (tok->id == TOK_COMMA) + { + m_lexer.consume (); + continue; + } + else + { + require (TOK_CLOSE_SQUARE); + break; + } + } + + return result; +} + +/* Consume the next token, issuing an error if it is not of kind TOK_ID. */ + +void +parser::require (enum token_id tok_id) +{ + const token *tok = m_lexer.peek (); + if (tok->id != tok_id) + error_at (tok->index, "expected %s; got %s", token_id_name[tok_id], + token_id_name[tok->id]); + m_lexer.consume (); +} + +/* Issue a parsing error. If this is the first error that has occurred on + the parser, store it within the parser's m_err_out (the buffer will + eventually need to be free by the caller of the parser). + Otherwise the error is discarded. + + TODO: maybe provide a callback so that client code can print all errors? */ + +void +parser::error_at (int index, const char *fmt, ...) +{ + va_list ap; + va_start (ap, fmt); + char *formatted = xvasprintf (fmt, ap); + va_end (ap); + + char *msg_with_index = xasprintf ("error at index %i: %s", + index, formatted); + free (formatted); + + if (0) + fprintf (stderr, "%s\n", msg_with_index); + if (*m_err_out == NULL) + *m_err_out = msg_with_index; + else + free (msg_with_index); +} + +/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF + of the given LENGTH. + If successful, return a non-NULL json::value *. + if there was a problem, return NULL and write an error + message to err_out, which must be freed by the caller. 
*/ + +value * +json::parse_utf8_string (size_t length, const char *utf8_buf, + char **err_out) +{ + gcc_assert (err_out); + gcc_assert (*err_out == NULL); + + parser p (err_out); + p.add_utf8 (length, utf8_buf); + value *result = p.parse_value (); + if (p.seen_error_p ()) + { + gcc_assert (*err_out); + delete result; + return NULL; + } + return result; +} + +/* Attempt to parse the nil-terminated UTF-8 encoded buffer at + UTF8_BUF. + If successful, return a non-NULL json::value *. + if there was a problem, return NULL and write an error + message to err_out, which must be freed by the caller. */ + +value * +json::parse_utf8_string (const char *utf8, char **err_out) +{ + return parse_utf8_string (strlen (utf8), utf8, err_out); +} + + +#if CHECKING_P + +namespace selftest { + +/* Selftests. */ + +/* Verify that JV->to_str () equals EXPECTED_JSON. */ + +static void +assert_to_str_eq (const char *expected_json, json::value *jv) +{ + char *json = jv->to_str (); + ASSERT_STREQ (expected_json, json); + free (json); +} + +/* FIXME. */ + +static void +test_parse_string () +{ + char *err = NULL; + json::value *jv = parse_utf8_string ("\"foo\"", &err); + ASSERT_EQ (NULL, err); + ASSERT_EQ (JSON_STRING, jv->get_kind ()); + ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ()); + assert_to_str_eq ("\"foo\"", jv); + + json::value *clone = jv->clone (); + ASSERT_EQ (JSON_STRING, clone->get_kind ()); + ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ()); + assert_to_str_eq ("\"foo\"", clone); + delete clone; + delete jv; + + const char *contains_quotes = "\"before \\\"quoted\\\" after\""; + jv = parse_utf8_string (contains_quotes, &err); + ASSERT_EQ (NULL, err); + ASSERT_EQ (JSON_STRING, jv->get_kind ()); + ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string ()); + assert_to_str_eq (contains_quotes, jv); + delete jv; + + /* Test of non-ASCII input. This string is the Japanese word "mojibake", + written as C octal-escaped UTF-8. 
*/ + const char *mojibake = (/* Opening quote. */ + "\"" + /* U+6587 CJK UNIFIED IDEOGRAPH-6587 + UTF-8: 0xE6 0x96 0x87 + C octal escaped UTF-8: \346\226\207. */ + "\346\226\207" + /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 + UTF-8: 0xE5 0xAD 0x97 + C octal escaped UTF-8: \345\255\227. */ + "\345\255\227" + /* U+5316 CJK UNIFIED IDEOGRAPH-5316 + UTF-8: 0xE5 0x8C 0x96 + C octal escaped UTF-8: \345\214\226. */ + "\345\214\226" + /* U+3051 HIRAGANA LETTER KE + UTF-8: 0xE3 0x81 0x91 + C octal escaped UTF-8: \343\201\221. */ + "\343\201\221" + /* Closing quote. */ + "\""); + jv = parse_utf8_string (mojibake, &err); + ASSERT_EQ (NULL, err); + ASSERT_EQ (JSON_STRING, jv->get_kind ()); + /* Result of get_string should be UTF-8 encoded, without quotes. */ + ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221", + ((json::string *)jv)->get_string ()); + /* Result of dump should be UTF-8 encoded, with quotes. */ + assert_to_str_eq (mojibake, jv); + delete jv; +} + +/* FIXME. */ + +static void +test_parse_number () +{ + json::value *jv, *clone; + + char *err = NULL; + jv = parse_utf8_string ("42", &err); + ASSERT_EQ (NULL, err); + ASSERT_EQ (JSON_NUMBER, jv->get_kind ()); + ASSERT_EQ (42.0, ((json::number *)jv)->get ()); + assert_to_str_eq ("42", jv); + clone = jv->clone (); + ASSERT_EQ (JSON_NUMBER, clone->get_kind ()); + delete clone; + delete jv; +} + +/* FIXME. 
*/ + +static void +test_parse_array () +{ + json::value *jv, *clone; + + char *err = NULL; + jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err); + ASSERT_EQ (NULL, err); + ASSERT_EQ (JSON_ARRAY, jv->get_kind ()); + json::array *arr = static_cast (jv); + ASSERT_EQ (10, arr->get_length ()); + for (int i = 0; i < 10; i++) + { + json::value *element = arr->get (i); + ASSERT_EQ (JSON_NUMBER, element->get_kind ()); + ASSERT_EQ (i, ((json::number *)element)->get ()); + } + assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv); + + clone = jv->clone (); + ASSERT_EQ (JSON_ARRAY, clone->get_kind ()); + arr = static_cast (clone); + ASSERT_EQ (10, arr->get_length ()); + for (int i = 0; i < 10; i++) + { + json::value *element = arr->get (i); + ASSERT_EQ (JSON_NUMBER, element->get_kind ()); + ASSERT_EQ (i, ((json::number *)element)->get ()); + } + assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone); + delete clone; + + delete jv; +} + +/* FIXME. */ + +static void +test_parse_object () +{ + char *err = NULL; + json::value *jv + = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, 43]}", &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + ASSERT_EQ (JSON_OBJECT, jv->get_kind ()); + json::object *jo = static_cast (jv); + + json::value *foo_value = jo->get ("foo"); + ASSERT_TRUE (foo_value != NULL); + ASSERT_EQ (JSON_STRING, foo_value->get_kind ()); + ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ()); + + json::value *baz_value = jo->get ("baz"); + ASSERT_TRUE (baz_value != NULL); + ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ()); + + json::array *baz_array = (json::array *)baz_value; + ASSERT_EQ (2, baz_array->get_length ()); + + // etc, verify numbers + + // TODO: error-handling + // TODO: partial document + + /* We can't use assert_to_str_eq since ordering is not guaranteed. 
*/ + + json::value *clone = jv->clone (); + ASSERT_EQ (JSON_OBJECT, clone->get_kind ()); + ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ()); + delete clone; + + delete jv; +} + +/* Verify that the literals "true", "false" and "null" are parsed, + dumped, and are clonable. */ + +static void +test_parse_literals () +{ + json::value *jv, *clone; + char *err = NULL; + jv = parse_utf8_string ("true", &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + ASSERT_EQ (JSON_TRUE, jv->get_kind ()); + assert_to_str_eq ("true", jv); + clone = jv->clone (); + ASSERT_EQ (JSON_TRUE, clone->get_kind ()); + delete clone; + delete jv; + + jv = parse_utf8_string ("false", &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + ASSERT_EQ (JSON_FALSE, jv->get_kind ()); + assert_to_str_eq ("false", jv); + clone = jv->clone (); + ASSERT_EQ (JSON_FALSE, clone->get_kind ()); + delete clone; + delete jv; + + jv = parse_utf8_string ("null", &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + ASSERT_EQ (JSON_NULL, jv->get_kind ()); + assert_to_str_eq ("null", jv); + clone = jv->clone (); + ASSERT_EQ (JSON_NULL, clone->get_kind ()); + delete clone; + delete jv; +} + +/* FIXME. */ + +static void +test_parse_jsonrpc () +{ + char *err = NULL; + const char *request + = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\"," + " \"params\": [42, 23], \"id\": 1}"); + json::value *jv = parse_utf8_string (request, &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + delete jv; +} + +/* FIXME. */ + +static void +test_parse_empty_object () +{ + char *err = NULL; + json::value *jv = parse_utf8_string ("{}", &err); + ASSERT_EQ (NULL, err); + ASSERT_TRUE (jv != NULL); + ASSERT_EQ (JSON_OBJECT, jv->get_kind ()); + assert_to_str_eq ("{}", jv); + delete jv; +} + +/* FIXME. 
*/ + +static void +test_error_empty_string () +{ + char *err = NULL; + json::value *jv = parse_utf8_string ("", &err); + ASSERT_STREQ ("error at index 0: unexpected token: EOF", err); + ASSERT_TRUE (jv == NULL); + free (err); +} + +/* FIXME. */ + +static void +test_error_missing_comma () +{ + char *err = NULL; + /* 01234567. */ + const char *json = "[0, 1 2]"; + json::value *jv = parse_utf8_string (json, &err); + ASSERT_STREQ ("error at index 6: expected ']'; got number", + err); + // FIXME: unittest the lexer? + ASSERT_TRUE (jv == NULL); + free (err); +} + +/* Run all of the selftests within this file. */ + +void +json_c_tests () +{ + test_parse_string (); + test_parse_number (); + test_parse_array (); + test_parse_object (); + test_parse_literals (); + test_parse_jsonrpc (); + test_parse_empty_object (); + test_error_empty_string (); + test_error_missing_comma (); + + /* FIXME: tests for roundtripping (noting that we don't preserve + object key ordering). */ + + /* FIXME: cloning. */ +} + +} // namespace selftest + +#endif /* #if CHECKING_P */ diff --git a/gcc/json.h b/gcc/json.h new file mode 100644 index 0000000..b6bdb13 --- /dev/null +++ b/gcc/json.h @@ -0,0 +1,202 @@ +/* JSON parsing + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_JSON_H +#define GCC_JSON_H + +/* Implementation of JSON, a lightweight data-interchange format. 
+ + See http://www.json.org/ + and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf + + Supports parsing text into a DOM-like tree of json::value *, dumping + json::value * to text. */ + +namespace json +{ + +/* Forward decls of json::value and its subclasses (using indentation + to denote inheritance. */ + +class value; + class object; + class array; + class number; + class string; + class literal; + +/* An enum for discriminating the subclasses of json::value. */ + +enum kind +{ + /* class json::object. */ + JSON_OBJECT, + + /* class json::array. */ + JSON_ARRAY, + + /* class json::number. */ + JSON_NUMBER, + + /* class json::string. */ + JSON_STRING, + + /* class json::literal uses these three values to identify the + particular literal. */ + JSON_TRUE, + JSON_FALSE, + JSON_NULL +}; + +/* Base class of JSON value. */ + +class value +{ + public: + virtual ~value () {} + virtual enum kind get_kind () const = 0; + virtual void print (pretty_printer *pp) const = 0; + + /* Create a deep copy of the value, returning a value which must be + deleted by the caller. */ + virtual value *clone () const = 0; + + char *to_str () const; + void dump (FILE *) const; + + /* Methods for dynamically casting a value to one of the subclasses, + returning NULL if the value is of the wrong kind. */ + const object *as_object () const; + const array *as_array () const; + const number *as_number () const; + const string *as_string () const; + + /* Convenience accessors for attempting to perform key/value lookups + on this value as if it were an json::object. + + On success, return true and write the value to OUT_VALUE. + On failure, return false and write an error message to OUT_ERR + (which must be freed by the caller). 
*/ + bool get_value_by_key (const char *name, const value *&out_value, + char *&out_err) const; + bool get_int_by_key (const char *name, int &out_value, char *&out_err) const; + bool get_string_by_key (const char *name, const char *&out_value, + char *&out_err) const; +}; + +/* Subclass of value for objects: an unordered collection of + key/value pairs. */ + +class object : public value +{ + public: + ~object (); + + enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; } + void print (pretty_printer *pp) const FINAL OVERRIDE; + value *clone () const FINAL OVERRIDE; + + value *get (const char *key) const; + void set (const char *key, value *v); + + private: + typedef hash_map > map_t; + map_t m_map; +}; + +/* Subclass of value for arrays. */ + +class array : public value +{ + public: + ~array (); + + enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; } + void print (pretty_printer *pp) const FINAL OVERRIDE; + value *clone () const FINAL OVERRIDE; + + unsigned get_length () const { return m_elements.length (); } + value *get (int idx) const { return m_elements[idx]; } + void append (value *v) { m_elements.safe_push (v); } + + private: + auto_vec m_elements; +}; + +/* Subclass of value for numbers. */ + +class number : public value +{ + public: + number (double value) : m_value (value) {} + + enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; } + void print (pretty_printer *pp) const FINAL OVERRIDE; + value *clone () const FINAL OVERRIDE; + + double get () const { return m_value; } + + private: + double m_value; +}; + +/* Subclass of value for strings. 
*/ + +class string : public value +{ + public: + string (const char *utf8) : m_utf8 (xstrdup (utf8)) {} + ~string () { free (m_utf8); } + + enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; } + void print (pretty_printer *pp) const FINAL OVERRIDE; + value *clone () const FINAL OVERRIDE; + + const char *get_string () const { return m_utf8; } + + private: + char *m_utf8; +}; + +/* Subclass of value for the three JSON literals "true", "false", + and "null". */ + +class literal : public value +{ + public: + literal (enum kind kind) : m_kind (kind) {} + + enum kind get_kind () const FINAL OVERRIDE { return m_kind; } + void print (pretty_printer *pp) const FINAL OVERRIDE; + value *clone () const FINAL OVERRIDE; + + private: + enum kind m_kind; +}; + +/* Declarations for parsing JSON to a json::value * tree. */ + +extern value *parse_utf8_string (size_t length, const char *utf8_buf, + char **err_out); +extern value *parse_utf8_string (const char *utf8, char **err_out); + +} // namespace json + +#endif /* GCC_JSON_H */ diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c index f62bc72..b8fe4a1 100644 --- a/gcc/selftest-run-tests.c +++ b/gcc/selftest-run-tests.c @@ -66,6 +66,7 @@ selftest::run_tests () sreal_c_tests (); fibonacci_heap_c_tests (); typed_splay_tree_c_tests (); + json_c_tests (); /* Mid-level data structures. */ input_c_tests (); diff --git a/gcc/selftest.h b/gcc/selftest.h index dad53e9..6b03e39 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -183,6 +183,7 @@ extern void ggc_tests_c_tests (); extern void hash_map_tests_c_tests (); extern void hash_set_tests_c_tests (); extern void input_c_tests (); +extern void json_c_tests (); extern void pretty_print_c_tests (); extern void read_rtl_function_c_tests (); extern void rtl_tests_c_tests (); -- 1.8.5.3