From 05d0903db7a5822d046c32c8f294d71923ccbc40 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Tue, 23 May 2017 07:25:49 -0400
Subject: [PATCH 31/31] FIXME: json cleanups

---
 gcc/json.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 gcc/json.h |   3 +-
 2 files changed, 107 insertions(+), 30 deletions(-)
diff --git a/gcc/json.c b/gcc/json.c
index c9b3d97..4d3e8fa 100644
--- a/gcc/json.c
+++ b/gcc/json.c
@@ -28,7 +28,8 @@ using namespace json;
 
 /* class json::value.  */
 
-/* FIXME.  */
+/* Generate a char * for this json::value tree.
+   The returned value must be freed by the caller.  */
 
 char *
 value::to_str () const
@@ -38,7 +39,9 @@ value::to_str () const
   return xstrdup (pp_formatted_text (&pp));
 }
 
-/* FIXME.  */
+/* Dump this json::value tree to OUTF.
+   No formatting is done.  There are no guarantees about the order
+   in which the key/value pairs of json::objects are printed.  */
 
 void
 value::dump (FILE *outf) const
@@ -49,7 +52,8 @@ value::dump (FILE *outf) const
   pp_flush (&pp);
 }
 
-/* FIXME.  */
+/* If this json::value is a json::object, return it,
+   otherwise return NULL.  */
 
 const object *
 value::as_object () const
@@ -59,7 +63,8 @@ value::as_object () const
   return static_cast <const object *> (this);
 }
 
-/* FIXME.  */
+/* If this json::value is a json::array, return it,
+   otherwise return NULL.  */
 
 const array *
 value::as_array () const
@@ -69,7 +74,8 @@ value::as_array () const
   return static_cast <const array *> (this);
 }
 
-/* FIXME.  */
+/* If this json::value is a json::number, return it,
+   otherwise return NULL.  */
 
 const number *
 value::as_number () const
@@ -79,7 +85,8 @@ value::as_number () const
   return static_cast <const number *> (this);
 }
 
-/* FIXME.  */
+/* If this json::value is a json::string, return it,
+   otherwise return NULL.  */
 
 const string *
 value::as_string () const
@@ -90,7 +97,10 @@ value::as_string () const
 }
 
 /* Attempt to get lookup the value of a key/value pair from this value
-   as if it is an object.
+   as if this value were an object.
+
+   To succeed, THIS must be a json::object, and it must have a key named
+   NAME.
 
    On success, return true and write the value to OUT_VALUE.
    On failure, return false and write an error message to OUT_ERR
@@ -155,7 +165,8 @@ value::get_string_by_key (const char *name, const char *&out_value,
   return true;
 }
 
-/* class json::object, a subclass of json::value.  */
+/* class json::object, a subclass of json::value, representing
+   an unordered collection of key/value pairs.  */
 
 /* json:object's dtor.  */
 
@@ -222,7 +233,8 @@ object::set (const char *key, value *v)
   m_map.put (xstrdup (key), v);
 }
 
-/* class json::array, a subclass of json::value.  */
+/* class json::array, a subclass of json::value, representing
+   an ordered collection of values.  */
 
 /* json::array's dtor.  */
 
@@ -264,7 +276,7 @@ array::clone () const
   return other;
 }
 
-/* class json::number, a subclass of json::value.  */
+/* class json::number, a subclass of json::value, wrapping a double.  */
 
 /* Implementation of json::value::print for json::number.  */
 
@@ -348,16 +360,17 @@ literal::clone () const
   return new literal (m_kind);
 }
 
+
 /* Declarations relating to parsing JSON, all within an
    anonymous namespace.  */
 
 namespace {
 
-/* FIXME.  */
+/* A typedef representing a single unicode character.  */
 
 typedef unsigned unichar;
 
-/* FIXME.  */
+/* An enum for discriminating different kinds of JSON token.  */
 
 enum token_id
 {
@@ -382,7 +395,7 @@ enum token_id
   TOK_NUMBER
 };
 
-/* FIXME.  */
+/* Human-readable descriptions of enum token_id.  */
 
 static const char *token_id_name[] = {
   "error",
@@ -400,15 +413,23 @@ static const char *token_id_name[] = {
   "number"
 };
 
-/* FIXME.  */
+/* Tokens within the JSON lexer.  */
 
 struct token
 {
+  /* The kind of token.  */
   enum token_id id;
+
+  /* The location of this token within the unicode
+     character stream.  */
   int index;
+
   union
   {
+    /* Value for TOK_STRING.  */
     char *string;
+
+    /* Value for TOK_NUMBER.  */
     double number;
   } u;
 };
@@ -468,14 +489,14 @@ class parser
 
 /* Parser implementation.  */
 
-/* FIXME.  */
+/* lexer's ctor.  */
 
 lexer::lexer ()
 : m_buffer (), m_next_char_idx (0), m_num_next_tokens (0)
 {
 }
 
-/* FIXME.  */
+/* Peek the next token.  */
 
 const token *
 lexer::peek ()
@@ -488,7 +509,7 @@ lexer::peek ()
   return &m_next_tokens[0];
 }
 
-/* FIXME.  */
+/* Consume the next token.  */
 
 void
 lexer::consume ()
@@ -514,17 +535,21 @@ lexer::consume ()
 	   sizeof (token) * m_num_next_tokens);
 }
 
-/* FIXME.  */
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+   buffer.  */
 
 void
 lexer::add_utf8 (size_t length, const char *utf8_buf)
 {
-  // FIXME: this assumes Latin-1.
+  /* FIXME: this blithely ignores the niceties of UTF-8 and simply pushes the
+     bytes into the buffer.  */
   for (size_t i = 0; i < length; i++)
     m_buffer.safe_push (utf8_buf[i]);
 }
 
-/* FIXME.  */
+/* Attempt to get the next unicode character from this lexer's buffer.
+   If successful, write it to OUT and return true.
+   Otherwise, return false.  */
 
 bool
 lexer::get_char (unichar &out)
@@ -544,7 +569,9 @@ lexer::unget_char ()
   --m_next_char_idx;
 }
 
-/* FIXME.  */
+/* Print a textual representation of TOK to OUTF.
+   This is intended for debugging the lexer and parser,
+   rather than for user-facing output.  */
 
 void
 lexer::dump_token (FILE *outf, const token *tok)
@@ -609,7 +636,8 @@ lexer::dump_token (FILE *outf, const token *tok)
     }
 }
 
-/* FIXME.  */
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.  */
 
 void
 lexer::lex_token (token *out)
@@ -709,7 +737,9 @@ lexer::lex_token (token *out)
     }
 }
 
-/* FIXME.  */
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or TOK_ERROR
+   if an error occurred).  */
 
 void
 lexer::lex_string (token *out)
@@ -770,7 +800,9 @@ lexer::lex_string (token *out)
   // FIXME: leaks?  have a json_context do the allocation
 }
 
-/* FIXME.  */
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+   the lexer's buffer, attempt to lex the rest of a JSON number, writing
+   the result to OUT (or TOK_ERROR if an error occurred).  */
 
 void
 lexer::lex_number (token *out, unichar first_char)
@@ -829,7 +861,7 @@ lexer::rest_of_literal_p (const char *suffix) const
     }
 }
 
-/* FIXME.  */
+/* parser's ctor.  */
 
 parser::parser (char **err_out)
 : m_lexer (), m_err_out (err_out)
@@ -839,7 +871,8 @@ parser::parser (char **err_out)
   *err_out = NULL;
 }
 
-/* FIXME.  */
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this parser's
+   lexer's buffer.  */
 
 void
 parser::add_utf8 (size_t length, const char *utf8_buf)
@@ -996,7 +1029,7 @@ parser::parse_array ()
   return result;
 }
 
-/* FIXME.  */
+/* Consume the next token, issuing an error if it is not of kind TOK_ID.  */
 
 void
 parser::require (enum token_id tok_id)
@@ -1008,7 +1041,12 @@ parser::require (enum token_id tok_id)
   m_lexer.consume ();
 }
 
-/* FIXME.  */
+/* Issue a parsing error.  If this is the first error that has occurred on
+   the parser, store it within the parser's m_err_out (the buffer will
+   eventually need to be freed by the caller of the parser).
+   Otherwise the error is discarded.
+
+   TODO: maybe provide a callback so that client code can print all errors?  */
 
 void
 parser::error_at (int index, const char *fmt, ...)
@@ -1067,7 +1105,7 @@ json::parse_utf8_string (const char *utf8, char **err_out)
   return parse_utf8_string (strlen (utf8), utf8, err_out);
 }
 
-
+
 #if CHECKING_P
 
 namespace selftest {
@@ -1095,6 +1133,12 @@ test_parse_string ()
   ASSERT_EQ (JSON_STRING, jv->get_kind ());
   ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
   assert_to_str_eq ("\"foo\"", jv);
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_STRING, clone->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
+  assert_to_str_eq ("\"foo\"", clone);
+  delete clone;
   delete jv;
 
   const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
@@ -1104,6 +1148,38 @@ test_parse_string ()
   ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string ());
   assert_to_str_eq (contains_quotes, jv);
   delete jv;
+
+  /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
+     written as C octal-escaped UTF-8.  */
+  const char *mojibake = (/* Opening quote.  */
+			  "\""
+			  /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+			     UTF-8: 0xE6 0x96 0x87
+			     C octal escaped UTF-8: \346\226\207.  */
+			  "\346\226\207"
+			  /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+			     UTF-8: 0xE5 0xAD 0x97
+			     C octal escaped UTF-8: \345\255\227.  */
+			  "\345\255\227"
+			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+			      UTF-8: 0xE5 0x8C 0x96
+			      C octal escaped UTF-8: \345\214\226.  */
+			  "\345\214\226"
+			 /* U+3051 HIRAGANA LETTER KE
+			      UTF-8: 0xE3 0x81 0x91
+			      C octal escaped UTF-8: \343\201\221.  */
+			  "\343\201\221"
+			  /* Closing quote.  */
+			  "\"");
+  jv = parse_utf8_string (mojibake, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
 }
 
 /* FIXME.  */
diff --git a/gcc/json.h b/gcc/json.h
index 6e71beb..b6bdb13 100644
--- a/gcc/json.h
+++ b/gcc/json.h
@@ -100,7 +100,8 @@ class value
 			  char *&out_err) const;
 };
 
-/* Subclass of value for objects: key/value pairs.  */
+/* Subclass of value for objects: an unordered collection of
+   key/value pairs.  */
 
 class object : public value
 {
-- 
1.8.5.3