From fa97d47ba54c770ebb8c30d92e3e7d2cabe4f203 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 22 May 2020 11:42:24 -0400 Subject: [PATCH 175/179] FIXME: add 'touched' flag to clusters --- gcc/analyzer/region-model2.cc | 3 +- gcc/analyzer/store2.cc | 37 +++++++- gcc/analyzer/store2.h | 93 ++++++++++++++++--- gcc/testsuite/gcc.dg/analyzer/data-model-18.c | 4 +- gcc/testsuite/gcc.dg/analyzer/symbolic-1.c | 45 +++++++-- 5 files changed, 156 insertions(+), 26 deletions(-) diff --git a/gcc/analyzer/region-model2.cc b/gcc/analyzer/region-model2.cc index 3f19bc37a6b..2e775335bc5 100644 --- a/gcc/analyzer/region-model2.cc +++ b/gcc/analyzer/region-model2.cc @@ -5417,7 +5417,8 @@ region_model2::get_store_value (const region2 *reg) return m_mgr->get_or_create_cast (reg->get_type (), char_sval); } - /* FIXME: symbolic value. */ + /* FIXME: not if touched by symbolic value; handled in + binding_cluster2::get_any_binding. */ return m_mgr->get_or_create_initial_value (reg); } diff --git a/gcc/analyzer/store2.cc b/gcc/analyzer/store2.cc index de60129018a..5cc47d62c07 100644 --- a/gcc/analyzer/store2.cc +++ b/gcc/analyzer/store2.cc @@ -239,7 +239,7 @@ simplify_for_binding (const svalue2 *sval) binding_cluster2::binding_cluster2 (const binding_cluster2 &other) : m_base_region (other.m_base_region), m_map (other.m_map), - m_escaped (other.m_escaped) + m_escaped (other.m_escaped), m_touched (other.m_touched) { #if 0 for (map_t::iterator iter = other.m_map.begin (); iter != other.m_map.end (); @@ -268,6 +268,7 @@ binding_cluster2::operator= (const binding_cluster2 &other) m_map.put (key, sval); } m_escaped = other.m_escaped; + m_touched = other.m_touched; return *this; } @@ -319,6 +320,16 @@ binding_cluster2::dump_to_pp (pretty_printer *pp, bool simple, else pp_string (pp, "(ESCAPED)"); } + if (m_touched) + { + if (multiline) + { + pp_string (pp, " TOUCHED"); + pp_newline (pp); + } + else + pp_string (pp, "(TOUCHED)"); + } const binding_key2 *key; unsigned i; @@ -359,6 +370,8 @@ binding_cluster2::bind (store2_manager *mgr, { const binding_key2 *binding = binding_key2::make (mgr, reg, kind); m_map.put (binding, val); + if (binding->symbolic_p ()) + m_touched = true; } /* FIXME. */ @@ -397,6 +410,8 @@ binding_cluster2::zero_fill_region (store2_manager *mgr, const region2 *reg) bound_sval = sval_mgr->get_or_create_unaryop (reg->get_type (), NOP_EXPR, cst_sval); bind (mgr, reg, bound_sval, BK_default); + + // FIXME: clear m_touched; testcase for this } /* FIXME. */ @@ -471,6 +486,16 @@ binding_cluster2::get_any_binding (store2_manager *mgr, if (const svalue2 *default_sval = get_binding_recursive (mgr, reg, BK_default)) return default_sval; + + /* If this cluster has been touched by a symbolic write, then the content + of any subregion not currently specifically bound is "UNKNOWN". */ + if (m_touched) + { + region_model2_manager *rmm_mgr = mgr->get_svalue2_manager (); + return rmm_mgr->get_or_create_unknown_svalue2 (reg->get_type ()); + } + + /* Otherwise, the initial value, or uninitialized. */ return NULL; } @@ -636,12 +661,14 @@ binding_cluster2::on_escape_to_unknown_fncall (store2_manager *mgr) /* Return true if this binding_cluster2 has no information i.e. if there are no bindings, and it hasn't been marked as having - escaped. */ + escaped, or touched symbolically. */ bool binding_cluster2::redundant_p () const { - return m_map.elements () == 0 && !m_escaped; + return (m_map.elements () == 0 + && !m_escaped + && !m_touched); } /* Find representative path_vars for SVAL within this binding of BASE_REG, @@ -902,6 +929,8 @@ store2::dump_to_pp (pretty_printer *pp, bool simple, bool multiline, sval->dump_to_pp (pp, simple); if (cluster->escaped_p ()) pp_string (pp, " (ESCAPED)"); + if (cluster->touched_p ()) + pp_string (pp, " (TOUCHED)"); pp_newline (pp); } else @@ -914,6 +943,8 @@ store2::dump_to_pp (pretty_printer *pp, bool simple, bool multiline, sval->dump_to_pp (pp, simple); if (cluster->escaped_p ()) pp_string (pp, " (ESCAPED)"); + if (cluster->touched_p ()) + pp_string (pp, " (TOUCHED)"); pp_string (pp, "}"); } } diff --git a/gcc/analyzer/store2.h b/gcc/analyzer/store2.h index 35c043c4250..54c33dddc68 100644 --- a/gcc/analyzer/store2.h +++ b/gcc/analyzer/store2.h @@ -30,6 +30,74 @@ along with GCC; see the file COPYING3. If not see (Zhongxing Xu, Ted Kremenek, and Jian Zhang) http://lcs.ios.ac.cn/~xuzb/canalyze/memmodel.pdf */ +/* The store models memory as a collection of "clusters", where regions + are partitioned into clusters via their base region. + + For example, given: + int p, q, r; + struct coord { double x; double y; } c[3];} + then each of p, q, r, and c will have their own clusters, so that we + know that writes to e.g. "c[1].x".don't affect e.g. "p". + + Within each cluster we store a map of bindings to values, where the + binding keys can be either concrete or symbolic. + + Concrete bindings affect a specific range of bits relative to the start + of the base region of the cluster, whereas symbolic bindings affect + a specific subregion within the cluster. + + Consider (from the symbolic-1.c testcase): + + char arr[1024]; + arr[2] = a; (1) + arr[3] = b; (2) + After (1) and (2), the cluster for "arr" has concrete bindings + for bits 16-23 and for bits 24-31, with svalues "INIT_VAL(a)" + and "INIT_VAL(b)" respectively: + cluster: {bits 16-23: "INIT_VAL(a)", + bits 24-31: "INIT_VAL(b)"; + flags: {}} + Attempting to query unbound subregions e.g. arr[4] will + return "UNINITIALIZED". + "a" and "b" are each in their own clusters, with no explicit + bindings, and thus implicitly have value INIT_VAL(a) and INIT_VAL(b). + + arr[3] = c; (3) + After (3), the concrete binding for bits 24-31 is replaced with the + svalue "INIT_VAL(b)": + cluster: {bits 16-23: "INIT_VAL(a)", (from before) + bits 24-31: "INIT_VAL(c)"; (updated) + flags: {}} + + arr[i] = d; (4) + After (4), we lose the concrete bindings and replace them with a + symbolic binding for "arr[i]", with svalue "INIT_VAL(d)". We also + mark the cluster as having been "symbolically touched": future + attempts to query the values of subregions other than "arr[i]", + such as "arr[3]" are "UNKNOWN", since we don't know if the write + to arr[i] affected them. + cluster: {symbolic_key(arr[i]): "INIT_VAL(d)"; + flags: {TOUCHED}} + + arr[j] = e; (5) + After (5), we lose the symbolic binding for "arr[i]" since we could + have overwritten it, and add a symbolic binding for "arr[j]". + cluster: {symbolic_key(arr[j]): "INIT_VAL(d)"; (different symbolic + flags: {TOUCHED}} binding) + + arr[3] = f; (6) + After (6), we lose the symbolic binding for "arr[j]" since we could + have overwritten it, and gain a concrete binding for bits 24-31 + again, this time with svalue "INIT_VAL(e)": + cluster: {bits 24-31: "INIT_VAL(d)"; + flags: {TOUCHED}} + The cluster is still flagged as touched, so that we know that + accesses to other elements are "UNKNOWN" rather than + "UNINITIALIZED". + + TODO: turn this into symbolic-1.c + TODO: example of memset to zero. */ + namespace ana { class concrete_binding2; @@ -56,6 +124,7 @@ class binding_key2 public: virtual ~binding_key2 () {} virtual bool concrete_p () const = 0; + bool symbolic_p () const { return !concrete_p (); } static const binding_key2 *make (store2_manager *mgr, const region2 *r, enum binding_kind kind); @@ -175,7 +244,8 @@ class binding_cluster2 { public: binding_cluster2 (const region2 *base_region) - : m_base_region (base_region), m_map (), m_escaped (false) {} + : m_base_region (base_region), m_map (), + m_escaped (false), m_touched (false) {} binding_cluster2 (const binding_cluster2 &other); binding_cluster2& operator=(const binding_cluster2 &other); @@ -222,6 +292,7 @@ public: void on_escape_to_unknown_fncall (store2_manager *mgr); bool escaped_p () const { return m_escaped; } + bool touched_p () const { return m_touched; } bool redundant_p () const; @@ -233,23 +304,12 @@ public: private: const svalue2 *get_any_value (const binding_key2 *key) const; -#if 0 - void get_overlapping_bindings (store2_manager *mgr, const region2 *reg, - auto_vec *out); -#else void get_overlapping_bindings (store2_manager *mgr, const region2 *reg, auto_vec *out); -#endif const region2 *m_base_region; -#if 0 - /* FIXME: experimental representation; ultimately will probably want - something we can do faster than O(n) lookup in. */ - typedef hash_map map_t; -#else typedef hash_map map_t; -#endif map_t m_map; /* Has a pointer to this cluster "escaped" into a part of the program @@ -260,6 +320,15 @@ private: /* FIXME: Don't track this for globals, as they have automatically escaped already. TODO: but what about file-local statics? */ bool m_escaped; + + /* Has this cluster been written to via a symbolic binding? + If so, then we don't know anything about unbound subregions, + so we can't use initial_svalue2, treat them as uninitialized, or + inherit values from a parent region. */ + bool m_touched; + + /* FIXME: would it be better to simply store a single last symbolic binding, + and store concrete keys separately? */ }; /* The mapping from regions to values. diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-18.c b/gcc/testsuite/gcc.dg/analyzer/data-model-18.c index 0a9ae9ff4d4..86e8110ccf6 100644 --- a/gcc/testsuite/gcc.dg/analyzer/data-model-18.c +++ b/gcc/testsuite/gcc.dg/analyzer/data-model-18.c @@ -16,7 +16,5 @@ void test (int *p, int i, int j) __analyzer_eval (p[3] == 42); /* { dg-warning "UNKNOWN" } */ __analyzer_eval (p[i] == 17); /* { dg-warning "TRUE" } */ - __analyzer_eval (p[j] == 17); /* { dg-warning "UNKNOWN" "desired" { xfail *-*-* } } */ - /* { dg-bogus "TRUE" "status quo" { xfail *-*-* } .-1 } */ - // FIXME(xfails) ^^^ + __analyzer_eval (p[j] == 17); /* { dg-warning "UNKNOWN" } */ } diff --git a/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c b/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c index 1e9af88fd5d..9d228e6331c 100644 --- a/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/symbolic-1.c @@ -1,12 +1,43 @@ #include "analyzer-decls.h" -void test_1 (int i, int j) +/* The example from store2.h */ + +void test_1 (char a, char b, char c, char d, char e, char f, + int i, int j) { char arr[1024]; - arr[0] = 42; - arr[1] = 17; - __analyzer_eval (arr[0] == 42); /* { dg-warning "TRUE" } */ - __analyzer_eval (arr[1] == 17); /* { dg-warning "TRUE" } */ - // TODO: add symbolic binding: - // arr[i] = j; + arr[2] = a; /* (1) */ + arr[3] = b; /* (2) */ + + __analyzer_eval (arr[2] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[3] == b); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ // TODO: report uninit + + /* Replace one concrete binding's value with a different value. */ + arr[3] = c; /* (3) */ + __analyzer_eval (arr[2] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[3] == c); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[3] == b); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ // TODO: report uninit + + /* Symbolic binding. */ + arr[i] = d; /* (4) */ + __analyzer_eval (arr[i] == d); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[2] == a); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[3] == c); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ /* Don't report uninit. */ + + /* Replace symbolic binding with a different one. */ + arr[j] = e; /* (5) */ + __analyzer_eval (arr[j] == e); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[i] == d); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ /* Don't report uninit. */ + + /* Add a concrete binding. */ + arr[3] = f; /* (6) */ + __analyzer_eval (arr[3] == f); /* { dg-warning "TRUE" } */ + __analyzer_eval (arr[j] == e); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (arr[4]); /* { dg-warning "UNKNOWN" } */ /* Don't report uninit. */ } + +// TODO: as above, but with int rather than char so there's a cast -- 2.21.0