From 610208583ce971e9e1d1917892110ae67626e41e Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 24 Feb 2026 17:54:39 -0500 Subject: [PATCH 19/30] FIXME: sarif-replay: decode event IDs (PR sarif-replay/123056) --- gcc/libsarifreplay.cc | 65 +++++++++- .../3.10.3-bad-json-pointer.sarif | 112 ++++++++++++++++++ 2 files changed, 173 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.10.3-bad-json-pointer.sarif diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc index 9cc37fc7708..21eb5e3b012 100644 --- a/gcc/libsarifreplay.cc +++ b/gcc/libsarifreplay.cc @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see #include "libgdiagnostics++.h" #include "libgdiagnostics-private.h" #include "json-parsing.h" +#include "json-pointer-parsing.h" #include "intl.h" #include "sarif-spec-urls.def" #include "libsarifreplay.h" @@ -307,6 +308,7 @@ public: libgdiagnostics::manager &&control_manager) : m_output_mgr (std::move (output_manager)), m_control_mgr (std::move (control_manager)), + m_root_val (nullptr), m_driver_obj (nullptr), m_artifacts_arr (nullptr) { @@ -711,7 +713,12 @@ private: void append_embeddded_link (libgdiagnostics::message_buffer &result, - const embedded_link &link); + const embedded_link &link, + const json::object &message_obj); + + const json::value * + decode_link_within_sarif (const char *dst, + const json::object &message_obj); /* The manager to replay the SARIF files to. 
*/
   libgdiagnostics::manager m_output_mgr;
@@ -724,6 +731,7 @@ private:
 
   json::simple_location_map m_json_location_map;
 
+  const json::value *m_root_val;
   const json::object *m_driver_obj;
   const json::array *m_artifacts_arr;
 };
@@ -857,6 +865,7 @@ sarif_replayer::replay_file (const char *filename,
     }
 
   gcc_assert (result.m_val.get ());
+  m_root_val = result.m_val.get ();
   return emit_sarif_as_diagnostics (*result.m_val.get ());
 }
 
@@ -1563,11 +1572,51 @@ maybe_consume_embedded_link (const char *&iter_src)
 
+/* Append LINK to RESULT.  If LINK's destination starts with "sarif:/",
+   try to decode it to an object with an "executionOrder" property and
+   append the corresponding event id, falling back to LINK's text.
+   MESSAGE_OBJ is passed to decode_link_within_sarif.  */
 void
 sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
-				       const embedded_link &link)
+				       const embedded_link &link,
+				       const json::object &message_obj)
 {
-  /* We can't yet decode intra-sarif links, so simply use their text.  */
+  /* Try to convert intra-sarif links into event ids.  */
   if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/")))
     {
+      if (auto linked_val = decode_link_within_sarif (link.destination.c_str (),
+						      message_obj))
+	{
+	  /* Assume we have a threadFlowLocation object, and that it's
+	     for the correct code flow.  */
+	  if (const json::object *linked_obj
+	      = dyn_cast<const json::object *> (linked_val))
+	    {
+	      const property_spec_ref location_prop
+		("threadFlowLocation", "executionOrder", "3.38.11");
+	      if (auto execution_order
+		  = get_optional_property<json::integer_number> (*linked_obj,
+								 location_prop))
+		{
+		  /* Event id is executionOrder - 1.  Only use values that
+		     give a non-negative id and that survive narrowing to
+		     diagnostic_event_id; otherwise fall through and use
+		     the link text.  */
+		  const auto order = execution_order->get ();
+		  if (order >= 1)
+		    {
+		      const auto event_id
+			= static_cast<diagnostic_event_id> (order - 1);
+		      if (event_id == order - 1)
+			{
+			  diagnostic_message_buffer_append_event_id
+			    (result.m_inner, event_id);
+			  return;
+			}
+		    }
+		}
+	    }
+	}
+
+      /* If we can't use the sarif link, simply use the text.  */
       result += link.text.c_str ();
       return;
     }
@@ -1576,6 +1625,36 @@ sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
   result.end_url ();
 }
 
+/* Resolve DST, a URI using the "sarif" scheme (§3.10.3), to a JSON value
+   by parsing the part of DST after "sarif:" as a JSON pointer, passing
+   m_root_val (set by replay_file) to the pointer parser.
+   Return the value on success.  If the pointer can't be parsed, report
+   invalid SARIF against MESSAGE_OBJ and return nullptr.
+   DST must begin with "sarif:/".  */
+const json::value *
+sarif_replayer::decode_link_within_sarif (const char *dst,
+					  const json::object &message_obj)
+{
+  gcc_assert (!strncmp (dst, "sarif:/", strlen ("sarif:/")));
+  gcc_assert (m_root_val);
+
+  /* Skip the "sarif:" scheme, keeping the '/' that starts the pointer.  */
+  auto result
+    = json::pointer::parse_utf8_string (dst + strlen ("sarif:/") - 1,
+					m_root_val);
+  if (result.m_err)
+    {
+      const spec_ref uris_with_sarif_scheme ("3.10.3");
+      pp_token_buffer_element e (result.m_err->m_tokens);
+      report_invalid_sarif
+	(message_obj, uris_with_sarif_scheme,
+	 "error parsing JSON pointer in SARIF link %qs: %e",
+	 dst, &e);
+      return nullptr;
+    }
+  return result.m_val;
+}
+
 /* Lookup the plain text string within a result.message (§3.27.11),
    and substitute for any placeholders (§3.11.5) and handle any embedded
    links (§3.11.6).
@@ -1660,7 +1739,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj,
 		}
 	    }
 	  else if (auto link = maybe_consume_embedded_link (iter_src))
-	    append_embeddded_link (result, *link);
+	    append_embeddded_link (result, *link, message_obj);
 	  else
 	    {
 	      result += ch;
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.10.3-bad-json-pointer.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.10.3-bad-json-pointer.sarif
new file mode 100644
index 00000000000..6a4acf936b7
--- /dev/null
+++ b/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.10.3-bad-json-pointer.sarif
@@ -0,0 +1,112 @@
+{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json",
+ "version": "2.1.0",
+ "runs": [{"tool": {"driver": {"name": "GNU C23",
+                               "fullName": "GNU C23 (GCC) version 16.0.1 20260114 (experimental) (x86_64-pc-linux-gnu)",
+                               "version": "16.0.1 20260114 (experimental)",
+                               "informationUri": "https://gcc.gnu.org/gcc-16/",
+                               "rules": [{"id": "-Wanalyzer-malloc-leak",
+                                          "helpUri":
"https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}}, + "taxonomies": [{"name": "CWE", + "version": "4.7", + "organization": "MITRE", + "shortDescription": {"text": "The MITRE Common Weakness Enumeration"}, + "taxa": [{"id": "401", + "helpUri": "https://cwe.mitre.org/data/definitions/401.html"}]}], + "invocations": [{"arguments": ["./cc1", + "-quiet", + "-iprefix", + "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/", + "-isystem", + "./include", + "-isystem", + "./include-fixed", + "pr123056.c", + "-quiet", + "-dumpbase", + "pr123056.c", + "-dumpbase-ext", + ".c", + "-mtune=generic", + "-march=x86-64", + "-fanalyzer", + "-fdiagnostics-add-output=sarif", + "-fdiagnostics-add-output=experimental-html", + "-o", + "pr123056.s"], + "workingDirectory": {"uri": "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"}, + "startTimeUtc": "2026-01-16T17:43:19Z", + "executionSuccessful": true, + "toolExecutionNotifications": [], + "endTimeUtc": "2026-01-16T17:43:19Z"}], + "originalUriBaseIds": {"PWD": {"uri": "file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}}, + "artifacts": [{"location": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "sourceLanguage": "c", + "contents": {"text": "void test (void)\n{\n void *p = __builtin_malloc (1024);\n}\n"}, + "roles": ["analysisTarget", + "tracedFile"]}], + "results": [{"ruleId": "-Wanalyzer-malloc-leak", + "taxa": [{"id": "401", + "toolComponent": {"name": "cwe"}}], + "properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 0, + "gcc/analyzer/saved_diagnostic/duplicates": [{"properties": {"gcc/analyzer/saved_diagnostic/sm": 
"malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 1, + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}], + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}, + "level": "warning", + "message": {"text": "leak of ‘p’"}, + "locations": [{"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}]}], + "codeFlows": [{"threadFlows": [{"id": "main", + "locations": [{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)", + "gcc/analyzer/checker_event/kind": "state_change"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 3, + "startColumn": 13, + "endColumn": 36}, + "contextRegion": {"startLine": 3, + "snippet": {"text": " void *p = __builtin_malloc (1024);\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "allocated here"}}, + "kinds": ["acquire", + "memory"], + "nestingLevel": 1, + "executionOrder": 1}, + {"properties": {"gcc/analyzer/checker_event/emission_id": "(2)", + "gcc/analyzer/checker_event/kind": "warning"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "‘p’ leaks here; was allocated at [(1)](sarif:/runs/1066/results/0/codeFlows/0/threadFlows/0/locations/0)"}}, + "kinds": 
["danger"], + "nestingLevel": 1, + "executionOrder": 2}]}]}]}], + "logicalLocations": [{"name": "test", + "fullyQualifiedName": "test", + "decoratedName": "test", + "kind": "function", + "index": 0}]}]} -- 2.49.0