source: rtems-tools/rtemstoolkit/libiberty/rust-demangle.c @ 78bbe4c

5
Last change on this file since 78bbe4c was 78bbe4c, checked in by Chris Johns <chrisj@…>, on 08/16/17 at 08:09:59

linkers/exe-info Support ARM static constructors.

Note, ARM destructors are registered at runtime and currently not
easily found.

Update libiberty to get a newer demangler.

Closes #3102.

  • Property mode set to 100644
File size: 9.8 KB
Line 
1/* Demangler for the Rust programming language
2   Copyright (C) 2016-2017 Free Software Foundation, Inc.
3   Written by David Tolnay (dtolnay@gmail.com).
4
5This file is part of the libiberty library.
6Libiberty is free software; you can redistribute it and/or
7modify it under the terms of the GNU Library General Public
8License as published by the Free Software Foundation; either
9version 2 of the License, or (at your option) any later version.
10
11In addition to the permissions in the GNU Library General Public
12License, the Free Software Foundation gives you unlimited permission
13to link the compiled version of this file into combinations with other
14programs, and to distribute those combinations without any restriction
15coming from the use of this file.  (The Library Public License
16restrictions do apply in other respects; for example, they cover
17modification of the file, and distribution when not linked into a
18combined executable.)
19
20Libiberty is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23Library General Public License for more details.
24
25You should have received a copy of the GNU Library General Public
26License along with libiberty; see the file COPYING.LIB.
27If not, see <http://www.gnu.org/licenses/>.  */
28
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "safe-ctype.h"
35
36#include <sys/types.h>
37#include <string.h>
38#include <stdio.h>
39
40#ifdef HAVE_STRING_H
41#include <string.h>
42#else
43extern size_t strlen(const char *s);
44extern int strncmp(const char *s1, const char *s2, size_t n);
45extern void *memset(void *s, int c, size_t n);
46#endif
47
48#include <demangle.h>
49#include "libiberty.h"
50
51
52/* Mangled Rust symbols look like this:
53     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
54
55   The original symbol is:
56     <std::sys::fd::FileDesc as core::ops::Drop>::drop
57
58   The last component of the path is a 64-bit hash in lowercase hex,
59   prefixed with "h". Rust does not have a global namespace between
60   crates, an illusion which Rust maintains by using the hash to
61   distinguish things that would otherwise have the same symbol.
62
63   Any path component not starting with a XID_Start character is
64   prefixed with "_".
65
66   The following escape sequences are used:
67
68   ","  =>  $C$
69   "@"  =>  $SP$
70   "*"  =>  $BP$
71   "&"  =>  $RF$
72   "<"  =>  $LT$
73   ">"  =>  $GT$
74   "("  =>  $LP$
75   ")"  =>  $RP$
76   " "  =>  $u20$
77   "\"" =>  $u22$
78   "'"  =>  $u27$
79   "+"  =>  $u2b$
80   ";"  =>  $u3b$
81   "["  =>  $u5b$
82   "]"  =>  $u5d$
83   "{"  =>  $u7b$
84   "}"  =>  $u7d$
85   "~"  =>  $u7e$
86
87   A double ".." means "::" and a single "." means "-".
88
89   The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$  */
90
/* Text that introduces the trailing hash component of a symbol.  */
91static const char *hash_prefix = "::h";
/* Number of characters in hash_prefix.  */
92static const size_t hash_prefix_len = 3;
/* Number of hex digits that follow hash_prefix.  */
93static const size_t hash_len = 16;
94
/* Forward declarations of the file-local helpers defined later in
   this file.  */
95static int is_prefixed_hash (const char *start);
96static int looks_like_rust (const char *sym, size_t len);
97static int unescape (const char **in, char **out, const char *seq, char value);
98
99/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
100
101   This function looks for the following indicators:
102
103   1. The hash must consist of "h" followed by 16 lowercase hex digits.
104
105   2. As a sanity check, the hash must use between 5 and 15 of the 16
106      possible hex digits. This is true of 99.9998% of hashes so once
107      in your life you may see a false negative. The point is to
108      notice path components that could be Rust hashes but are
109      probably not, like "haaaaaaaaaaaaaaaa". In this case a false
110      positive (non-Rust symbol has an important path component
111      removed because it looks like a Rust hash) is worse than a false
112      negative (the rare Rust symbol is not demangled) so this sets
113      the balance in favor of false negatives.
114
115   3. There must be no characters other than a-zA-Z0-9 and _.:$
116
117   4. There must be no unrecognized $-sign sequences.
118
119   5. There must be no sequence of three or more dots in a row ("...").  */
120
121int
122rust_is_mangled (const char *sym)
123{
124  size_t len, len_without_hash;
125
126  if (!sym)
127    return 0;
128
129  len = strlen (sym);
130  if (len <= hash_prefix_len + hash_len)
131    /* Not long enough to contain "::h" + hash + something else */
132    return 0;
133
134  len_without_hash = len - (hash_prefix_len + hash_len);
135  if (!is_prefixed_hash (sym + len_without_hash))
136    return 0;
137
138  return looks_like_rust (sym, len_without_hash);
139}
140
141/* A hash is the prefix "::h" followed by 16 lowercase hex digits. The
142   hex digits must comprise between 5 and 15 (inclusive) distinct
143   digits.  */
144
145static int
146is_prefixed_hash (const char *str)
147{
148  const char *end;
149  char seen[16];
150  size_t i;
151  int count;
152
153  if (strncmp (str, hash_prefix, hash_prefix_len))
154    return 0;
155  str += hash_prefix_len;
156
157  memset (seen, 0, sizeof(seen));
158  for (end = str + hash_len; str < end; str++)
159    if (*str >= '0' && *str <= '9')
160      seen[*str - '0'] = 1;
161    else if (*str >= 'a' && *str <= 'f')
162      seen[*str - 'a' + 10] = 1;
163    else
164      return 0;
165
166  /* Count how many distinct digits seen */
167  count = 0;
168  for (i = 0; i < 16; i++)
169    if (seen[i])
170      count++;
171
172  return count >= 5 && count <= 15;
173}
174
/* Return 1 if the first LEN characters of STR contain only what a
   mangled Rust path may contain, 0 otherwise.  Allowed are a-zA-Z0-9,
   '_', ':', '.' (but never three or more dots in a row) and the
   recognized $-escape sequences.  An empty range is accepted.  */

static int
looks_like_rust (const char *str, size_t len)
{
  static const char *const escapes[] = {
    "$C$",
    "$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
    "$u20$", "$u22$", "$u27$", "$u2b$", "$u3b$",
    "$u5b$", "$u5d$", "$u7b$", "$u7d$", "$u7e$"
  };
  static const char plain_chars[] =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_:";
  const size_t n_escapes = sizeof (escapes) / sizeof (escapes[0]);
  const char *const limit = str + len;
  const char *cursor = str;

  while (cursor < limit)
    {
      const char c = *cursor;

      if (c == '$')
        {
          /* Step over a known escape sequence as a whole; anything
             else starting with '$' disqualifies the symbol.  */
          size_t matched = 0;
          size_t i;

          for (i = 0; i < n_escapes; i++)
            {
              const size_t esc_len = strlen (escapes[i]);

              if (strncmp (cursor, escapes[i], esc_len) == 0)
                {
                  matched = esc_len;
                  break;
                }
            }

          if (matched == 0)
            return 0;
          cursor += matched;
        }
      else if (c == '.')
        {
          /* Do not allow three or more consecutive dots */
          if (strncmp (cursor, "...", 3) == 0)
            return 0;
          cursor++;
        }
      else
        {
          /* Any other character has to be in the plain set.  The scan
             ends on the terminator of plain_chars, so a NUL in the
             input is rejected as well.  */
          const char *p = plain_chars;

          while (*p != '\0' && *p != c)
            p++;
          if (*p == '\0')
            return 0;
          cursor++;
        }
    }

  return 1;
}
235
236/*
237  INPUT: sym: symbol for which rust_is_mangled(sym) returned 1.
238
239  The input is demangled in-place because the mangled name is always
240  longer than the demangled one.  */
241
242void
243rust_demangle_sym (char *sym)
244{
245  const char *in;
246  char *out;
247  const char *end;
248
249  if (!sym)
250    return;
251
252  in = sym;
253  out = sym;
254  end = sym + strlen (sym) - (hash_prefix_len + hash_len);
255
256  while (in < end)
257    switch (*in)
258      {
259      case '$':
260        if (!(unescape (&in, &out, "$C$", ',')
261              || unescape (&in, &out, "$SP$", '@')
262              || unescape (&in, &out, "$BP$", '*')
263              || unescape (&in, &out, "$RF$", '&')
264              || unescape (&in, &out, "$LT$", '<')
265              || unescape (&in, &out, "$GT$", '>')
266              || unescape (&in, &out, "$LP$", '(')
267              || unescape (&in, &out, "$RP$", ')')
268              || unescape (&in, &out, "$u20$", ' ')
269              || unescape (&in, &out, "$u22$", '\"')
270              || unescape (&in, &out, "$u27$", '\'')
271              || unescape (&in, &out, "$u2b$", '+')
272              || unescape (&in, &out, "$u3b$", ';')
273              || unescape (&in, &out, "$u5b$", '[')
274              || unescape (&in, &out, "$u5d$", ']')
275              || unescape (&in, &out, "$u7b$", '{')
276              || unescape (&in, &out, "$u7d$", '}')
277              || unescape (&in, &out, "$u7e$", '~'))) {
278          /* unexpected escape sequence, not looks_like_rust. */
279          goto fail;
280        }
281        break;
282      case '_':
283        /* If this is the start of a path component and the next
284           character is an escape sequence, ignore the underscore. The
285           mangler inserts an underscore to make sure the path
286           component begins with a XID_Start character. */
287        if ((in == sym || in[-1] == ':') && in[1] == '$')
288          in++;
289        else
290          *out++ = *in++;
291        break;
292      case '.':
293        if (in[1] == '.')
294          {
295            /* ".." becomes "::" */
296            *out++ = ':';
297            *out++ = ':';
298            in += 2;
299          }
300        else
301          {
302            /* "." becomes "-" */
303            *out++ = '-';
304            in++;
305          }
306        break;
307      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
308      case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
309      case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
310      case 's': case 't': case 'u': case 'v': case 'w': case 'x':
311      case 'y': case 'z':
312      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
313      case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
314      case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
315      case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
316      case 'Y': case 'Z':
317      case '0': case '1': case '2': case '3': case '4': case '5':
318      case '6': case '7': case '8': case '9':
319      case ':':
320        *out++ = *in++;
321        break;
322      default:
323        /* unexpected character in symbol, not looks_like_rust.  */
324        goto fail;
325      }
326  goto done;
327
328fail:
329  *out++ = '?'; /* This is pretty lame, but it's hard to do better. */
330done:
331  *out = '\0';
332}
333
/* If the text at *IN starts with the escape sequence SEQ, store VALUE
   at *OUT, advance *IN past the sequence and *OUT by one character,
   and return 1.  Otherwise leave both pointers alone and return 0.  */

static int
unescape (const char **in, char **out, const char *seq, char value)
{
  const size_t seq_len = strlen (seq);
  const char *src = *in;
  char *dst = *out;

  if (strncmp (src, seq, seq_len) == 0)
    {
      *dst = value;
      *in = src + seq_len;
      *out = dst + 1;
      return 1;
    }

  return 0;
}
Note: See TracBrowser for help on using the repository browser.