LLVM OpenMP* Runtime Library
extractExternal.cpp
1 /*
2  * extractExternal.cpp
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <strstream>
21 
22 /* Given a set of n object files h ('external' object files) and a set of m
23  object files o ('internal' object files),
24  1. Determines r, the subset of h that o depends on, directly or indirectly
25  2. Removes the files in h - r from the file system
26  3. For each external symbol defined in some file in r, rename it in r U o
27  by prefixing it with "__kmp_external_"
28  Usage:
29  hide.exe <n> <filenames for h> <filenames for o>
30 
31  Thus, the prefixed symbols become hidden in the sense that they now have a
32  special prefix.
33 */
34 
35 using namespace std;
36 
// Prints <errorMsg> followed by a newline on standard output and terminates
// the process with exit status 1. This function never returns.
// The parameter is a pointer to const so that string literals, which is what
// every caller in this file passes, can be supplied without a deprecated
// (and since C++11 ill-formed) literal-to-'char *' conversion.
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}
41 
// An entry in the symbol table of a .OBJ file. Objects of this class are
// filled by copying an 18-byte record from the file straight over the start of
// the object, so the members must stay in this order and keep these sizes:
// name at offset 0, value at 8, sectionNum at 12, type at 14, storageClass at
// 16 and nAux at 17.
class Symbol {
public:
  // 8-byte name field; StringTable::decode/encode convert it from/to a string
  __int64 name;
  unsigned value;
  // sectionNum is zero for a symbol that is not defined in this file
  unsigned short sectionNum, type;
  // the value 2 marks an external symbol
  char storageClass;
  // Number of auxiliary 18-byte records that follow this entry. Kept unsigned
  // so that a count above 127 is not read back as a negative number, which
  // would make the callers skip backwards through the symbol table.
  unsigned char nAux;
};
50 
// Helper base of rstream: an input stream over a character array that this
// object takes ownership of. The array is handed to istrstream for reading and
// is released with delete[] when the object is destroyed, so it must have been
// allocated with new[].
class _rstream : public istrstream {
private:
  // start of the array backing the stream; released by the destructor
  const char *buf;

protected:
  // <source> holds the array (first) and its size in bytes (second)
  _rstream(pair<const char *, streamsize> source)
      : istrstream(source.first, source.second), buf(source.first) {
  }

  ~_rstream() {
    delete[] buf;
  }
};
60 
61 // A stream encapuslating the content of a file or the content of a string,
62 // overriding the >> operator to read various integer types in binary form,
63 // as well as a symbol table entry.
64 class rstream : public _rstream {
65 private:
66  template <class T> inline rstream &doRead(T &x) {
67  read((char *)&x, sizeof(T));
68  return *this;
69  }
70  static pair<const char *, streamsize> getBuf(const char *fileName) {
71  ifstream raw(fileName, ios::binary | ios::in);
72  if (!raw.is_open())
73  stop("rstream.getBuf: Error opening file");
74  raw.seekg(0, ios::end);
75  streampos fileSize = raw.tellg();
76  if (fileSize < 0)
77  stop("rstream.getBuf: Error reading file");
78  char *buf = new char[fileSize];
79  raw.seekg(0, ios::beg);
80  raw.read(buf, fileSize);
81  return pair<const char *, streamsize>(buf, fileSize);
82  }
83 
84 public:
85  // construct from a string
86  rstream(const char *buf, streamsize size)
87  : _rstream(pair<const char *, streamsize>(buf, size)) {}
88  // construct from a file whole content is fully read once to initialize the
89  // content of this stream
90  rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
91  rstream &operator>>(int &x) { return doRead(x); }
92  rstream &operator>>(unsigned &x) { return doRead(x); }
93  rstream &operator>>(short &x) { return doRead(x); }
94  rstream &operator>>(unsigned short &x) { return doRead(x); }
95  rstream &operator>>(Symbol &e) {
96  read((char *)&e, 18);
97  return *this;
98  }
99 };
100 
101 // string table in a .OBJ file
102 class StringTable {
103 private:
104  map<string, unsigned> directory;
105  size_t length;
106  char *data;
107 
108  // make <directory> from <length> bytes in <data>
109  void makeDirectory(void) {
110  unsigned i = 4;
111  while (i < length) {
112  string s = string(data + i);
113  directory.insert(make_pair(s, i));
114  i += s.size() + 1;
115  }
116  }
117  // initialize <length> and <data> with contents specified by the arguments
118  void init(const char *_data) {
119  unsigned _length = *(unsigned *)_data;
120 
121  if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
122  stop("StringTable.init: Invalid symbol table");
123  if (_data[_length - 1]) {
124  // to prevent runaway strings, make sure the data ends with a zero
125  data = new char[length = _length + 1];
126  data[_length] = 0;
127  } else {
128  data = new char[length = _length];
129  }
130  *(unsigned *)data = length;
131  KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
132  length - sizeof(unsigned));
133  makeDirectory();
134  }
135 
136 public:
137  StringTable(rstream &f) {
138  // Construct string table by reading from f.
139  streampos s;
140  unsigned strSize;
141  char *strData;
142 
143  s = f.tellg();
144  f >> strSize;
145  if (strSize < sizeof(unsigned))
146  stop("StringTable: Invalid string table");
147  strData = new char[strSize];
148  *(unsigned *)strData = strSize;
149  // read the raw data into <strData>
150  f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
151  s = f.tellg() - s;
152  if (s < strSize)
153  stop("StringTable: Unexpected EOF");
154  init(strData);
155  delete[] strData;
156  }
157  StringTable(const set<string> &strings) {
158  // Construct string table from given strings.
159  char *p;
160  set<string>::const_iterator it;
161  size_t s;
162 
163  // count required size for data
164  for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
165  ++it) {
166  size_t l = (*it).size();
167 
168  if (l > (unsigned)0xFFFFFFFF)
169  stop("StringTable: String too long");
170  if (l > 8) {
171  length += l + 1;
172  if (length > (unsigned)0xFFFFFFFF)
173  stop("StringTable: Symbol table too long");
174  }
175  }
176  data = new char[length];
177  *(unsigned *)data = length;
178  // populate data and directory
179  for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
180  ++it) {
181  const string &str = *it;
182  size_t l = str.size();
183  if (l > 8) {
184  directory.insert(make_pair(str, p - data));
185  KMP_MEMCPY(p, str.c_str(), l);
186  p[l] = 0;
187  p += l + 1;
188  }
189  }
190  }
191  ~StringTable() { delete[] data; }
192  // Returns encoding for given string based on this string table. Error if
193  // string length is greater than 8 but string is not in the string table
194  // -- returns 0.
195  __int64 encode(const string &str) {
196  __int64 r;
197 
198  if (str.size() <= 8) {
199  // encoded directly
200  ((char *)&r)[7] = 0;
201  KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
202  return r;
203  } else {
204  // represented as index into table
205  map<string, unsigned>::const_iterator it = directory.find(str);
206  if (it == directory.end())
207  stop("StringTable::encode: String now found in string table");
208  ((unsigned *)&r)[0] = 0;
209  ((unsigned *)&r)[1] = (*it).second;
210  return r;
211  }
212  }
213  // Returns string represented by x based on this string table. Error if x
214  // references an invalid position in the table--returns the empty string.
215  string decode(__int64 x) const {
216  if (*(unsigned *)&x == 0) {
217  // represented as index into table
218  unsigned &p = ((unsigned *)&x)[1];
219  if (p >= length)
220  stop("StringTable::decode: Invalid string table lookup");
221  return string(data + p);
222  } else {
223  // encoded directly
224  char *p = (char *)&x;
225  int i;
226 
227  for (i = 0; i < 8 && p[i]; ++i)
228  ;
229  return string(p, i);
230  }
231  }
232  void write(ostream &os) { os.write(data, length); }
233 };
234 
235 // for the named object file, determines the set of defined symbols and the set
236 // of undefined external symbols and writes them to <defined> and <undefined>
237 // respectively
238 void computeExternalSymbols(const char *fileName, set<string> *defined,
239  set<string> *undefined) {
240  streampos fileSize;
241  size_t strTabStart;
242  unsigned symTabStart, symNEntries;
243  rstream f(fileName);
244 
245  f.seekg(0, ios::end);
246  fileSize = f.tellg();
247 
248  f.seekg(8);
249  f >> symTabStart >> symNEntries;
250  // seek to the string table
251  f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
252  if (f.eof()) {
253  printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
254  "= %u, symNEntries = %u\n",
255  fileName, (unsigned long)fileSize, symTabStart, symNEntries);
256  stop("computeExternalSymbols: Unexpected EOF 1");
257  }
258  StringTable stringTable(f); // read the string table
259  if (f.tellg() != fileSize)
260  stop("computeExternalSymbols: Unexpected data after string table");
261 
262  f.clear();
263  f.seekg(symTabStart); // seek to the symbol table
264 
265  defined->clear();
266  undefined->clear();
267  for (int i = 0; i < symNEntries; ++i) {
268  // process each entry
269  Symbol e;
270 
271  if (f.eof())
272  stop("computeExternalSymbols: Unexpected EOF 2");
273  f >> e;
274  if (f.fail())
275  stop("computeExternalSymbols: File read error");
276  if (e.nAux) { // auxiliary entry: skip
277  f.seekg(e.nAux * 18, ios::cur);
278  i += e.nAux;
279  }
280  // if symbol is extern and defined in the current file, insert it
281  if (e.storageClass == 2)
282  if (e.sectionNum)
283  defined->insert(stringTable.decode(e.name));
284  else
285  undefined->insert(stringTable.decode(e.name));
286  }
287 }
288 
289 // For each occurrence of an external symbol in the object file named by
290 // by <fileName> that is a member of <hide>, renames it by prefixing
291 // with "__kmp_external_", writing back the file in-place
292 void hideSymbols(char *fileName, const set<string> &hide) {
293  static const string prefix("__kmp_external_");
294  set<string> strings; // set of all occurring symbols, appropriately prefixed
295  streampos fileSize;
296  size_t strTabStart;
297  unsigned symTabStart, symNEntries;
298  int i;
299  rstream in(fileName);
300 
301  in.seekg(0, ios::end);
302  fileSize = in.tellg();
303 
304  in.seekg(8);
305  in >> symTabStart >> symNEntries;
306  in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
307  if (in.eof())
308  stop("hideSymbols: Unexpected EOF");
309  StringTable stringTableOld(in); // read original string table
310 
311  if (in.tellg() != fileSize)
312  stop("hideSymbols: Unexpected data after string table");
313 
314  // compute set of occurring strings with prefix added
315  for (i = 0; i < symNEntries; ++i) {
316  Symbol e;
317 
318  in.seekg(symTabStart + i * 18);
319  if (in.eof())
320  stop("hideSymbols: Unexpected EOF");
321  in >> e;
322  if (in.fail())
323  stop("hideSymbols: File read error");
324  if (e.nAux)
325  i += e.nAux;
326  const string &s = stringTableOld.decode(e.name);
327  // if symbol is extern and found in <hide>, prefix and insert into strings,
328  // otherwise, just insert into strings without prefix
329  strings.insert(
330  (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
331  }
332 
333  ofstream out(fileName, ios::trunc | ios::out | ios::binary);
334  if (!out.is_open())
335  stop("hideSymbols: Error opening output file");
336 
337  // make new string table from string set
338  StringTable stringTableNew = StringTable(strings);
339 
340  // copy input file to output file up to just before the symbol table
341  in.seekg(0);
342  char *buf = new char[symTabStart];
343  in.read(buf, symTabStart);
344  out.write(buf, symTabStart);
345  delete[] buf;
346 
347  // copy input symbol table to output symbol table with name translation
348  for (i = 0; i < symNEntries; ++i) {
349  Symbol e;
350 
351  in.seekg(symTabStart + i * 18);
352  if (in.eof())
353  stop("hideSymbols: Unexpected EOF");
354  in >> e;
355  if (in.fail())
356  stop("hideSymbols: File read error");
357  const string &s = stringTableOld.decode(e.name);
358  out.seekp(symTabStart + i * 18);
359  e.name = stringTableNew.encode(
360  (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
361  out.write((char *)&e, 18);
362  if (out.fail())
363  stop("hideSymbols: File write error");
364  if (e.nAux) {
365  // copy auxiliary symbol table entries
366  int nAux = e.nAux;
367  for (int j = 1; j <= nAux; ++j) {
368  in >> e;
369  out.seekp(symTabStart + (i + j) * 18);
370  out.write((char *)&e, 18);
371  }
372  i += nAux;
373  }
374  }
375  // output string table
376  stringTableNew.write(out);
377 }
378 
// Returns true iff <a> and <b> have no common element. Both sets are walked
// once in ascending order, always advancing the iterator that points at the
// smaller element; elements are compared with operator< only.
template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) {
  // 'typename' is required because const_iterator depends on T
  typename set<T>::const_iterator ita = a.begin(), itb = b.begin();

  while (ita != a.end() && itb != b.end()) {
    const T &ta = *ita, &tb = *itb;
    if (ta < tb)
      ++ita;
    else if (tb < ta)
      ++itb;
    else
      return false; // neither is smaller: common element found
  }
  return true;
}
394 
395 // PRE: <defined> and <undefined> are arrays with <nTotal> elements where
396 // <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the
397 // external object files and the rest correspond to the internal object files.
398 // POST: file x is said to depend on file y if undefined[x] and defined[y] are
399 // not disjoint. Returns the transitive closure of the set of internal object
400 // files, as a set of file indexes, under the 'depends on' relation, minus the
401 // set of internal object files.
402 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
403  set<string> *undefined) {
404  set<int> *required = new set<int>;
405  set<int> fresh[2];
406  int i, cur = 0;
407  bool changed;
408 
409  for (i = nTotal - 1; i >= nExternal; --i)
410  fresh[cur].insert(i);
411  do {
412  changed = false;
413  for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
414  ++it) {
415  set<string> &s = undefined[*it];
416 
417  for (i = 0; i < nExternal; ++i) {
418  if (required->find(i) == required->end()) {
419  if (!isDisjoint(defined[i], s)) {
420  // found a new qualifying element
421  required->insert(i);
422  fresh[1 - cur].insert(i);
423  changed = true;
424  }
425  }
426  }
427  }
428  fresh[cur].clear();
429  cur = 1 - cur;
430  } while (changed);
431  return required;
432 }
433 
434 int main(int argc, char **argv) {
435  int nExternal, nInternal, i;
436  set<string> *defined, *undefined;
437  set<int>::iterator it;
438 
439  if (argc < 3)
440  stop("Please specify a positive integer followed by a list of object "
441  "filenames");
442  nExternal = atoi(argv[1]);
443  if (nExternal <= 0)
444  stop("Please specify a positive integer followed by a list of object "
445  "filenames");
446  if (nExternal + 2 > argc)
447  stop("Too few external objects");
448  nInternal = argc - nExternal - 2;
449  defined = new set<string>[argc - 2];
450  undefined = new set<string>[argc - 2];
451 
452  // determine the set of defined and undefined external symbols
453  for (i = 2; i < argc; ++i)
454  computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
455 
456  // determine the set of required external files
457  set<int> *requiredExternal =
458  findRequiredExternal(nExternal, argc - 2, defined, undefined);
459  set<string> hide;
460 
461  // determine the set of symbols to hide--namely defined external symbols of
462  // the required external files
463  for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
464  int idx = *it;
465  set<string>::iterator it2;
466  // We have to insert one element at a time instead of inserting a range
467  // because the insert member function taking a range doesn't exist on
468  // Windows* OS, at least at the time of this writing.
469  for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
470  hide.insert(*it2);
471  }
472 
473  // process the external files--removing those that are not required and hiding
474  // the appropriate symbols in the others
475  for (i = 0; i < nExternal; ++i)
476  if (requiredExternal->find(i) != requiredExternal->end())
477  hideSymbols(argv[2 + i], hide);
478  else
479  remove(argv[2 + i]);
480  // hide the appropriate symbols in the internal files
481  for (i = nExternal + 2; i < argc; ++i)
482  hideSymbols(argv[i], hide);
483  return 0;
484 }