1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 *
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * The Original Code is msmap2tsv.c code, released
17 * Oct 3, 2002.
18 *
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 2002
22 * the Initial Developer. All Rights Reserved.
23 *
24 * Contributor(s):
25 * Garrett Arch Blythe, 03-October-2002
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <errno.h>

#include "msmap.h"
48
49 #if defined(_WIN32)
50 #include <windows.h>
51 #include <imagehlp.h>
52
#define F_DEMANGLE 1

/*
** States of the hand written `string' demangler found in symdup().
**
** NORMAL               Copying the characters of the string itself.
** QDECODE              A '?' was seen; the next character is a shortcut.
** PROLOGUE_1           First '@' seen; waiting for the type digit.
** HAVE_TYPE            Type digit seen; waiting for the length field.
** DEC_LENGTH           Inside a decimal length; ends at the next '@'.
** HEX_LENGTH           Inside a letter encoded length; ends at the next '@'.
** PROLOGUE_SECONDARY   Extra field following a HEX_LENGTH; ends at '@'.
** DOLLAR_1             "?$" seen; waiting for the first encoded hex digit.
** DOLLAR_2             Waiting for the second encoded hex digit.
** START                Nothing consumed yet; waiting for the first '@'.
** STOP                 Terminating '@' seen; remaining input is ignored.
*/
#define DEMANGLE_STATE_NORMAL 0
#define DEMANGLE_STATE_QDECODE 1
#define DEMANGLE_STATE_PROLOGUE_1 2
#define DEMANGLE_STATE_HAVE_TYPE 3
#define DEMANGLE_STATE_DEC_LENGTH 4
#define DEMANGLE_STATE_HEX_LENGTH 5
#define DEMANGLE_STATE_PROLOGUE_SECONDARY 6
#define DEMANGLE_STATE_DOLLAR_1 7
#define DEMANGLE_STATE_DOLLAR_2 8
#define DEMANGLE_STATE_START 9
#define DEMANGLE_STATE_STOP 10

/*
** Yields eval when it is a printable character, a blank otherwise.
** The value is converted to unsigned char before it reaches isprint():
** handing a negative char to a <ctype.h> function is undefined behavior.
** eval is expanded twice, so it must be free of side effects.
*/
#define DEMANGLE_SAFE_CHAR(eval) (isprint((unsigned char)(eval)) ? (eval) : ' ')
66
67 #else
68 #define F_DEMANGLE 0
69 #endif /* WIN32 */
70
71
/*
** Report an error on stderr as:  error(<num>):<TAB>"<val>"<TAB><msg>
**
** NOTE: the expansion ends with its own semicolon and call sites add a
** second one, so never use this macro as the unbraced body of an if/else.
*/
#define ERROR_REPORT(num, val, msg) fprintf(stderr, "error(%d):\t\"%s\"\t%s\n", (num), (val), (msg));

/*
** Free ptr when it is not NULL, then reset it to NULL.
** ptr must be a modifiable lvalue without side effects, as it is expanded
** more than once; it is parenthesized so expressions such as *p are safe.
*/
#define CLEANUP(ptr) do { if(NULL != (ptr)) { free(ptr); (ptr) = NULL; } } while(0)
74
75
/*
** The size of a symbol.
** The size is nested within the symbol structures to produce a fast
** lookup path.
** The objects are listed in case the client of the symdb needs to match
** the object name in the scenario where multiple symbol sizes are present.
*/
typedef struct __struct_SymDB_Size
{
    unsigned mSize;         /* The size of the symbol in these objects. */
    char** mObjects;        /* A list of objects containing said symbol. */
    unsigned mObjectCount;  /* Number of objects. */
}
SymDB_Size;
95
96
97 typedef struct __struct_SymDB_Section
98 /*
99 ** Each section for a symbol has a list of sizes.
100 ** Should there be exactly one size for the symbol, then that
101 ** is the size that should be accepted.
102 ** If there is more than one size, then a match on the object
103 ** should be attempted, held withing each size.
104 **
105 ** mName The section name.
106 ** mSizes The varoius sizes of the symbol in this section.
107 ** mSizeCount The number of available sizes.
108 */
109 {
110 char* mName;
111 SymDB_Size* mSizes;
112 unsigned mSizeCount;
113 }
114 SymDB_Section;
115
116
117 typedef struct __struct_SymDB_Symbol
118 /*
119 ** Each symbol has at least one section.
120 ** The section indicates what type of symbol a client may be looking for.
121 ** If there is no match on the section, then the client should not trust
122 ** the symbdb.
123 **
124 ** mName The mangled name of the symbol.
125 ** mSections Various sections this symbol belongs to.
126 ** mSectionCount The number of sections.
127 */
128 {
129 char* mName;
130 SymDB_Section* mSections;
131 unsigned mSectionCount;
132 }
133 SymDB_Symbol;
134
135
136 #define SYMDB_SYMBOL_GROWBY 0x1000 /* how many sybols to allocate at a time */
137
138
139 typedef struct __struct_SymDB_Container
140 /*
141 ** The symbol DB container object.
142 ** The goal of the symbol DB is to have exactly one SymDB_Symbol for each
143 ** mangled name, no matter how ever many identical mangled names there
144 ** are in the input.
145 ** The input is already expected to be well sorted, futher this leads to
146 ** the ability to binary search for symbol name matches.
147 **
148 ** mSymbols The symbols.
149 ** mSymbolCount The number of symbols in the DB.
150 ** mSymbolCapacity The number of symbols we can hold (before realloc).
151 */
152 {
153 SymDB_Symbol* mSymbols;
154 unsigned mSymbolCount;
155 unsigned mSymbolCapacity;
156 }
157 SymDB_Container;
158
159
160 typedef struct __struct_Options
161 /*
162 ** Options to control how we perform.
163 **
164 ** mProgramName Used in help text.
165 ** mInput File to read for input.
166 ** Default is stdin.
167 ** mInputName Name of the file.
168 ** mOutput Output file, append.
169 ** Default is stdout.
170 ** mOutputName Name of the file.
171 ** mHelp Whether or not help should be shown.
172 ** mMatchModules Array of strings which the module name should match.
173 ** mMatchModuleCount Number of items in array.
174 ** mSymDBName Symbol DB filename.
175 ** mBatchMode Batch mode.
176 ** When in batch mode, the input file contains a list of
177 ** map files to process.
178 ** Normally the input file is a single map file itself.
179 */
180 {
181 const char* mProgramName;
182 FILE* mInput;
183 char* mInputName;
184 FILE* mOutput;
185 char* mOutputName;
186 int mHelp;
187 char** mMatchModules;
188 unsigned mMatchModuleCount;
189 char* mSymDBName;
190 SymDB_Container* mSymDB;
191 int mBatchMode;
192 }
193 Options;
194
195
/*
** Command line options.
** The field order is relied upon by the positional initializers of the
** switch definitions in this file.
*/
typedef struct __struct_Switch
{
    const char* mLongName;      /* Long spelling, e.g. "--input". */
    const char* mShortName;     /* Short spelling, e.g. "-i". */
    int mHasValue;              /* Initialized to 1 for switches described as taking a value, 0 otherwise. */
    const char* mValue;         /* Starts out NULL in every definition in this file. */
    const char* mDescription;   /* Description text for the switch. */
}
Switch;
208
209 #define DESC_NEWLINE "\n\t\t"
210
211 static Switch gInputSwitch = {"--input", "-i", 1, NULL, "Specify input file." DESC_NEWLINE "stdin is default."};
212 static Switch gOutputSwitch = {"--output", "-o", 1, NULL, "Specify output file." DESC_NEWLINE "Appends if file exists." DESC_NEWLINE "stdout is default."};
213 static Switch gHelpSwitch = {"--help", "-h", 0, NULL, "Information on usage."};
214 static Switch gMatchModuleSwitch = {"--match-module", "-mm", 1, NULL, "Specify a valid module name." DESC_NEWLINE "Multiple specifications allowed." DESC_NEWLINE "If a module name does not match one of the names specified then no output will occur."};
215 static Switch gSymDBSwitch = {"--symdb", "-sdb", 1, NULL, "Specify a symbol tsv db input file." DESC_NEWLINE "Such a symdb is produced using the tool msdump2symdb." DESC_NEWLINE "This allows better symbol size approximations." DESC_NEWLINE "The symdb file must be pre-sorted."};
216 static Switch gBatchModeSwitch = {"--batch", "-b", 0, NULL, "Runs in batch mode." DESC_NEWLINE "The input file contains a list of map files." DESC_NEWLINE "Normally the input file is a map file itself." DESC_NEWLINE "This eliminates reprocessing the symdb for multiple map files."};
217
218 static Switch* gSwitches[] = {
219 &gInputSwitch,
220 &gOutputSwitch,
221 &gMatchModuleSwitch,
222 &gSymDBSwitch,
223 &gBatchModeSwitch,
224 &gHelpSwitch
225 };
226
227
/*
** Keep track of what state we are in while reading input.
** This gives the input context in which we absorb the datum.
** readmap() tests the mHas... flags in the order they are declared here,
** ending with mFoundStaticSymbols.
*/
typedef struct __struct_MSMap_ReadState
{
    int mHasModule;

    int mHasTimestamp;

    int mHasPreferredLoadAddress;

    int mHasSegmentData;
    int mSegmentDataSkippedLine;            /* NOTE(review): presumably tracks a skipped header line; confirm in readmap. */

    int mHasPublicSymbolData;
    int mHasPublicSymbolDataSkippedLines;   /* Symbols are read once this reaches 2 (column headers skipped). */

    int mHasEntryPoint;                     /* Set once the "entry point at" line was scanned. */

    int mFoundStaticSymbols;                /* Set once the "Static symbols" line was seen. */
}
MSMap_ReadState;
251
252
char* skipWhite(char* inScan)
/*
** Skip leading whitespace.
**
** inScan   NUL terminated string to scan; must not be NULL.
** returns  Pointer to the first non-whitespace character of inScan, or to
**          its terminating NUL when nothing but whitespace was found.
*/
{
    char* retval = inScan;

    /*
    ** Convert to unsigned char first: passing a negative char (any byte
    ** above 0x7F where char is signed) to isspace() is undefined.
    ** The terminating NUL is not whitespace, so the scan stops there.
    */
    while(isspace((unsigned char)*retval))
    {
        retval++;
    }

    return retval;
}
267
void trimWhite(char* inString)
/*
** Remove any whitespace from the end of the string, in place.
**
** inString     NUL terminated, writable string; must not be NULL.
**              Every trailing whitespace character is overwritten by NUL.
*/
{
    /*
    ** size_t avoids narrowing the result of strlen().
    */
    size_t len = strlen(inString);

    /*
    ** The unsigned char conversion keeps isspace() defined for bytes
    ** above 0x7F on platforms where char is signed.
    */
    while(0 != len && isspace((unsigned char)inString[len - 1]))
    {
        len--;
        inString[len] = '\0';
    }
}
289
290
char* lastWord(char* inString)
/*
** Finds and returns the last word in a string.
** It is assumed no whitespace is at the end of the string; if there is,
** the result points at the terminating NUL (an empty word).
**
** inString     NUL terminated string; must not be NULL.
** returns      Pointer into inString just past its last whitespace
**              character, or inString itself when it has no whitespace.
*/
{
    size_t len = strlen(inString);

    /*
    ** Walk backwards until the character in front of the cursor is
    ** whitespace.
    ** The unsigned char conversion keeps isspace() defined for bytes
    ** above 0x7F on platforms where char is signed.
    */
    while(0 != len && !isspace((unsigned char)inString[len - 1]))
    {
        len--;
    }

    return inString + len;
}
312
313
getSymbolSection(MSMap_Module * inModule,MSMap_Symbol * inoutSymbol)314 MSMap_Segment* getSymbolSection(MSMap_Module* inModule, MSMap_Symbol* inoutSymbol)
315 /*
316 ** Perform a lookup for the section of the symbol.
317 ** The function could cache the value.
318 */
319 {
320 MSMap_Segment* retval = NULL;
321
322 if(NULL != inoutSymbol->mSection)
323 {
324 /*
325 ** Use cached value.
326 */
327 retval = inoutSymbol->mSection;
328 }
329 else
330 {
331 unsigned secLoop = 0;
332
333 /*
334 ** Go through sections in module to find the match for the symbol.
335 */
336 for(secLoop = 0; secLoop < inModule->mSegmentCount; secLoop++)
337 {
338 if(inoutSymbol->mPrefix == inModule->mSegments[secLoop].mPrefix)
339 {
340 if(inoutSymbol->mOffset >= inModule->mSegments[secLoop].mOffset)
341 {
342 if(inoutSymbol->mOffset < (inModule->mSegments[secLoop].mOffset + inModule->mSegments[secLoop].mLength))
343 {
344 /*
345 ** We have the section.
346 */
347 retval = &inModule->mSegments[secLoop];
348 break;
349 }
350 }
351 }
352 }
353
354 /*
355 ** Cache the value for next time.
356 */
357 inoutSymbol->mSection = retval;
358 }
359
360 return retval;
361 }
362
363
static char* symDBSplitField(char* inField)
/*
** Cut a tab separated record at the first tab found in inField.
** The tab is overwritten with NUL, terminating the current field.
**
** returns  The start of the following field, or NULL when inField holds
**          no tab.
*/
{
    char* next = strchr(inField, '\t');

    if(NULL != next)
    {
        *next = '\0';
        next++;
    }

    return next;
}


static int symDBAddRecord(SymDB_Container* inDB, const char* inDBName, const char* inSymbol, const char* inSection, const char* inLength, const char* inObject)
/*
** Merge one symbol / section / length / object record into the DB.
** As the symdb is assumed to be pre-sorted, only the most recently added
** symbol, section and size are considered when looking for a match.
**
** A new symbol or section is only counted once its name was duplicated
** and an object name is only stored once duplicated, so the DB never
** contains a NULL name, even after a failure.
**
** inLength     The length as hexadecimal text.
** returns      0 on success, otherwise the line number of the failure
**              (which is also reported through ERROR_REPORT).
*/
{
    int retval = 0;
    SymDB_Symbol* dbSymbol = NULL;
    SymDB_Section* dbSection = NULL;
    SymDB_Size* dbSize = NULL;
    char* copy = NULL;
    void* moved = NULL;
    char* endLength = NULL;
    unsigned long parsed = 0;
    unsigned lengthNum = 0;

    /*
    ** Convert the length into a number.
    ** Reject values which do not survive the trip into an unsigned.
    */
    errno = 0;
    parsed = strtoul(inLength, &endLength, 16);
    lengthNum = (unsigned)parsed;
    if(0 != errno || endLength == inLength || (unsigned long)lengthNum != parsed)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inLength, "Unable to convert symbol DB length into a number.");
        return retval;
    }

    /*
    ** Are we looking at the same symbol as last line?
    ** This assumes the symdb is pre sorted!!!
    */
    if(0 != inDB->mSymbolCount)
    {
        SymDB_Symbol* last = &inDB->mSymbols[inDB->mSymbolCount - 1];

        if(0 == strcmp(last->mName, inSymbol))
        {
            dbSymbol = last;
        }
    }

    if(NULL == dbSymbol)
    {
        /*
        ** Could be time to grow the symbol pool.
        ** Fresh slots are zeroed so their section lists start out empty.
        */
        if(inDB->mSymbolCount >= inDB->mSymbolCapacity)
        {
            moved = realloc(inDB->mSymbols, sizeof(SymDB_Symbol) * (inDB->mSymbolCapacity + SYMDB_SYMBOL_GROWBY));
            if(NULL == moved)
            {
                retval = __LINE__;
                ERROR_REPORT(retval, inDBName, "Unable to grow symbol DB symbol array.");
                return retval;
            }

            inDB->mSymbols = (SymDB_Symbol*)moved;
            memset(&inDB->mSymbols[inDB->mSymbolCapacity], 0, sizeof(SymDB_Symbol) * SYMDB_SYMBOL_GROWBY);
            inDB->mSymbolCapacity += SYMDB_SYMBOL_GROWBY;
        }

        copy = strdup(inSymbol);
        if(NULL == copy)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, inSymbol, "Unable to duplicate string.");
            return retval;
        }

        dbSymbol = &inDB->mSymbols[inDB->mSymbolCount];
        dbSymbol->mName = copy;
        inDB->mSymbolCount++;
    }

    /*
    ** Is this the same section as the last section in the symbol?
    ** This assumes the symdb was presorted!!!!
    */
    if(0 != dbSymbol->mSectionCount)
    {
        SymDB_Section* last = &dbSymbol->mSections[dbSymbol->mSectionCount - 1];

        if(0 == strcmp(last->mName, inSection))
        {
            dbSection = last;
        }
    }

    if(NULL == dbSection)
    {
        moved = realloc(dbSymbol->mSections, sizeof(SymDB_Section) * (dbSymbol->mSectionCount + 1));
        if(NULL == moved)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, inSection, "Unable to grow symbol sections for symbol DB.");
            return retval;
        }
        dbSymbol->mSections = (SymDB_Section*)moved;

        copy = strdup(inSection);
        if(NULL == copy)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, inSection, "Unable to duplicate string.");
            return retval;
        }

        dbSection = &dbSymbol->mSections[dbSymbol->mSectionCount];
        memset(dbSection, 0, sizeof(SymDB_Section));
        dbSection->mName = copy;
        dbSymbol->mSectionCount++;
    }

    /*
    ** Is this the same size as the last size?
    ** This assumes the symdb was presorted!!!
    */
    if(0 != dbSection->mSizeCount)
    {
        SymDB_Size* last = &dbSection->mSizes[dbSection->mSizeCount - 1];

        if(last->mSize == lengthNum)
        {
            dbSize = last;
        }
    }

    if(NULL == dbSize)
    {
        moved = realloc(dbSection->mSizes, sizeof(SymDB_Size) * (dbSection->mSizeCount + 1));
        if(NULL == moved)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, inLength, "Unable to grow symbol section sizes for symbol DB.");
            return retval;
        }
        dbSection->mSizes = (SymDB_Size*)moved;

        dbSize = &dbSection->mSizes[dbSection->mSizeCount];
        memset(dbSize, 0, sizeof(SymDB_Size));
        dbSize->mSize = lengthNum;
        dbSection->mSizeCount++;
    }

    /*
    ** We assume a one to one correlation between size and object.
    ** Always try to add the new object name.
    ** As the symdb is assumed to be sorted, the object names should also
    ** be in order.
    */
    moved = realloc(dbSize->mObjects, sizeof(char*) * (dbSize->mObjectCount + 1));
    if(NULL == moved)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inObject, "Unable to grow symbol section size objects for symbol DB.");
        return retval;
    }
    dbSize->mObjects = (char**)moved;

    copy = strdup(inObject);
    if(NULL == copy)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inObject, "Unable to duplicate string.");
        return retval;
    }

    dbSize->mObjects[dbSize->mObjectCount] = copy;
    dbSize->mObjectCount++;

    return retval;
}


int readSymDB(const char* inDBName, SymDB_Container** outDB)
/*
** Initialize the symbol DB.
** Only call if the symbol DB should be initialized.
**
** Each line of the file has four tab separated values (tsv):
**      Symbol
**      Section
**      Length (hexadecimal)
**      Object
** The file is assumed to be pre-sorted.
**
** inDBName     Name of the symbol DB file to read.
** outDB        Receives the newly allocated DB.
**              It is set to NULL first, and is left allocated (holding
**              the lines read so far) when a later line fails; it is not
**              released here.
** returns      0 on success, otherwise the line number of the failure
**              (which is also reported through ERROR_REPORT).
*/
{
    int retval = 0;
    FILE* symDB = NULL;
    char lineBuf[0x400];

    /*
    ** Initialize out arguments.
    */
    if(NULL != outDB)
    {
        *outDB = NULL;
    }

    if(NULL == outDB || NULL == inDBName)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, "(NULL)", "Invalid arguments.");
        return retval;
    }

    symDB = fopen(inDBName, "r");
    if(NULL == symDB)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inDBName, "Unable to open symbol DB.");
        return retval;
    }

    *outDB = (SymDB_Container*)calloc(1, sizeof(SymDB_Container));
    if(NULL == *outDB)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inDBName, "Unable to allocate symbol DB.");
    }

    /*
    ** Read the file line by line, stopping at the first failure.
    */
    while(0 == retval && NULL != fgets(lineBuf, sizeof(lineBuf), symDB))
    {
        char* symbol = NULL;
        char* section = NULL;
        char* length = NULL;
        char* object = NULL;

        trimWhite(lineBuf);

        /*
        ** Split the line into its four fields.
        ** A missing tab anywhere leaves object NULL.
        */
        symbol = skipWhite(lineBuf);
        section = symDBSplitField(symbol);
        length = (NULL != section) ? symDBSplitField(section) : NULL;
        object = (NULL != length) ? symDBSplitField(length) : NULL;

        if(NULL == object)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, inDBName, "File does not appear to be a symbol DB.");
        }
        else
        {
            retval = symDBAddRecord(*outDB, inDBName, symbol, section, length, object);
        }
    }

    if(0 == retval && 0 != ferror(symDB))
    {
        retval = __LINE__;
        ERROR_REPORT(retval, inDBName, "Unable to read file.");
    }

    fclose(symDB);
    symDB = NULL;

    return retval;
}
677
678
void cleanSymDB(SymDB_Container** inDB)
/*
** Release the whole symbol DB: every object name, object list, size list,
** section name, section list, symbol name, the symbol array and finally
** the container itself.
** *inDB is NULL afterwards.
** Does nothing when inDB or *inDB is NULL.
*/
{
    SymDB_Container* db = NULL;
    unsigned symIndex = 0;
    unsigned secIndex = 0;
    unsigned sizIndex = 0;
    unsigned objIndex = 0;

    if(NULL == inDB || NULL == *inDB)
    {
        return;
    }

    db = *inDB;

    for(symIndex = 0; symIndex < db->mSymbolCount; symIndex++)
    {
        SymDB_Symbol* symbol = &db->mSymbols[symIndex];

        for(secIndex = 0; secIndex < symbol->mSectionCount; secIndex++)
        {
            SymDB_Section* section = &symbol->mSections[secIndex];

            for(sizIndex = 0; sizIndex < section->mSizeCount; sizIndex++)
            {
                SymDB_Size* size = &section->mSizes[sizIndex];

                for(objIndex = 0; objIndex < size->mObjectCount; objIndex++)
                {
                    CLEANUP(size->mObjects[objIndex]);
                }
                CLEANUP(size->mObjects);
            }

            CLEANUP(section->mName);
            CLEANUP(section->mSizes);
        }

        CLEANUP(symbol->mName);
        CLEANUP(symbol->mSections);
    }

    CLEANUP(db->mSymbols);
    CLEANUP(*inDB);
}
713
714
symDBLookup(const void * inKey,const void * inItem)715 int symDBLookup(const void* inKey, const void* inItem)
716 /*
717 ** bsearch utility routine to find the symbol in the symdb.
718 */
719 {
720 int retval = 0;
721 const char* key = (const char*)inKey;
722 const SymDB_Symbol* symbol = (const SymDB_Symbol*)inItem;
723
724 retval = strcmp(key, symbol->mName);
725
726 return retval;
727 }
728
729
int fillSymbolSizeFromDB(Options* inOptions, MSMap_Module* inModule, MSMap_Symbol* inoutSymbol, const char* inMangledName)
/*
** If we have a symbol DB, attempt to determine the real size of the symbol
** up front.
** This helps us later in the game to avoid performing size guesses by
** offset.
**
** The DB is read from inOptions->mSymDBName the first time it is needed.
**
** inoutSymbol->mSymDBSize is written only when inMangledName is found in
** the DB; it is set to 0 when the symbol is found but neither its section
** nor its object could be matched.
**
** returns  0 on success (also when no DB is configured or the symbol is
**          not in it), otherwise the error returned by readSymDB.
*/
{
    int retval = 0;
    SymDB_Symbol* symbol = NULL;
    SymDB_Section* section = NULL;
    MSMap_Segment* mapSection = NULL;
    unsigned symDBSize = 0;
    unsigned secLoop = 0;

    /*
    ** May need to initialize symdb.
    */
    if(NULL == inOptions->mSymDB && NULL != inOptions->mSymDBName)
    {
        retval = readSymDB(inOptions->mSymDBName, &inOptions->mSymDB);
    }

    /*
    ** The symdb is optional.
    */
    if(0 != retval || NULL == inOptions->mSymDB)
    {
        return retval;
    }

    /*
    ** Find the symbol.
    */
    symbol = (SymDB_Symbol*)bsearch(inMangledName, inOptions->mSymDB->mSymbols, inOptions->mSymDB->mSymbolCount, sizeof(SymDB_Symbol), symDBLookup);
    if(NULL == symbol)
    {
        return retval;
    }

    /*
    ** We found the symbol.
    ** See if it has the section in question; the first name match wins.
    */
    mapSection = getSymbolSection(inModule, inoutSymbol);
    if(NULL != mapSection)
    {
        for(secLoop = 0; secLoop < symbol->mSectionCount; secLoop++)
        {
            if(0 == strcmp(mapSection->mSegment, symbol->mSections[secLoop].mName))
            {
                section = &symbol->mSections[secLoop];
                break;
            }
        }
    }

    if(NULL != section)
    {
        /*
        ** We have a section match.
        ** Should there be a single size for the symbol, then we just
        ** default to that.
        ** If more than one size, we have to do an object match search.
        ** Should there be no object match, we do nothing.
        */
        if(1 == section->mSizeCount)
        {
            symDBSize = section->mSizes[0].mSize;
        }
        else if(NULL != inoutSymbol->mObject)
        {
            char* mapObject = NULL;

            /*
            ** Figure out the map object file name.
            ** Skip anything up to and including the last colon.
            ** If it doesn't have a .obj in it, not worth continuing.
            */
            mapObject = strrchr(inoutSymbol->mObject, ':');
            if(NULL == mapObject)
            {
                mapObject = inoutSymbol->mObject;
            }
            else
            {
                mapObject++; /* colon */
            }

            if(NULL != strstr(mapObject, ".obj"))
            {
                unsigned sizLoop = 0;
                unsigned objLoop = 0;
                int matched = 0;

                for(sizLoop = 0; 0 == matched && sizLoop < section->mSizeCount; sizLoop++)
                {
                    SymDB_Size* size = &section->mSizes[sizLoop];

                    for(objLoop = 0; objLoop < size->mObjectCount; objLoop++)
                    {
                        if(NULL != strstr(size->mObjects[objLoop], mapObject))
                        {
                            /*
                            ** As we matched the object, in a particular
                            ** section, we'll go with this as the number.
                            */
                            symDBSize = size->mSize;
                            matched = 1;
                            break;
                        }
                    }
                }
            }
        }
    }

    /*
    ** Put the size in.
    */
    inoutSymbol->mSymDBSize = symDBSize;

    return retval;
}
860
861
#if F_DEMANGLE
static char* symdupStringLiteral(const char* inSymbol)
/*
** Manual demangling of a symbol which UnDecorateSymbolName only reports
** as `string'.
**
** The result reads  string: "<text>"  when the encoded NUL terminator was
** the last "?$" escape decoded, and  string: "<text>..."  otherwise.
** Non printable characters are replaced by blanks.
**
** returns  A malloc'd string owned by the caller, or NULL if out of memory.
*/
{
    /*
    ** Make sure we have enough space for the updated string.
    ** The prologue skipped below is at least four characters and no
    ** escape decodes into more characters than it occupies, so prefix,
    ** text, closing "...\"" and the NUL need at most strlen + 10 bytes.
    */
    char* retval = (char*)malloc(strlen(inSymbol) + 11);
    char* curresult = retval;
    const char* curchar = inSymbol;
    int state = DEMANGLE_STATE_START;

    /*
    ** The hex state is for stuff like ?$EA which really means hex
    ** value 0x40.
    ** Kept unsigned so the arithmetic below wraps in a defined way.
    */
    unsigned char hex_state = 0;

    /*
    ** Sometimes we get a null-termination before the final @ sign;
    ** in that case, remember that we've seen the whole string.
    */
    int have_null_char = 0;

    if(NULL == retval)
    {
        return NULL;
    }

    /*
    ** Stick our user-readable prefix on.
    */
    strcpy(curresult, "string: \"");
    curresult += 9;

    while('\0' != *curchar)
    {
        unsigned char ch = (unsigned char)*curchar;

        switch(state)
        {
            /*
            ** The prologue states are divided up so that someday we can
            ** try to decode the random letters in between the '@' signs.
            ** Also, some strings only have 2 prologue '@' signs, so we
            ** have to figure out how to distinguish between them at some
            ** point.
            */
            case DEMANGLE_STATE_START:
                if('@' == ch)
                {
                    state = DEMANGLE_STATE_PROLOGUE_1;
                }
                break;

            case DEMANGLE_STATE_PROLOGUE_1:
                /*
                ** '0' or '1' is the string type; all other characters
                ** are ignored.
                ** Nothing below depends on which of the two it was.
                */
                if('0' == ch || '1' == ch)
                {
                    state = DEMANGLE_STATE_HAVE_TYPE;
                }
                break;

            case DEMANGLE_STATE_HAVE_TYPE:
                if(ch >= '0' && ch <= '9')
                {
                    state = DEMANGLE_STATE_DEC_LENGTH;
                }
                else if(ch >= 'A' && ch <= 'Z')
                {
                    state = DEMANGLE_STATE_HEX_LENGTH;
                }
                /*
                ** Intentional fall through: an '@' right after the type
                ** ends the prologue just as it does after a decimal length.
                */
            case DEMANGLE_STATE_DEC_LENGTH:
                /*
                ** Decimal lengths don't have the 2nd field.
                */
                if('@' == ch)
                {
                    state = DEMANGLE_STATE_NORMAL;
                }
                break;

            case DEMANGLE_STATE_HEX_LENGTH:
                /*
                ** Hex lengths have a 2nd field (purpose unknown).
                */
                if('@' == ch)
                {
                    state = DEMANGLE_STATE_PROLOGUE_SECONDARY;
                }
                break;

            case DEMANGLE_STATE_PROLOGUE_SECONDARY:
                if('@' == ch)
                {
                    state = DEMANGLE_STATE_NORMAL;
                }
                break;

            case DEMANGLE_STATE_NORMAL:
                if('?' == ch)
                {
                    state = DEMANGLE_STATE_QDECODE;
                }
                else if('@' == ch)
                {
                    state = DEMANGLE_STATE_STOP;
                }
                else
                {
                    *curresult++ = (char)DEMANGLE_SAFE_CHAR(ch);
                }
                break;

            case DEMANGLE_STATE_QDECODE:
                /*
                ** Found a '?'.
                ** There are certain shortcuts, like "?3" means ":".
                ** Unknown shortcuts produce no output.
                */
                state = DEMANGLE_STATE_NORMAL;

                switch(ch)
                {
                    case '1':
                        *curresult++ = '/';
                        break;
                    case '2':
                        *curresult++ = '\\';
                        break;
                    case '3':
                        *curresult++ = ':';
                        break;
                    case '4':
                        *curresult++ = '.';
                        break;
                    case '5':
                        *curresult++ = ' ';
                        break;
                    case '6':
                        /* shown as the two characters \n */
                        *curresult++ = '\\';
                        *curresult++ = 'n';
                        break;
                    case '8':
                        *curresult++ = '\'';
                        break;
                    case '9':
                        *curresult++ = '-';
                        break;
                    case '$':
                        /*
                        ** Any other arbitrary ASCII value can be stored
                        ** by prefixing it with ?$
                        */
                        state = DEMANGLE_STATE_DOLLAR_1;
                        break;
                    default:
                        break;
                }
                break;

            case DEMANGLE_STATE_DOLLAR_1:
                /*
                ** First digit of ?$ notation.
                ** All digits are hex, represented starting with the
                ** capital letter 'A' such that 'A' means 0x0, 'B' means
                ** 0x1, 'K' means 0xA.
                */
                hex_state = (unsigned char)((ch - 'A') * 0x10);
                state = DEMANGLE_STATE_DOLLAR_2;
                break;

            case DEMANGLE_STATE_DOLLAR_2:
                /*
                ** Same mechanism as above.
                */
                hex_state = (unsigned char)(hex_state + (ch - 'A'));
                if(0 != hex_state)
                {
                    *curresult++ = (char)DEMANGLE_SAFE_CHAR(hex_state);
                    have_null_char = 0;
                }
                else
                {
                    have_null_char = 1;
                }

                state = DEMANGLE_STATE_NORMAL;
                break;

            case DEMANGLE_STATE_STOP:
            default:
                break;
        }

        curchar++;
    }

    /*
    ** Add the appropriate termination depending on whether we completed
    ** the string or not.
    */
    if(!have_null_char)
    {
        strcpy(curresult, "...\"");
    }
    else
    {
        strcpy(curresult, "\"");
    }

    return retval;
}
#endif /* F_DEMANGLE */


char* symdup(const char* inSymbol)
/*
** Attempts to demangle the symbol if appropriate.
** Otherwise acts like strdup.
**
** When F_DEMANGLE is set:
**      A leading "__imp_" is stripped and the result is prefixed with
**          "__declspec(dllimport) ".
**      A symbol starting with '?' is handed to UnDecorateSymbolName;
**          `string' results are decoded by hand, failures are copied as is.
**      A symbol starting with '_' is copied without that underscore.
**
** inSymbol     The symbol name; must not be NULL.
** returns      A malloc'd string owned by the caller, or NULL if out of
**              memory.
*/
{
    char* retval = NULL;

#if F_DEMANGLE
    {
        int isImport = 0;

        if(0 == strncmp("__imp_", inSymbol, 6))
        {
            isImport = 1;
            inSymbol += 6;
        }

        if('?' == inSymbol[0])
        {
            char demangleBuf[0x200];
            DWORD demangleRes = 0;

            demangleRes = UnDecorateSymbolName(inSymbol, demangleBuf, sizeof(demangleBuf), UNDNAME_COMPLETE);
            if(0 == demangleRes)
            {
                /*
                ** fall back to normal.
                */
                retval = strdup(inSymbol);
            }
            else if(0 == strcmp(demangleBuf, "`string'"))
            {
                /*
                ** attempt manual demangling of string prefix.
                */
                retval = symdupStringLiteral(inSymbol);
            }
            else
            {
                retval = strdup(demangleBuf);
            }
        }
        else if('_' == inSymbol[0])
        {
            retval = strdup(inSymbol + 1);
        }
        else
        {
            retval = strdup(inSymbol);
        }

        /*
        ** May need to rewrite the symbol if an import.
        ** The result is sized exactly, so a long name is neither cut off
        ** nor left without its terminating NUL.
        */
        if(NULL != retval && isImport)
        {
            static const char importPrefix[] = "__declspec(dllimport) ";
            size_t prefixLen = sizeof(importPrefix) - 1;
            size_t nameLen = strlen(retval);
            char* imported = (char*)malloc(prefixLen + nameLen + 1);

            if(NULL != imported)
            {
                memcpy(imported, importPrefix, prefixLen);
                memcpy(imported + prefixLen, retval, nameLen + 1);
            }

            free(retval);
            retval = imported;
        }
    }
#else /* F_DEMANGLE */
    retval = strdup(inSymbol);
#endif /* F_DEMANGLE */

    return retval;
}
1118
1119
readmap(Options * inOptions,MSMap_Module * inModule)1120 int readmap(Options* inOptions, MSMap_Module* inModule)
1121 /*
1122 ** Read the input line by line, adding it to the module.
1123 */
1124 {
1125 int retval = 0;
1126 char lineBuffer[0x400];
1127 char* current = NULL;
1128 MSMap_ReadState fsm;
1129 int len = 0;
1130 int forceContinue = 0;
1131
1132 memset(&fsm, 0, sizeof(fsm));
1133
1134 /*
1135 ** Read the map file line by line.
1136 ** We keep a simple state machine to determine what we're looking at.
1137 */
1138 while(0 == retval && NULL != fgets(lineBuffer, sizeof(lineBuffer), inOptions->mInput))
1139 {
1140 if(forceContinue)
1141 {
1142 /*
1143 ** Used to skip anticipated blank lines.
1144 */
1145 forceContinue--;
1146 continue;
1147 }
1148
1149 current = skipWhite(lineBuffer);
1150 trimWhite(current);
1151
1152 len = strlen(current);
1153
1154 if(fsm.mHasModule)
1155 {
1156 if(fsm.mHasTimestamp)
1157 {
1158 if(fsm.mHasPreferredLoadAddress)
1159 {
1160 if(fsm.mHasSegmentData)
1161 {
1162 if(fsm.mHasPublicSymbolData)
1163 {
1164 if(fsm.mHasEntryPoint)
1165 {
1166 if(fsm.mFoundStaticSymbols)
1167 {
1168 /*
1169 ** A blank line means we've reached the end of all static symbols.
1170 */
1171 if(len)
1172 {
1173 /*
1174 ** We're adding a new symbol.
1175 ** Make sure we have room for it.
1176 */
1177 if(inModule->mSymbolCapacity == inModule->mSymbolCount)
1178 {
1179 void* moved = NULL;
1180
1181 moved = realloc(inModule->mSymbols, sizeof(MSMap_Symbol) * (inModule->mSymbolCapacity + MSMAP_SYMBOL_GROWBY));
1182 if(NULL != moved)
1183 {
1184 inModule->mSymbolCapacity += MSMAP_SYMBOL_GROWBY;
1185 inModule->mSymbols = (MSMap_Symbol*)moved;
1186 }
1187 else
1188 {
1189 retval = __LINE__;
1190 ERROR_REPORT(retval, inModule->mModule, "Unable to grow symbols.");
1191 }
1192 }
1193
1194 if(0 == retval && inModule->mSymbolCapacity > inModule->mSymbolCount)
1195 {
1196 MSMap_Symbol* theSymbol = NULL;
1197 unsigned index = 0;
1198 int scanRes = 0;
1199 char symbolBuf[0x200];
1200
1201 index = inModule->mSymbolCount;
1202 inModule->mSymbolCount++;
1203 theSymbol = (inModule->mSymbols + index);
1204
1205 memset(theSymbol, 0, sizeof(MSMap_Symbol));
1206 theSymbol->mScope = STATIC;
1207
1208 scanRes = sscanf(current, "%x:%x %s %x", (unsigned*)&(theSymbol->mPrefix), (unsigned*)&(theSymbol->mOffset), symbolBuf, (unsigned*)&(theSymbol->mRVABase));
1209 if(4 == scanRes)
1210 {
1211 theSymbol->mSymbol = symdup(symbolBuf);
1212
1213 if(0 == retval)
1214 {
1215 if(NULL != theSymbol->mSymbol)
1216 {
1217 char *last = lastWord(current);
1218
1219 theSymbol->mObject = strdup(last);
1220 if(NULL == theSymbol->mObject)
1221 {
1222 retval = __LINE__;
1223 ERROR_REPORT(retval, last, "Unable to copy object name.");
1224 }
1225 }
1226 else
1227 {
1228 retval = __LINE__;
1229 ERROR_REPORT(retval, symbolBuf, "Unable to copy symbol name.");
1230 }
1231 }
1232 }
1233 else
1234 {
1235 retval = __LINE__;
1236 ERROR_REPORT(retval, inModule->mModule, "Unable to scan static symbols.");
1237 }
1238 }
1239 }
1240 else
1241 {
1242 /*
1243 ** All done.
1244 */
1245 break;
1246 }
1247 }
1248 else
1249 {
1250 /*
1251 ** Static symbols are optional.
1252 ** If no static symbols we're done.
1253 ** Otherwise, set the flag such that it will work more.
1254 */
1255 if(0 == strcmp(current, "Static symbols"))
1256 {
1257 fsm.mFoundStaticSymbols = __LINE__;
1258 forceContinue = 1;
1259 }
1260 else
1261 {
1262 /*
1263 ** All done.
1264 */
1265 break;
1266 }
1267 }
1268 }
1269 else
1270 {
1271 int scanRes = 0;
1272
1273 scanRes = sscanf(current, "entry point at %x:%x", (unsigned*)&(inModule->mEntryPrefix), (unsigned*)&(inModule->mEntryOffset));
1274 if(2 == scanRes)
1275 {
1276 fsm.mHasEntryPoint = __LINE__;
1277 forceContinue = 1;
1278 }
1279 else
1280 {
1281 retval = __LINE__;
1282 ERROR_REPORT(retval, current, "Unable to obtain entry point.");
1283 }
1284 }
1285 }
1286 else
1287 {
1288 /*
1289 ** Skip the N lines of public symbol data (column headers).
1290 */
1291 if(2 <= fsm.mHasPublicSymbolDataSkippedLines)
1292 {
1293 /*
1294 ** A blank line indicates end of public symbols.
1295 */
1296 if(len)
1297 {
1298 /*
1299 ** We're adding a new symbol.
1300 ** Make sure we have room for it.
1301 */
1302 if(inModule->mSymbolCapacity == inModule->mSymbolCount)
1303 {
1304 void* moved = NULL;
1305
1306 moved = realloc(inModule->mSymbols, sizeof(MSMap_Symbol) * (inModule->mSymbolCapacity + MSMAP_SYMBOL_GROWBY));
1307 if(NULL != moved)
1308 {
1309 inModule->mSymbolCapacity += MSMAP_SYMBOL_GROWBY;
1310 inModule->mSymbols = (MSMap_Symbol*)moved;
1311 }
1312 else
1313 {
1314 retval = __LINE__;
1315 ERROR_REPORT(retval, inModule->mModule, "Unable to grow symbols.");
1316 }
1317 }
1318
1319 if(0 == retval && inModule->mSymbolCapacity > inModule->mSymbolCount)
1320 {
1321 MSMap_Symbol* theSymbol = NULL;
1322 unsigned index = 0;
1323 int scanRes = 0;
1324 char symbolBuf[0x200];
1325
1326 index = inModule->mSymbolCount;
1327 inModule->mSymbolCount++;
1328 theSymbol = (inModule->mSymbols + index);
1329
1330 memset(theSymbol, 0, sizeof(MSMap_Symbol));
1331 theSymbol->mScope = PUBLIC;
1332
1333 scanRes = sscanf(current, "%x:%x %s %x", (unsigned*)&(theSymbol->mPrefix), (unsigned*)&(theSymbol->mOffset), symbolBuf, (unsigned *)&(theSymbol->mRVABase));
1334 if(4 == scanRes)
1335 {
1336 theSymbol->mSymbol = symdup(symbolBuf);
1337
1338 if(NULL != theSymbol->mSymbol)
1339 {
1340 char *last = lastWord(current);
1341
1342 theSymbol->mObject = strdup(last);
1343 if(NULL != theSymbol->mObject)
1344 {
1345 /*
1346 ** Finally, attempt to lookup the actual size of the symbol
1347 ** if there is a symbol DB available.
1348 */
1349 retval = fillSymbolSizeFromDB(inOptions, inModule, theSymbol, symbolBuf);
1350 }
1351 else
1352 {
1353 retval = __LINE__;
1354 ERROR_REPORT(retval, last, "Unable to copy object name.");
1355 }
1356 }
1357 else
1358 {
1359 retval = __LINE__;
1360 ERROR_REPORT(retval, symbolBuf, "Unable to copy symbol name.");
1361 }
1362 }
1363 else
1364 {
1365 retval = __LINE__;
1366 ERROR_REPORT(retval, inModule->mModule, "Unable to scan public symbols.");
1367 }
1368 }
1369 }
1370 else
1371 {
1372 fsm.mHasPublicSymbolData = __LINE__;
1373 }
1374 }
1375 else
1376 {
1377 fsm.mHasPublicSymbolDataSkippedLines++;
1378 }
1379 }
1380 }
1381 else
1382 {
1383 /*
1384 ** Skip the first line of segment data (column headers).
1385 ** Mark that we've begun grabbing segement data.
1386 */
1387 if(fsm.mSegmentDataSkippedLine)
1388 {
1389 /*
1390 ** A blank line means end of the segment data.
1391 */
1392 if(len)
1393 {
1394 /*
1395 ** We're adding a new segment.
1396 ** Make sure we have room for it.
1397 */
1398 if(inModule->mSegmentCapacity == inModule->mSegmentCount)
1399 {
1400 void* moved = NULL;
1401
1402 moved = realloc(inModule->mSegments, sizeof(MSMap_Segment) * (inModule->mSegmentCapacity + MSMAP_SEGMENT_GROWBY));
1403 if(NULL != moved)
1404 {
1405 inModule->mSegmentCapacity += MSMAP_SEGMENT_GROWBY;
1406 inModule->mSegments = (MSMap_Segment*)moved;
1407 }
1408 else
1409 {
1410 retval = __LINE__;
1411 ERROR_REPORT(retval, inModule->mModule, "Unable to grow segments.");
1412 }
1413 }
1414
1415 if(0 == retval && inModule->mSegmentCapacity > inModule->mSegmentCount)
1416 {
1417 MSMap_Segment* theSegment = NULL;
1418 unsigned index = 0;
1419 char classBuf[0x10];
1420 char nameBuf[0x20];
1421 int scanRes = 0;
1422
1423 index = inModule->mSegmentCount;
1424 inModule->mSegmentCount++;
1425 theSegment = (inModule->mSegments + index);
1426
1427 memset(theSegment, 0, sizeof(MSMap_Segment));
1428
1429 scanRes = sscanf(current, "%x:%x %xH %s %s", (unsigned*)&(theSegment->mPrefix), (unsigned*)&(theSegment->mOffset), (unsigned*)&(theSegment->mLength), nameBuf, classBuf);
1430 if(5 == scanRes)
1431 {
1432 if('.' == nameBuf[0])
1433 {
1434 theSegment->mSegment = strdup(&nameBuf[1]);
1435 }
1436 else
1437 {
1438 theSegment->mSegment = strdup(nameBuf);
1439 }
1440
1441 if(NULL != theSegment->mSegment)
1442 {
1443 if(0 == strcmp("DATA", classBuf))
1444 {
1445 theSegment->mClass = DATA;
1446 }
1447 else if(0 == strcmp("CODE", classBuf))
1448 {
1449 theSegment->mClass = CODE;
1450 }
1451 else
1452 {
1453 retval = __LINE__;
1454 ERROR_REPORT(retval, classBuf, "Unrecognized segment class.");
1455 }
1456 }
1457 else
1458 {
1459 retval = __LINE__;
1460 ERROR_REPORT(retval, nameBuf, "Unable to copy segment name.");
1461 }
1462 }
1463 else
1464 {
1465 retval = __LINE__;
1466 ERROR_REPORT(retval, inModule->mModule, "Unable to scan segments.");
1467 }
1468 }
1469 }
1470 else
1471 {
1472 fsm.mHasSegmentData = __LINE__;
1473 }
1474 }
1475 else
1476 {
1477 fsm.mSegmentDataSkippedLine = __LINE__;
1478 }
1479 }
1480 }
1481 else
1482 {
1483 int scanRes = 0;
1484
1485 /*
1486 ** The PLA has a particular format.
1487 */
1488 scanRes = sscanf(current, "Preferred load address is %x", (unsigned*)&(inModule->mPreferredLoadAddress));
1489 if(1 == scanRes)
1490 {
1491 fsm.mHasPreferredLoadAddress = __LINE__;
1492 forceContinue = 1;
1493 }
1494 else
1495 {
1496 retval = __LINE__;
1497 ERROR_REPORT(retval, current, "Unable to obtain preferred load address.");
1498 }
1499 }
1500 }
1501 else
1502 {
1503 int scanRes = 0;
1504
1505 /*
1506 ** The timestamp has a particular format.
1507 */
1508 scanRes = sscanf(current, "Timestamp is %x", (unsigned*)&(inModule->mTimestamp));
1509 if(1 == scanRes)
1510 {
1511 fsm.mHasTimestamp = __LINE__;
1512 forceContinue = 1;
1513 }
1514 else
1515 {
1516 retval = __LINE__;
1517 ERROR_REPORT(retval, current, "Unable to obtain timestamp.");
1518 }
1519 }
1520 }
1521 else
1522 {
1523 /*
1524 ** The module is on a line by itself.
1525 */
1526 inModule->mModule = strdup(current);
1527 if(NULL != inModule->mModule)
1528 {
1529 fsm.mHasModule = __LINE__;
1530 forceContinue = 1;
1531
1532 if(0 != inOptions->mMatchModuleCount)
1533 {
1534 unsigned matchLoop = 0;
1535
1536 /*
1537 ** If this module name doesn't match, then bail.
1538 ** Compare in a case sensitive manner, exact match only.
1539 */
1540 for(matchLoop = 0; matchLoop < inOptions->mMatchModuleCount; matchLoop++)
1541 {
1542 if(0 == strcmp(inModule->mModule, inOptions->mMatchModules[matchLoop]))
1543 {
1544 break;
1545 }
1546 }
1547
1548 if(matchLoop == inOptions->mMatchModuleCount)
1549 {
1550 /*
1551 ** A match did not occur, bail out of read loop.
1552 ** No error, however.
1553 */
1554 break;
1555 }
1556 }
1557 }
1558 else
1559 {
1560 retval = __LINE__;
1561 ERROR_REPORT(retval, current, "Unable to obtain module.");
1562 }
1563 }
1564 }
1565
1566 if(0 == retval && 0 != ferror(inOptions->mInput))
1567 {
1568 retval = __LINE__;
1569 ERROR_REPORT(retval, inOptions->mInputName, "Unable to read file.");
1570 }
1571
1572 return retval;
1573 }
1574
1575
qsortRVABase(const void * in1,const void * in2)1576 static int qsortRVABase(const void* in1, const void* in2)
1577 /*
1578 ** qsort callback to sort the symbols by their RVABase.
1579 */
1580 {
1581 MSMap_Symbol* sym1 = (MSMap_Symbol*)in1;
1582 MSMap_Symbol* sym2 = (MSMap_Symbol*)in2;
1583 int retval = 0;
1584
1585 if(sym1->mRVABase < sym2->mRVABase)
1586 {
1587 retval = -1;
1588 }
1589 else if(sym1->mRVABase > sym2->mRVABase)
1590 {
1591 retval = 1;
1592 }
1593
1594 return retval;
1595 }
1596
1597
static int tsvout(Options* inOptions, unsigned inSize, MSMap_SegmentClass inClass, MSMap_SymbolScope inScope, const char* inModule, const char* inSegment, const char* inObject, const char* inSymbol)
/*
**  Output a line of map information separated by tabs.
**  Some items (const char*), if not present, will receive a default value.
**
**  inOptions   Supplies the output stream (mOutput) and its name
**                  (mOutputName, used only to report a write failure).
**  inSize      Size to report; nothing is written when this is zero.
**  inClass     Segment class; must be CODE or DATA.
**  inScope     Symbol scope; must be PUBLIC, STATIC or UNDEFINED.
**  inModule    Module name; NULL is replaced by "UNDEF".
**  inSegment   Segment name; NULL is replaced by "UNDEF".
**  inObject    Object name; NULL is replaced by "UNDEF:<module>:<segment>".
**  inSymbol    Symbol name; NULL is replaced by the object name.
**
**  returns     0 on success (including when nothing needed writing),
**                  otherwise the line number where the failure was noted.
*/
{
    int retval = 0;

    /*
    **  No need to output on no size.
    **  This can happen with zero sized segments,
    **      or an imported symbol which has multiple names (one will count).
    */
    if(0 != inSize)
    {
        char objectBuf[0x100];
        const char* symScope = NULL;
        const char* segClass = NULL;
        const char* undefined = "UNDEF";

        /*
        **  Fill in unspecified values.
        **  Module and segment are defaulted first as they feed the
        **      synthesized object name and are printed with %s below;
        **      handing NULL to %s is undefined behavior.
        */
        if(NULL == inModule)
        {
            inModule = undefined;
        }
        if(NULL == inSegment)
        {
            inSegment = undefined;
        }
        if(NULL == inObject)
        {
            int formatRes = 0;

            /*
            **  Bounded formatting: an overly long module/segment name is
            **      truncated to fit objectBuf instead of overrunning it.
            */
            formatRes = snprintf(objectBuf, sizeof(objectBuf), "%s:%s:%s", undefined, inModule, inSegment);
            if(0 > formatRes)
            {
                /*
                **  Buffer contents are not reliable on failure.
                */
                inObject = undefined;
            }
            else
            {
                inObject = objectBuf;
            }
        }
        if(NULL == inSymbol)
        {
            inSymbol = inObject;
        }

        /*
        **  Convert some enumerations to text.
        */
        switch(inClass)
        {
            case CODE:
                segClass = "CODE";
                break;
            case DATA:
                segClass = "DATA";
                break;
            default:
                retval = __LINE__;
                ERROR_REPORT(retval, "", "Unable to determine class for output.");
                break;
        }

        switch(inScope)
        {
            case PUBLIC:
                symScope = "PUBLIC";
                break;
            case STATIC:
                symScope = "STATIC";
                break;
            case UNDEFINED:
                symScope = undefined;
                break;
            default:
                retval = __LINE__;
                ERROR_REPORT(retval, "", "Unable to determine scope for symbol.");
                break;
        }

        /*
        **  Only write once both enumerations were translated.
        */
        if(0 == retval)
        {
            int printRes = 0;

            printRes = fprintf(inOptions->mOutput,
                "%.8X\t%s\t%s\t%s\t%s\t%s\t%s\n",
                inSize,
                segClass,
                symScope,
                inModule,
                inSegment,
                inObject,
                inSymbol
                );

            if(0 > printRes)
            {
                retval = __LINE__;
                ERROR_REPORT(retval, inOptions->mOutputName, "Unable to output tsv data.");
            }
        }
    }

    return retval;
}
1690
1691
void cleanModule(MSMap_Module* inModule)
/*
**  Run CLEANUP over every string and array held by the module,
**      then zero the whole structure.
**
**  inModule    The module to clean; all of its fields are zero on return.
*/
{
    unsigned symIndex = 0;
    unsigned segIndex = 0;

    /*
    **  Symbols: the per symbol strings first, then the array itself.
    */
    while(symIndex < inModule->mSymbolCount)
    {
        MSMap_Symbol* symbol = &inModule->mSymbols[symIndex];

        CLEANUP(symbol->mObject);
        CLEANUP(symbol->mSymbol);
        symIndex++;
    }
    CLEANUP(inModule->mSymbols);

    /*
    **  Segments: same approach.
    */
    while(segIndex < inModule->mSegmentCount)
    {
        MSMap_Segment* segment = &inModule->mSegments[segIndex];

        CLEANUP(segment->mSegment);
        segIndex++;
    }
    CLEANUP(inModule->mSegments);

    CLEANUP(inModule->mModule);

    /*
    **  Leave nothing stale behind (counts, capacities, addresses).
    */
    memset(inModule, 0, sizeof(MSMap_Module));
}
1713
1714
int map2tsv(Options* inOptions)
/*
**  Read all input.
**  Output tab separated value data.
**
**  One line is written per symbol that has a non zero size, followed by
**      one filler line per segment whose length exceeds the space its
**      symbols accounted for.
**
**  inOptions   Passed through to readmap and tsvout.
**
**  returns     0 on success, otherwise the first non zero result from
**                  readmap or tsvout.
*/
{
    MSMap_Module module;
    int retval = 0;

    memset(&module, 0, sizeof(module));

    /*
    **  Read in the map file.
    */
    retval = readmap(inOptions, &module);
    if(0 == retval)
    {
        unsigned symIndex = 0;
        unsigned segIndex = 0;

        /*
        **  Quick sort the symbols via RVABase, so that a symbol's
        **      successor in the array is the next one in address order.
        */
        qsort(module.mSymbols, module.mSymbolCount, sizeof(MSMap_Symbol), qsortRVABase);

        /*
        **  Go through all the symbols (in order by sort).
        **  Output their sizes.
        */
        for(symIndex = 0; 0 == retval && symIndex < module.mSymbolCount; symIndex++)
        {
            MSMap_Symbol* symbol = &module.mSymbols[symIndex];
            MSMap_Segment* section = getSymbolSection(&module, symbol);
            unsigned dbSize = 0;
            unsigned endOffset = 0;
            unsigned offsetSize = 0;
            unsigned size = 0;

            /*
            **  Symbols without a section are not reported.
            */
            if(NULL == section)
            {
                continue;
            }

            /*
            **  Use the symbol DB size if available.
            */
            dbSize = symbol->mSymDBSize;

            /*
            **  Guess the end of the symbol using offsets.
            **  Start from the end of the section, which is right when
            **      this is the last symbol in the section.
            **  If the next symbol lives in the same section, its start
            **      offset is the end of this symbol instead.
            */
            endOffset = section->mOffset + section->mLength;
            if((symIndex + 1) < module.mSymbolCount)
            {
                MSMap_Symbol* nextSymbol = &module.mSymbols[symIndex + 1];

                if(section == getSymbolSection(&module, nextSymbol))
                {
                    endOffset = nextSymbol->mOffset;
                }
            }
            offsetSize = endOffset - symbol->mOffset;

            /*
            **  Now, determine which size to use.
            **  The DB size is only trusted when it is known (non zero)
            **      and does not go beyond the offset size, for sanity.
            */
            if(0 != dbSize && dbSize < offsetSize)
            {
                size = dbSize;
            }
            else
            {
                size = offsetSize;
            }

            /*
            **  Output the symbol with the size.
            */
            retval = tsvout(inOptions,
                size,
                section->mClass,
                symbol->mScope,
                module.mModule,
                section->mSegment,
                symbol->mObject,
                symbol->mSymbol
                );

            /*
            **  Make sure we mark this amount of space as used in the section.
            */
            section->mUsed += size;
        }

        /*
        **  Go through the sections, and those whose length is longer than the
        **      amount of space used, output dummy filler values.
        */
        for(segIndex = 0; 0 == retval && segIndex < module.mSegmentCount; segIndex++)
        {
            MSMap_Segment* section = &module.mSegments[segIndex];

            if(section->mUsed < section->mLength)
            {
                retval = tsvout(inOptions,
                    section->mLength - section->mUsed,
                    section->mClass,
                    UNDEFINED,
                    module.mModule,
                    section->mSegment,
                    NULL,
                    NULL
                    );
            }
        }
    }

    /*
    **  Cleanup, regardless of how far we got.
    */
    cleanModule(&module);

    return retval;
}
1864
1865
int initOptions(Options* outOptions, int inArgc, char** inArgv)
/*
**  Zero outOptions, install the stdin/stdout defaults, then apply each
**      command line argument by matching it against gSwitches.
**
**  outOptions  Receives the parsed options; overwritten entirely.
**  inArgc      Number of entries in inArgv.
**  inArgv      Command line; inArgv[0] is recorded as the program name.
**
**  Unknown switches and switches missing their value also set mHelp.
**
**  returns int 0 if successful.
*/
{
    const int switchCount = sizeof(gSwitches) / sizeof(gSwitches[0]);
    int retval = 0;
    int loop = 0;

    /*
    **  Set any defaults.
    */
    memset(outOptions, 0, sizeof(Options));
    outOptions->mProgramName = inArgv[0];
    outOptions->mInput = stdin;
    outOptions->mInputName = strdup("stdin");
    outOptions->mOutput = stdout;
    outOptions->mOutputName = strdup("stdout");

    if(NULL == outOptions->mOutputName || NULL == outOptions->mInputName)
    {
        retval = __LINE__;
        ERROR_REPORT(retval, "stdin/stdout", "Unable to strdup.");
    }

    /*
    **  Go through and attempt to do the right thing.
    **  Processing stops at the first argument that causes an error.
    */
    for(loop = 1; loop < inArgc && 0 == retval; loop++)
    {
        const char* argument = inArgv[loop];
        Switch* current = NULL;
        int match = 0;
        int switchLoop = 0;

        /*
        **  Find the switch whose long or short name equals the argument.
        */
        for(switchLoop = 0; switchLoop < switchCount; switchLoop++)
        {
            Switch* candidate = gSwitches[switchLoop];

            if(0 != strcmp(candidate->mLongName, argument) && 0 != strcmp(candidate->mShortName, argument))
            {
                continue;
            }

            match = __LINE__;

            if(!candidate->mHasValue)
            {
                current = candidate;
            }
            else if(loop + 1 < inArgc)
            {
                /*
                **  Absorb the next argument to fulfill the value.
                */
                loop++;
                current = candidate;
                current->mValue = inArgv[loop];
            }
            /*
            **  Otherwise the value is missing; current stays NULL so
            **      that the problem is reported below.
            */

            break;
        }

        /*
        **  Do something based on the address of the matched switch.
        */
        if(0 == match)
        {
            outOptions->mHelp = __LINE__;
            retval = __LINE__;
            ERROR_REPORT(retval, inArgv[loop], "Unknown command line switch.");
        }
        else if(NULL == current)
        {
            outOptions->mHelp = __LINE__;
            retval = __LINE__;
            ERROR_REPORT(retval, inArgv[loop], "Command line switch requires a value.");
        }
        else if(&gInputSwitch == current)
        {
            /*
            **  Drop any previous input before opening the new one for
            **      reading.
            */
            CLEANUP(outOptions->mInputName);
            if(NULL != outOptions->mInput && stdin != outOptions->mInput)
            {
                fclose(outOptions->mInput);
                outOptions->mInput = NULL;
            }

            outOptions->mInput = fopen(current->mValue, "r");
            if(NULL != outOptions->mInput)
            {
                outOptions->mInputName = strdup(current->mValue);
                if(NULL == outOptions->mInputName)
                {
                    retval = __LINE__;
                    ERROR_REPORT(retval, current->mValue, "Unable to strdup.");
                }
            }
            else
            {
                retval = __LINE__;
                ERROR_REPORT(retval, current->mValue, "Unable to open input file.");
            }
        }
        else if(&gOutputSwitch == current)
        {
            /*
            **  Drop any previous output; the new one is opened in
            **      append mode.
            */
            CLEANUP(outOptions->mOutputName);
            if(NULL != outOptions->mOutput && stdout != outOptions->mOutput)
            {
                fclose(outOptions->mOutput);
                outOptions->mOutput = NULL;
            }

            outOptions->mOutput = fopen(current->mValue, "a");
            if(NULL != outOptions->mOutput)
            {
                outOptions->mOutputName = strdup(current->mValue);
                if(NULL == outOptions->mOutputName)
                {
                    retval = __LINE__;
                    ERROR_REPORT(retval, current->mValue, "Unable to strdup.");
                }
            }
            else
            {
                retval = __LINE__;
                ERROR_REPORT(retval, current->mValue, "Unable to open output file.");
            }
        }
        else if(&gHelpSwitch == current)
        {
            outOptions->mHelp = __LINE__;
        }
        else if(&gMatchModuleSwitch == current)
        {
            /*
            **  Add the value to the list of allowed module names.
            **  The count only advances once the copy is in place.
            */
            void* moved = realloc(outOptions->mMatchModules, sizeof(char*) * (outOptions->mMatchModuleCount + 1));

            if(NULL == moved)
            {
                retval = __LINE__;
                ERROR_REPORT(retval, current->mValue, "Unable to allocate space for string.");
            }
            else
            {
                outOptions->mMatchModules = (char**)moved;
                outOptions->mMatchModules[outOptions->mMatchModuleCount] = strdup(current->mValue);
                if(NULL == outOptions->mMatchModules[outOptions->mMatchModuleCount])
                {
                    retval = __LINE__;
                    ERROR_REPORT(retval, current->mValue, "Unable to duplicate string.");
                }
                else
                {
                    outOptions->mMatchModuleCount++;
                }
            }
        }
        else if(&gSymDBSwitch == current)
        {
            /*
            **  A later symbol db switch replaces an earlier one.
            */
            CLEANUP(outOptions->mSymDBName);
            outOptions->mSymDBName = strdup(current->mValue);
            if(NULL == outOptions->mSymDBName)
            {
                retval = __LINE__;
                ERROR_REPORT(retval, current->mValue, "Unable to duplicate symbol db name.");
            }
        }
        else if(&gBatchModeSwitch == current)
        {
            outOptions->mBatchMode = __LINE__;
        }
        else
        {
            /*
            **  Listed in gSwitches, but nothing above handles it.
            */
            retval = __LINE__;
            ERROR_REPORT(retval, current->mLongName, "No handler for command line switch.");
        }
    }

    return retval;
}
2060
2061
void cleanOptions(Options* inOptions)
/*
**  Clean up any open handles, et. al.
**
**  The input and output streams are closed unless they are stdin or
**      stdout; the names, the match module list and the symbol DB are
**      cleaned up, and the structure is zeroed.
*/
{
    /*
    **  Input.
    */
    CLEANUP(inOptions->mInputName);
    if(NULL != inOptions->mInput)
    {
        if(stdin != inOptions->mInput)
        {
            fclose(inOptions->mInput);
        }
    }

    /*
    **  Output.
    */
    CLEANUP(inOptions->mOutputName);
    if(NULL != inOptions->mOutput)
    {
        if(stdout != inOptions->mOutput)
        {
            fclose(inOptions->mOutput);
        }
    }

    /*
    **  Match modules, last entry first, then the list itself.
    */
    for(; 0 != inOptions->mMatchModuleCount; inOptions->mMatchModuleCount--)
    {
        CLEANUP(inOptions->mMatchModules[inOptions->mMatchModuleCount - 1]);
    }
    CLEANUP(inOptions->mMatchModules);

    cleanSymDB(&inOptions->mSymDB);

    memset(inOptions, 0, sizeof(Options));
}
2088
2089
void showHelp(Options* inOptions)
/*
**  Print usage text to stdout: the program name, then for every entry
**      of gSwitches its long name, short name and description.
**
**  inOptions   Only mProgramName is read.
*/
{
    const int switchCount = sizeof(gSwitches) / sizeof(gSwitches[0]);
    int index = 0;

    printf("usage:\t%s [arguments]\n", inOptions->mProgramName);
    printf("\n");
    printf("arguments:\n");

    while(index < switchCount)
    {
        /*
        **  Switches that take a value advertise it after both names.
        */
        const char* valueText = gSwitches[index]->mHasValue ? " <value>" : "";

        printf("\t%s%s\n", gSwitches[index]->mLongName, valueText);
        printf("\t %s%s", gSwitches[index]->mShortName, valueText);
        printf(DESC_NEWLINE "%s\n\n", gSwitches[index]->mDescription);

        index++;
    }

    printf("This tool normalizes MS linker .map files for use by other tools.\n");
}
2121
2122
int batchMode(Options* inOptions)
/*
**  Batch mode means that the input file is actually a list of map files,
**      one name per line.
**  Each named file temporarily becomes the input of inOptions while
**      map2tsv runs on it; the real input is put back before returning.
**
**  returns     0 on success.
**              A failure to open a listed file or to read the list
**                  stops the batch and is returned.
**              Otherwise the first non zero map2tsv result, if any.
*/
{
    char lineBuf[0x400];
    FILE* listInput = inOptions->mInput;
    char* listInputName = inOptions->mInputName;
    int firstMapRes = 0;
    int retval = 0;

    while(0 == retval && NULL != fgets(lineBuf, sizeof(lineBuf), listInput))
    {
        int mapRes = 0;

        trimWhite(lineBuf);

        /*
        **  Skip/allow blank lines.
        */
        if('\0' == lineBuf[0])
        {
            continue;
        }

        /*
        **  Override what we believe to be the input for this line.
        */
        inOptions->mInputName = lineBuf;
        inOptions->mInput = fopen(lineBuf, "r");
        if(NULL == inOptions->mInput)
        {
            retval = __LINE__;
            ERROR_REPORT(retval, lineBuf, "Unable to open map file.");
            break;
        }

        /*
        **  Do it.
        **  We remember the first error that we encounter, but we continue.
        **  This is batch mode after all.
        */
        mapRes = map2tsv(inOptions);
        if(0 == firstMapRes)
        {
            firstMapRes = mapRes;
        }

        /*
        **  Close the input file.
        */
        fclose(inOptions->mInput);
    }

    /*
    **  Tell apart end of list from a read failure on the list.
    */
    if(0 == retval && 0 != ferror(listInput))
    {
        retval = __LINE__;
        ERROR_REPORT(retval, listInputName, "Unable to read file.");
    }

    /*
    **  Restore what we've swapped.
    */
    inOptions->mInput = listInput;
    inOptions->mInputName = listInputName;

    /*
    **  Report first map file error if there were no other operational
    **      problems.
    */
    if(0 == retval)
    {
        retval = firstMapRes;
    }

    return retval;
}
2210
2211
main(int inArgc,char ** inArgv)2212 int main(int inArgc, char** inArgv)
2213 {
2214 int retval = 0;
2215 Options options;
2216
2217 retval = initOptions(&options, inArgc, inArgv);
2218 if(options.mHelp)
2219 {
2220 showHelp(&options);
2221 }
2222 else if(0 == retval)
2223 {
2224 if(options.mBatchMode)
2225 {
2226 retval = batchMode(&options);
2227 }
2228 else
2229 {
2230 retval = map2tsv(&options);
2231 }
2232 }
2233
2234 cleanOptions(&options);
2235 return retval;
2236 }
2237
2238