diff --git a/dwarf/attr_string.go b/dwarf/attr_string.go index 34e3659..8a4fff8 100644 --- a/dwarf/attr_string.go +++ b/dwarf/attr_string.go @@ -4,81 +4,257 @@ package dwarf import "strconv" -const _Attr_name = "SiblingLocationNameOrderingByteSizeBitOffsetBitSizeStmtListLowpcHighpcLanguageDiscrDiscrValueVisibilityImportStringLengthCommonRefCompDirConstValueContainingTypeDefaultValueInlineIsOptionalLowerBoundProducerPrototypedReturnAddrStartScopeStrideSizeUpperBoundAbstractOriginAccessibilityAddrClassArtificialBaseTypesCallingCountDataMemberLocDeclColumnDeclFileDeclLineDeclarationDiscrListEncodingExternalFrameBaseFriendIdentifierCaseMacroInfoNamelistItemPrioritySegmentSpecificationStaticLinkTypeUseLocationVarParamVirtualityVtableElemLocAllocatedAssociatedDataLocationStrideEntrypcUseUTF8ExtensionRangesTrampolineCallColumnCallFileCallLineDescription" +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[AttrSibling-1] + _ = x[AttrLocation-2] + _ = x[AttrName-3] + _ = x[AttrOrdering-9] + _ = x[AttrByteSize-11] + _ = x[AttrBitOffset-12] + _ = x[AttrBitSize-13] + _ = x[AttrStmtList-16] + _ = x[AttrLowpc-17] + _ = x[AttrHighpc-18] + _ = x[AttrLanguage-19] + _ = x[AttrDiscr-21] + _ = x[AttrDiscrValue-22] + _ = x[AttrVisibility-23] + _ = x[AttrImport-24] + _ = x[AttrStringLength-25] + _ = x[AttrCommonRef-26] + _ = x[AttrCompDir-27] + _ = x[AttrConstValue-28] + _ = x[AttrContainingType-29] + _ = x[AttrDefaultValue-30] + _ = x[AttrInline-32] + _ = x[AttrIsOptional-33] + _ = x[AttrLowerBound-34] + _ = x[AttrProducer-37] + _ = x[AttrPrototyped-39] + _ = x[AttrReturnAddr-42] + _ = x[AttrStartScope-44] + _ = x[AttrStrideSize-46] + _ = x[AttrUpperBound-47] + _ = x[AttrAbstractOrigin-49] + _ = x[AttrAccessibility-50] + _ = x[AttrAddrClass-51] + _ = x[AttrArtificial-52] + _ = x[AttrBaseTypes-53] + _ = x[AttrCalling-54] + _ = x[AttrCount-55] + _ = x[AttrDataMemberLoc-56] + _ = x[AttrDeclColumn-57] + _ = x[AttrDeclFile-58] + _ = x[AttrDeclLine-59] + _ = x[AttrDeclaration-60] + _ = x[AttrDiscrList-61] + _ = x[AttrEncoding-62] + _ = x[AttrExternal-63] + _ = x[AttrFrameBase-64] + _ = x[AttrFriend-65] + _ = x[AttrIdentifierCase-66] + _ = x[AttrMacroInfo-67] + _ = x[AttrNamelistItem-68] + _ = x[AttrPriority-69] + _ = x[AttrSegment-70] + _ = x[AttrSpecification-71] + _ = x[AttrStaticLink-72] + _ = x[AttrType-73] + _ = x[AttrUseLocation-74] + _ = x[AttrVarParam-75] + _ = x[AttrVirtuality-76] + _ = x[AttrVtableElemLoc-77] + _ = x[AttrAllocated-78] + _ = x[AttrAssociated-79] + _ = x[AttrDataLocation-80] + _ = x[AttrStride-81] + _ = x[AttrEntrypc-82] + _ = x[AttrUseUTF8-83] + _ = x[AttrExtension-84] + _ = x[AttrRanges-85] + _ = x[AttrTrampoline-86] + _ = x[AttrCallColumn-87] + _ = x[AttrCallFile-88] + _ = x[AttrCallLine-89] + _ = x[AttrDescription-90] + _ = x[AttrBinaryScale-91] + _ = x[AttrDecimalScale-92] + _ = x[AttrSmall-93] + _ = x[AttrDecimalSign-94] + _ = 
x[AttrDigitCount-95] + _ = x[AttrPictureString-96] + _ = x[AttrMutable-97] + _ = x[AttrThreadsScaled-98] + _ = x[AttrExplicit-99] + _ = x[AttrObjectPointer-100] + _ = x[AttrEndianity-101] + _ = x[AttrElemental-102] + _ = x[AttrPure-103] + _ = x[AttrRecursive-104] + _ = x[AttrSignature-105] + _ = x[AttrMainSubprogram-106] + _ = x[AttrDataBitOffset-107] + _ = x[AttrConstExpr-108] + _ = x[AttrEnumClass-109] + _ = x[AttrLinkageName-110] + _ = x[AttrStringLengthBitSize-111] + _ = x[AttrStringLengthByteSize-112] + _ = x[AttrRank-113] + _ = x[AttrStrOffsetsBase-114] + _ = x[AttrAddrBase-115] + _ = x[AttrRnglistsBase-116] + _ = x[AttrDwoName-118] + _ = x[AttrReference-119] + _ = x[AttrRvalueReference-120] + _ = x[AttrMacros-121] + _ = x[AttrCallAllCalls-122] + _ = x[AttrCallAllSourceCalls-123] + _ = x[AttrCallAllTailCalls-124] + _ = x[AttrCallReturnPC-125] + _ = x[AttrCallValue-126] + _ = x[AttrCallOrigin-127] + _ = x[AttrCallParameter-128] + _ = x[AttrCallPC-129] + _ = x[AttrCallTailCall-130] + _ = x[AttrCallTarget-131] + _ = x[AttrCallTargetClobbered-132] + _ = x[AttrCallDataLocation-133] + _ = x[AttrCallDataValue-134] + _ = x[AttrNoreturn-135] + _ = x[AttrAlignment-136] + _ = x[AttrExportSymbols-137] + _ = x[AttrDeleted-138] + _ = x[AttrDefaulted-139] + _ = x[AttrLoclistsBase-140] +} + +const _Attr_name = 
"SiblingLocationNameOrderingByteSizeBitOffsetBitSizeStmtListLowpcHighpcLanguageDiscrDiscrValueVisibilityImportStringLengthCommonRefCompDirConstValueContainingTypeDefaultValueInlineIsOptionalLowerBoundProducerPrototypedReturnAddrStartScopeStrideSizeUpperBoundAbstractOriginAccessibilityAddrClassArtificialBaseTypesCallingCountDataMemberLocDeclColumnDeclFileDeclLineDeclarationDiscrListEncodingExternalFrameBaseFriendIdentifierCaseMacroInfoNamelistItemPrioritySegmentSpecificationStaticLinkTypeUseLocationVarParamVirtualityVtableElemLocAllocatedAssociatedDataLocationStrideEntrypcUseUTF8ExtensionRangesTrampolineCallColumnCallFileCallLineDescriptionBinaryScaleDecimalScaleSmallDecimalSignDigitCountPictureStringMutableThreadsScaledExplicitObjectPointerEndianityElementalPureRecursiveSignatureMainSubprogramDataBitOffsetConstExprEnumClassLinkageNameStringLengthBitSizeStringLengthByteSizeRankStrOffsetsBaseAddrBaseRnglistsBaseDwoNameReferenceRvalueReferenceMacrosCallAllCallsCallAllSourceCallsCallAllTailCallsCallReturnPCCallValueCallOriginCallParameterCallPCCallTailCallCallTargetCallTargetClobberedCallDataLocationCallDataValueNoreturnAlignmentExportSymbolsDeletedDefaultedLoclistsBase" var _Attr_map = map[Attr]string{ - 1: _Attr_name[0:7], - 2: _Attr_name[7:15], - 3: _Attr_name[15:19], - 9: _Attr_name[19:27], - 11: _Attr_name[27:35], - 12: _Attr_name[35:44], - 13: _Attr_name[44:51], - 16: _Attr_name[51:59], - 17: _Attr_name[59:64], - 18: _Attr_name[64:70], - 19: _Attr_name[70:78], - 21: _Attr_name[78:83], - 22: _Attr_name[83:93], - 23: _Attr_name[93:103], - 24: _Attr_name[103:109], - 25: _Attr_name[109:121], - 26: _Attr_name[121:130], - 27: _Attr_name[130:137], - 28: _Attr_name[137:147], - 29: _Attr_name[147:161], - 30: _Attr_name[161:173], - 32: _Attr_name[173:179], - 33: _Attr_name[179:189], - 34: _Attr_name[189:199], - 37: _Attr_name[199:207], - 39: _Attr_name[207:217], - 42: _Attr_name[217:227], - 44: _Attr_name[227:237], - 46: _Attr_name[237:247], - 47: _Attr_name[247:257], - 
49: _Attr_name[257:271], - 50: _Attr_name[271:284], - 51: _Attr_name[284:293], - 52: _Attr_name[293:303], - 53: _Attr_name[303:312], - 54: _Attr_name[312:319], - 55: _Attr_name[319:324], - 56: _Attr_name[324:337], - 57: _Attr_name[337:347], - 58: _Attr_name[347:355], - 59: _Attr_name[355:363], - 60: _Attr_name[363:374], - 61: _Attr_name[374:383], - 62: _Attr_name[383:391], - 63: _Attr_name[391:399], - 64: _Attr_name[399:408], - 65: _Attr_name[408:414], - 66: _Attr_name[414:428], - 67: _Attr_name[428:437], - 68: _Attr_name[437:449], - 69: _Attr_name[449:457], - 70: _Attr_name[457:464], - 71: _Attr_name[464:477], - 72: _Attr_name[477:487], - 73: _Attr_name[487:491], - 74: _Attr_name[491:502], - 75: _Attr_name[502:510], - 76: _Attr_name[510:520], - 77: _Attr_name[520:533], - 78: _Attr_name[533:542], - 79: _Attr_name[542:552], - 80: _Attr_name[552:564], - 81: _Attr_name[564:570], - 82: _Attr_name[570:577], - 83: _Attr_name[577:584], - 84: _Attr_name[584:593], - 85: _Attr_name[593:599], - 86: _Attr_name[599:609], - 87: _Attr_name[609:619], - 88: _Attr_name[619:627], - 89: _Attr_name[627:635], - 90: _Attr_name[635:646], + 1: _Attr_name[0:7], + 2: _Attr_name[7:15], + 3: _Attr_name[15:19], + 9: _Attr_name[19:27], + 11: _Attr_name[27:35], + 12: _Attr_name[35:44], + 13: _Attr_name[44:51], + 16: _Attr_name[51:59], + 17: _Attr_name[59:64], + 18: _Attr_name[64:70], + 19: _Attr_name[70:78], + 21: _Attr_name[78:83], + 22: _Attr_name[83:93], + 23: _Attr_name[93:103], + 24: _Attr_name[103:109], + 25: _Attr_name[109:121], + 26: _Attr_name[121:130], + 27: _Attr_name[130:137], + 28: _Attr_name[137:147], + 29: _Attr_name[147:161], + 30: _Attr_name[161:173], + 32: _Attr_name[173:179], + 33: _Attr_name[179:189], + 34: _Attr_name[189:199], + 37: _Attr_name[199:207], + 39: _Attr_name[207:217], + 42: _Attr_name[217:227], + 44: _Attr_name[227:237], + 46: _Attr_name[237:247], + 47: _Attr_name[247:257], + 49: _Attr_name[257:271], + 50: _Attr_name[271:284], + 51: _Attr_name[284:293], + 52: 
_Attr_name[293:303], + 53: _Attr_name[303:312], + 54: _Attr_name[312:319], + 55: _Attr_name[319:324], + 56: _Attr_name[324:337], + 57: _Attr_name[337:347], + 58: _Attr_name[347:355], + 59: _Attr_name[355:363], + 60: _Attr_name[363:374], + 61: _Attr_name[374:383], + 62: _Attr_name[383:391], + 63: _Attr_name[391:399], + 64: _Attr_name[399:408], + 65: _Attr_name[408:414], + 66: _Attr_name[414:428], + 67: _Attr_name[428:437], + 68: _Attr_name[437:449], + 69: _Attr_name[449:457], + 70: _Attr_name[457:464], + 71: _Attr_name[464:477], + 72: _Attr_name[477:487], + 73: _Attr_name[487:491], + 74: _Attr_name[491:502], + 75: _Attr_name[502:510], + 76: _Attr_name[510:520], + 77: _Attr_name[520:533], + 78: _Attr_name[533:542], + 79: _Attr_name[542:552], + 80: _Attr_name[552:564], + 81: _Attr_name[564:570], + 82: _Attr_name[570:577], + 83: _Attr_name[577:584], + 84: _Attr_name[584:593], + 85: _Attr_name[593:599], + 86: _Attr_name[599:609], + 87: _Attr_name[609:619], + 88: _Attr_name[619:627], + 89: _Attr_name[627:635], + 90: _Attr_name[635:646], + 91: _Attr_name[646:657], + 92: _Attr_name[657:669], + 93: _Attr_name[669:674], + 94: _Attr_name[674:685], + 95: _Attr_name[685:695], + 96: _Attr_name[695:708], + 97: _Attr_name[708:715], + 98: _Attr_name[715:728], + 99: _Attr_name[728:736], + 100: _Attr_name[736:749], + 101: _Attr_name[749:758], + 102: _Attr_name[758:767], + 103: _Attr_name[767:771], + 104: _Attr_name[771:780], + 105: _Attr_name[780:789], + 106: _Attr_name[789:803], + 107: _Attr_name[803:816], + 108: _Attr_name[816:825], + 109: _Attr_name[825:834], + 110: _Attr_name[834:845], + 111: _Attr_name[845:864], + 112: _Attr_name[864:884], + 113: _Attr_name[884:888], + 114: _Attr_name[888:902], + 115: _Attr_name[902:910], + 116: _Attr_name[910:922], + 118: _Attr_name[922:929], + 119: _Attr_name[929:938], + 120: _Attr_name[938:953], + 121: _Attr_name[953:959], + 122: _Attr_name[959:971], + 123: _Attr_name[971:989], + 124: _Attr_name[989:1005], + 125: _Attr_name[1005:1017], + 126: 
_Attr_name[1017:1026], + 127: _Attr_name[1026:1036], + 128: _Attr_name[1036:1049], + 129: _Attr_name[1049:1055], + 130: _Attr_name[1055:1067], + 131: _Attr_name[1067:1077], + 132: _Attr_name[1077:1096], + 133: _Attr_name[1096:1112], + 134: _Attr_name[1112:1125], + 135: _Attr_name[1125:1133], + 136: _Attr_name[1133:1142], + 137: _Attr_name[1142:1155], + 138: _Attr_name[1155:1162], + 139: _Attr_name[1162:1171], + 140: _Attr_name[1171:1183], } func (i Attr) String() string { diff --git a/dwarf/buf.go b/dwarf/buf.go index 24d266d..7ac53ef 100644 --- a/dwarf/buf.go +++ b/dwarf/buf.go @@ -7,6 +7,7 @@ package dwarf import ( + "bytes" "encoding/binary" "strconv" ) @@ -66,7 +67,7 @@ func (b *buf) uint8() uint8 { } func (b *buf) bytes(n int) []byte { - if len(b.data) < n { + if n < 0 || len(b.data) < n { b.error("underflow") return nil } @@ -79,16 +80,16 @@ func (b *buf) bytes(n int) []byte { func (b *buf) skip(n int) { b.bytes(n) } func (b *buf) string() string { - for i := 0; i < len(b.data); i++ { - if b.data[i] == 0 { - s := string(b.data[0:i]) - b.data = b.data[i+1:] - b.off += Offset(i + 1) - return s - } + i := bytes.IndexByte(b.data, 0) + if i < 0 { + b.error("underflow") + return "" } - b.error("underflow") - return "" + + s := string(b.data[0:i]) + b.data = b.data[i+1:] + b.off += Offset(i + 1) + return s } func (b *buf) uint16() uint16 { @@ -99,6 +100,18 @@ func (b *buf) uint16() uint16 { return b.order.Uint16(a) } +func (b *buf) uint24() uint32 { + a := b.bytes(3) + if a == nil { + return 0 + } + if b.dwarf.bigEndian { + return uint32(a[2]) | uint32(a[1])<<8 | uint32(a[0])<<16 + } else { + return uint32(a[0]) | uint32(a[1])<<8 | uint32(a[2])<<16 + } +} + func (b *buf) uint32() uint32 { a := b.bytes(4) if a == nil { diff --git a/dwarf/class_string.go b/dwarf/class_string.go index a6aabff..163bed7 100644 --- a/dwarf/class_string.go +++ b/dwarf/class_string.go @@ -4,9 +4,35 @@ package dwarf import "strconv" -const _Class_name = 
"ClassUnknownClassAddressClassBlockClassConstantClassExprLocClassFlagClassLinePtrClassLocListPtrClassMacPtrClassRangeListPtrClassReferenceClassReferenceSigClassStringClassReferenceAltClassStringAlt" +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[ClassUnknown-0] + _ = x[ClassAddress-1] + _ = x[ClassBlock-2] + _ = x[ClassConstant-3] + _ = x[ClassExprLoc-4] + _ = x[ClassFlag-5] + _ = x[ClassLinePtr-6] + _ = x[ClassLocListPtr-7] + _ = x[ClassMacPtr-8] + _ = x[ClassRangeListPtr-9] + _ = x[ClassReference-10] + _ = x[ClassReferenceSig-11] + _ = x[ClassString-12] + _ = x[ClassReferenceAlt-13] + _ = x[ClassStringAlt-14] + _ = x[ClassAddrPtr-15] + _ = x[ClassLocList-16] + _ = x[ClassRngList-17] + _ = x[ClassRngListsPtr-18] + _ = x[ClassStrOffsetsPtr-19] +} + +const _Class_name = "ClassUnknownClassAddressClassBlockClassConstantClassExprLocClassFlagClassLinePtrClassLocListPtrClassMacPtrClassRangeListPtrClassReferenceClassReferenceSigClassStringClassReferenceAltClassStringAltClassAddrPtrClassLocListClassRngListClassRngListsPtrClassStrOffsetsPtr" -var _Class_index = [...]uint8{0, 12, 24, 34, 47, 59, 68, 80, 95, 106, 123, 137, 154, 165, 182, 196} +var _Class_index = [...]uint16{0, 12, 24, 34, 47, 59, 68, 80, 95, 106, 123, 137, 154, 165, 182, 196, 208, 220, 232, 248, 266} func (i Class) String() string { if i < 0 || i >= Class(len(_Class_index)-1) { diff --git a/dwarf/const.go b/dwarf/const.go index 4dda83e..ea52460 100644 --- a/dwarf/const.go +++ b/dwarf/const.go @@ -8,7 +8,7 @@ package dwarf //go:generate stringer -type Attr -trimprefix=Attr -// An Attr identifies the attribute type in a DWARF Entry's Field. +// An Attr identifies the attribute type in a DWARF [Entry.Field]. 
type Attr uint32 const ( @@ -71,19 +71,71 @@ const ( AttrVarParam Attr = 0x4B AttrVirtuality Attr = 0x4C AttrVtableElemLoc Attr = 0x4D - AttrAllocated Attr = 0x4E - AttrAssociated Attr = 0x4F - AttrDataLocation Attr = 0x50 - AttrStride Attr = 0x51 - AttrEntrypc Attr = 0x52 - AttrUseUTF8 Attr = 0x53 - AttrExtension Attr = 0x54 - AttrRanges Attr = 0x55 - AttrTrampoline Attr = 0x56 - AttrCallColumn Attr = 0x57 - AttrCallFile Attr = 0x58 - AttrCallLine Attr = 0x59 - AttrDescription Attr = 0x5A + // The following are new in DWARF 3. + AttrAllocated Attr = 0x4E + AttrAssociated Attr = 0x4F + AttrDataLocation Attr = 0x50 + AttrStride Attr = 0x51 + AttrEntrypc Attr = 0x52 + AttrUseUTF8 Attr = 0x53 + AttrExtension Attr = 0x54 + AttrRanges Attr = 0x55 + AttrTrampoline Attr = 0x56 + AttrCallColumn Attr = 0x57 + AttrCallFile Attr = 0x58 + AttrCallLine Attr = 0x59 + AttrDescription Attr = 0x5A + AttrBinaryScale Attr = 0x5B + AttrDecimalScale Attr = 0x5C + AttrSmall Attr = 0x5D + AttrDecimalSign Attr = 0x5E + AttrDigitCount Attr = 0x5F + AttrPictureString Attr = 0x60 + AttrMutable Attr = 0x61 + AttrThreadsScaled Attr = 0x62 + AttrExplicit Attr = 0x63 + AttrObjectPointer Attr = 0x64 + AttrEndianity Attr = 0x65 + AttrElemental Attr = 0x66 + AttrPure Attr = 0x67 + AttrRecursive Attr = 0x68 + // The following are new in DWARF 4. + AttrSignature Attr = 0x69 + AttrMainSubprogram Attr = 0x6A + AttrDataBitOffset Attr = 0x6B + AttrConstExpr Attr = 0x6C + AttrEnumClass Attr = 0x6D + AttrLinkageName Attr = 0x6E + // The following are new in DWARF 5. 
+ AttrStringLengthBitSize Attr = 0x6F + AttrStringLengthByteSize Attr = 0x70 + AttrRank Attr = 0x71 + AttrStrOffsetsBase Attr = 0x72 + AttrAddrBase Attr = 0x73 + AttrRnglistsBase Attr = 0x74 + AttrDwoName Attr = 0x76 + AttrReference Attr = 0x77 + AttrRvalueReference Attr = 0x78 + AttrMacros Attr = 0x79 + AttrCallAllCalls Attr = 0x7A + AttrCallAllSourceCalls Attr = 0x7B + AttrCallAllTailCalls Attr = 0x7C + AttrCallReturnPC Attr = 0x7D + AttrCallValue Attr = 0x7E + AttrCallOrigin Attr = 0x7F + AttrCallParameter Attr = 0x80 + AttrCallPC Attr = 0x81 + AttrCallTailCall Attr = 0x82 + AttrCallTarget Attr = 0x83 + AttrCallTargetClobbered Attr = 0x84 + AttrCallDataLocation Attr = 0x85 + AttrCallDataValue Attr = 0x86 + AttrNoreturn Attr = 0x87 + AttrAlignment Attr = 0x88 + AttrExportSymbols Attr = 0x89 + AttrDeleted Attr = 0x8A + AttrDefaulted Attr = 0x8B + AttrLoclistsBase Attr = 0x8C ) func (a Attr) GoString() string { @@ -124,6 +176,25 @@ const ( formExprloc format = 0x18 formFlagPresent format = 0x19 formRefSig8 format = 0x20 + // The following are new in DWARF 5. + formStrx format = 0x1A + formAddrx format = 0x1B + formRefSup4 format = 0x1C + formStrpSup format = 0x1D + formData16 format = 0x1E + formLineStrp format = 0x1F + formImplicitConst format = 0x21 + formLoclistx format = 0x22 + formRnglistx format = 0x23 + formRefSup8 format = 0x24 + formStrx1 format = 0x25 + formStrx2 format = 0x26 + formStrx3 format = 0x27 + formStrx4 format = 0x28 + formAddrx1 format = 0x29 + formAddrx2 format = 0x2A + formAddrx3 format = 0x2B + formAddrx4 format = 0x2C // Extensions for multi-file compression (.dwz) // http://www.dwarfstd.org/ShowIssue.php?issue=120604.1 formGnuRefAlt format = 0x1f20 @@ -132,7 +203,7 @@ const ( //go:generate stringer -type Tag -trimprefix=Tag -// A Tag is the classification (the type) of an Entry. +// A Tag is the classification (the type) of an [Entry]. 
type Tag uint32 const ( @@ -199,6 +270,15 @@ const ( TagTypeUnit Tag = 0x41 TagRvalueReferenceType Tag = 0x42 TagTemplateAlias Tag = 0x43 + // The following are new in DWARF 5. + TagCoarrayType Tag = 0x44 + TagGenericSubrange Tag = 0x45 + TagDynamicType Tag = 0x46 + TagAtomicType Tag = 0x47 + TagCallSite Tag = 0x48 + TagCallSiteParameter Tag = 0x49 + TagSkeletonUnit Tag = 0x4A + TagImmutableType Tag = 0x4B ) func (t Tag) GoString() string { @@ -269,25 +349,54 @@ const ( opDerefSize = 0x94 /* 1-byte size of data retrieved */ opXderefSize = 0x95 /* 1-byte size of data retrieved */ opNop = 0x96 - /* next four new in Dwarf v3 */ - opPushObjAddr = 0x97 - opCall2 = 0x98 /* 2-byte offset of DIE */ - opCall4 = 0x99 /* 4-byte offset of DIE */ - opCallRef = 0x9A /* 4- or 8- byte offset of DIE */ + // The following are new in DWARF 3. + opPushObjAddr = 0x97 + opCall2 = 0x98 /* 2-byte offset of DIE */ + opCall4 = 0x99 /* 4-byte offset of DIE */ + opCallRef = 0x9A /* 4- or 8- byte offset of DIE */ + opFormTLSAddress = 0x9B + opCallFrameCFA = 0x9C + opBitPiece = 0x9D + // The following are new in DWARF 4. + opImplicitValue = 0x9E + opStackValue = 0x9F + // The following are new in DWARF 5. + opImplicitPointer = 0xA0 + opAddrx = 0xA1 + opConstx = 0xA2 + opEntryValue = 0xA3 + opConstType = 0xA4 + opRegvalType = 0xA5 + opDerefType = 0xA6 + opXderefType = 0xA7 + opConvert = 0xA8 + opReinterpret = 0xA9 /* 0xE0-0xFF reserved for user-specific */ ) // Basic type encodings -- the value for AttrEncoding in a TagBaseType Entry. const ( - encAddress = 0x01 - encBoolean = 0x02 - encComplexFloat = 0x03 - encFloat = 0x04 - encSigned = 0x05 - encSignedChar = 0x06 - encUnsigned = 0x07 - encUnsignedChar = 0x08 + encAddress = 0x01 + encBoolean = 0x02 + encComplexFloat = 0x03 + encFloat = 0x04 + encSigned = 0x05 + encSignedChar = 0x06 + encUnsigned = 0x07 + encUnsignedChar = 0x08 + // The following are new in DWARF 3. 
encImaginaryFloat = 0x09 + encPackedDecimal = 0x0A + encNumericString = 0x0B + encEdited = 0x0C + encSignedFixed = 0x0D + encUnsignedFixed = 0x0E + encDecimalFloat = 0x0F + // The following are new in DWARF 4. + encUTF = 0x10 + // The following are new in DWARF 5. + encUCS = 0x11 + encASCII = 0x12 ) // Statement program standard opcode encodings. @@ -317,3 +426,50 @@ const ( // DWARF 4 lneSetDiscriminator = 4 ) + +// Line table directory and file name entry formats. +// These are new in DWARF 5. +const ( + lnctPath = 0x01 + lnctDirectoryIndex = 0x02 + lnctTimestamp = 0x03 + lnctSize = 0x04 + lnctMD5 = 0x05 +) + +// Location list entry codes. +// These are new in DWARF 5. +const ( + lleEndOfList = 0x00 + lleBaseAddressx = 0x01 + lleStartxEndx = 0x02 + lleStartxLength = 0x03 + lleOffsetPair = 0x04 + lleDefaultLocation = 0x05 + lleBaseAddress = 0x06 + lleStartEnd = 0x07 + lleStartLength = 0x08 +) + +// Unit header unit type encodings. +// These are new in DWARF 5. +const ( + utCompile = 0x01 + utType = 0x02 + utPartial = 0x03 + utSkeleton = 0x04 + utSplitCompile = 0x05 + utSplitType = 0x06 +) + +// Opcodes for DWARFv5 debug_rnglists section. +const ( + rleEndOfList = 0x0 + rleBaseAddressx = 0x1 + rleStartxEndx = 0x2 + rleStartxLength = 0x3 + rleOffsetPair = 0x4 + rleBaseAddress = 0x5 + rleStartEnd = 0x6 + rleStartLength = 0x7 +) diff --git a/dwarf/dwarf5ranges_test.go b/dwarf/dwarf5ranges_test.go new file mode 100644 index 0000000..8bc50bc --- /dev/null +++ b/dwarf/dwarf5ranges_test.go @@ -0,0 +1,41 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package dwarf + +import ( + "encoding/binary" + "os" + "reflect" + "testing" +) + +func TestDwarf5Ranges(t *testing.T) { + rngLists, err := os.ReadFile("testdata/debug_rnglists") + if err != nil { + t.Fatalf("could not read test data: %v", err) + } + + d := &Data{} + d.order = binary.LittleEndian + if err := d.AddSection(".debug_rnglists", rngLists); err != nil { + t.Fatal(err) + } + u := &unit{ + asize: 8, + vers: 5, + is64: true, + } + ret, err := d.dwarf5Ranges(u, nil, 0x5fbd, 0xc, [][2]uint64{}) + if err != nil { + t.Fatalf("could not read rnglist: %v", err) + } + t.Logf("%#v", ret) + + tgt := [][2]uint64{{0x0000000000006712, 0x000000000000679f}, {0x00000000000067af}, {0x00000000000067b3}} + + if reflect.DeepEqual(ret, tgt) { + t.Errorf("expected %#v got %#x", tgt, ret) + } +} diff --git a/dwarf/entry.go b/dwarf/entry.go index 6be0700..4541d74 100644 --- a/dwarf/entry.go +++ b/dwarf/entry.go @@ -11,7 +11,9 @@ package dwarf import ( + "encoding/binary" "errors" + "fmt" "strconv" ) @@ -26,12 +28,13 @@ type afield struct { attr Attr fmt format class Class + val int64 // for formImplicitConst } // a map from entry format ids to their descriptions type abbrevTable map[uint32]abbrev -// ParseAbbrev returns the abbreviation table that starts at byte off +// parseAbbrev returns the abbreviation table that starts at byte off // in the .debug_abbrev section. 
func (d *Data) parseAbbrev(off uint64, vers int) (abbrevTable, error) { if m, ok := d.abbrevCache[off]; ok { @@ -67,6 +70,9 @@ func (d *Data) parseAbbrev(off uint64, vers int) (abbrevTable, error) { if tag == 0 && fmt == 0 { break } + if format(fmt) == formImplicitConst { + b1.int() + } n++ } if b1.err != nil { @@ -82,6 +88,9 @@ func (d *Data) parseAbbrev(off uint64, vers int) (abbrevTable, error) { a.field[i].attr = Attr(b.uint()) a.field[i].fmt = format(b.uint()) a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b) + if a.field[i].fmt == formImplicitConst { + a.field[i].val = b.int() + } } b.uint() b.uint() @@ -137,6 +146,11 @@ var attrPtrClass = map[Attr]Class{ AttrUseLocation: ClassLocListPtr, AttrVtableElemLoc: ClassLocListPtr, AttrRanges: ClassRangeListPtr, + // The following are new in DWARF 5. + AttrStrOffsetsBase: ClassStrOffsetsPtr, + AttrAddrBase: ClassAddrPtr, + AttrRnglistsBase: ClassRngListsPtr, + AttrLoclistsBase: ClassLocListPtr, } // formToClass returns the DWARF 4 Class for the given form. If the @@ -148,7 +162,10 @@ func formToClass(form format, attr Attr, vers int, b *buf) Class { b.error("cannot determine class of unknown attribute form") return 0 - case formAddr: + case formIndirect: + return ClassUnknown + + case formAddr, formAddrx, formAddrx1, formAddrx2, formAddrx3, formAddrx4: return ClassAddress case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock: @@ -163,7 +180,7 @@ func formToClass(form format, attr Attr, vers int, b *buf) Class { } return ClassBlock - case formData1, formData2, formData4, formData8, formSdata, formUdata: + case formData1, formData2, formData4, formData8, formSdata, formUdata, formData16, formImplicitConst: // In DWARF 2 and 3, ClassPtr was encoded as a // constant. 
Unlike ClassExprLoc/ClassBlock, some // DWARF 4 attributes need to distinguish Class*Ptr @@ -177,13 +194,13 @@ func formToClass(form format, attr Attr, vers int, b *buf) Class { case formFlag, formFlagPresent: return ClassFlag - case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata: + case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata, formRefSup4, formRefSup8: return ClassReference case formRefSig8: return ClassReferenceSig - case formString, formStrp: + case formString, formStrp, formStrx, formStrpSup, formLineStrp, formStrx1, formStrx2, formStrx3, formStrx4: return ClassString case formSecOffset: @@ -203,6 +220,12 @@ func formToClass(form format, attr Attr, vers int, b *buf) Class { case formGnuStrpAlt: return ClassStringAlt + + case formLoclistx: + return ClassLocList + + case formRnglistx: + return ClassRngList } } @@ -214,32 +237,32 @@ type Entry struct { Field []Field } -// A Field is a single attribute/value pair in an Entry. +// A Field is a single attribute/value pair in an [Entry]. // // A value can be one of several "attribute classes" defined by DWARF. 
// The Go types corresponding to each class are: // -// DWARF class Go type Class -// ----------- ------- ----- -// address uint64 ClassAddress -// block []byte ClassBlock -// constant int64 ClassConstant -// flag bool ClassFlag -// reference -// to info dwarf.Offset ClassReference -// to type unit uint64 ClassReferenceSig -// string string ClassString -// exprloc []byte ClassExprLoc -// lineptr int64 ClassLinePtr -// loclistptr int64 ClassLocListPtr -// macptr int64 ClassMacPtr -// rangelistptr int64 ClassRangeListPtr +// DWARF class Go type Class +// ----------- ------- ----- +// address uint64 ClassAddress +// block []byte ClassBlock +// constant int64 ClassConstant +// flag bool ClassFlag +// reference +// to info dwarf.Offset ClassReference +// to type unit uint64 ClassReferenceSig +// string string ClassString +// exprloc []byte ClassExprLoc +// lineptr int64 ClassLinePtr +// loclistptr int64 ClassLocListPtr +// macptr int64 ClassMacPtr +// rangelistptr int64 ClassRangeListPtr // -// For unrecognized or vendor-defined attributes, Class may be -// ClassUnknown. +// For unrecognized or vendor-defined attributes, [Class] may be +// [ClassUnknown]. type Field struct { Attr Attr - Val interface{} + Val any Class Class } @@ -295,7 +318,7 @@ const ( // the "mac" section. ClassMacPtr - // ClassMacPtr represents values that are an int64 offset into + // ClassRangeListPtr represents values that are an int64 offset into // the "rangelist" section. ClassRangeListPtr @@ -324,6 +347,27 @@ const ( // offset into the DWARF string section of an alternate object // file. ClassStringAlt + + // ClassAddrPtr represents values that are an int64 offset + // into the "addr" section. + ClassAddrPtr + + // ClassLocList represents values that are an int64 offset + // into the "loclists" section. + ClassLocList + + // ClassRngList represents values that are a uint64 offset + // from the base of the "rnglists" section. 
+ ClassRngList + + // ClassRngListsPtr represents values that are an int64 offset + // into the "rnglists" section. These are used as the base for + // ClassRngList values. + ClassRngListsPtr + + // ClassStrOffsetsPtr represents values that are an int64 + // offset into the "str_offsets" section. + ClassStrOffsetsPtr ) //go:generate stringer -type=Class @@ -332,22 +376,22 @@ func (i Class) GoString() string { return "dwarf." + i.String() } -// Val returns the value associated with attribute Attr in Entry, +// Val returns the value associated with attribute [Attr] in [Entry], // or nil if there is no such attribute. // // A common idiom is to merge the check for nil return with // the check that the value has the expected dynamic type, as in: -// v, ok := e.Val(AttrSibling).(int64) // -func (e *Entry) Val(a Attr) interface{} { +// v, ok := e.Val(AttrSibling).(int64) +func (e *Entry) Val(a Attr) any { if f := e.AttrField(a); f != nil { return f.Val } return nil } -// AttrField returns the Field associated with attribute Attr in -// Entry, or nil if there is no such attribute. +// AttrField returns the [Field] associated with attribute [Attr] in +// [Entry], or nil if there is no such attribute. func (e *Entry) AttrField(a Attr) *Field { for i, f := range e.Field { if f.Attr == a { @@ -357,13 +401,13 @@ func (e *Entry) AttrField(a Attr) *Field { return nil } -// An Offset represents the location of an Entry within the DWARF info. -// (See Reader.Seek.) +// An Offset represents the location of an [Entry] within the DWARF info. +// (See [Reader.Seek].) type Offset uint32 // Entry reads a single entry from buf, decoding // according to the given abbreviation table. 
-func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { +func (b *buf) entry(cu *Entry, atab abbrevTable, ubase Offset, vers int) *Entry { off := b.off id := uint32(b.uint()) if id == 0 { @@ -380,14 +424,85 @@ func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { Children: a.children, Field: make([]Field, len(a.field)), } + + // If we are currently parsing the compilation unit, + // we can't evaluate Addrx or Strx until we've seen the + // relevant base entry. + type delayed struct { + idx int + off uint64 + fmt format + } + var delay []delayed + + resolveStrx := func(strBase, off uint64) string { + off += strBase + if uint64(int(off)) != off { + b.error("DW_FORM_strx offset out of range") + } + + b1 := makeBuf(b.dwarf, b.format, "str_offsets", 0, b.dwarf.strOffsets) + b1.skip(int(off)) + is64, _ := b.format.dwarf64() + if is64 { + off = b1.uint64() + } else { + off = uint64(b1.uint32()) + } + if b1.err != nil { + b.err = b1.err + return "" + } + if uint64(int(off)) != off { + b.error("DW_FORM_strx indirect offset out of range") + } + b1 = makeBuf(b.dwarf, b.format, "str", 0, b.dwarf.str) + b1.skip(int(off)) + val := b1.string() + if b1.err != nil { + b.err = b1.err + } + return val + } + + resolveRnglistx := func(rnglistsBase, off uint64) uint64 { + is64, _ := b.format.dwarf64() + if is64 { + off *= 8 + } else { + off *= 4 + } + off += rnglistsBase + if uint64(int(off)) != off { + b.error("DW_FORM_rnglistx offset out of range") + } + + b1 := makeBuf(b.dwarf, b.format, "rnglists", 0, b.dwarf.rngLists) + b1.skip(int(off)) + if is64 { + off = b1.uint64() + } else { + off = uint64(b1.uint32()) + } + if b1.err != nil { + b.err = b1.err + return 0 + } + if uint64(int(off)) != off { + b.error("DW_FORM_rnglistx indirect offset out of range") + } + return rnglistsBase + off + } + for i := range e.Field { e.Field[i].Attr = a.field[i].attr e.Field[i].Class = a.field[i].class fmt := a.field[i].fmt if fmt == formIndirect { fmt = format(b.uint()) + e.Field[i].Class 
= formToClass(fmt, a.field[i].attr, vers, b) } - var val interface{} + var val any switch fmt { default: b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16)) @@ -395,6 +510,47 @@ func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { // address case formAddr: val = b.addr() + case formAddrx, formAddrx1, formAddrx2, formAddrx3, formAddrx4: + var off uint64 + switch fmt { + case formAddrx: + off = b.uint() + case formAddrx1: + off = uint64(b.uint8()) + case formAddrx2: + off = uint64(b.uint16()) + case formAddrx3: + off = uint64(b.uint24()) + case formAddrx4: + off = uint64(b.uint32()) + } + if b.dwarf.addr == nil { + b.error("DW_FORM_addrx with no .debug_addr section") + } + if b.err != nil { + return nil + } + + // We have to adjust by the offset of the + // compilation unit. This won't work if the + // program uses Reader.Seek to skip over the + // unit. Not much we can do about that. + var addrBase int64 + if cu != nil { + addrBase, _ = cu.Val(AttrAddrBase).(int64) + } else if a.tag == TagCompileUnit { + delay = append(delay, delayed{i, off, formAddrx}) + break + } + + var err error + val, err = b.dwarf.debugAddr(b.format, uint64(addrBase), off) + if err != nil { + if b.err == nil { + b.err = err + } + return nil + } // block case formDwarfBlock1: @@ -415,10 +571,14 @@ func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { val = int64(b.uint32()) case formData8: val = int64(b.uint64()) + case formData16: + val = b.bytes(16) case formSdata: val = int64(b.int()) case formUdata: val = int64(b.uint()) + case formImplicitConst: + val = a.field[i].val // flag case formFlag: @@ -460,29 +620,91 @@ func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { // string case formString: val = b.string() - case formStrp: + case formStrp, formLineStrp: var off uint64 // offset into .debug_str is64, known := b.format.dwarf64() if !known { - b.error("unknown size for DW_FORM_strp") + b.error("unknown size for DW_FORM_strp/line_strp") } else if 
is64 { off = b.uint64() } else { off = uint64(b.uint32()) } if uint64(int(off)) != off { - b.error("DW_FORM_strp offset out of range") + b.error("DW_FORM_strp/line_strp offset out of range") } if b.err != nil { return nil } - b1 := makeBuf(b.dwarf, unknownFormat{}, "str", 0, b.dwarf.str) + var b1 buf + if fmt == formStrp { + b1 = makeBuf(b.dwarf, b.format, "str", 0, b.dwarf.str) + } else { + if len(b.dwarf.lineStr) == 0 { + b.error("DW_FORM_line_strp with no .debug_line_str section") + return nil + } + b1 = makeBuf(b.dwarf, b.format, "line_str", 0, b.dwarf.lineStr) + } b1.skip(int(off)) val = b1.string() if b1.err != nil { b.err = b1.err return nil } + case formStrx, formStrx1, formStrx2, formStrx3, formStrx4: + var off uint64 + switch fmt { + case formStrx: + off = b.uint() + case formStrx1: + off = uint64(b.uint8()) + case formStrx2: + off = uint64(b.uint16()) + case formStrx3: + off = uint64(b.uint24()) + case formStrx4: + off = uint64(b.uint32()) + } + if len(b.dwarf.strOffsets) == 0 { + b.error("DW_FORM_strx with no .debug_str_offsets section") + } + is64, known := b.format.dwarf64() + if !known { + b.error("unknown offset size for DW_FORM_strx") + } + if b.err != nil { + return nil + } + if is64 { + off *= 8 + } else { + off *= 4 + } + + // We have to adjust by the offset of the + // compilation unit. This won't work if the + // program uses Reader.Seek to skip over the + // unit. Not much we can do about that. + var strBase int64 + if cu != nil { + strBase, _ = cu.Val(AttrStrOffsetsBase).(int64) + } else if a.tag == TagCompileUnit { + delay = append(delay, delayed{i, off, formStrx}) + break + } + + val = resolveStrx(uint64(strBase), off) + + case formStrpSup: + is64, known := b.format.dwarf64() + if !known { + b.error("unknown size for DW_FORM_strp_sup") + } else if is64 { + val = b.uint64() + } else { + val = b.uint32() + } // lineptr, loclistptr, macptr, rangelistptr // New in DWARF 4, but clang can generate them with -gdwarf-2. 
@@ -507,31 +729,86 @@ func (b *buf) entry(atab abbrevTable, ubase Offset) *Entry { case formRefSig8: // 64-bit type signature. val = b.uint64() + case formRefSup4: + val = b.uint32() + case formRefSup8: + val = b.uint64() + + // loclist + case formLoclistx: + val = b.uint() + + // rnglist + case formRnglistx: + off := b.uint() + + // We have to adjust by the rnglists_base of + // the compilation unit. This won't work if + // the program uses Reader.Seek to skip over + // the unit. Not much we can do about that. + var rnglistsBase int64 + if cu != nil { + rnglistsBase, _ = cu.Val(AttrRnglistsBase).(int64) + } else if a.tag == TagCompileUnit { + delay = append(delay, delayed{i, off, formRnglistx}) + break + } + + val = resolveRnglistx(uint64(rnglistsBase), off) } + e.Field[i].Val = val } if b.err != nil { return nil } + + for _, del := range delay { + switch del.fmt { + case formAddrx: + addrBase, _ := e.Val(AttrAddrBase).(int64) + val, err := b.dwarf.debugAddr(b.format, uint64(addrBase), del.off) + if err != nil { + b.err = err + return nil + } + e.Field[del.idx].Val = val + case formStrx: + strBase, _ := e.Val(AttrStrOffsetsBase).(int64) + e.Field[del.idx].Val = resolveStrx(uint64(strBase), del.off) + if b.err != nil { + return nil + } + case formRnglistx: + rnglistsBase, _ := e.Val(AttrRnglistsBase).(int64) + e.Field[del.idx].Val = resolveRnglistx(uint64(rnglistsBase), del.off) + if b.err != nil { + return nil + } + } + } + return e } -// A Reader allows reading Entry structures from a DWARF ``info'' section. -// The Entry structures are arranged in a tree. The Reader's Next function +// A Reader allows reading [Entry] structures from a DWARF “info” section. +// The [Entry] structures are arranged in a tree. The [Reader.Next] function // return successive entries from a pre-order traversal of the tree. // If an entry has children, its Children field will be true, and the children -// follow, terminated by an Entry with Tag 0. 
+// follow, terminated by an [Entry] with [Tag] 0. type Reader struct { b buf d *Data err error unit int + lastUnit bool // set if last entry returned by Next is TagCompileUnit/TagPartialUnit lastChildren bool // .Children of last entry returned by Next lastSibling Offset // .Val(AttrSibling) of last entry returned by Next + cu *Entry // current compilation unit } -// Reader returns a new Reader for Data. -// The reader is positioned at byte offset 0 in the DWARF ``info'' section. +// Reader returns a new Reader for [Data]. +// The reader is positioned at byte offset 0 in the DWARF “info” section. func (d *Data) Reader() *Reader { r := &Reader{d: d} r.Seek(0) @@ -544,7 +821,12 @@ func (r *Reader) AddressSize() int { return r.d.unit[r.unit].asize } -// Seek positions the Reader at offset off in the encoded entry stream. +// ByteOrder returns the byte order in the current compilation unit. +func (r *Reader) ByteOrder() binary.ByteOrder { + return r.b.order +} + +// Seek positions the [Reader] at offset off in the encoded entry stream. // Offset 0 can be used to denote the first entry. func (r *Reader) Seek(off Offset) { d := r.d @@ -557,6 +839,7 @@ func (r *Reader) Seek(off Offset) { u := &d.unit[0] r.unit = 0 r.b = makeBuf(r.d, u, "info", u.off, u.data) + r.cu = nil return } @@ -565,6 +848,9 @@ func (r *Reader) Seek(off Offset) { r.err = errors.New("offset out of range") return } + if i != r.unit { + r.cu = nil + } u := &d.unit[i] r.unit = i r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:]) @@ -573,16 +859,22 @@ func (r *Reader) Seek(off Offset) { // maybeNextUnit advances to the next unit if this one is finished. func (r *Reader) maybeNextUnit() { for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) { - r.unit++ - u := &r.d.unit[r.unit] - r.b = makeBuf(r.d, u, "info", u.off, u.data) + r.nextUnit() } } +// nextUnit advances to the next unit. 
+func (r *Reader) nextUnit() { + r.unit++ + u := &r.d.unit[r.unit] + r.b = makeBuf(r.d, u, "info", u.off, u.data) + r.cu = nil +} + // Next reads the next entry from the encoded entry stream. // It returns nil, nil when it reaches the end of the section. // It returns an error if the current offset is invalid or the data at the -// offset cannot be decoded as a valid Entry. +// offset cannot be decoded as a valid [Entry]. func (r *Reader) Next() (*Entry, error) { if r.err != nil { return nil, r.err @@ -592,16 +884,21 @@ func (r *Reader) Next() (*Entry, error) { return nil, nil } u := &r.d.unit[r.unit] - e := r.b.entry(u.atable, u.base) + e := r.b.entry(r.cu, u.atable, u.base, u.vers) if r.b.err != nil { r.err = r.b.err return nil, r.err } + r.lastUnit = false if e != nil { r.lastChildren = e.Children if r.lastChildren { r.lastSibling, _ = e.Val(AttrSibling).(Offset) } + if e.Tag == TagCompileUnit || e.Tag == TagPartialUnit { + r.lastUnit = true + r.cu = e + } } else { r.lastChildren = false } @@ -609,8 +906,8 @@ func (r *Reader) Next() (*Entry, error) { } // SkipChildren skips over the child entries associated with -// the last Entry returned by Next. If that Entry did not have -// children or Next has not been called, SkipChildren is a no-op. +// the last [Entry] returned by [Reader.Next]. If that [Entry] did not have +// children or [Reader.Next] has not been called, SkipChildren is a no-op. func (r *Reader) SkipChildren() { if r.err != nil || !r.lastChildren { return @@ -625,6 +922,11 @@ func (r *Reader) SkipChildren() { return } + if r.lastUnit && r.unit+1 < len(r.d.unit) { + r.nextUnit() + return + } + for { e, err := r.Next() if err != nil || e == nil || e.Tag == 0 { @@ -648,9 +950,9 @@ func (r *Reader) offset() Offset { return r.b.off } -// SeekPC returns the Entry for the compilation unit that includes pc, +// SeekPC returns the [Entry] for the compilation unit that includes pc, // and positions the reader to read the children of that unit. 
If pc -// is not covered by any unit, SeekPC returns ErrUnknownPC and the +// is not covered by any unit, SeekPC returns [ErrUnknownPC] and the // position of the reader is undefined. // // Because compilation units can describe multiple regions of the @@ -669,12 +971,16 @@ func (r *Reader) SeekPC(pc uint64) (*Entry, error) { r.err = nil r.lastChildren = false r.unit = unit + r.cu = nil u := &r.d.unit[unit] r.b = makeBuf(r.d, u, "info", u.off, u.data) e, err := r.Next() if err != nil { return nil, err } + if e == nil || e.Tag == 0 { + return nil, ErrUnknownPC + } ranges, err := r.d.Ranges(e) if err != nil { return nil, err @@ -690,7 +996,7 @@ func (r *Reader) SeekPC(pc uint64) (*Entry, error) { } // Ranges returns the PC ranges covered by e, a slice of [low,high) pairs. -// Only some entry types, such as TagCompileUnit or TagSubprogram, have PC +// Only some entry types, such as [TagCompileUnit] or [TagSubprogram], have PC // ranges; for others, this will return nil with no error. func (d *Data) Ranges(e *Entry) ([][2]uint64, error) { var ret [][2]uint64 @@ -717,53 +1023,199 @@ func (d *Data) Ranges(e *Entry) ([][2]uint64, error) { ret = append(ret, [2]uint64{low, high}) } - ranges, rangesOK := e.Val(AttrRanges).(int64) - if rangesOK && d.ranges != nil { - // The initial base address is the lowpc attribute - // of the enclosing compilation unit. - // Although DWARF specifies the lowpc attribute, - // comments in gdb/dwarf2read.c say that some versions - // of GCC use the entrypc attribute, so we check that too. 
- var cu *Entry - if e.Tag == TagCompileUnit { - cu = e - } else { - i := d.offsetToUnit(e.Offset) - if i == -1 { - return nil, errors.New("no unit for entry") + var u *unit + if uidx := d.offsetToUnit(e.Offset); uidx >= 0 && uidx < len(d.unit) { + u = &d.unit[uidx] + } + + if u != nil && u.vers >= 5 && d.rngLists != nil { + // DWARF version 5 and later + field := e.AttrField(AttrRanges) + if field == nil { + return ret, nil + } + switch field.Class { + case ClassRangeListPtr: + ranges, rangesOK := field.Val.(int64) + if !rangesOK { + return ret, nil } - u := &d.unit[i] - b := makeBuf(d, u, "info", u.off, u.data) - cu = b.entry(u.atable, u.base) - if b.err != nil { - return nil, b.err + cu, base, err := d.baseAddressForEntry(e) + if err != nil { + return nil, err + } + return d.dwarf5Ranges(u, cu, base, ranges, ret) + + case ClassRngList: + rnglist, ok := field.Val.(uint64) + if !ok { + return ret, nil + } + cu, base, err := d.baseAddressForEntry(e) + if err != nil { + return nil, err } + return d.dwarf5Ranges(u, cu, base, int64(rnglist), ret) + + default: + return ret, nil + } + } + + // DWARF version 2 through 4 + ranges, rangesOK := e.Val(AttrRanges).(int64) + if rangesOK && d.ranges != nil { + _, base, err := d.baseAddressForEntry(e) + if err != nil { + return nil, err + } + return d.dwarf2Ranges(u, base, ranges, ret) + } + + return ret, nil +} + +// baseAddressForEntry returns the initial base address to be used when +// looking up the range list of entry e. +// DWARF specifies that this should be the lowpc attribute of the enclosing +// compilation unit, however comments in gdb/dwarf2read.c say that some +// versions of GCC use the entrypc attribute, so we check that too. 
+func (d *Data) baseAddressForEntry(e *Entry) (*Entry, uint64, error) { + var cu *Entry + if e.Tag == TagCompileUnit { + cu = e + } else { + i := d.offsetToUnit(e.Offset) + if i == -1 { + return nil, 0, errors.New("no unit for entry") } + u := &d.unit[i] + b := makeBuf(d, u, "info", u.off, u.data) + cu = b.entry(nil, u.atable, u.base, u.vers) + if b.err != nil { + return nil, 0, b.err + } + } - var base uint64 - if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK { - base = cuEntry - } else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK { - base = cuLow + if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK { + return cu, cuEntry, nil + } else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK { + return cu, cuLow, nil + } + + return cu, 0, nil +} + +func (d *Data) dwarf2Ranges(u *unit, base uint64, ranges int64, ret [][2]uint64) ([][2]uint64, error) { + if ranges < 0 || ranges > int64(len(d.ranges)) { + return nil, fmt.Errorf("invalid range offset %d (max %d)", ranges, len(d.ranges)) + } + buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:]) + for len(buf.data) > 0 { + low := buf.addr() + high := buf.addr() + + if low == 0 && high == 0 { + break } - u := &d.unit[d.offsetToUnit(e.Offset)] - buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:]) - for len(buf.data) > 0 { - low = buf.addr() - high = buf.addr() + if low == ^uint64(0)>>uint((8-u.addrsize())*8) { + base = high + } else { + ret = append(ret, [2]uint64{base + low, base + high}) + } + } - if low == 0 && high == 0 { - break + return ret, nil +} + +// dwarf5Ranges interprets a debug_rnglists sequence, see DWARFv5 section +// 2.17.3 (page 53). 
+func (d *Data) dwarf5Ranges(u *unit, cu *Entry, base uint64, ranges int64, ret [][2]uint64) ([][2]uint64, error) { + if ranges < 0 || ranges > int64(len(d.rngLists)) { + return nil, fmt.Errorf("invalid rnglist offset %d (max %d)", ranges, len(d.ranges)) + } + var addrBase int64 + if cu != nil { + addrBase, _ = cu.Val(AttrAddrBase).(int64) + } + + buf := makeBuf(d, u, "rnglists", 0, d.rngLists) + buf.skip(int(ranges)) + for { + opcode := buf.uint8() + switch opcode { + case rleEndOfList: + if buf.err != nil { + return nil, buf.err + } + return ret, nil + + case rleBaseAddressx: + baseIdx := buf.uint() + var err error + base, err = d.debugAddr(u, uint64(addrBase), baseIdx) + if err != nil { + return nil, err } - if low == ^uint64(0)>>uint((8-u.addrsize())*8) { - base = high - } else { - ret = append(ret, [2]uint64{base + low, base + high}) + case rleStartxEndx: + startIdx := buf.uint() + endIdx := buf.uint() + + start, err := d.debugAddr(u, uint64(addrBase), startIdx) + if err != nil { + return nil, err + } + end, err := d.debugAddr(u, uint64(addrBase), endIdx) + if err != nil { + return nil, err } + ret = append(ret, [2]uint64{start, end}) + + case rleStartxLength: + startIdx := buf.uint() + len := buf.uint() + start, err := d.debugAddr(u, uint64(addrBase), startIdx) + if err != nil { + return nil, err + } + ret = append(ret, [2]uint64{start, start + len}) + + case rleOffsetPair: + off1 := buf.uint() + off2 := buf.uint() + ret = append(ret, [2]uint64{base + off1, base + off2}) + + case rleBaseAddress: + base = buf.addr() + + case rleStartEnd: + start := buf.addr() + end := buf.addr() + ret = append(ret, [2]uint64{start, end}) + + case rleStartLength: + start := buf.addr() + len := buf.uint() + ret = append(ret, [2]uint64{start, start + len}) } } +} - return ret, nil +// debugAddr returns the address at idx in debug_addr +func (d *Data) debugAddr(format dataFormat, addrBase, idx uint64) (uint64, error) { + off := idx*uint64(format.addrsize()) + addrBase + + if 
uint64(int(off)) != off { + return 0, errors.New("offset out of range") + } + + b := makeBuf(d, format, "addr", 0, d.addr) + b.skip(int(off)) + val := b.addr() + if b.err != nil { + return 0, b.err + } + return val, nil } diff --git a/dwarf/entry_test.go b/dwarf/entry_test.go index 58f3023..1ce1c98 100644 --- a/dwarf/entry_test.go +++ b/dwarf/entry_test.go @@ -6,6 +6,8 @@ package dwarf_test import ( . "debug/dwarf" + "encoding/binary" + "path/filepath" "reflect" "testing" ) @@ -53,6 +55,26 @@ func TestReaderSeek(t *testing.T) { {0x400611, nil}, } testRanges(t, "testdata/line-gcc.elf", want) + + want = []wantRange{ + {0x401122, [][2]uint64{{0x401122, 0x401166}}}, + {0x401165, [][2]uint64{{0x401122, 0x401166}}}, + {0x401166, [][2]uint64{{0x401166, 0x401179}}}, + } + testRanges(t, "testdata/line-gcc-dwarf5.elf", want) + + want = []wantRange{ + {0x401130, [][2]uint64{{0x401130, 0x40117e}}}, + {0x40117d, [][2]uint64{{0x401130, 0x40117e}}}, + {0x40117e, nil}, + } + testRanges(t, "testdata/line-clang-dwarf5.elf", want) + + want = []wantRange{ + {0x401126, [][2]uint64{{0x401126, 0x40116a}}}, + {0x40116a, [][2]uint64{{0x40116a, 0x401180}}}, + } + testRanges(t, "testdata/line-gcc-zstd.elf", want) } func TestRangesSection(t *testing.T) { @@ -68,6 +90,19 @@ func TestRangesSection(t *testing.T) { testRanges(t, "testdata/ranges.elf", want) } +func TestRangesRnglistx(t *testing.T) { + want := []wantRange{ + {0x401000, [][2]uint64{{0x401020, 0x40102c}, {0x401000, 0x40101d}}}, + {0x40101c, [][2]uint64{{0x401020, 0x40102c}, {0x401000, 0x40101d}}}, + {0x40101d, nil}, + {0x40101f, nil}, + {0x401020, [][2]uint64{{0x401020, 0x40102c}, {0x401000, 0x40101d}}}, + {0x40102b, [][2]uint64{{0x401020, 0x40102c}, {0x401000, 0x40101d}}}, + {0x40102c, nil}, + } + testRanges(t, "testdata/rnglistx.elf", want) +} + func testRanges(t *testing.T, name string, want []wantRange) { d := elfData(t, name) r := d.Reader() @@ -95,44 +130,81 @@ func testRanges(t *testing.T, name string, want []wantRange) { } 
func TestReaderRanges(t *testing.T) { - d := elfData(t, "testdata/line-gcc.elf") - - subprograms := []struct { + type subprograms []struct { name string ranges [][2]uint64 + } + tests := []struct { + filename string + subprograms subprograms }{ - {"f1", [][2]uint64{{0x40059d, 0x4005e7}}}, - {"main", [][2]uint64{{0x4005e7, 0x400601}}}, - {"f2", [][2]uint64{{0x400601, 0x400611}}}, + { + "testdata/line-gcc.elf", + subprograms{ + {"f1", [][2]uint64{{0x40059d, 0x4005e7}}}, + {"main", [][2]uint64{{0x4005e7, 0x400601}}}, + {"f2", [][2]uint64{{0x400601, 0x400611}}}, + }, + }, + { + "testdata/line-gcc-dwarf5.elf", + subprograms{ + {"main", [][2]uint64{{0x401147, 0x401166}}}, + {"f1", [][2]uint64{{0x401122, 0x401147}}}, + {"f2", [][2]uint64{{0x401166, 0x401179}}}, + }, + }, + { + "testdata/line-clang-dwarf5.elf", + subprograms{ + {"main", [][2]uint64{{0x401130, 0x401144}}}, + {"f1", [][2]uint64{{0x401150, 0x40117e}}}, + {"f2", [][2]uint64{{0x401180, 0x401197}}}, + }, + }, + { + "testdata/line-gcc-zstd.elf", + subprograms{ + {"f2", nil}, + {"main", [][2]uint64{{0x40114b, 0x40116a}}}, + {"f1", [][2]uint64{{0x401126, 0x40114b}}}, + {"f2", [][2]uint64{{0x40116a, 0x401180}}}, + }, + }, } - r := d.Reader() - i := 0 - for entry, err := r.Next(); entry != nil && err == nil; entry, err = r.Next() { - if entry.Tag != TagSubprogram { - continue - } + for _, test := range tests { + d := elfData(t, test.filename) + subprograms := test.subprograms - if i > len(subprograms) { - t.Fatalf("too many subprograms (expected at most %d)", i) - } + r := d.Reader() + i := 0 + for entry, err := r.Next(); entry != nil && err == nil; entry, err = r.Next() { + if entry.Tag != TagSubprogram { + continue + } - if got := entry.Val(AttrName).(string); got != subprograms[i].name { - t.Errorf("subprogram %d name is %s, expected %s", i, got, subprograms[i].name) - } - ranges, err := d.Ranges(entry) - if err != nil { - t.Errorf("subprogram %d: %v", i, err) - continue - } - if !reflect.DeepEqual(ranges, 
subprograms[i].ranges) { - t.Errorf("subprogram %d ranges are %x, expected %x", i, ranges, subprograms[i].ranges) + if i > len(subprograms) { + t.Fatalf("%s: too many subprograms (expected at most %d)", test.filename, i) + } + + if got := entry.Val(AttrName).(string); got != subprograms[i].name { + t.Errorf("%s: subprogram %d name is %s, expected %s", test.filename, i, got, subprograms[i].name) + } + ranges, err := d.Ranges(entry) + if err != nil { + t.Errorf("%s: subprogram %d: %v", test.filename, i, err) + continue + } + if !reflect.DeepEqual(ranges, subprograms[i].ranges) { + t.Errorf("%s: subprogram %d ranges are %x, expected %x", test.filename, i, ranges, subprograms[i].ranges) + } + i++ } - i++ - } - if i < len(subprograms) { - t.Errorf("saw only %d subprograms, expected %d", i, len(subprograms)) + if i < len(subprograms) { + t.Errorf("%s: saw only %d subprograms, expected %d", test.filename, i, len(subprograms)) + } } } @@ -141,8 +213,10 @@ func Test64Bit(t *testing.T) { // compilation unit except by using XCOFF, so this is // hand-written. 
tests := []struct { - name string - info []byte + name string + info []byte + addrSize int + byteOrder binary.ByteOrder }{ { "32-bit little", @@ -157,6 +231,7 @@ func Test64Bit(t *testing.T) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + 8, binary.LittleEndian, }, { "64-bit little", @@ -171,6 +246,7 @@ func Test64Bit(t *testing.T) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + 8, binary.LittleEndian, }, { "64-bit big", @@ -185,13 +261,199 @@ func Test64Bit(t *testing.T) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, + 8, binary.BigEndian, }, } for _, test := range tests { - _, err := New(nil, nil, nil, test.info, nil, nil, nil, nil) + data, err := New(nil, nil, nil, test.info, nil, nil, nil, nil) if err != nil { t.Errorf("%s: %v", test.name, err) } + + r := data.Reader() + if r.AddressSize() != test.addrSize { + t.Errorf("%s: got address size %d, want %d", test.name, r.AddressSize(), test.addrSize) + } + if r.ByteOrder() != test.byteOrder { + t.Errorf("%s: got byte order %s, want %s", test.name, r.ByteOrder(), test.byteOrder) + } + } +} + +func TestUnitIteration(t *testing.T) { + // Iterate over all ELF test files we have and ensure that + // we get the same set of compilation units skipping (method 0) + // and not skipping (method 1) CU children. 
+ files, err := filepath.Glob(filepath.Join("testdata", "*.elf")) + if err != nil { + t.Fatal(err) + } + for _, file := range files { + t.Run(file, func(t *testing.T) { + d := elfData(t, file) + var units [2][]any + for method := range units { + for r := d.Reader(); ; { + ent, err := r.Next() + if err != nil { + t.Fatal(err) + } + if ent == nil { + break + } + if ent.Tag == TagCompileUnit { + units[method] = append(units[method], ent.Val(AttrName)) + } + if method == 0 { + if ent.Tag != TagCompileUnit { + t.Fatalf("found unexpected tag %v on top level", ent.Tag) + } + r.SkipChildren() + } + } + } + t.Logf("skipping CUs: %v", units[0]) + t.Logf("not-skipping CUs: %v", units[1]) + if !reflect.DeepEqual(units[0], units[1]) { + t.Fatal("set of CUs differ") + } + }) + } +} + +func TestIssue51758(t *testing.T) { + abbrev := []byte{0x21, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 
0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x22, 0x5c, + 0x6e, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x3a, 0x20, + 0x5c, 0x22, 0x5c, 0x5c, 0x30, 0x30, 0x35, 0x5c, 0x5c, 0x30, 0x30, + 0x30, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x5c, 0x30, 0x30, 0x30, + 0x5c, 0x5c, 0x30, 0x30, 0x34, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, + 0x5c, 0x30, 0x30, 0x30, 0x2d, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, + 0x22, 0x5c, 0x6e, 0x20, 0x20, 0x7d, 0x5c, 0x6e, 0x7d, 0x5c, 0x6e, + 0x22, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x72, 0x61, 0x6d, 0x65, + 0x3a, 0x20, 0x22, 0x21, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x33, 0x37, 0x37, 0x22, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, + 0x6e, 0x66, 0x6f, 0x3a, 0x20, 0x22, 0x5c, 0x30, 0x30, 0x35, 0x5c, + 0x30, 0x30, 0x30, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x30, 0x30, 0x30, + 0x5c, 0x30, 0x30, 0x34, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x30, 0x30, + 0x30, 0x2d, 0x5c, 0x30, 0x30, 0x30, 0x22, 0x0a, 0x20, 0x20, 0x7d, + 0x0a, 0x7d, 0x0a, 
0x6c, 0x69, 0x73, 0x74, 0x20, 0x7b, 0x0a, 0x7d, + 0x0a, 0x6c, 0x69, 0x73, 0x74, 0x20, 0x7b, 0x0a, 0x7d, 0x0a, 0x6c, + 0x69, 0x73, 0x74, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x4e, 0x65, 0x77, + 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x62, 0x72, + 0x65, 0x76, 0x3a, 0x20, 0x22, 0x5c, 0x30, 0x30, 0x35, 0x5c, 0x30, + 0x30, 0x30, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x30, 0x30, 0x30, 0x5c, + 0x30, 0x30, 0x34, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x30, 0x30, 0x30, + 0x2d, 0x5c, 0x30, 0x30, 0x30, 0x6c, 0x69, 0x73, 0x74, 0x20, 0x7b, + 0x5c, 0x6e, 0x20, 0x20, 0x4e, 0x65, 0x77, 0x20, 0x7b, 0x5c, 0x6e, + 0x20, 0x20, 0x20, 0x20, 0x61, 0x62, 0x62, 0x72, 0x65, 0x76, 0x3a, + 0x20, 0x5c, 0x22, 0x21, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x5c, 
0x33, 0x37, 0x37, 0x5c, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x22, 0x5c, 0x6e, 0x20, 0x20, 0x20, + 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x3a, 0x20, 0x5c, 0x22, 0x5c, 0x5c, + 0x30, 0x30, 0x35, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x5c, 0x30, + 0x30, 0x30, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x5c, 0x30, 0x30, + 0x34, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x5c, 0x30, 0x30, 0x30, + 0x2d, 0x5c, 0x5c, 0x30, 0x30, 0x30, 0x5c, 0x22, 0x5c, 0x6e, 0x20, + 0x20, 0x7d, 0x5c, 0x6e, 0x7d, 0x5c, 0x6e, 0x22, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x3a, 0x20, 0x22, 0x21, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, + 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, + 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, + 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, + 0x37, 0x5c, 0x33, 0x37, 0x37, 0x5c, 0x33, 0x37, 0x37, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff} + aranges := []byte{0x2c} + frame := []byte{} + info := []byte{0x5, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x2d, 0x0, 0x5, + 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x2d, 0x0} + + // The input above is malformed; the goal here it just to make sure + // that we don't get a panic or other bad behavior while trying to + // construct a dwarf.Data object from the input. For good measure, + // test to make sure we can handle the case where the input is + // truncated as well. 
+ for i := 0; i <= len(info); i++ { + truncated := info[:i] + dw, err := New(abbrev, aranges, frame, truncated, nil, nil, nil, nil) + if err == nil { + t.Errorf("expected error") + } else { + if dw != nil { + t.Errorf("got non-nil dw, wanted nil") + } + } + } +} + +func TestIssue52045(t *testing.T) { + var abbrev, aranges, frame, line, pubnames, ranges, str []byte + info := []byte{0x7, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} + + // A hand-crafted input corresponding to a minimal-size + // .debug_info (header only, no DIEs) and an empty abbrev table. + data0, _ := New(abbrev, aranges, frame, info, line, pubnames, ranges, str) + reader0 := data0.Reader() + entry0, _ := reader0.SeekPC(0x0) + // main goal is to make sure we can get here without crashing + if entry0 != nil { + t.Errorf("got non-nil entry0, wanted nil") } } diff --git a/dwarf/line.go b/dwarf/line.go index b862b49..3a02c8e 100644 --- a/dwarf/line.go +++ b/dwarf/line.go @@ -12,19 +12,24 @@ import ( "strings" ) -// A LineReader reads a sequence of LineEntry structures from a DWARF +// A LineReader reads a sequence of [LineEntry] structures from a DWARF // "line" section for a single compilation unit. LineEntries occur in -// order of increasing PC and each LineEntry gives metadata for the -// instructions from that LineEntry's PC to just before the next -// LineEntry's PC. The last entry will have its EndSequence field set. +// order of increasing PC and each [LineEntry] gives metadata for the +// instructions from that [LineEntry]'s PC to just before the next +// [LineEntry]'s PC. The last entry will have the [LineEntry.EndSequence] field set. type LineReader struct { buf buf // Original .debug_line section data. Used by Seek. 
section []byte + str []byte // .debug_str + lineStr []byte // .debug_line_str + // Header information version uint16 + addrsize int + segmentSelectorSize int minInstructionLength int maxOpsPerInstruction int defaultIsStmt bool @@ -132,7 +137,7 @@ type LineFile struct { } // LineReader returns a new reader for the line table of compilation -// unit cu, which must be an Entry with tag TagCompileUnit. +// unit cu, which must be an [Entry] with tag [TagCompileUnit]. // // If this compilation unit has no line table, it returns nil, nil. func (d *Data) LineReader(cu *Entry) (*LineReader, error) { @@ -147,7 +152,7 @@ func (d *Data) LineReader(cu *Entry) (*LineReader, error) { // cu has no line table. return nil, nil } - if off > int64(len(d.line)) { + if off < 0 || off > int64(len(d.line)) { return nil, errors.New("AttrStmtList value out of range") } // AttrCompDir is optional if all file names are absolute. Use @@ -158,10 +163,15 @@ func (d *Data) LineReader(cu *Entry) (*LineReader, error) { u := &d.unit[d.offsetToUnit(cu.Offset)] buf := makeBuf(d, u, "line", Offset(off), d.line[off:]) // The compilation directory is implicitly directories[0]. - r := LineReader{buf: buf, section: d.line, directories: []string{compDir}} + r := LineReader{ + buf: buf, + section: d.line, + str: d.str, + lineStr: d.lineStr, + } // Read the header. - if err := r.readHeader(); err != nil { + if err := r.readHeader(compDir); err != nil { return nil, err } @@ -173,7 +183,7 @@ func (d *Data) LineReader(cu *Entry) (*LineReader, error) { // readHeader reads the line number program header from r.buf and sets // all of the header fields in r. -func (r *LineReader) readHeader() error { +func (r *LineReader) readHeader(compDir string) error { buf := &r.buf // Read basic header fields [DWARF2 6.2.4]. 
@@ -184,7 +194,7 @@ func (r *LineReader) readHeader() error { return DecodeError{"line", hdrOffset, fmt.Sprintf("line table end %d exceeds section size %d", r.endOffset, buf.off+Offset(len(buf.data)))} } r.version = buf.uint16() - if buf.err == nil && (r.version < 2 || r.version > 4) { + if buf.err == nil && (r.version < 2 || r.version > 5) { // DWARF goes to all this effort to make new opcodes // backward-compatible, and then adds fields right in // the middle of the header in new versions, so we're @@ -192,13 +202,24 @@ func (r *LineReader) readHeader() error { // versions. return DecodeError{"line", hdrOffset, fmt.Sprintf("unknown line table version %d", r.version)} } + if r.version >= 5 { + r.addrsize = int(buf.uint8()) + r.segmentSelectorSize = int(buf.uint8()) + } else { + r.addrsize = buf.format.addrsize() + r.segmentSelectorSize = 0 + } var headerLength Offset if dwarf64 { headerLength = Offset(buf.uint64()) } else { headerLength = Offset(buf.uint32()) } - r.programOffset = buf.off + headerLength + programOffset := buf.off + headerLength + if programOffset > r.endOffset { + return DecodeError{"line", hdrOffset, fmt.Sprintf("malformed line table: program offset %d exceeds end offset %d", programOffset, r.endOffset)} + } + r.programOffset = programOffset r.minInstructionLength = int(buf.uint8()) if r.version >= 4 { // [DWARF4 6.2.4] @@ -238,39 +259,170 @@ func (r *LineReader) readHeader() error { } } - // Read include directories table. The caller already set - // directories[0] to the compilation directory. - for { - directory := buf.string() - if buf.err != nil { - return buf.err + if r.version < 5 { + // Read include directories table. + r.directories = []string{compDir} + for { + directory := buf.string() + if buf.err != nil { + return buf.err + } + if len(directory) == 0 { + break + } + if !pathIsAbs(directory) { + // Relative paths are implicitly relative to + // the compilation directory. 
+ directory = pathJoin(compDir, directory) + } + r.directories = append(r.directories, directory) } - if len(directory) == 0 { - break + + // Read file name list. File numbering starts with 1, + // so leave the first entry nil. + r.fileEntries = make([]*LineFile, 1) + for { + if done, err := r.readFileEntry(); err != nil { + return err + } else if done { + break + } } - if !pathIsAbs(directory) { - // Relative paths are implicitly relative to - // the compilation directory. - directory = pathJoin(r.directories[0], directory) + } else { + dirFormat := r.readLNCTFormat() + c := buf.uint() + r.directories = make([]string, c) + for i := range r.directories { + dir, _, _, err := r.readLNCT(dirFormat, dwarf64) + if err != nil { + return err + } + r.directories[i] = dir } - r.directories = append(r.directories, directory) - } - - // Read file name list. File numbering starts with 1, so leave - // the first entry nil. - r.fileEntries = make([]*LineFile, 1) - for { - if done, err := r.readFileEntry(); err != nil { - return err - } else if done { - break + fileFormat := r.readLNCTFormat() + c = buf.uint() + r.fileEntries = make([]*LineFile, c) + for i := range r.fileEntries { + name, mtime, size, err := r.readLNCT(fileFormat, dwarf64) + if err != nil { + return err + } + r.fileEntries[i] = &LineFile{name, mtime, int(size)} } } + r.initialFileEntries = len(r.fileEntries) return buf.err } +// lnctForm is a pair of an LNCT code and a form. This represents an +// entry in the directory name or file name description in the DWARF 5 +// line number program header. +type lnctForm struct { + lnct int + form format +} + +// readLNCTFormat reads an LNCT format description. +func (r *LineReader) readLNCTFormat() []lnctForm { + c := r.buf.uint8() + ret := make([]lnctForm, c) + for i := range ret { + ret[i].lnct = int(r.buf.uint()) + ret[i].form = format(r.buf.uint()) + } + return ret +} + +// readLNCT reads a sequence of LNCT entries and returns path information. 
+func (r *LineReader) readLNCT(s []lnctForm, dwarf64 bool) (path string, mtime uint64, size uint64, err error) { + var dir string + for _, lf := range s { + var str string + var val uint64 + switch lf.form { + case formString: + str = r.buf.string() + case formStrp, formLineStrp: + var off uint64 + if dwarf64 { + off = r.buf.uint64() + } else { + off = uint64(r.buf.uint32()) + } + if uint64(int(off)) != off { + return "", 0, 0, DecodeError{"line", r.buf.off, "strp/line_strp offset out of range"} + } + var b1 buf + if lf.form == formStrp { + b1 = makeBuf(r.buf.dwarf, r.buf.format, "str", 0, r.str) + } else { + b1 = makeBuf(r.buf.dwarf, r.buf.format, "line_str", 0, r.lineStr) + } + b1.skip(int(off)) + str = b1.string() + if b1.err != nil { + return "", 0, 0, DecodeError{"line", r.buf.off, b1.err.Error()} + } + case formStrpSup: + // Supplemental sections not yet supported. + if dwarf64 { + r.buf.uint64() + } else { + r.buf.uint32() + } + case formStrx: + // .debug_line.dwo sections not yet supported. + r.buf.uint() + case formStrx1: + r.buf.uint8() + case formStrx2: + r.buf.uint16() + case formStrx3: + r.buf.uint24() + case formStrx4: + r.buf.uint32() + case formData1: + val = uint64(r.buf.uint8()) + case formData2: + val = uint64(r.buf.uint16()) + case formData4: + val = uint64(r.buf.uint32()) + case formData8: + val = r.buf.uint64() + case formData16: + r.buf.bytes(16) + case formDwarfBlock: + r.buf.bytes(int(r.buf.uint())) + case formUdata: + val = r.buf.uint() + } + + switch lf.lnct { + case lnctPath: + path = str + case lnctDirectoryIndex: + if val >= uint64(len(r.directories)) { + return "", 0, 0, DecodeError{"line", r.buf.off, "directory index out of range"} + } + dir = r.directories[val] + case lnctTimestamp: + mtime = val + case lnctSize: + size = val + case lnctMD5: + // Ignored. 
+ } + } + + if dir != "" && path != "" { + path = pathJoin(dir, path) + } + + return path, mtime, size, nil +} + // readFileEntry reads a file entry from either the header or a // DW_LNE_define_file extended opcode and adds it to r.fileEntries. A // true return value indicates that there are no more entries to read. @@ -293,6 +445,19 @@ func (r *LineReader) readFileEntry() (bool, error) { mtime := r.buf.uint() length := int(r.buf.uint()) + // If this is a dynamically added path and the cursor was + // backed up, we may have already added this entry. Avoid + // updating existing line table entries in this case. This + // avoids an allocation and potential racy access to the slice + // backing store if the user called Files. + if len(r.fileEntries) < cap(r.fileEntries) { + fe := r.fileEntries[:len(r.fileEntries)+1] + if fe[len(fe)-1] != nil { + // We already processed this addition. + r.fileEntries = fe + return false, nil + } + } r.fileEntries = append(r.fileEntries, &LineFile{name, mtime, length}) return false, nil } @@ -309,7 +474,7 @@ func (r *LineReader) updateFile() { // Next sets *entry to the next row in this line table and moves to // the next row. If there are no more entries and the line table is -// properly terminated, it returns io.EOF. +// properly terminated, it returns [io.EOF]. // // Rows are always in order of increasing entry.Address, but // entry.Line may go forward or backward. 
@@ -381,7 +546,18 @@ func (r *LineReader) step(entry *LineEntry) bool { r.resetState() case lneSetAddress: - r.state.Address = r.buf.addr() + switch r.addrsize { + case 1: + r.state.Address = uint64(r.buf.uint8()) + case 2: + r.state.Address = uint64(r.buf.uint16()) + case 4: + r.state.Address = uint64(r.buf.uint32()) + case 8: + r.state.Address = r.buf.uint64() + default: + r.buf.error("unknown address size") + } case lneDefineFile: if done, err := r.readFileEntry(); err != nil { @@ -486,9 +662,9 @@ func (r *LineReader) Tell() LineReaderPos { return LineReaderPos{r.buf.off, len(r.fileEntries), r.state, r.fileIndex} } -// Seek restores the line table reader to a position returned by Tell. +// Seek restores the line table reader to a position returned by [LineReader.Tell]. // -// The argument pos must have been returned by a call to Tell on this +// The argument pos must have been returned by a call to [LineReader.Tell] on this // line table. func (r *LineReader) Seek(pos LineReaderPos) { r.buf.off = pos.off @@ -533,16 +709,32 @@ func (r *LineReader) resetState() { r.updateFile() } +// Files returns the file name table of this compilation unit as of +// the current position in the line table. The file name table may be +// referenced from attributes in this compilation unit such as +// [AttrDeclFile]. +// +// Before DWARF 5, entry 0 is nil (index 0 means "no file"); in DWARF 5 entry 0 is a regular file entry. +// +// The file name table of a compilation unit is not fixed. Files +// returns the file table as of the current position in the line +// table. This may contain more entries than the file table at an +// earlier position in the line table, though existing entries never +// change. +func (r *LineReader) Files() []*LineFile { + return r.fileEntries +} + // ErrUnknownPC is the error returned by LineReader.ScanPC when the // seek PC is not covered by any entry in the line table.
var ErrUnknownPC = errors.New("ErrUnknownPC") -// SeekPC sets *entry to the LineEntry that includes pc and positions +// SeekPC sets *entry to the [LineEntry] that includes pc and positions // the reader on the next entry in the line table. If necessary, this // will seek backwards to find pc. // // If pc is not covered by any entry in this line table, SeekPC -// returns ErrUnknownPC. In this case, *entry and the final seek +// returns [ErrUnknownPC]. In this case, *entry and the final seek // position are unspecified. // // Note that DWARF line tables only permit sequential, forward scans. @@ -618,7 +810,7 @@ func pathJoin(dirname, filename string) string { // DOS-style path. drive2, filename := splitDrive(filename) if drive2 != "" { - if strings.ToLower(drive) != strings.ToLower(drive2) { + if !strings.EqualFold(drive, drive2) { // Different drives. There's not much we can // do here, so just ignore the directory. return drive2 + filename @@ -626,7 +818,11 @@ func pathJoin(dirname, filename string) string { // Drives are the same. Ignore drive on filename. 
} if !(strings.HasSuffix(dirname, "/") || strings.HasSuffix(dirname, `\`)) && dirname != "" { - dirname += `\` + sep := `\` + if strings.HasPrefix(dirname, "/") { + sep = `/` + } + dirname += sep } return drive + dirname + filename } diff --git a/dwarf/line_test.go b/dwarf/line_test.go index 11a2544..e947d99 100644 --- a/dwarf/line_test.go +++ b/dwarf/line_test.go @@ -43,8 +43,47 @@ func TestLineELFGCC(t *testing.T) { {Address: 0x40060f, File: file2C, Line: 6, IsStmt: true}, {Address: 0x400611, EndSequence: true}, } + files := [][]*LineFile{{nil, file1H, file1C}, {nil, file2C}} - testLineTable(t, want, elfData(t, "testdata/line-gcc.elf")) + testLineTable(t, want, files, elfData(t, "testdata/line-gcc.elf")) +} + +func TestLineELFGCCZstd(t *testing.T) { + // Generated by: + // # gcc --version | head -n1 + // gcc (Debian 12.2.0-10) 12.2.0 + // # gcc -g -no-pie -Wl,--compress-debug-sections=zstd line*.c + + zfile1H := &LineFile{Name: "/home/iant/go/src/debug/dwarf/testdata/line1.h"} + zfile1C := &LineFile{Name: "/home/iant/go/src/debug/dwarf/testdata/line1.c"} + zfile2C := &LineFile{Name: "/home/iant/go/src/debug/dwarf/testdata/line2.c"} + + // Line table based on readelf --debug-dump=rawline,decodedline + want := []LineEntry{ + {Address: 0x401126, File: zfile1H, Line: 2, Column: 1, IsStmt: true}, + {Address: 0x40112a, File: zfile1H, Line: 5, Column: 8, IsStmt: true}, + {Address: 0x401131, File: zfile1H, Line: 5, Column: 2, IsStmt: true}, + {Address: 0x401133, File: zfile1H, Line: 6, Column: 10, IsStmt: true, Discriminator: 3}, + {Address: 0x40113d, File: zfile1H, Line: 5, Column: 22, IsStmt: true, Discriminator: 3}, + {Address: 0x401141, File: zfile1H, Line: 5, Column: 15, IsStmt: true, Discriminator: 1}, + {Address: 0x401147, File: zfile1H, Line: 7, Column: 1, IsStmt: true}, + {Address: 0x40114b, File: zfile1C, Line: 6, Column: 1, IsStmt: true}, + {Address: 0x40114f, File: zfile1C, Line: 7, Column: 2, IsStmt: true}, + {Address: 0x401159, File: zfile1C, Line: 8, 
Column: 2, IsStmt: true}, + {Address: 0x401168, File: zfile1C, Line: 9, Column: 1, IsStmt: true}, + {Address: 0x40116a, EndSequence: true}, + + {Address: 0x40116a, File: zfile2C, Line: 4, Column: 1, IsStmt: true}, + {Address: 0x40116e, File: zfile2C, Line: 5, Column: 2, IsStmt: true}, + {Address: 0x40117d, File: zfile2C, Line: 6, Column: 1, IsStmt: true}, + {Address: 0x401180, EndSequence: true}, + } + files := [][]*LineFile{ + {zfile1C, zfile1H, zfile1C}, + {zfile2C, zfile2C}, + } + + testLineTable(t, want, files, elfData(t, "testdata/line-gcc-zstd.elf")) } func TestLineGCCWindows(t *testing.T) { @@ -83,8 +122,9 @@ func TestLineGCCWindows(t *testing.T) { {Address: 0x401595, File: file2C, Line: 6, IsStmt: true}, {Address: 0x40159b, EndSequence: true}, } + files := [][]*LineFile{{nil, file1H, file1C}, {nil, file2C}} - testLineTable(t, want, peData(t, "testdata/line-gcc-win.bin")) + testLineTable(t, want, files, peData(t, "testdata/line-gcc-win.bin")) } func TestLineELFClang(t *testing.T) { @@ -110,8 +150,32 @@ func TestLineELFClang(t *testing.T) { {Address: 0x4005a7, File: file2C, Line: 6, IsStmt: true}, {Address: 0x4005b0, EndSequence: true}, } + files := [][]*LineFile{{nil, file1C, file1H}, {nil, file2C}} - testLineTable(t, want, elfData(t, "testdata/line-clang.elf")) + testLineTable(t, want, files, elfData(t, "testdata/line-clang.elf")) +} + +func TestLineRnglists(t *testing.T) { + // Test a newer file, generated by clang. 
+ file := &LineFile{Name: "/usr/local/google/home/iant/foo.c"} + want := []LineEntry{ + {Address: 0x401020, File: file, Line: 12, IsStmt: true}, + {Address: 0x401020, File: file, Line: 13, Column: 12, IsStmt: true, PrologueEnd: true}, + {Address: 0x401022, File: file, Line: 13, Column: 7}, + {Address: 0x401024, File: file, Line: 17, Column: 1, IsStmt: true}, + {Address: 0x401027, File: file, Line: 16, Column: 10, IsStmt: true}, + {Address: 0x40102c, EndSequence: true}, + {Address: 0x401000, File: file, Line: 2, IsStmt: true}, + {Address: 0x401000, File: file, Line: 6, Column: 17, IsStmt: true, PrologueEnd: true}, + {Address: 0x401002, File: file, Line: 6, Column: 3}, + {Address: 0x401019, File: file, Line: 9, Column: 3, IsStmt: true}, + {Address: 0x40101a, File: file, Line: 0, Column: 3}, + {Address: 0x40101c, File: file, Line: 9, Column: 3}, + {Address: 0x40101d, EndSequence: true}, + } + files := [][]*LineFile{{file}} + + testLineTable(t, want, files, elfData(t, "testdata/rnglistx.elf")) } func TestLineSeek(t *testing.T) { @@ -190,7 +254,7 @@ func TestLineSeek(t *testing.T) { } } -func testLineTable(t *testing.T, want []LineEntry, d *Data) { +func testLineTable(t *testing.T, want []LineEntry, files [][]*LineFile, d *Data) { // Get line table from d. var got []LineEntry dr := d.Reader() @@ -207,6 +271,12 @@ func testLineTable(t *testing.T, want []LineEntry, d *Data) { continue } + // Ignore system compilation units (this happens in + // the Windows binary). We'll still decode the line + // table, but won't check it. + name := ent.Val(AttrName).(string) + ignore := strings.HasPrefix(name, "C:/crossdev/") || strings.HasPrefix(name, "../../") + // Decode CU's line table. lr, err := d.LineReader(ent) if err != nil { @@ -225,16 +295,27 @@ func testLineTable(t *testing.T, want []LineEntry, d *Data) { t.Fatal("lr.Next:", err) } // Ignore sources from the Windows build environment. 
- if strings.HasPrefix(line.File.Name, "C:\\crossdev\\") || - strings.HasPrefix(line.File.Name, "C:/crossdev/") { + if ignore { continue } got = append(got, line) } + + // Check file table. + if !ignore { + if !compareFiles(files[0], lr.Files()) { + t.Log("File tables do not match. Got:") + dumpFiles(t, lr.Files()) + t.Log("Want:") + dumpFiles(t, files[0]) + t.Fail() + } + files = files[1:] + } } // Compare line tables. - if !compareLines(got, want) { + if !compareLines(t, got, want) { t.Log("Line tables do not match. Got:") dumpLines(t, got) t.Log("Want:") @@ -243,8 +324,36 @@ func testLineTable(t *testing.T, want []LineEntry, d *Data) { } } -func compareLines(a, b []LineEntry) bool { +func compareFiles(a, b []*LineFile) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] == nil && b[i] == nil { + continue + } + if a[i] != nil && b[i] != nil && a[i].Name == b[i].Name { + continue + } + return false + } + return true +} + +func dumpFiles(t *testing.T, files []*LineFile) { + for i, f := range files { + name := "" + if f != nil { + name = f.Name + } + t.Logf(" %d %s", i, name) + } +} + +func compareLines(t *testing.T, a, b []LineEntry) bool { + t.Helper() if len(a) != len(b) { + t.Errorf("len(a) == %d, len(b) == %d", len(a), len(b)) return false } @@ -257,11 +366,13 @@ func compareLines(a, b []LineEntry) bool { continue } if al.File.Name != bl.File.Name { + t.Errorf("%d: name %v != name %v", i, al.File.Name, bl.File.Name) return false } al.File = nil bl.File = nil if al != bl { + t.Errorf("%d: %#v != %#v", i, al, bl) return false } } @@ -295,6 +406,14 @@ var joinTests = []joinTest{ {`\\host\share\`, `foo\bar`, `\\host\share\foo\bar`}, {`//host/share/`, `foo/bar`, `//host/share/foo/bar`}, + // Note: the Go compiler currently emits DWARF line table paths + // with '/' instead of '\' (see issues #19784, #36495). These + // tests are to cover cases that might come up for Windows Go + // binaries. 
+ {`c:/workdir/go/src/x`, `y.go`, `c:/workdir/go/src/x/y.go`}, + {`d:/some/thing/`, `b.go`, `d:/some/thing/b.go`}, + {`e:\blah\`, `foo.c`, `e:\blah\foo.c`}, + // The following are "best effort". We shouldn't see relative // base directories in DWARF, but these test that pathJoin // doesn't fail miserably if it sees one. @@ -312,3 +431,32 @@ func TestPathJoin(t *testing.T) { } } } + +func TestPathLineReaderMalformed(t *testing.T) { + // This test case drawn from issue #52354. What's happening + // here is that the stmtList attribute in the compilation + // unit is malformed (negative). + var aranges, frame, pubnames, ranges, str []byte + abbrev := []byte{0x10, 0x20, 0x20, 0x20, 0x21, 0x20, 0x10, 0x21, 0x61, + 0x0, 0x0, 0xff, 0x20, 0xff, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20} + info := []byte{0x0, 0x0, 0x0, 0x9, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, + 0x20, 0x10, 0x10} + line := []byte{0x20} + Data0, err := New(abbrev, aranges, frame, info, line, pubnames, ranges, str) + if err != nil { + t.Fatalf("error unexpected: %v", err) + } + Reader0 := Data0.Reader() + Entry0, err := Reader0.Next() + if err != nil { + t.Fatalf("error unexpected: %v", err) + } + LineReader0, err := Data0.LineReader(Entry0) + if err == nil { + t.Fatalf("expected error") + } + if LineReader0 != nil { + t.Fatalf("expected nil line reader") + } +} diff --git a/dwarf/open.go b/dwarf/open.go index 57344d8..0901341 100644 --- a/dwarf/open.go +++ b/dwarf/open.go @@ -2,12 +2,25 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package dwarf provides access to DWARF debugging information loaded from -// executable files, as defined in the DWARF 2.0 Standard at -// http://dwarfstd.org/doc/dwarf-2.0.0.pdf +/* +Package dwarf provides access to DWARF debugging information loaded from +executable files, as defined in the DWARF 2.0 Standard at +http://dwarfstd.org/doc/dwarf-2.0.0.pdf. 
+ +# Security + +This package is not designed to be hardened against adversarial inputs, and is +outside the scope of https://go.dev/security/policy. In particular, only basic +validation is done when parsing object files. As such, care should be taken when +parsing untrusted inputs, as parsing malformed files may consume significant +resources, or cause panics. +*/ package dwarf -import "encoding/binary" +import ( + "encoding/binary" + "errors" +) // Data represents the DWARF debugging information // loaded from an executable file (for example, an ELF or Mach-O executable). @@ -22,18 +35,27 @@ type Data struct { ranges []byte str []byte + // New sections added in DWARF 5. + addr []byte + lineStr []byte + strOffsets []byte + rngLists []byte + // parsed data abbrevCache map[uint64]abbrevTable + bigEndian bool order binary.ByteOrder typeCache map[Offset]Type typeSigs map[uint64]*typeUnit unit []unit } -// New returns a new Data object initialized from the given parameters. +var errSegmentSelector = errors.New("non-zero segment_selector size not supported") + +// New returns a new [Data] object initialized from the given parameters. // Rather than calling this function directly, clients should typically use -// the DWARF method of the File type of the appropriate package debug/elf, -// debug/macho, or debug/pe. +// the DWARF method of the File type of the appropriate package [debug/elf], +// [debug/macho], or [debug/pe]. 
// // The []byte arguments are the data from the corresponding debug section // in the object file; for example, for an ELF object, abbrev is the contents of @@ -72,8 +94,10 @@ func New(abbrev, aranges, frame, info, line, pubnames, ranges, str []byte) (*Dat case x == 0 && y == 0: return nil, DecodeError{"info", 4, "unsupported version 0"} case x == 0: + d.bigEndian = true d.order = binary.BigEndian case y == 0: + d.bigEndian = false d.order = binary.LittleEndian default: return nil, DecodeError{"info", 4, "cannot determine byte order"} @@ -94,3 +118,23 @@ func New(abbrev, aranges, frame, info, line, pubnames, ranges, str []byte) (*Dat func (d *Data) AddTypes(name string, types []byte) error { return d.parseTypes(name, types) } + +// AddSection adds another DWARF section by name. The name should be a +// DWARF section name such as ".debug_addr", ".debug_str_offsets", and +// so forth. This approach is used for new DWARF sections added in +// DWARF 5 and later. +func (d *Data) AddSection(name string, contents []byte) error { + var err error + switch name { + case ".debug_addr": + d.addr = contents + case ".debug_line_str": + d.lineStr = contents + case ".debug_str_offsets": + d.strOffsets = contents + case ".debug_rnglists": + d.rngLists = contents + } + // Just ignore names that we don't yet support. + return err +} diff --git a/dwarf/tag_string.go b/dwarf/tag_string.go index ac396af..b79ea17 100644 --- a/dwarf/tag_string.go +++ b/dwarf/tag_string.go @@ -4,20 +4,95 @@ package dwarf import "strconv" +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[TagArrayType-1] + _ = x[TagClassType-2] + _ = x[TagEntryPoint-3] + _ = x[TagEnumerationType-4] + _ = x[TagFormalParameter-5] + _ = x[TagImportedDeclaration-8] + _ = x[TagLabel-10] + _ = x[TagLexDwarfBlock-11] + _ = x[TagMember-13] + _ = x[TagPointerType-15] + _ = x[TagReferenceType-16] + _ = x[TagCompileUnit-17] + _ = x[TagStringType-18] + _ = x[TagStructType-19] + _ = x[TagSubroutineType-21] + _ = x[TagTypedef-22] + _ = x[TagUnionType-23] + _ = x[TagUnspecifiedParameters-24] + _ = x[TagVariant-25] + _ = x[TagCommonDwarfBlock-26] + _ = x[TagCommonInclusion-27] + _ = x[TagInheritance-28] + _ = x[TagInlinedSubroutine-29] + _ = x[TagModule-30] + _ = x[TagPtrToMemberType-31] + _ = x[TagSetType-32] + _ = x[TagSubrangeType-33] + _ = x[TagWithStmt-34] + _ = x[TagAccessDeclaration-35] + _ = x[TagBaseType-36] + _ = x[TagCatchDwarfBlock-37] + _ = x[TagConstType-38] + _ = x[TagConstant-39] + _ = x[TagEnumerator-40] + _ = x[TagFileType-41] + _ = x[TagFriend-42] + _ = x[TagNamelist-43] + _ = x[TagNamelistItem-44] + _ = x[TagPackedType-45] + _ = x[TagSubprogram-46] + _ = x[TagTemplateTypeParameter-47] + _ = x[TagTemplateValueParameter-48] + _ = x[TagThrownType-49] + _ = x[TagTryDwarfBlock-50] + _ = x[TagVariantPart-51] + _ = x[TagVariable-52] + _ = x[TagVolatileType-53] + _ = x[TagDwarfProcedure-54] + _ = x[TagRestrictType-55] + _ = x[TagInterfaceType-56] + _ = x[TagNamespace-57] + _ = x[TagImportedModule-58] + _ = x[TagUnspecifiedType-59] + _ = x[TagPartialUnit-60] + _ = x[TagImportedUnit-61] + _ = x[TagMutableType-62] + _ = x[TagCondition-63] + _ = x[TagSharedType-64] + _ = x[TagTypeUnit-65] + _ = x[TagRvalueReferenceType-66] + _ = x[TagTemplateAlias-67] + _ = x[TagCoarrayType-68] + _ = x[TagGenericSubrange-69] + _ = x[TagDynamicType-70] + _ = x[TagAtomicType-71] + _ = x[TagCallSite-72] + _ = x[TagCallSiteParameter-73] + _ = x[TagSkeletonUnit-74] + _ = x[TagImmutableType-75] +} + const ( _Tag_name_0 = 
"ArrayTypeClassTypeEntryPointEnumerationTypeFormalParameter" _Tag_name_1 = "ImportedDeclaration" _Tag_name_2 = "LabelLexDwarfBlock" _Tag_name_3 = "Member" _Tag_name_4 = "PointerTypeReferenceTypeCompileUnitStringTypeStructType" - _Tag_name_5 = "SubroutineTypeTypedefUnionTypeUnspecifiedParametersVariantCommonDwarfBlockCommonInclusionInheritanceInlinedSubroutineModulePtrToMemberTypeSetTypeSubrangeTypeWithStmtAccessDeclarationBaseTypeCatchDwarfBlockConstTypeConstantEnumeratorFileTypeFriendNamelistNamelistItemPackedTypeSubprogramTemplateTypeParameterTemplateValueParameterThrownTypeTryDwarfBlockVariantPartVariableVolatileTypeDwarfProcedureRestrictTypeInterfaceTypeNamespaceImportedModuleUnspecifiedTypePartialUnitImportedUnitMutableTypeConditionSharedTypeTypeUnitRvalueReferenceTypeTemplateAlias" + _Tag_name_5 = "SubroutineTypeTypedefUnionTypeUnspecifiedParametersVariantCommonDwarfBlockCommonInclusionInheritanceInlinedSubroutineModulePtrToMemberTypeSetTypeSubrangeTypeWithStmtAccessDeclarationBaseTypeCatchDwarfBlockConstTypeConstantEnumeratorFileTypeFriendNamelistNamelistItemPackedTypeSubprogramTemplateTypeParameterTemplateValueParameterThrownTypeTryDwarfBlockVariantPartVariableVolatileTypeDwarfProcedureRestrictTypeInterfaceTypeNamespaceImportedModuleUnspecifiedTypePartialUnitImportedUnitMutableTypeConditionSharedTypeTypeUnitRvalueReferenceTypeTemplateAliasCoarrayTypeGenericSubrangeDynamicTypeAtomicTypeCallSiteCallSiteParameterSkeletonUnitImmutableType" ) var ( _Tag_index_0 = [...]uint8{0, 9, 18, 28, 43, 58} _Tag_index_2 = [...]uint8{0, 5, 18} _Tag_index_4 = [...]uint8{0, 11, 24, 35, 45, 55} - _Tag_index_5 = [...]uint16{0, 14, 21, 30, 51, 58, 74, 89, 100, 117, 123, 138, 145, 157, 165, 182, 190, 205, 214, 222, 232, 240, 246, 254, 266, 276, 286, 307, 329, 339, 352, 363, 371, 383, 397, 409, 422, 431, 445, 460, 471, 483, 494, 503, 513, 521, 540, 553} + _Tag_index_5 = [...]uint16{0, 14, 21, 30, 51, 58, 74, 89, 100, 117, 123, 138, 145, 157, 165, 182, 190, 205, 214, 222, 232, 240, 
246, 254, 266, 276, 286, 307, 329, 339, 352, 363, 371, 383, 397, 409, 422, 431, 445, 460, 471, 483, 494, 503, 513, 521, 540, 553, 564, 579, 590, 600, 608, 625, 637, 650} ) func (i Tag) String() string { @@ -35,7 +110,7 @@ func (i Tag) String() string { case 15 <= i && i <= 19: i -= 15 return _Tag_name_4[_Tag_index_4[i]:_Tag_index_4[i+1]] - case 21 <= i && i <= 67: + case 21 <= i && i <= 75: i -= 21 return _Tag_name_5[_Tag_index_5[i]:_Tag_index_5[i+1]] default: diff --git a/dwarf/testdata/bitfields.c b/dwarf/testdata/bitfields.c new file mode 100644 index 0000000..0583333 --- /dev/null +++ b/dwarf/testdata/bitfields.c @@ -0,0 +1,17 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Linux ELF: +gcc -gdwarf-4 -m64 -c bitfields.c -o bitfields.elf4 +*/ + +typedef struct another_struct { + unsigned short quix; + int xyz[0]; + unsigned x:1; + long long array[40]; +} t_another_struct; +t_another_struct q2; + diff --git a/dwarf/testdata/bitfields.elf4 b/dwarf/testdata/bitfields.elf4 new file mode 100644 index 0000000..2e06e68 Binary files /dev/null and b/dwarf/testdata/bitfields.elf4 differ diff --git a/dwarf/testdata/cppunsuptypes.cc b/dwarf/testdata/cppunsuptypes.cc new file mode 100644 index 0000000..e9281c7 --- /dev/null +++ b/dwarf/testdata/cppunsuptypes.cc @@ -0,0 +1,34 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// cppunsuptypes.elf built with g++ 7.3 +// g++ -g -c -o cppunsuptypes.elf cppunsuptypes.cc + +int i = 3; +double d = 3; + +// anonymous reference type +int &culprit = i; + +// named reference type +typedef double &dref; +dref dr = d; + +// incorporated into another type +typedef struct { + dref q; + int &r; +} hasrefs; + +hasrefs hr = { d, i }; + +// This code is intended to trigger a DWARF "pointer to member" type DIE +struct CS { int dm; }; + +int foo() +{ + int CS::* pdm = &CS::dm; + CS cs = {42}; + return cs.*pdm; +} diff --git a/dwarf/testdata/cppunsuptypes.elf b/dwarf/testdata/cppunsuptypes.elf new file mode 100644 index 0000000..e955512 Binary files /dev/null and b/dwarf/testdata/cppunsuptypes.elf differ diff --git a/dwarf/testdata/debug_rnglists b/dwarf/testdata/debug_rnglists new file mode 100644 index 0000000..985ec6c Binary files /dev/null and b/dwarf/testdata/debug_rnglists differ diff --git a/dwarf/testdata/line-clang-dwarf5.elf b/dwarf/testdata/line-clang-dwarf5.elf new file mode 100644 index 0000000..7b80c9c Binary files /dev/null and b/dwarf/testdata/line-clang-dwarf5.elf differ diff --git a/dwarf/testdata/line-gcc-dwarf5.elf b/dwarf/testdata/line-gcc-dwarf5.elf new file mode 100644 index 0000000..34ce17c Binary files /dev/null and b/dwarf/testdata/line-gcc-dwarf5.elf differ diff --git a/dwarf/testdata/line-gcc-zstd.elf b/dwarf/testdata/line-gcc-zstd.elf new file mode 100644 index 0000000..45cbe72 Binary files /dev/null and b/dwarf/testdata/line-gcc-zstd.elf differ diff --git a/dwarf/testdata/rnglistx.c b/dwarf/testdata/rnglistx.c new file mode 100644 index 0000000..8770435 --- /dev/null +++ b/dwarf/testdata/rnglistx.c @@ -0,0 +1,19 @@ +// clang -gdwarf-5 -O2 -nostdlib + +__attribute__((noinline, cold)) +static int sum(int i) { + int j, s; + + s = 0; + for (j = 0; j < i; j++) { + s += j * i; + } + return s; +} + +int main(int argc, char** argv) { + if (argc == 0) { + return 0; + } + return sum(argc); +} diff --git a/dwarf/testdata/rnglistx.elf 
b/dwarf/testdata/rnglistx.elf new file mode 100755 index 0000000..c2d7f55 Binary files /dev/null and b/dwarf/testdata/rnglistx.elf differ diff --git a/dwarf/testdata/typedef.c b/dwarf/testdata/typedef.c index 4780a0b..3e7e008 100644 --- a/dwarf/testdata/typedef.c +++ b/dwarf/testdata/typedef.c @@ -8,6 +8,7 @@ gcc -gdwarf-2 -m64 -c typedef.c && gcc -gdwarf-2 -m64 -o typedef.elf typedef.o OS X Mach-O: gcc -gdwarf-2 -m64 -c typedef.c -o typedef.macho +gcc -gdwarf-4 -m64 -c typedef.c -o typedef.macho4 */ #include diff --git a/dwarf/testdata/typedef.elf5 b/dwarf/testdata/typedef.elf5 new file mode 100644 index 0000000..aec48f6 Binary files /dev/null and b/dwarf/testdata/typedef.elf5 differ diff --git a/dwarf/testdata/typedef.macho4 b/dwarf/testdata/typedef.macho4 new file mode 100644 index 0000000..093ff37 Binary files /dev/null and b/dwarf/testdata/typedef.macho4 differ diff --git a/dwarf/type.go b/dwarf/type.go index 4352092..627d3a1 100644 --- a/dwarf/type.go +++ b/dwarf/type.go @@ -11,7 +11,7 @@ package dwarf import "strconv" // A Type conventionally represents a pointer to any of the -// specific Type structures (CharType, StructType, etc.). +// specific Type structures ([CharType], [StructType], etc.). type Type interface { Common() *CommonType String() string @@ -33,10 +33,14 @@ func (c *CommonType) Size() int64 { return c.ByteSize } // Basic types // A BasicType holds fields common to all basic types. +// +// See the documentation for [StructField] for more info on the interpretation of +// the BitSize/BitOffset/DataBitOffset fields. type BasicType struct { CommonType - BitSize int64 - BitOffset int64 + BitSize int64 + BitOffset int64 + DataBitOffset int64 } func (b *BasicType) Basic() *BasicType { return b } @@ -150,13 +154,86 @@ type StructType struct { } // A StructField represents a field in a struct, union, or C++ class type. 
+// +// # Bit Fields +// +// The BitSize, BitOffset, and DataBitOffset fields describe the bit +// size and offset of data members declared as bit fields in C/C++ +// struct/union/class types. +// +// BitSize is the number of bits in the bit field. +// +// DataBitOffset, if non-zero, is the number of bits from the start of +// the enclosing entity (e.g. containing struct/class/union) to the +// start of the bit field. This corresponds to the DW_AT_data_bit_offset +// DWARF attribute that was introduced in DWARF 4. +// +// BitOffset, if non-zero, is the number of bits between the most +// significant bit of the storage unit holding the bit field and the +// most significant bit of the bit field. Here "storage unit" is the +// type name before the bit field (for a field "unsigned x:17", the +// storage unit is "unsigned"). BitOffset values can vary depending on +// the endianness of the system. BitOffset corresponds to the +// DW_AT_bit_offset DWARF attribute that was deprecated in DWARF 4 and +// removed in DWARF 5. +// +// At most one of DataBitOffset and BitOffset will be non-zero; +// DataBitOffset/BitOffset will only be non-zero if BitSize is +// non-zero. Whether a C compiler uses one or the other +// will depend on compiler vintage and command line options. +// +// Here is an example of C/C++ bit field use, along with what to +// expect in terms of DWARF bit offset info. Consider this code: +// +// struct S { +// int q; +// int j:5; +// int k:6; +// int m:5; +// int n:8; +// } s; +// +// For the code above, one would expect to see the following for +// DW_AT_bit_offset values (using GCC 8): +// +// Little | Big +// Endian | Endian +// | +// "j": 27 | 0 +// "k": 21 | 5 +// "m": 16 | 11 +// "n": 8 | 16 +// +// Note that in the above the offsets are purely with respect to the +// containing storage unit for j/k/m/n -- these values won't vary based +// on the size of prior data members in the containing struct.
+// +// If the compiler emits DW_AT_data_bit_offset, the expected values +// would be: +// +// "j": 32 +// "k": 37 +// "m": 43 +// "n": 48 +// +// Here the value 32 for "j" reflects the fact that the bit field is +// preceded by other data members (recall that DW_AT_data_bit_offset +// values are relative to the start of the containing struct). Hence +// DW_AT_data_bit_offset values can be quite large for structs with +// many fields. +// +// DWARF also allows for the possibility of base types that have +// non-zero bit size and bit offset, so this information is also +// captured for base types, but it is worth noting that it is not +// possible to trigger this behavior using mainstream languages. type StructField struct { - Name string - Type Type - ByteOffset int64 - ByteSize int64 // usually zero; use Type.Size() for normal fields - BitOffset int64 // within the ByteSize bytes at ByteOffset - BitSize int64 // zero if not a bit field + Name string + Type Type + ByteOffset int64 + ByteSize int64 // usually zero; use Type.Size() for normal fields + BitOffset int64 + DataBitOffset int64 + BitSize int64 // zero if not a bit field } func (t *StructType) String() string { @@ -166,6 +243,13 @@ func (t *StructType) String() string { return t.Defn() } +func (f *StructField) bitOffset() int64 { + if f.BitOffset != 0 { + return f.BitOffset + } + return f.DataBitOffset +} + func (t *StructType) Defn() string { s := t.Kind if t.StructName != "" { @@ -184,7 +268,7 @@ func (t *StructType) Defn() string { s += "@" + strconv.FormatInt(f.ByteOffset, 10) if f.BitSize > 0 { s += " : " + strconv.FormatInt(f.BitSize, 10) - s += "@" + strconv.FormatInt(f.BitOffset, 10) + s += "@" + strconv.FormatInt(f.bitOffset(), 10) } } s += "}" @@ -193,7 +277,7 @@ func (t *StructType) Defn() string { // An EnumType represents an enumerated type. // The only indication of its native integer type is its ByteSize -// (inside CommonType). +// (inside [CommonType]).
type EnumType struct { CommonType EnumName string @@ -261,6 +345,20 @@ func (t *TypedefType) String() string { return t.Name } func (t *TypedefType) Size() int64 { return t.Type.Size() } +// An UnsupportedType is a placeholder returned in situations where we +// encounter a type that isn't supported. +type UnsupportedType struct { + CommonType + Tag Tag +} + +func (t *UnsupportedType) String() string { + if t.Name != "" { + return t.Name + } + return t.Name + "(unsupported type " + t.Tag.String() + ")" +} + // typeReader is used to read from either the info section or the // types section. type typeReader interface { @@ -273,16 +371,40 @@ type typeReader interface { AddressSize() int } -// Type reads the type at off in the DWARF ``info'' section. +// Type reads the type at off in the DWARF “info” section. func (d *Data) Type(off Offset) (Type, error) { return d.readType("info", d.Reader(), off, d.typeCache, nil) } +type typeFixer struct { + typedefs []*TypedefType + arraytypes []*Type +} + +func (tf *typeFixer) recordArrayType(t *Type) { + if t == nil { + return + } + _, ok := (*t).(*ArrayType) + if ok { + tf.arraytypes = append(tf.arraytypes, t) + } +} + +func (tf *typeFixer) apply() { + for _, t := range tf.typedefs { + t.Common().ByteSize = t.Type.Size() + } + for _, t := range tf.arraytypes { + zeroArray(t) + } +} + // readType reads a type from r at off of name. It adds types to the // type cache, appends new typedef types to typedefs, and computes the // sizes of types. Callers should pass nil for typedefs; this is used // for internal recursion. 
-func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Offset]Type, typedefs *[]*TypedefType) (Type, error) { +func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Offset]Type, fixups *typeFixer) (Type, error) { if t, ok := typeCache[off]; ok { return t, nil } @@ -297,18 +419,16 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off } // If this is the root of the recursion, prepare to resolve - // typedef sizes once the recursion is done. This must be done - // after the type graph is constructed because it may need to - // resolve cycles in a different order than readType - // encounters them. - if typedefs == nil { - var typedefList []*TypedefType + // typedef sizes and perform other fixups once the recursion is + // done. This must be done after the type graph is constructed + // because it may need to resolve cycles in a different order than + // readType encounters them. + if fixups == nil { + var fixer typeFixer defer func() { - for _, t := range typedefList { - t.Common().ByteSize = t.Type.Size() - } + fixer.apply() }() - typedefs = &typedefList + fixups = &fixer } // Parse type from Entry. 
@@ -362,7 +482,7 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off var t Type switch toff := tval.(type) { case Offset: - if t, err = d.readType(name, r.clone(), toff, typeCache, typedefs); err != nil { + if t, err = d.readType(name, r.clone(), toff, typeCache, fixups); err != nil { return nil } case uint64: @@ -433,8 +553,12 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off // AttrName: name of base type in programming language of the compilation unit [required] // AttrEncoding: encoding value for type (encFloat etc) [required] // AttrByteSize: size of type in bytes [required] - // AttrBitOffset: for sub-byte types, size in bits - // AttrBitSize: for sub-byte types, bit offset of high order bit in the AttrByteSize bytes + // AttrBitOffset: bit offset of value within containing storage unit + // AttrDataBitOffset: bit offset of value within containing storage unit + // AttrBitSize: size in bits + // + // For most languages BitOffset/DataBitOffset/BitSize will not be present + // for base types. name, _ := e.Val(AttrName).(string) enc, ok := e.Val(AttrEncoding).(int64) if !ok { @@ -480,7 +604,14 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off }).Basic() t.Name = name t.BitSize, _ = e.Val(AttrBitSize).(int64) - t.BitOffset, _ = e.Val(AttrBitOffset).(int64) + haveBitOffset := false + haveDataBitOffset := false + t.BitOffset, haveBitOffset = e.Val(AttrBitOffset).(int64) + t.DataBitOffset, haveDataBitOffset = e.Val(AttrDataBitOffset).(int64) + if haveBitOffset && haveDataBitOffset { + err = DecodeError{name, e.Offset, "duplicate bit offset attributes"} + goto Error + } case TagClassType, TagStructType, TagUnionType: // Structure, union, or class type. 
(DWARF v2 §5.5) @@ -494,6 +625,7 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off // AttrType: type of member [required] // AttrByteSize: size in bytes // AttrBitOffset: bit offset within bytes for bit fields + // AttrDataBitOffset: field bit offset relative to struct start // AttrBitSize: bit size for bit fields // AttrDataMemberLoc: location within struct [required for struct, class] // There is much more to handle C++, all ignored for now. @@ -512,7 +644,8 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off t.Incomplete = e.Val(AttrDeclaration) != nil t.Field = make([]*StructField, 0, 8) var lastFieldType *Type - var lastFieldBitOffset int64 + var lastFieldBitSize int64 + var lastFieldByteOffset int64 for kid := next(); kid != nil; kid = next() { if kid.Tag != TagMember { continue @@ -539,30 +672,33 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off f.ByteOffset = loc } - haveBitOffset := false f.Name, _ = kid.Val(AttrName).(string) f.ByteSize, _ = kid.Val(AttrByteSize).(int64) + haveBitOffset := false + haveDataBitOffset := false f.BitOffset, haveBitOffset = kid.Val(AttrBitOffset).(int64) + f.DataBitOffset, haveDataBitOffset = kid.Val(AttrDataBitOffset).(int64) + if haveBitOffset && haveDataBitOffset { + err = DecodeError{name, e.Offset, "duplicate bit offset attributes"} + goto Error + } f.BitSize, _ = kid.Val(AttrBitSize).(int64) t.Field = append(t.Field, f) - bito := f.BitOffset - if !haveBitOffset { - bito = f.ByteOffset * 8 - } - if bito == lastFieldBitOffset && t.Kind != "union" { + if lastFieldBitSize == 0 && lastFieldByteOffset == f.ByteOffset && t.Kind != "union" { // Last field was zero width. Fix array length. // (DWARF writes out 0-length arrays as if they were 1-length arrays.) 
- zeroArray(lastFieldType) + fixups.recordArrayType(lastFieldType) } lastFieldType = &f.Type - lastFieldBitOffset = bito + lastFieldByteOffset = f.ByteOffset + lastFieldBitSize = f.BitSize } if t.Kind != "union" { b, ok := e.Val(AttrByteSize).(int64) - if ok && b*8 == lastFieldBitOffset { + if ok && b == lastFieldByteOffset { // Final field must be zero width. Fix array length. - zeroArray(lastFieldType) + fixups.recordArrayType(lastFieldType) } } @@ -680,6 +816,16 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off typ = t typeCache[off] = t t.Name, _ = e.Val(AttrName).(string) + + default: + // This is some other type DIE that we're currently not + // equipped to handle. Return an abstract "unsupported type" + // object in such cases. + t := new(UnsupportedType) + typ = t + typeCache[off] = t + t.Tag = e.Tag + t.Name, _ = e.Val(AttrName).(string) } if err != nil { @@ -695,7 +841,7 @@ func (d *Data) readType(name string, r typeReader, off Offset, typeCache map[Off // Record that we need to resolve this // type's size once the type graph is // constructed. - *typedefs = append(*typedefs, t) + fixups.typedefs = append(fixups.typedefs, t) case *PtrType: b = int64(addressSize) } @@ -713,11 +859,8 @@ Error: } func zeroArray(t *Type) { - if t == nil { - return - } - at, ok := (*t).(*ArrayType) - if !ok || at.Type.Size() == 0 { + at := (*t).(*ArrayType) + if at.Type.Size() == 0 { return } // Make a copy to avoid invalidating typeCache. 
diff --git a/dwarf/type_test.go b/dwarf/type_test.go index 6c06731..5858ef5 100644 --- a/dwarf/type_test.go +++ b/dwarf/type_test.go @@ -9,6 +9,8 @@ import ( "debug/elf" "debug/macho" "debug/pe" + "fmt" + "strconv" "testing" ) @@ -81,15 +83,19 @@ func peData(t *testing.T, name string) *Data { return d } -func TestTypedefsELF(t *testing.T) { testTypedefs(t, elfData(t, "testdata/typedef.elf"), "elf") } +func TestTypedefsELF(t *testing.T) { + testTypedefs(t, elfData(t, "testdata/typedef.elf"), "elf", typedefTests) +} func TestTypedefsMachO(t *testing.T) { - testTypedefs(t, machoData(t, "testdata/typedef.macho"), "macho") + testTypedefs(t, machoData(t, "testdata/typedef.macho"), "macho", typedefTests) } -func TestTypedefsELFDwarf4(t *testing.T) { testTypedefs(t, elfData(t, "testdata/typedef.elf4"), "elf") } +func TestTypedefsELFDwarf4(t *testing.T) { + testTypedefs(t, elfData(t, "testdata/typedef.elf4"), "elf", typedefTests) +} -func testTypedefs(t *testing.T, d *Data, kind string) { +func testTypedefs(t *testing.T, d *Data, kind string, testcases map[string]string) { r := d.Reader() seen := make(map[string]bool) for { @@ -113,7 +119,7 @@ func testTypedefs(t *testing.T, d *Data, kind string) { typstr = t1.Type.String() } - if want, ok := typedefTests[t1.Name]; ok { + if want, ok := testcases[t1.Name]; ok { if seen[t1.Name] { t.Errorf("multiple definitions for %s", t1.Name) } @@ -128,7 +134,7 @@ func testTypedefs(t *testing.T, d *Data, kind string) { } } - for k := range typedefTests { + for k := range testcases { if !seen[k] { t.Errorf("missing %s", k) } @@ -168,3 +174,162 @@ func TestTypedefCycle(t *testing.T) { } } } + +var unsupportedTypeTests = []string{ + // varname:typename:string:size + "culprit::(unsupported type ReferenceType):8", + "pdm::(unsupported type PtrToMemberType):-1", +} + +func TestUnsupportedTypes(t *testing.T) { + // Issue 29601: + // When reading DWARF from C++ load modules, we can encounter + // oddball type DIEs. 
These will be returned as "UnsupportedType" + // objects; check to make sure this works properly. + d := elfData(t, "testdata/cppunsuptypes.elf") + r := d.Reader() + seen := make(map[string]bool) + for { + e, err := r.Next() + if err != nil { + t.Fatal("r.Next:", err) + } + if e == nil { + break + } + if e.Tag == TagVariable { + vname, _ := e.Val(AttrName).(string) + tAttr := e.Val(AttrType) + typOff, ok := tAttr.(Offset) + if !ok { + t.Errorf("variable at offset %v has no type", e.Offset) + continue + } + typ, err := d.Type(typOff) + if err != nil { + t.Errorf("err in type decode: %v\n", err) + continue + } + unsup, isok := typ.(*UnsupportedType) + if !isok { + continue + } + tag := vname + ":" + unsup.Name + ":" + unsup.String() + + ":" + strconv.FormatInt(unsup.Size(), 10) + seen[tag] = true + } + } + dumpseen := false + for _, v := range unsupportedTypeTests { + if !seen[v] { + t.Errorf("missing %s", v) + dumpseen = true + } + } + if dumpseen { + for k := range seen { + fmt.Printf("seen: %s\n", k) + } + } +} + +var expectedBitOffsets1 = map[string]string{ + "x": "S:1 DBO:32", + "y": "S:4 DBO:33", +} + +var expectedBitOffsets2 = map[string]string{ + "x": "S:1 BO:7", + "y": "S:4 BO:27", +} + +func TestBitOffsetsELF(t *testing.T) { + f := "testdata/typedef.elf" + testBitOffsets(t, elfData(t, f), f, expectedBitOffsets2) +} + +func TestBitOffsetsMachO(t *testing.T) { + f := "testdata/typedef.macho" + testBitOffsets(t, machoData(t, f), f, expectedBitOffsets2) +} + +func TestBitOffsetsMachO4(t *testing.T) { + f := "testdata/typedef.macho4" + testBitOffsets(t, machoData(t, f), f, expectedBitOffsets1) +} + +func TestBitOffsetsELFDwarf4(t *testing.T) { + f := "testdata/typedef.elf4" + testBitOffsets(t, elfData(t, f), f, expectedBitOffsets1) +} + +func TestBitOffsetsELFDwarf5(t *testing.T) { + f := "testdata/typedef.elf5" + testBitOffsets(t, elfData(t, f), f, expectedBitOffsets1) +} + +func testBitOffsets(t *testing.T, d *Data, tag string, expectedBitOffsets 
map[string]string) { + r := d.Reader() + for { + e, err := r.Next() + if err != nil { + t.Fatal("r.Next:", err) + } + if e == nil { + break + } + + if e.Tag == TagStructType { + typ, err := d.Type(e.Offset) + if err != nil { + t.Fatal("d.Type:", err) + } + + t1 := typ.(*StructType) + + bitInfoDump := func(f *StructField) string { + res := fmt.Sprintf("S:%d", f.BitSize) + if f.BitOffset != 0 { + res += fmt.Sprintf(" BO:%d", f.BitOffset) + } + if f.DataBitOffset != 0 { + res += fmt.Sprintf(" DBO:%d", f.DataBitOffset) + } + return res + } + + for _, field := range t1.Field { + // We're only testing for bitfields + if field.BitSize == 0 { + continue + } + got := bitInfoDump(field) + want := expectedBitOffsets[field.Name] + if got != want { + t.Errorf("%s: field %s in %s: got info %q want %q", tag, field.Name, t1.StructName, got, want) + } + } + } + if e.Tag != TagCompileUnit { + r.SkipChildren() + } + } +} + +var bitfieldTests = map[string]string{ + "t_another_struct": "struct another_struct {quix short unsigned int@0; xyz [0]int@4; x unsigned int@4 : 1@31; array [40]long long int@8}", +} + +// TestBitFieldZeroArrayIssue50685 checks to make sure that the DWARF +// type reading code doesn't get confused by the presence of a +// specifically-sized bitfield member immediately following a field +// whose type is a zero-length array. Prior to the fix for issue +// 50685, we would get this type for the case in testdata/bitfields.c: +// +// another_struct {quix short unsigned int@0; xyz [-1]int@4; x unsigned int@4 : 1@31; array [40]long long int@8} +// +// Note the "-1" for the xyz field, which should be zero. 
+func TestBitFieldZeroArrayIssue50685(t *testing.T) { + f := "testdata/bitfields.elf4" + testTypedefs(t, elfData(t, f), "elf", bitfieldTests) +} diff --git a/dwarf/typeunit.go b/dwarf/typeunit.go index 76b357c..8ecf876 100644 --- a/dwarf/typeunit.go +++ b/dwarf/typeunit.go @@ -129,7 +129,7 @@ func (tur *typeUnitReader) AddressSize() int { return tur.tu.unit.asize } -// Next reads the next Entry from the type unit. +// Next reads the next [Entry] from the type unit. func (tur *typeUnitReader) Next() (*Entry, error) { if tur.err != nil { return nil, tur.err @@ -137,7 +137,7 @@ func (tur *typeUnitReader) Next() (*Entry, error) { if len(tur.tu.data) == 0 { return nil, nil } - e := tur.b.entry(tur.tu.atable, tur.tu.base) + e := tur.b.entry(nil, tur.tu.atable, tur.tu.base, tur.tu.vers) if tur.b.err != nil { tur.err = tur.b.err return nil, tur.err diff --git a/dwarf/unit.go b/dwarf/unit.go index 98024ca..8b810d0 100644 --- a/dwarf/unit.go +++ b/dwarf/unit.go @@ -19,7 +19,8 @@ type unit struct { atable abbrevTable asize int vers int - is64 bool // True for 64-bit DWARF format + utype uint8 // DWARF 5 unit type + is64 bool // True for 64-bit DWARF format } // Implement the dataFormat interface. 
@@ -47,7 +48,9 @@ func (d *Data) parseUnits() ([]unit, error) { break } b.skip(int(len)) - nunit++ + if len > 0 { + nunit++ + } } if b.err != nil { return nil, b.err @@ -60,14 +63,23 @@ func (d *Data) parseUnits() ([]unit, error) { u := &units[i] u.base = b.off var n Offset - n, u.is64 = b.unitLength() + if b.err != nil { + return nil, b.err + } + for n == 0 { + n, u.is64 = b.unitLength() + } dataOff := b.off vers := b.uint16() - if vers != 2 && vers != 3 && vers != 4 { + if vers < 2 || vers > 5 { b.error("unsupported DWARF version " + strconv.Itoa(int(vers))) break } u.vers = int(vers) + if vers >= 5 { + u.utype = b.uint8() + u.asize = int(b.uint8()) + } var abbrevOff uint64 if u.is64 { abbrevOff = b.uint64() @@ -82,7 +94,22 @@ func (d *Data) parseUnits() ([]unit, error) { break } u.atable = atable - u.asize = int(b.uint8()) + if vers < 5 { + u.asize = int(b.uint8()) + } + + switch u.utype { + case utSkeleton, utSplitCompile: + b.uint64() // unit ID + case utType, utSplitType: + b.uint64() // type signature + if u.is64 { // type offset + b.uint64() + } else { + b.uint32() + } + } + u.off = b.off u.data = b.bytes(int(n - (b.off - dataOff))) } diff --git a/elf/elf.go b/elf/elf.go index 18324a3..e783677 100644 --- a/elf/elf.go +++ b/elf/elf.go @@ -13,7 +13,7 @@ * $FreeBSD: src/sys/sparc64/include/elf.h,v 1.12 2003/09/25 01:10:26 peter Exp $ * "System V ABI" (http://www.sco.com/developers/gabi/latest/ch4.eheader.html) * "ELF for the ARM® 64-bit Architecture (AArch64)" (ARM IHI 0056B) - * "RISC-V ELF psABI specification" (https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md) + * "RISC-V ELF psABI specification" (https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc) * llvm/BinaryFormat/ELF.h - ELF constants and structures * * Copyright (c) 1996-1998 John D. Polstra. All rights reserved. 
@@ -123,8 +123,8 @@ const ( ELFOSABI_NONE OSABI = 0 /* UNIX System V ABI */ ELFOSABI_HPUX OSABI = 1 /* HP-UX operating system */ ELFOSABI_NETBSD OSABI = 2 /* NetBSD */ - ELFOSABI_LINUX OSABI = 3 /* GNU/Linux */ - ELFOSABI_HURD OSABI = 4 /* GNU/Hurd */ + ELFOSABI_LINUX OSABI = 3 /* Linux */ + ELFOSABI_HURD OSABI = 4 /* Hurd */ ELFOSABI_86OPEN OSABI = 5 /* 86Open common IA32 ABI */ ELFOSABI_SOLARIS OSABI = 6 /* Solaris */ ELFOSABI_AIX OSABI = 7 /* AIX */ @@ -384,6 +384,7 @@ const ( EM_RISCV Machine = 243 /* RISC-V */ EM_LANAI Machine = 244 /* Lanai 32-bit processor */ EM_BPF Machine = 247 /* Linux BPF – in-kernel virtual machine */ + EM_LOONGARCH Machine = 258 /* LoongArch */ /* Non-standard or deprecated. */ EM_486 Machine = 6 /* Intel i486. */ @@ -575,6 +576,7 @@ var machineStrings = []intName{ {243, "EM_RISCV"}, {244, "EM_LANAI"}, {247, "EM_BPF"}, + {258, "EM_LOONGARCH"}, /* Non-standard or deprecated. */ {6, "EM_486"}, @@ -644,6 +646,7 @@ const ( SHT_GNU_VERSYM SectionType = 0x6fffffff /* GNU version symbol table */ SHT_HIOS SectionType = 0x6fffffff /* Last of OS specific semantics */ SHT_LOPROC SectionType = 0x70000000 /* reserved range for processor */ + SHT_MIPS_ABIFLAGS SectionType = 0x7000002a /* .MIPS.abiflags */ SHT_HIPROC SectionType = 0x7fffffff /* specific section header types */ SHT_LOUSER SectionType = 0x80000000 /* reserved range for application */ SHT_HIUSER SectionType = 0xffffffff /* specific indexes */ @@ -675,6 +678,7 @@ var shtStrings = []intName{ {0x6ffffffe, "SHT_GNU_VERNEED"}, {0x6fffffff, "SHT_GNU_VERSYM"}, {0x70000000, "SHT_LOPROC"}, + {0x7000002a, "SHT_MIPS_ABIFLAGS"}, {0x7fffffff, "SHT_HIPROC"}, {0x80000000, "SHT_LOUSER"}, {0xffffffff, "SHT_HIUSER"}, @@ -724,6 +728,7 @@ type CompressionType int const ( COMPRESS_ZLIB CompressionType = 1 /* ZLIB compression. */ + COMPRESS_ZSTD CompressionType = 2 /* ZSTD compression. */ COMPRESS_LOOS CompressionType = 0x60000000 /* First OS-specific. 
*/ COMPRESS_HIOS CompressionType = 0x6fffffff /* Last OS-specific. */ COMPRESS_LOPROC CompressionType = 0x70000000 /* First processor-specific type. */ @@ -731,7 +736,8 @@ const ( ) var compressionStrings = []intName{ - {0, "COMPRESS_ZLIB"}, + {1, "COMPRESS_ZLIB"}, + {2, "COMPRESS_ZSTD"}, {0x60000000, "COMPRESS_LOOS"}, {0x6fffffff, "COMPRESS_HIOS"}, {0x70000000, "COMPRESS_LOPROC"}, @@ -745,18 +751,51 @@ func (i CompressionType) GoString() string { return stringName(uint32(i), compre type ProgType int const ( - PT_NULL ProgType = 0 /* Unused entry. */ - PT_LOAD ProgType = 1 /* Loadable segment. */ - PT_DYNAMIC ProgType = 2 /* Dynamic linking information segment. */ - PT_INTERP ProgType = 3 /* Pathname of interpreter. */ - PT_NOTE ProgType = 4 /* Auxiliary information. */ - PT_SHLIB ProgType = 5 /* Reserved (not used). */ - PT_PHDR ProgType = 6 /* Location of program header itself. */ - PT_TLS ProgType = 7 /* Thread local storage segment */ - PT_LOOS ProgType = 0x60000000 /* First OS-specific. */ - PT_HIOS ProgType = 0x6fffffff /* Last OS-specific. */ - PT_LOPROC ProgType = 0x70000000 /* First processor-specific type. */ - PT_HIPROC ProgType = 0x7fffffff /* Last processor-specific type. */ + PT_NULL ProgType = 0 /* Unused entry. */ + PT_LOAD ProgType = 1 /* Loadable segment. */ + PT_DYNAMIC ProgType = 2 /* Dynamic linking information segment. */ + PT_INTERP ProgType = 3 /* Pathname of interpreter. */ + PT_NOTE ProgType = 4 /* Auxiliary information. */ + PT_SHLIB ProgType = 5 /* Reserved (not used). */ + PT_PHDR ProgType = 6 /* Location of program header itself. */ + PT_TLS ProgType = 7 /* Thread local storage segment */ + + PT_LOOS ProgType = 0x60000000 /* First OS-specific. 
*/ + + PT_GNU_EH_FRAME ProgType = 0x6474e550 /* Frame unwind information */ + PT_GNU_STACK ProgType = 0x6474e551 /* Stack flags */ + PT_GNU_RELRO ProgType = 0x6474e552 /* Read only after relocs */ + PT_GNU_PROPERTY ProgType = 0x6474e553 /* GNU property */ + PT_GNU_MBIND_LO ProgType = 0x6474e555 /* Mbind segments start */ + PT_GNU_MBIND_HI ProgType = 0x6474f554 /* Mbind segments finish */ + + PT_PAX_FLAGS ProgType = 0x65041580 /* PAX flags */ + + PT_OPENBSD_RANDOMIZE ProgType = 0x65a3dbe6 /* Random data */ + PT_OPENBSD_WXNEEDED ProgType = 0x65a3dbe7 /* W^X violations */ + PT_OPENBSD_BOOTDATA ProgType = 0x65a41be6 /* Boot arguments */ + + PT_SUNW_EH_FRAME ProgType = 0x6474e550 /* Frame unwind information */ + PT_SUNWSTACK ProgType = 0x6ffffffb /* Stack segment */ + + PT_HIOS ProgType = 0x6fffffff /* Last OS-specific. */ + + PT_LOPROC ProgType = 0x70000000 /* First processor-specific type. */ + + PT_ARM_ARCHEXT ProgType = 0x70000000 /* Architecture compatibility */ + PT_ARM_EXIDX ProgType = 0x70000001 /* Exception unwind tables */ + + PT_AARCH64_ARCHEXT ProgType = 0x70000000 /* Architecture compatibility */ + PT_AARCH64_UNWIND ProgType = 0x70000001 /* Exception unwind tables */ + + PT_MIPS_REGINFO ProgType = 0x70000000 /* Register usage */ + PT_MIPS_RTPROC ProgType = 0x70000001 /* Runtime procedures */ + PT_MIPS_OPTIONS ProgType = 0x70000002 /* Options */ + PT_MIPS_ABIFLAGS ProgType = 0x70000003 /* ABI flags */ + + PT_S390_PGSTE ProgType = 0x70000000 /* 4k page table size */ + + PT_HIPROC ProgType = 0x7fffffff /* Last processor-specific type. 
*/ ) var ptStrings = []intName{ @@ -769,8 +808,19 @@ var ptStrings = []intName{ {6, "PT_PHDR"}, {7, "PT_TLS"}, {0x60000000, "PT_LOOS"}, + {0x6474e550, "PT_GNU_EH_FRAME"}, + {0x6474e551, "PT_GNU_STACK"}, + {0x6474e552, "PT_GNU_RELRO"}, + {0x6474e553, "PT_GNU_PROPERTY"}, + {0x65041580, "PT_PAX_FLAGS"}, + {0x65a3dbe6, "PT_OPENBSD_RANDOMIZE"}, + {0x65a3dbe7, "PT_OPENBSD_WXNEEDED"}, + {0x65a41be6, "PT_OPENBSD_BOOTDATA"}, + {0x6ffffffb, "PT_SUNWSTACK"}, {0x6fffffff, "PT_HIOS"}, {0x70000000, "PT_LOPROC"}, + // We don't list the processor-dependent ProgTypes, + // as the values overlap. {0x7fffffff, "PT_HIPROC"}, } @@ -837,15 +887,114 @@ const ( the interpretation of the d_un union as follows: even == 'd_ptr', even == 'd_val' or none */ - DT_PREINIT_ARRAY DynTag = 32 /* Address of the array of pointers to pre-initialization functions. */ - DT_PREINIT_ARRAYSZ DynTag = 33 /* Size in bytes of the array of pre-initialization functions. */ - DT_LOOS DynTag = 0x6000000d /* First OS-specific */ - DT_HIOS DynTag = 0x6ffff000 /* Last OS-specific */ - DT_VERSYM DynTag = 0x6ffffff0 - DT_VERNEED DynTag = 0x6ffffffe - DT_VERNEEDNUM DynTag = 0x6fffffff - DT_LOPROC DynTag = 0x70000000 /* First processor-specific type. */ - DT_HIPROC DynTag = 0x7fffffff /* Last processor-specific type. */ + DT_PREINIT_ARRAY DynTag = 32 /* Address of the array of pointers to pre-initialization functions. */ + DT_PREINIT_ARRAYSZ DynTag = 33 /* Size in bytes of the array of pre-initialization functions. */ + DT_SYMTAB_SHNDX DynTag = 34 /* Address of SHT_SYMTAB_SHNDX section. 
*/ + + DT_LOOS DynTag = 0x6000000d /* First OS-specific */ + DT_HIOS DynTag = 0x6ffff000 /* Last OS-specific */ + + DT_VALRNGLO DynTag = 0x6ffffd00 + DT_GNU_PRELINKED DynTag = 0x6ffffdf5 + DT_GNU_CONFLICTSZ DynTag = 0x6ffffdf6 + DT_GNU_LIBLISTSZ DynTag = 0x6ffffdf7 + DT_CHECKSUM DynTag = 0x6ffffdf8 + DT_PLTPADSZ DynTag = 0x6ffffdf9 + DT_MOVEENT DynTag = 0x6ffffdfa + DT_MOVESZ DynTag = 0x6ffffdfb + DT_FEATURE DynTag = 0x6ffffdfc + DT_POSFLAG_1 DynTag = 0x6ffffdfd + DT_SYMINSZ DynTag = 0x6ffffdfe + DT_SYMINENT DynTag = 0x6ffffdff + DT_VALRNGHI DynTag = 0x6ffffdff + + DT_ADDRRNGLO DynTag = 0x6ffffe00 + DT_GNU_HASH DynTag = 0x6ffffef5 + DT_TLSDESC_PLT DynTag = 0x6ffffef6 + DT_TLSDESC_GOT DynTag = 0x6ffffef7 + DT_GNU_CONFLICT DynTag = 0x6ffffef8 + DT_GNU_LIBLIST DynTag = 0x6ffffef9 + DT_CONFIG DynTag = 0x6ffffefa + DT_DEPAUDIT DynTag = 0x6ffffefb + DT_AUDIT DynTag = 0x6ffffefc + DT_PLTPAD DynTag = 0x6ffffefd + DT_MOVETAB DynTag = 0x6ffffefe + DT_SYMINFO DynTag = 0x6ffffeff + DT_ADDRRNGHI DynTag = 0x6ffffeff + + DT_VERSYM DynTag = 0x6ffffff0 + DT_RELACOUNT DynTag = 0x6ffffff9 + DT_RELCOUNT DynTag = 0x6ffffffa + DT_FLAGS_1 DynTag = 0x6ffffffb + DT_VERDEF DynTag = 0x6ffffffc + DT_VERDEFNUM DynTag = 0x6ffffffd + DT_VERNEED DynTag = 0x6ffffffe + DT_VERNEEDNUM DynTag = 0x6fffffff + + DT_LOPROC DynTag = 0x70000000 /* First processor-specific type. 
*/ + + DT_MIPS_RLD_VERSION DynTag = 0x70000001 + DT_MIPS_TIME_STAMP DynTag = 0x70000002 + DT_MIPS_ICHECKSUM DynTag = 0x70000003 + DT_MIPS_IVERSION DynTag = 0x70000004 + DT_MIPS_FLAGS DynTag = 0x70000005 + DT_MIPS_BASE_ADDRESS DynTag = 0x70000006 + DT_MIPS_MSYM DynTag = 0x70000007 + DT_MIPS_CONFLICT DynTag = 0x70000008 + DT_MIPS_LIBLIST DynTag = 0x70000009 + DT_MIPS_LOCAL_GOTNO DynTag = 0x7000000a + DT_MIPS_CONFLICTNO DynTag = 0x7000000b + DT_MIPS_LIBLISTNO DynTag = 0x70000010 + DT_MIPS_SYMTABNO DynTag = 0x70000011 + DT_MIPS_UNREFEXTNO DynTag = 0x70000012 + DT_MIPS_GOTSYM DynTag = 0x70000013 + DT_MIPS_HIPAGENO DynTag = 0x70000014 + DT_MIPS_RLD_MAP DynTag = 0x70000016 + DT_MIPS_DELTA_CLASS DynTag = 0x70000017 + DT_MIPS_DELTA_CLASS_NO DynTag = 0x70000018 + DT_MIPS_DELTA_INSTANCE DynTag = 0x70000019 + DT_MIPS_DELTA_INSTANCE_NO DynTag = 0x7000001a + DT_MIPS_DELTA_RELOC DynTag = 0x7000001b + DT_MIPS_DELTA_RELOC_NO DynTag = 0x7000001c + DT_MIPS_DELTA_SYM DynTag = 0x7000001d + DT_MIPS_DELTA_SYM_NO DynTag = 0x7000001e + DT_MIPS_DELTA_CLASSSYM DynTag = 0x70000020 + DT_MIPS_DELTA_CLASSSYM_NO DynTag = 0x70000021 + DT_MIPS_CXX_FLAGS DynTag = 0x70000022 + DT_MIPS_PIXIE_INIT DynTag = 0x70000023 + DT_MIPS_SYMBOL_LIB DynTag = 0x70000024 + DT_MIPS_LOCALPAGE_GOTIDX DynTag = 0x70000025 + DT_MIPS_LOCAL_GOTIDX DynTag = 0x70000026 + DT_MIPS_HIDDEN_GOTIDX DynTag = 0x70000027 + DT_MIPS_PROTECTED_GOTIDX DynTag = 0x70000028 + DT_MIPS_OPTIONS DynTag = 0x70000029 + DT_MIPS_INTERFACE DynTag = 0x7000002a + DT_MIPS_DYNSTR_ALIGN DynTag = 0x7000002b + DT_MIPS_INTERFACE_SIZE DynTag = 0x7000002c + DT_MIPS_RLD_TEXT_RESOLVE_ADDR DynTag = 0x7000002d + DT_MIPS_PERF_SUFFIX DynTag = 0x7000002e + DT_MIPS_COMPACT_SIZE DynTag = 0x7000002f + DT_MIPS_GP_VALUE DynTag = 0x70000030 + DT_MIPS_AUX_DYNAMIC DynTag = 0x70000031 + DT_MIPS_PLTGOT DynTag = 0x70000032 + DT_MIPS_RWPLT DynTag = 0x70000034 + DT_MIPS_RLD_MAP_REL DynTag = 0x70000035 + + DT_PPC_GOT DynTag = 0x70000000 + DT_PPC_OPT DynTag = 0x70000001 + + 
DT_PPC64_GLINK DynTag = 0x70000000 + DT_PPC64_OPD DynTag = 0x70000001 + DT_PPC64_OPDSZ DynTag = 0x70000002 + DT_PPC64_OPT DynTag = 0x70000003 + + DT_SPARC_REGISTER DynTag = 0x70000001 + + DT_AUXILIARY DynTag = 0x7ffffffd + DT_USED DynTag = 0x7ffffffe + DT_FILTER DynTag = 0x7fffffff + + DT_HIPROC DynTag = 0x7fffffff /* Last processor-specific type. */ ) var dtStrings = []intName{ @@ -883,25 +1032,56 @@ var dtStrings = []intName{ {32, "DT_ENCODING"}, {32, "DT_PREINIT_ARRAY"}, {33, "DT_PREINIT_ARRAYSZ"}, + {34, "DT_SYMTAB_SHNDX"}, {0x6000000d, "DT_LOOS"}, {0x6ffff000, "DT_HIOS"}, + {0x6ffffd00, "DT_VALRNGLO"}, + {0x6ffffdf5, "DT_GNU_PRELINKED"}, + {0x6ffffdf6, "DT_GNU_CONFLICTSZ"}, + {0x6ffffdf7, "DT_GNU_LIBLISTSZ"}, + {0x6ffffdf8, "DT_CHECKSUM"}, + {0x6ffffdf9, "DT_PLTPADSZ"}, + {0x6ffffdfa, "DT_MOVEENT"}, + {0x6ffffdfb, "DT_MOVESZ"}, + {0x6ffffdfc, "DT_FEATURE"}, + {0x6ffffdfd, "DT_POSFLAG_1"}, + {0x6ffffdfe, "DT_SYMINSZ"}, + {0x6ffffdff, "DT_SYMINENT"}, + {0x6ffffdff, "DT_VALRNGHI"}, + {0x6ffffe00, "DT_ADDRRNGLO"}, + {0x6ffffef5, "DT_GNU_HASH"}, + {0x6ffffef6, "DT_TLSDESC_PLT"}, + {0x6ffffef7, "DT_TLSDESC_GOT"}, + {0x6ffffef8, "DT_GNU_CONFLICT"}, + {0x6ffffef9, "DT_GNU_LIBLIST"}, + {0x6ffffefa, "DT_CONFIG"}, + {0x6ffffefb, "DT_DEPAUDIT"}, + {0x6ffffefc, "DT_AUDIT"}, + {0x6ffffefd, "DT_PLTPAD"}, + {0x6ffffefe, "DT_MOVETAB"}, + {0x6ffffeff, "DT_SYMINFO"}, + {0x6ffffeff, "DT_ADDRRNGHI"}, {0x6ffffff0, "DT_VERSYM"}, + {0x6ffffff9, "DT_RELACOUNT"}, + {0x6ffffffa, "DT_RELCOUNT"}, + {0x6ffffffb, "DT_FLAGS_1"}, + {0x6ffffffc, "DT_VERDEF"}, + {0x6ffffffd, "DT_VERDEFNUM"}, {0x6ffffffe, "DT_VERNEED"}, {0x6fffffff, "DT_VERNEEDNUM"}, {0x70000000, "DT_LOPROC"}, - {0x7fffffff, "DT_HIPROC"}, + // We don't list the processor-dependent DynTags, + // as the values overlap. 
+ {0x7ffffffd, "DT_AUXILIARY"}, + {0x7ffffffe, "DT_USED"}, + {0x7fffffff, "DT_FILTER"}, } func (i DynTag) String() string { return stringName(uint32(i), dtStrings, false) } func (i DynTag) GoString() string { return stringName(uint32(i), dtStrings, true) } -type DynTagValue struct { - Tag DynTag - Value uint64 -} - // DT_FLAGS values. -type DynFlag uint64 +type DynFlag int const ( DF_ORIGIN DynFlag = 0x0001 /* Indicates that the object being loaded may @@ -929,6 +1109,124 @@ var dflagStrings = []intName{ func (i DynFlag) String() string { return flagName(uint32(i), dflagStrings, false) } func (i DynFlag) GoString() string { return flagName(uint32(i), dflagStrings, true) } +// DT_FLAGS_1 values. +type DynFlag1 uint32 + +const ( + // Indicates that all relocations for this object must be processed before + // returning control to the program. + DF_1_NOW DynFlag1 = 0x00000001 + // Unused. + DF_1_GLOBAL DynFlag1 = 0x00000002 + // Indicates that the object is a member of a group. + DF_1_GROUP DynFlag1 = 0x00000004 + // Indicates that the object cannot be deleted from a process. + DF_1_NODELETE DynFlag1 = 0x00000008 + // Meaningful only for filters. Indicates that all associated filtees be + // processed immediately. + DF_1_LOADFLTR DynFlag1 = 0x00000010 + // Indicates that this object's initialization section be run before any other + // objects loaded. + DF_1_INITFIRST DynFlag1 = 0x00000020 + // Indicates that the object cannot be added to a running process with dlopen. + DF_1_NOOPEN DynFlag1 = 0x00000040 + // Indicates the object requires $ORIGIN processing. + DF_1_ORIGIN DynFlag1 = 0x00000080 + // Indicates that the object should use direct binding information. + DF_1_DIRECT DynFlag1 = 0x00000100 + // Unused. + DF_1_TRANS DynFlag1 = 0x00000200 + // Indicates that the objects symbol table is to interpose before all symbols + // except the primary load object, which is typically the executable. 
+ DF_1_INTERPOSE DynFlag1 = 0x00000400 + // Indicates that the search for dependencies of this object ignores any + // default library search paths. + DF_1_NODEFLIB DynFlag1 = 0x00000800 + // Indicates that this object is not dumped by dldump. Candidates are objects + // with no relocations that might get included when generating alternative + // objects using crle. + DF_1_NODUMP DynFlag1 = 0x00001000 + // Identifies this object as a configuration alternative object generated by + // crle. Triggers the runtime linker to search for a configuration file $ORIGIN/ld.config.app-name. + DF_1_CONFALT DynFlag1 = 0x00002000 + // Meaningful only for filtees. Terminates a filter's search for any + // further filtees. + DF_1_ENDFILTEE DynFlag1 = 0x00004000 + // Indicates that this object has displacement relocations applied. + DF_1_DISPRELDNE DynFlag1 = 0x00008000 + // Indicates that this object has displacement relocations pending. + DF_1_DISPRELPND DynFlag1 = 0x00010000 + // Indicates that this object contains symbols that cannot be directly + // bound to. + DF_1_NODIRECT DynFlag1 = 0x00020000 + // Reserved for internal use by the kernel runtime-linker. + DF_1_IGNMULDEF DynFlag1 = 0x00040000 + // Reserved for internal use by the kernel runtime-linker. + DF_1_NOKSYMS DynFlag1 = 0x00080000 + // Reserved for internal use by the kernel runtime-linker. + DF_1_NOHDR DynFlag1 = 0x00100000 + // Indicates that this object has been edited or has been modified since the + // object's original construction by the link-editor. + DF_1_EDITED DynFlag1 = 0x00200000 + // Reserved for internal use by the kernel runtime-linker. + DF_1_NORELOC DynFlag1 = 0x00400000 + // Indicates that the object contains individual symbols that should interpose + // before all symbols except the primary load object, which is typically the + // executable.
+ DF_1_GLOBAUDIT DynFlag1 = 0x01000000 + // Indicates that the object defines, or makes reference to singleton symbols. + DF_1_SINGLETON DynFlag1 = 0x02000000 + // Indicates that the object is a stub. + DF_1_STUB DynFlag1 = 0x04000000 + // Indicates that the object is a position-independent executable. + DF_1_PIE DynFlag1 = 0x08000000 + // Indicates that the object is a kernel module. + DF_1_KMOD DynFlag1 = 0x10000000 + // Indicates that the object is a weak standard filter. + DF_1_WEAKFILTER DynFlag1 = 0x20000000 + // Unused. + DF_1_NOCOMMON DynFlag1 = 0x40000000 +) + +var dflag1Strings = []intName{ + {0x00000001, "DF_1_NOW"}, + {0x00000002, "DF_1_GLOBAL"}, + {0x00000004, "DF_1_GROUP"}, + {0x00000008, "DF_1_NODELETE"}, + {0x00000010, "DF_1_LOADFLTR"}, + {0x00000020, "DF_1_INITFIRST"}, + {0x00000040, "DF_1_NOOPEN"}, + {0x00000080, "DF_1_ORIGIN"}, + {0x00000100, "DF_1_DIRECT"}, + {0x00000200, "DF_1_TRANS"}, + {0x00000400, "DF_1_INTERPOSE"}, + {0x00000800, "DF_1_NODEFLIB"}, + {0x00001000, "DF_1_NODUMP"}, + {0x00002000, "DF_1_CONFALT"}, + {0x00004000, "DF_1_ENDFILTEE"}, + {0x00008000, "DF_1_DISPRELDNE"}, + {0x00010000, "DF_1_DISPRELPND"}, + {0x00020000, "DF_1_NODIRECT"}, + {0x00040000, "DF_1_IGNMULDEF"}, + {0x00080000, "DF_1_NOKSYMS"}, + {0x00100000, "DF_1_NOHDR"}, + {0x00200000, "DF_1_EDITED"}, + {0x00400000, "DF_1_NORELOC"}, + {0x00800000, "DF_1_SYMINTPOSE"}, + {0x01000000, "DF_1_GLOBAUDIT"}, + {0x02000000, "DF_1_SINGLETON"}, + {0x04000000, "DF_1_STUB"}, + {0x08000000, "DF_1_PIE"}, + {0x10000000, "DF_1_KMOD"}, + {0x20000000, "DF_1_WEAKFILTER"}, + {0x40000000, "DF_1_NOCOMMON"}, +} + +func (i DynFlag1) String() string { return flagName(uint32(i), dflag1Strings, false) } +func (i DynFlag1) GoString() string { return flagName(uint32(i), dflag1Strings, true) } + // NType values; used in core files. 
type NType int @@ -1918,6 +2216,8 @@ const ( R_MIPS_TLS_TPREL64 R_MIPS = 48 /* TP-relative offset, 64 bit */ R_MIPS_TLS_TPREL_HI16 R_MIPS = 49 /* TP-relative offset, high 16 bits */ R_MIPS_TLS_TPREL_LO16 R_MIPS = 50 /* TP-relative offset, low 16 bits */ + + R_MIPS_PC32 R_MIPS = 248 /* 32 bit PC relative reference */ ) var rmipsStrings = []intName{ @@ -1969,11 +2269,220 @@ var rmipsStrings = []intName{ {48, "R_MIPS_TLS_TPREL64"}, {49, "R_MIPS_TLS_TPREL_HI16"}, {50, "R_MIPS_TLS_TPREL_LO16"}, + {248, "R_MIPS_PC32"}, } func (i R_MIPS) String() string { return stringName(uint32(i), rmipsStrings, false) } func (i R_MIPS) GoString() string { return stringName(uint32(i), rmipsStrings, true) } +// Relocation types for LoongArch. +type R_LARCH int + +const ( + R_LARCH_NONE R_LARCH = 0 + R_LARCH_32 R_LARCH = 1 + R_LARCH_64 R_LARCH = 2 + R_LARCH_RELATIVE R_LARCH = 3 + R_LARCH_COPY R_LARCH = 4 + R_LARCH_JUMP_SLOT R_LARCH = 5 + R_LARCH_TLS_DTPMOD32 R_LARCH = 6 + R_LARCH_TLS_DTPMOD64 R_LARCH = 7 + R_LARCH_TLS_DTPREL32 R_LARCH = 8 + R_LARCH_TLS_DTPREL64 R_LARCH = 9 + R_LARCH_TLS_TPREL32 R_LARCH = 10 + R_LARCH_TLS_TPREL64 R_LARCH = 11 + R_LARCH_IRELATIVE R_LARCH = 12 + R_LARCH_MARK_LA R_LARCH = 20 + R_LARCH_MARK_PCREL R_LARCH = 21 + R_LARCH_SOP_PUSH_PCREL R_LARCH = 22 + R_LARCH_SOP_PUSH_ABSOLUTE R_LARCH = 23 + R_LARCH_SOP_PUSH_DUP R_LARCH = 24 + R_LARCH_SOP_PUSH_GPREL R_LARCH = 25 + R_LARCH_SOP_PUSH_TLS_TPREL R_LARCH = 26 + R_LARCH_SOP_PUSH_TLS_GOT R_LARCH = 27 + R_LARCH_SOP_PUSH_TLS_GD R_LARCH = 28 + R_LARCH_SOP_PUSH_PLT_PCREL R_LARCH = 29 + R_LARCH_SOP_ASSERT R_LARCH = 30 + R_LARCH_SOP_NOT R_LARCH = 31 + R_LARCH_SOP_SUB R_LARCH = 32 + R_LARCH_SOP_SL R_LARCH = 33 + R_LARCH_SOP_SR R_LARCH = 34 + R_LARCH_SOP_ADD R_LARCH = 35 + R_LARCH_SOP_AND R_LARCH = 36 + R_LARCH_SOP_IF_ELSE R_LARCH = 37 + R_LARCH_SOP_POP_32_S_10_5 R_LARCH = 38 + R_LARCH_SOP_POP_32_U_10_12 R_LARCH = 39 + R_LARCH_SOP_POP_32_S_10_12 R_LARCH = 40 + R_LARCH_SOP_POP_32_S_10_16 R_LARCH = 41 + 
R_LARCH_SOP_POP_32_S_10_16_S2 R_LARCH = 42 + R_LARCH_SOP_POP_32_S_5_20 R_LARCH = 43 + R_LARCH_SOP_POP_32_S_0_5_10_16_S2 R_LARCH = 44 + R_LARCH_SOP_POP_32_S_0_10_10_16_S2 R_LARCH = 45 + R_LARCH_SOP_POP_32_U R_LARCH = 46 + R_LARCH_ADD8 R_LARCH = 47 + R_LARCH_ADD16 R_LARCH = 48 + R_LARCH_ADD24 R_LARCH = 49 + R_LARCH_ADD32 R_LARCH = 50 + R_LARCH_ADD64 R_LARCH = 51 + R_LARCH_SUB8 R_LARCH = 52 + R_LARCH_SUB16 R_LARCH = 53 + R_LARCH_SUB24 R_LARCH = 54 + R_LARCH_SUB32 R_LARCH = 55 + R_LARCH_SUB64 R_LARCH = 56 + R_LARCH_GNU_VTINHERIT R_LARCH = 57 + R_LARCH_GNU_VTENTRY R_LARCH = 58 + R_LARCH_B16 R_LARCH = 64 + R_LARCH_B21 R_LARCH = 65 + R_LARCH_B26 R_LARCH = 66 + R_LARCH_ABS_HI20 R_LARCH = 67 + R_LARCH_ABS_LO12 R_LARCH = 68 + R_LARCH_ABS64_LO20 R_LARCH = 69 + R_LARCH_ABS64_HI12 R_LARCH = 70 + R_LARCH_PCALA_HI20 R_LARCH = 71 + R_LARCH_PCALA_LO12 R_LARCH = 72 + R_LARCH_PCALA64_LO20 R_LARCH = 73 + R_LARCH_PCALA64_HI12 R_LARCH = 74 + R_LARCH_GOT_PC_HI20 R_LARCH = 75 + R_LARCH_GOT_PC_LO12 R_LARCH = 76 + R_LARCH_GOT64_PC_LO20 R_LARCH = 77 + R_LARCH_GOT64_PC_HI12 R_LARCH = 78 + R_LARCH_GOT_HI20 R_LARCH = 79 + R_LARCH_GOT_LO12 R_LARCH = 80 + R_LARCH_GOT64_LO20 R_LARCH = 81 + R_LARCH_GOT64_HI12 R_LARCH = 82 + R_LARCH_TLS_LE_HI20 R_LARCH = 83 + R_LARCH_TLS_LE_LO12 R_LARCH = 84 + R_LARCH_TLS_LE64_LO20 R_LARCH = 85 + R_LARCH_TLS_LE64_HI12 R_LARCH = 86 + R_LARCH_TLS_IE_PC_HI20 R_LARCH = 87 + R_LARCH_TLS_IE_PC_LO12 R_LARCH = 88 + R_LARCH_TLS_IE64_PC_LO20 R_LARCH = 89 + R_LARCH_TLS_IE64_PC_HI12 R_LARCH = 90 + R_LARCH_TLS_IE_HI20 R_LARCH = 91 + R_LARCH_TLS_IE_LO12 R_LARCH = 92 + R_LARCH_TLS_IE64_LO20 R_LARCH = 93 + R_LARCH_TLS_IE64_HI12 R_LARCH = 94 + R_LARCH_TLS_LD_PC_HI20 R_LARCH = 95 + R_LARCH_TLS_LD_HI20 R_LARCH = 96 + R_LARCH_TLS_GD_PC_HI20 R_LARCH = 97 + R_LARCH_TLS_GD_HI20 R_LARCH = 98 + R_LARCH_32_PCREL R_LARCH = 99 + R_LARCH_RELAX R_LARCH = 100 + R_LARCH_DELETE R_LARCH = 101 + R_LARCH_ALIGN R_LARCH = 102 + R_LARCH_PCREL20_S2 R_LARCH = 103 + R_LARCH_CFA R_LARCH = 104 + R_LARCH_ADD6 
R_LARCH = 105 + R_LARCH_SUB6 R_LARCH = 106 + R_LARCH_ADD_ULEB128 R_LARCH = 107 + R_LARCH_SUB_ULEB128 R_LARCH = 108 + R_LARCH_64_PCREL R_LARCH = 109 +) + +var rlarchStrings = []intName{ + {0, "R_LARCH_NONE"}, + {1, "R_LARCH_32"}, + {2, "R_LARCH_64"}, + {3, "R_LARCH_RELATIVE"}, + {4, "R_LARCH_COPY"}, + {5, "R_LARCH_JUMP_SLOT"}, + {6, "R_LARCH_TLS_DTPMOD32"}, + {7, "R_LARCH_TLS_DTPMOD64"}, + {8, "R_LARCH_TLS_DTPREL32"}, + {9, "R_LARCH_TLS_DTPREL64"}, + {10, "R_LARCH_TLS_TPREL32"}, + {11, "R_LARCH_TLS_TPREL64"}, + {12, "R_LARCH_IRELATIVE"}, + {20, "R_LARCH_MARK_LA"}, + {21, "R_LARCH_MARK_PCREL"}, + {22, "R_LARCH_SOP_PUSH_PCREL"}, + {23, "R_LARCH_SOP_PUSH_ABSOLUTE"}, + {24, "R_LARCH_SOP_PUSH_DUP"}, + {25, "R_LARCH_SOP_PUSH_GPREL"}, + {26, "R_LARCH_SOP_PUSH_TLS_TPREL"}, + {27, "R_LARCH_SOP_PUSH_TLS_GOT"}, + {28, "R_LARCH_SOP_PUSH_TLS_GD"}, + {29, "R_LARCH_SOP_PUSH_PLT_PCREL"}, + {30, "R_LARCH_SOP_ASSERT"}, + {31, "R_LARCH_SOP_NOT"}, + {32, "R_LARCH_SOP_SUB"}, + {33, "R_LARCH_SOP_SL"}, + {34, "R_LARCH_SOP_SR"}, + {35, "R_LARCH_SOP_ADD"}, + {36, "R_LARCH_SOP_AND"}, + {37, "R_LARCH_SOP_IF_ELSE"}, + {38, "R_LARCH_SOP_POP_32_S_10_5"}, + {39, "R_LARCH_SOP_POP_32_U_10_12"}, + {40, "R_LARCH_SOP_POP_32_S_10_12"}, + {41, "R_LARCH_SOP_POP_32_S_10_16"}, + {42, "R_LARCH_SOP_POP_32_S_10_16_S2"}, + {43, "R_LARCH_SOP_POP_32_S_5_20"}, + {44, "R_LARCH_SOP_POP_32_S_0_5_10_16_S2"}, + {45, "R_LARCH_SOP_POP_32_S_0_10_10_16_S2"}, + {46, "R_LARCH_SOP_POP_32_U"}, + {47, "R_LARCH_ADD8"}, + {48, "R_LARCH_ADD16"}, + {49, "R_LARCH_ADD24"}, + {50, "R_LARCH_ADD32"}, + {51, "R_LARCH_ADD64"}, + {52, "R_LARCH_SUB8"}, + {53, "R_LARCH_SUB16"}, + {54, "R_LARCH_SUB24"}, + {55, "R_LARCH_SUB32"}, + {56, "R_LARCH_SUB64"}, + {57, "R_LARCH_GNU_VTINHERIT"}, + {58, "R_LARCH_GNU_VTENTRY"}, + {64, "R_LARCH_B16"}, + {65, "R_LARCH_B21"}, + {66, "R_LARCH_B26"}, + {67, "R_LARCH_ABS_HI20"}, + {68, "R_LARCH_ABS_LO12"}, + {69, "R_LARCH_ABS64_LO20"}, + {70, "R_LARCH_ABS64_HI12"}, + {71, "R_LARCH_PCALA_HI20"}, + {72, 
"R_LARCH_PCALA_LO12"}, + {73, "R_LARCH_PCALA64_LO20"}, + {74, "R_LARCH_PCALA64_HI12"}, + {75, "R_LARCH_GOT_PC_HI20"}, + {76, "R_LARCH_GOT_PC_LO12"}, + {77, "R_LARCH_GOT64_PC_LO20"}, + {78, "R_LARCH_GOT64_PC_HI12"}, + {79, "R_LARCH_GOT_HI20"}, + {80, "R_LARCH_GOT_LO12"}, + {81, "R_LARCH_GOT64_LO20"}, + {82, "R_LARCH_GOT64_HI12"}, + {83, "R_LARCH_TLS_LE_HI20"}, + {84, "R_LARCH_TLS_LE_LO12"}, + {85, "R_LARCH_TLS_LE64_LO20"}, + {86, "R_LARCH_TLS_LE64_HI12"}, + {87, "R_LARCH_TLS_IE_PC_HI20"}, + {88, "R_LARCH_TLS_IE_PC_LO12"}, + {89, "R_LARCH_TLS_IE64_PC_LO20"}, + {90, "R_LARCH_TLS_IE64_PC_HI12"}, + {91, "R_LARCH_TLS_IE_HI20"}, + {92, "R_LARCH_TLS_IE_LO12"}, + {93, "R_LARCH_TLS_IE64_LO20"}, + {94, "R_LARCH_TLS_IE64_HI12"}, + {95, "R_LARCH_TLS_LD_PC_HI20"}, + {96, "R_LARCH_TLS_LD_HI20"}, + {97, "R_LARCH_TLS_GD_PC_HI20"}, + {98, "R_LARCH_TLS_GD_HI20"}, + {99, "R_LARCH_32_PCREL"}, + {100, "R_LARCH_RELAX"}, + {101, "R_LARCH_DELETE"}, + {102, "R_LARCH_ALIGN"}, + {103, "R_LARCH_PCREL20_S2"}, + {104, "R_LARCH_CFA"}, + {105, "R_LARCH_ADD6"}, + {106, "R_LARCH_SUB6"}, + {107, "R_LARCH_ADD_ULEB128"}, + {108, "R_LARCH_SUB_ULEB128"}, + {109, "R_LARCH_64_PCREL"}, +} + +func (i R_LARCH) String() string { return stringName(uint32(i), rlarchStrings, false) } +func (i R_LARCH) GoString() string { return stringName(uint32(i), rlarchStrings, true) } + // Relocation types for PowerPC. 
// // Values that are shared by both R_PPC and R_PPC64 are prefixed with @@ -2172,14 +2681,32 @@ const ( R_PPC64_GOT16_LO R_PPC64 = 15 // R_POWERPC_GOT16_LO R_PPC64_GOT16_HI R_PPC64 = 16 // R_POWERPC_GOT16_HI R_PPC64_GOT16_HA R_PPC64 = 17 // R_POWERPC_GOT16_HA + R_PPC64_COPY R_PPC64 = 19 // R_POWERPC_COPY + R_PPC64_GLOB_DAT R_PPC64 = 20 // R_POWERPC_GLOB_DAT R_PPC64_JMP_SLOT R_PPC64 = 21 // R_POWERPC_JMP_SLOT + R_PPC64_RELATIVE R_PPC64 = 22 // R_POWERPC_RELATIVE + R_PPC64_UADDR32 R_PPC64 = 24 // R_POWERPC_UADDR32 + R_PPC64_UADDR16 R_PPC64 = 25 // R_POWERPC_UADDR16 R_PPC64_REL32 R_PPC64 = 26 // R_POWERPC_REL32 + R_PPC64_PLT32 R_PPC64 = 27 // R_POWERPC_PLT32 + R_PPC64_PLTREL32 R_PPC64 = 28 // R_POWERPC_PLTREL32 + R_PPC64_PLT16_LO R_PPC64 = 29 // R_POWERPC_PLT16_LO + R_PPC64_PLT16_HI R_PPC64 = 30 // R_POWERPC_PLT16_HI + R_PPC64_PLT16_HA R_PPC64 = 31 // R_POWERPC_PLT16_HA + R_PPC64_SECTOFF R_PPC64 = 33 // R_POWERPC_SECTOFF + R_PPC64_SECTOFF_LO R_PPC64 = 34 // R_POWERPC_SECTOFF_LO + R_PPC64_SECTOFF_HI R_PPC64 = 35 // R_POWERPC_SECTOFF_HI + R_PPC64_SECTOFF_HA R_PPC64 = 36 // R_POWERPC_SECTOFF_HA + R_PPC64_REL30 R_PPC64 = 37 // R_POWERPC_ADDR30 R_PPC64_ADDR64 R_PPC64 = 38 R_PPC64_ADDR16_HIGHER R_PPC64 = 39 R_PPC64_ADDR16_HIGHERA R_PPC64 = 40 R_PPC64_ADDR16_HIGHEST R_PPC64 = 41 R_PPC64_ADDR16_HIGHESTA R_PPC64 = 42 + R_PPC64_UADDR64 R_PPC64 = 43 R_PPC64_REL64 R_PPC64 = 44 + R_PPC64_PLT64 R_PPC64 = 45 + R_PPC64_PLTREL64 R_PPC64 = 46 R_PPC64_TOC16 R_PPC64 = 47 R_PPC64_TOC16_LO R_PPC64 = 48 R_PPC64_TOC16_HI R_PPC64 = 49 @@ -2195,7 +2722,7 @@ const ( R_PPC64_GOT16_LO_DS R_PPC64 = 59 R_PPC64_PLT16_LO_DS R_PPC64 = 60 R_PPC64_SECTOFF_DS R_PPC64 = 61 - R_PPC64_SECTOFF_LO_DS R_PPC64 = 61 + R_PPC64_SECTOFF_LO_DS R_PPC64 = 62 R_PPC64_TOC16_DS R_PPC64 = 63 R_PPC64_TOC16_LO_DS R_PPC64 = 64 R_PPC64_PLTGOT16_DS R_PPC64 = 65 @@ -2252,6 +2779,42 @@ const ( R_PPC64_REL24_NOTOC R_PPC64 = 116 R_PPC64_ADDR64_LOCAL R_PPC64 = 117 R_PPC64_ENTRY R_PPC64 = 118 + R_PPC64_PLTSEQ R_PPC64 = 119 + 
R_PPC64_PLTCALL R_PPC64 = 120 + R_PPC64_PLTSEQ_NOTOC R_PPC64 = 121 + R_PPC64_PLTCALL_NOTOC R_PPC64 = 122 + R_PPC64_PCREL_OPT R_PPC64 = 123 + R_PPC64_REL24_P9NOTOC R_PPC64 = 124 + R_PPC64_D34 R_PPC64 = 128 + R_PPC64_D34_LO R_PPC64 = 129 + R_PPC64_D34_HI30 R_PPC64 = 130 + R_PPC64_D34_HA30 R_PPC64 = 131 + R_PPC64_PCREL34 R_PPC64 = 132 + R_PPC64_GOT_PCREL34 R_PPC64 = 133 + R_PPC64_PLT_PCREL34 R_PPC64 = 134 + R_PPC64_PLT_PCREL34_NOTOC R_PPC64 = 135 + R_PPC64_ADDR16_HIGHER34 R_PPC64 = 136 + R_PPC64_ADDR16_HIGHERA34 R_PPC64 = 137 + R_PPC64_ADDR16_HIGHEST34 R_PPC64 = 138 + R_PPC64_ADDR16_HIGHESTA34 R_PPC64 = 139 + R_PPC64_REL16_HIGHER34 R_PPC64 = 140 + R_PPC64_REL16_HIGHERA34 R_PPC64 = 141 + R_PPC64_REL16_HIGHEST34 R_PPC64 = 142 + R_PPC64_REL16_HIGHESTA34 R_PPC64 = 143 + R_PPC64_D28 R_PPC64 = 144 + R_PPC64_PCREL28 R_PPC64 = 145 + R_PPC64_TPREL34 R_PPC64 = 146 + R_PPC64_DTPREL34 R_PPC64 = 147 + R_PPC64_GOT_TLSGD_PCREL34 R_PPC64 = 148 + R_PPC64_GOT_TLSLD_PCREL34 R_PPC64 = 149 + R_PPC64_GOT_TPREL_PCREL34 R_PPC64 = 150 + R_PPC64_GOT_DTPREL_PCREL34 R_PPC64 = 151 + R_PPC64_REL16_HIGH R_PPC64 = 240 + R_PPC64_REL16_HIGHA R_PPC64 = 241 + R_PPC64_REL16_HIGHER R_PPC64 = 242 + R_PPC64_REL16_HIGHERA R_PPC64 = 243 + R_PPC64_REL16_HIGHEST R_PPC64 = 244 + R_PPC64_REL16_HIGHESTA R_PPC64 = 245 R_PPC64_REL16DX_HA R_PPC64 = 246 // R_POWERPC_REL16DX_HA R_PPC64_JMP_IREL R_PPC64 = 247 R_PPC64_IRELATIVE R_PPC64 = 248 // R_POWERPC_IRELATIVE @@ -2259,6 +2822,8 @@ const ( R_PPC64_REL16_LO R_PPC64 = 250 // R_POWERPC_REL16_LO R_PPC64_REL16_HI R_PPC64 = 251 // R_POWERPC_REL16_HI R_PPC64_REL16_HA R_PPC64 = 252 // R_POWERPC_REL16_HA + R_PPC64_GNU_VTINHERIT R_PPC64 = 253 + R_PPC64_GNU_VTENTRY R_PPC64 = 254 ) var rppc64Strings = []intName{ @@ -2280,14 +2845,32 @@ var rppc64Strings = []intName{ {15, "R_PPC64_GOT16_LO"}, {16, "R_PPC64_GOT16_HI"}, {17, "R_PPC64_GOT16_HA"}, + {19, "R_PPC64_COPY"}, + {20, "R_PPC64_GLOB_DAT"}, {21, "R_PPC64_JMP_SLOT"}, + {22, "R_PPC64_RELATIVE"}, + {24, "R_PPC64_UADDR32"}, + 
{25, "R_PPC64_UADDR16"}, {26, "R_PPC64_REL32"}, + {27, "R_PPC64_PLT32"}, + {28, "R_PPC64_PLTREL32"}, + {29, "R_PPC64_PLT16_LO"}, + {30, "R_PPC64_PLT16_HI"}, + {31, "R_PPC64_PLT16_HA"}, + {33, "R_PPC64_SECTOFF"}, + {34, "R_PPC64_SECTOFF_LO"}, + {35, "R_PPC64_SECTOFF_HI"}, + {36, "R_PPC64_SECTOFF_HA"}, + {37, "R_PPC64_REL30"}, {38, "R_PPC64_ADDR64"}, {39, "R_PPC64_ADDR16_HIGHER"}, {40, "R_PPC64_ADDR16_HIGHERA"}, {41, "R_PPC64_ADDR16_HIGHEST"}, {42, "R_PPC64_ADDR16_HIGHESTA"}, + {43, "R_PPC64_UADDR64"}, {44, "R_PPC64_REL64"}, + {45, "R_PPC64_PLT64"}, + {46, "R_PPC64_PLTREL64"}, {47, "R_PPC64_TOC16"}, {48, "R_PPC64_TOC16_LO"}, {49, "R_PPC64_TOC16_HI"}, @@ -2303,7 +2886,7 @@ var rppc64Strings = []intName{ {59, "R_PPC64_GOT16_LO_DS"}, {60, "R_PPC64_PLT16_LO_DS"}, {61, "R_PPC64_SECTOFF_DS"}, - {61, "R_PPC64_SECTOFF_LO_DS"}, + {62, "R_PPC64_SECTOFF_LO_DS"}, {63, "R_PPC64_TOC16_DS"}, {64, "R_PPC64_TOC16_LO_DS"}, {65, "R_PPC64_PLTGOT16_DS"}, @@ -2360,6 +2943,42 @@ var rppc64Strings = []intName{ {116, "R_PPC64_REL24_NOTOC"}, {117, "R_PPC64_ADDR64_LOCAL"}, {118, "R_PPC64_ENTRY"}, + {119, "R_PPC64_PLTSEQ"}, + {120, "R_PPC64_PLTCALL"}, + {121, "R_PPC64_PLTSEQ_NOTOC"}, + {122, "R_PPC64_PLTCALL_NOTOC"}, + {123, "R_PPC64_PCREL_OPT"}, + {124, "R_PPC64_REL24_P9NOTOC"}, + {128, "R_PPC64_D34"}, + {129, "R_PPC64_D34_LO"}, + {130, "R_PPC64_D34_HI30"}, + {131, "R_PPC64_D34_HA30"}, + {132, "R_PPC64_PCREL34"}, + {133, "R_PPC64_GOT_PCREL34"}, + {134, "R_PPC64_PLT_PCREL34"}, + {135, "R_PPC64_PLT_PCREL34_NOTOC"}, + {136, "R_PPC64_ADDR16_HIGHER34"}, + {137, "R_PPC64_ADDR16_HIGHERA34"}, + {138, "R_PPC64_ADDR16_HIGHEST34"}, + {139, "R_PPC64_ADDR16_HIGHESTA34"}, + {140, "R_PPC64_REL16_HIGHER34"}, + {141, "R_PPC64_REL16_HIGHERA34"}, + {142, "R_PPC64_REL16_HIGHEST34"}, + {143, "R_PPC64_REL16_HIGHESTA34"}, + {144, "R_PPC64_D28"}, + {145, "R_PPC64_PCREL28"}, + {146, "R_PPC64_TPREL34"}, + {147, "R_PPC64_DTPREL34"}, + {148, "R_PPC64_GOT_TLSGD_PCREL34"}, + {149, "R_PPC64_GOT_TLSLD_PCREL34"}, + {150, 
"R_PPC64_GOT_TPREL_PCREL34"}, + {151, "R_PPC64_GOT_DTPREL_PCREL34"}, + {240, "R_PPC64_REL16_HIGH"}, + {241, "R_PPC64_REL16_HIGHA"}, + {242, "R_PPC64_REL16_HIGHER"}, + {243, "R_PPC64_REL16_HIGHERA"}, + {244, "R_PPC64_REL16_HIGHEST"}, + {245, "R_PPC64_REL16_HIGHESTA"}, {246, "R_PPC64_REL16DX_HA"}, {247, "R_PPC64_JMP_IREL"}, {248, "R_PPC64_IRELATIVE"}, @@ -2367,6 +2986,8 @@ var rppc64Strings = []intName{ {250, "R_PPC64_REL16_LO"}, {251, "R_PPC64_REL16_HI"}, {252, "R_PPC64_REL16_HA"}, + {253, "R_PPC64_GNU_VTINHERIT"}, + {254, "R_PPC64_GNU_VTENTRY"}, } func (i R_PPC64) String() string { return stringName(uint32(i), rppc64Strings, false) } diff --git a/elf/elf_test.go b/elf/elf_test.go index f8985a8..0350d53 100644 --- a/elf/elf_test.go +++ b/elf/elf_test.go @@ -10,7 +10,7 @@ import ( ) type nameTest struct { - val interface{} + val any str string } @@ -25,6 +25,7 @@ var nameTests = []nameTest{ {PF_W + PF_R + 0x50, "PF_W+PF_R+0x50"}, {DT_SYMBOLIC, "DT_SYMBOLIC"}, {DF_BIND_NOW, "DF_BIND_NOW"}, + {DF_1_PIE, "DF_1_PIE"}, {NT_FPREGSET, "NT_FPREGSET"}, {STB_GLOBAL, "STB_GLOBAL"}, {STT_COMMON, "STT_COMMON"}, @@ -37,6 +38,7 @@ var nameTests = []nameTest{ {R_SPARC_GOT22, "R_SPARC_GOT22"}, {ET_LOOS + 5, "ET_LOOS+5"}, {ProgFlag(0x50), "0x50"}, + {COMPRESS_ZLIB + 2, "COMPRESS_ZSTD+1"}, } func TestNames(t *testing.T) { diff --git a/elf/exports.go b/elf/exports.go deleted file mode 100644 index 5feab49..0000000 --- a/elf/exports.go +++ /dev/null @@ -1,32 +0,0 @@ -package elf - -/* - Any symbol in the dynamic symbol table (in .dynsym) for which .st_shndx == SHN_UNDEF - (references undefined section) is an import, and every other symbol is defined and exported. 
-*/ - -// Export - describes a single export entry -type Export struct { - Name string - VirtualAddress uint64 -} - -// Exports - gets exports -func (f *File) Exports() ([]Export, error) { - - var exports []Export - symbols, err := f.DynamicSymbols() - if err != nil { - return nil, err - } - for _, s := range symbols { - if s.Section != SHN_UNDEF { - exports = append(exports, Export{ - Name: s.Name, - VirtualAddress: s.Value, - }) - } - } - - return exports, nil -} diff --git a/elf/file.go b/elf/file.go index 4d21e9c..3b2443a 100644 --- a/elf/file.go +++ b/elf/file.go @@ -2,7 +2,17 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package elf implements access to ELF object files. +/* +Package elf implements access to ELF object files. + +# Security + +This package is not designed to be hardened against adversarial inputs, and is +outside the scope of https://go.dev/security/policy. In particular, only basic +validation is done when parsing object files. As such, care should be taken when +parsing untrusted inputs, as parsing malformed files may consume significant +resources, or cause panics. +*/ package elf import ( @@ -12,22 +22,13 @@ import ( "encoding/binary" "errors" "fmt" + "binject-debug/internal/saferio" + "binject-debug/internal/zstd" "io" "os" "strings" ) -// seekStart, seekCurrent, seekEnd are copies of -// io.SeekStart, io.SeekCurrent, and io.SeekEnd. -// We can't use the ones from package io because -// we want this code to build with Go 1.4 during -// cmd/dist bootstrap. -const ( - seekStart int = 0 - seekCurrent int = 1 - seekEnd int = 2 -) - // TODO: error reporting detail /* @@ -45,26 +46,26 @@ type FileHeader struct { Type Type Machine Machine Entry uint64 - SHTOffset int64 - ShStrIndex int } // A File represents an open ELF file. 
type File struct { FileHeader - Sections []*Section Progs []*Prog + Sections []*Section + DynamicTags map[DynTag]uint64 + ELFHeader32 *Header32 + ELFHeader64 *Header64 + Injection []byte + InjectionEOF []byte closer io.Closer gnuNeed []verneed gnuVersym []byte - Insertion []byte - InsertionEOF []byte - - DynTags []DynTagValue } // A SectionHeader represents a single ELF section header. type SectionHeader struct { + Index uint32 Name string Type SectionType Flags SectionFlag @@ -76,9 +77,6 @@ type SectionHeader struct { Addralign uint64 Entsize uint64 - Shnum int // Section Header Number - Shname uint32 // Section Header Name (index) - // FileSize is the size of this section in the file in bytes. // If a section is compressed, FileSize is the size of the // compressed data, while Size (above) is the size of the @@ -110,10 +108,10 @@ type Section struct { // Data reads and returns the contents of the ELF section. // Even if the section is stored compressed in the ELF file, // Data returns uncompressed data. +// +// For an [SHT_NOBITS] section, Data always returns a non-nil error. func (s *Section) Data() ([]byte, error) { - dat := make([]byte, s.Size) - n, err := io.ReadFull(s.Open(), dat) - return dat[0:n], err + return saferio.ReadData(s.Open(), s.Size) } // stringTable reads and returns the string table given by the @@ -128,21 +126,57 @@ func (f *File) stringTable(link uint32) ([]byte, error) { // Open returns a new ReadSeeker reading the ELF section. // Even if the section is stored compressed in the ELF file, // the ReadSeeker reads uncompressed data. +// +// For an [SHT_NOBITS] section, all calls to the opened reader +// will return a non-nil error. 
func (s *Section) Open() io.ReadSeeker { + if s.Type == SHT_NOBITS { + return io.NewSectionReader(&nobitsSectionReader{}, 0, int64(s.Size)) + } + + var zrd func(io.Reader) (io.ReadCloser, error) if s.Flags&SHF_COMPRESSED == 0 { - return io.NewSectionReader(s.sr, 0, 1<<63-1) + + if !strings.HasPrefix(s.Name, ".zdebug") { + return io.NewSectionReader(s.sr, 0, 1<<63-1) + } + + b := make([]byte, 12) + n, _ := s.sr.ReadAt(b, 0) + if n != 12 || string(b[:4]) != "ZLIB" { + return io.NewSectionReader(s.sr, 0, 1<<63-1) + } + + s.compressionOffset = 12 + s.compressionType = COMPRESS_ZLIB + s.Size = binary.BigEndian.Uint64(b[4:12]) + zrd = zlib.NewReader + + } else if s.Flags&SHF_ALLOC != 0 { + return errorReader{&FormatError{int64(s.Offset), + "SHF_COMPRESSED applies only to non-allocable sections", s.compressionType}} } - if s.compressionType == COMPRESS_ZLIB { - return &readSeekerFromReader{ - reset: func() (io.Reader, error) { - fr := io.NewSectionReader(s.sr, s.compressionOffset, int64(s.FileSize)-s.compressionOffset) - return zlib.NewReader(fr) - }, - size: int64(s.Size), + + switch s.compressionType { + case COMPRESS_ZLIB: + zrd = zlib.NewReader + case COMPRESS_ZSTD: + zrd = func(r io.Reader) (io.ReadCloser, error) { + return io.NopCloser(zstd.NewReader(r)), nil } } - err := &FormatError{int64(s.Offset), "unknown compression type", s.compressionType} - return errorReader{err} + + if zrd == nil { + return errorReader{&FormatError{int64(s.Offset), "unknown compression type", s.compressionType}} + } + + return &readSeekerFromReader{ + reset: func() (io.Reader, error) { + fr := io.NewSectionReader(s.sr, s.compressionOffset, int64(s.FileSize)-s.compressionOffset) + return zrd(fr) + }, + size: int64(s.Size), + } } // A ProgHeader represents a single ELF program header. @@ -177,35 +211,14 @@ func (p *Prog) Open() io.ReadSeeker { return io.NewSectionReader(p.sr, 0, 1<<63- // A Symbol represents an entry in an ELF symbol table section. 
type Symbol struct { Name string - NameIndex uint32 Info, Other byte Section SectionIndex - SectIndex uint16 Value, Size uint64 -} - -// ToSym64 - Convert to a Sym64 -func (s Symbol) ToSym64() (retval Sym64) { - - retval.Name = s.NameIndex - retval.Info = s.Info - retval.Other = s.Other - retval.Shndx = s.SectIndex - retval.Value = s.Value - retval.Size = s.Size - return -} -// ToSym32 - Convert to a Sym32 -func (s Symbol) ToSym32() (retval Sym32) { - - retval.Name = s.NameIndex - retval.Info = s.Info - retval.Other = s.Other - retval.Shndx = s.SectIndex - retval.Value = uint32(s.Value) - retval.Size = uint32(s.Size) - return + // Version and Library are present only for the dynamic symbol + // table. + Version string + Library string } /* @@ -215,7 +228,7 @@ func (s Symbol) ToSym32() (retval Sym32) { type FormatError struct { off int64 msg string - val interface{} + val any } func (e *FormatError) Error() string { @@ -227,7 +240,7 @@ func (e *FormatError) Error() string { return msg } -// Open opens the named file using os.Open and prepares it for use as an ELF binary. +// Open opens the named file using [os.Open] and prepares it for use as an ELF binary. func Open(name string) (*File, error) { f, err := os.Open(name) if err != nil { @@ -242,8 +255,8 @@ func Open(name string) (*File, error) { return ff, nil } -// Close closes the File. -// If the File was created using NewFile directly instead of Open, +// Close closes the [File]. +// If the [File] was created using [NewFile] directly instead of [Open], // Close has no effect. func (f *File) Close() error { var err error @@ -265,18 +278,7 @@ func (f *File) SectionByType(typ SectionType) *Section { return nil } -// SectionByName returns the first section in f with the -// given name, or nil if there is no such section. 
-func (f *File) SectionByName(name string) *Section { - for _, s := range f.Sections { - if s.Name == name { - return s - } - } - return nil -} - -// NewFile creates a new File for accessing an ELF binary in an underlying reader. +// NewFile creates a new [File] for accessing an ELF binary in an underlying reader. // The ELF binary is expected to start at position 0 in the ReaderAt. func NewFile(r io.ReaderAt) (*File, error) { sr := io.NewSectionReader(r, 0, 1<<63-1) @@ -290,6 +292,7 @@ func NewFile(r io.ReaderAt) (*File, error) { } f := new(File) + f.DynamicTags = make(map[DynTag]uint64) f.Class = Class(ident[EI_CLASS]) switch f.Class { case ELFCLASS32: @@ -320,12 +323,12 @@ func NewFile(r io.ReaderAt) (*File, error) { // Read ELF file header var phoff int64 var phentsize, phnum int - var shentsize, shnum int - f.ShStrIndex = -1 + var shoff int64 + var shentsize, shnum, shstrndx int switch f.Class { case ELFCLASS32: hdr := new(Header32) - sr.Seek(0, seekStart) + sr.Seek(0, io.SeekStart) if err := binary.Read(sr, f.ByteOrder, hdr); err != nil { return nil, err } @@ -338,13 +341,15 @@ func NewFile(r io.ReaderAt) (*File, error) { phoff = int64(hdr.Phoff) phentsize = int(hdr.Phentsize) phnum = int(hdr.Phnum) - f.SHTOffset = int64(hdr.Shoff) + shoff = int64(hdr.Shoff) shentsize = int(hdr.Shentsize) shnum = int(hdr.Shnum) - f.ShStrIndex = int(hdr.Shstrndx) + shstrndx = int(hdr.Shstrndx) + f.ELFHeader32 = hdr + f.ELFHeader64 = nil case ELFCLASS64: hdr := new(Header64) - sr.Seek(0, seekStart) + sr.Seek(0, io.SeekStart) if err := binary.Read(sr, f.ByteOrder, hdr); err != nil { return nil, err } @@ -357,21 +362,47 @@ func NewFile(r io.ReaderAt) (*File, error) { phoff = int64(hdr.Phoff) phentsize = int(hdr.Phentsize) phnum = int(hdr.Phnum) - f.SHTOffset = int64(hdr.Shoff) + shoff = int64(hdr.Shoff) shentsize = int(hdr.Shentsize) shnum = int(hdr.Shnum) - f.ShStrIndex = int(hdr.Shstrndx) + shstrndx = int(hdr.Shstrndx) + f.ELFHeader32 = nil + f.ELFHeader64 = hdr + } + + if 
shoff < 0 { + return nil, &FormatError{0, "invalid shoff", shoff} + } + if phoff < 0 { + return nil, &FormatError{0, "invalid phoff", phoff} } - if shnum > 0 && f.SHTOffset > 0 && (f.ShStrIndex < 0 || f.ShStrIndex >= shnum) { - return nil, &FormatError{0, "invalid ELF shstrndx", f.ShStrIndex} + if shoff == 0 && shnum != 0 { + return nil, &FormatError{0, "invalid ELF shnum for shoff=0", shnum} + } + + if shnum > 0 && shstrndx >= shnum { + return nil, &FormatError{0, "invalid ELF shstrndx", shstrndx} + } + + var wantPhentsize, wantShentsize int + switch f.Class { + case ELFCLASS32: + wantPhentsize = 8 * 4 + wantShentsize = 10 * 4 + case ELFCLASS64: + wantPhentsize = 2*4 + 6*8 + wantShentsize = 4*4 + 6*8 + } + if phnum > 0 && phentsize < wantPhentsize { + return nil, &FormatError{0, "invalid ELF phentsize", phentsize} } // Read program headers f.Progs = make([]*Prog, phnum) for i := 0; i < phnum; i++ { off := phoff + int64(i)*int64(phentsize) - sr.Seek(off, seekStart) + sr.Seek(off, io.SeekStart) p := new(Prog) switch f.Class { case ELFCLASS32: @@ -405,17 +436,77 @@ func NewFile(r io.ReaderAt) (*File, error) { Align: ph.Align, } } + if int64(p.Off) < 0 { + return nil, &FormatError{off, "invalid program header offset", p.Off} + } + if int64(p.Filesz) < 0 { + return nil, &FormatError{off, "invalid program header file size", p.Filesz} + } p.sr = io.NewSectionReader(r, int64(p.Off), int64(p.Filesz)) p.ReaderAt = p.sr f.Progs[i] = p } + // If the number of sections is greater than or equal to SHN_LORESERVE + // (0xff00), shnum has the value zero and the actual number of section + // header table entries is contained in the sh_size field of the section + // header at index 0. 
+ if shoff > 0 && shnum == 0 { + var typ, link uint32 + sr.Seek(shoff, io.SeekStart) + switch f.Class { + case ELFCLASS32: + sh := new(Section32) + if err := binary.Read(sr, f.ByteOrder, sh); err != nil { + return nil, err + } + shnum = int(sh.Size) + typ = sh.Type + link = sh.Link + case ELFCLASS64: + sh := new(Section64) + if err := binary.Read(sr, f.ByteOrder, sh); err != nil { + return nil, err + } + shnum = int(sh.Size) + typ = sh.Type + link = sh.Link + } + if SectionType(typ) != SHT_NULL { + return nil, &FormatError{shoff, "invalid type of the initial section", SectionType(typ)} + } + + if shnum < int(SHN_LORESERVE) { + return nil, &FormatError{shoff, "invalid ELF shnum contained in sh_size", shnum} + } + + // If the section name string table section index is greater than or + // equal to SHN_LORESERVE (0xff00), this member has the value + // SHN_XINDEX (0xffff) and the actual index of the section name + // string table section is contained in the sh_link field of the + // section header at index 0. 
+ if shstrndx == int(SHN_XINDEX) { + shstrndx = int(link) + if shstrndx < int(SHN_LORESERVE) { + return nil, &FormatError{shoff, "invalid ELF shstrndx contained in sh_link", shstrndx} + } + } + } + + if shnum > 0 && shentsize < wantShentsize { + return nil, &FormatError{0, "invalid ELF shentsize", shentsize} + } + // Read section headers - f.Sections = make([]*Section, shnum) - names := make([]uint32, shnum) + c := saferio.SliceCap[Section](uint64(shnum)) + if c < 0 { + return nil, &FormatError{0, "too many sections", shnum} + } + f.Sections = make([]*Section, 0, c) + names := make([]uint32, 0, c) for i := 0; i < shnum; i++ { - off := f.SHTOffset + int64(i)*int64(shentsize) - sr.Seek(off, seekStart) + off := shoff + int64(i)*int64(shentsize) + sr.Seek(off, io.SeekStart) s := new(Section) switch f.Class { case ELFCLASS32: @@ -423,8 +514,9 @@ func NewFile(r io.ReaderAt) (*File, error) { if err := binary.Read(sr, f.ByteOrder, sh); err != nil { return nil, err } - names[i] = sh.Name + names = append(names, sh.Name) s.SectionHeader = SectionHeader{ + Index: sh.Name, Type: SectionType(sh.Type), Flags: SectionFlag(sh.Flags), Addr: uint64(sh.Addr), @@ -434,17 +526,15 @@ func NewFile(r io.ReaderAt) (*File, error) { Info: sh.Info, Addralign: uint64(sh.Addralign), Entsize: uint64(sh.Entsize), - Shnum: i, - Shname: sh.Name, } - case ELFCLASS64: sh := new(Section64) if err := binary.Read(sr, f.ByteOrder, sh); err != nil { return nil, err } - names[i] = sh.Name + names = append(names, sh.Name) s.SectionHeader = SectionHeader{ + Index: sh.Name, Type: SectionType(sh.Type), Flags: SectionFlag(sh.Flags), Offset: sh.Off, @@ -454,10 +544,14 @@ func NewFile(r io.ReaderAt) (*File, error) { Info: sh.Info, Addralign: sh.Addralign, Entsize: sh.Entsize, - Shnum: i, - Shname: sh.Name, } } + if int64(s.Offset) < 0 { + return nil, &FormatError{off, "invalid section offset", int64(s.Offset)} + } + if int64(s.FileSize) < 0 { + return nil, &FormatError{off, "invalid section size", 
int64(s.FileSize)} + } s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.FileSize)) if s.Flags&SHF_COMPRESSED == 0 { @@ -487,11 +581,7 @@ func NewFile(r io.ReaderAt) (*File, error) { } } - f.Sections[i] = s - } - - if err := f.parseDynTags(); err != nil { - return nil, err + f.Sections = append(f.Sections, s) } if len(f.Sections) == 0 { @@ -499,7 +589,16 @@ func NewFile(r io.ReaderAt) (*File, error) { } // Load section header string table. - shstrtab, err := f.Sections[f.ShStrIndex].Data() + if shstrndx == 0 { + // If the file has no section name string table, + // shstrndx holds the value SHN_UNDEF (0). + return f, nil + } + shstr := f.Sections[shstrndx] + if shstr.Type != SHT_STRTAB { + return nil, &FormatError{shoff + int64(shstrndx*shentsize), "invalid ELF section name string table type", shstr.Type} + } + shstrtab, err := shstr.Data() if err != nil { return nil, err } @@ -507,7 +606,29 @@ func NewFile(r io.ReaderAt) (*File, error) { var ok bool s.Name, ok = getString(shstrtab, int(names[i])) if !ok { - return nil, &FormatError{f.SHTOffset + int64(i*shentsize), "bad section name index", names[i]} + return nil, &FormatError{shoff + int64(i*shentsize), "bad section name index", names[i]} + } + } + + // Load dynamic/exported section string table values + shdyn := f.SectionByType(SHT_DYNAMIC) + if shdyn != nil { + d, err := shdyn.Data() + if err == nil { + r := bytes.NewBuffer(d) + for { + var t DynTag + var v uint64 + err1 := binary.Read(r, f.ByteOrder, &t) + if err1 != nil { + break + } + err2 := binary.Read(r, f.ByteOrder, &v) + if err2 != nil { + break + } + f.DynamicTags[t] = v + } } } @@ -528,7 +649,7 @@ func (f *File) getSymbols(typ SectionType) ([]Symbol, []byte, error) { return nil, nil, errors.New("not implemented") } -// ErrNoSymbols is returned by File.Symbols and File.DynamicSymbols +// ErrNoSymbols is returned by [File.Symbols] and [File.DynamicSymbols] // if there is no such section in the File. 
var ErrNoSymbols = errors.New("no symbol section") @@ -540,28 +661,34 @@ func (f *File) getSymbols32(typ SectionType) ([]Symbol, []byte, error) { data, err := symtabSection.Data() if err != nil { - return nil, nil, errors.New("cannot load symbol section") + return nil, nil, fmt.Errorf("cannot load symbol section: %w", err) + } + if len(data) == 0 { + return nil, nil, errors.New("symbol section is empty") } - symtab := bytes.NewReader(data) - if symtab.Len()%Sym32Size != 0 { + if len(data)%Sym32Size != 0 { return nil, nil, errors.New("length of symbol section is not a multiple of SymSize") } strdata, err := f.stringTable(symtabSection.Link) if err != nil { - return nil, nil, errors.New("cannot load string table section") + return nil, nil, fmt.Errorf("cannot load string table section: %w", err) } // The first entry is all zeros. - var skip [Sym32Size]byte - symtab.Read(skip[:]) + data = data[Sym32Size:] - symbols := make([]Symbol, symtab.Len()/Sym32Size) + symbols := make([]Symbol, len(data)/Sym32Size) i := 0 var sym Sym32 - for symtab.Len() > 0 { - binary.Read(symtab, f.ByteOrder, &sym) + for len(data) > 0 { + sym.Name = f.ByteOrder.Uint32(data[0:4]) + sym.Value = f.ByteOrder.Uint32(data[4:8]) + sym.Size = f.ByteOrder.Uint32(data[8:12]) + sym.Info = data[12] + sym.Other = data[13] + sym.Shndx = f.ByteOrder.Uint16(data[14:16]) str, _ := getString(strdata, int(sym.Name)) symbols[i].Name = str symbols[i].Info = sym.Info @@ -570,6 +697,7 @@ func (f *File) getSymbols32(typ SectionType) ([]Symbol, []byte, error) { symbols[i].Value = uint64(sym.Value) symbols[i].Size = uint64(sym.Size) i++ + data = data[Sym32Size:] } return symbols, strdata, nil @@ -583,38 +711,40 @@ func (f *File) getSymbols64(typ SectionType) ([]Symbol, []byte, error) { data, err := symtabSection.Data() if err != nil { - return nil, nil, errors.New("cannot load symbol section") + return nil, nil, fmt.Errorf("cannot load symbol section: %w", err) } - symtab := bytes.NewReader(data) - if 
symtab.Len()%Sym64Size != 0 { + if len(data)%Sym64Size != 0 { return nil, nil, errors.New("length of symbol section is not a multiple of Sym64Size") } strdata, err := f.stringTable(symtabSection.Link) if err != nil { - return nil, nil, errors.New("cannot load string table section") + return nil, nil, fmt.Errorf("cannot load string table section: %w", err) } // The first entry is all zeros. - var skip [Sym64Size]byte - symtab.Read(skip[:]) + data = data[Sym64Size:] - symbols := make([]Symbol, symtab.Len()/Sym64Size) + symbols := make([]Symbol, len(data)/Sym64Size) i := 0 var sym Sym64 - for symtab.Len() > 0 { - binary.Read(symtab, f.ByteOrder, &sym) + for len(data) > 0 { + sym.Name = f.ByteOrder.Uint32(data[0:4]) + sym.Info = data[4] + sym.Other = data[5] + sym.Shndx = f.ByteOrder.Uint16(data[6:8]) + sym.Value = f.ByteOrder.Uint64(data[8:16]) + sym.Size = f.ByteOrder.Uint64(data[16:24]) str, _ := getString(strdata, int(sym.Name)) symbols[i].Name = str - symbols[i].NameIndex = sym.Name symbols[i].Info = sym.Info symbols[i].Other = sym.Other symbols[i].Section = SectionIndex(sym.Shndx) - symbols[i].SectIndex = sym.Shndx symbols[i].Value = sym.Value symbols[i].Size = sym.Size i++ + data = data[Sym64Size:] } return symbols, strdata, nil @@ -645,7 +775,587 @@ func (f *File) Section(name string) *Section { return nil } -// DWARF - No idea what this does +// applyRelocations applies relocations to dst. rels is a relocations section +// in REL or RELA format. 
+func (f *File) applyRelocations(dst []byte, rels []byte) error { + switch { + case f.Class == ELFCLASS64 && f.Machine == EM_X86_64: + return f.applyRelocationsAMD64(dst, rels) + case f.Class == ELFCLASS32 && f.Machine == EM_386: + return f.applyRelocations386(dst, rels) + case f.Class == ELFCLASS32 && f.Machine == EM_ARM: + return f.applyRelocationsARM(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_AARCH64: + return f.applyRelocationsARM64(dst, rels) + case f.Class == ELFCLASS32 && f.Machine == EM_PPC: + return f.applyRelocationsPPC(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_PPC64: + return f.applyRelocationsPPC64(dst, rels) + case f.Class == ELFCLASS32 && f.Machine == EM_MIPS: + return f.applyRelocationsMIPS(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_MIPS: + return f.applyRelocationsMIPS64(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_LOONGARCH: + return f.applyRelocationsLOONG64(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_RISCV: + return f.applyRelocationsRISCV64(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_S390: + return f.applyRelocationss390x(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_SPARCV9: + return f.applyRelocationsSPARC64(dst, rels) + default: + return errors.New("applyRelocations: not implemented") + } +} + +// canApplyRelocation reports whether we should try to apply a +// relocation to a DWARF data section, given a pointer to the symbol +// targeted by the relocation. +// Most relocations in DWARF data tend to be section-relative, but +// some target non-section symbols (for example, low_PC attrs on +// subprogram or compilation unit DIEs that target function symbols). +func canApplyRelocation(sym *Symbol) bool { + return sym.Section != SHN_UNDEF && sym.Section < SHN_LORESERVE +} + +func (f *File) applyRelocationsAMD64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. 
+ if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_X86_64(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + // There are relocations, so this must be a normal + // object file. The code below handles only basic relocations + // of the form S + A (symbol plus addend). + + switch t { + case R_X86_64_64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_X86_64_32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocations386(dst []byte, rels []byte) error { + // 8 is the size of Rel32. + if len(rels)%8 != 0 { + return errors.New("length of relocation section is not a multiple of 8") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rel Rel32 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rel) + symNo := rel.Info >> 8 + t := R_386(rel.Info & 0xff) + + if symNo == 0 || symNo > uint32(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + + if t == R_386_32 { + if rel.Off+4 >= uint32(len(dst)) { + continue + } + val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) + val += uint32(sym.Value) + f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) + } + } + + return nil +} + +func (f *File) applyRelocationsARM(dst []byte, rels []byte) error { + // 8 is the size of Rel32. 
+ if len(rels)%8 != 0 { + return errors.New("length of relocation section is not a multiple of 8") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rel Rel32 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rel) + symNo := rel.Info >> 8 + t := R_ARM(rel.Info & 0xff) + + if symNo == 0 || symNo > uint32(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + + switch t { + case R_ARM_ABS32: + if rel.Off+4 >= uint32(len(dst)) { + continue + } + val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) + val += uint32(sym.Value) + f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) + } + } + + return nil +} + +func (f *File) applyRelocationsARM64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. + if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_AARCH64(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + // There are relocations, so this must be a normal + // object file. The code below handles only basic relocations + // of the form S + A (symbol plus addend). + + switch t { + case R_AARCH64_ABS64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_AARCH64_ABS32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsPPC(dst []byte, rels []byte) error { + // 12 is the size of Rela32. 
+ if len(rels)%12 != 0 { + return errors.New("length of relocation section is not a multiple of 12") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela32 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 8 + t := R_PPC(rela.Info & 0xff) + + if symNo == 0 || symNo > uint32(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_PPC_ADDR32: + if rela.Off+4 >= uint32(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsPPC64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. + if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_PPC64(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_PPC64_ADDR64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_PPC64_ADDR32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsMIPS(dst []byte, rels []byte) error { + // 8 is the size of Rel32. 
+ if len(rels)%8 != 0 { + return errors.New("length of relocation section is not a multiple of 8") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rel Rel32 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rel) + symNo := rel.Info >> 8 + t := R_MIPS(rel.Info & 0xff) + + if symNo == 0 || symNo > uint32(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + + switch t { + case R_MIPS_32: + if rel.Off+4 >= uint32(len(dst)) { + continue + } + val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) + val += uint32(sym.Value) + f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) + } + } + + return nil +} + +func (f *File) applyRelocationsMIPS64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. + if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + var symNo uint64 + var t R_MIPS + if f.ByteOrder == binary.BigEndian { + symNo = rela.Info >> 32 + t = R_MIPS(rela.Info & 0xff) + } else { + symNo = rela.Info & 0xffffffff + t = R_MIPS(rela.Info >> 56) + } + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_MIPS_64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_MIPS_32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsLOONG64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. 
+ if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + var symNo uint64 + var t R_LARCH + symNo = rela.Info >> 32 + t = R_LARCH(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_LARCH_64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_LARCH_32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsRISCV64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. 
+ if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_RISCV(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_RISCV_64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_RISCV_32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationss390x(dst []byte, rels []byte) error { + // 24 is the size of Rela64. 
+ if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_390(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_390_64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_390_32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + +func (f *File) applyRelocationsSPARC64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. 
+ if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_SPARC(rela.Info & 0xff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if !canApplyRelocation(sym) { + continue + } + + switch t { + case R_SPARC_64, R_SPARC_UA64: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val64 := sym.Value + uint64(rela.Addend) + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val64) + case R_SPARC_32, R_SPARC_UA32: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + val32 := uint32(sym.Value) + uint32(rela.Addend) + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val32) + } + } + + return nil +} + func (f *File) DWARF() (*dwarf.Data, error) { dwarfSuffix := func(s *Section) string { switch { @@ -666,20 +1376,11 @@ func (f *File) DWARF() (*dwarf.Data, error) { return nil, err } - if len(b) >= 12 && string(b[:4]) == "ZLIB" { - dlen := binary.BigEndian.Uint64(b[4:12]) - dbuf := make([]byte, dlen) - r, err := zlib.NewReader(bytes.NewBuffer(b[12:])) - if err != nil { - return nil, err - } - if _, err := io.ReadFull(r, dbuf); err != nil { - return nil, err - } - if err := r.Close(); err != nil { - return nil, err - } - b = dbuf + if f.Type == ET_EXEC { + // Do not apply relocations to DWARF sections for ET_EXEC binaries. + // Relocations should already be applied, and .rela sections may + // contain incorrect data. + return b, nil } for _, r := range f.Sections { @@ -701,9 +1402,8 @@ func (f *File) DWARF() (*dwarf.Data, error) { return b, nil } - // There are many other DWARF sections, but these - // are the ones the debug/dwarf package uses. - // Don't bother loading others. 
+ // There are many DWARF sections, but these are the ones + // the debug/dwarf package started with. var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil} for i, s := range f.Sections { suffix := dwarfSuffix(s) @@ -725,10 +1425,14 @@ func (f *File) DWARF() (*dwarf.Data, error) { return nil, err } - // Look for DWARF4 .debug_types sections. + // Look for DWARF4 .debug_types sections and DWARF5 sections. for i, s := range f.Sections { suffix := dwarfSuffix(s) - if suffix != "types" { + if suffix == "" { + continue + } + if _, ok := dat[suffix]; ok { + // Already handled. continue } @@ -737,9 +1441,14 @@ func (f *File) DWARF() (*dwarf.Data, error) { return nil, err } - err = d.AddTypes(fmt.Sprintf("types-%d", i), b) - if err != nil { - return nil, err + if suffix == "types" { + if err := d.AddTypes(fmt.Sprintf("types-%d", i), b); err != nil { + return nil, err + } + } else { + if err := d.AddSection(".debug_"+suffix, b); err != nil { + return nil, err + } } } @@ -760,12 +1469,23 @@ func (f *File) Symbols() ([]Symbol, error) { // DynamicSymbols returns the dynamic symbol table for f. The symbols // will be listed in the order they appear in f. // -// For compatibility with Symbols, DynamicSymbols omits the null symbol at index 0. +// If f has a symbol version table, the returned [Symbol] values will have +// initialized Version and Library fields. +// +// For compatibility with [File.Symbols], [File.DynamicSymbols] omits the null symbol at index 0. // After retrieving the symbols as symtab, an externally supplied index x // corresponds to symtab[x-1], not symtab[x]. 
func (f *File) DynamicSymbols() ([]Symbol, error) { - sym, _, err := f.getSymbols(SHT_DYNSYM) - return sym, err + sym, str, err := f.getSymbols(SHT_DYNSYM) + if err != nil { + return nil, err + } + if f.gnuVersionInit(str) { + for i := range sym { + sym[i].Library, sym[i].Version = f.gnuVersion(i) + } + } + return sym, nil } type ImportedSymbol struct { @@ -788,7 +1508,8 @@ func (f *File) ImportedSymbols() ([]ImportedSymbol, error) { for i, s := range sym { if ST_BIND(s.Info) == STB_GLOBAL && s.Section == SHN_UNDEF { all = append(all, ImportedSymbol{Name: s.Name}) - f.gnuVersion(i, &all[len(all)-1]) + sym := &all[len(all)-1] + sym.Library, sym.Version = f.gnuVersion(i) } } return all, nil @@ -801,11 +1522,16 @@ type verneed struct { // gnuVersionInit parses the GNU version tables // for use by calls to gnuVersion. -func (f *File) gnuVersionInit(str []byte) { +func (f *File) gnuVersionInit(str []byte) bool { + if f.gnuNeed != nil { + // Already initialized + return true + } + // Accumulate verneed information. vn := f.SectionByType(SHT_GNU_VERNEED) if vn == nil { - return + return false } d, _ := vn.Data() @@ -860,29 +1586,33 @@ func (f *File) gnuVersionInit(str []byte) { // Versym parallels symbol table, indexing into verneed. vs := f.SectionByType(SHT_GNU_VERSYM) if vs == nil { - return + return false } d, _ = vs.Data() f.gnuNeed = need f.gnuVersym = d + return true } // gnuVersion adds Library and Version information to sym, // which came from offset i of the symbol table. -func (f *File) gnuVersion(i int, sym *ImportedSymbol) { - // Each entry is two bytes. +func (f *File) gnuVersion(i int) (library string, version string) { + // Each entry is two bytes; skip undef entry at beginning. 
i = (i + 1) * 2 if i >= len(f.gnuVersym) { return } - j := int(f.ByteOrder.Uint16(f.gnuVersym[i:])) + s := f.gnuVersym[i:] + if len(s) < 2 { + return + } + j := int(f.ByteOrder.Uint16(s)) if j < 2 || j >= len(f.gnuNeed) { return } n := &f.gnuNeed[j] - sym.Library = n.File - sym.Version = n.Name + return n.File, n.Name } // ImportedLibraries returns the names of all libraries @@ -892,69 +1622,55 @@ func (f *File) ImportedLibraries() ([]string, error) { return f.DynString(DT_NEEDED) } -func (f *File) parseDynTags() error { - s := f.SectionByType(SHT_DYNAMIC) - if s == nil { - return nil // nothing to do - } - - var m []DynTagValue - d, err := s.Data() - if err != nil { - return err - } - //fmt.Printf("%+v\nd: %x len(%d)\n", s, d, len(d)) - - r := bytes.NewBuffer(d) - for { - var t, v uint64 - if err := binary.Read(r, f.ByteOrder, &t); err != nil { - if err == io.EOF { - break - } else if err != nil { - return err - } - } - if err := binary.Read(r, f.ByteOrder, &v); err != nil { - if err == io.EOF { - break - } else if err != nil { - return err - } - } - m = append(m, DynTagValue{Tag: DynTag(t), Value: v}) - //fmt.Printf("%x -> %x\n", t, v) - } - f.DynTags = m - return nil -} - // DynString returns the strings listed for the given tag in the file's dynamic // section. // -// The tag must be one that takes string values: DT_NEEDED, DT_SONAME, DT_RPATH, or -// DT_RUNPATH. +// The tag must be one that takes string values: [DT_NEEDED], [DT_SONAME], [DT_RPATH], or +// [DT_RUNPATH]. 
func (f *File) DynString(tag DynTag) ([]string, error) { switch tag { case DT_NEEDED, DT_SONAME, DT_RPATH, DT_RUNPATH: default: return nil, fmt.Errorf("non-string-valued tag %v", tag) } - ds := f.SectionByType(SHT_DYNAMIC) if ds == nil { // not dynamic, so no libraries return nil, nil } + d, err := ds.Data() + if err != nil { + return nil, err + } + + dynSize := 8 + if f.Class == ELFCLASS64 { + dynSize = 16 + } + if len(d)%dynSize != 0 { + return nil, errors.New("length of dynamic section is not a multiple of dynamic entry size") + } str, err := f.stringTable(ds.Link) if err != nil { return nil, err } var all []string - for _, taggedValue := range f.DynTags { - if taggedValue.Tag == tag { - s, ok := getString(str, int(taggedValue.Value)) + for len(d) > 0 { + var t DynTag + var v uint64 + switch f.Class { + case ELFCLASS32: + t = DynTag(f.ByteOrder.Uint32(d[0:4])) + v = uint64(f.ByteOrder.Uint32(d[4:8])) + d = d[8:] + case ELFCLASS64: + t = DynTag(f.ByteOrder.Uint64(d[0:8])) + v = f.ByteOrder.Uint64(d[8:16]) + d = d[16:] + } + if t == tag { + s, ok := getString(str, int(v)) if ok { all = append(all, s) } @@ -962,3 +1678,51 @@ func (f *File) DynString(tag DynTag) ([]string, error) { } return all, nil } + +// DynValue returns the values listed for the given tag in the file's dynamic +// section. +func (f *File) DynValue(tag DynTag) ([]uint64, error) { + ds := f.SectionByType(SHT_DYNAMIC) + if ds == nil { + return nil, nil + } + d, err := ds.Data() + if err != nil { + return nil, err + } + + dynSize := 8 + if f.Class == ELFCLASS64 { + dynSize = 16 + } + if len(d)%dynSize != 0 { + return nil, errors.New("length of dynamic section is not a multiple of dynamic entry size") + } + + // Parse the .dynamic section as a string of bytes. 
+ var vals []uint64 + for len(d) > 0 { + var t DynTag + var v uint64 + switch f.Class { + case ELFCLASS32: + t = DynTag(f.ByteOrder.Uint32(d[0:4])) + v = uint64(f.ByteOrder.Uint32(d[4:8])) + d = d[8:] + case ELFCLASS64: + t = DynTag(f.ByteOrder.Uint64(d[0:8])) + v = f.ByteOrder.Uint64(d[8:16]) + d = d[16:] + } + if t == tag { + vals = append(vals, v) + } + } + return vals, nil +} + +type nobitsSectionReader struct{} + +func (*nobitsSectionReader) ReadAt(p []byte, off int64) (n int, err error) { + return 0, errors.New("unexpected read from SHT_NOBITS section") +} diff --git a/elf/file_bytes.go b/elf/file_bytes.go new file mode 100644 index 0000000..7d4050c --- /dev/null +++ b/elf/file_bytes.go @@ -0,0 +1,209 @@ +package elf + +import ( + "bufio" + "bytes" + "encoding/binary" + "io/ioutil" + "log" +) + +// Bytes - returns the bytes of an Elf file +func (f *File) Bytes() ([]byte, error) { + + bytesWritten := uint64(0) + buf := bytes.NewBuffer(nil) + w := bufio.NewWriter(buf) + + // Write Elf Magic + w.WriteByte('\x7f') + w.WriteByte('E') + w.WriteByte('L') + w.WriteByte('F') + bytesWritten += 4 + + w.WriteByte(byte(f.Class)) + w.WriteByte(byte(f.Data)) + w.WriteByte(byte(f.Version)) + w.WriteByte(byte(f.OSABI)) + w.WriteByte(byte(f.ABIVersion)) + // ident[EI_PAD] ( 7 bytes ) + w.Write([]byte{0, 0, 0, 0, 0, 0, 0}) + bytesWritten += 12 + + binary.Write(w, f.ByteOrder, uint16(f.Type)) + binary.Write(w, f.ByteOrder, uint16(f.Machine)) + binary.Write(w, f.ByteOrder, uint32(f.Version)) + bytesWritten += 8 + + switch f.Class { + case ELFCLASS32: + binary.Write(w, f.ByteOrder, uint32(f.Entry)) + binary.Write(w, f.ByteOrder, uint32(f.ELFHeader32.Phoff)) + binary.Write(w, f.ByteOrder, int32(f.ELFHeader32.Shoff)) + binary.Write(w, f.ByteOrder, uint32(f.ELFHeader32.Flags)) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader32.Ehsize)) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader32.Phentsize)) + binary.Write(w, f.ByteOrder, uint16(len(f.Progs))) + binary.Write(w, f.ByteOrder, 
uint16(f.ELFHeader32.Shentsize)) + binary.Write(w, f.ByteOrder, uint16(len(f.Sections))) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader32.Shstrndx)) + bytesWritten += 28 + case ELFCLASS64: + binary.Write(w, f.ByteOrder, uint64(f.Entry)) + binary.Write(w, f.ByteOrder, uint64(f.ELFHeader64.Phoff)) + binary.Write(w, f.ByteOrder, int64(f.ELFHeader64.Shoff)) + binary.Write(w, f.ByteOrder, uint32(f.ELFHeader64.Flags)) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader64.Ehsize)) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader64.Phentsize)) + binary.Write(w, f.ByteOrder, uint16(len(f.Progs))) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader64.Shentsize)) + binary.Write(w, f.ByteOrder, uint16(len(f.Sections))) + binary.Write(w, f.ByteOrder, uint16(f.ELFHeader64.Shstrndx)) + bytesWritten += 40 + } + + // Program Header + for _, p := range f.Progs { + // Type (segment) + binary.Write(w, f.ByteOrder, uint32(p.Type)) + bytesWritten += 4 + + switch f.Class { + case ELFCLASS32: + binary.Write(w, f.ByteOrder, uint32(p.Off)) + binary.Write(w, f.ByteOrder, uint32(p.Vaddr)) + binary.Write(w, f.ByteOrder, uint32(p.Paddr)) + binary.Write(w, f.ByteOrder, uint32(p.Filesz)) + binary.Write(w, f.ByteOrder, uint32(p.Memsz)) + binary.Write(w, f.ByteOrder, uint32(p.Flags)) + binary.Write(w, f.ByteOrder, uint32(p.Align)) + bytesWritten += 28 + case ELFCLASS64: + binary.Write(w, f.ByteOrder, uint32(p.Flags)) + binary.Write(w, f.ByteOrder, uint64(p.Off)) + binary.Write(w, f.ByteOrder, uint64(p.Vaddr)) + binary.Write(w, f.ByteOrder, uint64(p.Paddr)) + binary.Write(w, f.ByteOrder, uint64(p.Filesz)) + binary.Write(w, f.ByteOrder, uint64(p.Memsz)) + binary.Write(w, f.ByteOrder, uint64(p.Align)) + bytesWritten += 52 + } + } + + for _, s := range f.Sections { + + //log.Printf("Writing section: %s type: %+v\n", s.Name, s.Type) + //log.Printf("written: %x offset: %x\n", bytesWritten, s.Offset) + + if s.Type == SHT_NULL || s.Type == SHT_NOBITS || s.FileSize == 0 { + continue + } + + if bytesWritten 
> s.Offset { + log.Printf("Overlapping Sections in Generated Elf: %+v\n", s.Name) + continue + } + if s.Offset != 0 && bytesWritten < s.Offset { + pad := make([]byte, s.Offset-bytesWritten) + w.Write(pad) + //log.Printf("Padding before section %s at %x: length:%x to:%x\n", s.Name, bytesWritten, len(pad), s.Offset) + bytesWritten += uint64(len(pad)) + } + + slen := 0 + switch s.Type { + case SHT_DYNAMIC: + for tag, value := range f.DynamicTags { + //log.Printf("writing %d (%x) -> %d (%x)\n", tag, tag, value, value) + switch f.Class { + case ELFCLASS32: + binary.Write(w, f.ByteOrder, uint32(tag)) + binary.Write(w, f.ByteOrder, uint32(value)) + bytesWritten += 8 + case ELFCLASS64: + binary.Write(w, f.ByteOrder, uint64(tag)) + binary.Write(w, f.ByteOrder, uint64(value)) + bytesWritten += 16 + } + } + default: + section, err := ioutil.ReadAll(s.Open()) + if err != nil { + return nil, err + } + binary.Write(w, f.ByteOrder, section) + slen = len(section) + //log.Printf("Wrote %s section at %x, length %x\n", s.Name, bytesWritten, slen) + bytesWritten += uint64(slen) + } + + if s.Type == SHT_PROGBITS && len(f.Injection) > 0 && s.Size-uint64(slen) >= uint64(len(f.Injection)) { + binary.Write(w, f.ByteOrder, f.Injection) + bytesWritten += uint64(len(f.Injection)) + } + w.Flush() + } + + // Pad to Section Header Table + switch f.Class { + case ELFCLASS32: + if bytesWritten < uint64(f.ELFHeader32.Shoff) { + pad := make([]byte, uint64(f.ELFHeader32.Shoff)-bytesWritten) + w.Write(pad) + //log.Printf("Padding before SHT at %x: length:%x to:%x\n", bytesWritten, len(pad), f.ELFHeader32.Shoff) + bytesWritten += uint64(len(pad)) + } + case ELFCLASS64: + if bytesWritten < uint64(f.ELFHeader64.Shoff) { + pad := make([]byte, uint64(f.ELFHeader64.Shoff)-bytesWritten) + w.Write(pad) + //log.Printf("Padding before SHT at %x: length:%x to:%x\n", bytesWritten, len(pad), f.ELFHeader32.Shoff) + bytesWritten += uint64(len(pad)) + } + } + + // Write Section Header Table + for _, s := range 
f.Sections { + switch f.Class { + case ELFCLASS32: + binary.Write(w, f.ByteOrder, &Section32{ + Name: uint32(s.Index), + Type: uint32(s.Type), + Flags: uint32(s.Flags), + Addr: uint32(s.Addr), + Off: uint32(s.Offset), + Size: uint32(s.Size), + Link: s.Link, + Info: s.Info, + Addralign: uint32(s.Addralign), + Entsize: uint32(s.Entsize), + }) + case ELFCLASS64: + binary.Write(w, f.ByteOrder, &Section64{ + Name: uint32(s.Index), + Type: uint32(s.Type), + Flags: uint64(s.Flags), + Addr: s.Addr, + Off: s.Offset, + Size: s.Size, + Link: s.Link, + Info: s.Info, + Addralign: s.Addralign, + Entsize: s.Entsize, + }) + } + } + + // TODO: Do I have a PT_NOTE segment to add at the end? + + if len(f.InjectionEOF) > 0 { + binary.Write(w, f.ByteOrder, f.InjectionEOF) + bytesWritten += uint64(len(f.InjectionEOF)) + } + + w.Flush() + + return buf.Bytes(), nil +} + diff --git a/elf/file_exportedsymbols.go b/elf/file_exportedsymbols.go new file mode 100644 index 0000000..d4d0cca --- /dev/null +++ b/elf/file_exportedsymbols.go @@ -0,0 +1,21 @@ +package elf + +// ExportedSymbols returns the exported and defined dynamic symbol table for f. +// +// For compatibility with [File.DynamicSymbols], [File.ExportedSymbols] returns the same +// slice of exported [Symbol]s, with the difference being that [Symbol.Section] != [SHN_UNDEF]. 
+func (f *File) ExportedSymbols() ([]Symbol, error) { + + exported := make([]Symbol, 0) + symbols, err := f.DynamicSymbols() + if err != nil { + return nil, err + } + for _, s := range symbols { + if s.Section != SHN_UNDEF { + exported = append(exported, s) + } + } + + return exported, nil +} diff --git a/elf/file_test.go b/elf/file_test.go index d7c1e9f..5dd83a2 100644 --- a/elf/file_test.go +++ b/elf/file_test.go @@ -7,8 +7,11 @@ package elf import ( "bytes" "compress/gzip" + "compress/zlib" "debug/dwarf" "encoding/binary" + "errors" + "fmt" "io" "math/rand" "net" @@ -16,6 +19,8 @@ import ( "path" "reflect" "runtime" + "slices" + "strings" "testing" ) @@ -25,6 +30,7 @@ type fileTest struct { sections []SectionHeader progs []ProgHeader needed []string + symbols []Symbol } var fileTests = []fileTest{ @@ -71,6 +77,82 @@ var fileTests = []fileTest{ {PT_DYNAMIC, PF_R + PF_W, 0x60c, 0x804960c, 0x804960c, 0x98, 0x98, 0x4}, }, []string{"libc.so.6"}, + []Symbol{ + {"", 3, 0, 1, 134512852, 0, "", ""}, + {"", 3, 0, 2, 134512876, 0, "", ""}, + {"", 3, 0, 3, 134513020, 0, "", ""}, + {"", 3, 0, 4, 134513292, 0, "", ""}, + {"", 3, 0, 5, 134513480, 0, "", ""}, + {"", 3, 0, 6, 134513512, 0, "", ""}, + {"", 3, 0, 7, 134513532, 0, "", ""}, + {"", 3, 0, 8, 134513612, 0, "", ""}, + {"", 3, 0, 9, 134513996, 0, "", ""}, + {"", 3, 0, 10, 134514008, 0, "", ""}, + {"", 3, 0, 11, 134518268, 0, "", ""}, + {"", 3, 0, 12, 134518280, 0, "", ""}, + {"", 3, 0, 13, 134518284, 0, "", ""}, + {"", 3, 0, 14, 134518436, 0, "", ""}, + {"", 3, 0, 15, 134518444, 0, "", ""}, + {"", 3, 0, 16, 134518452, 0, "", ""}, + {"", 3, 0, 17, 134518456, 0, "", ""}, + {"", 3, 0, 18, 134518484, 0, "", ""}, + {"", 3, 0, 19, 0, 0, "", ""}, + {"", 3, 0, 20, 0, 0, "", ""}, + {"", 3, 0, 21, 0, 0, "", ""}, + {"", 3, 0, 22, 0, 0, "", ""}, + {"", 3, 0, 23, 0, 0, "", ""}, + {"", 3, 0, 24, 0, 0, "", ""}, + {"", 3, 0, 25, 0, 0, "", ""}, + {"", 3, 0, 26, 0, 0, "", ""}, + {"", 3, 0, 27, 0, 0, "", ""}, + {"", 3, 0, 28, 0, 0, "", 
""}, + {"", 3, 0, 29, 0, 0, "", ""}, + {"crt1.c", 4, 0, 65521, 0, 0, "", ""}, + {"/usr/src/lib/csu/i386-elf/crti.S", 4, 0, 65521, 0, 0, "", ""}, + {"", 4, 0, 65521, 0, 0, "", ""}, + {"", 4, 0, 65521, 0, 0, "", ""}, + {"/usr/src/lib/csu/i386-elf/crti.S", 4, 0, 65521, 0, 0, "", ""}, + {"crtstuff.c", 4, 0, 65521, 0, 0, "", ""}, + {"__CTOR_LIST__", 1, 0, 14, 134518436, 0, "", ""}, + {"__DTOR_LIST__", 1, 0, 15, 134518444, 0, "", ""}, + {"__EH_FRAME_BEGIN__", 1, 0, 12, 134518280, 0, "", ""}, + {"__JCR_LIST__", 1, 0, 16, 134518452, 0, "", ""}, + {"p.0", 1, 0, 11, 134518276, 0, "", ""}, + {"completed.1", 1, 0, 18, 134518484, 1, "", ""}, + {"__do_global_dtors_aux", 2, 0, 8, 134513760, 0, "", ""}, + {"object.2", 1, 0, 18, 134518488, 24, "", ""}, + {"frame_dummy", 2, 0, 8, 134513836, 0, "", ""}, + {"crtstuff.c", 4, 0, 65521, 0, 0, "", ""}, + {"__CTOR_END__", 1, 0, 14, 134518440, 0, "", ""}, + {"__DTOR_END__", 1, 0, 15, 134518448, 0, "", ""}, + {"__FRAME_END__", 1, 0, 12, 134518280, 0, "", ""}, + {"__JCR_END__", 1, 0, 16, 134518452, 0, "", ""}, + {"__do_global_ctors_aux", 2, 0, 8, 134513960, 0, "", ""}, + {"/usr/src/lib/csu/i386-elf/crtn.S", 4, 0, 65521, 0, 0, "", ""}, + {"", 4, 0, 65521, 0, 0, "", ""}, + {"", 4, 0, 65521, 0, 0, "", ""}, + {"/usr/src/lib/csu/i386-elf/crtn.S", 4, 0, 65521, 0, 0, "", ""}, + {"hello.c", 4, 0, 65521, 0, 0, "", ""}, + {"printf", 18, 0, 0, 0, 44, "", ""}, + {"_DYNAMIC", 17, 0, 65521, 134518284, 0, "", ""}, + {"__dso_handle", 17, 2, 11, 134518272, 0, "", ""}, + {"_init", 18, 0, 6, 134513512, 0, "", ""}, + {"environ", 17, 0, 18, 134518512, 4, "", ""}, + {"__deregister_frame_info", 32, 0, 0, 0, 0, "", ""}, + {"__progname", 17, 0, 11, 134518268, 4, "", ""}, + {"_start", 18, 0, 8, 134513612, 145, "", ""}, + {"__bss_start", 16, 0, 65521, 134518484, 0, "", ""}, + {"main", 18, 0, 8, 134513912, 46, "", ""}, + {"_init_tls", 18, 0, 0, 0, 5, "", ""}, + {"_fini", 18, 0, 9, 134513996, 0, "", ""}, + {"atexit", 18, 0, 0, 0, 43, "", ""}, + {"_edata", 16, 0, 65521, 
134518484, 0, "", ""}, + {"_GLOBAL_OFFSET_TABLE_", 17, 0, 65521, 134518456, 0, "", ""}, + {"_end", 16, 0, 65521, 134518516, 0, "", ""}, + {"exit", 18, 0, 0, 0, 68, "", ""}, + {"_Jv_RegisterClasses", 32, 0, 0, 0, 0, "", ""}, + {"__register_frame_info", 32, 0, 0, 0, 0, "", ""}, + }, }, { "testdata/gcc-amd64-linux-exec", @@ -125,6 +207,81 @@ var fileTests = []fileTest{ {PT_LOOS + 0x474E551, PF_R + PF_W, 0x0, 0x0, 0x0, 0x0, 0x0, 0x8}, }, []string{"libc.so.6"}, + []Symbol{ + {"", 3, 0, 1, 4194816, 0, "", ""}, + {"", 3, 0, 2, 4194844, 0, "", ""}, + {"", 3, 0, 3, 4194880, 0, "", ""}, + {"", 3, 0, 4, 4194920, 0, "", ""}, + {"", 3, 0, 5, 4194952, 0, "", ""}, + {"", 3, 0, 6, 4195048, 0, "", ""}, + {"", 3, 0, 7, 4195110, 0, "", ""}, + {"", 3, 0, 8, 4195120, 0, "", ""}, + {"", 3, 0, 9, 4195152, 0, "", ""}, + {"", 3, 0, 10, 4195176, 0, "", ""}, + {"", 3, 0, 11, 4195224, 0, "", ""}, + {"", 3, 0, 12, 4195248, 0, "", ""}, + {"", 3, 0, 13, 4195296, 0, "", ""}, + {"", 3, 0, 14, 4195732, 0, "", ""}, + {"", 3, 0, 15, 4195748, 0, "", ""}, + {"", 3, 0, 16, 4195768, 0, "", ""}, + {"", 3, 0, 17, 4195808, 0, "", ""}, + {"", 3, 0, 18, 6293128, 0, "", ""}, + {"", 3, 0, 19, 6293144, 0, "", ""}, + {"", 3, 0, 20, 6293160, 0, "", ""}, + {"", 3, 0, 21, 6293168, 0, "", ""}, + {"", 3, 0, 22, 6293584, 0, "", ""}, + {"", 3, 0, 23, 6293592, 0, "", ""}, + {"", 3, 0, 24, 6293632, 0, "", ""}, + {"", 3, 0, 25, 6293656, 0, "", ""}, + {"", 3, 0, 26, 0, 0, "", ""}, + {"", 3, 0, 27, 0, 0, "", ""}, + {"", 3, 0, 28, 0, 0, "", ""}, + {"", 3, 0, 29, 0, 0, "", ""}, + {"", 3, 0, 30, 0, 0, "", ""}, + {"", 3, 0, 31, 0, 0, "", ""}, + {"", 3, 0, 32, 0, 0, "", ""}, + {"", 3, 0, 33, 0, 0, "", ""}, + {"init.c", 4, 0, 65521, 0, 0, "", ""}, + {"initfini.c", 4, 0, 65521, 0, 0, "", ""}, + {"call_gmon_start", 2, 0, 13, 4195340, 0, "", ""}, + {"crtstuff.c", 4, 0, 65521, 0, 0, "", ""}, + {"__CTOR_LIST__", 1, 0, 18, 6293128, 0, "", ""}, + {"__DTOR_LIST__", 1, 0, 19, 6293144, 0, "", ""}, + {"__JCR_LIST__", 1, 0, 20, 6293160, 0, 
"", ""}, + {"__do_global_dtors_aux", 2, 0, 13, 4195376, 0, "", ""}, + {"completed.6183", 1, 0, 25, 6293656, 1, "", ""}, + {"p.6181", 1, 0, 24, 6293648, 0, "", ""}, + {"frame_dummy", 2, 0, 13, 4195440, 0, "", ""}, + {"crtstuff.c", 4, 0, 65521, 0, 0, "", ""}, + {"__CTOR_END__", 1, 0, 18, 6293136, 0, "", ""}, + {"__DTOR_END__", 1, 0, 19, 6293152, 0, "", ""}, + {"__FRAME_END__", 1, 0, 17, 4195968, 0, "", ""}, + {"__JCR_END__", 1, 0, 20, 6293160, 0, "", ""}, + {"__do_global_ctors_aux", 2, 0, 13, 4195680, 0, "", ""}, + {"initfini.c", 4, 0, 65521, 0, 0, "", ""}, + {"hello.c", 4, 0, 65521, 0, 0, "", ""}, + {"_GLOBAL_OFFSET_TABLE_", 1, 2, 23, 6293592, 0, "", ""}, + {"__init_array_end", 0, 2, 18, 6293124, 0, "", ""}, + {"__init_array_start", 0, 2, 18, 6293124, 0, "", ""}, + {"_DYNAMIC", 1, 2, 21, 6293168, 0, "", ""}, + {"data_start", 32, 0, 24, 6293632, 0, "", ""}, + {"__libc_csu_fini", 18, 0, 13, 4195520, 2, "", ""}, + {"_start", 18, 0, 13, 4195296, 0, "", ""}, + {"__gmon_start__", 32, 0, 0, 0, 0, "", ""}, + {"_Jv_RegisterClasses", 32, 0, 0, 0, 0, "", ""}, + {"puts@@GLIBC_2.2.5", 18, 0, 0, 0, 396, "", ""}, + {"_fini", 18, 0, 14, 4195732, 0, "", ""}, + {"__libc_start_main@@GLIBC_2.2.5", 18, 0, 0, 0, 450, "", ""}, + {"_IO_stdin_used", 17, 0, 15, 4195748, 4, "", ""}, + {"__data_start", 16, 0, 24, 6293632, 0, "", ""}, + {"__dso_handle", 17, 2, 24, 6293640, 0, "", ""}, + {"__libc_csu_init", 18, 0, 13, 4195536, 137, "", ""}, + {"__bss_start", 16, 0, 65521, 6293656, 0, "", ""}, + {"_end", 16, 0, 65521, 6293664, 0, "", ""}, + {"_edata", 16, 0, 65521, 6293656, 0, "", ""}, + {"main", 18, 0, 13, 4195480, 27, "", ""}, + {"_init", 18, 0, 11, 4195224, 0, "", ""}, + }, }, { "testdata/hello-world-core.gz", @@ -150,6 +307,7 @@ var fileTests = []fileTest{ {Type: PT_LOAD, Flags: PF_X + PF_R, Off: 0x3b000, Vaddr: 0xffffffffff600000, Paddr: 0x0, Filesz: 0x1000, Memsz: 0x1000, Align: 0x1000}, }, nil, + nil, }, { "testdata/compressed-32.obj", @@ -179,6 +337,23 @@ var fileTests = []fileTest{ }, 
[]ProgHeader{}, nil, + []Symbol{ + {"hello.c", 4, 0, 65521, 0, 0, "", ""}, + {"", 3, 0, 1, 0, 0, "", ""}, + {"", 3, 0, 3, 0, 0, "", ""}, + {"", 3, 0, 4, 0, 0, "", ""}, + {"", 3, 0, 5, 0, 0, "", ""}, + {"", 3, 0, 6, 0, 0, "", ""}, + {"", 3, 0, 8, 0, 0, "", ""}, + {"", 3, 0, 9, 0, 0, "", ""}, + {"", 3, 0, 11, 0, 0, "", ""}, + {"", 3, 0, 13, 0, 0, "", ""}, + {"", 3, 0, 15, 0, 0, "", ""}, + {"", 3, 0, 16, 0, 0, "", ""}, + {"", 3, 0, 14, 0, 0, "", ""}, + {"main", 18, 0, 1, 0, 23, "", ""}, + {"puts", 16, 0, 0, 0, 0, "", ""}, + }, }, { "testdata/compressed-64.obj", @@ -208,6 +383,69 @@ var fileTests = []fileTest{ }, []ProgHeader{}, nil, + []Symbol{ + {"hello.c", 4, 0, 65521, 0, 0, "", ""}, + {"", 3, 0, 1, 0, 0, "", ""}, + {"", 3, 0, 3, 0, 0, "", ""}, + {"", 3, 0, 4, 0, 0, "", ""}, + {"", 3, 0, 5, 0, 0, "", ""}, + {"", 3, 0, 6, 0, 0, "", ""}, + {"", 3, 0, 8, 0, 0, "", ""}, + {"", 3, 0, 9, 0, 0, "", ""}, + {"", 3, 0, 11, 0, 0, "", ""}, + {"", 3, 0, 13, 0, 0, "", ""}, + {"", 3, 0, 15, 0, 0, "", ""}, + {"", 3, 0, 16, 0, 0, "", ""}, + {"", 3, 0, 14, 0, 0, "", ""}, + {"main", 18, 0, 1, 0, 27, "", ""}, + {"puts", 16, 0, 0, 0, 0, "", ""}, + }, + }, + { + "testdata/go-relocation-test-gcc620-sparc64.obj", + FileHeader{Class: ELFCLASS64, Data: ELFDATA2MSB, Version: EV_CURRENT, OSABI: ELFOSABI_NONE, ABIVersion: 0x0, ByteOrder: binary.BigEndian, Type: ET_REL, Machine: EM_SPARCV9, Entry: 0x0}, + []SectionHeader{ + {"", SHT_NULL, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {".text", SHT_PROGBITS, SHF_ALLOC + SHF_EXECINSTR, 0x0, 0x40, 0x2c, 0x0, 0x0, 0x4, 0x0, 0x2c}, + {".rela.text", SHT_RELA, SHF_INFO_LINK, 0x0, 0xa58, 0x48, 0x13, 0x1, 0x8, 0x18, 0x48}, + {".data", SHT_PROGBITS, SHF_WRITE + SHF_ALLOC, 0x0, 0x6c, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0}, + {".bss", SHT_NOBITS, SHF_WRITE + SHF_ALLOC, 0x0, 0x6c, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0}, + {".rodata", SHT_PROGBITS, SHF_ALLOC, 0x0, 0x70, 0xd, 0x0, 0x0, 0x8, 0x0, 0xd}, + {".debug_info", SHT_PROGBITS, 0x0, 0x0, 0x7d, 0x346, 0x0, 0x0, 0x1, 0x0, 
0x346}, + {".rela.debug_info", SHT_RELA, SHF_INFO_LINK, 0x0, 0xaa0, 0x630, 0x13, 0x6, 0x8, 0x18, 0x630}, + {".debug_abbrev", SHT_PROGBITS, 0x0, 0x0, 0x3c3, 0xf1, 0x0, 0x0, 0x1, 0x0, 0xf1}, + {".debug_aranges", SHT_PROGBITS, 0x0, 0x0, 0x4b4, 0x30, 0x0, 0x0, 0x1, 0x0, 0x30}, + {".rela.debug_aranges", SHT_RELA, SHF_INFO_LINK, 0x0, 0x10d0, 0x30, 0x13, 0x9, 0x8, 0x18, 0x30}, + {".debug_line", SHT_PROGBITS, 0x0, 0x0, 0x4e4, 0xd3, 0x0, 0x0, 0x1, 0x0, 0xd3}, + {".rela.debug_line", SHT_RELA, SHF_INFO_LINK, 0x0, 0x1100, 0x18, 0x13, 0xb, 0x8, 0x18, 0x18}, + {".debug_str", SHT_PROGBITS, SHF_MERGE + SHF_STRINGS, 0x0, 0x5b7, 0x2a3, 0x0, 0x0, 0x1, 0x1, 0x2a3}, + {".comment", SHT_PROGBITS, SHF_MERGE + SHF_STRINGS, 0x0, 0x85a, 0x2e, 0x0, 0x0, 0x1, 0x1, 0x2e}, + {".note.GNU-stack", SHT_PROGBITS, 0x0, 0x0, 0x888, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0}, + {".debug_frame", SHT_PROGBITS, 0x0, 0x0, 0x888, 0x38, 0x0, 0x0, 0x8, 0x0, 0x38}, + {".rela.debug_frame", SHT_RELA, SHF_INFO_LINK, 0x0, 0x1118, 0x30, 0x13, 0x10, 0x8, 0x18, 0x30}, + {".shstrtab", SHT_STRTAB, 0x0, 0x0, 0x1148, 0xb3, 0x0, 0x0, 0x1, 0x0, 0xb3}, + {".symtab", SHT_SYMTAB, 0x0, 0x0, 0x8c0, 0x180, 0x14, 0xe, 0x8, 0x18, 0x180}, + {".strtab", SHT_STRTAB, 0x0, 0x0, 0xa40, 0x13, 0x0, 0x0, 0x1, 0x0, 0x13}, + }, + []ProgHeader{}, + nil, + []Symbol{ + {"hello.c", 4, 0, 65521, 0, 0, "", ""}, + {"", 3, 0, 1, 0, 0, "", ""}, + {"", 3, 0, 3, 0, 0, "", ""}, + {"", 3, 0, 4, 0, 0, "", ""}, + {"", 3, 0, 5, 0, 0, "", ""}, + {"", 3, 0, 6, 0, 0, "", ""}, + {"", 3, 0, 8, 0, 0, "", ""}, + {"", 3, 0, 9, 0, 0, "", ""}, + {"", 3, 0, 11, 0, 0, "", ""}, + {"", 3, 0, 13, 0, 0, "", ""}, + {"", 3, 0, 15, 0, 0, "", ""}, + {"", 3, 0, 16, 0, 0, "", ""}, + {"", 3, 0, 14, 0, 0, "", ""}, + {"main", 18, 0, 1, 0, 44, "", ""}, + {"puts", 16, 0, 0, 0, 0, "", ""}, + }, }, } @@ -230,7 +468,7 @@ func TestOpen(t *testing.T) { continue } defer f.Close() - if !reflect.DeepEqual(f.FileHeader, tt.hdr) { + if f.FileHeader != tt.hdr { t.Errorf("open %s:\n\thave %#v\n\twant %#v\n", 
tt.file, f.FileHeader, tt.hdr) continue } @@ -238,18 +476,18 @@ func TestOpen(t *testing.T) { if i >= len(tt.sections) { break } - sh := &tt.sections[i] - if !reflect.DeepEqual(&s.SectionHeader, sh) { - t.Errorf("open %s, section %d:\n\thave %#v\n\twant %#v\n", tt.file, i, &s.SectionHeader, sh) + sh := tt.sections[i] + if s.SectionHeader != sh { + t.Errorf("open %s, section %d:\n\thave %#v\n\twant %#v\n", tt.file, i, s.SectionHeader, sh) } } for i, p := range f.Progs { if i >= len(tt.progs) { break } - ph := &tt.progs[i] - if !reflect.DeepEqual(&p.ProgHeader, ph) { - t.Errorf("open %s, program %d:\n\thave %#v\n\twant %#v\n", tt.file, i, &p.ProgHeader, ph) + ph := tt.progs[i] + if p.ProgHeader != ph { + t.Errorf("open %s, program %d:\n\thave %#v\n\twant %#v\n", tt.file, i, p.ProgHeader, ph) } } tn := len(tt.sections) @@ -270,6 +508,22 @@ func TestOpen(t *testing.T) { if !reflect.DeepEqual(tl, fl) { t.Errorf("open %s: DT_NEEDED = %v, want %v", tt.file, tl, fl) } + symbols, err := f.Symbols() + if tt.symbols == nil { + if !errors.Is(err, ErrNoSymbols) { + t.Errorf("open %s: Symbols() expected ErrNoSymbols, have nil", tt.file) + } + if symbols != nil { + t.Errorf("open %s: Symbols() expected no symbols, have %v", tt.file, symbols) + } + } else { + if err != nil { + t.Errorf("open %s: Symbols() unexpected error %v", tt.file, err) + } + if !slices.Equal(symbols, tt.symbols) { + t.Errorf("open %s: Symbols() = %v, want %v", tt.file, symbols, tt.symbols) + } + } } } @@ -293,6 +547,7 @@ func decompress(gz string) (io.ReaderAt, error) { type relocationTestEntry struct { entryNumber int entry *dwarf.Entry + pcRanges [][2]uint64 } type relocationTest struct { @@ -304,367 +559,481 @@ var relocationTests = []relocationTest{ { "testdata/go-relocation-test-gcc441-x86-64.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.4.1", Class: dwarf.ClassString}, - 
{Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x6), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.4.1", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x6), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x6}}, + }, }, }, { "testdata/go-relocation-test-gcc441-x86.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.4.1", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "t.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x5), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.4.1", Class: dwarf.ClassString}, + {Attr: 
dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "t.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x5), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x5}}, + }, }, }, { "testdata/go-relocation-test-gcc424-x86-64.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.2.4 (Ubuntu 4.2.4-1ubuntu4)", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc424.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x6), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.2.4 (Ubuntu 4.2.4-1ubuntu4)", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc424.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x6), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x6}}, + }, }, }, { "testdata/go-relocation-test-gcc482-aarch64.obj", []relocationTestEntry{ - 
{0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.8.2 -g -fstack-protector", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc482.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(0x24), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.8.2 -g -fstack-protector", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc482.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x24), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x24}}, + }, }, }, { "testdata/go-relocation-test-gcc492-arm.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 20141224 (prerelease) -march=armv7-a -mfloat-abi=hard -mfpu=vfpv3-d16 -mtls-dialect=gnu -g", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc492.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/root/go/src/debug/elf/testdata", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: 
uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(0x28), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 20141224 (prerelease) -march=armv7-a -mfloat-abi=hard -mfpu=vfpv3-d16 -mtls-dialect=gnu -g", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc492.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/root/go/src/debug/elf/testdata", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x28), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x28}}, + }, }, }, { "testdata/go-relocation-test-clang-arm.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "Debian clang version 3.5.0-10 (tags/RELEASE_350/final) (based on LLVM 3.5.0)", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrStmtList, Val: int64(0x0), Class: dwarf.ClassLinePtr}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(48), Class: dwarf.ClassConstant}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "Debian clang version 3.5.0-10 (tags/RELEASE_350/final) (based on LLVM 3.5.0)", Class: 
dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrStmtList, Val: int64(0x0), Class: dwarf.ClassLinePtr}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x30), Class: dwarf.ClassConstant}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x30}}, + }, }, }, { "testdata/go-relocation-test-gcc5-ppc.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C11 5.0.0 20150116 (experimental) -Asystem=linux -Asystem=unix -Asystem=posix -g", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc5-ppc.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(0x44), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C11 5.0.0 20150116 (experimental) -Asystem=linux -Asystem=unix -Asystem=posix -g", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc5-ppc.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x44), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, 
+ }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x44}}, + }, }, }, { "testdata/go-relocation-test-gcc482-ppc64le.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.8.2 -Asystem=linux -Asystem=unix -Asystem=posix -msecure-plt -mtune=power8 -mcpu=power7 -gdwarf-2 -fstack-protector", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc482-ppc64le.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x24), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.8.2 -Asystem=linux -Asystem=unix -Asystem=posix -msecure-plt -mtune=power8 -mcpu=power7 -gdwarf-2 -fstack-protector", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-gcc482-ppc64le.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x24), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x24}}, + }, }, }, { "testdata/go-relocation-test-gcc492-mips64.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 -meb -mabi=64 -march=mips3 -mtune=mips64 -mllsc -mno-shared -g", 
Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(100), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 -meb -mabi=64 -march=mips3 -mtune=mips64 -mllsc -mno-shared -g", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x64), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x64}}, + }, }, }, { "testdata/go-relocation-test-gcc531-s390x.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C11 5.3.1 20160316 -march=zEC12 -m64 -mzarch -g -fstack-protector-strong", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(58), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: 
dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C11 5.3.1 20160316 -march=zEC12 -m64 -mzarch -g -fstack-protector-strong", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x3a), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x3a}}, + }, }, }, { "testdata/go-relocation-test-gcc620-sparc64.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C11 6.2.0 20160914 -mcpu=v9 -g -fstack-protector-strong", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(0x2c), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C11 6.2.0 20160914 -mcpu=v9 -g -fstack-protector-strong", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x2c), Class: 
dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x2c}}, + }, }, }, { "testdata/go-relocation-test-gcc492-mipsle.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 -mel -march=mips2 -mtune=mips32 -mllsc -mno-shared -mabi=32 -g", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(0x58), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.2 -mel -march=mips2 -mtune=mips32 -mllsc -mno-shared -mabi=32 -g", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x58), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x58}}, + }, }, }, { "testdata/go-relocation-test-gcc540-mips.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C11 5.4.0 20160609 -meb -mips32 -mtune=mips32r2 -mfpxx -mllsc -mno-shared -mabi=32 -g -gdwarf-2", Class: dwarf.ClassString}, - {Attr: 
dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x5c), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C11 5.4.0 20160609 -meb -mips32 -mtune=mips32r2 -mfpxx -mllsc -mno-shared -mabi=32 -g -gdwarf-2", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x5c), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x5c}}, + }, }, }, { "testdata/go-relocation-test-gcc493-mips64le.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.3 -mel -mabi=64 -mllsc -mno-shared -g -fstack-protector-strong", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: int64(100), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: 
dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C 4.9.3 -mel -mabi=64 -mllsc -mno-shared -g -fstack-protector-strong", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(1), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: int64(0x64), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x64}}, + }, }, }, { "testdata/go-relocation-test-gcc720-riscv64.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "GNU C11 7.2.0 -march=rv64imafdc -mabi=lp64d -g -gdwarf-2", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrHighpc, Val: uint64(0x2c), Class: dwarf.ClassAddress}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C11 7.2.0 -march=rv64imafdc -mabi=lp64d -g -gdwarf-2", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "hello.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLowpc, Val: uint64(0x0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrHighpc, Val: uint64(0x2c), Class: 
dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, + pcRanges: [][2]uint64{{0x0, 0x2c}}, + }, }, }, { "testdata/go-relocation-test-clang-x86.obj", []relocationTestEntry{ - {0, &dwarf.Entry{ - Offset: 0xb, - Tag: dwarf.TagCompileUnit, - Children: true, - Field: []dwarf.Field{ - {Attr: dwarf.AttrProducer, Val: "clang version google3-trunk (trunk r209387)", Class: dwarf.ClassString}, - {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrName, Val: "go-relocation-test-clang.c", Class: dwarf.ClassString}, - {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, - {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "clang version google3-trunk (trunk r209387)", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "go-relocation-test-clang.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + }, }, - }}, + }, }, }, { "testdata/gcc-amd64-openbsd-debug-with-rela.obj", []relocationTestEntry{ - {203, &dwarf.Entry{ - Offset: 0xc62, - Tag: dwarf.TagMember, - Children: false, - Field: []dwarf.Field{ - {Attr: dwarf.AttrName, Val: "it_interval", Class: dwarf.ClassString}, - {Attr: dwarf.AttrDeclFile, Val: int64(7), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrDeclLine, Val: int64(236), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrType, Val: dwarf.Offset(0xb7f), Class: dwarf.ClassReference}, - {Attr: dwarf.AttrDataMemberLoc, Val: []byte{0x23, 0x0}, Class: dwarf.ClassExprLoc}, + { + entryNumber: 203, + entry: &dwarf.Entry{ + Offset: 0xc62, + Tag: dwarf.TagMember, + Children: false, + Field: []dwarf.Field{ + {Attr: 
dwarf.AttrName, Val: "it_interval", Class: dwarf.ClassString}, + {Attr: dwarf.AttrDeclFile, Val: int64(7), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrDeclLine, Val: int64(236), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrType, Val: dwarf.Offset(0xb7f), Class: dwarf.ClassReference}, + {Attr: dwarf.AttrDataMemberLoc, Val: []byte{0x23, 0x0}, Class: dwarf.ClassExprLoc}, + }, + }, + }, + { + entryNumber: 204, + entry: &dwarf.Entry{ + Offset: 0xc70, + Tag: dwarf.TagMember, + Children: false, + Field: []dwarf.Field{ + {Attr: dwarf.AttrName, Val: "it_value", Class: dwarf.ClassString}, + {Attr: dwarf.AttrDeclFile, Val: int64(7), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrDeclLine, Val: int64(237), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrType, Val: dwarf.Offset(0xb7f), Class: dwarf.ClassReference}, + {Attr: dwarf.AttrDataMemberLoc, Val: []byte{0x23, 0x10}, Class: dwarf.ClassExprLoc}, + }, + }, + }, + }, + }, + { + "testdata/go-relocation-test-gcc930-ranges-no-rela-x86-64", + []relocationTestEntry{ + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C17 9.3.0 -mtune=generic -march=x86-64 -g -fno-asynchronous-unwind-tables", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "multiple-code-sections.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrRanges, Val: int64(0), Class: dwarf.ClassRangeListPtr}, + {Attr: dwarf.AttrLowpc, Val: uint64(0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, }, - }}, - {204, &dwarf.Entry{ - Offset: 0xc70, - Tag: dwarf.TagMember, - Children: false, - Field: []dwarf.Field{ - {Attr: dwarf.AttrName, Val: "it_value", Class: dwarf.ClassString}, - {Attr: dwarf.AttrDeclFile, Val: int64(7), Class: dwarf.ClassConstant}, - {Attr: 
dwarf.AttrDeclLine, Val: int64(237), Class: dwarf.ClassConstant}, - {Attr: dwarf.AttrType, Val: dwarf.Offset(0xb7f), Class: dwarf.ClassReference}, - {Attr: dwarf.AttrDataMemberLoc, Val: []byte{0x23, 0x10}, Class: dwarf.ClassExprLoc}, + pcRanges: [][2]uint64{ + {0x765, 0x777}, + {0x7e1, 0x7ec}, }, - }}, + }, + }, + }, + { + "testdata/go-relocation-test-gcc930-ranges-with-rela-x86-64", + []relocationTestEntry{ + { + entry: &dwarf.Entry{ + Offset: 0xb, + Tag: dwarf.TagCompileUnit, + Children: true, + Field: []dwarf.Field{ + {Attr: dwarf.AttrProducer, Val: "GNU C17 9.3.0 -mtune=generic -march=x86-64 -g -fno-asynchronous-unwind-tables", Class: dwarf.ClassString}, + {Attr: dwarf.AttrLanguage, Val: int64(12), Class: dwarf.ClassConstant}, + {Attr: dwarf.AttrName, Val: "multiple-code-sections.c", Class: dwarf.ClassString}, + {Attr: dwarf.AttrCompDir, Val: "/tmp", Class: dwarf.ClassString}, + {Attr: dwarf.AttrRanges, Val: int64(0), Class: dwarf.ClassRangeListPtr}, + {Attr: dwarf.AttrLowpc, Val: uint64(0), Class: dwarf.ClassAddress}, + {Attr: dwarf.AttrStmtList, Val: int64(0), Class: dwarf.ClassLinePtr}, + }, + }, + pcRanges: [][2]uint64{ + {0x765, 0x777}, + {0x7e1, 0x7ec}, + }, + }, }, }, } func TestDWARFRelocations(t *testing.T) { - for i, test := range relocationTests { - f, err := Open(test.file) - if err != nil { - t.Error(err) - continue - } - dwarf, err := f.DWARF() - if err != nil { - t.Error(err) - continue - } - for _, testEntry := range test.entries { - reader := dwarf.Reader() - for j := 0; j < testEntry.entryNumber; j++ { - entry, err := reader.Next() - if entry == nil || err != nil { - t.Errorf("Failed to skip to entry %d: %v", testEntry.entryNumber, err) - continue - } + for _, test := range relocationTests { + test := test + t.Run(test.file, func(t *testing.T) { + t.Parallel() + f, err := Open(test.file) + if err != nil { + t.Fatal(err) } - entry, err := reader.Next() + dwarf, err := f.DWARF() if err != nil { - t.Error(err) - continue + t.Fatal(err) } - if 
!reflect.DeepEqual(testEntry.entry, entry) { - t.Errorf("#%d/%d: mismatch: got:%#v want:%#v", i, testEntry.entryNumber, entry, testEntry.entry) - continue + reader := dwarf.Reader() + idx := 0 + for _, testEntry := range test.entries { + if testEntry.entryNumber < idx { + t.Fatalf("internal test error: %d < %d", testEntry.entryNumber, idx) + } + for ; idx < testEntry.entryNumber; idx++ { + entry, err := reader.Next() + if entry == nil || err != nil { + t.Fatalf("Failed to skip to entry %d: %v", testEntry.entryNumber, err) + } + } + entry, err := reader.Next() + idx++ + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(testEntry.entry, entry) { + t.Errorf("entry %d mismatch: got:%#v want:%#v", testEntry.entryNumber, entry, testEntry.entry) + } + pcRanges, err := dwarf.Ranges(entry) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(testEntry.pcRanges, pcRanges) { + t.Errorf("entry %d: PC range mismatch: got:%#v want:%#v", testEntry.entryNumber, pcRanges, testEntry.pcRanges) + } } - } + }) } } @@ -784,7 +1153,7 @@ func TestCompressedSection(t *testing.T) { func TestNoSectionOverlaps(t *testing.T) { // Ensure cmd/link outputs sections without overlaps. 
switch runtime.GOOS { - case "aix", "android", "darwin", "js", "nacl", "plan9", "windows": + case "aix", "android", "darwin", "ios", "js", "plan9", "windows", "wasip1": t.Skipf("cmd/link doesn't produce ELF binaries on %s", runtime.GOOS) } _ = net.ResolveIPAddr // force dynamic linkage @@ -798,15 +1167,413 @@ func TestNoSectionOverlaps(t *testing.T) { if sih.Type == SHT_NOBITS { continue } + // checking for overlap in file for j, sj := range f.Sections { sjh := sj.SectionHeader - if i == j || sjh.Type == SHT_NOBITS || sih.Offset == sjh.Offset && sih.Size == 0 { + if i == j || sjh.Type == SHT_NOBITS || sih.Offset == sjh.Offset && sih.FileSize == 0 { continue } - if sih.Offset >= sjh.Offset && sih.Offset < sjh.Offset+sjh.Size { - t.Errorf("ld produced ELF with section %s within %s: 0x%x <= 0x%x..0x%x < 0x%x", - sih.Name, sjh.Name, sjh.Offset, sih.Offset, sih.Offset+sih.Size, sjh.Offset+sjh.Size) + if sih.Offset >= sjh.Offset && sih.Offset < sjh.Offset+sjh.FileSize { + t.Errorf("ld produced ELF with section offset %s within %s: 0x%x <= 0x%x..0x%x < 0x%x", + sih.Name, sjh.Name, sjh.Offset, sih.Offset, sih.Offset+sih.FileSize, sjh.Offset+sjh.FileSize) } } + + if sih.Flags&SHF_ALLOC == 0 { + continue + } + + // checking for overlap in address space + for j, sj := range f.Sections { + sjh := sj.SectionHeader + if i == j || sjh.Flags&SHF_ALLOC == 0 || sjh.Type == SHT_NOBITS || + sih.Addr == sjh.Addr && sih.Size == 0 { + continue + } + if sih.Addr >= sjh.Addr && sih.Addr < sjh.Addr+sjh.Size { + t.Errorf("ld produced ELF with section address %s within %s: 0x%x <= 0x%x..0x%x < 0x%x", + sih.Name, sjh.Name, sjh.Addr, sih.Addr, sih.Addr+sih.Size, sjh.Addr+sjh.Size) + } + } + } +} + +func TestNobitsSection(t *testing.T) { + const testdata = "testdata/gcc-amd64-linux-exec" + f, err := Open(testdata) + if err != nil { + t.Fatalf("could not read %s: %v", testdata, err) + } + defer f.Close() + + wantError := "unexpected read from SHT_NOBITS section" + bss := f.Section(".bss") + + _, 
err = bss.Data() + if err == nil || err.Error() != wantError { + t.Fatalf("bss.Data() got error %q, want error %q", err, wantError) + } + + r := bss.Open() + p := make([]byte, 1) + _, err = r.Read(p) + if err == nil || err.Error() != wantError { + t.Fatalf("r.Read(p) got error %q, want error %q", err, wantError) + } +} + +// TestLargeNumberOfSections tests the case that a file has greater than or +// equal to 65280 (0xff00) sections. +func TestLargeNumberOfSections(t *testing.T) { + // A file with >= 0xff00 sections is too big, so we will construct it on the + // fly. The original file "y.o" is generated by these commands: + // 1. generate "y.c": + // for i in `seq 1 65288`; do + // printf -v x "%04x" i; + // echo "int var_$x __attribute__((section(\"section_$x\"))) = $i;" + // done > y.c + // 2. compile: gcc -c y.c -m32 + // + // $readelf -h y.o + // ELF Header: + // Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 + // Class: ELF32 + // Data: 2's complement, little endian + // Version: 1 (current) + // OS/ABI: UNIX - System V + // ABI Version: 0 + // Type: REL (Relocatable file) + // Machine: Intel 80386 + // Version: 0x1 + // Entry point address: 0x0 + // Start of program headers: 0 (bytes into file) + // Start of section headers: 3003468 (bytes into file) + // Flags: 0x0 + // Size of this header: 52 (bytes) + // Size of program headers: 0 (bytes) + // Number of program headers: 0 + // Size of section headers: 40 (bytes) + // Number of section headers: 0 (65298) + // Section header string table index: 65535 (65297) + // + // $readelf -S y.o + // There are 65298 section headers, starting at offset 0x2dd44c: + // Section Headers: + // [Nr] Name Type Addr Off Size ES Flg Lk Inf Al + // [ 0] NULL 00000000 000000 00ff12 00 65297 0 0 + // [ 1] .text PROGBITS 00000000 000034 000000 00 AX 0 0 1 + // [ 2] .data PROGBITS 00000000 000034 000000 00 WA 0 0 1 + // [ 3] .bss NOBITS 00000000 000034 000000 00 WA 0 0 1 + // [ 4] section_0001 PROGBITS 00000000 000034 000004 
00 WA 0 0 4 + // [ 5] section_0002 PROGBITS 00000000 000038 000004 00 WA 0 0 4 + // [ section_0003 ~ section_ff06 truncated ] + // [65290] section_ff07 PROGBITS 00000000 03fc4c 000004 00 WA 0 0 4 + // [65291] section_ff08 PROGBITS 00000000 03fc50 000004 00 WA 0 0 4 + // [65292] .comment PROGBITS 00000000 03fc54 000027 01 MS 0 0 1 + // [65293] .note.GNU-stack PROGBITS 00000000 03fc7b 000000 00 0 0 1 + // [65294] .symtab SYMTAB 00000000 03fc7c 0ff0a0 10 65296 2 4 + // [65295] .symtab_shndx SYMTAB SECTION 00000000 13ed1c 03fc28 04 65294 0 4 + // [65296] .strtab STRTAB 00000000 17e944 08f74d 00 0 0 1 + // [65297] .shstrtab STRTAB 00000000 20e091 0cf3bb 00 0 0 1 + + var buf bytes.Buffer + + { + buf.Grow(0x55AF1C) // 3003468 + 40 * 65298 + + h := Header32{ + Ident: [16]byte{0x7F, 'E', 'L', 'F', 0x01, 0x01, 0x01}, + Type: 1, + Machine: 3, + Version: 1, + Shoff: 0x2DD44C, + Ehsize: 0x34, + Shentsize: 0x28, + Shnum: 0, + Shstrndx: 0xFFFF, + } + binary.Write(&buf, binary.LittleEndian, h) + + // Zero out sections [1]~[65294]. + buf.Write(bytes.Repeat([]byte{0}, 0x13ED1C-binary.Size(h))) + + // Write section [65295]. Section [65295] are all zeros except for the + // last 48 bytes. + buf.Write(bytes.Repeat([]byte{0}, 0x03FC28-12*4)) + for i := 0; i < 12; i++ { + binary.Write(&buf, binary.LittleEndian, uint32(0xFF00|i)) + } + + // Write section [65296]. + buf.Write([]byte{0}) + buf.Write([]byte("y.c\x00")) + for i := 1; i <= 65288; i++ { + // var_0001 ~ var_ff08 + name := fmt.Sprintf("var_%04x", i) + buf.Write([]byte(name)) + buf.Write([]byte{0}) + } + + // Write section [65297]. 
+ buf.Write([]byte{0}) + buf.Write([]byte(".symtab\x00")) + buf.Write([]byte(".strtab\x00")) + buf.Write([]byte(".shstrtab\x00")) + buf.Write([]byte(".text\x00")) + buf.Write([]byte(".data\x00")) + buf.Write([]byte(".bss\x00")) + for i := 1; i <= 65288; i++ { + // s_0001 ~ s_ff08 + name := fmt.Sprintf("section_%04x", i) + buf.Write([]byte(name)) + buf.Write([]byte{0}) + } + buf.Write([]byte(".comment\x00")) + buf.Write([]byte(".note.GNU-stack\x00")) + buf.Write([]byte(".symtab_shndx\x00")) + + // Write section header table. + // NULL + binary.Write(&buf, binary.LittleEndian, Section32{Name: 0, Size: 0xFF12, Link: 0xFF11}) + // .text + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x1B, + Type: uint32(SHT_PROGBITS), + Flags: uint32(SHF_ALLOC | SHF_EXECINSTR), + Off: 0x34, + Addralign: 0x01, + }) + // .data + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x21, + Type: uint32(SHT_PROGBITS), + Flags: uint32(SHF_WRITE | SHF_ALLOC), + Off: 0x34, + Addralign: 0x01, + }) + // .bss + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x27, + Type: uint32(SHT_NOBITS), + Flags: uint32(SHF_WRITE | SHF_ALLOC), + Off: 0x34, + Addralign: 0x01, + }) + // s_1 ~ s_65537 + for i := 0; i < 65288; i++ { + s := Section32{ + Name: uint32(0x2C + i*13), + Type: uint32(SHT_PROGBITS), + Flags: uint32(SHF_WRITE | SHF_ALLOC), + Off: uint32(0x34 + i*4), + Size: 0x04, + Addralign: 0x04, + } + binary.Write(&buf, binary.LittleEndian, s) + } + // .comment + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x0CF394, + Type: uint32(SHT_PROGBITS), + Flags: uint32(SHF_MERGE | SHF_STRINGS), + Off: 0x03FC54, + Size: 0x27, + Addralign: 0x01, + Entsize: 0x01, + }) + // .note.GNU-stack + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x0CF39D, + Type: uint32(SHT_PROGBITS), + Off: 0x03FC7B, + Addralign: 0x01, + }) + // .symtab + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x01, + Type: uint32(SHT_SYMTAB), + Off: 0x03FC7C, + Size: 
0x0FF0A0, + Link: 0xFF10, + Info: 0x02, + Addralign: 0x04, + Entsize: 0x10, + }) + // .symtab_shndx + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x0CF3AD, + Type: uint32(SHT_SYMTAB_SHNDX), + Off: 0x13ED1C, + Size: 0x03FC28, + Link: 0xFF0E, + Addralign: 0x04, + Entsize: 0x04, + }) + // .strtab + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x09, + Type: uint32(SHT_STRTAB), + Off: 0x17E944, + Size: 0x08F74D, + Addralign: 0x01, + }) + // .shstrtab + binary.Write(&buf, binary.LittleEndian, Section32{ + Name: 0x11, + Type: uint32(SHT_STRTAB), + Off: 0x20E091, + Size: 0x0CF3BB, + Addralign: 0x01, + }) + } + + data := buf.Bytes() + + f, err := NewFile(bytes.NewReader(data)) + if err != nil { + t.Errorf("cannot create file from data: %v", err) + } + defer f.Close() + + wantFileHeader := FileHeader{ + Class: ELFCLASS32, + Data: ELFDATA2LSB, + Version: EV_CURRENT, + OSABI: ELFOSABI_NONE, + ByteOrder: binary.LittleEndian, + Type: ET_REL, + Machine: EM_386, + } + if f.FileHeader != wantFileHeader { + t.Errorf("\nhave %#v\nwant %#v\n", f.FileHeader, wantFileHeader) + } + + wantSectionNum := 65298 + if len(f.Sections) != wantSectionNum { + t.Errorf("len(Sections) = %d, want %d", len(f.Sections), wantSectionNum) + } + + wantSectionHeader := SectionHeader{ + Name: "section_0007", + Type: SHT_PROGBITS, + Flags: SHF_WRITE + SHF_ALLOC, + Offset: 0x4c, + Size: 0x4, + Addralign: 0x4, + FileSize: 0x4, + } + if f.Sections[10].SectionHeader != wantSectionHeader { + t.Errorf("\nhave %#v\nwant %#v\n", f.Sections[10].SectionHeader, wantSectionHeader) + } +} + +func TestIssue10996(t *testing.T) { + data := []byte("\u007fELF\x02\x01\x010000000000000" + + "\x010000000000000000000" + + "\x00\x00\x00\x00\x00\x00\x00\x0000000000\x00\x00\x00\x00" + + "0000") + _, err := NewFile(bytes.NewReader(data)) + if err == nil { + t.Fatalf("opening invalid ELF file unexpectedly succeeded") + } +} + +func TestDynValue(t *testing.T) { + const testdata = 
"testdata/gcc-amd64-linux-exec" + f, err := Open(testdata) + if err != nil { + t.Fatalf("could not read %s: %v", testdata, err) + } + defer f.Close() + + vals, err := f.DynValue(DT_VERNEEDNUM) + if err != nil { + t.Fatalf("DynValue(DT_VERNEEDNUM): got unexpected error %v", err) + } + + if len(vals) != 1 || vals[0] != 1 { + t.Errorf("DynValue(DT_VERNEEDNUM): got %v, want [1]", vals) + } +} + +func TestIssue59208(t *testing.T) { + // corrupted dwarf data should raise invalid dwarf data instead of invalid zlib + const orig = "testdata/compressed-64.obj" + f, err := Open(orig) + if err != nil { + t.Fatal(err) + } + sec := f.Section(".debug_info") + + data, err := os.ReadFile(orig) + if err != nil { + t.Fatal(err) + } + + dn := make([]byte, len(data)) + zoffset := sec.Offset + uint64(sec.compressionOffset) + copy(dn, data[:zoffset]) + + ozd, err := sec.Data() + if err != nil { + t.Fatal(err) + } + buf := bytes.NewBuffer(nil) + wr := zlib.NewWriter(buf) + // corrupt origin data same as COMPRESS_ZLIB + copy(ozd, []byte{1, 0, 0, 0}) + wr.Write(ozd) + wr.Close() + + copy(dn[zoffset:], buf.Bytes()) + copy(dn[sec.Offset+sec.FileSize:], data[sec.Offset+sec.FileSize:]) + + nf, err := NewFile(bytes.NewReader(dn)) + if err != nil { + t.Error(err) + } + + const want = "decoding dwarf section info" + _, err = nf.DWARF() + if err == nil || !strings.Contains(err.Error(), want) { + t.Errorf("DWARF = %v; want %q", err, want) + } +} + +func BenchmarkSymbols64(b *testing.B) { + const testdata = "testdata/gcc-amd64-linux-exec" + f, err := Open(testdata) + if err != nil { + b.Fatalf("could not read %s: %v", testdata, err) + } + defer f.Close() + b.ResetTimer() + for i := 0; i < b.N; i++ { + symbols, err := f.Symbols() + if err != nil { + b.Fatalf("Symbols(): got unexpected error %v", err) + } + if len(symbols) != 73 { + b.Errorf("\nhave %d symbols\nwant %d symbols\n", len(symbols), 73) + } + } +} + +func BenchmarkSymbols32(b *testing.B) { + const testdata = "testdata/gcc-386-freebsd-exec" 
+ f, err := Open(testdata) + if err != nil { + b.Fatalf("could not read %s: %v", testdata, err) + } + defer f.Close() + b.ResetTimer() + for i := 0; i < b.N; i++ { + symbols, err := f.Symbols() + if err != nil { + b.Fatalf("Symbols(): got unexpected error %v", err) + } + if len(symbols) != 74 { + b.Errorf("\nhave %d symbols\nwant %d symbols\n", len(symbols), 74) + } } } diff --git a/elf/file_write.go b/elf/file_write.go new file mode 100644 index 0000000..01db0ce --- /dev/null +++ b/elf/file_write.go @@ -0,0 +1,27 @@ +package elf + +import "os" + +// Write creates or truncates the named file. If the file already exists, +// it is truncated. If the file does not exist, it is created with mode 0666 +// (before umask). If there is an error, it will be of type *PathError. +func (f *File) Write(name string) error { + fd, err1 := os.Create(name) + if err1 != nil { + return err1 + } + data, err2 := f.Bytes() + if err2 != nil { + return err2 + } + _, err3 := fd.Write(data) + if err3 != nil { + return err3 + } + err4 := fd.Close() + if err4 != nil { + return err4 + } + + return nil +} diff --git a/elf/reader.go b/elf/reader.go index a458436..eab4373 100644 --- a/elf/reader.go +++ b/elf/reader.go @@ -63,11 +63,11 @@ func (r *readSeekerFromReader) Read(p []byte) (n int, err error) { func (r *readSeekerFromReader) Seek(offset int64, whence int) (int64, error) { var newOffset int64 switch whence { - case seekStart: + case io.SeekStart: newOffset = offset - case seekCurrent: + case io.SeekCurrent: newOffset = r.offset + offset - case seekEnd: + case io.SeekEnd: newOffset = r.size + offset default: return 0, os.ErrInvalid diff --git a/elf/relocations.go b/elf/relocations.go deleted file mode 100644 index 33ae41e..0000000 --- a/elf/relocations.go +++ /dev/null @@ -1,529 +0,0 @@ -package elf - -import ( - "bytes" - "encoding/binary" - "errors" -) - -// applyRelocations applies relocations to dst. rels is a relocations section -// in REL or RELA format. 
-func (f *File) applyRelocations(dst []byte, rels []byte) error { - switch { - case f.Class == ELFCLASS64 && f.Machine == EM_X86_64: - return f.applyRelocationsAMD64(dst, rels) - case f.Class == ELFCLASS32 && f.Machine == EM_386: - return f.applyRelocations386(dst, rels) - case f.Class == ELFCLASS32 && f.Machine == EM_ARM: - return f.applyRelocationsARM(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_AARCH64: - return f.applyRelocationsARM64(dst, rels) - case f.Class == ELFCLASS32 && f.Machine == EM_PPC: - return f.applyRelocationsPPC(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_PPC64: - return f.applyRelocationsPPC64(dst, rels) - case f.Class == ELFCLASS32 && f.Machine == EM_MIPS: - return f.applyRelocationsMIPS(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_MIPS: - return f.applyRelocationsMIPS64(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_RISCV: - return f.applyRelocationsRISCV64(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_S390: - return f.applyRelocationss390x(dst, rels) - case f.Class == ELFCLASS64 && f.Machine == EM_SPARCV9: - return f.applyRelocationsSPARC64(dst, rels) - default: - return errors.New("applyRelocations: not implemented") - } -} - -func (f *File) applyRelocationsAMD64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. - if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_X86_64(rela.Info & 0xffff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. 
- continue - } - - // There are relocations, so this must be a normal - // object file, and we only look at section symbols, - // so we assume that the symbol value is 0. - - switch t { - case R_X86_64_64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) - case R_X86_64_32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} - -func (f *File) applyRelocations386(dst []byte, rels []byte) error { - // 8 is the size of Rel32. - if len(rels)%8 != 0 { - return errors.New("length of relocation section is not a multiple of 8") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rel Rel32 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rel) - symNo := rel.Info >> 8 - t := R_386(rel.Info & 0xff) - - if symNo == 0 || symNo > uint32(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - - if t == R_386_32 { - if rel.Off+4 >= uint32(len(dst)) { - continue - } - val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) - val += uint32(sym.Value) - f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) - } - } - - return nil -} - -func (f *File) applyRelocationsARM(dst []byte, rels []byte) error { - // 8 is the size of Rel32. 
- if len(rels)%8 != 0 { - return errors.New("length of relocation section is not a multiple of 8") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rel Rel32 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rel) - symNo := rel.Info >> 8 - t := R_ARM(rel.Info & 0xff) - - if symNo == 0 || symNo > uint32(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - - switch t { - case R_ARM_ABS32: - if rel.Off+4 >= uint32(len(dst)) { - continue - } - val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) - val += uint32(sym.Value) - f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) - } - } - - return nil -} - -func (f *File) applyRelocationsARM64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. - if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_AARCH64(rela.Info & 0xffff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. - continue - } - - // There are relocations, so this must be a normal - // object file, and we only look at section symbols, - // so we assume that the symbol value is 0. - - switch t { - case R_AARCH64_ABS64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) - case R_AARCH64_ABS32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} - -func (f *File) applyRelocationsPPC(dst []byte, rels []byte) error { - // 12 is the size of Rela32. 
- if len(rels)%12 != 0 { - return errors.New("length of relocation section is not a multiple of 12") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela32 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 8 - t := R_PPC(rela.Info & 0xff) - - if symNo == 0 || symNo > uint32(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. - continue - } - - switch t { - case R_PPC_ADDR32: - if rela.Off+4 >= uint32(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} - -func (f *File) applyRelocationsPPC64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. - if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_PPC64(rela.Info & 0xffff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. - continue - } - - switch t { - case R_PPC64_ADDR64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) - case R_PPC64_ADDR32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} - -func (f *File) applyRelocationsMIPS(dst []byte, rels []byte) error { - // 8 is the size of Rel32. 
- if len(rels)%8 != 0 { - return errors.New("length of relocation section is not a multiple of 8") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rel Rel32 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rel) - symNo := rel.Info >> 8 - t := R_MIPS(rel.Info & 0xff) - - if symNo == 0 || symNo > uint32(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - - switch t { - case R_MIPS_32: - if rel.Off+4 >= uint32(len(dst)) { - continue - } - val := f.ByteOrder.Uint32(dst[rel.Off : rel.Off+4]) - val += uint32(sym.Value) - f.ByteOrder.PutUint32(dst[rel.Off:rel.Off+4], val) - } - } - - return nil -} - -func (f *File) applyRelocationsMIPS64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. - if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - var symNo uint64 - var t R_MIPS - if f.ByteOrder == binary.BigEndian { - symNo = rela.Info >> 32 - t = R_MIPS(rela.Info & 0xff) - } else { - symNo = rela.Info & 0xffffffff - t = R_MIPS(rela.Info >> 56) - } - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. - continue - } - - switch t { - case R_MIPS_64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) - case R_MIPS_32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} - -func (f *File) applyRelocationsRISCV64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. 
- if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_RISCV(rela.Info & 0xffff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - switch SymType(sym.Info & 0xf) { - case STT_SECTION, STT_NOTYPE: - break - default: - continue - } - - switch t { - case R_RISCV_64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - val := sym.Value + uint64(rela.Addend) - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val) - case R_RISCV_32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - val := uint32(sym.Value) + uint32(rela.Addend) - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val) - } - } - - return nil -} - -func (f *File) applyRelocationss390x(dst []byte, rels []byte) error { - // 24 is the size of Rela64. 
- if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_390(rela.Info & 0xffff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - switch SymType(sym.Info & 0xf) { - case STT_SECTION, STT_NOTYPE: - break - default: - continue - } - - switch t { - case R_390_64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - val := sym.Value + uint64(rela.Addend) - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], val) - case R_390_32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - val := uint32(sym.Value) + uint32(rela.Addend) - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], val) - } - } - - return nil -} - -func (f *File) applyRelocationsSPARC64(dst []byte, rels []byte) error { - // 24 is the size of Rela64. - if len(rels)%24 != 0 { - return errors.New("length of relocation section is not a multiple of 24") - } - - symbols, _, err := f.getSymbols(SHT_SYMTAB) - if err != nil { - return err - } - - b := bytes.NewReader(rels) - var rela Rela64 - - for b.Len() > 0 { - binary.Read(b, f.ByteOrder, &rela) - symNo := rela.Info >> 32 - t := R_SPARC(rela.Info & 0xff) - - if symNo == 0 || symNo > uint64(len(symbols)) { - continue - } - sym := &symbols[symNo-1] - if SymType(sym.Info&0xf) != STT_SECTION { - // We don't handle non-section relocations for now. 
- continue - } - - switch t { - case R_SPARC_64, R_SPARC_UA64: - if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) - case R_SPARC_32, R_SPARC_UA32: - if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { - continue - } - f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) - } - } - - return nil -} diff --git a/elf/symbols_test.go b/elf/symbols_test.go index 1b79520..42f0231 100644 --- a/elf/symbols_test.go +++ b/elf/symbols_test.go @@ -819,6 +819,8 @@ var dynamicSymbolsGolden = map[string][]Symbol{ Section: 0x0, Value: 0x0, Size: 0x18C, + Version: "GLIBC_2.2.5", + Library: "libc.so.6", }, Symbol{ Name: "__libc_start_main", @@ -827,6 +829,8 @@ var dynamicSymbolsGolden = map[string][]Symbol{ Section: 0x0, Value: 0x0, Size: 0x1C2, + Version: "GLIBC_2.2.5", + Library: "libc.so.6", }, }, "testdata/go-relocation-test-clang-x86.obj": {}, diff --git a/elf/testdata/go-relocation-test-gcc930-ranges-no-rela-x86-64 b/elf/testdata/go-relocation-test-gcc930-ranges-no-rela-x86-64 new file mode 100644 index 0000000..c013f3e Binary files /dev/null and b/elf/testdata/go-relocation-test-gcc930-ranges-no-rela-x86-64 differ diff --git a/elf/testdata/go-relocation-test-gcc930-ranges-with-rela-x86-64 b/elf/testdata/go-relocation-test-gcc930-ranges-with-rela-x86-64 new file mode 100644 index 0000000..51e03aa Binary files /dev/null and b/elf/testdata/go-relocation-test-gcc930-ranges-with-rela-x86-64 differ diff --git a/elf/testdata/multiple-code-sections.c b/elf/testdata/multiple-code-sections.c new file mode 100644 index 0000000..03b9d53 --- /dev/null +++ b/elf/testdata/multiple-code-sections.c @@ -0,0 +1,28 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Build with: +// gcc -g multiple-code-sections.c -Wl,--emit-relocs -Wl,--discard-none -Wl,-zmax-page-size=1 -fno-asynchronous-unwind-tables -o go-relocation-test-gcc930-ranges-with-rela-x86-64 +// gcc -g multiple-code-sections.c -Wl,-zmax-page-size=1 -fno-asynchronous-unwind-tables -o go-relocation-test-gcc930-ranges-no-rela-x86-64 +// Strip with: +// strip --only-keep-debug \ +// --remove-section=.eh_frame \ +// --remove-section=.eh_frame_hdr \ +// --remove-section=.shstrtab \ +// --remove-section=.strtab \ +// --remove-section=.symtab \ +// --remove-section=.note.gnu.build-id \ +// --remove-section=.note.ABI-tag \ +// --remove-section=.dynamic \ +// --remove-section=.gnu.hash \ +// --remove-section=.interp \ +// --remove-section=.rodata +__attribute__((section(".separate_section"))) // To get GCC to emit a DW_AT_ranges attribute for the CU. +int func(void) { + return 0; +} + +int main(int argc, char *argv[]) { + return 0; +} diff --git a/elf/write.go b/elf/write.go deleted file mode 100644 index a3ec757..0000000 --- a/elf/write.go +++ /dev/null @@ -1,281 +0,0 @@ -package elf - -import ( - "bufio" - "bytes" - "encoding/binary" - "io/ioutil" - "log" - "os" -) - -// Bytes - returns the bytes of an Elf file -func (elfFile *File) Bytes() ([]byte, error) { - - bytesWritten := uint64(0) - elfBuf := bytes.NewBuffer(nil) - w := bufio.NewWriter(elfBuf) - - // Write Elf Magic - w.WriteByte('\x7f') - w.WriteByte('E') - w.WriteByte('L') - w.WriteByte('F') - bytesWritten += 4 - - // ident[EI_CLASS] - w.WriteByte(byte(elfFile.Class)) - // ident[EI_DATA] - w.WriteByte(byte(elfFile.Data)) - // ident[EI_VERSION] - w.WriteByte(byte(elfFile.Version)) - // ident[EI_OSABI] - w.WriteByte(byte(elfFile.OSABI)) - // ident[EI_ABIVERSION] - w.WriteByte(byte(elfFile.ABIVersion)) - // ident[EI_PAD] ( 7 bytes ) - w.Write([]byte{0, 0, 0, 0, 0, 0, 0}) - bytesWritten += 12 - - // Type - binary.Write(w, elfFile.ByteOrder, uint16(elfFile.Type)) - // Machine - binary.Write(w, elfFile.ByteOrder, 
uint16(elfFile.Machine)) - // Version - binary.Write(w, elfFile.ByteOrder, uint32(elfFile.Version)) - bytesWritten += 8 - - phsize := 0 - - switch elfFile.Class { - case ELFCLASS32: - phsize = 0x20 - // Entry 32 - binary.Write(w, elfFile.ByteOrder, uint32(elfFile.Entry)) - // PH Offset 32 - binary.Write(w, elfFile.ByteOrder, uint32(0x34)) - // SH Offset 32 // 0x20 0x28 4 8 e_shoff Points to the start of the section header table. - binary.Write(w, elfFile.ByteOrder, int32(elfFile.FileHeader.SHTOffset)) - // Flags - binary.Write(w, elfFile.ByteOrder, uint32(0)) // todo - // EH Size - binary.Write(w, elfFile.ByteOrder, uint16(52)) - // PH Size // 0x2A 0x36 2 e_phentsize Contains the size of a program header table entry. - binary.Write(w, elfFile.ByteOrder, uint16(phsize)) - // PH Num // 0x2C 0x38 2 e_phnum Contains the number of entries in the program header table. - binary.Write(w, elfFile.ByteOrder, uint16(len(elfFile.Progs))) - // SH Size // 0x2E 0x3A 2 e_shentsize Contains the size of a section header table entry. - binary.Write(w, elfFile.ByteOrder, uint16(0x28)) - bytesWritten += 24 - - case ELFCLASS64: - phsize = 0x38 - // Entry 64 - binary.Write(w, elfFile.ByteOrder, uint64(elfFile.Entry)) - // PH Offset 64 - binary.Write(w, elfFile.ByteOrder, uint64(0x40)) - // SH Offset 64 // 0x20 0x28 4 8 e_shoff Points to the start of the section header table. - binary.Write(w, elfFile.ByteOrder, int64(elfFile.FileHeader.SHTOffset)) - // Flags - binary.Write(w, elfFile.ByteOrder, uint32(0)) // I think right? - // EH Size - binary.Write(w, elfFile.ByteOrder, uint16(64)) - // PH Size // 0x2A 0x36 2 e_phentsize Contains the size of a program header table entry. - binary.Write(w, elfFile.ByteOrder, uint16(phsize)) - // PH Num // 0x2C 0x38 2 e_phnum Contains the number of entries in the program header table. - binary.Write(w, elfFile.ByteOrder, uint16(len(elfFile.Progs))) - // SH Size // 0x2E 0x3A 2 e_shentsize Contains the size of a section header table entry. 
- binary.Write(w, elfFile.ByteOrder, uint16(0x40)) - bytesWritten += 36 - } - - // SH Num // 0x30 0x3C 2 e_shnum Contains the number of entries in the section header table. - binary.Write(w, elfFile.ByteOrder, uint16(len(elfFile.Sections))) - // SH Str Ndx // 0x32 0x3E 2 e_shstrndx Contains index of the section header table entry that contains the section names. - binary.Write(w, elfFile.ByteOrder, uint16(elfFile.ShStrIndex)) - bytesWritten += 4 - - // Program Header - for _, p := range elfFile.Progs { - // Type (segment) - binary.Write(w, elfFile.ByteOrder, uint32(p.Type)) - bytesWritten += 4 - - switch elfFile.Class { - case ELFCLASS32: - // Offset of Segment in File - binary.Write(w, elfFile.ByteOrder, uint32(p.Off)) - - // Vaddr - binary.Write(w, elfFile.ByteOrder, uint32(p.Vaddr)) - - // Paddr - binary.Write(w, elfFile.ByteOrder, uint32(p.Paddr)) - - // File Size - binary.Write(w, elfFile.ByteOrder, uint32(p.Filesz)) - - // Memory Size - binary.Write(w, elfFile.ByteOrder, uint32(p.Memsz)) - - // Flags (segment) - binary.Write(w, elfFile.ByteOrder, uint32(p.Flags)) - - // Alignment - binary.Write(w, elfFile.ByteOrder, uint32(p.Align)) - - bytesWritten += 28 - - case ELFCLASS64: - // Flags (segment) - binary.Write(w, elfFile.ByteOrder, uint32(p.Flags)) - - // Offset of Segment in File - binary.Write(w, elfFile.ByteOrder, uint64(p.Off)) - - // Vaddr - binary.Write(w, elfFile.ByteOrder, uint64(p.Vaddr)) - - // Paddr - binary.Write(w, elfFile.ByteOrder, uint64(p.Paddr)) - - // File Size - binary.Write(w, elfFile.ByteOrder, uint64(p.Filesz)) - - // Memory Size - binary.Write(w, elfFile.ByteOrder, uint64(p.Memsz)) - - // Alignment - binary.Write(w, elfFile.ByteOrder, uint64(p.Align)) - - bytesWritten += 52 - } - } - - sortedSections := elfFile.Sections[:] - //sort.Slice(sortedSections, func(a, b int) bool { return elfFile.Sections[a].Link < elfFile.Sections[b].Link }) - for _, s := range sortedSections { - - //log.Printf("Writing section: %s type: %+v\n", s.Name, 
s.Type) - //log.Printf("written: %x offset: %x\n", bytesWritten, s.Offset) - - if s.Type == SHT_NULL || s.Type == SHT_NOBITS || s.FileSize == 0 { - //log.Println("continuing...") - continue - } - - if bytesWritten > s.Offset { - log.Printf("Overlapping Sections in Generated Elf: %+v\n", s.Name) - continue - } - if s.Offset != 0 && bytesWritten < s.Offset { - pad := make([]byte, s.Offset-bytesWritten) - w.Write(pad) - //log.Printf("Padding before section %s at %x: length:%x to:%x\n", s.Name, bytesWritten, len(pad), s.Offset) - bytesWritten += uint64(len(pad)) - } - - slen := 0 - switch s.Type { - case SHT_DYNAMIC: - for _, taggedValue := range elfFile.DynTags { - //log.Printf("writing %d (%x) -> %d (%x)\n", taggedValue.Tag, taggedValue.Tag, taggedValue.Value, taggedValue.Value) - switch elfFile.Class { - case ELFCLASS32: - binary.Write(w, elfFile.ByteOrder, uint32(taggedValue.Tag)) - binary.Write(w, elfFile.ByteOrder, uint32(taggedValue.Value)) - bytesWritten += 8 - case ELFCLASS64: - binary.Write(w, elfFile.ByteOrder, uint64(taggedValue.Tag)) - binary.Write(w, elfFile.ByteOrder, uint64(taggedValue.Value)) - bytesWritten += 16 - } - } - default: - section, err := ioutil.ReadAll(s.Open()) - if err != nil { - return nil, err - } - binary.Write(w, elfFile.ByteOrder, section) - slen = len(section) - //log.Printf("Wrote %s section at %x, length %x\n", s.Name, bytesWritten, slen) - bytesWritten += uint64(slen) - } - - // todo: elfFile.Insertion should be renamed InsertionLoadEnd or similar - if s.Type == SHT_PROGBITS && len(elfFile.Insertion) > 0 && s.Size-uint64(slen) >= uint64(len(elfFile.Insertion)) { - binary.Write(w, elfFile.ByteOrder, elfFile.Insertion) - bytesWritten += uint64(len(elfFile.Insertion)) - } - w.Flush() - } - - // Pad to Section Header Table - if bytesWritten < uint64(elfFile.FileHeader.SHTOffset) { - pad := make([]byte, uint64(elfFile.FileHeader.SHTOffset)-bytesWritten) - w.Write(pad) - //log.Printf("Padding before SHT at %x: length:%x to:%x\n", 
bytesWritten, len(pad), elfFile.FileHeader.SHTOffset) - bytesWritten += uint64(len(pad)) - } - - // Write Section Header Table - - for _, s := range elfFile.Sections[:] { - - switch elfFile.Class { - case ELFCLASS32: - binary.Write(w, elfFile.ByteOrder, &Section32{ - Name: s.Shname, - Type: uint32(s.Type), - Flags: uint32(s.Flags), - Addr: uint32(s.Addr), - Off: uint32(s.Offset), - Size: uint32(s.Size), - Link: s.Link, - Info: s.Info, - Addralign: uint32(s.Addralign), - Entsize: uint32(s.Entsize)}) - case ELFCLASS64: - binary.Write(w, elfFile.ByteOrder, &Section64{ - Name: s.Shname, - Type: uint32(s.Type), - Flags: uint64(s.Flags), - Addr: s.Addr, - Off: s.Offset, - Size: s.Size, - Link: s.Link, - Info: s.Info, - Addralign: s.Addralign, - Entsize: s.Entsize}) - } - } - - // Do I have a PT_NOTE segment to add at the end? - - if len(elfFile.InsertionEOF) > 0 { - binary.Write(w, elfFile.ByteOrder, elfFile.InsertionEOF) - bytesWritten += uint64(len(elfFile.InsertionEOF)) - } - - w.Flush() - return elfBuf.Bytes(), nil -} - -// WriteFile - Creates a new file and writes it using the Bytes func above -func (elfFile *File) WriteFile(destFile string) error { - f, err := os.Create(destFile) - if err != nil { - return err - } - defer f.Close() - elfData, err := elfFile.Bytes() - if err != nil { - return err - } - _, err = f.Write(elfData) - if err != nil { - return err - } - - return nil -} diff --git a/go.mod b/go.mod index 78f055d..3438564 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ -module github.com/Binject/debug +module binject-debug -go 1.15 +go 1.22.1 diff --git a/internal/saferio/io.go b/internal/saferio/io.go new file mode 100644 index 0000000..5c428e6 --- /dev/null +++ b/internal/saferio/io.go @@ -0,0 +1,132 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Package saferio provides I/O functions that avoid allocating large +// amounts of memory unnecessarily. This is intended for packages that +// read data from an [io.Reader] where the size is part of the input +// data but the input may be corrupt, or may be provided by an +// untrustworthy attacker. +package saferio + +import ( + "io" + "unsafe" +) + +// chunk is an arbitrary limit on how much memory we are willing +// to allocate without concern. +const chunk = 10 << 20 // 10M + +// ReadData reads n bytes from the input stream, but avoids allocating +// all n bytes if n is large. This avoids crashing the program by +// allocating all n bytes in cases where n is incorrect. +// +// The error is io.EOF only if no bytes were read. +// If an io.EOF happens after reading some but not all the bytes, +// ReadData returns io.ErrUnexpectedEOF. +func ReadData(r io.Reader, n uint64) ([]byte, error) { + if int64(n) < 0 || n != uint64(int(n)) { + // n is too large to fit in int, so we can't allocate + // a buffer large enough. Treat this as a read failure. + return nil, io.ErrUnexpectedEOF + } + + if n < chunk { + buf := make([]byte, n) + _, err := io.ReadFull(r, buf) + if err != nil { + return nil, err + } + return buf, nil + } + + var buf []byte + buf1 := make([]byte, chunk) + for n > 0 { + next := n + if next > chunk { + next = chunk + } + _, err := io.ReadFull(r, buf1[:next]) + if err != nil { + if len(buf) > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return nil, err + } + buf = append(buf, buf1[:next]...) + n -= next + } + return buf, nil +} + +// ReadDataAt reads n bytes from the input stream at off, but avoids +// allocating all n bytes if n is large. This avoids crashing the program +// by allocating all n bytes in cases where n is incorrect. +func ReadDataAt(r io.ReaderAt, n uint64, off int64) ([]byte, error) { + if int64(n) < 0 || n != uint64(int(n)) { + // n is too large to fit in int, so we can't allocate + // a buffer large enough. 
Treat this as a read failure. + return nil, io.ErrUnexpectedEOF + } + + if n < chunk { + buf := make([]byte, n) + _, err := r.ReadAt(buf, off) + if err != nil { + // io.SectionReader can return EOF for n == 0, + // but for our purposes that is a success. + if err != io.EOF || n > 0 { + return nil, err + } + } + return buf, nil + } + + var buf []byte + buf1 := make([]byte, chunk) + for n > 0 { + next := n + if next > chunk { + next = chunk + } + _, err := r.ReadAt(buf1[:next], off) + if err != nil { + return nil, err + } + buf = append(buf, buf1[:next]...) + n -= next + off += int64(next) + } + return buf, nil +} + +// SliceCapWithSize returns the capacity to use when allocating a slice. +// After the slice is allocated with the capacity, it should be +// built using append. This will avoid allocating too much memory +// if the capacity is large and incorrect. +// +// A negative result means that the value is always too big. +func SliceCapWithSize(size, c uint64) int { + if int64(c) < 0 || c != uint64(int(c)) { + return -1 + } + if size > 0 && c > (1<<64-1)/size { + return -1 + } + if c*size > chunk { + c = chunk / size + if c == 0 { + c = 1 + } + } + return int(c) +} + +// SliceCap is like SliceCapWithSize but using generics. +func SliceCap[E any](c uint64) int { + var v E + size := uint64(unsafe.Sizeof(v)) + return SliceCapWithSize(size, c) +} diff --git a/internal/saferio/io_test.go b/internal/saferio/io_test.go new file mode 100644 index 0000000..696356f --- /dev/null +++ b/internal/saferio/io_test.go @@ -0,0 +1,136 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package saferio + +import ( + "bytes" + "io" + "testing" +) + +func TestReadData(t *testing.T) { + const count = 100 + input := bytes.Repeat([]byte{'a'}, count) + + t.Run("small", func(t *testing.T) { + got, err := ReadData(bytes.NewReader(input), count) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, input) { + t.Errorf("got %v, want %v", got, input) + } + }) + + t.Run("large", func(t *testing.T) { + _, err := ReadData(bytes.NewReader(input), 10<<30) + if err == nil { + t.Error("large read succeeded unexpectedly") + } + }) + + t.Run("maxint", func(t *testing.T) { + _, err := ReadData(bytes.NewReader(input), 1<<62) + if err == nil { + t.Error("large read succeeded unexpectedly") + } + }) + + t.Run("small-EOF", func(t *testing.T) { + _, err := ReadData(bytes.NewReader(nil), chunk-1) + if err != io.EOF { + t.Errorf("ReadData = %v, want io.EOF", err) + } + }) + + t.Run("large-EOF", func(t *testing.T) { + _, err := ReadData(bytes.NewReader(nil), chunk+1) + if err != io.EOF { + t.Errorf("ReadData = %v, want io.EOF", err) + } + }) + + t.Run("large-UnexpectedEOF", func(t *testing.T) { + _, err := ReadData(bytes.NewReader(make([]byte, chunk)), chunk+1) + if err != io.ErrUnexpectedEOF { + t.Errorf("ReadData = %v, want io.ErrUnexpectedEOF", err) + } + }) +} + +func TestReadDataAt(t *testing.T) { + const count = 100 + input := bytes.Repeat([]byte{'a'}, count) + + t.Run("small", func(t *testing.T) { + got, err := ReadDataAt(bytes.NewReader(input), count, 0) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(got, input) { + t.Errorf("got %v, want %v", got, input) + } + }) + + t.Run("large", func(t *testing.T) { + _, err := ReadDataAt(bytes.NewReader(input), 10<<30, 0) + if err == nil { + t.Error("large read succeeded unexpectedly") + } + }) + + t.Run("maxint", func(t *testing.T) { + _, err := ReadDataAt(bytes.NewReader(input), 1<<62, 0) + if err == nil { + t.Error("large read succeeded unexpectedly") + } + }) + + t.Run("SectionReader", func(t *testing.T) { + 
// Reading 0 bytes from an io.SectionReader at the end + // of the section will return EOF, but ReadDataAt + // should succeed and return 0 bytes. + sr := io.NewSectionReader(bytes.NewReader(input), 0, 0) + got, err := ReadDataAt(sr, 0, 0) + if err != nil { + t.Fatal(err) + } + if len(got) > 0 { + t.Errorf("got %d bytes, expected 0", len(got)) + } + }) +} + +func TestSliceCap(t *testing.T) { + t.Run("small", func(t *testing.T) { + c := SliceCap[int](10) + if c != 10 { + t.Errorf("got capacity %d, want %d", c, 10) + } + }) + + t.Run("large", func(t *testing.T) { + c := SliceCap[byte](1 << 30) + if c < 0 { + t.Error("SliceCap failed unexpectedly") + } else if c == 1<<30 { + t.Errorf("got capacity %d which is too high", c) + } + }) + + t.Run("maxint", func(t *testing.T) { + c := SliceCap[byte](1 << 63) + if c >= 0 { + t.Errorf("SliceCap returned %d, expected failure", c) + } + }) + + t.Run("overflow", func(t *testing.T) { + c := SliceCap[int64](1 << 62) + if c >= 0 { + t.Errorf("SliceCap returned %d, expected failure", c) + } + }) +} diff --git a/internal/zstd/bits.go b/internal/zstd/bits.go new file mode 100644 index 0000000..c9a2f70 --- /dev/null +++ b/internal/zstd/bits.go @@ -0,0 +1,130 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "math/bits" +) + +// block is the data for a single compressed block. +// The data starts immediately after the 3 byte block header, +// and is Block_Size bytes long. +type block []byte + +// bitReader reads a bit stream going forward. +type bitReader struct { + r *Reader // for error reporting + data block // the bits to read + off uint32 // current offset into data + bits uint32 // bits ready to be returned + cnt uint32 // number of valid bits in the bits field +} + +// makeBitReader makes a bit reader starting at off. 
+func (r *Reader) makeBitReader(data block, off int) bitReader { + return bitReader{ + r: r, + data: data, + off: uint32(off), + } +} + +// moreBits is called to read more bits. +// This ensures that at least 16 bits are available. +func (br *bitReader) moreBits() error { + for br.cnt < 16 { + if br.off >= uint32(len(br.data)) { + return br.r.makeEOFError(int(br.off)) + } + c := br.data[br.off] + br.off++ + br.bits |= uint32(c) << br.cnt + br.cnt += 8 + } + return nil +} + +// val is called to fetch a value of b bits. +func (br *bitReader) val(b uint8) uint32 { + r := br.bits & ((1 << b) - 1) + br.bits >>= b + br.cnt -= uint32(b) + return r +} + +// backup steps back to the last byte we used. +func (br *bitReader) backup() { + for br.cnt >= 8 { + br.off-- + br.cnt -= 8 + } +} + +// makeError returns an error at the current offset wrapping a string. +func (br *bitReader) makeError(msg string) error { + return br.r.makeError(int(br.off), msg) +} + +// reverseBitReader reads a bit stream in reverse. +type reverseBitReader struct { + r *Reader // for error reporting + data block // the bits to read + off uint32 // current offset into data + start uint32 // start in data; we read backward to start + bits uint32 // bits ready to be returned + cnt uint32 // number of valid bits in bits field +} + +// makeReverseBitReader makes a reverseBitReader reading backward +// from off to start. The bitstream starts with a 1 bit in the last +// byte, at off. +func (r *Reader) makeReverseBitReader(data block, off, start int) (reverseBitReader, error) { + streamStart := data[off] + if streamStart == 0 { + return reverseBitReader{}, r.makeError(off, "zero byte at reverse bit stream start") + } + rbr := reverseBitReader{ + r: r, + data: data, + off: uint32(off), + start: uint32(start), + bits: uint32(streamStart), + cnt: uint32(7 - bits.LeadingZeros8(streamStart)), + } + return rbr, nil +} + +// val is called to fetch a value of b bits. 
+func (rbr *reverseBitReader) val(b uint8) (uint32, error) { + if !rbr.fetch(b) { + return 0, rbr.r.makeEOFError(int(rbr.off)) + } + + rbr.cnt -= uint32(b) + v := (rbr.bits >> rbr.cnt) & ((1 << b) - 1) + return v, nil +} + +// fetch is called to ensure that at least b bits are available. +// It reports false if this can't be done, +// in which case only rbr.cnt bits are available. +func (rbr *reverseBitReader) fetch(b uint8) bool { + for rbr.cnt < uint32(b) { + if rbr.off <= rbr.start { + return false + } + rbr.off-- + c := rbr.data[rbr.off] + rbr.bits <<= 8 + rbr.bits |= uint32(c) + rbr.cnt += 8 + } + return true +} + +// makeError returns an error at the current offset wrapping a string. +func (rbr *reverseBitReader) makeError(msg string) error { + return rbr.r.makeError(int(rbr.off), msg) +} diff --git a/internal/zstd/block.go b/internal/zstd/block.go new file mode 100644 index 0000000..11a99cd --- /dev/null +++ b/internal/zstd/block.go @@ -0,0 +1,425 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "io" +) + +// debug can be set in the source to print debug info using println. +const debug = false + +// compressedBlock decompresses a compressed block, storing the decompressed +// data in r.buffer. The blockSize argument is the compressed size. +// RFC 3.1.1.3. +func (r *Reader) compressedBlock(blockSize int) error { + if len(r.compressedBuf) >= blockSize { + r.compressedBuf = r.compressedBuf[:blockSize] + } else { + // We know that blockSize <= 128K, + // so this won't allocate an enormous amount. + need := blockSize - len(r.compressedBuf) + r.compressedBuf = append(r.compressedBuf, make([]byte, need)...) 
+ } + + if _, err := io.ReadFull(r.r, r.compressedBuf); err != nil { + return r.wrapNonEOFError(0, err) + } + + data := block(r.compressedBuf) + off := 0 + r.buffer = r.buffer[:0] + + litoff, litbuf, err := r.readLiterals(data, off, r.literals[:0]) + if err != nil { + return err + } + r.literals = litbuf + + off = litoff + + seqCount, off, err := r.initSeqs(data, off) + if err != nil { + return err + } + + if seqCount == 0 { + // No sequences, just literals. + if off < len(data) { + return r.makeError(off, "extraneous data after no sequences") + } + + r.buffer = append(r.buffer, litbuf...) + + return nil + } + + return r.execSeqs(data, off, litbuf, seqCount) +} + +// seqCode is the kind of sequence codes we have to handle. +type seqCode int + +const ( + seqLiteral seqCode = iota + seqOffset + seqMatch +) + +// seqCodeInfoData is the information needed to set up seqTables and +// seqTableBits for a particular kind of sequence code. +type seqCodeInfoData struct { + predefTable []fseBaselineEntry // predefined FSE + predefTableBits int // number of bits in predefTable + maxSym int // max symbol value in FSE + maxBits int // max bits for FSE + + // toBaseline converts from an FSE table to an FSE baseline table. + toBaseline func(*Reader, int, []fseEntry, []fseBaselineEntry) error +} + +// seqCodeInfo is the seqCodeInfoData for each kind of sequence code. 
+var seqCodeInfo = [3]seqCodeInfoData{ + seqLiteral: { + predefTable: predefinedLiteralTable[:], + predefTableBits: 6, + maxSym: 35, + maxBits: 9, + toBaseline: (*Reader).makeLiteralBaselineFSE, + }, + seqOffset: { + predefTable: predefinedOffsetTable[:], + predefTableBits: 5, + maxSym: 31, + maxBits: 8, + toBaseline: (*Reader).makeOffsetBaselineFSE, + }, + seqMatch: { + predefTable: predefinedMatchTable[:], + predefTableBits: 6, + maxSym: 52, + maxBits: 9, + toBaseline: (*Reader).makeMatchBaselineFSE, + }, +} + +// initSeqs reads the Sequences_Section_Header and sets up the FSE +// tables used to read the sequence codes. It returns the number of +// sequences and the new offset. RFC 3.1.1.3.2.1. +func (r *Reader) initSeqs(data block, off int) (int, int, error) { + if off >= len(data) { + return 0, 0, r.makeEOFError(off) + } + + seqHdr := data[off] + off++ + if seqHdr == 0 { + return 0, off, nil + } + + var seqCount int + if seqHdr < 128 { + seqCount = int(seqHdr) + } else if seqHdr < 255 { + if off >= len(data) { + return 0, 0, r.makeEOFError(off) + } + seqCount = ((int(seqHdr) - 128) << 8) + int(data[off]) + off++ + } else { + if off+1 >= len(data) { + return 0, 0, r.makeEOFError(off) + } + seqCount = int(data[off]) + (int(data[off+1]) << 8) + 0x7f00 + off += 2 + } + + // Read the Symbol_Compression_Modes byte. + + if off >= len(data) { + return 0, 0, r.makeEOFError(off) + } + symMode := data[off] + if symMode&3 != 0 { + return 0, 0, r.makeError(off, "invalid symbol compression mode") + } + off++ + + // Set up the FSE tables used to decode the sequence codes. 
+ + var err error + off, err = r.setSeqTable(data, off, seqLiteral, (symMode>>6)&3) + if err != nil { + return 0, 0, err + } + + off, err = r.setSeqTable(data, off, seqOffset, (symMode>>4)&3) + if err != nil { + return 0, 0, err + } + + off, err = r.setSeqTable(data, off, seqMatch, (symMode>>2)&3) + if err != nil { + return 0, 0, err + } + + return seqCount, off, nil +} + +// setSeqTable uses the Compression_Mode in mode to set up r.seqTables and +// r.seqTableBits for kind. We store these in the Reader because one of +// the modes simply reuses the value from the last block in the frame. +func (r *Reader) setSeqTable(data block, off int, kind seqCode, mode byte) (int, error) { + info := &seqCodeInfo[kind] + switch mode { + case 0: + // Predefined_Mode + r.seqTables[kind] = info.predefTable + r.seqTableBits[kind] = uint8(info.predefTableBits) + return off, nil + + case 1: + // RLE_Mode + if off >= len(data) { + return 0, r.makeEOFError(off) + } + rle := data[off] + off++ + + // Build a simple baseline table that always returns rle. + + entry := []fseEntry{ + { + sym: rle, + bits: 0, + base: 0, + }, + } + if cap(r.seqTableBuffers[kind]) == 0 { + r.seqTableBuffers[kind] = make([]fseBaselineEntry, 1< 128<<10 { + return rbr.makeError("uncompressed size too big") + } + + ptoffset := &r.seqTables[seqOffset][offsetState] + ptmatch := &r.seqTables[seqMatch][matchState] + ptliteral := &r.seqTables[seqLiteral][literalState] + + add, err := rbr.val(ptoffset.basebits) + if err != nil { + return err + } + offset := ptoffset.baseline + add + + add, err = rbr.val(ptmatch.basebits) + if err != nil { + return err + } + match := ptmatch.baseline + add + + add, err = rbr.val(ptliteral.basebits) + if err != nil { + return err + } + literal := ptliteral.baseline + add + + // Handle repeat offsets. RFC 3.1.1.5. + // See the comment in makeOffsetBaselineFSE. 
+ if ptoffset.basebits > 1 { + r.repeatedOffset3 = r.repeatedOffset2 + r.repeatedOffset2 = r.repeatedOffset1 + r.repeatedOffset1 = offset + } else { + if literal == 0 { + offset++ + } + switch offset { + case 1: + offset = r.repeatedOffset1 + case 2: + offset = r.repeatedOffset2 + r.repeatedOffset2 = r.repeatedOffset1 + r.repeatedOffset1 = offset + case 3: + offset = r.repeatedOffset3 + r.repeatedOffset3 = r.repeatedOffset2 + r.repeatedOffset2 = r.repeatedOffset1 + r.repeatedOffset1 = offset + case 4: + offset = r.repeatedOffset1 - 1 + r.repeatedOffset3 = r.repeatedOffset2 + r.repeatedOffset2 = r.repeatedOffset1 + r.repeatedOffset1 = offset + } + } + + seq++ + if seq < seqCount { + // Update the states. + add, err = rbr.val(ptliteral.bits) + if err != nil { + return err + } + literalState = uint32(ptliteral.base) + add + + add, err = rbr.val(ptmatch.bits) + if err != nil { + return err + } + matchState = uint32(ptmatch.base) + add + + add, err = rbr.val(ptoffset.bits) + if err != nil { + return err + } + offsetState = uint32(ptoffset.base) + add + } + + // The next sequence is now in literal, offset, match. + + if debug { + println("literal", literal, "offset", offset, "match", match) + } + + // Copy literal bytes from litbuf. + if literal > uint32(len(litbuf)) { + return rbr.makeError("literal byte overflow") + } + if literal > 0 { + r.buffer = append(r.buffer, litbuf[:literal]...) + litbuf = litbuf[literal:] + } + + if match > 0 { + if err := r.copyFromWindow(&rbr, offset, match); err != nil { + return err + } + } + } + + r.buffer = append(r.buffer, litbuf...) + + if rbr.cnt != 0 { + return r.makeError(off, "extraneous data after sequences") + } + + return nil +} + +// Copy match bytes from the decoded output, or the window, at offset. 
+func (r *Reader) copyFromWindow(rbr *reverseBitReader, offset, match uint32) error { + if offset == 0 { + return rbr.makeError("invalid zero offset") + } + + // Offset may point into the buffer or the window and + // match may extend past the end of the initial buffer. + // |--r.window--|--r.buffer--| + // |<-----offset------| + // |------match----------->| + bufferOffset := uint32(0) + lenBlock := uint32(len(r.buffer)) + if lenBlock < offset { + lenWindow := r.window.len() + copy := offset - lenBlock + if copy > lenWindow { + return rbr.makeError("offset past window") + } + windowOffset := lenWindow - copy + if copy > match { + copy = match + } + r.buffer = r.window.appendTo(r.buffer, windowOffset, windowOffset+copy) + match -= copy + } else { + bufferOffset = lenBlock - offset + } + + // We are being asked to copy data that we are adding to the + // buffer in the same copy. + for match > 0 { + copy := uint32(len(r.buffer)) - bufferOffset + if copy > match { + copy = match + } + r.buffer = append(r.buffer, r.buffer[bufferOffset:bufferOffset+copy]...) + match -= copy + } + return nil +} diff --git a/internal/zstd/fse.go b/internal/zstd/fse.go new file mode 100644 index 0000000..f03a792 --- /dev/null +++ b/internal/zstd/fse.go @@ -0,0 +1,437 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "math/bits" +) + +// fseEntry is one entry in an FSE table. +type fseEntry struct { + sym uint8 // value that this entry records + bits uint8 // number of bits to read to determine next state + base uint16 // add those bits to this state to get the next state +} + +// readFSE reads an FSE table from data starting at off. +// maxSym is the maximum symbol value. +// maxBits is the maximum number of bits permitted for symbols in the table. 
+// The FSE is written into table, which must be at least 1< maxBits { + return 0, 0, br.makeError("FSE accuracy log too large") + } + + // The number of remaining probabilities, plus 1. + // This determines the number of bits to be read for the next value. + remaining := (1 << accuracyLog) + 1 + + // The current difference between small and large values, + // which depends on the number of remaining values. + // Small values use 1 less bit. + threshold := 1 << accuracyLog + + // The number of bits needed to compute threshold. + bitsNeeded := accuracyLog + 1 + + // The next character value. + sym := 0 + + // Whether the last count was 0. + prev0 := false + + var norm [256]int16 + + for remaining > 1 && sym <= maxSym { + if err := br.moreBits(); err != nil { + return 0, 0, err + } + + if prev0 { + // Previous count was 0, so there is a 2-bit + // repeat flag. If the 2-bit flag is 0b11, + // it adds 3 and then there is another repeat flag. + zsym := sym + for (br.bits & 0xfff) == 0xfff { + zsym += 3 * 6 + br.bits >>= 12 + br.cnt -= 12 + if err := br.moreBits(); err != nil { + return 0, 0, err + } + } + for (br.bits & 3) == 3 { + zsym += 3 + br.bits >>= 2 + br.cnt -= 2 + if err := br.moreBits(); err != nil { + return 0, 0, err + } + } + + // We have at least 14 bits here, + // no need to call moreBits + + zsym += int(br.val(2)) + + if zsym > maxSym { + return 0, 0, br.makeError("FSE symbol index overflow") + } + + for ; sym < zsym; sym++ { + norm[uint8(sym)] = 0 + } + + prev0 = false + continue + } + + max := (2*threshold - 1) - remaining + var count int + if int(br.bits&uint32(threshold-1)) < max { + // A small value. + count = int(br.bits & uint32((threshold - 1))) + br.bits >>= bitsNeeded - 1 + br.cnt -= uint32(bitsNeeded - 1) + } else { + // A large value. 
+ count = int(br.bits & uint32((2*threshold - 1))) + if count >= threshold { + count -= max + } + br.bits >>= bitsNeeded + br.cnt -= uint32(bitsNeeded) + } + + count-- + if count >= 0 { + remaining -= count + } else { + remaining-- + } + if sym >= 256 { + return 0, 0, br.makeError("FSE sym overflow") + } + norm[uint8(sym)] = int16(count) + sym++ + + prev0 = count == 0 + + for remaining < threshold { + bitsNeeded-- + threshold >>= 1 + } + } + + if remaining != 1 { + return 0, 0, br.makeError("too many symbols in FSE table") + } + + for ; sym <= maxSym; sym++ { + norm[uint8(sym)] = 0 + } + + br.backup() + + if err := r.buildFSE(off, norm[:maxSym+1], table, accuracyLog); err != nil { + return 0, 0, err + } + + return accuracyLog, int(br.off), nil +} + +// buildFSE builds an FSE decoding table from a list of probabilities. +// The probabilities are in norm. off is used only when reporting errors. The number of bits +// in the table is tableBits. +func (r *Reader) buildFSE(off int, norm []int16, table []fseEntry, tableBits int) error { + tableSize := 1 << tableBits + highThreshold := tableSize - 1 + + var next [256]uint16 + + for i, n := range norm { + if n >= 0 { + next[uint8(i)] = uint16(n) + } else { + table[highThreshold].sym = uint8(i) + highThreshold-- + next[uint8(i)] = 1 + } + } + + pos := 0 + step := (tableSize >> 1) + (tableSize >> 3) + 3 + mask := tableSize - 1 + for i, n := range norm { + for j := 0; j < int(n); j++ { + table[pos].sym = uint8(i) + pos = (pos + step) & mask + for pos > highThreshold { + pos = (pos + step) & mask + } + } + } + if pos != 0 { + return r.makeError(off, "FSE count error") + } + + for i := 0; i < tableSize; i++ { + sym := table[i].sym + nextState := next[sym] + next[sym]++ + + if nextState == 0 { + return r.makeError(off, "FSE state error") + } + + highBit := 15 - bits.LeadingZeros16(nextState) + + bits := tableBits - highBit + table[i].bits = uint8(bits) + table[i].base = (nextState << bits) - uint16(tableSize) + } + + return nil +} + +// 
fseBaselineEntry is an entry in an FSE baseline table. +// We use these for literal/match/length values. +// Those require mapping the symbol to a baseline value, +// and then reading zero or more bits and adding the value to the baseline. +// Rather than looking these up in separate tables, +// we convert the FSE table to an FSE baseline table. +type fseBaselineEntry struct { + baseline uint32 // baseline for value that this entry represents + basebits uint8 // number of bits to read to add to baseline + bits uint8 // number of bits to read to determine next state + base uint16 // add the bits to this base to get the next state +} + +// Given a literal length code, we need to read a number of bits and +// add that to a baseline. For states 0 to 15 the baseline is the +// state and the number of bits is zero. RFC 3.1.1.3.2.1.1. + +const literalLengthOffset = 16 + +var literalLengthBase = []uint32{ + 16 | (1 << 24), + 18 | (1 << 24), + 20 | (1 << 24), + 22 | (1 << 24), + 24 | (2 << 24), + 28 | (2 << 24), + 32 | (3 << 24), + 40 | (3 << 24), + 48 | (4 << 24), + 64 | (6 << 24), + 128 | (7 << 24), + 256 | (8 << 24), + 512 | (9 << 24), + 1024 | (10 << 24), + 2048 | (11 << 24), + 4096 | (12 << 24), + 8192 | (13 << 24), + 16384 | (14 << 24), + 32768 | (15 << 24), + 65536 | (16 << 24), +} + +// makeLiteralBaselineFSE converts the literal length fseTable to baselineTable. 
+func (r *Reader) makeLiteralBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error { + for i, e := range fseTable { + be := fseBaselineEntry{ + bits: e.bits, + base: e.base, + } + if e.sym < literalLengthOffset { + be.baseline = uint32(e.sym) + be.basebits = 0 + } else { + if e.sym > 35 { + return r.makeError(off, "FSE baseline symbol overflow") + } + idx := e.sym - literalLengthOffset + basebits := literalLengthBase[idx] + be.baseline = basebits & 0xffffff + be.basebits = uint8(basebits >> 24) + } + baselineTable[i] = be + } + return nil +} + +// makeOffsetBaselineFSE converts the offset length fseTable to baselineTable. +func (r *Reader) makeOffsetBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error { + for i, e := range fseTable { + be := fseBaselineEntry{ + bits: e.bits, + base: e.base, + } + if e.sym > 31 { + return r.makeError(off, "FSE offset symbol overflow") + } + + // The simple way to write this is + // be.baseline = 1 << e.sym + // be.basebits = e.sym + // That would give us an offset value that corresponds to + // the one described in the RFC. However, for offsets > 3 + // we have to subtract 3. And for offset values 1, 2, 3 + // we use a repeated offset. + // + // The baseline is always a power of 2, and is never 0, + // so for those low values we will see one entry that is + // baseline 1, basebits 0, and one entry that is baseline 2, + // basebits 1. All other entries will have baseline >= 4 + // basebits >= 2. + // + // So we can check for RFC offset <= 3 by checking for + // basebits <= 1. That means that we can subtract 3 here + // and not worry about doing it in the hot loop. + + be.baseline = 1 << e.sym + if e.sym >= 2 { + be.baseline -= 3 + } + be.basebits = e.sym + baselineTable[i] = be + } + return nil +} + +// Given a match length code, we need to read a number of bits and add +// that to a baseline. For states 0 to 31 the baseline is state+3 and +// the number of bits is zero. 
RFC 3.1.1.3.2.1.1. + +const matchLengthOffset = 32 + +var matchLengthBase = []uint32{ + 35 | (1 << 24), + 37 | (1 << 24), + 39 | (1 << 24), + 41 | (1 << 24), + 43 | (2 << 24), + 47 | (2 << 24), + 51 | (3 << 24), + 59 | (3 << 24), + 67 | (4 << 24), + 83 | (4 << 24), + 99 | (5 << 24), + 131 | (7 << 24), + 259 | (8 << 24), + 515 | (9 << 24), + 1027 | (10 << 24), + 2051 | (11 << 24), + 4099 | (12 << 24), + 8195 | (13 << 24), + 16387 | (14 << 24), + 32771 | (15 << 24), + 65539 | (16 << 24), +} + +// makeMatchBaselineFSE converts the match length fseTable to baselineTable. +func (r *Reader) makeMatchBaselineFSE(off int, fseTable []fseEntry, baselineTable []fseBaselineEntry) error { + for i, e := range fseTable { + be := fseBaselineEntry{ + bits: e.bits, + base: e.base, + } + if e.sym < matchLengthOffset { + be.baseline = uint32(e.sym) + 3 + be.basebits = 0 + } else { + if e.sym > 52 { + return r.makeError(off, "FSE baseline symbol overflow") + } + idx := e.sym - matchLengthOffset + basebits := matchLengthBase[idx] + be.baseline = basebits & 0xffffff + be.basebits = uint8(basebits >> 24) + } + baselineTable[i] = be + } + return nil +} + +// predefinedLiteralTable is the predefined table to use for literal lengths. +// Generated from table in RFC 3.1.1.3.2.2.1. +// Checked by TestPredefinedTables. 
+var predefinedLiteralTable = [...]fseBaselineEntry{ + {0, 0, 4, 0}, {0, 0, 4, 16}, {1, 0, 5, 32}, + {3, 0, 5, 0}, {4, 0, 5, 0}, {6, 0, 5, 0}, + {7, 0, 5, 0}, {9, 0, 5, 0}, {10, 0, 5, 0}, + {12, 0, 5, 0}, {14, 0, 6, 0}, {16, 1, 5, 0}, + {20, 1, 5, 0}, {22, 1, 5, 0}, {28, 2, 5, 0}, + {32, 3, 5, 0}, {48, 4, 5, 0}, {64, 6, 5, 32}, + {128, 7, 5, 0}, {256, 8, 6, 0}, {1024, 10, 6, 0}, + {4096, 12, 6, 0}, {0, 0, 4, 32}, {1, 0, 4, 0}, + {2, 0, 5, 0}, {4, 0, 5, 32}, {5, 0, 5, 0}, + {7, 0, 5, 32}, {8, 0, 5, 0}, {10, 0, 5, 32}, + {11, 0, 5, 0}, {13, 0, 6, 0}, {16, 1, 5, 32}, + {18, 1, 5, 0}, {22, 1, 5, 32}, {24, 2, 5, 0}, + {32, 3, 5, 32}, {40, 3, 5, 0}, {64, 6, 4, 0}, + {64, 6, 4, 16}, {128, 7, 5, 32}, {512, 9, 6, 0}, + {2048, 11, 6, 0}, {0, 0, 4, 48}, {1, 0, 4, 16}, + {2, 0, 5, 32}, {3, 0, 5, 32}, {5, 0, 5, 32}, + {6, 0, 5, 32}, {8, 0, 5, 32}, {9, 0, 5, 32}, + {11, 0, 5, 32}, {12, 0, 5, 32}, {15, 0, 6, 0}, + {18, 1, 5, 32}, {20, 1, 5, 32}, {24, 2, 5, 32}, + {28, 2, 5, 32}, {40, 3, 5, 32}, {48, 4, 5, 32}, + {65536, 16, 6, 0}, {32768, 15, 6, 0}, {16384, 14, 6, 0}, + {8192, 13, 6, 0}, +} + +// predefinedOffsetTable is the predefined table to use for offsets. +// Generated from table in RFC 3.1.1.3.2.2.3. +// Checked by TestPredefinedTables. +var predefinedOffsetTable = [...]fseBaselineEntry{ + {1, 0, 5, 0}, {61, 6, 4, 0}, {509, 9, 5, 0}, + {32765, 15, 5, 0}, {2097149, 21, 5, 0}, {5, 3, 5, 0}, + {125, 7, 4, 0}, {4093, 12, 5, 0}, {262141, 18, 5, 0}, + {8388605, 23, 5, 0}, {29, 5, 5, 0}, {253, 8, 4, 0}, + {16381, 14, 5, 0}, {1048573, 20, 5, 0}, {1, 2, 5, 0}, + {125, 7, 4, 16}, {2045, 11, 5, 0}, {131069, 17, 5, 0}, + {4194301, 22, 5, 0}, {13, 4, 5, 0}, {253, 8, 4, 16}, + {8189, 13, 5, 0}, {524285, 19, 5, 0}, {2, 1, 5, 0}, + {61, 6, 4, 16}, {1021, 10, 5, 0}, {65533, 16, 5, 0}, + {268435453, 28, 5, 0}, {134217725, 27, 5, 0}, {67108861, 26, 5, 0}, + {33554429, 25, 5, 0}, {16777213, 24, 5, 0}, +} + +// predefinedMatchTable is the predefined table to use for match lengths. 
+// Generated from table in RFC 3.1.1.3.2.2.2. +// Checked by TestPredefinedTables. +var predefinedMatchTable = [...]fseBaselineEntry{ + {3, 0, 6, 0}, {4, 0, 4, 0}, {5, 0, 5, 32}, + {6, 0, 5, 0}, {8, 0, 5, 0}, {9, 0, 5, 0}, + {11, 0, 5, 0}, {13, 0, 6, 0}, {16, 0, 6, 0}, + {19, 0, 6, 0}, {22, 0, 6, 0}, {25, 0, 6, 0}, + {28, 0, 6, 0}, {31, 0, 6, 0}, {34, 0, 6, 0}, + {37, 1, 6, 0}, {41, 1, 6, 0}, {47, 2, 6, 0}, + {59, 3, 6, 0}, {83, 4, 6, 0}, {131, 7, 6, 0}, + {515, 9, 6, 0}, {4, 0, 4, 16}, {5, 0, 4, 0}, + {6, 0, 5, 32}, {7, 0, 5, 0}, {9, 0, 5, 32}, + {10, 0, 5, 0}, {12, 0, 6, 0}, {15, 0, 6, 0}, + {18, 0, 6, 0}, {21, 0, 6, 0}, {24, 0, 6, 0}, + {27, 0, 6, 0}, {30, 0, 6, 0}, {33, 0, 6, 0}, + {35, 1, 6, 0}, {39, 1, 6, 0}, {43, 2, 6, 0}, + {51, 3, 6, 0}, {67, 4, 6, 0}, {99, 5, 6, 0}, + {259, 8, 6, 0}, {4, 0, 4, 32}, {4, 0, 4, 48}, + {5, 0, 4, 16}, {7, 0, 5, 32}, {8, 0, 5, 32}, + {10, 0, 5, 32}, {11, 0, 5, 32}, {14, 0, 6, 0}, + {17, 0, 6, 0}, {20, 0, 6, 0}, {23, 0, 6, 0}, + {26, 0, 6, 0}, {29, 0, 6, 0}, {32, 0, 6, 0}, + {65539, 16, 6, 0}, {32771, 15, 6, 0}, {16387, 14, 6, 0}, + {8195, 13, 6, 0}, {4099, 12, 6, 0}, {2051, 11, 6, 0}, + {1027, 10, 6, 0}, +} diff --git a/internal/zstd/fse_test.go b/internal/zstd/fse_test.go new file mode 100644 index 0000000..6f106b6 --- /dev/null +++ b/internal/zstd/fse_test.go @@ -0,0 +1,89 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "slices" + "testing" +) + +// literalPredefinedDistribution is the predefined distribution table +// for literal lengths. RFC 3.1.1.3.2.2.1. +var literalPredefinedDistribution = []int16{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1, -1, -1, -1, +} + +// offsetPredefinedDistribution is the predefined distribution table +// for offsets. RFC 3.1.1.3.2.2.3. 
+var offsetPredefinedDistribution = []int16{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, +} + +// matchPredefinedDistribution is the predefined distribution table +// for match lengths. RFC 3.1.1.3.2.2.2. +var matchPredefinedDistribution = []int16{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, -1, -1, +} + +// TestPredefinedTables verifies that we can generate the predefined +// literal/offset/match tables from the input data in RFC 8878. +// This serves as a test of the predefined tables, and also of buildFSE +// and the functions that make baseline FSE tables. +func TestPredefinedTables(t *testing.T) { + tests := []struct { + name string + distribution []int16 + tableBits int + toBaseline func(*Reader, int, []fseEntry, []fseBaselineEntry) error + predef []fseBaselineEntry + }{ + { + name: "literal", + distribution: literalPredefinedDistribution, + tableBits: 6, + toBaseline: (*Reader).makeLiteralBaselineFSE, + predef: predefinedLiteralTable[:], + }, + { + name: "offset", + distribution: offsetPredefinedDistribution, + tableBits: 5, + toBaseline: (*Reader).makeOffsetBaselineFSE, + predef: predefinedOffsetTable[:], + }, + { + name: "match", + distribution: matchPredefinedDistribution, + tableBits: 6, + toBaseline: (*Reader).makeMatchBaselineFSE, + predef: predefinedMatchTable[:], + }, + } + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + var r Reader + table := make([]fseEntry, 1< len(zstdExp) { + c = len(zstdExp) + } + goExp = goExp[:c] + zstdExp = zstdExp[:c] + if !bytes.Equal(goExp, zstdExp) { + t.Error("byte mismatch after error") + t.Logf("Go error: %v\n", goErr) + t.Logf("zstd error: %v\n", zstdErr) + showDiffs(t, zstdExp, goExp) + } + } + }) +} diff --git a/internal/zstd/huff.go b/internal/zstd/huff.go new file mode 100644 index 0000000..452e24b --- 
/dev/null +++ b/internal/zstd/huff.go @@ -0,0 +1,204 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "io" + "math/bits" +) + +// maxHuffmanBits is the largest possible number of Huffman table bits. +const maxHuffmanBits = 11 + +// readHuff reads a Huffman table from data starting at off into table. +// Each entry in a Huffman table is a pair of bytes. +// The high byte is the encoded value. The low byte is the number +// of bits used to encode that value. We index into the table +// with a value of size tableBits. A value that requires fewer bits +// appears in the table multiple times. +// This returns the number of bits in the Huffman table and the new offset. +// RFC 4.2.1. +func (r *Reader) readHuff(data block, off int, table []uint16) (tableBits, roff int, err error) { + if off >= len(data) { + return 0, 0, r.makeEOFError(off) + } + + hdr := data[off] + off++ + + var weights [256]uint8 + var count int + if hdr < 128 { + // The table is compressed using an FSE. RFC 4.2.1.2. + if len(r.fseScratch) < 1<<6 { + r.fseScratch = make([]fseEntry, 1<<6) + } + fseBits, noff, err := r.readFSE(data, off, 255, 6, r.fseScratch) + if err != nil { + return 0, 0, err + } + fseTable := r.fseScratch + + if off+int(hdr) > len(data) { + return 0, 0, r.makeEOFError(off) + } + + rbr, err := r.makeReverseBitReader(data, off+int(hdr)-1, noff) + if err != nil { + return 0, 0, err + } + + state1, err := rbr.val(uint8(fseBits)) + if err != nil { + return 0, 0, err + } + + state2, err := rbr.val(uint8(fseBits)) + if err != nil { + return 0, 0, err + } + + // There are two independent FSE streams, tracked by + // state1 and state2. We decode them alternately. 
+ + for { + pt := &fseTable[state1] + if !rbr.fetch(pt.bits) { + if count >= 254 { + return 0, 0, rbr.makeError("Huffman count overflow") + } + weights[count] = pt.sym + weights[count+1] = fseTable[state2].sym + count += 2 + break + } + + v, err := rbr.val(pt.bits) + if err != nil { + return 0, 0, err + } + state1 = uint32(pt.base) + v + + if count >= 255 { + return 0, 0, rbr.makeError("Huffman count overflow") + } + + weights[count] = pt.sym + count++ + + pt = &fseTable[state2] + + if !rbr.fetch(pt.bits) { + if count >= 254 { + return 0, 0, rbr.makeError("Huffman count overflow") + } + weights[count] = pt.sym + weights[count+1] = fseTable[state1].sym + count += 2 + break + } + + v, err = rbr.val(pt.bits) + if err != nil { + return 0, 0, err + } + state2 = uint32(pt.base) + v + + if count >= 255 { + return 0, 0, rbr.makeError("Huffman count overflow") + } + + weights[count] = pt.sym + count++ + } + + off += int(hdr) + } else { + // The table is not compressed. Each weight is 4 bits. + + count = int(hdr) - 127 + if off+((count+1)/2) >= len(data) { + return 0, 0, io.ErrUnexpectedEOF + } + for i := 0; i < count; i += 2 { + b := data[off] + off++ + weights[i] = b >> 4 + weights[i+1] = b & 0xf + } + } + + // RFC 4.2.1.3. 
+ + var weightMark [13]uint32 + weightMask := uint32(0) + for _, w := range weights[:count] { + if w > 12 { + return 0, 0, r.makeError(off, "Huffman weight overflow") + } + weightMark[w]++ + if w > 0 { + weightMask += 1 << (w - 1) + } + } + if weightMask == 0 { + return 0, 0, r.makeError(off, "bad Huffman weights") + } + + tableBits = 32 - bits.LeadingZeros32(weightMask) + if tableBits > maxHuffmanBits { + return 0, 0, r.makeError(off, "bad Huffman weights") + } + + if len(table) < 1<= 256 { + return 0, 0, r.makeError(off, "Huffman weight overflow") + } + weights[count] = uint8(highBit + 1) + count++ + weightMark[highBit+1]++ + + if weightMark[1] < 2 || weightMark[1]&1 != 0 { + return 0, 0, r.makeError(off, "bad Huffman weights") + } + + // Change weightMark from a count of weights to the index of + // the first symbol for that weight. We shift the indexes to + // also store how many we have seen so far, + next := uint32(0) + for i := 0; i < tableBits; i++ { + cur := next + next += weightMark[i+1] << i + weightMark[i+1] = cur + } + + for i, w := range weights[:count] { + if w == 0 { + continue + } + length := uint32(1) << (w - 1) + tval := uint16(i)<<8 | (uint16(tableBits) + 1 - uint16(w)) + start := weightMark[w] + for j := uint32(0); j < length; j++ { + table[start+j] = tval + } + weightMark[w] += length + } + + return tableBits, off, nil +} diff --git a/internal/zstd/literals.go b/internal/zstd/literals.go new file mode 100644 index 0000000..11ef859 --- /dev/null +++ b/internal/zstd/literals.go @@ -0,0 +1,336 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "encoding/binary" +) + +// readLiterals reads and decompresses the literals from data at off. +// The literals are appended to outbuf, which is returned. +// Also returns the new input offset. RFC 3.1.1.3.1. 
+func (r *Reader) readLiterals(data block, off int, outbuf []byte) (int, []byte, error) { + if off >= len(data) { + return 0, nil, r.makeEOFError(off) + } + + // Literals section header. RFC 3.1.1.3.1.1. + hdr := data[off] + off++ + + if (hdr&3) == 0 || (hdr&3) == 1 { + return r.readRawRLELiterals(data, off, hdr, outbuf) + } else { + return r.readHuffLiterals(data, off, hdr, outbuf) + } +} + +// readRawRLELiterals reads and decompresses a Raw_Literals_Block or +// a RLE_Literals_Block. RFC 3.1.1.3.1.1. +func (r *Reader) readRawRLELiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) { + raw := (hdr & 3) == 0 + + var regeneratedSize int + switch (hdr >> 2) & 3 { + case 0, 2: + regeneratedSize = int(hdr >> 3) + case 1: + if off >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + off++ + case 3: + if off+1 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = int(hdr>>4) + (int(data[off]) << 4) + (int(data[off+1]) << 12) + off += 2 + } + + // We are going to use the entire literal block in the output. + // The maximum size of one decompressed block is 128K, + // so we can't have more literals than that. + if regeneratedSize > 128<<10 { + return 0, nil, r.makeError(off, "literal size too large") + } + + if raw { + // RFC 3.1.1.3.1.2. + if off+regeneratedSize > len(data) { + return 0, nil, r.makeError(off, "raw literal size too large") + } + outbuf = append(outbuf, data[off:off+regeneratedSize]...) + off += regeneratedSize + } else { + // RFC 3.1.1.3.1.3. + if off >= len(data) { + return 0, nil, r.makeError(off, "RLE literal missing") + } + rle := data[off] + off++ + for i := 0; i < regeneratedSize; i++ { + outbuf = append(outbuf, rle) + } + } + + return off, outbuf, nil +} + +// readHuffLiterals reads and decompresses a Compressed_Literals_Block or +// a Treeless_Literals_Block. RFC 3.1.1.3.1.4. 
+func (r *Reader) readHuffLiterals(data block, off int, hdr byte, outbuf []byte) (int, []byte, error) { + var ( + regeneratedSize int + compressedSize int + streams int + ) + switch (hdr >> 2) & 3 { + case 0, 1: + if off+1 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | ((int(data[off]) & 0x3f) << 4) + compressedSize = (int(data[off]) >> 6) | (int(data[off+1]) << 2) + off += 2 + if ((hdr >> 2) & 3) == 0 { + streams = 1 + } else { + streams = 4 + } + case 2: + if off+2 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 3) << 12) + compressedSize = (int(data[off+1]) >> 2) | (int(data[off+2]) << 6) + off += 3 + streams = 4 + case 3: + if off+3 >= len(data) { + return 0, nil, r.makeEOFError(off) + } + regeneratedSize = (int(hdr) >> 4) | (int(data[off]) << 4) | ((int(data[off+1]) & 0x3f) << 12) + compressedSize = (int(data[off+1]) >> 6) | (int(data[off+2]) << 2) | (int(data[off+3]) << 10) + off += 4 + streams = 4 + } + + // We are going to use the entire literal block in the output. + // The maximum size of one decompressed block is 128K, + // so we can't have more literals than that. + if regeneratedSize > 128<<10 { + return 0, nil, r.makeError(off, "literal size too large") + } + + roff := off + compressedSize + if roff > len(data) || roff < 0 { + return 0, nil, r.makeEOFError(off) + } + + totalStreamsSize := compressedSize + if (hdr & 3) == 2 { + // Compressed_Literals_Block. + // Read new huffman tree. + + if len(r.huffmanTable) < 1<> (rbr.cnt - huffBits)) & huffMask + t = huffTable[idx] + outbuf = append(outbuf, byte(t>>8)) + rbr.cnt -= uint32(t & 0xff) + } + + return outbuf, nil +} + +// readLiteralsFourStreams reads four interleaved streams of +// compressed literals. 
+func (r *Reader) readLiteralsFourStreams(data block, off, totalStreamsSize, regeneratedSize int, outbuf []byte) ([]byte, error) { + // Read the jump table to find out where the streams are. + // RFC 3.1.1.3.1.6. + if off+5 >= len(data) { + return nil, r.makeEOFError(off) + } + if totalStreamsSize < 6 { + return nil, r.makeError(off, "total streams size too small for jump table") + } + // RFC 3.1.1.3.1.6. + // "The decompressed size of each stream is equal to (Regenerated_Size+3)/4, + // except for the last stream, which may be up to 3 bytes smaller, + // to reach a total decompressed size as specified in Regenerated_Size." + regeneratedStreamSize := (regeneratedSize + 3) / 4 + if regeneratedSize < regeneratedStreamSize*3 { + return nil, r.makeError(off, "regenerated size too small to decode streams") + } + + streamSize1 := binary.LittleEndian.Uint16(data[off:]) + streamSize2 := binary.LittleEndian.Uint16(data[off+2:]) + streamSize3 := binary.LittleEndian.Uint16(data[off+4:]) + off += 6 + + tot := uint64(streamSize1) + uint64(streamSize2) + uint64(streamSize3) + if tot > uint64(totalStreamsSize)-6 { + return nil, r.makeEOFError(off) + } + streamSize4 := uint32(totalStreamsSize) - 6 - uint32(tot) + + off-- + off1 := off + int(streamSize1) + start1 := off + 1 + + off2 := off1 + int(streamSize2) + start2 := off1 + 1 + + off3 := off2 + int(streamSize3) + start3 := off2 + 1 + + off4 := off3 + int(streamSize4) + start4 := off3 + 1 + + // We let the reverse bit readers read earlier bytes, + // because the Huffman tables ignore bits that they don't need. 
+ + rbr1, err := r.makeReverseBitReader(data, off1, start1-2) + if err != nil { + return nil, err + } + + rbr2, err := r.makeReverseBitReader(data, off2, start2-2) + if err != nil { + return nil, err + } + + rbr3, err := r.makeReverseBitReader(data, off3, start3-2) + if err != nil { + return nil, err + } + + rbr4, err := r.makeReverseBitReader(data, off4, start4-2) + if err != nil { + return nil, err + } + + out1 := len(outbuf) + out2 := out1 + regeneratedStreamSize + out3 := out2 + regeneratedStreamSize + out4 := out3 + regeneratedStreamSize + + regeneratedStreamSize4 := regeneratedSize - regeneratedStreamSize*3 + + outbuf = append(outbuf, make([]byte, regeneratedSize)...) + + huffTable := r.huffmanTable + huffBits := uint32(r.huffmanTableBits) + huffMask := (uint32(1) << huffBits) - 1 + + for i := 0; i < regeneratedStreamSize; i++ { + use4 := i < regeneratedStreamSize4 + + fetchHuff := func(rbr *reverseBitReader) (uint16, error) { + if !rbr.fetch(uint8(huffBits)) { + return 0, rbr.makeError("literals Huffman stream out of bits") + } + idx := (rbr.bits >> (rbr.cnt - huffBits)) & huffMask + return huffTable[idx], nil + } + + t1, err := fetchHuff(&rbr1) + if err != nil { + return nil, err + } + + t2, err := fetchHuff(&rbr2) + if err != nil { + return nil, err + } + + t3, err := fetchHuff(&rbr3) + if err != nil { + return nil, err + } + + if use4 { + t4, err := fetchHuff(&rbr4) + if err != nil { + return nil, err + } + outbuf[out4] = byte(t4 >> 8) + out4++ + rbr4.cnt -= uint32(t4 & 0xff) + } + + outbuf[out1] = byte(t1 >> 8) + out1++ + rbr1.cnt -= uint32(t1 & 0xff) + + outbuf[out2] = byte(t2 >> 8) + out2++ + rbr2.cnt -= uint32(t2 & 0xff) + + outbuf[out3] = byte(t3 >> 8) + out3++ + rbr3.cnt -= uint32(t3 & 0xff) + } + + return outbuf, nil +} diff --git a/internal/zstd/testdata/1890a371.gettysburg.txt-100x.zst b/internal/zstd/testdata/1890a371.gettysburg.txt-100x.zst new file mode 100644 index 0000000..afb4a27 Binary files /dev/null and 
b/internal/zstd/testdata/1890a371.gettysburg.txt-100x.zst differ diff --git a/internal/zstd/testdata/README b/internal/zstd/testdata/README new file mode 100644 index 0000000..1a6dbb3 --- /dev/null +++ b/internal/zstd/testdata/README @@ -0,0 +1,10 @@ +This directory holds files for testing zstd.NewReader. + +Each one is a Zstandard compressed file named as hash.arbitrary-name.zst, +where hash is the first eight hexadecimal digits of the SHA256 hash +of the expected uncompressed content: + + zstd -d < 1890a371.gettysburg.txt-100x.zst | sha256sum | head -c 8 + 1890a371 + +The test uses the hash value to verify the decompression result. diff --git a/internal/zstd/testdata/f2a8e35c.helloworld-11000x.zst b/internal/zstd/testdata/f2a8e35c.helloworld-11000x.zst new file mode 100644 index 0000000..87a8aca Binary files /dev/null and b/internal/zstd/testdata/f2a8e35c.helloworld-11000x.zst differ diff --git a/internal/zstd/testdata/fcf30b99.zero-dictionary-ids.zst b/internal/zstd/testdata/fcf30b99.zero-dictionary-ids.zst new file mode 100644 index 0000000..1be89e8 Binary files /dev/null and b/internal/zstd/testdata/fcf30b99.zero-dictionary-ids.zst differ diff --git a/internal/zstd/window.go b/internal/zstd/window.go new file mode 100644 index 0000000..f9c5f04 --- /dev/null +++ b/internal/zstd/window.go @@ -0,0 +1,90 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +// window stores up to size bytes of data. +// It is implemented as a circular buffer: +// sequential save calls append to the data slice until +// its length reaches the configured size and after that, +// save calls overwrite previously saved data at off +// and update off such that it always points at +// the oldest stored byte. +type window struct { + size int + data []byte + off int +} + +// reset clears stored data and configures window size. 
+func (w *window) reset(size int) { + w.data = w.data[:0] + w.off = 0 + w.size = size +} + +// len returns the number of stored bytes. +func (w *window) len() uint32 { + return uint32(len(w.data)) +} + +// save stores up to size last bytes from the buf. +func (w *window) save(buf []byte) { + if w.size == 0 { + return + } + if len(buf) == 0 { + return + } + + if len(buf) >= w.size { + from := len(buf) - w.size + w.data = append(w.data[:0], buf[from:]...) + w.off = 0 + return + } + + // Update off to point to the oldest remaining byte. + free := w.size - len(w.data) + if free == 0 { + n := copy(w.data[w.off:], buf) + if n == len(buf) { + w.off += n + } else { + w.off = copy(w.data, buf[n:]) + } + } else { + if free >= len(buf) { + w.data = append(w.data, buf...) + } else { + w.data = append(w.data, buf[:free]...) + w.off = copy(w.data, buf[free:]) + } + } +} + +// appendTo appends stored bytes between from and to indices to the buf. +// Index from must be less or equal to index to and to must be less or equal to w.len(). +func (w *window) appendTo(buf []byte, from, to uint32) []byte { + dataLen := uint32(len(w.data)) + from += uint32(w.off) + to += uint32(w.off) + + wrap := false + if from > dataLen { + from -= dataLen + wrap = !wrap + } + if to > dataLen { + to -= dataLen + wrap = !wrap + } + + if wrap { + buf = append(buf, w.data[from:]...) + return append(buf, w.data[:to]...) + } else { + return append(buf, w.data[from:to]...) + } +} diff --git a/internal/zstd/window_test.go b/internal/zstd/window_test.go new file mode 100644 index 0000000..afa2eef --- /dev/null +++ b/internal/zstd/window_test.go @@ -0,0 +1,72 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package zstd + +import ( + "bytes" + "fmt" + "testing" +) + +func makeSequence(start, n int) (seq []byte) { + for i := 0; i < n; i++ { + seq = append(seq, byte(start+i)) + } + return +} + +func TestWindow(t *testing.T) { + for size := 0; size <= 3; size++ { + for i := 0; i <= 2*size; i++ { + a := makeSequence('a', i) + for j := 0; j <= 2*size; j++ { + b := makeSequence('a'+i, j) + for k := 0; k <= 2*size; k++ { + c := makeSequence('a'+i+j, k) + + t.Run(fmt.Sprintf("%d-%d-%d-%d", size, i, j, k), func(t *testing.T) { + testWindow(t, size, a, b, c) + }) + } + } + } + } +} + +// testWindow tests window by saving three sequences of bytes to it. +// Third sequence tests read offset that can become non-zero only after second save. +func testWindow(t *testing.T, size int, a, b, c []byte) { + var w window + w.reset(size) + + w.save(a) + w.save(b) + w.save(c) + + var tail []byte + tail = append(tail, a...) + tail = append(tail, b...) + tail = append(tail, c...) + + if len(tail) > size { + tail = tail[len(tail)-size:] + } + + if w.len() != uint32(len(tail)) { + t.Errorf("wrong data length: got: %d, want: %d", w.len(), len(tail)) + } + + var from, to uint32 + for from = 0; from <= uint32(len(tail)); from++ { + for to = from; to <= uint32(len(tail)); to++ { + got := w.appendTo(nil, from, to) + want := tail[from:to] + + if !bytes.Equal(got, want) { + t.Errorf("wrong data at [%d:%d]: got %q, want %q", from, to, got, want) + } + } + } +} diff --git a/internal/zstd/xxhash.go b/internal/zstd/xxhash.go new file mode 100644 index 0000000..4d579ee --- /dev/null +++ b/internal/zstd/xxhash.go @@ -0,0 +1,148 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package zstd + +import ( + "encoding/binary" + "math/bits" +) + +const ( + xxhPrime64c1 = 0x9e3779b185ebca87 + xxhPrime64c2 = 0xc2b2ae3d27d4eb4f + xxhPrime64c3 = 0x165667b19e3779f9 + xxhPrime64c4 = 0x85ebca77c2b2ae63 + xxhPrime64c5 = 0x27d4eb2f165667c5 +) + +// xxhash64 is the state of a xxHash-64 checksum. +type xxhash64 struct { + len uint64 // total length hashed + v [4]uint64 // accumulators + buf [32]byte // buffer + cnt int // number of bytes in buffer +} + +// reset discards the current state and prepares to compute a new hash. +// We assume a seed of 0 since that is what zstd uses. +func (xh *xxhash64) reset() { + xh.len = 0 + + // Separate addition for awkward constant overflow. + xh.v[0] = xxhPrime64c1 + xh.v[0] += xxhPrime64c2 + + xh.v[1] = xxhPrime64c2 + xh.v[2] = 0 + + // Separate negation for awkward constant overflow. + xh.v[3] = xxhPrime64c1 + xh.v[3] = -xh.v[3] + + for i := range xh.buf { + xh.buf[i] = 0 + } + xh.cnt = 0 +} + +// update adds a buffer to the hash. +func (xh *xxhash64) update(b []byte) { + xh.len += uint64(len(b)) + + if xh.cnt+len(b) < len(xh.buf) { + copy(xh.buf[xh.cnt:], b) + xh.cnt += len(b) + return + } + + if xh.cnt > 0 { + n := copy(xh.buf[xh.cnt:], b) + b = b[n:] + xh.v[0] = xh.round(xh.v[0], binary.LittleEndian.Uint64(xh.buf[:])) + xh.v[1] = xh.round(xh.v[1], binary.LittleEndian.Uint64(xh.buf[8:])) + xh.v[2] = xh.round(xh.v[2], binary.LittleEndian.Uint64(xh.buf[16:])) + xh.v[3] = xh.round(xh.v[3], binary.LittleEndian.Uint64(xh.buf[24:])) + xh.cnt = 0 + } + + for len(b) >= 32 { + xh.v[0] = xh.round(xh.v[0], binary.LittleEndian.Uint64(b)) + xh.v[1] = xh.round(xh.v[1], binary.LittleEndian.Uint64(b[8:])) + xh.v[2] = xh.round(xh.v[2], binary.LittleEndian.Uint64(b[16:])) + xh.v[3] = xh.round(xh.v[3], binary.LittleEndian.Uint64(b[24:])) + b = b[32:] + } + + if len(b) > 0 { + copy(xh.buf[:], b) + xh.cnt = len(b) + } +} + +// digest returns the final hash value. 
+func (xh *xxhash64) digest() uint64 { + var h64 uint64 + if xh.len < 32 { + h64 = xh.v[2] + xxhPrime64c5 + } else { + h64 = bits.RotateLeft64(xh.v[0], 1) + + bits.RotateLeft64(xh.v[1], 7) + + bits.RotateLeft64(xh.v[2], 12) + + bits.RotateLeft64(xh.v[3], 18) + h64 = xh.mergeRound(h64, xh.v[0]) + h64 = xh.mergeRound(h64, xh.v[1]) + h64 = xh.mergeRound(h64, xh.v[2]) + h64 = xh.mergeRound(h64, xh.v[3]) + } + + h64 += xh.len + + len := xh.len + len &= 31 + buf := xh.buf[:] + for len >= 8 { + k1 := xh.round(0, binary.LittleEndian.Uint64(buf)) + buf = buf[8:] + h64 ^= k1 + h64 = bits.RotateLeft64(h64, 27)*xxhPrime64c1 + xxhPrime64c4 + len -= 8 + } + if len >= 4 { + h64 ^= uint64(binary.LittleEndian.Uint32(buf)) * xxhPrime64c1 + buf = buf[4:] + h64 = bits.RotateLeft64(h64, 23)*xxhPrime64c2 + xxhPrime64c3 + len -= 4 + } + for len > 0 { + h64 ^= uint64(buf[0]) * xxhPrime64c5 + buf = buf[1:] + h64 = bits.RotateLeft64(h64, 11) * xxhPrime64c1 + len-- + } + + h64 ^= h64 >> 33 + h64 *= xxhPrime64c2 + h64 ^= h64 >> 29 + h64 *= xxhPrime64c3 + h64 ^= h64 >> 32 + + return h64 +} + +// round updates a value. +func (xh *xxhash64) round(v, n uint64) uint64 { + v += n * xxhPrime64c2 + v = bits.RotateLeft64(v, 31) + v *= xxhPrime64c1 + return v +} + +// mergeRound updates a value in the final round. +func (xh *xxhash64) mergeRound(v, n uint64) uint64 { + n = xh.round(0, n) + v ^= n + v = v*xxhPrime64c1 + xxhPrime64c4 + return v +} diff --git a/internal/zstd/xxhash_test.go b/internal/zstd/xxhash_test.go new file mode 100644 index 0000000..68ca558 --- /dev/null +++ b/internal/zstd/xxhash_test.go @@ -0,0 +1,115 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package zstd + +import ( + "bytes" + "os" + "os/exec" + "strconv" + "testing" +) + +var xxHashTests = []struct { + data string + hash uint64 +}{ + { + "hello, world", + 0xb33a384e6d1b1242, + }, + { + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$", + 0x1032d841e824f998, + }, +} + +func TestXXHash(t *testing.T) { + var xh xxhash64 + for i, test := range xxHashTests { + xh.reset() + xh.update([]byte(test.data)) + if got := xh.digest(); got != test.hash { + t.Errorf("#%d: got %#x want %#x", i, got, test.hash) + } + } +} + +func TestLargeXXHash(t *testing.T) { + if testing.Short() { + t.Skip("skipping expensive test in short mode") + } + + data, err := os.ReadFile("../../testdata/Isaac.Newton-Opticks.txt") + if err != nil { + t.Fatal(err) + } + + var xh xxhash64 + xh.reset() + i := 0 + for i < len(data) { + // Write varying amounts to test buffering. + c := i%4094 + 1 + if i+c > len(data) { + c = len(data) - i + } + xh.update(data[i : i+c]) + i += c + } + + got := xh.digest() + want := uint64(0xf0dd39fd7e063f82) + if got != want { + t.Errorf("got %#x want %#x", got, want) + } +} + +func findXxhsum(t testing.TB) string { + xxhsum, err := exec.LookPath("xxhsum") + if err != nil { + t.Skip("skipping because xxhsum not found") + } + return xxhsum +} + +func FuzzXXHash(f *testing.F) { + xxhsum := findXxhsum(f) + + for _, test := range xxHashTests { + f.Add([]byte(test.data)) + } + f.Add(bytes.Repeat([]byte("abcdefghijklmnop"), 256)) + var buf bytes.Buffer + for i := 0; i < 256; i++ { + buf.WriteByte(byte(i)) + } + f.Add(bytes.Repeat(buf.Bytes(), 64)) + f.Add(bigData(f)) + + f.Fuzz(func(t *testing.T, b []byte) { + cmd := exec.Command(xxhsum, "-H64") + cmd.Stdin = bytes.NewReader(b) + var hhsumHash bytes.Buffer + cmd.Stdout = &hhsumHash + if err := cmd.Run(); err != nil { + t.Fatalf("running hhsum failed: %v", err) + } + hhHashBytes := bytes.Fields(bytes.TrimSpace(hhsumHash.Bytes()))[0] + hhHash, err := strconv.ParseUint(string(hhHashBytes), 16, 64) + if 
err != nil { + t.Fatalf("could not parse hash %q: %v", hhHashBytes, err) + } + + var xh xxhash64 + xh.reset() + xh.update(b) + goHash := xh.digest() + + if goHash != hhHash { + t.Errorf("Go hash %#x != xxhsum hash %#x", goHash, hhHash) + } + }) +} diff --git a/internal/zstd/zstd.go b/internal/zstd/zstd.go new file mode 100644 index 0000000..0230076 --- /dev/null +++ b/internal/zstd/zstd.go @@ -0,0 +1,522 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package zstd provides a decompressor for zstd streams, +// described in RFC 8878. It does not support dictionaries. +package zstd + +import ( + "encoding/binary" + "errors" + "fmt" + "io" +) + +// fuzzing is a fuzzer hook set to true when fuzzing. +// This is used to reject cases where we don't match zstd. +var fuzzing = false + +// Reader implements [io.Reader] to read a zstd compressed stream. +type Reader struct { + // The underlying Reader. + r io.Reader + + // Whether we have read the frame header. + // This is of interest when buffer is empty. + // If true we expect to see a new block. + sawFrameHeader bool + + // Whether the current frame expects a checksum. + hasChecksum bool + + // Whether we have read at least one frame. + readOneFrame bool + + // True if the frame size is not known. + frameSizeUnknown bool + + // The number of uncompressed bytes remaining in the current frame. + // If frameSizeUnknown is true, this is not valid. + remainingFrameSize uint64 + + // The number of bytes read from r up to the start of the current + // block, for error reporting. + blockOffset int64 + + // Buffered decompressed data. + buffer []byte + // Current read offset in buffer. + off int + + // The current repeated offsets. + repeatedOffset1 uint32 + repeatedOffset2 uint32 + repeatedOffset3 uint32 + + // The current Huffman tree used for compressing literals. 
+ huffmanTable []uint16 + huffmanTableBits int + + // The window for back references. + window window + + // A buffer available to hold a compressed block. + compressedBuf []byte + + // A buffer for literals. + literals []byte + + // Sequence decode FSE tables. + seqTables [3][]fseBaselineEntry + seqTableBits [3]uint8 + + // Buffers for sequence decode FSE tables. + seqTableBuffers [3][]fseBaselineEntry + + // Scratch space used for small reads, to avoid allocation. + scratch [16]byte + + // A scratch table for reading an FSE. Only temporarily valid. + fseScratch []fseEntry + + // For checksum computation. + checksum xxhash64 +} + +// NewReader creates a new Reader that decompresses data from the given reader. +func NewReader(input io.Reader) *Reader { + r := new(Reader) + r.Reset(input) + return r +} + +// Reset discards the current state and starts reading a new stream from input. +// This permits reusing a Reader rather than allocating a new one. +func (r *Reader) Reset(input io.Reader) { + r.r = input + + // Several fields are preserved to avoid allocation. + // Others are always set before they are used. + r.sawFrameHeader = false + r.hasChecksum = false + r.readOneFrame = false + r.frameSizeUnknown = false + r.remainingFrameSize = 0 + r.blockOffset = 0 + r.buffer = r.buffer[:0] + r.off = 0 + // repeatedOffset1 + // repeatedOffset2 + // repeatedOffset3 + // huffmanTable + // huffmanTableBits + // window + // compressedBuf + // literals + // seqTables + // seqTableBits + // seqTableBuffers + // scratch + // fseScratch +} + +// Read implements [io.Reader]. +func (r *Reader) Read(p []byte) (int, error) { + if err := r.refillIfNeeded(); err != nil { + return 0, err + } + n := copy(p, r.buffer[r.off:]) + r.off += n + return n, nil +} + +// ReadByte implements [io.ByteReader]. 
+func (r *Reader) ReadByte() (byte, error) { + if err := r.refillIfNeeded(); err != nil { + return 0, err + } + ret := r.buffer[r.off] + r.off++ + return ret, nil +} + +// refillIfNeeded reads the next block if necessary. +func (r *Reader) refillIfNeeded() error { + for r.off >= len(r.buffer) { + if err := r.refill(); err != nil { + return err + } + r.off = 0 + } + return nil +} + +// refill reads and decompresses the next block. +func (r *Reader) refill() error { + if !r.sawFrameHeader { + if err := r.readFrameHeader(); err != nil { + return err + } + } + return r.readBlock() +} + +// readFrameHeader reads the frame header and prepares to read a block. +func (r *Reader) readFrameHeader() error { +retry: + relativeOffset := 0 + + // Read magic number. RFC 3.1.1. + if _, err := io.ReadFull(r.r, r.scratch[:4]); err != nil { + // We require that the stream contains at least one frame. + if err == io.EOF && !r.readOneFrame { + err = io.ErrUnexpectedEOF + } + return r.wrapError(relativeOffset, err) + } + + if magic := binary.LittleEndian.Uint32(r.scratch[:4]); magic != 0xfd2fb528 { + if magic >= 0x184d2a50 && magic <= 0x184d2a5f { + // This is a skippable frame. + r.blockOffset += int64(relativeOffset) + 4 + if err := r.skipFrame(); err != nil { + return err + } + r.readOneFrame = true + goto retry + } + + return r.makeError(relativeOffset, "invalid magic number") + } + + relativeOffset += 4 + + // Read Frame_Header_Descriptor. RFC 3.1.1.1.1. 
+ if _, err := io.ReadFull(r.r, r.scratch[:1]); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + descriptor := r.scratch[0] + + singleSegment := descriptor&(1<<5) != 0 + + fcsFieldSize := 1 << (descriptor >> 6) + if fcsFieldSize == 1 && !singleSegment { + fcsFieldSize = 0 + } + + var windowDescriptorSize int + if singleSegment { + windowDescriptorSize = 0 + } else { + windowDescriptorSize = 1 + } + + if descriptor&(1<<3) != 0 { + return r.makeError(relativeOffset, "reserved bit set in frame header descriptor") + } + + r.hasChecksum = descriptor&(1<<2) != 0 + if r.hasChecksum { + r.checksum.reset() + } + + // Dictionary_ID_Flag. RFC 3.1.1.1.1.6. + dictionaryIdSize := 0 + if dictIdFlag := descriptor & 3; dictIdFlag != 0 { + dictionaryIdSize = 1 << (dictIdFlag - 1) + } + + relativeOffset++ + + headerSize := windowDescriptorSize + dictionaryIdSize + fcsFieldSize + + if _, err := io.ReadFull(r.r, r.scratch[:headerSize]); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + + // Figure out the maximum amount of data we need to retain + // for backreferences. + var windowSize int + if !singleSegment { + // Window descriptor. RFC 3.1.1.1.2. + windowDescriptor := r.scratch[0] + exponent := uint64(windowDescriptor >> 3) + mantissa := uint64(windowDescriptor & 7) + windowLog := exponent + 10 + windowBase := uint64(1) << windowLog + windowAdd := (windowBase / 8) * mantissa + windowSize = int(windowBase + windowAdd) + + // Default zstd sets limits on the window size. + if fuzzing && (windowLog > 31 || windowSize > 1<<27) { + return r.makeError(relativeOffset, "windowSize too large") + } + } + + // Dictionary_ID. RFC 3.1.1.1.3. + if dictionaryIdSize != 0 { + dictionaryId := r.scratch[windowDescriptorSize : windowDescriptorSize+dictionaryIdSize] + // Allow only zero Dictionary ID. + for _, b := range dictionaryId { + if b != 0 { + return r.makeError(relativeOffset, "dictionaries are not supported") + } + } + } + + // Frame_Content_Size. 
RFC 3.1.1.1.4. + r.frameSizeUnknown = false + r.remainingFrameSize = 0 + fb := r.scratch[windowDescriptorSize+dictionaryIdSize:] + switch fcsFieldSize { + case 0: + r.frameSizeUnknown = true + case 1: + r.remainingFrameSize = uint64(fb[0]) + case 2: + r.remainingFrameSize = 256 + uint64(binary.LittleEndian.Uint16(fb)) + case 4: + r.remainingFrameSize = uint64(binary.LittleEndian.Uint32(fb)) + case 8: + r.remainingFrameSize = binary.LittleEndian.Uint64(fb) + default: + panic("unreachable") + } + + // RFC 3.1.1.1.2. + // When Single_Segment_Flag is set, Window_Descriptor is not present. + // In this case, Window_Size is Frame_Content_Size. + if singleSegment { + windowSize = int(r.remainingFrameSize) + } + + // RFC 8878 3.1.1.1.1.2. permits us to set an 8M max on window size. Frame_Content_Size is also checked as a uint64 because a huge value overflows the int conversion above and would otherwise yield a negative or truncated window size. + if windowSize > 8<<20 || (singleSegment && r.remainingFrameSize > 8<<20) { + windowSize = 8 << 20 + } + + relativeOffset += headerSize + + r.sawFrameHeader = true + r.readOneFrame = true + r.blockOffset += int64(relativeOffset) + + // Prepare to read blocks from the frame. + r.repeatedOffset1 = 1 + r.repeatedOffset2 = 4 + r.repeatedOffset3 = 8 + r.huffmanTableBits = 0 + r.window.reset(windowSize) + r.seqTables[0] = nil + r.seqTables[1] = nil + r.seqTables[2] = nil + + return nil +} + +// skipFrame skips a skippable frame. RFC 3.1.2. +func (r *Reader) skipFrame() error { + relativeOffset := 0 + + if _, err := io.ReadFull(r.r, r.scratch[:4]); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + + relativeOffset += 4 + + size := binary.LittleEndian.Uint32(r.scratch[:4]) + if size == 0 { + r.blockOffset += int64(relativeOffset) + return nil + } + + if seeker, ok := r.r.(io.Seeker); ok { + r.blockOffset += int64(relativeOffset) + // Implementations of Seeker do not always detect invalid offsets, + // so check that the new offset is valid by comparing to the end. 
+ prev, err := seeker.Seek(0, io.SeekCurrent) + if err != nil { + return r.wrapError(0, err) + } + end, err := seeker.Seek(0, io.SeekEnd) + if err != nil { + return r.wrapError(0, err) + } + if prev > end-int64(size) { + r.blockOffset += end - prev + return r.makeEOFError(0) + } + + // The new offset is valid, so seek to it. + _, err = seeker.Seek(prev+int64(size), io.SeekStart) + if err != nil { + return r.wrapError(0, err) + } + r.blockOffset += int64(size) + return nil + } + + var skip []byte + const chunk = 1 << 20 // 1M + for size >= chunk { + if len(skip) == 0 { + skip = make([]byte, chunk) + } + if _, err := io.ReadFull(r.r, skip); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + relativeOffset += chunk + size -= chunk + } + if size > 0 { + if len(skip) == 0 { + skip = make([]byte, size) + } + if _, err := io.ReadFull(r.r, skip[:size]); err != nil { // skip may still be a full chunk; read only the remaining size bytes + return r.wrapNonEOFError(relativeOffset, err) + } + relativeOffset += int(size) + } + + r.blockOffset += int64(relativeOffset) + + return nil +} + +// readBlock reads the next block from a frame. +func (r *Reader) readBlock() error { + relativeOffset := 0 + + // Read Block_Header. RFC 3.1.1.2. + if _, err := io.ReadFull(r.r, r.scratch[:3]); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + + relativeOffset += 3 + + header := uint32(r.scratch[0]) | (uint32(r.scratch[1]) << 8) | (uint32(r.scratch[2]) << 16) + + lastBlock := header&1 != 0 + blockType := (header >> 1) & 3 + blockSize := int(header >> 3) + + // Maximum block size is smaller of window size and 128K. + // We don't record the window size for a single segment frame, + // so just use 128K. RFC 3.1.1.2.3, 3.1.1.2.4. + if blockSize > 128<<10 || (r.window.size > 0 && blockSize > r.window.size) { + return r.makeError(relativeOffset, "block size too large") + } + + // Handle different block types. RFC 3.1.1.2.2. 
+ switch blockType { + case 0: + r.setBufferSize(blockSize) + if _, err := io.ReadFull(r.r, r.buffer); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + relativeOffset += blockSize + r.blockOffset += int64(relativeOffset) + case 1: + r.setBufferSize(blockSize) + if _, err := io.ReadFull(r.r, r.scratch[:1]); err != nil { + return r.wrapNonEOFError(relativeOffset, err) + } + relativeOffset++ + v := r.scratch[0] + for i := range r.buffer { + r.buffer[i] = v + } + r.blockOffset += int64(relativeOffset) + case 2: + r.blockOffset += int64(relativeOffset) + if err := r.compressedBlock(blockSize); err != nil { + return err + } + r.blockOffset += int64(blockSize) + case 3: + return r.makeError(relativeOffset, "invalid block type") + } + + if !r.frameSizeUnknown { + if uint64(len(r.buffer)) > r.remainingFrameSize { + return r.makeError(relativeOffset, "too many uncompressed bytes in frame") + } + r.remainingFrameSize -= uint64(len(r.buffer)) + } + + if r.hasChecksum { + r.checksum.update(r.buffer) + } + + if !lastBlock { + r.window.save(r.buffer) + } else { + if !r.frameSizeUnknown && r.remainingFrameSize != 0 { + return r.makeError(relativeOffset, "not enough uncompressed bytes for frame") + } + // Check for checksum at end of frame. RFC 3.1.1. + if r.hasChecksum { + if _, err := io.ReadFull(r.r, r.scratch[:4]); err != nil { + return r.wrapNonEOFError(0, err) + } + + inputChecksum := binary.LittleEndian.Uint32(r.scratch[:4]) + dataChecksum := uint32(r.checksum.digest()) + if inputChecksum != dataChecksum { + return r.wrapError(0, fmt.Errorf("invalid checksum: got %#x want %#x", dataChecksum, inputChecksum)) + } + + r.blockOffset += 4 + } + r.sawFrameHeader = false + } + + return nil +} + +// setBufferSize sets the decompressed buffer size. +// When this is called the buffer is empty. +func (r *Reader) setBufferSize(size int) { + if cap(r.buffer) < size { + need := size - cap(r.buffer) + r.buffer = append(r.buffer[:cap(r.buffer)], make([]byte, need)...) 
+ } + r.buffer = r.buffer[:size] +} + +// zstdError is an error while decompressing. +type zstdError struct { + offset int64 + err error +} + +func (ze *zstdError) Error() string { + return fmt.Sprintf("zstd decompression error at %d: %v", ze.offset, ze.err) +} + +func (ze *zstdError) Unwrap() error { + return ze.err +} + +func (r *Reader) makeEOFError(off int) error { + return r.wrapError(off, io.ErrUnexpectedEOF) +} + +func (r *Reader) wrapNonEOFError(off int, err error) error { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return r.wrapError(off, err) +} + +func (r *Reader) makeError(off int, msg string) error { + return r.wrapError(off, errors.New(msg)) +} + +func (r *Reader) wrapError(off int, err error) error { + if err == io.EOF { + return err + } + return &zstdError{r.blockOffset + int64(off), err} +} diff --git a/internal/zstd/zstd_test.go b/internal/zstd/zstd_test.go new file mode 100644 index 0000000..f2a2e1b --- /dev/null +++ b/internal/zstd/zstd_test.go @@ -0,0 +1,335 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zstd + +import ( + "bytes" + "crypto/sha256" + "fmt" + "internal/race" + "internal/testenv" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "testing" +) + +// tests holds some simple test cases, including some found by fuzzing. +var tests = []struct { + name, uncompressed, compressed string +}{ + { + "hello", + "hello, world\n", + "\x28\xb5\x2f\xfd\x24\x0d\x69\x00\x00\x68\x65\x6c\x6c\x6f\x2c\x20\x77\x6f\x72\x6c\x64\x0a\x4c\x1f\xf9\xf1", + }, + { + // a small compressed .debug_ranges section. 
+ "ranges", + "\xcc\x11\x00\x00\x00\x00\x00\x00\xd5\x13\x00\x00\x00\x00\x00\x00" + + "\x1c\x14\x00\x00\x00\x00\x00\x00\x72\x14\x00\x00\x00\x00\x00\x00" + + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\xfb\x12\x00\x00\x00\x00\x00\x00\x09\x13\x00\x00\x00\x00\x00\x00" + + "\x0c\x13\x00\x00\x00\x00\x00\x00\xcb\x13\x00\x00\x00\x00\x00\x00" + + "\x29\x14\x00\x00\x00\x00\x00\x00\x4e\x14\x00\x00\x00\x00\x00\x00" + + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\xfb\x12\x00\x00\x00\x00\x00\x00\x09\x13\x00\x00\x00\x00\x00\x00" + + "\x67\x13\x00\x00\x00\x00\x00\x00\xcb\x13\x00\x00\x00\x00\x00\x00" + + "\x9d\x14\x00\x00\x00\x00\x00\x00\xd5\x14\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\x5f\x0b\x00\x00\x00\x00\x00\x00\x6c\x0b\x00\x00\x00\x00\x00\x00" + + "\x7d\x0b\x00\x00\x00\x00\x00\x00\x7e\x0c\x00\x00\x00\x00\x00\x00" + + "\x38\x0f\x00\x00\x00\x00\x00\x00\x5c\x0f\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\x83\x0c\x00\x00\x00\x00\x00\x00\xfa\x0c\x00\x00\x00\x00\x00\x00" + + "\xfd\x0d\x00\x00\x00\x00\x00\x00\xef\x0e\x00\x00\x00\x00\x00\x00" + + "\x14\x0f\x00\x00\x00\x00\x00\x00\x38\x0f\x00\x00\x00\x00\x00\x00" + + "\x9f\x0f\x00\x00\x00\x00\x00\x00\xac\x0f\x00\x00\x00\x00\x00\x00" + + "\xdb\x0f\x00\x00\x00\x00\x00\x00\xff\x0f\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\xfd\x0d\x00\x00\x00\x00\x00\x00\xd8\x0e\x00\x00\x00\x00\x00\x00" + + "\x9f\x0f\x00\x00\x00\x00\x00\x00\xac\x0f\x00\x00\x00\x00\x00\x00" + + "\xdb\x0f\x00\x00\x00\x00\x00\x00\xff\x0f\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\xfa\x0c\x00\x00\x00\x00\x00\x00\xea\x0d\x00\x00\x00\x00\x00\x00" + 
+ "\xef\x0e\x00\x00\x00\x00\x00\x00\x14\x0f\x00\x00\x00\x00\x00\x00" + + "\x5c\x0f\x00\x00\x00\x00\x00\x00\x9f\x0f\x00\x00\x00\x00\x00\x00" + + "\xac\x0f\x00\x00\x00\x00\x00\x00\xdb\x0f\x00\x00\x00\x00\x00\x00" + + "\xff\x0f\x00\x00\x00\x00\x00\x00\x2c\x10\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\x60\x11\x00\x00\x00\x00\x00\x00\xd1\x16\x00\x00\x00\x00\x00\x00" + + "\x40\x0b\x00\x00\x00\x00\x00\x00\x2c\x10\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\x7a\x00\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00" + + "\x9f\x01\x00\x00\x00\x00\x00\x00\xa7\x01\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + + "\x7a\x00\x00\x00\x00\x00\x00\x00\xa9\x00\x00\x00\x00\x00\x00\x00" + + "\x9f\x01\x00\x00\x00\x00\x00\x00\xa7\x01\x00\x00\x00\x00\x00\x00" + + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + + "\x28\xb5\x2f\xfd\x64\xa0\x01\x2d\x05\x00\xc4\x04\xcc\x11\x00\xd5" + + "\x13\x00\x1c\x14\x00\x72\x9d\xd5\xfb\x12\x00\x09\x0c\x13\xcb\x13" + + "\x29\x4e\x67\x5f\x0b\x6c\x0b\x7d\x0b\x7e\x0c\x38\x0f\x5c\x0f\x83" + + "\x0c\xfa\x0c\xfd\x0d\xef\x0e\x14\x38\x9f\x0f\xac\x0f\xdb\x0f\xff" + + "\x0f\xd8\x9f\xac\xdb\xff\xea\x5c\x2c\x10\x60\xd1\x16\x40\x0b\x7a" + + "\x00\xb6\x00\x9f\x01\xa7\x01\xa9\x36\x20\xa0\x83\x14\x34\x63\x4a" + + "\x21\x70\x8c\x07\x46\x03\x4e\x10\x62\x3c\x06\x4e\xc8\x8c\xb0\x32" + + "\x2a\x59\xad\xb2\xf1\x02\x82\x7c\x33\xcb\x92\x6f\x32\x4f\x9b\xb0" + + "\xa2\x30\xf0\xc0\x06\x1e\x98\x99\x2c\x06\x1e\xd8\xc0\x03\x56\xd8" + + "\xc0\x03\x0f\x6c\xe0\x01\xf1\xf0\xee\x9a\xc6\xc8\x97\x99\xd1\x6c" + + "\xb4\x21\x45\x3b\x10\xe4\x7b\x99\x4d\x8a\x36\x64\x5c\x77\x08\x02" + + "\xcb\xe0\xce", + }, + { + "fuzz1", + "0\x00\x00\x00\x00\x000\x00\x00\x00\x00\x001\x00\x00\x00\x00\x000000", + "(\xb5/\xfd\x04X\x8d\x00\x00P0\x000\x001\x000000\x03T\x02\x00\x01\x01m\xf9\xb7G", + }, + { + "empty block", + 
"", + "\x28\xb5\x2f\xfd\x00\x00\x15\x00\x00\x00\x00", + }, + { + "single skippable frame", + "", + "\x50\x2a\x4d\x18\x00\x00\x00\x00", + }, + { + "two skippable frames", + "", + "\x50\x2a\x4d\x18\x00\x00\x00\x00" + + "\x50\x2a\x4d\x18\x00\x00\x00\x00", + }, +} + +func TestSamples(t *testing.T) { + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + r := NewReader(strings.NewReader(test.compressed)) + got, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + gotstr := string(got) + if gotstr != test.uncompressed { + t.Errorf("got %q want %q", gotstr, test.uncompressed) + } + }) + } +} + +func TestReset(t *testing.T) { + input := strings.NewReader("") + r := NewReader(input) + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + input.Reset(test.compressed) + r.Reset(input) + got, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + gotstr := string(got) + if gotstr != test.uncompressed { + t.Errorf("got %q want %q", gotstr, test.uncompressed) + } + }) + } +} + +var ( + bigDataOnce sync.Once + bigDataBytes []byte + bigDataErr error +) + +// bigData returns the contents of our large test file repeated multiple times. +func bigData(t testing.TB) []byte { + bigDataOnce.Do(func() { + bigDataBytes, bigDataErr = os.ReadFile("../../testdata/Isaac.Newton-Opticks.txt") + if bigDataErr == nil { + bigDataBytes = bytes.Repeat(bigDataBytes, 20) + } + }) + if bigDataErr != nil { + t.Fatal(bigDataErr) + } + return bigDataBytes +} + +func findZstd(t testing.TB) string { + zstd, err := exec.LookPath("zstd") + if err != nil { + t.Skip("skipping because zstd not found") + } + return zstd +} + +var ( + zstdBigOnce sync.Once + zstdBigBytes []byte + zstdBigErr error +) + +// zstdBigData returns the compressed contents of our large test file. +// This will only run on Unix systems with zstd installed. +// That's OK as the package is GOOS-independent. 
+func zstdBigData(t testing.TB) []byte { + input := bigData(t) + + zstd := findZstd(t) + + zstdBigOnce.Do(func() { + cmd := exec.Command(zstd, "-z") + cmd.Stdin = bytes.NewReader(input) + var compressed bytes.Buffer + cmd.Stdout = &compressed + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + zstdBigErr = fmt.Errorf("running zstd failed: %v", err) + return + } + + zstdBigBytes = compressed.Bytes() + }) + if zstdBigErr != nil { + t.Fatal(zstdBigErr) + } + return zstdBigBytes +} + +// Test decompressing a large file. We don't have a compressor, +// so this test only runs on systems with zstd installed. +func TestLarge(t *testing.T) { + if testing.Short() { + t.Skip("skipping expensive test in short mode") + } + + data := bigData(t) + compressed := zstdBigData(t) + + t.Logf("zstd compressed %d bytes to %d", len(data), len(compressed)) + + r := NewReader(bytes.NewReader(compressed)) + got, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + + if !bytes.Equal(got, data) { + showDiffs(t, got, data) + } +} + +// showDiffs reports the first few differences in two []byte. 
+func showDiffs(t *testing.T, got, want []byte) { + t.Error("data mismatch") + if len(got) != len(want) { + t.Errorf("got data length %d, want %d", len(got), len(want)) + } + diffs := 0 + for i, b := range got { + if i >= len(want) { + break + } + if b != want[i] { + diffs++ + if diffs > 20 { + break + } + t.Logf("%d: %#x != %#x", i, b, want[i]) + } + } +} + +func TestAlloc(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping allocation test under race detector") + } + + compressed := zstdBigData(t) + input := bytes.NewReader(compressed) + r := NewReader(input) + c := testing.AllocsPerRun(10, func() { + input.Reset(compressed) + r.Reset(input) + io.Copy(io.Discard, r) + }) + if c != 0 { + t.Errorf("got %v allocs, want 0", c) + } +} + +func TestFileSamples(t *testing.T) { + samples, err := os.ReadDir("testdata") + if err != nil { + t.Fatal(err) + } + + for _, sample := range samples { + name := sample.Name() + if !strings.HasSuffix(name, ".zst") { + continue + } + + t.Run(name, func(t *testing.T) { + f, err := os.Open(filepath.Join("testdata", name)) + if err != nil { + t.Fatal(err) + } + + r := NewReader(f) + h := sha256.New() + if _, err := io.Copy(h, r); err != nil { + t.Fatal(err) + } + got := fmt.Sprintf("%x", h.Sum(nil))[:8] + + want, _, _ := strings.Cut(name, ".") + if got != want { + t.Errorf("Wrong uncompressed content hash: got %s, want %s", got, want) + } + }) + } +} + +func TestReaderBad(t *testing.T) { + for i, s := range badStrings { + t.Run(fmt.Sprintf("badStrings#%d", i), func(t *testing.T) { + _, err := io.Copy(io.Discard, NewReader(strings.NewReader(s))) + if err == nil { + t.Error("expected error") + } + }) + } +} + +func BenchmarkLarge(b *testing.B) { + b.StopTimer() + b.ReportAllocs() + + compressed := zstdBigData(b) + + b.SetBytes(int64(len(compressed))) + + input := bytes.NewReader(compressed) + r := NewReader(input) + + b.StartTimer() + for i := 0; i < b.N; i++ { + input.Reset(compressed) + r.Reset(input) 
+ io.Copy(io.Discard, r) + } +} diff --git a/plan9obj/file.go b/plan9obj/file.go index 314608d..bdedcb7 100644 --- a/plan9obj/file.go +++ b/plan9obj/file.go @@ -2,13 +2,24 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package plan9obj implements access to Plan 9 a.out object files. +/* +Package plan9obj implements access to Plan 9 a.out object files. + +# Security + +This package is not designed to be hardened against adversarial inputs, and is +outside the scope of https://go.dev/security/policy. In particular, only basic +validation is done when parsing object files. As such, care should be taken when +parsing untrusted inputs, as parsing malformed files may consume significant +resources, or cause panics. +*/ package plan9obj import ( "encoding/binary" "errors" "fmt" + "binject-debug/internal/saferio" "io" "os" ) @@ -55,12 +66,7 @@ type Section struct { // Data reads and returns the contents of the Plan 9 a.out section. func (s *Section) Data() ([]byte, error) { - dat := make([]byte, s.sr.Size()) - n, err := s.sr.ReadAt(dat, 0) - if n == len(dat) { - err = nil - } - return dat[0:n], err + return saferio.ReadDataAt(s.sr, uint64(s.Size), 0) } // Open returns a new ReadSeeker reading the Plan 9 a.out section. @@ -82,7 +88,7 @@ type Sym struct { type formatError struct { off int msg string - val interface{} + val any } func (e *formatError) Error() string { @@ -94,7 +100,7 @@ func (e *formatError) Error() string { return msg } -// Open opens the named file using os.Open and prepares it for use as a Plan 9 a.out binary. +// Open opens the named file using [os.Open] and prepares it for use as a Plan 9 a.out binary. func Open(name string) (*File, error) { f, err := os.Open(name) if err != nil { @@ -109,8 +115,8 @@ func Open(name string) (*File, error) { return ff, nil } -// Close closes the File. -// If the File was created using NewFile directly instead of Open, +// Close closes the [File]. 
+// If the [File] was created using [NewFile] directly instead of [Open], // Close has no effect. func (f *File) Close() error { var err error @@ -130,7 +136,7 @@ func parseMagic(magic []byte) (uint32, error) { return 0, &formatError{0, "bad magic number", magic} } -// NewFile creates a new File for accessing a Plan 9 binary in an underlying reader. +// NewFile creates a new [File] for accessing a Plan 9 binary in an underlying reader. // The Plan 9 binary is expected to start at position 0 in the ReaderAt. func NewFile(r io.ReaderAt) (*File, error) { sr := io.NewSectionReader(r, 0, 1<<63-1) @@ -216,8 +222,10 @@ func walksymtab(data []byte, ptrsz int, fn func(sym) error) error { p = p[4:] } - var typ byte - typ = p[0] & 0x7F + if len(p) < 1 { + return &formatError{len(data), "unexpected EOF", nil} + } + typ := p[0] & 0x7F s.typ = typ p = p[1:] @@ -252,7 +260,7 @@ func walksymtab(data []byte, ptrsz int, fn func(sym) error) error { return nil } -// NewTable decodes the Go symbol table in data, +// newTable decodes the Go symbol table in data, // returning an in-memory representation. func newTable(symtab []byte, ptrsz int) ([]Sym, error) { var n int @@ -301,11 +309,15 @@ func newTable(symtab []byte, ptrsz int) ([]Sym, error) { return syms, nil } +// ErrNoSymbols is returned by [File.Symbols] if there is no such section +// in the File. +var ErrNoSymbols = errors.New("no symbol section") + // Symbols returns the symbol table for f. func (f *File) Symbols() ([]Sym, error) { symtabSection := f.Section("syms") if symtabSection == nil { - return nil, errors.New("no symbol section") + return nil, ErrNoSymbols } symtab, err := symtabSection.Data()