518 lines
13 KiB
Go
518 lines
13 KiB
Go
// Copyright 2021 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package pkgbits
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"go/constant"
|
|
"go/token"
|
|
"io"
|
|
"math/big"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
)
|
|
|
|
// A PkgDecoder provides methods for decoding a package's Unified IR
|
|
// export data.
|
|
type PkgDecoder struct {
|
|
// version is the file format version.
|
|
version uint32
|
|
|
|
// sync indicates whether the file uses sync markers.
|
|
sync bool
|
|
|
|
// pkgPath is the package path for the package to be decoded.
|
|
//
|
|
// TODO(mdempsky): Remove; unneeded since CL 391014.
|
|
pkgPath string
|
|
|
|
// elemData is the full data payload of the encoded package.
|
|
// Elements are densely and contiguously packed together.
|
|
//
|
|
// The last 8 bytes of elemData are the package fingerprint.
|
|
elemData string
|
|
|
|
// elemEnds stores the byte-offset end positions of element
|
|
// bitstreams within elemData.
|
|
//
|
|
// For example, element I's bitstream data starts at elemEnds[I-1]
|
|
// (or 0, if I==0) and ends at elemEnds[I].
|
|
//
|
|
// Note: elemEnds is indexed by absolute indices, not
|
|
// section-relative indices.
|
|
elemEnds []uint32
|
|
|
|
// elemEndsEnds stores the index-offset end positions of relocation
|
|
// sections within elemEnds.
|
|
//
|
|
// For example, section K's end positions start at elemEndsEnds[K-1]
|
|
// (or 0, if K==0) and end at elemEndsEnds[K].
|
|
elemEndsEnds [numRelocs]uint32
|
|
|
|
scratchRelocEnt []RelocEnt
|
|
}
|
|
|
|
// PkgPath returns the package path for the package
|
|
//
|
|
// TODO(mdempsky): Remove; unneeded since CL 391014.
|
|
func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
|
|
|
|
// SyncMarkers reports whether pr uses sync markers.
|
|
func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
|
|
|
|
// NewPkgDecoder returns a PkgDecoder initialized to read the Unified
|
|
// IR export data from input. pkgPath is the package path for the
|
|
// compilation unit that produced the export data.
|
|
//
|
|
// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
|
|
func NewPkgDecoder(pkgPath, input string) PkgDecoder {
|
|
pr := PkgDecoder{
|
|
pkgPath: pkgPath,
|
|
}
|
|
|
|
// TODO(mdempsky): Implement direct indexing of input string to
|
|
// avoid copying the position information.
|
|
|
|
r := strings.NewReader(input)
|
|
|
|
assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
|
|
|
|
switch pr.version {
|
|
default:
|
|
panic(fmt.Errorf("unsupported version: %v", pr.version))
|
|
case 0:
|
|
// no flags
|
|
case 1:
|
|
var flags uint32
|
|
assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
|
|
pr.sync = flags&flagSyncMarkers != 0
|
|
}
|
|
|
|
assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
|
|
|
|
pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
|
|
assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
|
|
|
|
pos, err := r.Seek(0, io.SeekCurrent)
|
|
assert(err == nil)
|
|
|
|
pr.elemData = input[pos:]
|
|
assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
|
|
|
|
return pr
|
|
}
|
|
|
|
// NumElems returns the number of elements in section k.
|
|
func (pr *PkgDecoder) NumElems(k RelocKind) int {
|
|
count := int(pr.elemEndsEnds[k])
|
|
if k > 0 {
|
|
count -= int(pr.elemEndsEnds[k-1])
|
|
}
|
|
return count
|
|
}
|
|
|
|
// TotalElems returns the total number of elements across all sections.
|
|
func (pr *PkgDecoder) TotalElems() int {
|
|
return len(pr.elemEnds)
|
|
}
|
|
|
|
// Fingerprint returns the package fingerprint.
|
|
func (pr *PkgDecoder) Fingerprint() [8]byte {
|
|
var fp [8]byte
|
|
copy(fp[:], pr.elemData[len(pr.elemData)-8:])
|
|
return fp
|
|
}
|
|
|
|
// AbsIdx returns the absolute index for the given (section, index)
|
|
// pair.
|
|
func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
|
|
absIdx := int(idx)
|
|
if k > 0 {
|
|
absIdx += int(pr.elemEndsEnds[k-1])
|
|
}
|
|
if absIdx >= int(pr.elemEndsEnds[k]) {
|
|
errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
|
|
}
|
|
return absIdx
|
|
}
|
|
|
|
// DataIdx returns the raw element bitstream for the given (section,
|
|
// index) pair.
|
|
func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
|
|
absIdx := pr.AbsIdx(k, idx)
|
|
|
|
var start uint32
|
|
if absIdx > 0 {
|
|
start = pr.elemEnds[absIdx-1]
|
|
}
|
|
end := pr.elemEnds[absIdx]
|
|
|
|
return pr.elemData[start:end]
|
|
}
|
|
|
|
// StringIdx returns the string value for the given string index.
|
|
func (pr *PkgDecoder) StringIdx(idx Index) string {
|
|
return pr.DataIdx(RelocString, idx)
|
|
}
|
|
|
|
// NewDecoder returns a Decoder for the given (section, index) pair,
|
|
// and decodes the given SyncMarker from the element bitstream.
|
|
func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
|
|
r := pr.NewDecoderRaw(k, idx)
|
|
r.Sync(marker)
|
|
return r
|
|
}
|
|
|
|
// TempDecoder returns a Decoder for the given (section, index) pair,
|
|
// and decodes the given SyncMarker from the element bitstream.
|
|
// If possible the Decoder should be RetireDecoder'd when it is no longer
|
|
// needed, this will avoid heap allocations.
|
|
func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
|
|
r := pr.TempDecoderRaw(k, idx)
|
|
r.Sync(marker)
|
|
return r
|
|
}
|
|
|
|
func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
|
|
pr.scratchRelocEnt = d.Relocs
|
|
d.Relocs = nil
|
|
}
|
|
|
|
// NewDecoderRaw returns a Decoder for the given (section, index) pair.
|
|
//
|
|
// Most callers should use NewDecoder instead.
|
|
func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
|
|
r := Decoder{
|
|
common: pr,
|
|
k: k,
|
|
Idx: idx,
|
|
}
|
|
|
|
// TODO(mdempsky) r.data.Reset(...) after #44505 is resolved.
|
|
r.Data = *strings.NewReader(pr.DataIdx(k, idx))
|
|
|
|
r.Sync(SyncRelocs)
|
|
r.Relocs = make([]RelocEnt, r.Len())
|
|
for i := range r.Relocs {
|
|
r.Sync(SyncReloc)
|
|
r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
|
|
}
|
|
|
|
return r
|
|
}
|
|
|
|
func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
|
|
r := Decoder{
|
|
common: pr,
|
|
k: k,
|
|
Idx: idx,
|
|
}
|
|
|
|
r.Data.Reset(pr.DataIdx(k, idx))
|
|
r.Sync(SyncRelocs)
|
|
l := r.Len()
|
|
if cap(pr.scratchRelocEnt) >= l {
|
|
r.Relocs = pr.scratchRelocEnt[:l]
|
|
pr.scratchRelocEnt = nil
|
|
} else {
|
|
r.Relocs = make([]RelocEnt, l)
|
|
}
|
|
for i := range r.Relocs {
|
|
r.Sync(SyncReloc)
|
|
r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
|
|
}
|
|
|
|
return r
|
|
}
|
|
|
|
// A Decoder provides methods for decoding an individual element's
|
|
// bitstream data.
|
|
type Decoder struct {
|
|
common *PkgDecoder
|
|
|
|
Relocs []RelocEnt
|
|
Data strings.Reader
|
|
|
|
k RelocKind
|
|
Idx Index
|
|
}
|
|
|
|
func (r *Decoder) checkErr(err error) {
|
|
if err != nil {
|
|
errorf("unexpected decoding error: %w", err)
|
|
}
|
|
}
|
|
|
|
func (r *Decoder) rawUvarint() uint64 {
|
|
x, err := readUvarint(&r.Data)
|
|
r.checkErr(err)
|
|
return x
|
|
}
|
|
|
|
// readUvarint decodes one unsigned varint from r. It is a copy of
// encoding/binary.ReadUvarint specialized to *strings.Reader, which
// avoids an interface conversion and so gives better escape
// properties to its callers up the stack.
//
// It returns io.EOF if no bytes could be read, io.ErrUnexpectedEOF if
// the input ends partway through a value, and overflow if the encoding
// does not fit in 64 bits. On error, the partially accumulated value
// is returned alongside it.
func readUvarint(r *strings.Reader) (uint64, error) {
	// lastByte is the index of the final byte a 64-bit varint may use.
	const lastByte = binary.MaxVarintLen64 - 1

	var (
		acc   uint64
		shift uint
	)
	for n := 0; n <= lastByte; n++ {
		c, err := r.ReadByte()
		switch {
		case err == io.EOF && n > 0:
			// Running dry after at least one byte is a truncation.
			return acc, io.ErrUnexpectedEOF
		case err != nil:
			return acc, err
		}

		if c&0x80 == 0 {
			// No continuation bit: this is the final byte. In the last
			// permitted position it may contribute only a single bit.
			if n == lastByte && c > 1 {
				return acc, overflow
			}
			return acc | uint64(c)<<shift, nil
		}

		acc |= uint64(c&0x7f) << shift
		shift += 7
	}
	return acc, overflow
}

// overflow is returned by readUvarint when the encoded value does not
// fit in a uint64.
var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
|
|
|
|
func (r *Decoder) rawVarint() int64 {
|
|
ux := r.rawUvarint()
|
|
|
|
// Zig-zag decode.
|
|
x := int64(ux >> 1)
|
|
if ux&1 != 0 {
|
|
x = ^x
|
|
}
|
|
return x
|
|
}
|
|
|
|
func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
|
|
e := r.Relocs[idx]
|
|
assert(e.Kind == k)
|
|
return e.Idx
|
|
}
|
|
|
|
// Sync decodes a sync marker from the element bitstream and asserts
|
|
// that it matches the expected marker.
|
|
//
|
|
// If r.common.sync is false, then Sync is a no-op.
|
|
func (r *Decoder) Sync(mWant SyncMarker) {
|
|
if !r.common.sync {
|
|
return
|
|
}
|
|
|
|
pos, _ := r.Data.Seek(0, io.SeekCurrent)
|
|
mHave := SyncMarker(r.rawUvarint())
|
|
writerPCs := make([]int, r.rawUvarint())
|
|
for i := range writerPCs {
|
|
writerPCs[i] = int(r.rawUvarint())
|
|
}
|
|
|
|
if mHave == mWant {
|
|
return
|
|
}
|
|
|
|
// There's some tension here between printing:
|
|
//
|
|
// (1) full file paths that tools can recognize (e.g., so emacs
|
|
// hyperlinks the "file:line" text for easy navigation), or
|
|
//
|
|
// (2) short file paths that are easier for humans to read (e.g., by
|
|
// omitting redundant or irrelevant details, so it's easier to
|
|
// focus on the useful bits that remain).
|
|
//
|
|
// The current formatting favors the former, as it seems more
|
|
// helpful in practice. But perhaps the formatting could be improved
|
|
// to better address both concerns. For example, use relative file
|
|
// paths if they would be shorter, or rewrite file paths to contain
|
|
// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
|
|
// to reliably expand that again.
|
|
|
|
fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
|
|
|
|
fmt.Printf("\nfound %v, written at:\n", mHave)
|
|
if len(writerPCs) == 0 {
|
|
fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
|
|
}
|
|
for _, pc := range writerPCs {
|
|
fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
|
|
}
|
|
|
|
fmt.Printf("\nexpected %v, reading at:\n", mWant)
|
|
var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
|
|
n := runtime.Callers(2, readerPCs[:])
|
|
for _, pc := range fmtFrames(readerPCs[:n]...) {
|
|
fmt.Printf("\t%s\n", pc)
|
|
}
|
|
|
|
// We already printed a stack trace for the reader, so now we can
|
|
// simply exit. Printing a second one with panic or base.Fatalf
|
|
// would just be noise.
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Bool decodes and returns a bool value from the element bitstream.
|
|
func (r *Decoder) Bool() bool {
|
|
r.Sync(SyncBool)
|
|
x, err := r.Data.ReadByte()
|
|
r.checkErr(err)
|
|
assert(x < 2)
|
|
return x != 0
|
|
}
|
|
|
|
// Int64 decodes and returns an int64 value from the element bitstream.
|
|
func (r *Decoder) Int64() int64 {
|
|
r.Sync(SyncInt64)
|
|
return r.rawVarint()
|
|
}
|
|
|
|
// Uint64 decodes and returns a uint64 value from the element bitstream.
|
|
func (r *Decoder) Uint64() uint64 {
|
|
r.Sync(SyncUint64)
|
|
return r.rawUvarint()
|
|
}
|
|
|
|
// Len decodes and returns a non-negative int value from the element bitstream.
|
|
func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
|
|
|
|
// Int decodes and returns an int value from the element bitstream.
|
|
func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
|
|
|
|
// Uint decodes and returns a uint value from the element bitstream.
|
|
func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
|
|
|
|
// Code decodes a Code value from the element bitstream and returns
|
|
// its ordinal value. It's the caller's responsibility to convert the
|
|
// result to an appropriate Code type.
|
|
//
|
|
// TODO(mdempsky): Ideally this method would have signature "Code[T
|
|
// Code] T" instead, but we don't allow generic methods and the
|
|
// compiler can't depend on generics yet anyway.
|
|
func (r *Decoder) Code(mark SyncMarker) int {
|
|
r.Sync(mark)
|
|
return r.Len()
|
|
}
|
|
|
|
// Reloc decodes a relocation of expected section k from the element
|
|
// bitstream and returns an index to the referenced element.
|
|
func (r *Decoder) Reloc(k RelocKind) Index {
|
|
r.Sync(SyncUseReloc)
|
|
return r.rawReloc(k, r.Len())
|
|
}
|
|
|
|
// String decodes and returns a string value from the element
|
|
// bitstream.
|
|
func (r *Decoder) String() string {
|
|
r.Sync(SyncString)
|
|
return r.common.StringIdx(r.Reloc(RelocString))
|
|
}
|
|
|
|
// Strings decodes and returns a variable-length slice of strings from
|
|
// the element bitstream.
|
|
func (r *Decoder) Strings() []string {
|
|
res := make([]string, r.Len())
|
|
for i := range res {
|
|
res[i] = r.String()
|
|
}
|
|
return res
|
|
}
|
|
|
|
// Value decodes and returns a constant.Value from the element
|
|
// bitstream.
|
|
func (r *Decoder) Value() constant.Value {
|
|
r.Sync(SyncValue)
|
|
isComplex := r.Bool()
|
|
val := r.scalar()
|
|
if isComplex {
|
|
val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
|
|
}
|
|
return val
|
|
}
|
|
|
|
func (r *Decoder) scalar() constant.Value {
|
|
switch tag := CodeVal(r.Code(SyncVal)); tag {
|
|
default:
|
|
panic(fmt.Errorf("unexpected scalar tag: %v", tag))
|
|
|
|
case ValBool:
|
|
return constant.MakeBool(r.Bool())
|
|
case ValString:
|
|
return constant.MakeString(r.String())
|
|
case ValInt64:
|
|
return constant.MakeInt64(r.Int64())
|
|
case ValBigInt:
|
|
return constant.Make(r.bigInt())
|
|
case ValBigRat:
|
|
num := r.bigInt()
|
|
denom := r.bigInt()
|
|
return constant.Make(new(big.Rat).SetFrac(num, denom))
|
|
case ValBigFloat:
|
|
return constant.Make(r.bigFloat())
|
|
}
|
|
}
|
|
|
|
func (r *Decoder) bigInt() *big.Int {
|
|
v := new(big.Int).SetBytes([]byte(r.String()))
|
|
if r.Bool() {
|
|
v.Neg(v)
|
|
}
|
|
return v
|
|
}
|
|
|
|
func (r *Decoder) bigFloat() *big.Float {
|
|
v := new(big.Float).SetPrec(512)
|
|
assert(v.UnmarshalText([]byte(r.String())) == nil)
|
|
return v
|
|
}
|
|
|
|
// @@@ Helpers
|
|
|
|
// TODO(mdempsky): These should probably be removed. I think they're a
|
|
// smell that the export data format is not yet quite right.
|
|
|
|
// PeekPkgPath returns the package path for the specified package
|
|
// index.
|
|
func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
|
|
var path string
|
|
{
|
|
r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
|
|
path = r.String()
|
|
pr.RetireDecoder(&r)
|
|
}
|
|
if path == "" {
|
|
path = pr.pkgPath
|
|
}
|
|
return path
|
|
}
|
|
|
|
// PeekObj returns the package path, object name, and CodeObj for the
|
|
// specified object index.
|
|
func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
|
|
var ridx Index
|
|
var name string
|
|
var rcode int
|
|
{
|
|
r := pr.TempDecoder(RelocName, idx, SyncObject1)
|
|
r.Sync(SyncSym)
|
|
r.Sync(SyncPkg)
|
|
ridx = r.Reloc(RelocPkg)
|
|
name = r.String()
|
|
rcode = r.Code(SyncCodeObj)
|
|
pr.RetireDecoder(&r)
|
|
}
|
|
|
|
path := pr.PeekPkgPath(ridx)
|
|
assert(name != "")
|
|
|
|
tag := CodeObj(rcode)
|
|
|
|
return path, name, tag
|
|
}
|