diff --git a/cmd/car/car.go b/cmd/car/car.go index d34ce416..e88a06f8 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -15,6 +15,38 @@ func main1() int { Name: "car", Usage: "Utility for working with car files", Commands: []*cli.Command{ + { + Name: "convert", + Usage: "Convert a car file to given codec", + Aliases: []string{"con"}, + Action: ConvertCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "selector", + Aliases: []string{"s"}, + Usage: "A selector over the dag", + }, + }, + }, + { + Name: "concatenate", + Usage: "Concatenate car files", + Aliases: []string{"cat"}, + Action: CatCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "file", + Aliases: []string{"f", "output", "o"}, + Usage: "The car file to write to", + TakesFile: true, + }, + &cli.IntFlag{ + Name: "version", + Value: 2, + Usage: "Write output as a v1 or v2 format car", + }, + }, + }, { Name: "create", Usage: "Create a car file", @@ -109,6 +141,19 @@ func main1() int { }, }, }, + { + Name: "import", + Usage: "Import a block into a car file", + Action: ImportCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "codec", + Aliases: []string{"c"}, + Usage: "The codec the block data should be interpreted with", + Value: multicodec.DagJson.String(), + }, + }, + }, { Name: "index", Aliases: []string{"i"}, @@ -130,7 +175,7 @@ func main1() int { }, { Name: "list", - Aliases: []string{"l"}, + Aliases: []string{"l", "ls"}, Usage: "List the CIDs in a car", Action: ListCar, Flags: []cli.Flag{ diff --git a/cmd/car/concatenate.go b/cmd/car/concatenate.go new file mode 100644 index 00000000..654c5d1c --- /dev/null +++ b/cmd/car/concatenate.go @@ -0,0 +1,79 @@ +package main + +import ( + "fmt" + "io" + "os" + + "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/blockstore" + "github.com/urfave/cli/v2" +) + +// CatCar will concatenate the blocks from a set of source car files together into a +// combined destination car file. +// The root of the destination car will be the roots of the last specified source car. +func CatCar(c *cli.Context) error { + var err error + if c.Args().Len() == 0 { + return fmt.Errorf("a least one source from must be specified") + } + + if !c.IsSet("file") { + return fmt.Errorf("a file destination must be specified") + } + + options := []car.Option{} + switch c.Int("version") { + case 1: + options = []car.Option{blockstore.WriteAsCarV1(true)} + case 2: + // already the default + default: + return fmt.Errorf("invalid CAR version %d", c.Int("version")) + } + + // peak at final root + lst := c.Args().Get(c.Args().Len() - 1) + lstStore, err := blockstore.OpenReadOnly(lst) + if err != nil { + return err + } + roots, err := lstStore.Roots() + if err != nil { + return err + } + _ = lstStore.Close() + + cdest, err := blockstore.OpenReadWrite(c.String("file"), roots, options...) + if err != nil { + return err + } + + for _, src := range c.Args().Slice() { + f, err := os.Open(src) + if err != nil { + return err + } + blkRdr, err := car.NewBlockReader(f) + if err != nil { + return err + } + blk, err := blkRdr.Next() + for err != io.EOF { + if err := cdest.Put(c.Context, blk); err != nil { + return err + } + blk, err = blkRdr.Next() + if err != nil && err != io.EOF { + return err + } + } + + if err := f.Close(); err != nil { + return err + } + } + + return cdest.Finalize() +} diff --git a/cmd/car/convert.go b/cmd/car/convert.go new file mode 100644 index 00000000..1bf37aa5 --- /dev/null +++ b/cmd/car/convert.go @@ -0,0 +1,251 @@ +package main + +import ( + "bytes" + "fmt" + "io" + "os" + + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + ipfsbs "github.com/ipfs/go-ipfs-blockstore" + "github.com/ipld/go-car/v2/blockstore" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" + "github.com/ipld/go-ipld-prime/storage/memstore" + "github.com/ipld/go-ipld-prime/traversal" + "github.com/ipld/go-ipld-prime/traversal/selector" + selectorParser "github.com/ipld/go-ipld-prime/traversal/selector/parse" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" + "github.com/urfave/cli/v2" +) + +type children struct { + t int + done bool + old []cid.Cid + new []cid.Cid +} + +func proxyCid(proto cidlink.LinkPrototype) (cid.Cid, error) { + // make a cid with the right length that we eventually will patch with the root. + hasher, err := multihash.GetHasher(proto.MhType) + if err != nil { + return cid.Undef, err + } + digest := hasher.Sum([]byte{}) + hash, err := multihash.Encode(digest, proto.MhType) + if err != nil { + return cid.Undef, err + } + proxyRoot := cid.NewCidV1(uint64(proto.Codec), hash) + return proxyRoot, nil +} + +// ConvertCar will will re-write the blocks in a car to a specified codec. +func ConvertCar(c *cli.Context) error { + if c.Args().Len() < 2 { + return fmt.Errorf("Usage: convert [codec]") + } + + output := c.Args().Get(1) + bs, err := blockstore.OpenReadOnly(c.Args().Get(0)) + if err != nil { + return err + } + _ = os.Remove(output) + + convertTo := multicodec.DagJson + codec := "" + if c.Args().Len() > 2 { + codec = c.Args().Get(2) + } + for _, candidate := range multicodec.KnownCodes() { + if candidate.String() == codec { + convertTo = candidate + } + } + proto := cidlink.LinkPrototype{ + Prefix: cid.NewPrefixV1(uint64(convertTo), multihash.SHA2_256), + } + p, err := proxyCid(proto) + if err != nil { + return err + } + outStore, err := blockstore.OpenReadWrite(output, []cid.Cid{p}, blockstore.AllowDuplicatePuts(false)) + if err != nil { + return err + } + outls := cidlink.DefaultLinkSystem() + outls.TrustedStorage = true + outls.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { + buf := bytes.NewBuffer(nil) + return buf, func(l datamodel.Link) error { + c := l.(cidlink.Link).Cid + blk, _ := blocks.NewBlockWithCid(buf.Bytes(), c) + return outStore.Put(lc.Ctx, blk) + }, nil + } + + roots, err := bs.Roots() + if err != nil { + return err + } + if len(roots) != 1 { + return fmt.Errorf("car file has does not have exactly one root, dag root must be specified explicitly") + } + rootCid := roots[0] + + sel := selectorParser.CommonSelector_MatchAllRecursively + if c.IsSet("selector") { + sel, err = selectorParser.ParseJSONSelector(c.String("selector")) + if err != nil { + return err + } + } + linkVisitOnlyOnce := !c.IsSet("selector") // if using a custom selector, this isn't as safe + + workMap := make(map[cid.Cid]*children) + tempStore := memstore.Store{} + + // Step 1: traverse blocks into tempstore. populate workmap. + ls := cidlink.DefaultLinkSystem() + ls.TrustedStorage = true + ls.StorageReadOpener = func(_ linking.LinkContext, l datamodel.Link) (io.Reader, error) { + if cl, ok := l.(cidlink.Link); ok { + blk, err := bs.Get(c.Context, cl.Cid) + if err != nil { + if err == ipfsbs.ErrNotFound { + return nil, traversal.SkipMe{} + } + return nil, err + } + if err := tempStore.Put(c.Context, cl.Cid.String(), blk.RawData()); err != nil { + return nil, err + } + workMap[cl.Cid] = &children{} + return bytes.NewBuffer(blk.RawData()), nil + } + return nil, fmt.Errorf("unknown link type: %T", l) + } + + nsc := func(lnk datamodel.Link, lctx ipld.LinkContext) (datamodel.NodePrototype, error) { + if lnk, ok := lnk.(cidlink.Link); ok && lnk.Cid.Prefix().Codec == 0x70 { + return dagpb.Type.PBNode, nil + } + return basicnode.Prototype.Any, nil + } + + rootLink := cidlink.Link{Cid: rootCid} + ns, _ := nsc(rootLink, ipld.LinkContext{}) + rootNode, err := ls.Load(ipld.LinkContext{}, rootLink, ns) + if err != nil { + return err + } + + traversalProgress := traversal.Progress{ + Cfg: &traversal.Config{ + LinkSystem: ls, + LinkTargetNodePrototypeChooser: nsc, + LinkVisitOnlyOnce: linkVisitOnlyOnce, + }, + } + + s, err := selector.CompileSelector(sel) + if err != nil { + return err + } + + err = traversalProgress.WalkAdv(rootNode, s, func(traversal.Progress, datamodel.Node, traversal.VisitReason) error { return nil }) + if err != nil { + return err + } + + // Step 2: traverse workmap and load blocks to get old children. + for blkCid := range workMap { + old := make([]cid.Cid, 0) + lnk := cidlink.Link{Cid: blkCid} + ns, _ = nsc(lnk, ipld.LinkContext{}) + node, err := ls.Load(ipld.LinkContext{}, lnk, ns) + if err != nil { + return err + } + traversal.WalkLocal(node, func(p traversal.Progress, n datamodel.Node) error { + if n.Kind() == datamodel.Kind_Link { + nlk, _ := n.AsLink() + old = append(old, nlk.(cidlink.Link).Cid) + } + return nil + }) + child := children{t: 0, done: false, old: old, new: make([]cid.Cid, len(old))} + workMap[blkCid] = &child + } + + // Step 3: for nodes with no-uncoverted children, transform the node, and convert. + done := 0 + xar, _ := selector.CompileSelector(selectorParser.CommonSelector_ExploreAllRecursively) + for done < len(workMap) { + for c := range workMap { + if workMap[c].t == len(workMap[c].old) && !workMap[c].done { + v := workMap[c] + var newRoot ipld.Node + lnk := cidlink.Link{Cid: c} + ns, _ = nsc(lnk, ipld.LinkContext{}) + oldRoot, err := ls.Load(ipld.LinkContext{}, lnk, ns) + if err != nil { + return err + } + if len(v.old) == 0 { + // shortcut on leaf nodes. + newRoot = oldRoot + } else { + // Step 3.1: transform the node using old->new map + newRoot, err = traversal.WalkTransforming(oldRoot, xar, func(p traversal.Progress, n datamodel.Node) (datamodel.Node, error) { + if n.Kind() == datamodel.Kind_Link { + nlk, _ := n.AsLink() + oldCid := nlk.(cidlink.Link).Cid + for i, c := range v.old { + if c.Equals(oldCid) { + newLk := basicnode.NewLink(cidlink.Link{Cid: v.new[i]}) + return newLk, nil + } + } + return nil, fmt.Errorf("could not find link %s in workmap: %v", oldCid, v.old) + } + return n, nil + }) + if err != nil { + return err + } + } + // Step 3.2: serialize into output datastore + newLnk, err := outls.Store(ipld.LinkContext{}, proto, newRoot) + if err != nil { + return err + } + newCid := newLnk.(cidlink.Link).Cid + + // Step 3.3: update workmap indicating parents should transform this child. + for d := range workMap { + for i, o := range workMap[d].old { + if o.Equals(c) { + (*workMap[d]).new[i] = newCid + (*workMap[d]).t++ + } + } + } + + (*workMap[c]).done = true + done++ + } + } + } + + return outStore.Finalize() + // todo: fix up root cid +} diff --git a/cmd/car/create.go b/cmd/car/create.go index de9f6abd..bfa988ab 100644 --- a/cmd/car/create.go +++ b/cmd/car/create.go @@ -32,16 +32,12 @@ func CreateCar(c *cli.Context) error { } // make a cid with the right length that we eventually will patch with the root. - hasher, err := multihash.GetHasher(multihash.SHA2_256) + proxyRoot, err := proxyCid(cidlink.LinkPrototype{ + Prefix: cid.NewPrefixV1(uint64(multicodec.DagPb), multihash.SHA2_256), + }) if err != nil { return err } - digest := hasher.Sum([]byte{}) - hash, err := multihash.Encode(digest, multihash.SHA2_256) - if err != nil { - return err - } - proxyRoot := cid.NewCidV1(uint64(multicodec.DagPb), hash) options := []car.Option{} switch c.Int("version") { diff --git a/cmd/car/import.go b/cmd/car/import.go new file mode 100644 index 00000000..4bcdaa91 --- /dev/null +++ b/cmd/car/import.go @@ -0,0 +1,67 @@ +package main + +import ( + "io" + "os" + + "github.com/ipfs/go-cid" + "github.com/ipld/go-car/v2" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/storage/memstore" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" + "github.com/urfave/cli/v2" +) + +// ImportCar will take a file or stream representing a block of data +// and create a car in the specified codec such that the data is packaged +// into a single-block car file. +func ImportCar(c *cli.Context) error { + var err error + inStream := os.Stdin + if c.Args().Len() >= 1 && c.Args().First() != "-" { + inStream, err = os.Open(c.Args().First()) + if err != nil { + return err + } + } + data, err := io.ReadAll(inStream) + if err != nil { + return err + } + + convertTo := multicodec.Raw + for _, candidate := range multicodec.KnownCodes() { + if candidate.String() == c.String("codec") { + convertTo = candidate + } + } + + proto := cid.Prefix{ + Version: 1, + Codec: uint64(convertTo), + MhType: multihash.SHA2_256, + MhLength: -1, + } + root, err := proto.Sum(data) + if err != nil { + return err + } + + ls := cidlink.DefaultLinkSystem() + store := memstore.Store{} + store.Put(c.Context, string(root.KeyString()), data) + ls.SetReadStorage(&store) + + outStream := os.Stdout + if c.Args().Len() >= 2 { + outStream, err = os.Create(c.Args().Get(1)) + if err != nil { + return err + } + defer outStream.Close() + } + _, err = car.TraverseV1(c.Context, &ls, root, selectorparse.CommonSelector_MatchPoint, outStream) + return err +} diff --git a/cmd/car/testdata/script/concatenate.txt b/cmd/car/testdata/script/concatenate.txt new file mode 100644 index 00000000..1a125f80 --- /dev/null +++ b/cmd/car/testdata/script/concatenate.txt @@ -0,0 +1,23 @@ +stdin filteredroot.txt +car filter ${INPUTS}/sample-wrapped-v2.car out.car +! stderr . +car list out.car +! stderr . +cmp stdout filteredroot.txt + +stdin filteredcids.txt +car filter ${INPUTS}/sample-wrapped-v2.car out-2.car +car list out-2.car +! stderr . +cmp stdout filteredcids.txt + +car concatenate -f combined.car out.car out-2.car +! stderr . +car list combined.car +stdout -count=3 '^bafy' + +-- filteredroot.txt -- +bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy +-- filteredcids.txt -- +bafy2bzacebohz654namrgmwjjx4qmtwgxixsd7pn4tlanyrc3g3hwj75hlxrw +bafy2bzaceaqtiesyfqd2jibmofz22oolguzf5wscwh73rmeypglfu2xhkptri \ No newline at end of file diff --git a/cmd/car/testdata/script/convert.txt b/cmd/car/testdata/script/convert.txt new file mode 100644 index 00000000..cff948ad --- /dev/null +++ b/cmd/car/testdata/script/convert.txt @@ -0,0 +1,4 @@ +car convert ${INPUTS}/sample-wrapped-v2.car out.car dagjson +! stderr . +car list out.car +stdout -count=1049 '^baguq' diff --git a/cmd/car/testdata/script/import.txt b/cmd/car/testdata/script/import.txt new file mode 100644 index 00000000..2d483011 --- /dev/null +++ b/cmd/car/testdata/script/import.txt @@ -0,0 +1,11 @@ +env FOO_CID='bafkreicgzc7pgvw5mdtsfboafwkqtsdmtyxi2hv5if6uifq6z6pwtmjira' + +stdin foo.txt +car import -c raw - out.car +car ls out.car +stdout -count=1 '^bafk' +car gb out.car $FOO_CID +cmp stdout foo.txt + +-- foo.txt -- +foo content \ No newline at end of file