diff --git a/storage/aliases.go b/storage/aliases.go index 0a80299c29..496b763c8a 100644 --- a/storage/aliases.go +++ b/storage/aliases.go @@ -14,10 +14,7 @@ package storage -import ( - "github.com/google/trillian/storage/storagepb" - "github.com/google/trillian/storage/tree" -) +import "github.com/google/trillian/storage/tree" // TODO(pavelkalinnikov, v2): These aliases were created to not break the code // that depended on these types. We should delete this. @@ -28,14 +25,6 @@ type NodeID = tree.NodeID // Suffix is an alias to github.com/google/trillian/storage/tree.Suffix. type Suffix = tree.Suffix -// PopulateSubtreeFunc is a function which knows how to re-populate a subtree -// from just its leaf nodes. -type PopulateSubtreeFunc func(*storagepb.SubtreeProto) error - -// PrepareSubtreeWriteFunc is a function that carries out any required tree -// type specific manipulation of a subtree before it's written to storage -type PrepareSubtreeWriteFunc func(*storagepb.SubtreeProto) error - // These are aliases for the functions of the same name in github.com/google/trillian/storage/tree. var ( NewNodeIDFromHash = tree.NewNodeIDFromHash diff --git a/storage/cache/log_subtree_cache.go b/storage/cache/log_subtree_cache.go deleted file mode 100644 index 4367364fd5..0000000000 --- a/storage/cache/log_subtree_cache.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2017 Google LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cache - -import ( - "fmt" - - "github.com/google/trillian/merkle/compact" - "github.com/google/trillian/merkle/hashers" - "github.com/google/trillian/storage" - "github.com/google/trillian/storage/storagepb" - "github.com/google/trillian/storage/tree" -) - -const ( - // logStrataDepth is the strata that must be used for all log subtrees. - logStrataDepth = 8 - // maxLogDepth is the number of bits in a log path. - maxLogDepth = 64 -) - -// NewLogSubtreeCache creates and returns a SubtreeCache appropriate for use with a log -// tree. The caller must supply the strata depths to be used and a suitable LogHasher. -func NewLogSubtreeCache(logStrata []int, hasher hashers.LogHasher) *SubtreeCache { - return NewSubtreeCache(logStrata, populateLogSubtreeNodes(hasher), prepareLogSubtreeWrite()) -} - -// LogPopulateFunc obtains a log storage population function based on a supplied LogHasher. -// This is intended for use by storage utilities. -func LogPopulateFunc(hasher hashers.LogHasher) storage.PopulateSubtreeFunc { - return populateLogSubtreeNodes(hasher) -} - -// populateLogSubtreeNodes re-creates a Log subtree's InternalNodes from the -// subtree Leaves map. -// -// This uses the compact Merkle tree to repopulate internal nodes, and so will -// handle imperfect (but left-hand dense) subtrees. Note that we only rebuild internal -// nodes when the subtree is fully populated. For an explanation of why see the comments -// below for PrepareLogSubtreeWrite. 
-func populateLogSubtreeNodes(hasher hashers.LogHasher) storage.PopulateSubtreeFunc { - return func(st *storagepb.SubtreeProto) error { - if st.Depth < 1 { - return fmt.Errorf("populate log subtree with invalid depth: %d", st.Depth) - } - // maxLeaves is the number of leaves that fully populates a subtree of the depth we are - // working with. - maxLeaves := 1 << uint(st.Depth) - - // If the subtree is fully populated then the internal node map is expected to be nil but in - // case it isn't we recreate it as we're about to rebuild the contents. We'll check - // below that the number of nodes is what we expected to have. - if st.InternalNodes == nil || len(st.Leaves) == maxLeaves { - st.InternalNodes = make(map[string][]byte) - } - store := func(id compact.NodeID, hash []byte) { - if id.Level == logStrataDepth && id.Index == 0 { - // no space for the root in the node cache - return - } - - // Don't put leaves into the internal map and only update if we're rebuilding internal - // nodes. If the subtree was saved with internal nodes then we don't touch the map. - if id.Level > 0 && len(st.Leaves) == maxLeaves { - subDepth := logStrataDepth - int(id.Level) - // TODO(Martin2112): See if we can possibly avoid the expense hiding inside NewNodeIDFromPrefix. - nodeID := tree.NewNodeIDFromPrefix(st.Prefix, subDepth, int64(id.Index), logStrataDepth, maxLogDepth) - sfx := nodeID.Suffix(len(st.Prefix), int(st.Depth)) - sfxKey := sfx.String() - st.InternalNodes[sfxKey] = hash - } - } - - fact := compact.RangeFactory{Hash: hasher.HashChildren} - cr := fact.NewEmptyRange(0) - - // We need to update the subtree root hash regardless of whether it's fully populated - for leafIndex := int64(0); leafIndex < int64(len(st.Leaves)); leafIndex++ { - nodeID := tree.NewNodeIDFromPrefix(st.Prefix, logStrataDepth, leafIndex, logStrataDepth, maxLogDepth) - sfx := nodeID.Suffix(len(st.Prefix), int(st.Depth)) - sfxKey := sfx.String() - h := st.Leaves[sfxKey] - if h == nil { - return fmt.Errorf("unexpectedly got nil for subtree leaf suffix %s", sfx) - } - if size, expected := int64(cr.End()), leafIndex; size != expected { - return fmt.Errorf("got size of %d, but expected %d", size, expected) - } - if err := cr.Append(h, store); err != nil { - return err - } - } - root, err := cr.GetRootHash(store) - if err != nil { - return fmt.Errorf("failed to compute root hash: %v", err) - } - st.RootHash = root - - // Additional check - after population we should have the same number of internal nodes - // as before the subtree was written to storage. Either because they were loaded from - // storage or just rebuilt above. - if got, want := uint32(len(st.InternalNodes)), st.InternalNodeCount; got != want { - // TODO(Martin2112): Possibly replace this with stronger checks on the data in - // subtrees on disk so we can detect corruption. - return fmt.Errorf("log repop got: %d internal nodes, want: %d", got, want) - } - - return nil - } -} - -// prepareLogSubtreeWrite prepares a log subtree for writing. If the subtree is fully -// populated the internal nodes are cleared. Otherwise they are written. -// -// To see why this is necessary consider the case where a tree has a single full subtree -// and then an additional leaf is added. -// -// This causes an extra level to be added to the tree with an internal node that is a hash -// of the root of the left full subtree and the new leaf. 
Note that the leaves remain at -// level zero in the overall tree coordinate space but they are now in a lower subtree stratum -// than they were before the last node was added as the tree has grown above them. -// -// Thus in the case just discussed the internal nodes cannot be correctly reconstructed -// in isolation when the tree is reloaded because of the dependency on another subtree. -// -// Fully populated subtrees don't have this problem because by definition they can only -// contain internal nodes built from their own contents. -func prepareLogSubtreeWrite() storage.PrepareSubtreeWriteFunc { - return func(st *storagepb.SubtreeProto) error { - st.InternalNodeCount = uint32(len(st.InternalNodes)) - if st.Depth < 1 { - return fmt.Errorf("prepare subtree for log write invalid depth: %d", st.Depth) - } - maxLeaves := 1 << uint(st.Depth) - // If the subtree is fully populated we can safely clear the internal nodes - if len(st.Leaves) == maxLeaves { - st.InternalNodes = nil - } - return nil - } -} diff --git a/storage/cache/log_tile.go b/storage/cache/log_tile.go new file mode 100644 index 0000000000..356b7f9991 --- /dev/null +++ b/storage/cache/log_tile.go @@ -0,0 +1,137 @@ +// Copyright 2017 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cache + +import ( + "fmt" + + "github.com/google/trillian/merkle/compact" + "github.com/google/trillian/merkle/hashers" + "github.com/google/trillian/storage/storagepb" + "github.com/google/trillian/storage/tree" +) + +const ( + // logStrataDepth is the strata that must be used for all log subtrees. + logStrataDepth = 8 + // maxLogDepth is the number of bits in a log path. + maxLogDepth = 64 +) + +// PopulateLogTile re-creates a log tile's InternalNodes from the Leaves map. +// +// This uses the compact Merkle tree to repopulate internal nodes, and so will +// handle imperfect (but left-hand dense) subtrees. Note that we only rebuild internal +// nodes when the subtree is fully populated. For an explanation of why see the comments +// below for prepareLogTile. +// +// TODO(pavelkalinnikov): Unexport it after the refactoring. +func PopulateLogTile(st *storagepb.SubtreeProto, hasher hashers.LogHasher) error { + if st.Depth < 1 { + return fmt.Errorf("populate log subtree with invalid depth: %d", st.Depth) + } + // maxLeaves is the number of leaves that fully populates a subtree of the depth we are + // working with. + maxLeaves := 1 << uint(st.Depth) + + // If the subtree is fully populated then the internal node map is expected to be nil but in + // case it isn't we recreate it as we're about to rebuild the contents. We'll check + // below that the number of nodes is what we expected to have. 
+ if st.InternalNodes == nil || len(st.Leaves) == maxLeaves { + st.InternalNodes = make(map[string][]byte) + } + store := func(id compact.NodeID, hash []byte) { + if id.Level == logStrataDepth && id.Index == 0 { + // no space for the root in the node cache + return + } + + // Don't put leaves into the internal map and only update if we're rebuilding internal + // nodes. If the subtree was saved with internal nodes then we don't touch the map. + if id.Level > 0 && len(st.Leaves) == maxLeaves { + subDepth := logStrataDepth - int(id.Level) + // TODO(Martin2112): See if we can possibly avoid the expense hiding inside NewNodeIDFromPrefix. + nodeID := tree.NewNodeIDFromPrefix(st.Prefix, subDepth, int64(id.Index), logStrataDepth, maxLogDepth) + sfx := nodeID.Suffix(len(st.Prefix), int(st.Depth)) + sfxKey := sfx.String() + st.InternalNodes[sfxKey] = hash + } + } + + fact := compact.RangeFactory{Hash: hasher.HashChildren} + cr := fact.NewEmptyRange(0) + + // We need to update the subtree root hash regardless of whether it's fully populated + for leafIndex := int64(0); leafIndex < int64(len(st.Leaves)); leafIndex++ { + nodeID := tree.NewNodeIDFromPrefix(st.Prefix, logStrataDepth, leafIndex, logStrataDepth, maxLogDepth) + sfx := nodeID.Suffix(len(st.Prefix), int(st.Depth)) + sfxKey := sfx.String() + h := st.Leaves[sfxKey] + if h == nil { + return fmt.Errorf("unexpectedly got nil for subtree leaf suffix %s", sfx) + } + if size, expected := int64(cr.End()), leafIndex; size != expected { + return fmt.Errorf("got size of %d, but expected %d", size, expected) + } + if err := cr.Append(h, store); err != nil { + return err + } + } + root, err := cr.GetRootHash(store) + if err != nil { + return fmt.Errorf("failed to compute root hash: %v", err) + } + st.RootHash = root + + // Additional check - after population we should have the same number of internal nodes + // as before the subtree was written to storage. Either because they were loaded from + // storage or just rebuilt above. + if got, want := uint32(len(st.InternalNodes)), st.InternalNodeCount; got != want { + // TODO(Martin2112): Possibly replace this with stronger checks on the data in + // subtrees on disk so we can detect corruption. + return fmt.Errorf("log repop got: %d internal nodes, want: %d", got, want) + } + + return nil +} + +// prepareLogTile prepares a log tile for writing. If it is fully populated the +// internal nodes are cleared. Otherwise they are written. +// +// To see why this is necessary consider the case where a tree has a single full subtree +// and then an additional leaf is added. +// +// This causes an extra level to be added to the tree with an internal node that is a hash +// of the root of the left full subtree and the new leaf. Note that the leaves remain at +// level zero in the overall tree coordinate space but they are now in a lower subtree stratum +// than they were before the last node was added as the tree has grown above them. +// +// Thus in the case just discussed the internal nodes cannot be correctly reconstructed +// in isolation when the tree is reloaded because of the dependency on another subtree. +// +// Fully populated subtrees don't have this problem because by definition they can only +// contain internal nodes built from their own contents. 
+func prepareLogTile(st *storagepb.SubtreeProto) error { + st.InternalNodeCount = uint32(len(st.InternalNodes)) + if st.Depth < 1 { + return fmt.Errorf("prepare subtree for log write invalid depth: %d", st.Depth) + } + maxLeaves := 1 << uint(st.Depth) + // If the subtree is fully populated we can safely clear the internal nodes + if len(st.Leaves) == maxLeaves { + st.InternalNodes = nil + } + return nil +} diff --git a/storage/cache/subtree_cache.go b/storage/cache/subtree_cache.go index 5c13f2ae3b..977d44c977 100644 --- a/storage/cache/subtree_cache.go +++ b/storage/cache/subtree_cache.go @@ -25,7 +25,7 @@ import ( "github.com/golang/glog" "github.com/golang/protobuf/proto" //nolint:staticcheck "github.com/google/trillian/merkle/compact" - "github.com/google/trillian/storage" + "github.com/google/trillian/merkle/hashers" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" ) @@ -59,6 +59,7 @@ const maxSupportedTreeDepth = 64 // 2. Subtrees/nodes are rarely written, and mostly read. type SubtreeCache struct { layout *tree.Layout + hasher hashers.LogHasher // subtrees contains the Subtree data read from storage, and is updated by // calls to SetNodeHash. @@ -67,19 +68,13 @@ type SubtreeCache struct { // to storage. dirtyPrefixes sync.Map - // populate is used to rebuild internal nodes when subtrees are loaded from storage. - populate storage.PopulateSubtreeFunc // populateConcurrency sets the amount of concurrency when repopulating subtrees. populateConcurrency int - // prepare is used for preparation work when subtrees are about to be written to storage. - prepare storage.PrepareSubtreeWriteFunc } -// NewSubtreeCache returns a newly intialised cache ready for use. -// populateSubtree is a function which knows how to populate a subtree's -// internal nodes given its leaves, and will be called for each subtree loaded -// from storage. -func NewSubtreeCache(strataDepths []int, populateSubtree storage.PopulateSubtreeFunc, prepareSubtreeWrite storage.PrepareSubtreeWriteFunc) *SubtreeCache { +// NewLogSubtreeCache creates and returns a SubtreeCache appropriate for use with a log +// tree. The caller must supply the strata depths to be used and a suitable LogHasher. +func NewLogSubtreeCache(strataDepths []int, hasher hashers.LogHasher) *SubtreeCache { // TODO(al): pass this in maxTreeDepth := maxSupportedTreeDepth glog.V(1).Infof("Creating new subtree cache maxDepth=%d strataDepths=%v", maxTreeDepth, strataDepths) @@ -94,12 +89,10 @@ func NewSubtreeCache(strataDepths []int, populateSubtree storage.PopulateSubtree if *populateConcurrency <= 0 { panic(fmt.Errorf("populate_subtree_concurrency must be set to >= 1")) } - return &SubtreeCache{ layout: layout, - populate: populateSubtree, + hasher: hasher, populateConcurrency: *populateConcurrency, - prepare: prepareSubtreeWrite, } } @@ -159,7 +152,7 @@ func (s *SubtreeCache) preload(ids []tree.NodeID, getSubtrees GetSubtreesFunc) e // return it when done defer func() { workTokens <- true }() - s.populate(t) + PopulateLogTile(t, s.hasher) ch <- t // Note: This never blocks because len(ch) == len(subtrees). 
}() } @@ -296,7 +289,7 @@ func (s *SubtreeCache) getNodeHash(id tree.NodeID, getSubtree GetSubtreeFunc) ([ if c == nil { c = s.newEmptySubtree(subID) } else { - if err := s.populate(c); err != nil { + if err := PopulateLogTile(c, s.hasher); err != nil { return nil, err } } @@ -426,8 +419,7 @@ func (s *SubtreeCache) Flush(ctx context.Context, setSubtrees SetSubtreesFunc) e v.RootHash = nil if len(v.Leaves) > 0 { - // prepare internal nodes ready for the write (tree type specific) - if err := s.prepare(v); err != nil { + if err := prepareLogTile(v); err != nil { rangeErr = err return false } diff --git a/storage/cache/subtree_cache_test.go b/storage/cache/subtree_cache_test.go index b7b79b8659..a70e5b6752 100644 --- a/storage/cache/subtree_cache_test.go +++ b/storage/cache/subtree_cache_test.go @@ -38,7 +38,7 @@ func TestCacheFillOnlyReadsSubtrees(t *testing.T) { defer mockCtrl.Finish() m := NewMockNodeStorage(mockCtrl) - c := NewSubtreeCache(defaultLogStrata, populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite()) + c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher) nodeID := tree.NewNodeIDFromHash([]byte("1234")) // When we loop around asking for all 0..32 bit prefix lengths of the above @@ -68,7 +68,7 @@ func TestCacheGetNodesReadsSubtrees(t *testing.T) { defer mockCtrl.Finish() m := NewMockNodeStorage(mockCtrl) - c := NewSubtreeCache(defaultLogStrata, populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite()) + c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher) ids := []compact.NodeID{ compact.NewNodeID(0, 0x1234), @@ -135,7 +135,7 @@ func TestCacheFlush(t *testing.T) { defer mockCtrl.Finish() m := NewMockNodeStorage(mockCtrl) - c := NewSubtreeCache(defaultLogStrata, populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite()) + c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher) id := compact.NewNodeID(0, 12345) nodeID := stestonly.MustCreateNodeIDForTreeCoords(int64(id.Level), int64(id.Index), maxLogDepth) @@ -211,7 +211,6 @@ func TestCacheFlush(t *testing.T) { } func TestRepopulateLogSubtree(t *testing.T) { - populateTheThing := populateLogSubtreeNodes(rfc6962.DefaultHasher) fact := compact.RangeFactory{Hash: rfc6962.DefaultHasher.HashChildren} cr := fact.NewEmptyRange(0) cmtStorage := storagepb.SubtreeProto{ @@ -223,7 +222,7 @@ func TestRepopulateLogSubtree(t *testing.T) { Leaves: make(map[string][]byte), Depth: int32(defaultLogStrata[0]), } - c := NewSubtreeCache(defaultLogStrata, populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite()) + c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher) for numLeaves := int64(1); numLeaves <= 256; numLeaves++ { // clear internal nodes s.InternalNodes = make(map[string][]byte) @@ -253,8 +252,8 @@ func TestRepopulateLogSubtree(t *testing.T) { } cmtStorage.Leaves[sfxKey] = leafHash - if err := populateTheThing(&s); err != nil { - t.Fatalf("failed populate subtree: %v", err) + if err := PopulateLogTile(&s, rfc6962.DefaultHasher); err != nil { + t.Fatalf("failed populating tile: %v", err) } root, err := cr.GetRootHash(nil) if err != nil { @@ -291,10 +290,9 @@ func BenchmarkRepopulateLogSubtree(b *testing.B) { s.Leaves[sfx.String()] = hash } - populate := populateLogSubtreeNodes(hasher) for n := 0; n < b.N; n++ { - if err := populate(&s); err != nil { - b.Fatalf("failed populate subtree: %v", err) + if err := PopulateLogTile(&s, hasher); err != nil { + b.Fatalf("failed populating tile: %v", err) } } } @@ -352,7 +350,7 @@ func 
TestIdempotentWrites(t *testing.T) { // We should see many reads, but only the first call to SetNodeHash should // result in an actual write being flushed through to storage. for i := 0; i < 10; i++ { - c := NewSubtreeCache(defaultLogStrata, populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite()) + c := NewLogSubtreeCache(defaultLogStrata, rfc6962.DefaultHasher) _, err := c.getNodeHash(nodeID, m.GetSubtree) if err != nil { t.Fatalf("%d: failed to get node hash: %v", i, err) diff --git a/storage/tools/dump_tree/dumplib.go b/storage/tools/dump_tree/dumplib.go index 3dff5f2dbf..a9259139e0 100644 --- a/storage/tools/dump_tree/dumplib.go +++ b/storage/tools/dump_tree/dumplib.go @@ -42,6 +42,7 @@ import ( "github.com/google/trillian/crypto/sigpb" "github.com/google/trillian/log" "github.com/google/trillian/merkle/compact" + "github.com/google/trillian/merkle/hashers" "github.com/google/trillian/merkle/hashers/registry" _ "github.com/google/trillian/merkle/rfc6962" // Register the hasher. rfc6962 "github.com/google/trillian/merkle/rfc6962/hasher" @@ -301,19 +302,18 @@ func Main(args Options) string { if err != nil { glog.Fatalf("Failed to create a log hasher: %v", err) } - repopFunc := cache.LogPopulateFunc(hasher) if args.LatestRevision { - return latestRevisions(ls, tree.TreeId, repopFunc, formatter, args.Rebuild, args.HexKeys) + return latestRevisions(ls, tree.TreeId, hasher, formatter, args.Rebuild, args.HexKeys) } - return allRevisions(ls, tree.TreeId, repopFunc, formatter, args.Rebuild, args.HexKeys) + return allRevisions(ls, tree.TreeId, hasher, formatter, args.Rebuild, args.HexKeys) } -func allRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.PopulateSubtreeFunc, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string { +func allRevisions(ls storage.LogStorage, treeID int64, hasher hashers.LogHasher, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string { out := new(bytes.Buffer) memory.DumpSubtrees(ls, treeID, func(k string, v *storagepb.SubtreeProto) { if rebuildInternal { - repopFunc(v) + cache.PopulateLogTile(v, hasher) } if hexKeysFlag { hexKeys(v) @@ -323,7 +323,7 @@ func allRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.Populat return out.String() } -func latestRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.PopulateSubtreeFunc, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string { +func latestRevisions(ls storage.LogStorage, treeID int64, hasher hashers.LogHasher, of func(*storagepb.SubtreeProto) string, rebuildInternal, hexKeysFlag bool) string { out := new(bytes.Buffer) // vMap maps subtree prefixes (as strings) to the corresponding subtree proto and its revision vMap := make(map[string]treeAndRev) @@ -361,7 +361,7 @@ func latestRevisions(ls storage.LogStorage, treeID int64, repopFunc storage.Popu for _, k := range sKeys { v := vMap[k] if rebuildInternal { - repopFunc(v.subtree) + cache.PopulateLogTile(v.subtree, hasher) } if hexKeysFlag { hexKeys(v.subtree)
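
Note (not part of the patch): a minimal sketch of how callers migrate to the reshaped cache API introduced above. NewLogSubtreeCache replaces NewSubtreeCache plus its populate/prepare function arguments, and code that previously went through storage.PopulateSubtreeFunc (e.g. via the removed cache.LogPopulateFunc) now calls cache.PopulateLogTile on a loaded tile directly. The strata slice and the placeholder tile below are illustrative assumptions; in real use both come from the tree's storage layout and the subtrees read from storage. Import paths follow the ones visible in the patch.

// migration_sketch.go — illustrative only, not part of this change.
package main

import (
	"fmt"

	rfc6962 "github.com/google/trillian/merkle/rfc6962/hasher"
	"github.com/google/trillian/storage/cache"
	"github.com/google/trillian/storage/storagepb"
)

func main() {
	// Assumed log strata layout (eight 8-bit levels), matching the
	// defaultLogStrata used by the tests in this patch.
	logStrata := []int{8, 8, 8, 8, 8, 8, 8, 8}

	// Before this change:
	//   c := cache.NewSubtreeCache(logStrata,
	//       populateLogSubtreeNodes(rfc6962.DefaultHasher), prepareLogSubtreeWrite())
	// After: the hasher is passed directly and the log-specific populate/prepare
	// behaviour lives inside the cache package.
	c := cache.NewLogSubtreeCache(logStrata, rfc6962.DefaultHasher)
	_ = c

	// Tools such as dump_tree that rebuild a tile's internal nodes outside the
	// cache now call the exported PopulateLogTile. The tile here is a hypothetical
	// placeholder; a real one would be read from storage with its Leaves populated.
	st := &storagepb.SubtreeProto{
		Prefix: []byte{},
		Depth:  8,
		Leaves: map[string][]byte{},
	}
	if err := cache.PopulateLogTile(st, rfc6962.DefaultHasher); err != nil {
		fmt.Printf("repopulate failed: %v\n", err)
	}
}

Design note: only repopulation is needed outside the cache package (the dump_tree tool), so PopulateLogTile is exported (with a TODO to unexport it after the refactoring), while prepareLogTile stays unexported and is invoked solely from SubtreeCache.Flush.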