aboutsummaryrefslogtreecommitdiffstats
path: root/finder
diff options
context:
space:
mode:
authorJeff Gaston2017-08-09 20:16:34 -0500
committerJeff Gaston2017-08-09 20:16:34 -0500
commitd1abeb9d982b11fdf4047176d213acc8197c375f (patch)
tree63159a1dd05d904e64748a0d71356f4a4eafc901 /finder
parentb6d161bf16031e06fc8d532e35a53c73ad20f84f (diff)
downloadplatform-build-soong-d1abeb9d982b11fdf4047176d213acc8197c375f.tar.gz
platform-build-soong-d1abeb9d982b11fdf4047176d213acc8197c375f.tar.xz
platform-build-soong-d1abeb9d982b11fdf4047176d213acc8197c375f.zip
Revert "Cacheable, multithreaded finder."
This reverts commit b6d161bf16031e06fc8d532e35a53c73ad20f84f. Reason for revert: New Build Breakage: aosp-master/sdk_mac @ 4260825 Change-Id: I8bda8c50c5e5c9f84621d11a4c15b168833bcd21
Diffstat (limited to 'finder')
-rw-r--r--finder/Android.bp37
-rw-r--r--finder/cmd/Android.bp29
-rw-r--r--finder/cmd/finder.go149
-rw-r--r--finder/finder.go1399
-rw-r--r--finder/finder_test.go1573
5 files changed, 0 insertions, 3187 deletions
diff --git a/finder/Android.bp b/finder/Android.bp
deleted file mode 100644
index b5c0e130..00000000
--- a/finder/Android.bp
+++ /dev/null
@@ -1,37 +0,0 @@
1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//
16// fast, parallel, caching implementation of `find`
17//
18
19subdirs = [
20 "cmd"
21]
22
23bootstrap_go_package {
24 name: "soong-finder",
25 pkgPath: "android/soong/finder",
26 srcs: [
27 "finder.go",
28 ],
29 testSrcs: [
30 "finder_test.go",
31 ],
32 deps: [
33 "soong-fs",
34 ],
35}
36
37
diff --git a/finder/cmd/Android.bp b/finder/cmd/Android.bp
deleted file mode 100644
index 9dc84ae4..00000000
--- a/finder/cmd/Android.bp
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//
16// fast, parallel, caching implementation of `find`
17//
18
19blueprint_go_binary {
20 name: "finder",
21 srcs: [
22 "finder.go",
23 ],
24 deps: [
25 "soong-finder"
26 ],
27}
28
29
diff --git a/finder/cmd/finder.go b/finder/cmd/finder.go
deleted file mode 100644
index 9da16602..00000000
--- a/finder/cmd/finder.go
+++ /dev/null
@@ -1,149 +0,0 @@
1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18 "errors"
19 "flag"
20 "fmt"
21 "io"
22 "io/ioutil"
23 "log"
24 "os"
25 "runtime/pprof"
26 "sort"
27 "strings"
28 "time"
29
30 "android/soong/finder"
31 "android/soong/fs"
32)
33
34var (
35 // configuration of what to find
36 excludeDirs string
37 filenamesToFind string
38 pruneFiles string
39
40 // other configuration
41 cpuprofile string
42 verbose bool
43 dbPath string
44 numIterations int
45)
46
// init registers the finder's command-line flags. The flags fall into two
// groups: how to run (cpuprofile, v, db, count) and what to search for
// (exclude-dirs, names, prune-files).
func init() {
	flag.StringVar(&cpuprofile, "cpuprofile", "",
		"filepath of profile file to write (optional)")
	flag.BoolVar(&verbose, "v", false, "log additional information")
	flag.StringVar(&dbPath, "db", "", "filepath of cache db")

	flag.StringVar(&excludeDirs, "exclude-dirs", "",
		"comma-separated list of directory names to exclude from search")
	flag.StringVar(&filenamesToFind, "names", "",
		"comma-separated list of filenames to find")
	flag.StringVar(&pruneFiles, "prune-files", "",
		"filenames that if discovered will exclude their entire directory "+
			"(including sibling files and directories)")
	flag.IntVar(&numIterations, "count", 1,
		"number of times to run. This is intended for use with --cpuprofile"+
			" , to increase profile accuracy")
}
64
// usage prints a one-line synopsis followed by a description of every flag.
var usage = func() {
	fmt.Printf("usage: finder -name <fileName> --db <dbPath> <searchDirectory> [<searchDirectory>...]\n")
	flag.PrintDefaults()
}
69
70func main() {
71 err := run()
72 if err != nil {
73 fmt.Fprintf(os.Stderr, "%v\n", err.Error())
74 os.Exit(1)
75 }
76}
77
// stringToList splits a comma-separated flag value into its elements.
// An empty input yields an empty (nil) list: strings.Split("", ",") would
// otherwise return [""], injecting a bogus empty name into the search
// parameters whenever a flag is left unset.
func stringToList(input string) []string {
	if input == "" {
		return nil
	}
	return strings.Split(input, ",")
}
81
// run parses flags, optionally starts CPU profiling, runs the find
// (possibly several times, for profiling accuracy), and prints the sorted
// results to stdout, with timing information logged via the logger.
func run() error {
	startTime := time.Now()
	flag.Parse()

	if cpuprofile != "" {
		f, err := os.Create(cpuprofile)
		if err != nil {
			return fmt.Errorf("Error opening cpuprofile: %s", err)
		}
		pprof.StartCPUProfile(f)
		// deferred LIFO: the profile is stopped before the file is closed
		defer f.Close()
		defer pprof.StopCPUProfile()
	}

	// verbose logging goes to stderr; otherwise it is discarded
	var writer io.Writer
	if verbose {
		writer = os.Stderr
	} else {
		writer = ioutil.Discard
	}

	// TODO: replace Lshortfile with Llongfile when bug 63821638 is done
	logger := log.New(writer, "", log.Ldate|log.Lmicroseconds|log.Lshortfile)

	logger.Printf("Finder starting at %v\n", startTime)

	// the positional arguments are the directories to search
	rootPaths := flag.Args()
	if len(rootPaths) < 1 {
		usage()
		return fmt.Errorf(
			"Must give at least one <searchDirectory>")
	}

	workingDir, err := os.Getwd()
	if err != nil {
		return err
	}
	params := finder.CacheParams{
		WorkingDirectory: workingDir,
		RootDirs:         rootPaths,
		ExcludeDirs:      stringToList(excludeDirs),
		PruneFiles:       stringToList(pruneFiles),
		IncludeFiles:     stringToList(filenamesToFind),
	}
	if dbPath == "" {
		usage()
		return errors.New("Param 'db' must be nonempty")
	}
	// rerun the whole find --count times; only the last result is kept
	// (used with --cpuprofile to increase profile accuracy)
	matches := []string{}
	for i := 0; i < numIterations; i++ {
		matches = runFind(params, logger)
	}
	findDuration := time.Since(startTime)
	logger.Printf("Found these %v inodes in %v :\n", len(matches), findDuration)
	sort.Strings(matches)
	for _, match := range matches {
		fmt.Println(match)
	}
	logger.Printf("End of %v inodes\n", len(matches))
	logger.Printf("Finder completed in %v\n", time.Since(startTime))
	return nil
}
144
// runFind performs one complete find: it builds a Finder (which loads the
// cache db and scans the filesystem), queries every cached path, and shuts
// the Finder down (saving the db) before returning the matches.
func runFind(params finder.CacheParams, logger *log.Logger) (paths []string) {
	service := finder.New(params, fs.OsFs, logger, dbPath)
	defer service.Shutdown()
	return service.FindAll()
}
diff --git a/finder/finder.go b/finder/finder.go
deleted file mode 100644
index ad85ee9a..00000000
--- a/finder/finder.go
+++ /dev/null
@@ -1,1399 +0,0 @@
1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package finder
16
17import (
18 "bufio"
19 "bytes"
20 "encoding/json"
21 "fmt"
22 "io"
23 "os"
24 "path/filepath"
25 "runtime"
26 "sort"
27 "strings"
28 "sync"
29 "sync/atomic"
30 "time"
31
32 "android/soong/fs"
33 "errors"
34)
35
36// This file provides a Finder struct that can quickly search for files satisfying
37// certain criteria.
38// This Finder gets its speed partially from parallelism and partially from caching.
39// If a Stat call returns the same result as last time, then it means Finder
40// can skip the ReadDir call for that dir.
41
42// The primary data structure used by the finder is the field Finder.nodes ,
43// which is a tree of nodes of type *pathMap .
44// Each node represents a directory on disk, along with its stats, subdirectories,
45// and contained files.
46
47// The common use case for the Finder is that the caller creates a Finder and gives
48// it the same query that was given to it in the previous execution.
49// In this situation, the major events that take place are:
50// 1. The Finder begins to load its db
51// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
52// Calling Stat on each of these directories is generally a large fraction of the total time
53// 3. The Finder begins to construct a separate tree of nodes in each of its threads
54// 4. The Finder merges the individual node trees into the main node tree
55// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
56// These ReadDir calls might prompt additional Stat calls, etc
57// 6. The Finder waits for all loading to complete
58// 7. The Finder searches the cache for files matching the user's query (using multiple threads)
59
60// These are the invariants regarding concurrency:
61// 1. The public methods of Finder are threadsafe.
62// The public methods are only performance-optimized for one caller at a time, however.
63// For the moment, multiple concurrent callers shouldn't expect any better performance than
64// multiple serial callers.
65// 2. While building the node tree, only one thread may ever access the <children> collection of a
66// *pathMap at once.
67// a) The thread that accesses the <children> collection is the thread that discovers the
68// children (by reading them from the cache or by having received a response to ReadDir).
69// 1) Consequently, the thread that discovers the children also spawns requests to stat
70// subdirs.
71// b) Consequently, while building the node tree, no thread may do a lookup of its
72// *pathMap via filepath because another thread may be adding children to the
73// <children> collection of an ancestor node. Additionally, in rare cases, another thread
74// may be removing children from an ancestor node if the children were only discovered to
75// be irrelevant after calling ReadDir (which happens if a prune-file was just added).
76// 3. No query will begin to be serviced until all loading (both reading the db
77// and scanning the filesystem) is complete.
78// Tests indicate that it only takes about 10% as long to search the in-memory cache as to
79// generate it, making this not a huge loss in performance.
80// 4. The parsing of the db and the initial setup of the pathMap tree must complete before
81// beginning to call listDirSync (because listDirSync can create new entries in the pathMap)
82
83// see cmd/finder.go or finder_test.go for usage examples
84
85// Update versionString whenever making a backwards-incompatible change to the cache file format
86const versionString = "Android finder version 1"
87
// a CacheParams specifies which files and directories the user wishes be scanned and
// potentially added to the cache
type CacheParams struct {
	// WorkingDirectory is used as a base for any relative file paths given to the Finder
	WorkingDirectory string

	// RootDirs are the root directories used to initiate the search
	RootDirs []string

	// ExcludeDirs are directory names that if encountered are removed from the search
	ExcludeDirs []string

	// PruneFiles are file names that if encountered prune their entire directory
	// (including siblings)
	PruneFiles []string

	// IncludeFiles are file names to include as matches
	IncludeFiles []string
}
107
// a cacheConfig stores the inputs that determine what should be included in the cache
type cacheConfig struct {
	CacheParams

	// FilesystemView is a unique identifier telling which parts of which file systems
	// are readable by the Finder. In practice its value is essentially username@hostname.
	// FilesystemView is set to ensure that a cache file copied to another host or
	// found by another user doesn't inadvertently get reused.
	FilesystemView string
}

// Dump returns the JSON encoding of this config; it is written as a header
// line of the cache file and compared on load to detect parameter changes.
func (p *cacheConfig) Dump() ([]byte, error) {
	bytes, err := json.Marshal(p)
	return bytes, err
}
123
// a cacheMetadata stores version information about the cache
type cacheMetadata struct {
	// The Version enables the Finder to determine whether it can even parse the file
	// If the version changes, the entire cache file must be regenerated
	Version string

	// The CacheParams enables the Finder to determine whether the parameters match
	// If the CacheParams change, the Finder can choose how much of the cache file to reuse
	// (although in practice, the Finder will probably choose to ignore the entire file anyway)
	Config cacheConfig
}

// Logger is the minimal logging interface that the Finder requires;
// *log.Logger satisfies it (see cmd/finder.go).
type Logger interface {
	Output(calldepth int, s string) error
}
139
// the Finder is the main struct that callers will want to use
type Finder struct {
	// configuration
	DbPath              string // where Shutdown persists the cache
	numDbLoadingThreads int
	numSearchingThreads int
	cacheMetadata       cacheMetadata
	logger              Logger
	filesystem          fs.FileSystem

	// temporary state
	threadPool *threadPool // non-nil only while loading the cache
	mutex      sync.Mutex  // gives each query exclusive use of the Finder

	// non-temporary state
	// modifiedFlag is an int32 rather than a bool, presumably so it can be
	// updated via sync/atomic — confirm in setModified/wasModified
	modifiedFlag int32
	nodes        pathMap // root of the in-memory directory tree
}
158
// New creates a new Finder for use. It loads the cache db (when valid) and
// re-scans the filesystem for changes, blocking until the in-memory cache
// is fully populated, so queries never race with loading.
func New(cacheParams CacheParams, filesystem fs.FileSystem,
	logger Logger, dbPath string) *Finder {

	// 2x CPUs: stat/readdir threads spend much of their time blocked on I/O
	numThreads := runtime.NumCPU() * 2
	numDbLoadingThreads := numThreads
	numSearchingThreads := numThreads

	metadata := cacheMetadata{
		Version: versionString,
		Config: cacheConfig{
			CacheParams:    cacheParams,
			FilesystemView: filesystem.ViewId(),
		},
	}

	finder := &Finder{
		numDbLoadingThreads: numDbLoadingThreads,
		numSearchingThreads: numSearchingThreads,
		cacheMetadata:       metadata,
		logger:              logger,
		filesystem:          filesystem,

		nodes:  *newPathMap("/"),
		DbPath: dbPath,
	}

	finder.loadFromFilesystem()

	finder.verbosef("Done parsing db\n")
	return finder
}
191
// FindAll returns every cached file (equivalent to FindAt("/")).
func (f *Finder) FindAll() []string {
	return f.FindAt("/")
}
196
// FindAt returns every cached file under <rootDir>.
func (f *Finder) FindAt(rootDir string) []string {
	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
		return entries.DirNames, entries.FileNames
	}
	return f.FindMatching(rootDir, filter)
}
204
// FindNamed searches the entire cache for every file named <fileName>.
func (f *Finder) FindNamed(fileName string) []string {
	return f.FindNamedAt("/", fileName)
}
209
210// FindNamedAt searches under <rootPath> for every file named <fileName>
211// The reason a caller might use FindNamedAt instead of FindNamed is if they want
212// to limit their search to a subset of the cache
213func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
214 filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
215 matches := []string{}
216 for _, foundName := range entries.FileNames {
217 if foundName == fileName {
218 matches = append(matches, foundName)
219 }
220 }
221 return entries.DirNames, matches
222
223 }
224 return f.FindMatching(rootPath, filter)
225}
226
// FindFirstNamed searches for every file named <fileName>.
// Whenever it finds a match, it stops searching that match's subdirectories.
func (f *Finder) FindFirstNamed(fileName string) []string {
	return f.FindFirstNamedAt("/", fileName)
}
232
233// FindFirstNamedAt searches for every file named <fileName>
234// Whenever it finds a match, it stops search subdirectories
235func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
236 filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
237 matches := []string{}
238 for _, foundName := range entries.FileNames {
239 if foundName == fileName {
240 matches = append(matches, foundName)
241 }
242 }
243
244 if len(matches) > 0 {
245 return []string{}, matches
246 }
247 return entries.DirNames, matches
248 }
249 return f.FindMatching(rootPath, filter)
250}
251
// FindMatching is the most general exported function for searching for files in the cache
// The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries
// in place, removing file paths and directories as desired.
// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
	// set up some parameters
	scanStart := time.Now()
	var isRel bool
	workingDir := f.cacheMetadata.Config.WorkingDirectory

	// relative queries are resolved against the cached working directory,
	// and their results are converted back to relative paths at the end
	isRel = !filepath.IsAbs(rootPath)
	if isRel {
		rootPath = filepath.Join(workingDir, rootPath)
	}

	rootPath = filepath.Clean(rootPath)

	// ensure nothing else is using the Finder
	f.verbosef("FindMatching waiting for finder to be idle\n")
	f.lock()
	defer f.unlock()

	node := f.nodes.GetNode(rootPath, false)
	if node == nil {
		f.verbosef("No data for path %v ; apparently not included in cache params: %v\n",
			rootPath, f.cacheMetadata.Config.CacheParams)
		// path is not found; don't do a search
		return []string{}
	}

	// search for matching files
	f.verbosef("Finder finding %v using cache\n", rootPath)
	results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)

	// format and return results
	if isRel {
		// every result is an absolute path under workingDir, so removing the
		// first occurrence of "workingDir/" strips exactly that prefix
		for i := 0; i < len(results); i++ {
			results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
		}
	}
	sort.Strings(results)
	f.verbosef("Found %v files under %v in %v using cache\n",
		len(results), rootPath, time.Since(scanStart))
	return results
}
297
298// Shutdown saves the contents of the Finder to its database file
299func (f *Finder) Shutdown() {
300 f.verbosef("Shutting down\n")
301 if f.wasModified() {
302 err := f.dumpDb()
303 if err != nil {
304 f.verbosef("%v\n", err)
305 }
306 } else {
307 f.verbosef("Skipping dumping unmodified db\n")
308 }
309}
310
311// End of public api
312
// joinCleanPaths is like filepath.Join but is faster because
// joinCleanPaths doesn't have to support paths ending in "/" or containing "..".
func joinCleanPaths(base string, leaf string) string {
	switch {
	case base == "":
		return leaf
	case base == "/":
		return base + leaf
	case leaf == "":
		return base
	default:
		return base + "/" + leaf
	}
}
327
// verbosef writes a formatted message to the Finder's logger.
// calldepth 2 attributes the log line to verbosef's caller.
func (f *Finder) verbosef(format string, args ...interface{}) {
	f.logger.Output(2, fmt.Sprintf(format, args...))
}
331
332// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
333func (f *Finder) loadFromFilesystem() {
334 f.threadPool = newThreadPool(f.numDbLoadingThreads)
335
336 err := f.startFromExternalCache()
337 if err != nil {
338 f.startWithoutExternalCache()
339 }
340
341 startTime := time.Now()
342 f.verbosef("Waiting for pending requests to complete\n")
343 f.threadPool.Wait()
344 f.verbosef("Is idle after %v\n", time.Now().Sub(startTime))
345 f.threadPool = nil
346}
347
// startFind asynchronously begins scanning the filesystem at <path>,
// first creating its node (and any missing ancestors) in the tree.
func (f *Finder) startFind(path string) {
	// relative paths are resolved against the configured working directory
	if !filepath.IsAbs(path) {
		path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
	}
	node := f.nodes.GetNode(path, true)
	f.statDirAsync(node)
}
355
// lock acquires the Finder's mutex, giving the caller exclusive use of the Finder.
func (f *Finder) lock() {
	f.mutex.Lock()
}

// unlock releases the Finder's mutex.
func (f *Finder) unlock() {
	f.mutex.Unlock()
}
363
// a statResponse is the relevant portion of the response from the filesystem to a Stat call
type statResponse struct {
	ModTime int64
	Inode   uint64
	Device  uint64
}

// a pathAndStats stores a path and its stats
type pathAndStats struct {
	statResponse

	Path string
}

// a dirFullInfo stores all of the relevant information we know about a directory
type dirFullInfo struct {
	pathAndStats

	FileNames []string
}

// a PersistedDirInfo is the information about a dir that we save to our cache on disk
type PersistedDirInfo struct {
	// These field names are short because they are repeated many times in the output json file
	P string   // path
	T int64    // modification time
	I uint64   // inode number
	F []string // relevant filenames contained
}

// a PersistedDirs is the information that we persist for a group of dirs
type PersistedDirs struct {
	// the device on which each directory is stored
	Device uint64
	// the common root path to which all contained dirs are relative
	Root string
	// the directories themselves
	Dirs []PersistedDirInfo
}

// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
type CacheEntry []PersistedDirs

// a DirEntries lists the files and directories contained directly within a specific directory
type DirEntries struct {
	Path string

	// elements of DirNames are just the dir names; they don't include any '/' character
	DirNames []string
	// elements of FileNames are just the file names; they don't include any '/' character
	FileNames []string
}

// a WalkFunc is the type that is passed into various Find functions for determining which
// directories the caller wishes be walked. The WalkFunc is expected to decide which
// directories to walk and which files to consider as matches to the original query.
type WalkFunc func(DirEntries) (dirs []string, files []string)

// a mapNode stores the relevant stats about a directory to be stored in a pathMap
type mapNode struct {
	statResponse
	FileNames []string
}

// a pathMap implements the directory tree structure of nodes
type pathMap struct {
	mapNode

	path string

	// children maps the name of each direct subdirectory to its node
	children map[string]*pathMap

	// number of descendent nodes, including self
	approximateNumDescendents int
}
439
440func newPathMap(path string) *pathMap {
441 result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
442 approximateNumDescendents: 1}
443 return result
444}
445
// GetNode returns the node at <path>, descending one path component at a
// time from this node. If createIfNotFound is true, missing nodes are
// created along the way; otherwise nil is returned as soon as a component
// is missing.
func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
	// treat the path as relative to this node by dropping a leading '/'
	if len(path) > 0 && path[0] == '/' {
		path = path[1:]
	}

	node := m
	for {
		if path == "" {
			return node
		}

		// split off the first path component
		index := strings.Index(path, "/")
		var firstComponent string
		if index >= 0 {
			firstComponent = path[:index]
			path = path[index+1:]
		} else {
			firstComponent = path
			path = ""
		}

		child, found := node.children[firstComponent]

		if !found {
			if createIfNotFound {
				child = node.newChild(firstComponent)
			} else {
				return nil
			}
		}

		node = child
	}
}
481
482func (m *pathMap) newChild(name string) (child *pathMap) {
483 path := joinCleanPaths(m.path, name)
484 newChild := newPathMap(path)
485 m.children[name] = newChild
486
487 return m.children[name]
488}
489
490func (m *pathMap) UpdateNumDescendents() int {
491 count := 1
492 for _, child := range m.children {
493 count += child.approximateNumDescendents
494 }
495 m.approximateNumDescendents = count
496 return count
497}
498
499func (m *pathMap) UpdateNumDescendentsRecursive() {
500 for _, child := range m.children {
501 child.UpdateNumDescendentsRecursive()
502 }
503 m.UpdateNumDescendents()
504}
505
// MergeIn recursively incorporates another pathMap tree into this one.
// Children unique to <other> are adopted wholesale; children present in both
// are merged. This node's stats are replaced by <other>'s only when <other>
// actually holds stats (a ModTime of 0 marks a node without stat data).
func (m *pathMap) MergeIn(other *pathMap) {
	for key, theirs := range other.children {
		ours, found := m.children[key]
		if found {
			ours.MergeIn(theirs)
		} else {
			m.children[key] = theirs
		}
	}
	if other.ModTime != 0 {
		m.mapNode = other.mapNode
	}
	m.UpdateNumDescendents()
}
520
// DumpAll flattens the whole tree into a list of dirFullInfo, one entry per
// node, suitable for serializing into the cache db.
func (m *pathMap) DumpAll() []dirFullInfo {
	results := []dirFullInfo{}
	m.dumpInto("", &results)
	return results
}

// dumpInto appends this node (recorded under the accumulated path <path>)
// and all of its descendents into <results>.
func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
	*results = append(*results,
		dirFullInfo{
			pathAndStats{statResponse: m.statResponse, Path: path},
			m.FileNames},
	)
	for key, child := range m.children {
		childPath := joinCleanPaths(path, key)
		// ensure every recorded path is absolute
		if len(childPath) == 0 || childPath[0] != '/' {
			childPath = "/" + childPath
		}
		child.dumpInto(childPath, results)
	}
}
541
// a semaphore can be locked by up to <capacity> callers at once
type semaphore struct {
	pool chan bool
}

// newSemaphore creates a semaphore permitting <capacity> concurrent holders.
func newSemaphore(capacity int) *semaphore {
	return &semaphore{pool: make(chan bool, capacity)}
}

// Lock blocks until one of the <capacity> slots becomes available.
func (s *semaphore) Lock() {
	s.pool <- true
}

// Unlock releases one held slot.
func (s *semaphore) Unlock() {
	<-s.pool
}
558
// A threadPool runs goroutines and supports throttling and waiting.
// Without throttling, Go may exhaust the maximum number of various resources, such as
// threads or file descriptors, and crash the program.
type threadPool struct {
	// receivedRequests counts every request handed to Run, finished or not
	receivedRequests sync.WaitGroup
	// activeRequests bounds how many requests may execute concurrently
	activeRequests semaphore
}

// newThreadPool creates a threadPool allowing at most
// <maxNumConcurrentThreads> functions to run at once.
func newThreadPool(maxNumConcurrentThreads int) *threadPool {
	return &threadPool{
		receivedRequests: sync.WaitGroup{},
		activeRequests:   *newSemaphore(maxNumConcurrentThreads),
	}
}

// Run requests to run the given function in its own goroutine
func (p *threadPool) Run(function func()) {
	p.receivedRequests.Add(1)
	// If Run() was called from within a goroutine spawned by this threadPool,
	// then we may need to return from Run() before having capacity to actually
	// run <function>.
	//
	// It's possible that the body of <function> contains a statement (such as a syscall)
	// that will cause Go to pin it to a thread, or will contain a statement that uses
	// another resource that is in short supply (such as a file descriptor), so we can't
	// actually run <function> until we have capacity.
	//
	// However, the semaphore used for synchronization is implemented via a channel and
	// shouldn't require a new thread for each access.
	go func() {
		p.activeRequests.Lock()
		function()
		p.activeRequests.Unlock()
		p.receivedRequests.Done()
	}()
}

// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
func (p *threadPool) Wait() {
	p.receivedRequests.Wait()
}
600
// serializeCacheEntry converts a list of dirFullInfo into the json bytes of
// one cache-db block. To save space, dirs are grouped by device and each
// group's common path prefix is factored out into a single Root field.
func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
	// group each dirFullInfo by its Device, to avoid having to repeat it in the output
	dirsByDevice := map[uint64][]PersistedDirInfo{}
	for _, entry := range dirInfos {
		_, found := dirsByDevice[entry.Device]
		if !found {
			dirsByDevice[entry.Device] = []PersistedDirInfo{}
		}
		dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
			PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
	}

	cacheEntry := CacheEntry{}

	for device, infos := range dirsByDevice {
		// find common prefix
		prefix := ""
		if len(infos) > 0 {
			prefix = infos[0].P
		}
		for _, info := range infos {
			// shorten the candidate prefix one directory at a time until it
			// is an ancestor of (or equal to) this path; the appended "/"
			// prevents "/a/bc" from matching the prefix "/a/b"
			for !strings.HasPrefix(info.P+"/", prefix+"/") {
				prefix = filepath.Dir(prefix)
			}
		}
		// remove common prefix
		for i := range infos {
			suffix := strings.Replace(infos[i].P, prefix, "", 1)
			if len(suffix) > 0 && suffix[0] == '/' {
				suffix = suffix[1:]
			}
			infos[i].P = suffix
		}

		// turn the map (keyed by device) into a list of structs with labeled fields
		// this is to improve readability of the output
		cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
	}

	// convert to json.
	// it would save some space to use a different format than json for the db file,
	// but the space and time savings are small, and json is easy for humans to read
	bytes, err := json.Marshal(cacheEntry)
	return bytes, err
}
646
// parseCacheEntry is the inverse of serializeCacheEntry: it unmarshals one
// json block of the cache db and flattens it back into a []dirFullInfo,
// re-expanding each group's Root prefix and Device onto its dirs.
func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
	var cacheEntry CacheEntry
	err := json.Unmarshal(bytes, &cacheEntry)
	if err != nil {
		return nil, err
	}

	// convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
	capacity := 0
	for _, element := range cacheEntry {
		capacity += len(element.Dirs)
	}
	nodes := make([]dirFullInfo, capacity)
	count := 0
	for _, element := range cacheEntry {
		for _, dir := range element.Dirs {
			// restore the absolute path by rejoining the group root
			path := joinCleanPaths(element.Root, dir.P)

			nodes[count] = dirFullInfo{
				pathAndStats: pathAndStats{
					statResponse: statResponse{
						ModTime: dir.T, Inode: dir.I, Device: element.Device,
					},
					Path: path},
				FileNames: dir.F}
			count++
		}
	}
	return nodes, nil
}
677
// We use the following separator byte to distinguish individually parseable blocks of json
// because we know this separator won't appear in the json that we're parsing.
//
// The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
// - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
//   character.
//
// We know that the newline character will never appear in our json string, because:
// - If a newline character appears as part of a data string, then json encoding will
//   emit two characters instead: '\' and 'n'.
// - The json encoder that we use doesn't emit the optional newlines between any of its
//   other outputs.
const lineSeparator = byte('\n')

// readLine returns the next block of the cache db: all bytes up to and
// including the next lineSeparator.
func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
	return reader.ReadBytes(lineSeparator)
}
696
697// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
698func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
699 cacheVersionBytes, err := f.readLine(cacheReader)
700 if err != nil {
701 f.verbosef("Failed to read database header; database is invalid\n")
702 return false
703 }
704 if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
705 cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
706 }
707 cacheVersionString := string(cacheVersionBytes)
708 currentVersion := f.cacheMetadata.Version
709 if cacheVersionString != currentVersion {
710 f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
711 return false
712 }
713
714 cacheParamBytes, err := f.readLine(cacheReader)
715 if err != nil {
716 f.verbosef("Failed to read database search params; database is invalid\n")
717 return false
718 }
719
720 if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
721 cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
722 }
723
724 currentParamBytes, err := f.cacheMetadata.Config.Dump()
725 if err != nil {
726 panic("Finder failed to serialize its parameters")
727 }
728 cacheParamString := string(cacheParamBytes)
729 currentParamString := string(currentParamBytes)
730 if cacheParamString != currentParamString {
731 f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
732 return false
733 }
734 return true
735}
736
// loadBytes compares the cache info in <data> to the state of the filesystem
// loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked
// <id> identifies the cache block; it is only used in log messages.
func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {

	helperStartTime := time.Now()

	cachedNodes, err := f.parseCacheEntry(data)
	if err != nil {
		return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error())
	}

	unmarshalDate := time.Now()
	f.verbosef("Unmarshaled %v objects for %v in %v\n",
		len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))

	// Build a temporary tree (rooted at "/") for this block only; the caller
	// merges it into the main tree once every block has been processed.
	tempMap := newPathMap("/")
	stats := make([]statResponse, len(cachedNodes))

	for i, node := range cachedNodes {
		// check the file system for an updated timestamp
		stats[i] = f.statDirSync(node.Path)
	}

	dirsToWalk = []string{}
	for i, cachedNode := range cachedNodes {
		updated := stats[i]
		// save the cached value
		container := tempMap.GetNode(cachedNode.Path, true)
		container.mapNode = mapNode{statResponse: updated}

		// if the metadata changed and the directory still exists, then
		// make a note to walk it later
		// (ModTime == 0 is how statDirSync reports a missing/unstattable dir)
		if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
			f.setModified()
			// make a note that the directory needs to be walked
			dirsToWalk = append(dirsToWalk, cachedNode.Path)
		} else {
			// unchanged (or gone): trust the cached file list
			container.mapNode.FileNames = cachedNode.FileNames
		}
	}
	// count the number of nodes to improve our understanding of the shape of the tree,
	// thereby improving parallelism of subsequent searches
	tempMap.UpdateNumDescendentsRecursive()

	f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate))
	return tempMap, dirsToWalk, nil
}
784
// startFromExternalCache loads the cache database from disk
// startFromExternalCache waits to return until the load of the cache db is complete, but
// startFromExternalCache does not wait for all every listDir() or statDir() request to complete
//
// The load runs as a three-stage pipeline:
//  1. readBlocks: splits the file into newline-delimited blocks -> blockChannel
//  2. processBlocks: parses + stats each block on a thread pool -> resultChannel
//  3. combineResults: merges each block's tree into mainTree (single-threaded)
func (f *Finder) startFromExternalCache() (err error) {
	startTime := time.Now()
	dbPath := f.DbPath

	// open cache file and validate its header
	reader, err := f.filesystem.Open(dbPath)
	if err != nil {
		return errors.New("No data to load from database\n")
	}
	bufferedReader := bufio.NewReader(reader)
	if !f.validateCacheHeader(bufferedReader) {
		return errors.New("Cache header does not match")
	}
	f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)

	// read the file and spawn threads to process it
	nodesToWalk := [][]*pathMap{}
	mainTree := newPathMap("/")

	// read the blocks and stream them into <blockChannel>
	type dataBlock struct {
		id   int
		err  error
		data []byte
	}
	blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
	readBlocks := func() {
		index := 0
		for {
			// It takes some time to unmarshal the input from json, so we want
			// to unmarshal it in parallel. In order to find valid places to
			// break the input, we scan for the line separators that we inserted
			// (for this purpose) when we dumped the database.
			data, err := f.readLine(bufferedReader)
			var response dataBlock
			done := false
			if err != nil && err != io.EOF {
				response = dataBlock{id: index, err: err, data: nil}
				done = true
			} else {
				// io.EOF still delivers whatever was read before EOF, so the
				// final (possibly separator-less) block is forwarded too
				done = (err == io.EOF)
				response = dataBlock{id: index, err: nil, data: data}
			}
			blockChannel <- response
			index++
			duration := time.Since(startTime)
			f.verbosef("Read block %v after %v\n", index, duration)
			if done {
				f.verbosef("Read %v blocks in %v\n", index, duration)
				// only this sender closes blockChannel
				close(blockChannel)
				return
			}
		}
	}
	go readBlocks()

	// Read from <blockChannel> and stream the responses into <resultChannel>.
	type workResponse struct {
		id          int
		err         error
		tree        *pathMap
		updatedDirs []string
	}
	resultChannel := make(chan workResponse)
	processBlocks := func() {
		numProcessed := 0
		threadPool := newThreadPool(f.numDbLoadingThreads)
		for {
			// get a block to process
			// (block is re-declared each iteration, so the closure below
			// captures its own copy)
			block, received := <-blockChannel
			if !received {
				break
			}

			if block.err != nil {
				resultChannel <- workResponse{err: block.err}
				break
			}
			numProcessed++
			// wait until there is CPU available to process it
			threadPool.Run(
				func() {
					processStartTime := time.Now()
					f.verbosef("Starting to process block %v after %v\n",
						block.id, processStartTime.Sub(startTime))
					tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
					var response workResponse
					if err != nil {
						f.verbosef(
							"Block %v failed to parse with error %v\n",
							block.id, err)
						response = workResponse{err: err}
					} else {
						response = workResponse{
							id:          block.id,
							err:         nil,
							tree:        tempMap,
							updatedDirs: updatedDirs,
						}
					}
					f.verbosef("Processed block %v in %v\n",
						block.id, time.Since(processStartTime),
					)
					resultChannel <- response
				},
			)
		}
		// wait for all workers before closing, so no worker sends on a
		// closed channel
		threadPool.Wait()
		f.verbosef("Finished processing %v blocks in %v\n",
			numProcessed, time.Since(startTime))
		close(resultChannel)
	}
	go processBlocks()

	// Read from <resultChannel> and use the results
	combineResults := func() (err error) {
		for {
			result, received := <-resultChannel
			if !received {
				break
			}
			if err != nil {
				// In case of an error, wait for work to complete before
				// returning the error. This ensures that any subsequent
				// work doesn't need to compete for resources (and possibly
				// fail due to, for example, a filesystem limit on the number of
				// concurrently open files) with past work.
				continue
			}
			if result.err != nil {
				err = result.err
				continue
			}
			// update main tree
			mainTree.MergeIn(result.tree)
			// record any new directories that we will need to Stat()
			updatedNodes := make([]*pathMap, len(result.updatedDirs))
			for j, dir := range result.updatedDirs {
				node := mainTree.GetNode(dir, false)
				updatedNodes[j] = node
			}
			nodesToWalk = append(nodesToWalk, updatedNodes)
		}
		return err
	}
	err = combineResults()
	if err != nil {
		return err
	}

	f.nodes = *mainTree

	// after having loaded the entire db and therefore created entries for
	// the directories we know of, now it's safe to start calling ReadDir on
	// any updated directories
	for i := range nodesToWalk {
		f.listDirsAsync(nodesToWalk[i])
	}
	f.verbosef("Loaded db and statted its contents in %v\n", time.Since(startTime))
	return err
}
949
950// startWithoutExternalCache starts scanning the filesystem according to the cache config
951// startWithoutExternalCache should be called if startFromExternalCache is not applicable
952func (f *Finder) startWithoutExternalCache() {
953 configDirs := f.cacheMetadata.Config.RootDirs
954
955 // clean paths
956 candidates := make([]string, len(configDirs))
957 for i, dir := range configDirs {
958 candidates[i] = filepath.Clean(dir)
959 }
960 // remove duplicates
961 dirsToScan := make([]string, 0, len(configDirs))
962 for _, candidate := range candidates {
963 include := true
964 for _, included := range dirsToScan {
965 if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
966 include = false
967 break
968 }
969 }
970 if include {
971 dirsToScan = append(dirsToScan, candidate)
972 }
973 }
974
975 // start searching finally
976 for _, path := range dirsToScan {
977 f.verbosef("Starting find of %v\n", path)
978 f.startFind(path)
979 }
980}
981
982// isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
983func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
984 if old.Inode != new.Inode {
985 return false
986 }
987 if old.ModTime != new.ModTime {
988 return false
989 }
990 if old.Device != new.Device {
991 return false
992 }
993 return true
994}
995
996func (f *Finder) wasModified() bool {
997 return atomic.LoadInt32(&f.modifiedFlag) > 0
998}
999
1000func (f *Finder) setModified() {
1001 var newVal int32
1002 newVal = 1
1003 atomic.StoreInt32(&f.modifiedFlag, newVal)
1004}
1005
1006// sortedDirEntries exports directory entries to facilitate dumping them to the external cache
1007func (f *Finder) sortedDirEntries() []dirFullInfo {
1008 startTime := time.Now()
1009 nodes := make([]dirFullInfo, 0)
1010 for _, node := range f.nodes.DumpAll() {
1011 if node.ModTime != 0 {
1012 nodes = append(nodes, node)
1013 }
1014 }
1015 discoveryDate := time.Now()
1016 f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
1017 less := func(i int, j int) bool {
1018 return nodes[i].Path < nodes[j].Path
1019 }
1020 sort.Slice(nodes, less)
1021 sortDate := time.Now()
1022 f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))
1023
1024 return nodes
1025}
1026
// serializeDb converts the cache database into a form to save to disk
func (f *Finder) serializeDb() ([]byte, error) {
	// sort dir entries
	var entryList = f.sortedDirEntries()

	// Generate an output file that can be conveniently loaded using the same number of threads
	// as were used in this execution (because presumably that will be the number of threads
	// used in the next execution too)

	// generate header
	header := []byte{}
	header = append(header, []byte(f.cacheMetadata.Version)...)
	header = append(header, lineSeparator)
	configDump, err := f.cacheMetadata.Config.Dump()
	if err != nil {
		return nil, err
	}
	header = append(header, configDump...)

	// serialize individual blocks in parallel
	numBlocks := f.numDbLoadingThreads
	if numBlocks > len(entryList) {
		numBlocks = len(entryList)
	}
	// blocks[0] is the header; blocks[1..numBlocks] are filled in by workers
	blocks := make([][]byte, 1+numBlocks)
	blocks[0] = header
	blockMin := 0
	wg := sync.WaitGroup{}
	var errLock sync.Mutex

	for i := 1; i <= numBlocks; i++ {
		// identify next block
		// (i/numBlocks partitioning spreads entries evenly across blocks)
		blockMax := len(entryList) * i / numBlocks
		block := entryList[blockMin:blockMax]

		// process block
		wg.Add(1)
		go func(index int, block []dirFullInfo) {
			byteBlock, subErr := f.serializeCacheEntry(block)
			f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
			if subErr != nil {
				f.verbosef("%v\n", subErr.Error())
				// <err> is only read after wg.Wait(), so this lock suffices
				errLock.Lock()
				err = subErr
				errLock.Unlock()
			} else {
				// each goroutine writes a distinct index, so no lock is needed
				blocks[index] = byteBlock
			}
			wg.Done()
		}(i, block)

		blockMin = blockMax
	}

	wg.Wait()

	if err != nil {
		return nil, err
	}

	// the lineSeparator joins are what let the loader split the file back
	// into blocks (see readBlocks in startFromExternalCache)
	content := bytes.Join(blocks, []byte{lineSeparator})

	return content, nil
}
1091
1092// dumpDb saves the cache database to disk
1093func (f *Finder) dumpDb() error {
1094 startTime := time.Now()
1095 f.verbosef("Dumping db\n")
1096
1097 tempPath := f.DbPath + ".tmp"
1098
1099 bytes, err := f.serializeDb()
1100 if err != nil {
1101 return err
1102 }
1103 serializeDate := time.Now()
1104 f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
1105 // dump file and atomically move
1106 err = f.filesystem.WriteFile(tempPath, bytes, 0777)
1107 if err != nil {
1108 return err
1109 }
1110 err = f.filesystem.Rename(tempPath, f.DbPath)
1111 if err != nil {
1112 return err
1113 }
1114
1115 f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate))
1116 return nil
1117}
1118
// statDirAsync schedules a background re-stat of <dir>. If the directory's
// metadata changed, its cached state is replaced and — if the directory still
// exists — a re-listing of its contents is scheduled too.
func (f *Finder) statDirAsync(dir *pathMap) {
	node := dir
	path := dir.path
	f.threadPool.Run(
		func() {
			updatedStats := f.statDirSync(path)

			if !f.isInfoUpToDate(node.statResponse, updatedStats) {
				// metadata changed: discard the cached file list
				node.mapNode = mapNode{
					statResponse: updatedStats,
					FileNames:    []string{},
				}
				f.setModified()
				// note: this reads the stats assigned just above, so it tests
				// the *new* ModTime; 0 means the directory no longer exists
				if node.statResponse.ModTime != 0 {
					// modification time was updated, so re-scan for
					// child directories
					f.listDirAsync(dir)
				}
			}
		},
	)
}
1141
1142func (f *Finder) statDirSync(path string) statResponse {
1143
1144 fileInfo, err := f.filesystem.Lstat(path)
1145
1146 var stats statResponse
1147 if err != nil {
1148 // in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
1149 return stats
1150 }
1151 modTime := fileInfo.ModTime()
1152 stats = statResponse{}
1153 inode, err := f.filesystem.InodeNumber(fileInfo)
1154 if err != nil {
1155 panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
1156 }
1157 stats.Inode = inode
1158 device, err := f.filesystem.DeviceNumber(fileInfo)
1159 if err != nil {
1160 panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
1161 }
1162 stats.Device = device
1163 permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
1164
1165 if err != nil {
1166 panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
1167 }
1168 // We're only interested in knowing whether anything about the directory
1169 // has changed since last check, so we use the latest of the two
1170 // modification times (content modification (mtime) and
1171 // permission modification (ctime))
1172 if permissionsChangeTime.After(modTime) {
1173 modTime = permissionsChangeTime
1174 }
1175 stats.ModTime = modTime.UnixNano()
1176
1177 return stats
1178}
1179
1180// pruneCacheCandidates removes the items that we don't want to include in our persistent cache
1181func (f *Finder) pruneCacheCandidates(items *DirEntries) {
1182
1183 for _, fileName := range items.FileNames {
1184 for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
1185 if fileName == abortedName {
1186 items.FileNames = []string{}
1187 items.DirNames = []string{}
1188 return
1189 }
1190 }
1191 }
1192
1193 // remove any files that aren't the ones we want to include
1194 writeIndex := 0
1195 for _, fileName := range items.FileNames {
1196 // include only these files
1197 for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
1198 if fileName == includedName {
1199 items.FileNames[writeIndex] = fileName
1200 writeIndex++
1201 break
1202 }
1203 }
1204 }
1205 // resize
1206 items.FileNames = items.FileNames[:writeIndex]
1207
1208 writeIndex = 0
1209 for _, dirName := range items.DirNames {
1210 items.DirNames[writeIndex] = dirName
1211 // ignore other dirs that are known to not be inputs to the build process
1212 include := true
1213 for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
1214 if dirName == excludedName {
1215 // don't include
1216 include = false
1217 break
1218 }
1219 }
1220 if include {
1221 writeIndex++
1222 }
1223 }
1224 // resize
1225 items.DirNames = items.DirNames[:writeIndex]
1226}
1227
1228func (f *Finder) listDirsAsync(nodes []*pathMap) {
1229 f.threadPool.Run(
1230 func() {
1231 for i := range nodes {
1232 f.listDirSync(nodes[i])
1233 }
1234 },
1235 )
1236}
1237
1238func (f *Finder) listDirAsync(node *pathMap) {
1239 f.threadPool.Run(
1240 func() {
1241 f.listDirSync(node)
1242 },
1243 )
1244}
1245
// listDirSync reads the contents of <dir> from the filesystem, filters them
// through the cache config (pruneCacheCandidates), and replaces the node's
// cached file names and child set accordingly. New subdirectories get a
// statDirAsync request queued.
func (f *Finder) listDirSync(dir *pathMap) {
	path := dir.path
	children, err := f.filesystem.ReadDir(path)

	if err != nil {
		// if listing the contents of the directory fails (presumably due to
		// permission denied), then treat the directory as empty
		children = []os.FileInfo{}
	}

	var subdirs []string
	var subfiles []string

	for _, child := range children {
		linkBits := child.Mode() & os.ModeSymlink
		isLink := linkBits != 0
		if child.IsDir() {
			if !isLink {
				// Skip symlink dirs.
				// We don't have to support symlink dirs because
				// that would cause duplicates.
				subdirs = append(subdirs, child.Name())
			}
		} else {
			// We do have to support symlink files because the link name might be
			// different than the target name
			// (for example, Android.bp -> build/soong/root.bp)
			subfiles = append(subfiles, child.Name())
		}

	}
	parentNode := dir

	entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
	f.pruneCacheCandidates(entry)

	// create a pathMap node for each relevant subdirectory
	relevantChildren := map[string]*pathMap{}
	for _, subdirName := range entry.DirNames {
		childNode, found := parentNode.children[subdirName]
		// if we already knew of this directory, then we already have a request pending to Stat it
		// if we didn't already know of this directory, then we must Stat it now
		if !found {
			childNode = parentNode.newChild(subdirName)
			f.statDirAsync(childNode)
		}
		relevantChildren[subdirName] = childNode
	}
	// Note that in rare cases, it's possible that we're reducing the set of
	// children via this statement, if these are all true:
	// 1. we previously had a cache that knew about subdirectories of parentNode
	// 2. the user created a prune-file (described in pruneCacheCandidates)
	//    inside <parentNode>, which specifies that the contents of parentNode
	//    are to be ignored.
	// The fact that it's possible to remove children here means that *pathMap structs
	// must not be looked up from f.nodes by filepath (and instead must be accessed by
	// direct pointer) until after every listDirSync completes
	parentNode.FileNames = entry.FileNames
	parentNode.children = relevantChildren

}
1307
1308// listMatches takes a node and a function that specifies which subdirectories and
1309// files to include, and listMatches returns the matches
1310func (f *Finder) listMatches(node *pathMap,
1311 filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
1312 entries := DirEntries{
1313 FileNames: node.FileNames,
1314 }
1315 entries.DirNames = make([]string, 0, len(node.children))
1316 for childName := range node.children {
1317 entries.DirNames = append(entries.DirNames, childName)
1318 }
1319
1320 dirNames, fileNames := filter(entries)
1321
1322 subDirs = []*pathMap{}
1323 filePaths = make([]string, 0, len(fileNames))
1324 for _, fileName := range fileNames {
1325 filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
1326 }
1327 subDirs = make([]*pathMap, 0, len(dirNames))
1328 for _, childName := range dirNames {
1329 child, ok := node.children[childName]
1330 if ok {
1331 subDirs = append(subDirs, child)
1332 }
1333 }
1334
1335 return subDirs, filePaths
1336}
1337
// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
// <approxNumThreads> is a budget that is divided among subdirectories in
// proportion to their approximate descendant counts; once the budget for a
// subtree drops below 2 the search falls back to a single-threaded scan.
func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
	approxNumThreads int) []string {

	if approxNumThreads < 2 {
		// Done spawning threads; process remaining directories
		return f.findInCacheSinglethreaded(node, filter)
	}

	totalWork := 0
	for _, child := range node.children {
		totalWork += child.approximateNumDescendents
	}
	// NOTE(review): if node has children but totalWork is 0, the division
	// below would panic; presumably approximateNumDescendents is >= 1 for
	// every existing child — confirm against UpdateNumDescendentsRecursive.
	childrenResults := make(chan []string, len(node.children))

	subDirs, filePaths := f.listMatches(node, filter)

	// process child directories
	for _, child := range subDirs {
		numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
		childProcessor := func(child *pathMap) {
			childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
			childrenResults <- childResults
		}
		// If we're allowed to use more than 1 thread to process this directory,
		// then instead we use 1 thread for each subdirectory.
		// It would be strange to spawn threads for only some subdirectories.
		go childProcessor(child)
	}

	// collect results (one message per spawned child)
	for i := 0; i < len(subDirs); i++ {
		childResults := <-childrenResults
		filePaths = append(filePaths, childResults...)
	}
	close(childrenResults)

	return filePaths
}
1377
1378// findInCacheSinglethreaded synchronously searches the cache for all matching file paths
1379// note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive
1380func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
1381 if node == nil {
1382 return []string{}
1383 }
1384
1385 nodes := []*pathMap{node}
1386 matches := []string{}
1387
1388 for len(nodes) > 0 {
1389 currentNode := nodes[0]
1390 nodes = nodes[1:]
1391
1392 subDirs, filePaths := f.listMatches(currentNode, filter)
1393
1394 nodes = append(nodes, subDirs...)
1395
1396 matches = append(matches, filePaths...)
1397 }
1398 return matches
1399}
diff --git a/finder/finder_test.go b/finder/finder_test.go
deleted file mode 100644
index 60e5eb28..00000000
--- a/finder/finder_test.go
+++ /dev/null
@@ -1,1573 +0,0 @@
1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package finder
16
17import (
18 "fmt"
19 "log"
20 "path/filepath"
21 "reflect"
22 "testing"
23
24 "sort"
25
26 "io/ioutil"
27
28 "android/soong/fs"
29 "runtime/debug"
30 "time"
31)
32
33// some utils for tests to use
34func newFs() *fs.MockFs {
35 return fs.NewMockFs(map[string][]byte{})
36}
37
38func newFinder(t *testing.T, filesystem *fs.MockFs, cacheParams CacheParams) *Finder {
39 cachePath := "/finder/finder-db"
40 cacheDir := filepath.Dir(cachePath)
41 filesystem.MkDirs(cacheDir)
42 if cacheParams.WorkingDirectory == "" {
43 cacheParams.WorkingDirectory = "/cwd"
44 }
45
46 logger := log.New(ioutil.Discard, "", 0)
47 finder := New(cacheParams, filesystem, logger, cachePath)
48 return finder
49}
50
51func finderWithSameParams(t *testing.T, original *Finder) *Finder {
52 return New(
53 original.cacheMetadata.Config.CacheParams,
54 original.filesystem,
55 original.logger,
56 original.DbPath)
57}
58
59func write(t *testing.T, path string, content string, filesystem *fs.MockFs) {
60 parent := filepath.Dir(path)
61 filesystem.MkDirs(parent)
62 err := filesystem.WriteFile(path, []byte(content), 0777)
63 if err != nil {
64 t.Fatal(err.Error())
65 }
66}
67
// create makes a small placeholder file at <path>.
func create(t *testing.T, path string, filesystem *fs.MockFs) {
	write(t, path, "hi", filesystem)
}
71
72func delete(t *testing.T, path string, filesystem *fs.MockFs) {
73 err := filesystem.Remove(path)
74 if err != nil {
75 t.Fatal(err.Error())
76 }
77}
78
79func removeAll(t *testing.T, path string, filesystem *fs.MockFs) {
80 err := filesystem.RemoveAll(path)
81 if err != nil {
82 t.Fatal(err.Error())
83 }
84}
85
86func move(t *testing.T, oldPath string, newPath string, filesystem *fs.MockFs) {
87 err := filesystem.Rename(oldPath, newPath)
88 if err != nil {
89 t.Fatal(err.Error())
90 }
91}
92
93func link(t *testing.T, newPath string, oldPath string, filesystem *fs.MockFs) {
94 parentPath := filepath.Dir(newPath)
95 err := filesystem.MkDirs(parentPath)
96 if err != nil {
97 t.Fatal(err.Error())
98 }
99 err = filesystem.Symlink(oldPath, newPath)
100 if err != nil {
101 t.Fatal(err.Error())
102 }
103}
104func read(t *testing.T, path string, filesystem *fs.MockFs) string {
105 reader, err := filesystem.Open(path)
106 if err != nil {
107 t.Fatalf(err.Error())
108 }
109 bytes, err := ioutil.ReadAll(reader)
110 if err != nil {
111 t.Fatal(err.Error())
112 }
113 return string(bytes)
114}
115func modTime(t *testing.T, path string, filesystem *fs.MockFs) time.Time {
116 stats, err := filesystem.Lstat(path)
117 if err != nil {
118 t.Fatal(err.Error())
119 }
120 return stats.ModTime()
121}
122func setReadable(t *testing.T, path string, readable bool, filesystem *fs.MockFs) {
123 err := filesystem.SetReadable(path, readable)
124 if err != nil {
125 t.Fatal(err.Error())
126 }
127}
// fatal reports <message>, prints the current goroutine's stack (t.Error
// alone doesn't include one), then aborts the test immediately.
func fatal(t *testing.T, message string) {
	t.Error(message)
	debug.PrintStack()
	t.FailNow()
}
133func assertSameResponse(t *testing.T, actual []string, expected []string) {
134 sort.Strings(actual)
135 sort.Strings(expected)
136 if !reflect.DeepEqual(actual, expected) {
137 fatal(
138 t,
139 fmt.Sprintf(
140 "Expected Finder to return these %v paths:\n %v,\ninstead returned these %v paths: %v\n",
141 len(expected), expected, len(actual), actual),
142 )
143 }
144}
145
146func assertSameStatCalls(t *testing.T, actual []string, expected []string) {
147 sort.Strings(actual)
148 sort.Strings(expected)
149
150 if !reflect.DeepEqual(actual, expected) {
151 fatal(
152 t,
153 fmt.Sprintf(
154 "Finder made incorrect Stat calls.\n"+
155 "Actual:\n"+
156 "%v\n"+
157 "Expected:\n"+
158 "%v\n"+
159 "\n",
160 actual, expected),
161 )
162 }
163}
164func assertSameReadDirCalls(t *testing.T, actual []string, expected []string) {
165 sort.Strings(actual)
166 sort.Strings(expected)
167
168 if !reflect.DeepEqual(actual, expected) {
169 fatal(
170 t,
171 fmt.Sprintf(
172 "Finder made incorrect ReadDir calls.\n"+
173 "Actual:\n"+
174 "%v\n"+
175 "Expected:\n"+
176 "%v\n"+
177 "\n",
178 actual, expected),
179 )
180 }
181}
182
183// runSimpleTests creates a few files, searches for findme.txt, and checks for the expected matches
184func runSimpleTest(t *testing.T, existentPaths []string, expectedMatches []string) {
185 filesystem := newFs()
186 root := "/tmp"
187 filesystem.MkDirs(root)
188 for _, path := range existentPaths {
189 create(t, filepath.Join(root, path), filesystem)
190 }
191
192 finder := newFinder(t,
193 filesystem,
194 CacheParams{
195 "/cwd",
196 []string{root},
197 nil,
198 nil,
199 []string{"findme.txt", "skipme.txt"},
200 },
201 )
202 defer finder.Shutdown()
203
204 foundPaths := finder.FindNamedAt(root, "findme.txt")
205 absoluteMatches := []string{}
206 for i := range expectedMatches {
207 absoluteMatches = append(absoluteMatches, filepath.Join(root, expectedMatches[i]))
208 }
209 assertSameResponse(t, foundPaths, absoluteMatches)
210}
211
212// end of utils, start of individual tests
213
214func TestSingleFile(t *testing.T) {
215 runSimpleTest(t,
216 []string{"findme.txt"},
217 []string{"findme.txt"},
218 )
219}
220
221func TestIncludeFiles(t *testing.T) {
222 runSimpleTest(t,
223 []string{"findme.txt", "skipme.txt"},
224 []string{"findme.txt"},
225 )
226}
227
228func TestNestedDirectories(t *testing.T) {
229 runSimpleTest(t,
230 []string{"findme.txt", "skipme.txt", "subdir/findme.txt", "subdir/skipme.txt"},
231 []string{"findme.txt", "subdir/findme.txt"},
232 )
233}
234
235func TestEmptyDirectory(t *testing.T) {
236 runSimpleTest(t,
237 []string{},
238 []string{},
239 )
240}
241
242func TestEmptyPath(t *testing.T) {
243 filesystem := newFs()
244 root := "/tmp"
245 create(t, filepath.Join(root, "findme.txt"), filesystem)
246
247 finder := newFinder(
248 t,
249 filesystem,
250 CacheParams{
251 RootDirs: []string{root},
252 IncludeFiles: []string{"findme.txt", "skipme.txt"},
253 },
254 )
255 defer finder.Shutdown()
256
257 foundPaths := finder.FindNamedAt("", "findme.txt")
258
259 assertSameResponse(t, foundPaths, []string{})
260}
261
262func TestFilesystemRoot(t *testing.T) {
263 filesystem := newFs()
264 root := "/"
265 createdPath := "/findme.txt"
266 create(t, createdPath, filesystem)
267
268 finder := newFinder(
269 t,
270 filesystem,
271 CacheParams{
272 RootDirs: []string{root},
273 IncludeFiles: []string{"findme.txt", "skipme.txt"},
274 },
275 )
276 defer finder.Shutdown()
277
278 foundPaths := finder.FindNamedAt(root, "findme.txt")
279
280 assertSameResponse(t, foundPaths, []string{createdPath})
281}
282
283func TestNonexistentPath(t *testing.T) {
284 filesystem := newFs()
285 create(t, "/tmp/findme.txt", filesystem)
286
287 finder := newFinder(
288 t,
289 filesystem,
290 CacheParams{
291 RootDirs: []string{"/tmp/IDontExist"},
292 IncludeFiles: []string{"findme.txt", "skipme.txt"},
293 },
294 )
295 defer finder.Shutdown()
296
297 foundPaths := finder.FindNamedAt("/tmp/IAlsoDontExist", "findme.txt")
298
299 assertSameResponse(t, foundPaths, []string{})
300}
301
302func TestExcludeDirs(t *testing.T) {
303 filesystem := newFs()
304 create(t, "/tmp/exclude/findme.txt", filesystem)
305 create(t, "/tmp/exclude/subdir/findme.txt", filesystem)
306 create(t, "/tmp/subdir/exclude/findme.txt", filesystem)
307 create(t, "/tmp/subdir/subdir/findme.txt", filesystem)
308 create(t, "/tmp/subdir/findme.txt", filesystem)
309 create(t, "/tmp/findme.txt", filesystem)
310
311 finder := newFinder(
312 t,
313 filesystem,
314 CacheParams{
315 RootDirs: []string{"/tmp"},
316 ExcludeDirs: []string{"exclude"},
317 IncludeFiles: []string{"findme.txt", "skipme.txt"},
318 },
319 )
320 defer finder.Shutdown()
321
322 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
323
324 assertSameResponse(t, foundPaths,
325 []string{"/tmp/findme.txt",
326 "/tmp/subdir/findme.txt",
327 "/tmp/subdir/subdir/findme.txt"})
328}
329
330func TestPruneFiles(t *testing.T) {
331 filesystem := newFs()
332 create(t, "/tmp/out/findme.txt", filesystem)
333 create(t, "/tmp/out/.ignore-out-dir", filesystem)
334 create(t, "/tmp/out/child/findme.txt", filesystem)
335
336 create(t, "/tmp/out2/.ignore-out-dir", filesystem)
337 create(t, "/tmp/out2/sub/findme.txt", filesystem)
338
339 create(t, "/tmp/findme.txt", filesystem)
340 create(t, "/tmp/include/findme.txt", filesystem)
341
342 finder := newFinder(
343 t,
344 filesystem,
345 CacheParams{
346 RootDirs: []string{"/tmp"},
347 PruneFiles: []string{".ignore-out-dir"},
348 IncludeFiles: []string{"findme.txt"},
349 },
350 )
351 defer finder.Shutdown()
352
353 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
354
355 assertSameResponse(t, foundPaths,
356 []string{"/tmp/findme.txt",
357 "/tmp/include/findme.txt"})
358}
359
360func TestRootDir(t *testing.T) {
361 filesystem := newFs()
362 create(t, "/tmp/a/findme.txt", filesystem)
363 create(t, "/tmp/a/subdir/findme.txt", filesystem)
364 create(t, "/tmp/b/findme.txt", filesystem)
365 create(t, "/tmp/b/subdir/findme.txt", filesystem)
366
367 finder := newFinder(
368 t,
369 filesystem,
370 CacheParams{
371 RootDirs: []string{"/tmp/a"},
372 IncludeFiles: []string{"findme.txt"},
373 },
374 )
375 defer finder.Shutdown()
376
377 foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
378
379 assertSameResponse(t, foundPaths,
380 []string{"/tmp/a/findme.txt",
381 "/tmp/a/subdir/findme.txt"})
382}
383
384func TestUncachedDir(t *testing.T) {
385 filesystem := newFs()
386 create(t, "/tmp/a/findme.txt", filesystem)
387 create(t, "/tmp/a/subdir/findme.txt", filesystem)
388 create(t, "/tmp/b/findme.txt", filesystem)
389 create(t, "/tmp/b/subdir/findme.txt", filesystem)
390
391 finder := newFinder(
392 t,
393 filesystem,
394 CacheParams{
395 RootDirs: []string{"/IDoNotExist"},
396 IncludeFiles: []string{"findme.txt"},
397 },
398 )
399
400 foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
401 // If the caller queries for a file that is in the cache, then computing the
402 // correct answer won't be fast, and it would be easy for the caller to
403 // fail to notice its slowness. Instead, we only ever search the cache for files
404 // to return, which enforces that we can determine which files will be
405 // interesting upfront.
406 assertSameResponse(t, foundPaths, []string{})
407
408 finder.Shutdown()
409}
410
// TestSearchingForFilesExcludedFromCache verifies that querying for a
// filename that is not listed in IncludeFiles (and therefore excluded
// from the cache) returns an empty result even though the file exists.
func TestSearchingForFilesExcludedFromCache(t *testing.T) {
	// setup filesystem
	filesystem := newFs()
	create(t, "/tmp/findme.txt", filesystem)
	create(t, "/tmp/a/findme.txt", filesystem)
	// misc.txt exists but is not in IncludeFiles below
	create(t, "/tmp/a/misc.txt", filesystem)

	// set up the finder and run it
	finder := newFinder(
		t,
		filesystem,
		CacheParams{
			RootDirs:     []string{"/tmp"},
			IncludeFiles: []string{"findme.txt"},
		},
	)
	foundPaths := finder.FindNamedAt("/tmp", "misc.txt")
	// If the caller queries for a file that is not in the cache, then computing the
	// correct answer won't be fast, and it would be easy for the caller to
	// fail to notice its slowness. Instead, we only ever search the cache for files
	// to return, which enforces that we can determine which files will be
	// interesting upfront.
	assertSameResponse(t, foundPaths, []string{})

	finder.Shutdown()
}
437
438func TestRelativeFilePaths(t *testing.T) {
439 filesystem := newFs()
440
441 create(t, "/tmp/ignore/hi.txt", filesystem)
442 create(t, "/tmp/include/hi.txt", filesystem)
443 create(t, "/cwd/hi.txt", filesystem)
444 create(t, "/cwd/a/hi.txt", filesystem)
445 create(t, "/cwd/a/a/hi.txt", filesystem)
446
447 finder := newFinder(
448 t,
449 filesystem,
450 CacheParams{
451 RootDirs: []string{"/cwd", "/tmp/include"},
452 IncludeFiles: []string{"hi.txt"},
453 },
454 )
455 defer finder.Shutdown()
456
457 foundPaths := finder.FindNamedAt("a", "hi.txt")
458 assertSameResponse(t, foundPaths,
459 []string{"a/hi.txt",
460 "a/a/hi.txt"})
461
462 foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt")
463 assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"})
464
465 foundPaths = finder.FindNamedAt(".", "hi.txt")
466 assertSameResponse(t, foundPaths,
467 []string{"hi.txt",
468 "a/hi.txt",
469 "a/a/hi.txt"})
470
471 foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt")
472 assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"})
473}
474
475// have to run this test with the race-detector (`go test -race src/android/soong/finder/*.go`)
476// for there to be much chance of the test actually detecting any error that may be present
477func TestRootDirsContainedInOtherRootDirs(t *testing.T) {
478 filesystem := newFs()
479
480 create(t, "/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt", filesystem)
481
482 finder := newFinder(
483 t,
484 filesystem,
485 CacheParams{
486 RootDirs: []string{"/", "/a/b/c", "/a/b/c/d/e/f", "/a/b/c/d/e/f/g/h/i"},
487 IncludeFiles: []string{"findme.txt"},
488 },
489 )
490 defer finder.Shutdown()
491
492 foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
493
494 assertSameResponse(t, foundPaths,
495 []string{"/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt"})
496}
497
498func TestFindFirst(t *testing.T) {
499 filesystem := newFs()
500 create(t, "/tmp/a/hi.txt", filesystem)
501 create(t, "/tmp/b/hi.txt", filesystem)
502 create(t, "/tmp/b/a/hi.txt", filesystem)
503
504 finder := newFinder(
505 t,
506 filesystem,
507 CacheParams{
508 RootDirs: []string{"/tmp"},
509 IncludeFiles: []string{"hi.txt"},
510 },
511 )
512 defer finder.Shutdown()
513
514 foundPaths := finder.FindFirstNamed("hi.txt")
515
516 assertSameResponse(t, foundPaths,
517 []string{"/tmp/a/hi.txt",
518 "/tmp/b/hi.txt"},
519 )
520}
521
522func TestConcurrentFindSameDirectory(t *testing.T) {
523 filesystem := newFs()
524
525 // create a bunch of files and directories
526 paths := []string{}
527 for i := 0; i < 10; i++ {
528 parentDir := fmt.Sprintf("/tmp/%v", i)
529 for j := 0; j < 10; j++ {
530 filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j))
531 paths = append(paths, filePath)
532 }
533 }
534 sort.Strings(paths)
535 for _, path := range paths {
536 create(t, path, filesystem)
537 }
538
539 // set up a finder
540 finder := newFinder(
541 t,
542 filesystem,
543 CacheParams{
544 RootDirs: []string{"/tmp"},
545 IncludeFiles: []string{"findme.txt"},
546 },
547 )
548 defer finder.Shutdown()
549
550 numTests := 20
551 results := make(chan []string, numTests)
552 // make several parallel calls to the finder
553 for i := 0; i < numTests; i++ {
554 go func() {
555 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
556 results <- foundPaths
557 }()
558 }
559
560 // check that each response was correct
561 for i := 0; i < numTests; i++ {
562 foundPaths := <-results
563 assertSameResponse(t, foundPaths, paths)
564 }
565}
566
567func TestConcurrentFindDifferentDirectories(t *testing.T) {
568 filesystem := newFs()
569
570 // create a bunch of files and directories
571 allFiles := []string{}
572 numSubdirs := 10
573 rootPaths := []string{}
574 queryAnswers := [][]string{}
575 for i := 0; i < numSubdirs; i++ {
576 parentDir := fmt.Sprintf("/tmp/%v", i)
577 rootPaths = append(rootPaths, parentDir)
578 queryAnswers = append(queryAnswers, []string{})
579 for j := 0; j < 10; j++ {
580 filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j))
581 queryAnswers[i] = append(queryAnswers[i], filePath)
582 allFiles = append(allFiles, filePath)
583 }
584 sort.Strings(queryAnswers[i])
585 }
586 sort.Strings(allFiles)
587 for _, path := range allFiles {
588 create(t, path, filesystem)
589 }
590
591 // set up a finder
592 finder := newFinder(
593 t,
594 filesystem,
595
596 CacheParams{
597 RootDirs: []string{"/tmp"},
598 IncludeFiles: []string{"findme.txt"},
599 },
600 )
601 defer finder.Shutdown()
602
603 type testRun struct {
604 path string
605 foundMatches []string
606 correctMatches []string
607 }
608
609 numTests := numSubdirs + 1
610 testRuns := make(chan testRun, numTests)
611
612 searchAt := func(path string, correctMatches []string) {
613 foundPaths := finder.FindNamedAt(path, "findme.txt")
614 testRuns <- testRun{path, foundPaths, correctMatches}
615 }
616
617 // make several parallel calls to the finder
618 go searchAt("/tmp", allFiles)
619 for i := 0; i < len(rootPaths); i++ {
620 go searchAt(rootPaths[i], queryAnswers[i])
621 }
622
623 // check that each response was correct
624 for i := 0; i < numTests; i++ {
625 testRun := <-testRuns
626 assertSameResponse(t, testRun.foundMatches, testRun.correctMatches)
627 }
628}
629
630func TestStrangelyFormattedPaths(t *testing.T) {
631 filesystem := newFs()
632
633 create(t, "/tmp/findme.txt", filesystem)
634 create(t, "/tmp/a/findme.txt", filesystem)
635 create(t, "/tmp/b/findme.txt", filesystem)
636
637 finder := newFinder(
638 t,
639 filesystem,
640 CacheParams{
641 RootDirs: []string{"//tmp//a//.."},
642 IncludeFiles: []string{"findme.txt"},
643 },
644 )
645 defer finder.Shutdown()
646
647 foundPaths := finder.FindNamedAt("//tmp//a//..", "findme.txt")
648
649 assertSameResponse(t, foundPaths,
650 []string{"/tmp/a/findme.txt",
651 "/tmp/b/findme.txt",
652 "/tmp/findme.txt"})
653}
654
655func TestCorruptedCacheHeader(t *testing.T) {
656 filesystem := newFs()
657
658 create(t, "/tmp/findme.txt", filesystem)
659 create(t, "/tmp/a/findme.txt", filesystem)
660 write(t, "/finder/finder-db", "sample header", filesystem)
661
662 finder := newFinder(
663 t,
664 filesystem,
665 CacheParams{
666 RootDirs: []string{"/tmp"},
667 IncludeFiles: []string{"findme.txt"},
668 },
669 )
670 defer finder.Shutdown()
671
672 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
673
674 assertSameResponse(t, foundPaths,
675 []string{"/tmp/a/findme.txt",
676 "/tmp/findme.txt"})
677}
678
679func TestCanUseCache(t *testing.T) {
680 // setup filesystem
681 filesystem := newFs()
682 create(t, "/tmp/findme.txt", filesystem)
683 create(t, "/tmp/a/findme.txt", filesystem)
684
685 // run the first finder
686 finder := newFinder(
687 t,
688 filesystem,
689 CacheParams{
690 RootDirs: []string{"/tmp"},
691 IncludeFiles: []string{"findme.txt"},
692 },
693 )
694 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
695 // check the response of the first finder
696 correctResponse := []string{"/tmp/a/findme.txt",
697 "/tmp/findme.txt"}
698 assertSameResponse(t, foundPaths, correctResponse)
699 finder.Shutdown()
700
701 // check results
702 cacheText := read(t, finder.DbPath, filesystem)
703 if len(cacheText) < 1 {
704 t.Fatalf("saved cache db is empty\n")
705 }
706 if len(filesystem.StatCalls) == 0 {
707 t.Fatal("No Stat calls recorded by mock filesystem")
708 }
709 if len(filesystem.ReadDirCalls) == 0 {
710 t.Fatal("No ReadDir calls recorded by filesystem")
711 }
712 statCalls := filesystem.StatCalls
713 filesystem.ClearMetrics()
714
715 // run the second finder
716 finder2 := finderWithSameParams(t, finder)
717 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
718 // check results
719 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
720 assertSameReadDirCalls(t, filesystem.StatCalls, statCalls)
721
722 finder2.Shutdown()
723}
724
725func TestCorruptedCacheBody(t *testing.T) {
726 // setup filesystem
727 filesystem := newFs()
728 create(t, "/tmp/findme.txt", filesystem)
729 create(t, "/tmp/a/findme.txt", filesystem)
730
731 // run the first finder
732 finder := newFinder(
733 t,
734 filesystem,
735 CacheParams{
736 RootDirs: []string{"/tmp"},
737 IncludeFiles: []string{"findme.txt"},
738 },
739 )
740 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
741 finder.Shutdown()
742
743 // check the response of the first finder
744 correctResponse := []string{"/tmp/a/findme.txt",
745 "/tmp/findme.txt"}
746 assertSameResponse(t, foundPaths, correctResponse)
747 numStatCalls := len(filesystem.StatCalls)
748 numReadDirCalls := len(filesystem.ReadDirCalls)
749
750 // load the cache file, corrupt it, and save it
751 cacheReader, err := filesystem.Open(finder.DbPath)
752 if err != nil {
753 t.Fatal(err)
754 }
755 cacheData, err := ioutil.ReadAll(cacheReader)
756 if err != nil {
757 t.Fatal(err)
758 }
759 cacheData = append(cacheData, []byte("DontMindMe")...)
760 filesystem.WriteFile(finder.DbPath, cacheData, 0777)
761 filesystem.ClearMetrics()
762
763 // run the second finder
764 finder2 := finderWithSameParams(t, finder)
765 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
766 // check results
767 assertSameResponse(t, foundPaths, correctResponse)
768 numNewStatCalls := len(filesystem.StatCalls)
769 numNewReadDirCalls := len(filesystem.ReadDirCalls)
770 // It's permissable to make more Stat calls with a corrupted cache because
771 // the Finder may restart once it detects corruption.
772 // However, it may have already issued many Stat calls.
773 // Because a corrupted db is not expected to be a common (or even a supported case),
774 // we don't care to optimize it and don't cache the already-issued Stat calls
775 if numNewReadDirCalls < numReadDirCalls {
776 t.Fatalf(
777 "Finder made fewer ReadDir calls with a corrupted cache (%v calls) than with no cache"+
778 " (%v calls)",
779 numNewReadDirCalls, numReadDirCalls)
780 }
781 if numNewStatCalls < numStatCalls {
782 t.Fatalf(
783 "Finder made fewer Stat calls with a corrupted cache (%v calls) than with no cache (%v calls)",
784 numNewStatCalls, numStatCalls)
785 }
786 finder2.Shutdown()
787}
788
789func TestStatCalls(t *testing.T) {
790 // setup filesystem
791 filesystem := newFs()
792 create(t, "/tmp/a/findme.txt", filesystem)
793
794 // run finder
795 finder := newFinder(
796 t,
797 filesystem,
798 CacheParams{
799 RootDirs: []string{"/tmp"},
800 IncludeFiles: []string{"findme.txt"},
801 },
802 )
803 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
804 finder.Shutdown()
805
806 // check response
807 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
808 assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a"})
809 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"})
810}
811
812func TestFileAdded(t *testing.T) {
813 // setup filesystem
814 filesystem := newFs()
815 create(t, "/tmp/ignoreme.txt", filesystem)
816 create(t, "/tmp/a/findme.txt", filesystem)
817 create(t, "/tmp/b/ignore.txt", filesystem)
818 create(t, "/tmp/b/c/nope.txt", filesystem)
819 create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
820
821 // run the first finder
822 finder := newFinder(
823 t,
824 filesystem,
825 CacheParams{
826 RootDirs: []string{"/tmp"},
827 IncludeFiles: []string{"findme.txt"},
828 },
829 )
830 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
831 filesystem.Clock.Tick()
832 finder.Shutdown()
833 // check the response of the first finder
834 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
835
836 // modify the filesystem
837 filesystem.Clock.Tick()
838 create(t, "/tmp/b/c/findme.txt", filesystem)
839 filesystem.Clock.Tick()
840 filesystem.ClearMetrics()
841
842 // run the second finder
843 finder2 := finderWithSameParams(t, finder)
844 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
845
846 // check results
847 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/c/findme.txt"})
848 assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"})
849 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c"})
850 finder2.Shutdown()
851
852}
853
854func TestDirectoriesAdded(t *testing.T) {
855 // setup filesystem
856 filesystem := newFs()
857 create(t, "/tmp/ignoreme.txt", filesystem)
858 create(t, "/tmp/a/findme.txt", filesystem)
859 create(t, "/tmp/b/ignore.txt", filesystem)
860 create(t, "/tmp/b/c/nope.txt", filesystem)
861 create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
862
863 // run the first finder
864 finder := newFinder(
865 t,
866 filesystem,
867 CacheParams{
868 RootDirs: []string{"/tmp"},
869 IncludeFiles: []string{"findme.txt"},
870 },
871 )
872 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
873 finder.Shutdown()
874 // check the response of the first finder
875 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
876
877 // modify the filesystem
878 filesystem.Clock.Tick()
879 create(t, "/tmp/b/c/new/findme.txt", filesystem)
880 create(t, "/tmp/b/c/new/new2/findme.txt", filesystem)
881 create(t, "/tmp/b/c/new/new2/ignoreme.txt", filesystem)
882 filesystem.ClearMetrics()
883
884 // run the second finder
885 finder2 := finderWithSameParams(t, finder)
886 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
887
888 // check results
889 assertSameResponse(t, foundPaths,
890 []string{"/tmp/a/findme.txt", "/tmp/b/c/new/findme.txt", "/tmp/b/c/new/new2/findme.txt"})
891 assertSameStatCalls(t, filesystem.StatCalls,
892 []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d", "/tmp/b/c/new", "/tmp/b/c/new/new2"})
893 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c", "/tmp/b/c/new", "/tmp/b/c/new/new2"})
894
895 finder2.Shutdown()
896}
897
898func TestDirectoryAndSubdirectoryBothUpdated(t *testing.T) {
899 // setup filesystem
900 filesystem := newFs()
901 create(t, "/tmp/hi1.txt", filesystem)
902 create(t, "/tmp/a/hi1.txt", filesystem)
903
904 // run the first finder
905 finder := newFinder(
906 t,
907 filesystem,
908 CacheParams{
909 RootDirs: []string{"/tmp"},
910 IncludeFiles: []string{"hi1.txt", "hi2.txt"},
911 },
912 )
913 foundPaths := finder.FindNamedAt("/tmp", "hi1.txt")
914 finder.Shutdown()
915 // check the response of the first finder
916 assertSameResponse(t, foundPaths, []string{"/tmp/hi1.txt", "/tmp/a/hi1.txt"})
917
918 // modify the filesystem
919 filesystem.Clock.Tick()
920 create(t, "/tmp/hi2.txt", filesystem)
921 create(t, "/tmp/a/hi2.txt", filesystem)
922 filesystem.ClearMetrics()
923
924 // run the second finder
925 finder2 := finderWithSameParams(t, finder)
926 foundPaths = finder2.FindAll()
927
928 // check results
929 assertSameResponse(t, foundPaths,
930 []string{"/tmp/hi1.txt", "/tmp/hi2.txt", "/tmp/a/hi1.txt", "/tmp/a/hi2.txt"})
931 assertSameStatCalls(t, filesystem.StatCalls,
932 []string{"/tmp", "/tmp/a"})
933 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"})
934
935 finder2.Shutdown()
936}
937
938func TestFileDeleted(t *testing.T) {
939 // setup filesystem
940 filesystem := newFs()
941 create(t, "/tmp/ignoreme.txt", filesystem)
942 create(t, "/tmp/a/findme.txt", filesystem)
943 create(t, "/tmp/b/findme.txt", filesystem)
944 create(t, "/tmp/b/c/nope.txt", filesystem)
945 create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
946
947 // run the first finder
948 finder := newFinder(
949 t,
950 filesystem,
951 CacheParams{
952 RootDirs: []string{"/tmp"},
953 IncludeFiles: []string{"findme.txt"},
954 },
955 )
956 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
957 finder.Shutdown()
958 // check the response of the first finder
959 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/findme.txt"})
960
961 // modify the filesystem
962 filesystem.Clock.Tick()
963 delete(t, "/tmp/b/findme.txt", filesystem)
964 filesystem.ClearMetrics()
965
966 // run the second finder
967 finder2 := finderWithSameParams(t, finder)
968 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
969
970 // check results
971 assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
972 assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"})
973 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"})
974
975 finder2.Shutdown()
976}
977
978func TestDirectoriesDeleted(t *testing.T) {
979 // setup filesystem
980 filesystem := newFs()
981 create(t, "/tmp/findme.txt", filesystem)
982 create(t, "/tmp/a/findme.txt", filesystem)
983 create(t, "/tmp/a/1/findme.txt", filesystem)
984 create(t, "/tmp/a/1/2/findme.txt", filesystem)
985 create(t, "/tmp/b/findme.txt", filesystem)
986
987 // run the first finder
988 finder := newFinder(
989 t,
990 filesystem,
991 CacheParams{
992 RootDirs: []string{"/tmp"},
993 IncludeFiles: []string{"findme.txt"},
994 },
995 )
996 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
997 finder.Shutdown()
998 // check the response of the first finder
999 assertSameResponse(t, foundPaths,
1000 []string{"/tmp/findme.txt",
1001 "/tmp/a/findme.txt",
1002 "/tmp/a/1/findme.txt",
1003 "/tmp/a/1/2/findme.txt",
1004 "/tmp/b/findme.txt"})
1005
1006 // modify the filesystem
1007 filesystem.Clock.Tick()
1008 removeAll(t, "/tmp/a/1", filesystem)
1009 filesystem.ClearMetrics()
1010
1011 // run the second finder
1012 finder2 := finderWithSameParams(t, finder)
1013 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
1014
1015 // check results
1016 assertSameResponse(t, foundPaths,
1017 []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/b/findme.txt"})
1018 // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
1019 // if the Finder detects the nonexistence of /tmp/a/1
1020 // However, when resuming from cache, we don't want the Finder to necessarily wait
1021 // to stat a directory until after statting its parent.
1022 // So here we just include /tmp/a/1/2 in the list.
1023 // The Finder is currently implemented to always restat every dir and
1024 // to not short-circuit due to nonexistence of parents (but it will remove
1025 // missing dirs from the cache for next time)
1026 assertSameStatCalls(t, filesystem.StatCalls,
1027 []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b"})
1028 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/a"})
1029
1030 finder2.Shutdown()
1031}
1032
1033func TestDirectoriesMoved(t *testing.T) {
1034 // setup filesystem
1035 filesystem := newFs()
1036 create(t, "/tmp/findme.txt", filesystem)
1037 create(t, "/tmp/a/findme.txt", filesystem)
1038 create(t, "/tmp/a/1/findme.txt", filesystem)
1039 create(t, "/tmp/a/1/2/findme.txt", filesystem)
1040 create(t, "/tmp/b/findme.txt", filesystem)
1041
1042 // run the first finder
1043 finder := newFinder(
1044 t,
1045 filesystem,
1046 CacheParams{
1047 RootDirs: []string{"/tmp"},
1048 IncludeFiles: []string{"findme.txt"},
1049 },
1050 )
1051 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1052 finder.Shutdown()
1053 // check the response of the first finder
1054 assertSameResponse(t, foundPaths,
1055 []string{"/tmp/findme.txt",
1056 "/tmp/a/findme.txt",
1057 "/tmp/a/1/findme.txt",
1058 "/tmp/a/1/2/findme.txt",
1059 "/tmp/b/findme.txt"})
1060
1061 // modify the filesystem
1062 filesystem.Clock.Tick()
1063 move(t, "/tmp/a", "/tmp/c", filesystem)
1064 filesystem.ClearMetrics()
1065
1066 // run the second finder
1067 finder2 := finderWithSameParams(t, finder)
1068 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
1069
1070 // check results
1071 assertSameResponse(t, foundPaths,
1072 []string{"/tmp/findme.txt",
1073 "/tmp/b/findme.txt",
1074 "/tmp/c/findme.txt",
1075 "/tmp/c/1/findme.txt",
1076 "/tmp/c/1/2/findme.txt"})
1077 // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
1078 // if the Finder detects the nonexistence of /tmp/a/1
1079 // However, when resuming from cache, we don't want the Finder to necessarily wait
1080 // to stat a directory until after statting its parent.
1081 // So here we just include /tmp/a/1/2 in the list.
1082 // The Finder is currently implemented to always restat every dir and
1083 // to not short-circuit due to nonexistence of parents (but it will remove
1084 // missing dirs from the cache for next time)
1085 assertSameStatCalls(t, filesystem.StatCalls,
1086 []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"})
1087 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"})
1088 finder2.Shutdown()
1089}
1090
1091func TestDirectoriesSwapped(t *testing.T) {
1092 // setup filesystem
1093 filesystem := newFs()
1094 create(t, "/tmp/findme.txt", filesystem)
1095 create(t, "/tmp/a/findme.txt", filesystem)
1096 create(t, "/tmp/a/1/findme.txt", filesystem)
1097 create(t, "/tmp/a/1/2/findme.txt", filesystem)
1098 create(t, "/tmp/b/findme.txt", filesystem)
1099
1100 // run the first finder
1101 finder := newFinder(
1102 t,
1103 filesystem,
1104 CacheParams{
1105 RootDirs: []string{"/tmp"},
1106 IncludeFiles: []string{"findme.txt"},
1107 },
1108 )
1109 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1110 finder.Shutdown()
1111 // check the response of the first finder
1112 assertSameResponse(t, foundPaths,
1113 []string{"/tmp/findme.txt",
1114 "/tmp/a/findme.txt",
1115 "/tmp/a/1/findme.txt",
1116 "/tmp/a/1/2/findme.txt",
1117 "/tmp/b/findme.txt"})
1118
1119 // modify the filesystem
1120 filesystem.Clock.Tick()
1121 move(t, "/tmp/a", "/tmp/temp", filesystem)
1122 move(t, "/tmp/b", "/tmp/a", filesystem)
1123 move(t, "/tmp/temp", "/tmp/b", filesystem)
1124 filesystem.ClearMetrics()
1125
1126 // run the second finder
1127 finder2 := finderWithSameParams(t, finder)
1128 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
1129
1130 // check results
1131 assertSameResponse(t, foundPaths,
1132 []string{"/tmp/findme.txt",
1133 "/tmp/a/findme.txt",
1134 "/tmp/b/findme.txt",
1135 "/tmp/b/1/findme.txt",
1136 "/tmp/b/1/2/findme.txt"})
1137 // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
1138 // if the Finder detects the nonexistence of /tmp/a/1
1139 // However, when resuming from cache, we don't want the Finder to necessarily wait
1140 // to stat a directory until after statting its parent.
1141 // So here we just include /tmp/a/1/2 in the list.
1142 // The Finder is currently implemented to always restat every dir and
1143 // to not short-circuit due to nonexistence of parents (but it will remove
1144 // missing dirs from the cache for next time)
1145 assertSameStatCalls(t, filesystem.StatCalls,
1146 []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"})
1147 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"})
1148 finder2.Shutdown()
1149}
1150
1151// runFsReplacementTest tests a change modifying properties of the filesystem itself:
1152// runFsReplacementTest tests changing the user, the hostname, or the device number
1153// runFsReplacementTest is a helper method called by other tests
1154func runFsReplacementTest(t *testing.T, fs1 *fs.MockFs, fs2 *fs.MockFs) {
1155 // setup fs1
1156 create(t, "/tmp/findme.txt", fs1)
1157 create(t, "/tmp/a/findme.txt", fs1)
1158 create(t, "/tmp/a/a/findme.txt", fs1)
1159
1160 // setup fs2 to have the same directories but different files
1161 create(t, "/tmp/findme.txt", fs2)
1162 create(t, "/tmp/a/findme.txt", fs2)
1163 create(t, "/tmp/a/a/ignoreme.txt", fs2)
1164 create(t, "/tmp/a/b/findme.txt", fs2)
1165
1166 // run the first finder
1167 finder := newFinder(
1168 t,
1169 fs1,
1170 CacheParams{
1171 RootDirs: []string{"/tmp"},
1172 IncludeFiles: []string{"findme.txt"},
1173 },
1174 )
1175 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1176 finder.Shutdown()
1177 // check the response of the first finder
1178 assertSameResponse(t, foundPaths,
1179 []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/a/findme.txt"})
1180
1181 // copy the cache data from the first filesystem to the second
1182 cacheContent := read(t, finder.DbPath, fs1)
1183 write(t, finder.DbPath, cacheContent, fs2)
1184
1185 // run the second finder, with the same config and same cache contents but a different filesystem
1186 finder2 := newFinder(
1187 t,
1188 fs2,
1189 CacheParams{
1190 RootDirs: []string{"/tmp"},
1191 IncludeFiles: []string{"findme.txt"},
1192 },
1193 )
1194 foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
1195
1196 // check results
1197 assertSameResponse(t, foundPaths,
1198 []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/b/findme.txt"})
1199 assertSameStatCalls(t, fs2.StatCalls,
1200 []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"})
1201 assertSameReadDirCalls(t, fs2.ReadDirCalls,
1202 []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"})
1203 finder2.Shutdown()
1204}
1205
1206func TestChangeOfDevice(t *testing.T) {
1207 fs1 := newFs()
1208 // not as fine-grained mounting controls as a real filesystem, but should be adequate
1209 fs1.SetDeviceNumber(0)
1210
1211 fs2 := newFs()
1212 fs2.SetDeviceNumber(1)
1213
1214 runFsReplacementTest(t, fs1, fs2)
1215}
1216
1217func TestChangeOfUserOrHost(t *testing.T) {
1218 fs1 := newFs()
1219 fs1.SetViewId("me@here")
1220
1221 fs2 := newFs()
1222 fs2.SetViewId("you@there")
1223
1224 runFsReplacementTest(t, fs1, fs2)
1225}
1226
1227func TestConsistentCacheOrdering(t *testing.T) {
1228 // setup filesystem
1229 filesystem := newFs()
1230 for i := 0; i < 5; i++ {
1231 create(t, fmt.Sprintf("/tmp/%v/findme.txt", i), filesystem)
1232 }
1233
1234 // run the first finder
1235 finder := newFinder(
1236 t,
1237 filesystem,
1238 CacheParams{
1239 RootDirs: []string{"/tmp"},
1240 IncludeFiles: []string{"findme.txt"},
1241 },
1242 )
1243 finder.FindNamedAt("/tmp", "findme.txt")
1244 finder.Shutdown()
1245
1246 // read db file
1247 string1 := read(t, finder.DbPath, filesystem)
1248
1249 err := filesystem.Remove(finder.DbPath)
1250 if err != nil {
1251 t.Fatal(err)
1252 }
1253
1254 // run another finder
1255 finder2 := finderWithSameParams(t, finder)
1256 finder2.FindNamedAt("/tmp", "findme.txt")
1257 finder2.Shutdown()
1258
1259 string2 := read(t, finder.DbPath, filesystem)
1260
1261 if string1 != string2 {
1262 t.Errorf("Running Finder twice generated two dbs not having identical contents.\n"+
1263 "Content of first file:\n"+
1264 "\n"+
1265 "%v"+
1266 "\n"+
1267 "\n"+
1268 "Content of second file:\n"+
1269 "\n"+
1270 "%v\n"+
1271 "\n",
1272 string1,
1273 string2,
1274 )
1275 }
1276
1277}
1278
1279func TestNumSyscallsOfSecondFind(t *testing.T) {
1280 // setup filesystem
1281 filesystem := newFs()
1282 create(t, "/tmp/findme.txt", filesystem)
1283 create(t, "/tmp/a/findme.txt", filesystem)
1284 create(t, "/tmp/a/misc.txt", filesystem)
1285
1286 // set up the finder and run it once
1287 finder := newFinder(
1288 t,
1289 filesystem,
1290 CacheParams{
1291 RootDirs: []string{"/tmp"},
1292 IncludeFiles: []string{"findme.txt"},
1293 },
1294 )
1295 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1296 assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"})
1297
1298 filesystem.ClearMetrics()
1299
1300 // run the finder again and confirm it doesn't check the filesystem
1301 refoundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1302 assertSameResponse(t, refoundPaths, foundPaths)
1303 assertSameStatCalls(t, filesystem.StatCalls, []string{})
1304 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
1305
1306 finder.Shutdown()
1307}
1308
1309func TestChangingParamsOfSecondFind(t *testing.T) {
1310 // setup filesystem
1311 filesystem := newFs()
1312 create(t, "/tmp/findme.txt", filesystem)
1313 create(t, "/tmp/a/findme.txt", filesystem)
1314 create(t, "/tmp/a/metoo.txt", filesystem)
1315
1316 // set up the finder and run it once
1317 finder := newFinder(
1318 t,
1319 filesystem,
1320 CacheParams{
1321 RootDirs: []string{"/tmp"},
1322 IncludeFiles: []string{"findme.txt", "metoo.txt"},
1323 },
1324 )
1325 foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
1326 assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"})
1327
1328 filesystem.ClearMetrics()
1329
1330 // run the finder again and confirm it gets the right answer without asking the filesystem
1331 refoundPaths := finder.FindNamedAt("/tmp", "metoo.txt")
1332 assertSameResponse(t, refoundPaths, []string{"/tmp/a/metoo.txt"})
1333 assertSameStatCalls(t, filesystem.StatCalls, []string{})
1334 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
1335
1336 finder.Shutdown()
1337}
1338
1339func TestSymlinkPointingToFile(t *testing.T) {
1340 // setup filesystem
1341 filesystem := newFs()
1342 create(t, "/tmp/a/hi.txt", filesystem)
1343 create(t, "/tmp/a/ignoreme.txt", filesystem)
1344 link(t, "/tmp/hi.txt", "a/hi.txt", filesystem)
1345 link(t, "/tmp/b/hi.txt", "../a/hi.txt", filesystem)
1346 link(t, "/tmp/c/hi.txt", "/tmp/hi.txt", filesystem)
1347 link(t, "/tmp/d/hi.txt", "../a/bye.txt", filesystem)
1348 link(t, "/tmp/d/bye.txt", "../a/hi.txt", filesystem)
1349 link(t, "/tmp/e/bye.txt", "../a/bye.txt", filesystem)
1350 link(t, "/tmp/f/hi.txt", "somethingThatDoesntExist", filesystem)
1351
1352 // set up the finder and run it once
1353 finder := newFinder(
1354 t,
1355 filesystem,
1356 CacheParams{
1357 RootDirs: []string{"/tmp"},
1358 IncludeFiles: []string{"hi.txt"},
1359 },
1360 )
1361 foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
1362 // should search based on the name of the link rather than the destination or validity of the link
1363 correctResponse := []string{
1364 "/tmp/a/hi.txt",
1365 "/tmp/hi.txt",
1366 "/tmp/b/hi.txt",
1367 "/tmp/c/hi.txt",
1368 "/tmp/d/hi.txt",
1369 "/tmp/f/hi.txt",
1370 }
1371 assertSameResponse(t, foundPaths, correctResponse)
1372
1373}
1374
1375func TestSymlinkPointingToDirectory(t *testing.T) {
1376 // setup filesystem
1377 filesystem := newFs()
1378 create(t, "/tmp/dir/hi.txt", filesystem)
1379 create(t, "/tmp/dir/ignoreme.txt", filesystem)
1380
1381 link(t, "/tmp/links/dir", "../dir", filesystem)
1382 link(t, "/tmp/links/link", "../dir", filesystem)
1383 link(t, "/tmp/links/broken", "nothingHere", filesystem)
1384 link(t, "/tmp/links/recursive", "recursive", filesystem)
1385
1386 // set up the finder and run it once
1387 finder := newFinder(
1388 t,
1389 filesystem,
1390 CacheParams{
1391 RootDirs: []string{"/tmp"},
1392 IncludeFiles: []string{"hi.txt"},
1393 },
1394 )
1395
1396 foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
1397
1398 // should completely ignore symlinks that point to directories
1399 correctResponse := []string{
1400 "/tmp/dir/hi.txt",
1401 }
1402 assertSameResponse(t, foundPaths, correctResponse)
1403
1404}
1405
1406// TestAddPruneFile confirms that adding a prune-file (into a directory for which we
1407// already had a cache) causes the directory to be ignored
1408func TestAddPruneFile(t *testing.T) {
1409 // setup filesystem
1410 filesystem := newFs()
1411 create(t, "/tmp/out/hi.txt", filesystem)
1412 create(t, "/tmp/out/a/hi.txt", filesystem)
1413 create(t, "/tmp/hi.txt", filesystem)
1414
1415 // do find
1416 finder := newFinder(
1417 t,
1418 filesystem,
1419 CacheParams{
1420 RootDirs: []string{"/tmp"},
1421 PruneFiles: []string{".ignore-out-dir"},
1422 IncludeFiles: []string{"hi.txt"},
1423 },
1424 )
1425
1426 foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
1427
1428 // check result
1429 assertSameResponse(t, foundPaths,
1430 []string{"/tmp/hi.txt",
1431 "/tmp/out/hi.txt",
1432 "/tmp/out/a/hi.txt"},
1433 )
1434 finder.Shutdown()
1435
1436 // modify filesystem
1437 filesystem.Clock.Tick()
1438 create(t, "/tmp/out/.ignore-out-dir", filesystem)
1439 // run another find and check its result
1440 finder2 := finderWithSameParams(t, finder)
1441 foundPaths = finder2.FindNamedAt("/tmp", "hi.txt")
1442 assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"})
1443 finder2.Shutdown()
1444}
1445
1446func TestUpdatingDbIffChanged(t *testing.T) {
1447 // setup filesystem
1448 filesystem := newFs()
1449 create(t, "/tmp/a/hi.txt", filesystem)
1450 create(t, "/tmp/b/bye.txt", filesystem)
1451
1452 // run the first finder
1453 finder := newFinder(
1454 t,
1455 filesystem,
1456 CacheParams{
1457 RootDirs: []string{"/tmp"},
1458 IncludeFiles: []string{"hi.txt"},
1459 },
1460 )
1461 foundPaths := finder.FindAll()
1462 filesystem.Clock.Tick()
1463 finder.Shutdown()
1464 // check results
1465 assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt"})
1466
1467 // modify the filesystem
1468 filesystem.Clock.Tick()
1469 create(t, "/tmp/b/hi.txt", filesystem)
1470 filesystem.Clock.Tick()
1471 filesystem.ClearMetrics()
1472
1473 // run the second finder
1474 finder2 := finderWithSameParams(t, finder)
1475 foundPaths = finder2.FindAll()
1476 finder2.Shutdown()
1477 // check results
1478 assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"})
1479 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"})
1480 expectedDbWriteTime := filesystem.Clock.Time()
1481 actualDbWriteTime := modTime(t, finder2.DbPath, filesystem)
1482 if actualDbWriteTime != expectedDbWriteTime {
1483 t.Fatalf("Expected to write db at %v, actually wrote db at %v\n",
1484 expectedDbWriteTime, actualDbWriteTime)
1485 }
1486
1487 // reset metrics
1488 filesystem.ClearMetrics()
1489
1490 // run the third finder
1491 finder3 := finderWithSameParams(t, finder2)
1492 foundPaths = finder3.FindAll()
1493
1494 // check results
1495 assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"})
1496 assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
1497 finder3.Shutdown()
1498 actualDbWriteTime = modTime(t, finder3.DbPath, filesystem)
1499 if actualDbWriteTime != expectedDbWriteTime {
1500 t.Fatalf("Re-wrote db even when contents did not change")
1501 }
1502
1503}
1504
1505func TestDirectoryNotPermitted(t *testing.T) {
1506 // setup filesystem
1507 filesystem := newFs()
1508 create(t, "/tmp/hi.txt", filesystem)
1509 create(t, "/tmp/a/hi.txt", filesystem)
1510 create(t, "/tmp/a/a/hi.txt", filesystem)
1511 create(t, "/tmp/b/hi.txt", filesystem)
1512
1513 // run the first finder
1514 finder := newFinder(
1515 t,
1516 filesystem,
1517 CacheParams{
1518 RootDirs: []string{"/tmp"},
1519 IncludeFiles: []string{"hi.txt"},
1520 },
1521 )
1522 foundPaths := finder.FindAll()
1523 filesystem.Clock.Tick()
1524 finder.Shutdown()
1525 allPaths := []string{"/tmp/hi.txt", "/tmp/a/hi.txt", "/tmp/a/a/hi.txt", "/tmp/b/hi.txt"}
1526 // check results
1527 assertSameResponse(t, foundPaths, allPaths)
1528
1529 // modify the filesystem
1530 filesystem.Clock.Tick()
1531
1532 setReadable(t, "/tmp/a", false, filesystem)
1533 filesystem.Clock.Tick()
1534
1535 // run the second finder
1536 finder2 := finderWithSameParams(t, finder)
1537 foundPaths = finder2.FindAll()
1538 finder2.Shutdown()
1539 // check results
1540 assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt", "/tmp/b/hi.txt"})
1541
1542 // modify the filesystem back
1543 setReadable(t, "/tmp/a", true, filesystem)
1544
1545 // run the third finder
1546 finder3 := finderWithSameParams(t, finder2)
1547 foundPaths = finder3.FindAll()
1548 finder3.Shutdown()
1549 // check results
1550 assertSameResponse(t, foundPaths, allPaths)
1551}
1552
1553func TestFileNotPermitted(t *testing.T) {
1554 // setup filesystem
1555 filesystem := newFs()
1556 create(t, "/tmp/hi.txt", filesystem)
1557 setReadable(t, "/tmp/hi.txt", false, filesystem)
1558
1559 // run the first finder
1560 finder := newFinder(
1561 t,
1562 filesystem,
1563 CacheParams{
1564 RootDirs: []string{"/tmp"},
1565 IncludeFiles: []string{"hi.txt"},
1566 },
1567 )
1568 foundPaths := finder.FindAll()
1569 filesystem.Clock.Tick()
1570 finder.Shutdown()
1571 // check results
1572 assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"})
1573}