cloud_storage

#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import logging
import os
import subprocess
import sys

from telemetry.core import util
from telemetry.internal.util import command_line

sys.path.insert(1, os.path.abspath(os.path.join(
    util.GetCatapultDir(), 'common', 'py_utils')))
from py_utils import cloud_storage

# Invert BUCKET_ALIASES so each full bucket name maps back to its short alias.
BUCKETS = {bucket: easy_bucket_name for easy_bucket_name, bucket
           in cloud_storage.BUCKET_ALIASES.iteritems()}


def _GetPaths(path):
  """Returns the (data file, hash file) paths for either member of the pair.

  The argument may name either the data file or its .sha1 hash file; both
  resolve to the same (file_path, hash_path) tuple.
  """
  root, ext = os.path.splitext(path)
  if ext == '.sha1':
    file_path = root
    hash_path = path
  else:
    file_path = path
    hash_path = path + '.sha1'
  return file_path, hash_path
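
# Illustration (hypothetical path, not from the original file): both members
# of a data/.sha1 pair resolve to the same tuple:
#   _GetPaths('data/foo.wpr')      -> ('data/foo.wpr', 'data/foo.wpr.sha1')
#   _GetPaths('data/foo.wpr.sha1') -> ('data/foo.wpr', 'data/foo.wpr.sha1')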


def _FindFilesInCloudStorage(files):
  """Returns a dict of all files and which buckets they're in."""
  # Preprocessing: get the contents of all buckets.
  bucket_contents = {}
  for bucket in BUCKETS:
    try:
      bucket_contents[bucket] = cloud_storage.List(bucket)
    except (cloud_storage.PermissionError, cloud_storage.CredentialsError):
      pass

  # Check if each file is in the bucket contents.
  file_buckets = {}
  for path in files:
    file_path, hash_path = _GetPaths(path)

    if file_path in file_buckets:
      # Ignore duplicates, if both data and sha1 file were in the file list.
      continue
    if not os.path.exists(hash_path):
      # Probably got some non-Cloud Storage files in the file list. Ignore.
      continue

    file_hash = cloud_storage.ReadHash(hash_path)
    file_buckets[file_path] = []
    for bucket in BUCKETS:
      if bucket in bucket_contents and file_hash in bucket_contents[bucket]:
        file_buckets[file_path].append(bucket)
  return file_buckets
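
# Illustration (hypothetical paths and bucket name, not from the original
# file): the result maps each local data-file path to the full names of the
# buckets whose listing contains its hash, e.g.
#   {'page_sets/data/foo.wpr': ['chromium-telemetry'],
#    'page_sets/data/bar.wpr': []}   # .sha1 exists locally, no bucket copy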


class Ls(command_line.Command):
  """List which bucket each file is in."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('-r', '--recursive', action='store_true')
    parser.add_argument('paths', nargs='+')

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    for path in args.paths:
      if not os.path.exists(path):
        parser.error('Path not found: %s' % path)

  def Run(self, args):
    def GetFilesInPaths(paths, recursive):
      """If path is a dir, yields all files in path, otherwise just yields path.
      If recursive is true, walks subdirectories recursively."""
      for path in paths:
        if not os.path.isdir(path):
          yield path
          continue

        if recursive:
          for root, _, filenames in os.walk(path):
            for filename in filenames:
              yield os.path.join(root, filename)
        else:
          for filename in os.listdir(path):
            yield os.path.join(path, filename)

    files = _FindFilesInCloudStorage(
        GetFilesInPaths(args.paths, args.recursive))

    if not files:
      print 'No files in Cloud Storage.'
      return

    for file_path, buckets in sorted(files.iteritems()):
      if buckets:
        buckets = [BUCKETS[bucket] for bucket in buckets]
        print '%-11s %s' % (','.join(buckets), file_path)
      else:
        print '%-11s %s' % ('not found', file_path)
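
# Example session (hypothetical paths and output; assumes subcommand names
# are the lowercased class names, as in telemetry's command_line module):
#   $ ./cloud_storage ls -r page_sets/data
#   public      page_sets/data/foo.wpr
#   not found   page_sets/data/bar.wpr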


class Mv(command_line.Command):
  """Move files to the given bucket."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')
    parser.add_argument('bucket', choices=cloud_storage.BUCKET_ALIASES)

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    args.bucket = cloud_storage.BUCKET_ALIASES[args.bucket]

  def Run(self, args):
    files = _FindFilesInCloudStorage(args.files)

    for file_path, buckets in sorted(files.iteritems()):
      if not buckets:
        raise IOError('%s not found in Cloud Storage.' % file_path)

    for file_path, buckets in sorted(files.iteritems()):
      if args.bucket in buckets:
        buckets.remove(args.bucket)
        if not buckets:
          logging.info('Skipping %s, no action needed.' % file_path)
          continue

      # Move to the target bucket.
      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
      cloud_storage.Move(buckets.pop(), args.bucket, file_hash)

      # Delete all additional copies.
      for bucket in buckets:
        cloud_storage.Delete(bucket, file_hash)
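
# Example invocation (hypothetical): consolidate a file into the bucket
# behind the 'public' alias, deleting any copies left in other buckets:
#   $ ./cloud_storage mv page_sets/data/foo.wpr public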


class Rm(command_line.Command):
  """Remove files from Cloud Storage."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')

  def Run(self, args):
    files = _FindFilesInCloudStorage(args.files)
    for file_path, buckets in sorted(files.iteritems()):
      file_hash = cloud_storage.ReadHash(file_path + '.sha1')
      for bucket in buckets:
        cloud_storage.Delete(bucket, file_hash)
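
# Example invocation (hypothetical): delete the payload from every bucket
# that holds it; the local .sha1 file is not touched:
#   $ ./cloud_storage rm page_sets/data/foo.wpr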


class Upload(command_line.Command):
  """Upload files to Cloud Storage."""

  @classmethod
  def AddCommandLineArgs(cls, parser):
    parser.add_argument('files', nargs='+')
    parser.add_argument('bucket', choices=cloud_storage.BUCKET_ALIASES)

  @classmethod
  def ProcessCommandLineArgs(cls, parser, args):
    args.bucket = cloud_storage.BUCKET_ALIASES[args.bucket]

    for path in args.files:
      if not os.path.exists(path):
        parser.error('File not found: %s' % path)

  def Run(self, args):
    for file_path in args.files:
      file_hash = cloud_storage.CalculateHash(file_path)

      # Create or update the hash file.
      hash_path = file_path + '.sha1'
      with open(hash_path, 'wb') as f:
        f.write(file_hash)
        f.flush()

      # Add the data to Cloud Storage.
      cloud_storage.Insert(args.bucket, file_hash, file_path)

      # Add the hash file to the branch, for convenience. :)
      subprocess.call(['git', 'add', hash_path])
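
# Example invocation (hypothetical): hash foo.wpr, write foo.wpr.sha1, upload
# the payload to the bucket behind the 'public' alias, and 'git add' the new
# .sha1 file so it can be committed in place of the data file:
#   $ ./cloud_storage upload page_sets/data/foo.wpr public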


class CloudStorageCommand(command_line.SubcommandCommand):
  commands = (Ls, Mv, Rm, Upload)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  sys.exit(CloudStorageCommand.main())