Package CedarBackup3 :: Package tools :: Module amazons3
[hide private]
[frames | no frames]

Source Code for Module CedarBackup3.tools.amazons3

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2014,2015 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 3 (>= 3.4) 
  29  # Project  : Cedar Backup, release 3 
  30  # Purpose  : Cedar Backup tool to synchronize an Amazon S3 bucket. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Notes 
  36  ######################################################################## 
  37   
  38  """ 
  39  Synchronizes a local directory with an Amazon S3 bucket. 
  40   
  41  No configuration is required; all necessary information is taken from the 
  42  command-line.  The only thing configuration would help with is the path 
  43  resolver interface, and it doesn't seem worth it to require configuration just 
  44  to get that. 
  45   
  46  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  47  """ 
  48   
  49  ######################################################################## 
  50  # Imported modules and constants 
  51  ######################################################################## 
  52   
  53  # System modules 
  54  import sys 
  55  import os 
  56  import logging 
  57  import getopt 
  58  import json 
  59  import warnings 
  60  from functools import total_ordering 
  61  import chardet 
  62   
  63  # Cedar Backup modules 
  64  from CedarBackup3.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT 
  65  from CedarBackup3.filesystem import FilesystemList 
  66  from CedarBackup3.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE 
  67  from CedarBackup3.util import Diagnostics, splitCommandLine, encodePath 
  68  from CedarBackup3.util import executeCommand 
  69   
  70   
  71  ######################################################################## 
  72  # Module-wide constants and variables 
  73  ######################################################################## 
  74   
  75  logger = logging.getLogger("CedarBackup3.log.tools.amazons3") 
  76   
  77  AWS_COMMAND   = [ "aws" ] 
  78   
  79  SHORT_SWITCHES     = "hVbql:o:m:OdsDvw" 
  80  LONG_SWITCHES      = [ 'help', 'version', 'verbose', 'quiet', 
  81                         'logfile=', 'owner=', 'mode=', 
  82                         'output', 'debug', 'stack', 'diagnostics', 
  83                         'verifyOnly', 'ignoreWarnings', ] 
#######################################################################
# Options class
#######################################################################

@total_ordering
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback3-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback3-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two main levels of validation in the C{Options} class.  The first
   is field-level validation.  Field-level validation comes into play when a
   given field in an object is assigned to or updated.  We use Python's
   C{property} functionality to enforce specific validations on field values.
   You should expect to catch a C{ValueError} exception when making
   assignments to fields if you are programmatically filling an object.

   The second level of validation is post-completion validation.  Certain
   validations don't make sense until an object representation of options is
   fully "complete".  We don't want these validations to apply all of the time,
   because it would make building up a valid object from scratch a real pain.
   For instance, we might have to do things in the right order to keep from
   throwing exceptions, etc.

   All of these post-completion validations are encapsulated in the
   L{Options.validate} method.  This method can be called at any time by a
   client, and will always be called immediately after creating a C{Options}
   object from a command line and before exporting a C{Options} object back to
   a command line.  This way, we get acceptable ease-of-use but we also don't
   accept or emit invalid command lines.

   @note: Comparing against C{None} is supported (any object sorts after
   C{None}).  Comparing against any other non-C{Options} object yields
   C{NotImplemented}, so C{==} is simply false and ordering raises
   C{TypeError}.

   @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__
   """

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty and will be invalid until it
      is filled in properly.

      No reference to the original arguments is saved off by this class.  Once
      the data has been parsed (successfully or not) this original information
      is discarded.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.  If you pass
      C{sys.argv} instead, things are not going to work.

      The argument string will be parsed into an argument list by the
      L{util.splitCommandLine} function (see the documentation for that
      function for some important notes about its limitations).  There is an
      assumption that the resulting list will be equivalent to C{sys.argv[1:]},
      just like C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called (with its default arguments) after successfully
      parsing any passed-in command line.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      command line, so an exception might still be raised.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid command line arguments.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

      @param argumentString: Command line for a program.
      @type argumentString: String, i.e. "--verbose /home/myuser s3://example.com-backup/myuser"

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         # Validation only applies to a parsed command line; an empty object
         # created with no arguments is allowed to start out invalid.
         if validate:
            self.validate()


   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.
      This is the (unvalidated) command-line string for the options.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()


   #############################
   # Standard comparison method
   #############################

   # Fields in comparison priority order, each paired with the conversion
   # applied to both sides before the less-than test.  Flags compare directly,
   # text-like fields compare as strings with None treated as empty, and the
   # mode compares as an integer with None treated as zero.
   _COMPARISON_FIELDS = (
      ("help", lambda value: value),
      ("version", lambda value: value),
      ("verbose", lambda value: value),
      ("quiet", lambda value: value),
      ("logfile", lambda value: str(value or "")),
      ("owner", lambda value: str(value or "")),
      ("mode", lambda value: int(value or 0)),
      ("output", lambda value: value),
      ("debug", lambda value: value),
      ("stacktrace", lambda value: value),
      ("diagnostics", lambda value: value),
      ("verifyOnly", lambda value: value),
      ("ignoreWarnings", lambda value: value),
      ("sourceDir", lambda value: str(value or "")),
      ("s3BucketUrl", lambda value: str(value or "")),
   )

   # Instances are mutable and define __eq__, so they are deliberately
   # unhashable (this is also what Python does implicitly).
   __hash__ = None

   @staticmethod
   def _isComparable(other):
      """
      Indicates whether C{other} can be passed to L{__cmp__}.
      Only C{None} and other C{Options} instances are comparable.
      """
      return other is None or isinstance(other, Options)

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.

      Fields are compared one at a time in the order given by
      C{_COMPARISON_FIELDS}; the first field that differs decides the result.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      for name, sortKey in self._COMPARISON_FIELDS:
         mine = getattr(self, name)
         theirs = getattr(other, name)
         if mine != theirs:
            # Values that differ but convert to equal keys (i.e. a mode of
            # None versus 0) fall through to 1, as they always have.
            return -1 if sortKey(mine) < sortKey(theirs) else 1
      return 0


   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      The value (including C{None}) is passed through L{encodePath} before
      being stored.
      @raise ValueError: If the value is an empty string.
      @raise ValueError: If the value cannot be encoded properly.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The logfile parameter must be a non-empty string.")
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
      else:
         # A two-character string would otherwise pass the length check below
         if isinstance(value, str):
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value) != 2:
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value[0]) < 1 or len(value[1]) < 1:
            raise ValueError("User and group tuple values must be non-empty strings.")
         self._owner = (value[0], value[1])

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.
      Strings are interpreted as octal (i.e. C{"644"}); any other value is
      converted with C{int()}.
      @raise ValueError: If the value cannot be converted or is negative.
      """
      if value is None:
         self._mode = None
      else:
         try:
            if isinstance(value, str):
               value = int(value, 8)
            else:
               value = int(value)
         except (TypeError, ValueError) as e:
            # int() reports a malformed string via ValueError, not TypeError;
            # catch both so callers always get the documented message.
            raise ValueError("Mode must be an octal integer >= 0, i.e. 644.") from e
         if value < 0:
            raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
         self._mode = value

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")


   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless C{--help}, C{--version} or C{--diagnostics} is set, both the
      source directory and the S3 bucket URL must be specified.  Other
      validations (as for allowed values for particular options) will be
      taken care of at assignment time by the properties functionality.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @raise ValueError: If one of the validations fails.
      """
      if not self.help and not self.version and not self.diagnostics:
         if self.sourceDir is None or self.s3BucketUrl is None:
            raise ValueError("Source directory and S3 bucket URL are both required.")

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  Besides that, the argument list is normalized to use the
      long option names (i.e. --version rather than -V).  The resulting list
      will be suitable for passing back to the constructor in the
      C{argumentList} parameter.  Unlike L{buildArgumentString}, string
      arguments are not quoted here, because there is no need for it.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called (with its default arguments) against the
      options before extracting the command line.  If the options are not valid,
      then an argument list will not be extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = []
      if self.help:
         argumentList.append("--help")
      if self.version:
         argumentList.append("--version")
      if self.verbose:
         argumentList.append("--verbose")
      if self.quiet:
         argumentList.append("--quiet")
      if self.logfile is not None:
         argumentList.extend(["--logfile", self.logfile])
      if self.owner is not None:
         argumentList.extend(["--owner", "%s:%s" % (self.owner[0], self.owner[1])])
      if self.mode is not None:
         # The mode is stored as an integer but written back in octal
         argumentList.extend(["--mode", "%o" % self.mode])
      if self.output:
         argumentList.append("--output")
      if self.debug:
         argumentList.append("--debug")
      if self.stacktrace:
         argumentList.append("--stack")
      if self.diagnostics:
         argumentList.append("--diagnostics")
      if self.verifyOnly:
         argumentList.append("--verifyOnly")
      if self.ignoreWarnings:
         argumentList.append("--ignoreWarnings")
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  Besides that, the argument string is normalized to use
      the long option names (i.e. --version rather than -V) and to wrap the
      logfile, owner, source directory and bucket URL values in double quotes.
      The resulting string will be suitable for passing back to the
      constructor in the C{argumentString} parameter.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called (with its default arguments) against the options
      before extracting the command line.  If the options are not valid, then
      an argument string will not be extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @note: Values are wrapped in quotes but embedded quote characters are
      not escaped.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = []
      if self.help:
         pieces.append("--help")
      if self.version:
         pieces.append("--version")
      if self.verbose:
         pieces.append("--verbose")
      if self.quiet:
         pieces.append("--quiet")
      if self.logfile is not None:
         pieces.append("--logfile \"%s\"" % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\"" % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o" % self.mode)
      if self.output:
         pieces.append("--output")
      if self.debug:
         pieces.append("--debug")
      if self.stacktrace:
         pieces.append("--stack")
      if self.diagnostics:
         pieces.append("--diagnostics")
      if self.verifyOnly:
         pieces.append("--verifyOnly")
      if self.ignoreWarnings:
         pieces.append("--ignoreWarnings")
      if self.sourceDir is not None:
         pieces.append("\"%s\"" % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\"" % self.s3BucketUrl)
      # Every piece, including the last one, is followed by a single space
      return "".join("%s " % piece for piece in pieces)

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation we do here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  We don't do
      any validation as to whether required elements exist or whether elements
      exist in the proper combination (instead, that's the job of the
      L{validate} method).

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If the switches cannot be parsed.
      @raise ValueError: If a switch value is rejected by its property.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = dict(opts)  # for a repeated switch, the last occurrence wins
      if "-h" in switches or "--help" in switches:
         self.help = True
      if "-V" in switches or "--version" in switches:
         self.version = True
      if "-b" in switches or "--verbose" in switches:
         self.verbose = True
      if "-q" in switches or "--quiet" in switches:
         self.quiet = True
      # For valued options the long switch is applied last, so it takes precedence
      for switch in ("-l", "--logfile"):
         if switch in switches:
            self.logfile = switches[switch]
      for switch in ("-o", "--owner"):
         if switch in switches:
            self.owner = switches[switch].split(":", 1)
      for switch in ("-m", "--mode"):
         if switch in switches:
            self.mode = switches[switch]
      if "-O" in switches or "--output" in switches:
         self.output = True
      if "-d" in switches or "--debug" in switches:
         self.debug = True
      if "-s" in switches or "--stack" in switches:
         self.stacktrace = True
      if "-D" in switches or "--diagnostics" in switches:
         self.diagnostics = True
      if "-v" in switches or "--verifyOnly" in switches:
         self.verifyOnly = True
      if "-w" in switches or "--ignoreWarnings" in switches:
         self.ignoreWarnings = True
      try:
         # Exactly two positional arguments are expected.  Any other count
         # fails to unpack and is left for validate() to report.
         # NOTE(review): this also swallows a ValueError raised by either
         # property setter (i.e. an empty string), so an invalid value is
         # silently dropped here - confirm whether that is intended.
         (self.sourceDir, self.s3BucketUrl) = remaining
      except ValueError:
         pass
#######################################################################
# Public functions
#######################################################################

#################
# cli() function
#################

def cli():
   """
   Implements the command-line interface for the C{cback3-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback3-amazons3-sync script.  It does
   all of the argument processing for the script, and then also implements the
   tool functionality.

   This function looks pretty similar to C{CedarBackup3.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   A different error code is returned for each type of failure:

      - C{1}: The Python interpreter version is < 3.4
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   When the C{--stack} option is given, exceptions raised while configuring
   logging or executing the action are not converted into error codes;
   they propagate to the caller so that the stack trace is visible.

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above.
   """
   # Compare as a tuple; no try/except is needed because sys.version_info
   # exists on every interpreter able to load this Python 3 module.
   if sys.version_info[:2] < (3, 4):
      sys.stderr.write("Python 3 version 3.4 or greater required.\n")
      return 1

   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception as e:
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   # Informational switches short-circuit everything else
   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   if options.stacktrace:
      # Let any failure propagate so the stack trace is shown
      logfile = setupLogging(options)
   else:
      try:
         logfile = setupLogging(options)
      except Exception as e:
         sys.stderr.write("Error setting up logging: %s\n" % e)
         return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]", options)
   logger.info("Logfile is [%s]", logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      # Let any failure propagate so the stack trace is shown
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception as e:
         logger.error("Error executing backup: %s", e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0
913
#######################################################################
# Utility functions
#######################################################################

####################
# _usage() function
####################

def _usage(fd=sys.stderr):
   """
   Writes the usage/help listing for the cback3-amazons3-sync script.

   The listing describes the two required arguments, every accepted switch
   (with the default logfile path, ownership and mode filled in) and a
   typical invocation.

   @param fd: File-like object that receives the listing, one C{write()} call per line.
   @note: Output goes through C{fd} rather than C{print} to facilitate unit testing.
   """
   ownership = (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1])
   listing = (
      "\n",
      " Usage: cback3-amazons3-sync [switches] sourceDir s3bucketUrl\n",
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      "\n",
      " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
      " bucket. After the sync is complete, a validation step is taken. An\n",
      " error is reported if the contents of the bucket do not match the\n",
      " source directory, or if the indicated size for any file differs.\n",
      " This tool is a wrapper over the AWS CLI command-line tool.\n",
      "\n",
      " The following arguments are required:\n",
      "\n",
      " sourceDir The local source directory on disk (must exist)\n",
      " s3BucketUrl The URL to the target Amazon S3 bucket\n",
      "\n",
      " The following switches are accepted:\n",
      "\n",
      " -h, --help Display this usage/help listing\n",
      " -V, --version Display version information\n",
      " -b, --verbose Print verbose output as well as logging to disk\n",
      " -q, --quiet Run quietly (display no output to the screen)\n",
      " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
      " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % ownership,
      " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
      " -O, --output Record some sub-command (i.e. aws) output to the log\n",
      " -d, --debug Write debugging information to the log (implies --output)\n",
      # The next line was annotated as being exactly 80 characters in width (TODO confirm alignment)
      " -s, --stack Dump Python stack trace instead of swallowing exceptions\n",
      " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
      " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
      " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
      "\n",
      " Typical usage would be something like:\n",
      "\n",
      " cback3-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
      "\n",
      " This will sync the contents of /home/myuser into the indicated bucket.\n",
      "\n",
   )
   for line in listing:
      fd.write(line)
967
######################
# _version() function
######################

def _version(fd=sys.stdout):
   """
   Writes the version banner for the cback3-amazons3-sync script.

   The banner names the tool, the Cedar Backup release it ships with, the
   copyright holder and the license, and points at C{--help}.

   @param fd: File-like object that receives the banner, one C{write()} call per line.
   @note: Output goes through C{fd} rather than C{print} to facilitate unit testing.
   """
   release = " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE)
   holder = " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL)
   banner = (
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      release,
      "\n",
      holder,
      " See CREDITS for a list of included code and other contributors.\n",
      " This is free software; there is NO warranty. See the\n",
      " GNU General Public License version 2 for copying conditions.\n",
      "\n",
      " Use the --help option for usage information.\n",
      "\n",
   )
   for line in banner:
      fd.write(line)
990
##########################
# _diagnostics() function
##########################

def _diagnostics(fd=sys.stdout):
   """
   Writes a runtime diagnostics report.

   A short heading is written first, then C{Diagnostics.printDiagnostics()}
   emits the report body to the same stream, followed by a closing blank line.

   @param fd: File-like object that receives the report.
   @note: Output goes through C{fd} rather than C{print} to facilitate unit testing.
   """
   for heading in ("\n", "Diagnostics:\n", "\n"):
      fd.write(heading)
   reporter = Diagnostics()
   reporter.printDiagnostics(fd=fd, prefix=" ")
   fd.write("\n")
1007
############################
# _executeAction() function
############################

def _executeAction(options):
   """
   Runs the cback3-amazons3-sync workflow.

   The source file list is built first.  The filename encoding check is
   skipped when C{options.ignoreWarnings} is set, and the upload is skipped
   when C{options.verifyOnly} is set.  The bucket contents are always
   verified as the final step.

   @param options: Program command-line options.
   @type options: Options object.

   @raise Exception: Under many generic error conditions
   """
   localDir = options.sourceDir
   fileList = _buildSourceFiles(localDir)

   if not options.ignoreWarnings:
      _checkSourceFiles(localDir, fileList)

   bucketUrl = options.s3BucketUrl
   if not options.verifyOnly:
      _synchronizeBucket(localDir, bucketUrl)

   _verifyBucketContents(localDir, fileList, bucketUrl)
1028
################################
# _buildSourceFiles() function
################################

def _buildSourceFiles(sourceDir):
   """
   Collects the contents of a source directory into a filesystem list.
   @param sourceDir: Local source directory
   @return: FilesystemList populated via C{addDirContents(sourceDir)}
   @raise ValueError: If the source directory does not exist on disk
   """
   if os.path.isdir(sourceDir):
      contents = FilesystemList()
      contents.addDirContents(sourceDir)
      return contents
   raise ValueError("Source directory does not exist on disk.")
1045
###############################
# _checkSourceFiles() function
###############################

def _checkSourceFiles(sourceDir, sourceFiles):
   """
   Check source files, trying to guess which ones will have encoding problems.

   Each filename is converted back to the raw bytes it has on disk, the
   encoding of those bytes is guessed with C{chardet}, and the name is then
   re-encoded using the encoding reported by C{Diagnostics}.  A filename is
   flagged when that round trip fails or does not reproduce the raw bytes.

   @param sourceDir: Local source directory (not used by the check itself)
   @param sourceFiles: Filesystem list containing contents of source directory
   @raises ValueError: If a problem file is found
   @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
   @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
   @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
   """
   encoding = Diagnostics().encoding

   failed = False
   with warnings.catch_warnings():
      warnings.simplefilter("ignore") # So warnings raised while detecting/comparing are not printed
      for entry in sourceFiles:
         # chardet only accepts bytes; os.fsencode() recovers the on-disk bytes
         # of a str filename, including any that could not be decoded cleanly.
         raw = entry if isinstance(entry, bytes) else os.fsencode(entry)
         detected = chardet.detect(raw)["encoding"]
         if not _isEncodingConsistent(raw, detected, encoding):
            logger.error("Inconsistent encoding for [%s]: got %s, but need %s", entry, detected, encoding)
            failed = True

   if not failed:
      logger.info("Completed checking source filename encoding (no problems found).")
   else:
      logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
      logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
      logger.error("Aternately, you can rename the problem files to be valid in the indicated locale.")
      logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
      raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")


def _isEncodingConsistent(raw, detected, encoding):
   """
   Indicates whether raw filename bytes survive a round trip through the target encoding.
   @param raw: Filename as bytes
   @param detected: Name of the guessed encoding, or C{None} if no guess was possible
   @param encoding: Name of the encoding the filename needs to be valid in
   @return: C{True} if decoding and re-encoding reproduces C{raw}, C{False} otherwise
   """
   try:
      # With no guess available, fall back to checking the bytes against the target encoding
      source = raw.decode(detected if detected else encoding)
   except (UnicodeDecodeError, LookupError):
      return False # undecodable bytes, or a guessed codec that Python does not know
   try:
      return source.encode(encoding) == raw
   except UnicodeEncodeError:
      return False
1087
################################
# _synchronizeBucket() function
################################

def _synchronizeBucket(sourceDir, s3BucketUrl):
   """
   Pushes the contents of a local directory up to an Amazon S3 bucket.

   Runs the AWS CLI C{s3 sync} sub-command with the C{--delete} and
   C{--recursive} switches.

   @param sourceDir: Local source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   """
   logger.info("Synchronizing local source directory up to Amazon S3.")
   command = ["s3", "sync", sourceDir, s3BucketUrl, "--delete", "--recursive"]
   outcome = executeCommand(AWS_COMMAND, command, returnOutput=False)
   status = outcome[0]
   if status == 0:
      return
   raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
1104
###################################
# _verifyBucketContents() function
###################################

def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
   """
   Verify that a source directory is equivalent to an Amazon S3 bucket.

   The bucket is listed through the AWS CLI C{s3api list-objects} sub-command.
   Every regular file in C{sourceFiles} must then be present in the listing
   under the same relative path and with the same size in bytes.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   @raise ValueError: If any file is missing from the bucket or differs in size
   """
   # As of this writing, the documentation for the S3 API that we're using
   # below says that up to 1000 elements at a time are returned, and that we
   # have to manually handle pagination by looking for the IsTruncated element.
   # However, in practice, this is not true.  I have been testing with
   # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
   # No matter how many items exist in my bucket and prefix, I get back a
   # single JSON result.  I've tested with buckets containing nearly 6000
   # elements.
   #
   # If I turn on debugging, it's clear that underneath, something in the API
   # is executing multiple list-object requests against AWS, and stitching
   # results together to give me back the final JSON result.  The debug output
   # clearly includes multiple requests, and each XML response (except for the
   # final one) contains <IsTruncated>true</IsTruncated>.
   #
   # This feature is not mentioned in the official changelog for any of the
   # releases going back to 1.0.0.  It appears to happen in the botocore
   # library, but I'll admit I can't actually find the code that implements it.
   # For now, all I can do is rely on this behavior and hope that the
   # documentation is out-of-date.  I'm not going to write code that tries to
   # parse out IsTruncated if I can't actually test that code.

   (bucket, prefix) = _splitBucketUrl(s3BucketUrl)

   query = "Contents[].{Key: Key, Size: Size}"
   args = [ "s3api", "list-objects", "--bucket", bucket, ]
   if prefix:
      args += [ "--prefix", prefix, ]  # a URL that names only the bucket has no prefix to filter on
   args += [ "--query", query, ]
   (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

   # Nothing matching the query comes back as JSON null (or no output at all), not as an empty list
   text = "".join(data).strip()
   listing = json.loads(text) if text else None

   contents = { }
   for entry in listing or []:
      contents[_relativeKey(entry["Key"], prefix)] = int(entry["Size"])

   failed = False
   for entry in sourceFiles:
      if os.path.isfile(entry):
         key = _relativeKey(entry, sourceDir)
         size = int(os.stat(entry).st_size)
         if key not in contents:
            logger.error("File was apparently not uploaded: [%s]", entry)
            failed = True
         elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
            failed = True

   if not failed:
      logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
   else:
      logger.error("There were differences between source directory and target S3 bucket.")
      raise ValueError("There were differences between source directory and target S3 bucket.")


def _splitBucketUrl(s3BucketUrl):
   """
   Splits an S3 URL into its bucket name and key prefix.
   @param s3BucketUrl: URL such as C{s3://bucket/some/prefix}
   @return: Tuple of (bucket, prefix); the prefix is empty if the URL names only a bucket
   """
   scheme = "s3://"
   location = s3BucketUrl[len(scheme):] if s3BucketUrl.startswith(scheme) else s3BucketUrl
   (bucket, _, prefix) = location.partition("/")
   return (bucket, prefix)


def _relativeKey(path, base):
   """
   Makes a path relative to a leading base, so local paths and S3 keys are comparable.
   @param path: Local file path or S3 object key
   @param base: Leading source directory or key prefix to remove
   @return: Path with the leading base and any leading slashes removed
   """
   # Only a leading match is removed; the base may legitimately recur deeper in the path
   if base and path.startswith(base):
      path = path[len(base):]
   return path.lstrip("/")
#########################################################################
# Main routine
########################################################################

# Run the command-line interface when executed as a script, and hand its
# return value to the interpreter as the process exit status.
if __name__ == "__main__":
   exitStatus = cli()
   sys.exit(exitStatus)