Package CedarBackup3 :: Package tools :: Module amazons3
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup3.tools.amazons3

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2014,2015 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 3 (>= 3.4) 
  29  # Project  : Cedar Backup, release 3 
  30  # Purpose  : Cedar Backup tool to synchronize an Amazon S3 bucket. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Notes 
  36  ######################################################################## 
  37   
  38  """ 
  39  Synchronizes a local directory with an Amazon S3 bucket. 
  40   
  41  No configuration is required; all necessary information is taken from the 
  42  command-line.  The only thing configuration would help with is the path 
  43  resolver interface, and it doesn't seem worth it to require configuration just 
  44  to get that. 
  45   
  46  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  47  """ 
  48   
  49  ######################################################################## 
  50  # Imported modules and constants 
  51  ######################################################################## 
  52   
  53  # System modules 
  54  import sys 
  55  import os 
  56  import logging 
  57  import getopt 
  58  import json 
  59  import chardet 
  60  import warnings 
  61  from functools import total_ordering 
  62   
  63  # Cedar Backup modules 
  64  from CedarBackup3.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT 
  65  from CedarBackup3.filesystem import FilesystemList 
  66  from CedarBackup3.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE 
  67  from CedarBackup3.util import Diagnostics, splitCommandLine, encodePath 
  68  from CedarBackup3.util import executeCommand 
  69   
  70   
  71  ######################################################################## 
  72  # Module-wide constants and variables 
  73  ######################################################################## 
  74   
  75  logger = logging.getLogger("CedarBackup3.log.tools.amazons3") 
  76   
  77  AWS_COMMAND   = [ "aws" ] 
  78   
  79  SHORT_SWITCHES     = "hVbql:o:m:OdsDvw" 
  80  LONG_SWITCHES      = [ 'help', 'version', 'verbose', 'quiet', 
  81                         'logfile=', 'owner=', 'mode=', 
  82                         'output', 'debug', 'stack', 'diagnostics', 
  83                         'verifyOnly', 'ignoreWarnings', ] 
#######################################################################
# Options class
#######################################################################

@total_ordering
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback3-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback3-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two main levels of validation in the C{Options} class.  The first
   is field-level validation.  Field-level validation comes into play when a
   given field in an object is assigned to or updated.  We use Python's
   C{property} functionality to enforce specific validations on field values,
   and in some places we even use customized list classes to enforce
   validations on list members.  You should expect to catch a C{ValueError}
   exception when making assignments to fields if you are programmatically
   filling an object.

   The second level of validation is post-completion validation.  Certain
   validations don't make sense until an object representation of options is
   fully "complete".  We don't want these validations to apply all of the time,
   because it would make building up a valid object from scratch a real pain.
   For instance, we might have to do things in the right order to keep from
   throwing exceptions, etc.

   All of these post-completion validations are encapsulated in the
   L{Options.validate} method.  This method can be called at any time by a
   client, and will always be called immediately after creating a C{Options}
   object from a command line and before exporting a C{Options} object back to
   a command line.  This way, we get acceptable ease-of-use but we also don't
   accept or emit invalid command lines.

   @note: Lists within this class are "unordered" for equality comparisons.

   @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__
   """

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty and will be invalid until it
      is filled in properly.

      No reference to the original arguments is saved off by this class.  Once
      the data has been parsed (successfully or not) this original information
      is discarded.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.  If you pass
      C{sys.argv} instead, things are not going to work.

      The argument string will be parsed into an argument list by the
      L{util.splitCommandLine} function (see the documentation for that
      function for some important notes about its limitations).  There is an
      assumption that the resulting list will be equivalent to C{sys.argv[1:]},
      just like C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called (with its default arguments) after successfully
      parsing any passed-in command line.  This validation ensures that
      appropriate actions, etc. have been specified.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      command line, so an exception might still be raised.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid command line arguments.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv}

      @param argumentString: Command line for a program.
      @type argumentString: String, i.e. "cback3-amazons3-sync --verbose stage store"

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         # Validation only makes sense once a command line has been parsed;
         # an empty object is allowed to exist in an incomplete state.
         if validate:
            self.validate()


   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()


   #############################
   # Standard comparison method
   #############################

   @staticmethod
   def _isComparable(other):
      """
      Indicates whether C{other} can be handed to L{__cmp__}.
      C{None} is accepted because L{__cmp__} orders every object after C{None}.
      """
      return other is None or isinstance(other, Options)

   def __eq__(self, other):
      """
      Equals operator, implemented in terms of original Python 2 compare operator.
      Returns C{NotImplemented} for unrelated types so that Python falls back
      to its default handling rather than failing on a missing attribute.
      """
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """
      Less-than operator, implemented in terms of original Python 2 compare operator.
      Returns C{NotImplemented} for unrelated types.
      """
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """
      Greater-than operator, implemented in terms of original Python 2 compare operator.
      Returns C{NotImplemented} for unrelated types.
      """
      if not self._isComparable(other):
         return NotImplemented
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.
      Lists within this class are "unordered" for equality comparisons.

      Fields are compared in a fixed order and the first differing field
      decides the result.  Flags compare directly, string-like fields compare
      by their C{str()} form (with C{None} treated as empty), and the mode
      compares numerically (with C{None} treated as zero).

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1

      def flagKey(value):
         return value

      def textKey(value):
         return str(value or "")

      def modeKey(value):
         return int(value or 0)

      ordering = (
         ("help", flagKey),
         ("version", flagKey),
         ("verbose", flagKey),
         ("quiet", flagKey),
         ("logfile", textKey),
         ("owner", textKey),
         ("mode", modeKey),
         ("output", flagKey),
         ("debug", flagKey),
         ("stacktrace", flagKey),
         ("diagnostics", flagKey),
         ("verifyOnly", flagKey),
         ("ignoreWarnings", flagKey),
         ("sourceDir", textKey),
         ("s3BucketUrl", textKey),
      )
      for name, key in ordering:
         mine = getattr(self, name)
         theirs = getattr(other, name)
         if mine != theirs:
            return -1 if key(mine) < key(theirs) else 1
      return 0


   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      The value is passed through C{encodePath} before being stored.
      @raise ValueError: If the value cannot be encoded properly.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The logfile parameter must be a non-empty string.")
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
      else:
         if isinstance(value, str):
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value) != 2:
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value[0]) < 1 or len(value[1]) < 1:
            raise ValueError("User and group tuple values must be non-empty strings.")
         self._owner = (value[0], value[1])

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.
      Strings are interpreted as octal; any other value is converted with C{int()}.
      @raise ValueError: If the value cannot be converted or is negative.
      """
      if value is None:
         self._mode = None
      else:
         try:
            if isinstance(value, str):
               value = int(value, 8)
            else:
               value = int(value)
         except (TypeError, ValueError):
            # int() reports a malformed string via ValueError and an
            # unsupported type via TypeError; report both the same way.
            raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
         if value < 0:
            raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
         self._mode = value

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")


   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless C{--help}, C{--version} or C{--diagnostics} are supplied, both
      the source directory and the S3 bucket URL must be set.  Other
      validations (as for allowed values for particular options) will be
      taken care of at assignment time by the properties functionality.

      @note: The command line format is specified by the L{_usage} function.
      Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.

      @raise ValueError: If one of the validations fails.
      """
      if not self.help and not self.version and not self.diagnostics:
         if self.sourceDir is None or self.s3BucketUrl is None:
            raise ValueError("Source directory and S3 bucket URL are both required.")

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  Besides that, the argument list is normalized to use the
      long option names (i.e. --version rather than -V).  The resulting list
      will be suitable for passing back to the constructor in the
      C{argumentList} parameter.  Unlike L{buildArgumentString}, string
      arguments are not quoted here, because there is no need for it.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called (with its default arguments) against the
      options before extracting the command line.  If the options are not valid,
      then an argument list will not be extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = []
      if self.help:
         argumentList.append("--help")
      if self.version:
         argumentList.append("--version")
      if self.verbose:
         argumentList.append("--verbose")
      if self.quiet:
         argumentList.append("--quiet")
      if self.logfile is not None:
         argumentList.append("--logfile")
         argumentList.append(self.logfile)
      if self.owner is not None:
         argumentList.append("--owner")
         argumentList.append("%s:%s" % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         argumentList.append("--mode")
         argumentList.append("%o" % self.mode)
      if self.output:
         argumentList.append("--output")
      if self.debug:
         argumentList.append("--debug")
      if self.stacktrace:
         argumentList.append("--stack")
      if self.diagnostics:
         argumentList.append("--diagnostics")
      if self.verifyOnly:
         argumentList.append("--verifyOnly")
      if self.ignoreWarnings:
         argumentList.append("--ignoreWarnings")
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  Besides that, the argument string is normalized to use
      the long option names (i.e. --version rather than -V) and to quote all
      string arguments with double quotes (C{"}).  The resulting string will be
      suitable for passing back to the constructor in the C{argumentString}
      parameter.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called (with its default arguments) against the options
      before extracting the command line.  If the options are not valid, then
      an argument string will not be extracted.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to extract an
      invalid command line.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = []
      if self.help:
         pieces.append("--help")
      if self.version:
         pieces.append("--version")
      if self.verbose:
         pieces.append("--verbose")
      if self.quiet:
         pieces.append("--quiet")
      if self.logfile is not None:
         pieces.append("--logfile \"%s\"" % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\"" % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o" % self.mode)
      if self.output:
         pieces.append("--output")
      if self.debug:
         pieces.append("--debug")
      if self.stacktrace:
         pieces.append("--stack")
      if self.diagnostics:
         pieces.append("--diagnostics")
      if self.verifyOnly:
         pieces.append("--verifyOnly")
      if self.ignoreWarnings:
         pieces.append("--ignoreWarnings")
      if self.sourceDir is not None:
         pieces.append("\"%s\"" % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\"" % self.s3BucketUrl)
      # Every piece is followed by a single space, including the last one.
      return "".join("%s " % piece for piece in pieces)

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation we do here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  We don't do
      any validation as to whether required elements exist or whether elements
      exist in the proper combination (instead, that's the job of the
      L{validate} method).

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      The two positional arguments are only assigned when exactly two are
      present; any other count leaves them unset for L{validate} to report.
      Invalid positional values (such as an empty string) are reported by the
      property setters rather than being silently discarded.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If the argument list cannot be successfully parsed.
      @raise ValueError: If a parsed value is rejected by its property setter.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = dict(opts)  # later duplicates of the same switch win
      if "-h" in switches or "--help" in switches:
         self.help = True
      if "-V" in switches or "--version" in switches:
         self.version = True
      if "-b" in switches or "--verbose" in switches:
         self.verbose = True
      if "-q" in switches or "--quiet" in switches:
         self.quiet = True
      # For parameterized options the long form is applied second so that it
      # takes precedence over the short form.
      if "-l" in switches:
         self.logfile = switches["-l"]
      if "--logfile" in switches:
         self.logfile = switches["--logfile"]
      if "-o" in switches:
         self.owner = switches["-o"].split(":", 1)
      if "--owner" in switches:
         self.owner = switches["--owner"].split(":", 1)
      if "-m" in switches:
         self.mode = switches["-m"]
      if "--mode" in switches:
         self.mode = switches["--mode"]
      if "-O" in switches or "--output" in switches:
         self.output = True
      if "-d" in switches or "--debug" in switches:
         self.debug = True
      if "-s" in switches or "--stack" in switches:
         self.stacktrace = True
      if "-D" in switches or "--diagnostics" in switches:
         self.diagnostics = True
      if "-v" in switches or "--verifyOnly" in switches:
         self.verifyOnly = True
      if "-w" in switches or "--ignoreWarnings" in switches:
         self.ignoreWarnings = True
      if len(remaining) == 2:
         self.sourceDir, self.s3BucketUrl = remaining


#######################################################################
# Public functions
#######################################################################

#################
# cli() function
#################

def cli():
   """
   Implements the command-line interface for the C{cback3-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback3-amazons3-sync script.  It does
   all of the argument processing for the script, and then also implements the
   tool functionality.

   This function looks pretty similar to C{CedarBackup3.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   A different error code is returned for each type of failure:

      - C{1}: The Python interpreter version is < 3.4
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above.
   """
   try:
      if list(map(int, [sys.version_info[0], sys.version_info[1]])) < [3, 4]:
         sys.stderr.write("Python 3 version 3.4 or greater required.\n")
         return 1
   except Exception:
      # sys.version_info isn't available before 2.0.  Catch Exception rather
      # than everything so CTRL-C and interpreter exit are not swallowed here.
      sys.stderr.write("Python 3 version 3.4 or greater required.\n")
      return 1

   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception as e:
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   # These switches short-circuit the run: print the requested information and exit.
   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   try:
      logfile = setupLogging(options)
   except Exception as e:
      sys.stderr.write("Error setting up logging: %s\n" % e)
      return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]", options)
   logger.info("Logfile is [%s]", logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      # With --stack, let any exception propagate so the traceback is shown.
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception as e:
         logger.error("Error executing backup: %s", e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0

#######################################################################
# Utility functions
#######################################################################

####################
# _usage() function
####################

def _usage(fd=sys.stderr):
   """
   Writes the command-line help text for the cback3-amazons3-sync script.

   The text is emitted one line at a time through C{fd.write()}, so any
   object with a C{write()} method can be passed in to capture the output
   (which is what makes this function easy to unit test).

   @param fd: File-like object that the usage text is written to.
   """
   helpText = (
      "\n",
      " Usage: cback3-amazons3-sync [switches] sourceDir s3bucketUrl\n",
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      "\n",
      " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
      " bucket. After the sync is complete, a validation step is taken. An\n",
      " error is reported if the contents of the bucket do not match the\n",
      " source directory, or if the indicated size for any file differs.\n",
      " This tool is a wrapper over the AWS CLI command-line tool.\n",
      "\n",
      " The following arguments are required:\n",
      "\n",
      " sourceDir The local source directory on disk (must exist)\n",
      " s3BucketUrl The URL to the target Amazon S3 bucket\n",
      "\n",
      " The following switches are accepted:\n",
      "\n",
      " -h, --help Display this usage/help listing\n",
      " -V, --version Display version information\n",
      " -b, --verbose Print verbose output as well as logging to disk\n",
      " -q, --quiet Run quietly (display no output to the screen)\n",
      " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
      " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]),
      " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
      " -O, --output Record some sub-command (i.e. aws) output to the log\n",
      " -d, --debug Write debugging information to the log (implies --output)\n",
      " -s, --stack Dump Python stack trace instead of swallowing exceptions\n", # exactly 80 characters in width!
      " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
      " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
      " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
      "\n",
      " Typical usage would be something like:\n",
      "\n",
      " cback3-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
      "\n",
      " This will sync the contents of /home/myuser into the indicated bucket.\n",
      "\n",
   )
   # One write() call per line, exactly as a caller capturing output would expect
   for helpLine in helpText:
      fd.write(helpLine)
964
######################
# _version() function
######################

def _version(fd=sys.stdout):
   """
   Writes version and copyright information for the cback3-amazons3-sync script.

   Output goes through C{fd.write()} one line at a time, so a file-like
   object can be substituted for the default stream in unit tests.

   @param fd: File-like object that the version text is written to.
   """
   versionText = (
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
      "\n",
      " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
      " See CREDITS for a list of included code and other contributors.\n",
      " This is free software; there is NO warranty. See the\n",
      " GNU General Public License version 2 for copying conditions.\n",
      "\n",
      " Use the --help option for usage information.\n",
      "\n",
   )
   for versionLine in versionText:
      fd.write(versionLine)
987
##########################
# _diagnostics() function
##########################

def _diagnostics(fd=sys.stdout):
   """
   Writes runtime diagnostics, framed by a short heading and blank lines.

   The diagnostics themselves are produced by C{Diagnostics.printDiagnostics()},
   which is handed the same C{fd} so that all output lands in one place.

   @param fd: File-like object that the diagnostics are written to.
   """
   heading = ("\n", "Diagnostics:\n", "\n")
   for headingLine in heading:
      fd.write(headingLine)
   reporter = Diagnostics()
   reporter.printDiagnostics(fd=fd, prefix=" ")
   fd.write("\n")
1004
############################
# _executeAction() function
############################

def _executeAction(options):
   """
   Runs the sync workflow: list the source files, optionally check their
   filename encodings, optionally push them to the bucket, and then always
   verify the bucket contents against the local files.

   @param options: Program command-line options.
   @type options: Options object.

   @raise Exception: Under many generic error conditions
   """
   sourceDir = options.sourceDir
   sourceFiles = _buildSourceFiles(sourceDir)

   # The encoding check can be bypassed with --ignoreWarnings
   skipEncodingCheck = options.ignoreWarnings
   if not skipEncodingCheck:
      _checkSourceFiles(sourceDir, sourceFiles)

   # With --verifyOnly the bucket is left untouched and only compared
   makeChanges = not options.verifyOnly
   if makeChanges:
      _synchronizeBucket(sourceDir, options.s3BucketUrl)

   _verifyBucketContents(sourceDir, sourceFiles, options.s3BucketUrl)
1025
1026 1027 ################################ 1028 # _buildSourceFiles() function 1029 ################################ 1030 1031 -def _buildSourceFiles(sourceDir):
1032 """ 1033 Build a list of files in a source directory 1034 @param sourceDir: Local source directory 1035 @return: FilesystemList with contents of source directory 1036 """ 1037 if not os.path.isdir(sourceDir): 1038 raise ValueError("Source directory does not exist on disk.") 1039 sourceFiles = FilesystemList() 1040 sourceFiles.addDirContents(sourceDir) 1041 return sourceFiles
1042
###############################
# _checkSourceFiles() function
###############################

def _checkSourceFiles(sourceDir, sourceFiles):
   """
   Check source files, trying to guess which ones will have encoding problems.

   Each path is converted back to its on-disk byte form with C{os.fsencode()}.
   Those bytes are then decoded twice: once using the encoding that chardet
   guesses for them, and once using the encoding reported by
   C{Diagnostics().encoding}.  A path is flagged when the two decodings
   disagree, or when either decoding fails.  If chardet has no guess at all,
   the path only needs to decode cleanly using the required encoding.

   @param sourceDir: Local source directory (not used by the check itself)
   @param sourceFiles: Iterable of source file paths to check
   @raise ValueError: If a problem file is found
   @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
   @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
   @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
   """
   with warnings.catch_warnings():
      warnings.simplefilter("ignore") # So we never print warnings raised while probing encodings

      encoding = Diagnostics().encoding

      failed = False
      for entry in sourceFiles:
         # Paths are str under Python 3, which has no decode() method and which
         # chardet cannot inspect; work on the underlying filesystem bytes instead.
         raw = os.fsencode(entry)
         detected = chardet.detect(raw)["encoding"]  # may be None if chardet has no guess
         try:
            target = raw.decode(encoding)
            source = raw.decode(detected) if detected else target
            consistent = (source == target)
         except (UnicodeError, LookupError):
            # Undecodable bytes, or a codec name that Python does not know
            consistent = False
         if not consistent:
            logger.error("Inconsistent encoding for [%s]: got %s, but need %s", entry, detected, encoding)
            failed = True

      if not failed:
         logger.info("Completed checking source filename encoding (no problems found).")
      else:
         logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
         logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
         logger.error("Aternately, you can rename the problem files to be valid in the indicated locale.")
         logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
         raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1084
################################
# _synchronizeBucket() function
################################

def _synchronizeBucket(sourceDir, s3BucketUrl):
   """
   Pushes a local directory up to an Amazon S3 bucket using the AWS CLI.

   The AWS CLI C{s3 sync} sub-command is invoked with the C{--delete} and
   C{--recursive} switches, and its output is not captured.

   @param sourceDir: Local source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   """
   logger.info("Synchronizing local source directory up to Amazon S3.")
   syncArgs = ["s3", "sync", sourceDir, s3BucketUrl] + ["--delete", "--recursive"]
   outcome = executeCommand(AWS_COMMAND, syncArgs, returnOutput=False)
   status = outcome[0]
   if status == 0:
      return
   raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
1101
###################################
# _verifyBucketContents() function
###################################

def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
   """
   Verify that a source directory is equivalent to an Amazon S3 bucket.

   The bucket is listed via the AWS CLI C{s3api list-objects} sub-command.
   Every regular file in C{sourceFiles} must then be present in the listing
   under the same relative path and with the same size.  Relative paths are
   computed by removing only the I{leading} source directory (or key prefix)
   and any leading slashes, so a directory name that recurs deeper in a path
   and trailing-slash differences do not cause spurious mismatches.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   @raise ValueError: If the bucket contents differ from the source directory
   """
   # As of this writing, the documentation for the S3 API that we're using
   # below says that up to 1000 elements at a time are returned, and that we
   # have to manually handle pagination by looking for the IsTruncated element.
   # However, in practice, this is not true.  I have been testing with
   # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
   # No matter how many items exist in my bucket and prefix, I get back a
   # single JSON result.  I've tested with buckets containing nearly 6000
   # elements.
   #
   # If I turn on debugging, it's clear that underneath, something in the API
   # is executing multiple list-object requests against AWS, and stitching
   # results together to give me back the final JSON result.  The debug output
   # clearly includes multiple requests, and each XML response (except for the
   # final one) contains <IsTruncated>true</IsTruncated>.
   #
   # This feature is not mentioned in the official changelog for any of the
   # releases going back to 1.0.0.  It appears to happen in the botocore
   # library, but I'll admit I can't actually find the code that implements it.
   # For now, all I can do is rely on this behavior and hope that the
   # documentation is out-of-date.  I'm not going to write code that tries to
   # parse out IsTruncated if I can't actually test that code.

   (bucket, prefix) = _splitBucketUrl(s3BucketUrl)

   query = "Contents[].{Key: Key, Size: Size}"
   args = [ "s3api", "list-objects", "--bucket", bucket, ]
   if prefix:
      args += [ "--prefix", prefix, ]  # a URL without a key prefix lists the whole bucket
   args += [ "--query", query, ]
   (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

   # The query evaluates to JSON null when nothing matches; treat that as an empty listing
   listing = json.loads("".join(data)) or []
   contents = { }
   for entry in listing:
      contents[_relativeKey(entry["Key"], prefix)] = int(entry["Size"])

   failed = False
   for entry in sourceFiles:
      if os.path.isfile(entry):
         key = _relativeKey(entry, sourceDir)
         size = int(os.stat(entry).st_size)
         if key not in contents:
            logger.error("File was apparently not uploaded: [%s]", entry)
            failed = True
         elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
            failed = True

   if not failed:
      logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
   else:
      logger.error("There were differences between source directory and target S3 bucket.")
      raise ValueError("There were differences between source directory and target S3 bucket.")


def _splitBucketUrl(s3BucketUrl):
   """
   Splits an S3 URL like C{s3://bucket/some/prefix} into C{(bucket, prefix)}.
   The prefix is an empty string when the URL names only a bucket.
   """
   location = s3BucketUrl
   if location.startswith("s3://"):
      location = location[len("s3://"):]
   (bucket, _, prefix) = location.partition("/")
   return (bucket, prefix)


def _relativeKey(path, base):
   """
   Returns C{path} relative to C{base}: the leading C{base} is removed if
   present (occurrences deeper in the path are left alone), and then any
   leading slashes are stripped.
   """
   if base and path.startswith(base):
      path = path[len(base):]
   return path.lstrip("/")
#########################################################################
# Main routine
########################################################################

# When run as a script, exit the interpreter with the status that cli() returns
if __name__ == "__main__":
   raise SystemExit(cli())