Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008,2010 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python 2 (>= 2.7) 
  29  # Project  : Cedar Backup, release 2 
  30  # Purpose  : Provides filesystem-related objects. 
  31  # 
  32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  33   
  34  ######################################################################## 
  35  # Module documentation 
  36  ######################################################################## 
  37   
  38  """ 
  39  Provides filesystem-related objects. 
  40  @sort: FilesystemList, BackupFileList, PurgeItemList 
  41  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  42  """ 
  43   
  44   
  45  ######################################################################## 
  46  # Imported modules 
  47  ######################################################################## 
  48   
  49  # System modules 
  50  import os 
  51  import re 
  52  import math 
  53  import logging 
  54  import tarfile 
  55   
  56  # Cedar Backup modules 
  57  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  58  from CedarBackup2.util import AbsolutePathList, UnorderedList, RegexList 
  59  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  60   
  61   
  62  ######################################################################## 
  63  # Module-wide variables 
  64  ######################################################################## 
  65   
  66  logger = logging.getLogger("CedarBackup2.log.filesystem") 
67 68 69 ######################################################################## 70 # FilesystemList class definition 71 ######################################################################## 72 73 -class FilesystemList(list):
74 75 ###################### 76 # Class documentation 77 ###################### 78 79 """ 80 Represents a list of filesystem items. 81 82 This is a generic class that represents a list of filesystem items. Callers 83 can add individual files or directories to the list, or can recursively add 84 the contents of a directory. The class also allows for up-front exclusions 85 in several forms (all files, all directories, all items matching a pattern, 86 all items whose basename matches a pattern, or all directories containing a 87 specific "ignore file"). Symbolic links are typically backed up 88 non-recursively, i.e. the link to a directory is backed up, but not the 89 contents of that link (we don't want to deal with recursive loops, etc.). 90 91 The custom methods such as L{addFile} will only add items if they exist on 92 the filesystem and do not match any exclusions that are already in place. 93 However, since a FilesystemList is a subclass of Python's standard list 94 class, callers can also add items to the list in the usual way, using 95 methods like C{append()} or C{insert()}. No validations apply to items 96 added to the list in this way; however, many list-manipulation methods deal 97 "gracefully" with items that don't exist in the filesystem, often by 98 ignoring them. 99 100 Once a list has been created, callers can remove individual items from the 101 list using standard methods like C{pop()} or C{remove()} or they can use 102 custom methods to remove specific types of entries or entries which match a 103 particular pattern. 104 105 @note: Regular expression patterns that apply to paths are assumed to be 106 bounded at front and back by the beginning and end of the string, i.e. they 107 are treated as if they begin with C{^} and end with C{$}. This is true 108 whether we are matching a complete path or a basename. 109 110 @note: Some platforms, like Windows, do not support soft links. 
On those 111 platforms, the ignore-soft-links flag can be set, but it won't do any good 112 because the operating system never reports a file as a soft link. 113 114 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 115 removeLinks, removeMatch, removeInvalid, normalize, 116 excludeFiles, excludeDirs, excludeLinks, excludePaths, 117 excludePatterns, excludeBasenamePatterns, ignoreFile 118 """ 119 120 121 ############## 122 # Constructor 123 ############## 124
125 - def __init__(self):
126 """Initializes a list with no configured exclusions.""" 127 list.__init__(self) 128 self._excludeFiles = False 129 self._excludeDirs = False 130 self._excludeLinks = False 131 self._excludePaths = None 132 self._excludePatterns = None 133 self._excludeBasenamePatterns = None 134 self._ignoreFile = None 135 self.excludeFiles = False 136 self.excludeLinks = False 137 self.excludeDirs = False 138 self.excludePaths = [] 139 self.excludePatterns = RegexList() 140 self.excludeBasenamePatterns = RegexList() 141 self.ignoreFile = None
142 143 144 ############# 145 # Properties 146 ############# 147
148 - def _setExcludeFiles(self, value):
149 """ 150 Property target used to set the exclude files flag. 151 No validations, but we normalize the value to C{True} or C{False}. 152 """ 153 if value: 154 self._excludeFiles = True 155 else: 156 self._excludeFiles = False
157
158 - def _getExcludeFiles(self):
159 """ 160 Property target used to get the exclude files flag. 161 """ 162 return self._excludeFiles
163
164 - def _setExcludeDirs(self, value):
165 """ 166 Property target used to set the exclude directories flag. 167 No validations, but we normalize the value to C{True} or C{False}. 168 """ 169 if value: 170 self._excludeDirs = True 171 else: 172 self._excludeDirs = False
173
174 - def _getExcludeDirs(self):
175 """ 176 Property target used to get the exclude directories flag. 177 """ 178 return self._excludeDirs
179 189 195
196 - def _setExcludePaths(self, value):
197 """ 198 Property target used to set the exclude paths list. 199 A C{None} value is converted to an empty list. 200 Elements do not have to exist on disk at the time of assignment. 201 @raise ValueError: If any list element is not an absolute path. 202 """ 203 self._excludePaths = AbsolutePathList() 204 if value is not None: 205 self._excludePaths.extend(value)
206
207 - def _getExcludePaths(self):
208 """ 209 Property target used to get the absolute exclude paths list. 210 """ 211 return self._excludePaths
212
213 - def _setExcludePatterns(self, value):
214 """ 215 Property target used to set the exclude patterns list. 216 A C{None} value is converted to an empty list. 217 """ 218 self._excludePatterns = RegexList() 219 if value is not None: 220 self._excludePatterns.extend(value)
221
222 - def _getExcludePatterns(self):
223 """ 224 Property target used to get the exclude patterns list. 225 """ 226 return self._excludePatterns
227
228 - def _setExcludeBasenamePatterns(self, value):
229 """ 230 Property target used to set the exclude basename patterns list. 231 A C{None} value is converted to an empty list. 232 """ 233 self._excludeBasenamePatterns = RegexList() 234 if value is not None: 235 self._excludeBasenamePatterns.extend(value)
236
238 """ 239 Property target used to get the exclude basename patterns list. 240 """ 241 return self._excludeBasenamePatterns
242
243 - def _setIgnoreFile(self, value):
244 """ 245 Property target used to set the ignore file. 246 The value must be a non-empty string if it is not C{None}. 247 @raise ValueError: If the value is an empty string. 248 """ 249 if value is not None: 250 if len(value) < 1: 251 raise ValueError("The ignore file must be a non-empty string.") 252 self._ignoreFile = value
253
254 - def _getIgnoreFile(self):
255 """ 256 Property target used to get the ignore file. 257 """ 258 return self._ignoreFile
259 260 excludeFiles = property(_getExcludeFiles, _setExcludeFiles, None, "Boolean indicating whether files should be excluded.") 261 excludeDirs = property(_getExcludeDirs, _setExcludeDirs, None, "Boolean indicating whether directories should be excluded.") 262 excludeLinks = property(_getExcludeLinks, _setExcludeLinks, None, "Boolean indicating whether soft links should be excluded.") 263 excludePaths = property(_getExcludePaths, _setExcludePaths, None, "List of absolute paths to be excluded.") 264 excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None, 265 "List of regular expression patterns (matching complete path) to be excluded.") 266 excludeBasenamePatterns = property(_getExcludeBasenamePatterns, _setExcludeBasenamePatterns, 267 None, "List of regular expression patterns (matching basename) to be excluded.") 268 ignoreFile = property(_getIgnoreFile, _setIgnoreFile, None, "Name of file which will cause directory contents to be ignored.") 269 270 271 ############## 272 # Add methods 273 ############## 274
275 - def addFile(self, path):
276 """ 277 Adds a file to the list. 278 279 The path must exist and must be a file or a link to an existing file. It 280 will be added to the list subject to any exclusions that are in place. 281 282 @param path: File path to be added to the list 283 @type path: String representing a path on disk 284 285 @return: Number of items added to the list. 286 287 @raise ValueError: If path is not a file or does not exist. 288 @raise ValueError: If the path could not be encoded properly. 289 """ 290 path = encodePath(path) 291 if not os.path.exists(path) or not os.path.isfile(path): 292 logger.debug("Path [%s] is not a file or does not exist on disk.", path) 293 raise ValueError("Path is not a file or does not exist on disk.") 294 if self.excludeLinks and os.path.islink(path): 295 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 296 return 0 297 if self.excludeFiles: 298 logger.debug("Path [%s] is excluded based on excludeFiles.", path) 299 return 0 300 if path in self.excludePaths: 301 logger.debug("Path [%s] is excluded based on excludePaths.", path) 302 return 0 303 for pattern in self.excludePatterns: 304 pattern = encodePath(pattern) # use same encoding as filenames 305 if re.compile(r"^%s$" % pattern).match(path): # safe to assume all are valid due to RegexList 306 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 307 return 0 308 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 309 pattern = encodePath(pattern) # use same encoding as filenames 310 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 311 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 312 return 0 313 self.append(path) 314 logger.debug("Added file to list: [%s]", path) 315 return 1
316
317 - def addDir(self, path):
318 """ 319 Adds a directory to the list. 320 321 The path must exist and must be a directory or a link to an existing 322 directory. It will be added to the list subject to any exclusions that 323 are in place. The L{ignoreFile} does not apply to this method, only to 324 L{addDirContents}. 325 326 @param path: Directory path to be added to the list 327 @type path: String representing a path on disk 328 329 @return: Number of items added to the list. 330 331 @raise ValueError: If path is not a directory or does not exist. 332 @raise ValueError: If the path could not be encoded properly. 333 """ 334 path = encodePath(path) 335 path = normalizeDir(path) 336 if not os.path.exists(path) or not os.path.isdir(path): 337 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 338 raise ValueError("Path is not a directory or does not exist on disk.") 339 if self.excludeLinks and os.path.islink(path): 340 logger.debug("Path [%s] is excluded based on excludeLinks.", path) 341 return 0 342 if self.excludeDirs: 343 logger.debug("Path [%s] is excluded based on excludeDirs.", path) 344 return 0 345 if path in self.excludePaths: 346 logger.debug("Path [%s] is excluded based on excludePaths.", path) 347 return 0 348 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 349 pattern = encodePath(pattern) # use same encoding as filenames 350 if re.compile(r"^%s$" % pattern).match(path): 351 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 352 return 0 353 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 354 pattern = encodePath(pattern) # use same encoding as filenames 355 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 356 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 357 return 0 358 self.append(path) 359 logger.debug("Added directory to list: [%s]", path) 360 return 1
361
362 - def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
363 """ 364 Adds the contents of a directory to the list. 365 366 The path must exist and must be a directory or a link to a directory. 367 The contents of the directory (as well as the directory path itself) will 368 be recursively added to the list, subject to any exclusions that are in 369 place. If you only want the directory and its immediate contents to be 370 added, then pass in C{recursive=False}. 371 372 @note: If a directory's absolute path matches an exclude pattern or path, 373 or if the directory contains the configured ignore file, then the 374 directory and all of its contents will be recursively excluded from the 375 list. 376 377 @note: If the passed-in directory happens to be a soft link, it will be 378 recursed. However, the linkDepth parameter controls whether any soft 379 links I{within} the directory will be recursed. The link depth is 380 maximum depth of the tree at which soft links should be followed. So, a 381 depth of 0 does not follow any soft links, a depth of 1 follows only 382 links within the passed-in directory, a depth of 2 follows the links at 383 the next level down, etc. 384 385 @note: Any invalid soft links (i.e. soft links that point to 386 non-existent items) will be silently ignored. 387 388 @note: The L{excludeDirs} flag only controls whether any given directory 389 path itself is added to the list once it has been discovered. It does 390 I{not} modify any behavior related to directory recursion. 391 392 @note: If you call this method I{on a link to a directory} that link will 393 never be dereferenced (it may, however, be followed). 394 395 @param path: Directory path whose contents should be added to the list 396 @type path: String representing a path on disk 397 398 @param recursive: Indicates whether directory contents should be added recursively. 399 @type recursive: Boolean value 400 401 @param addSelf: Indicates whether the directory itself should be added to the list. 
402 @type addSelf: Boolean value 403 404 @param linkDepth: Maximum depth of the tree at which soft links should be followed 405 @type linkDepth: Integer value, where zero means not to follow any soft links 406 407 @param dereference: Indicates whether soft links, if followed, should be dereferenced 408 @type dereference: Boolean value 409 410 @return: Number of items recursively added to the list 411 412 @raise ValueError: If path is not a directory or does not exist. 413 @raise ValueError: If the path could not be encoded properly. 414 """ 415 path = encodePath(path) 416 path = normalizeDir(path) 417 return self._addDirContentsInternal(path, addSelf, recursive, linkDepth, dereference)
418
419 - def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
420 """ 421 Internal implementation of C{addDirContents}. 422 423 This internal implementation exists due to some refactoring. Basically, 424 some subclasses have a need to add the contents of a directory, but not 425 the directory itself. This is different than the standard C{FilesystemList} 426 behavior and actually ends up making a special case out of the first 427 call in the recursive chain. Since I don't want to expose the modified 428 interface, C{addDirContents} ends up being wholly implemented in terms 429 of this method. 430 431 The linkDepth parameter controls whether soft links are followed when we 432 are adding the contents recursively. Any recursive calls reduce the 433 value by one. If the value zero or less, then soft links will just be 434 added as directories, but will not be followed. This means that links 435 are followed to a I{constant depth} starting from the top-most directory. 436 437 There is one difference between soft links and directories: soft links 438 that are added recursively are not placed into the list explicitly. This 439 is because if we do add the links recursively, the resulting tar file 440 gets a little confused (it has a link and a directory with the same 441 name). 442 443 @note: If you call this method I{on a link to a directory} that link will 444 never be dereferenced (it may, however, be followed). 445 446 @param path: Directory path whose contents should be added to the list. 447 @param includePath: Indicates whether to include the path as well as contents. 448 @param recursive: Indicates whether directory contents should be added recursively. 449 @param linkDepth: Depth of soft links that should be followed 450 @param dereference: Indicates whether soft links, if followed, should be dereferenced 451 452 @return: Number of items recursively added to the list 453 454 @raise ValueError: If path is not a directory or does not exist. 
455 """ 456 added = 0 457 if not os.path.exists(path) or not os.path.isdir(path): 458 logger.debug("Path [%s] is not a directory or does not exist on disk.", path) 459 raise ValueError("Path is not a directory or does not exist on disk.") 460 if path in self.excludePaths: 461 logger.debug("Path [%s] is excluded based on excludePaths.", path) 462 return added 463 for pattern in self.excludePatterns: # safe to assume all are valid due to RegexList 464 pattern = encodePath(pattern) # use same encoding as filenames 465 if re.compile(r"^%s$" % pattern).match(path): 466 logger.debug("Path [%s] is excluded based on pattern [%s].", path, pattern) 467 return added 468 for pattern in self.excludeBasenamePatterns: # safe to assume all are valid due to RegexList 469 pattern = encodePath(pattern) # use same encoding as filenames 470 if re.compile(r"^%s$" % pattern).match(os.path.basename(path)): 471 logger.debug("Path [%s] is excluded based on basename pattern [%s].", path, pattern) 472 return added 473 if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)): 474 logger.debug("Path [%s] is excluded based on ignore file.", path) 475 return added 476 if includePath: 477 added += self.addDir(path) # could actually be excluded by addDir, yet 478 for entry in os.listdir(path): 479 entrypath = os.path.join(path, entry) 480 if os.path.isfile(entrypath): 481 if linkDepth > 0 and dereference: 482 derefpath = dereferenceLink(entrypath) 483 if derefpath != entrypath: 484 added += self.addFile(derefpath) 485 added += self.addFile(entrypath) 486 elif os.path.isdir(entrypath): 487 if os.path.islink(entrypath): 488 if recursive: 489 if linkDepth > 0: 490 newDepth = linkDepth - 1 491 if dereference: 492 derefpath = dereferenceLink(entrypath) 493 if derefpath != entrypath: 494 added += self._addDirContentsInternal(derefpath, True, recursive, newDepth, dereference) 495 added += self.addDir(entrypath) 496 else: 497 added += self._addDirContentsInternal(entrypath, 
False, recursive, newDepth, dereference) 498 else: 499 added += self.addDir(entrypath) 500 else: 501 added += self.addDir(entrypath) 502 else: 503 if recursive: 504 newDepth = linkDepth - 1 505 added += self._addDirContentsInternal(entrypath, True, recursive, newDepth, dereference) 506 else: 507 added += self.addDir(entrypath) 508 return added
509 510 511 ################# 512 # Remove methods 513 ################# 514
515 - def removeFiles(self, pattern=None):
516 """ 517 Removes file entries from the list. 518 519 If C{pattern} is not passed in or is C{None}, then all file entries will 520 be removed from the list. Otherwise, only those file entries matching 521 the pattern will be removed. Any entry which does not exist on disk 522 will be ignored (use L{removeInvalid} to purge those entries). 523 524 This method might be fairly slow for large lists, since it must check the 525 type of each item in the list. If you know ahead of time that you want 526 to exclude all files, then you will be better off setting L{excludeFiles} 527 to C{True} before adding items to the list. 528 529 @param pattern: Regular expression pattern representing entries to remove 530 531 @return: Number of entries removed 532 @raise ValueError: If the passed-in pattern is not a valid regular expression. 533 """ 534 removed = 0 535 if pattern is None: 536 for entry in self[:]: 537 if os.path.exists(entry) and os.path.isfile(entry): 538 self.remove(entry) 539 logger.debug("Removed path [%s] from list.", entry) 540 removed += 1 541 else: 542 try: 543 pattern = encodePath(pattern) # use same encoding as filenames 544 compiled = re.compile(pattern) 545 except re.error: 546 raise ValueError("Pattern is not a valid regular expression.") 547 for entry in self[:]: 548 if os.path.exists(entry) and os.path.isfile(entry): 549 if compiled.match(entry): 550 self.remove(entry) 551 logger.debug("Removed path [%s] from list.", entry) 552 removed += 1 553 logger.debug("Removed a total of %d entries.", removed) 554 return removed
555
556 - def removeDirs(self, pattern=None):
557 """ 558 Removes directory entries from the list. 559 560 If C{pattern} is not passed in or is C{None}, then all directory entries 561 will be removed from the list. Otherwise, only those directory entries 562 matching the pattern will be removed. Any entry which does not exist on 563 disk will be ignored (use L{removeInvalid} to purge those entries). 564 565 This method might be fairly slow for large lists, since it must check the 566 type of each item in the list. If you know ahead of time that you want 567 to exclude all directories, then you will be better off setting 568 L{excludeDirs} to C{True} before adding items to the list (note that this 569 will not prevent you from recursively adding the I{contents} of 570 directories). 571 572 @param pattern: Regular expression pattern representing entries to remove 573 574 @return: Number of entries removed 575 @raise ValueError: If the passed-in pattern is not a valid regular expression. 576 """ 577 removed = 0 578 if pattern is None: 579 for entry in self[:]: 580 if os.path.exists(entry) and os.path.isdir(entry): 581 self.remove(entry) 582 logger.debug("Removed path [%s] from list.", entry) 583 removed += 1 584 else: 585 try: 586 pattern = encodePath(pattern) # use same encoding as filenames 587 compiled = re.compile(pattern) 588 except re.error: 589 raise ValueError("Pattern is not a valid regular expression.") 590 for entry in self[:]: 591 if os.path.exists(entry) and os.path.isdir(entry): 592 if compiled.match(entry): 593 self.remove(entry) 594 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 595 removed += 1 596 logger.debug("Removed a total of %d entries.", removed) 597 return removed
598 639
640 - def removeMatch(self, pattern):
641 """ 642 Removes from the list all entries matching a pattern. 643 644 This method removes from the list all entries which match the passed in 645 C{pattern}. Since there is no need to check the type of each entry, it 646 is faster to call this method than to call the L{removeFiles}, 647 L{removeDirs} or L{removeLinks} methods individually. If you know which 648 patterns you will want to remove ahead of time, you may be better off 649 setting L{excludePatterns} or L{excludeBasenamePatterns} before adding 650 items to the list. 651 652 @note: Unlike when using the exclude lists, the pattern here is I{not} 653 bounded at the front and the back of the string. You can use any pattern 654 you want. 655 656 @param pattern: Regular expression pattern representing entries to remove 657 658 @return: Number of entries removed. 659 @raise ValueError: If the passed-in pattern is not a valid regular expression. 660 """ 661 try: 662 pattern = encodePath(pattern) # use same encoding as filenames 663 compiled = re.compile(pattern) 664 except re.error: 665 raise ValueError("Pattern is not a valid regular expression.") 666 removed = 0 667 for entry in self[:]: 668 if compiled.match(entry): 669 self.remove(entry) 670 logger.debug("Removed path [%s] from list based on pattern [%s].", entry, pattern) 671 removed += 1 672 logger.debug("Removed a total of %d entries.", removed) 673 return removed
674
675 - def removeInvalid(self):
676 """ 677 Removes from the list all entries that do not exist on disk. 678 679 This method removes from the list all entries which do not currently 680 exist on disk in some form. No attention is paid to whether the entries 681 are files or directories. 682 683 @return: Number of entries removed. 684 """ 685 removed = 0 686 for entry in self[:]: 687 if not os.path.exists(entry): 688 self.remove(entry) 689 logger.debug("Removed path [%s] from list.", entry) 690 removed += 1 691 logger.debug("Removed a total of %d entries.", removed) 692 return removed
693 694 695 ################## 696 # Utility methods 697 ################## 698
699 - def normalize(self):
700 """Normalizes the list, ensuring that each entry is unique.""" 701 orig = len(self) 702 self.sort() 703 dups = filter(lambda x, self=self: self[x] == self[x+1], range(0, len(self) - 1)) # pylint: disable=W0110 704 items = map(lambda x, self=self: self[x], dups) # pylint: disable=W0110 705 map(self.remove, items) 706 new = len(self) 707 logger.debug("Completed normalizing list; removed %d items (%d originally, %d now).", new-orig, orig, new)
708
709 - def verify(self):
710 """ 711 Verifies that all entries in the list exist on disk. 712 @return: C{True} if all entries exist, C{False} otherwise. 713 """ 714 for entry in self: 715 if not os.path.exists(entry): 716 logger.debug("Path [%s] is invalid; list is not valid.", entry) 717 return False 718 logger.debug("All entries in list are valid.") 719 return True
720
########################################################################
# SpanItem class definition
########################################################################

class SpanItem(object): # pylint: disable=R0903
   """
   Item returned by L{BackupFileList.generateSpan}.

   This is a plain value holder: the constructor stores its four arguments
   unchanged as public attributes of the same names.
   """
   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity value supplied by the caller (stored as-is)
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.utilization = utilization
      self.capacity = capacity
      self.size = size
      self.fileList = fileList
741
742 743 ######################################################################## 744 # BackupFileList class definition 745 ######################################################################## 746 747 -class BackupFileList(FilesystemList): # pylint: disable=R0904
748 749 ###################### 750 # Class documentation 751 ###################### 752 753 """ 754 List of files to be backed up. 755 756 A BackupFileList is a L{FilesystemList} containing a list of files to be 757 backed up. It only contains files, not directories (soft links are treated 758 like files). On top of the generic functionality provided by 759 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 760 for each file in the list, and it also provides a method to calculate the 761 total size of the files in the list and a way to export the list into tar 762 form. 763 764 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 765 generateFitted, generateTarfile, removeUnchanged 766 """ 767 768 ############## 769 # Constructor 770 ############## 771
772 - def __init__(self):
773 """Initializes a list with no configured exclusions.""" 774 FilesystemList.__init__(self)
775 776 777 ################################ 778 # Overridden superclass methods 779 ################################ 780
781 - def addDir(self, path):
782 """ 783 Adds a directory to the list. 784 785 Note that this class does not allow directories to be added by themselves 786 (a backup list contains only files). However, since links to directories 787 are technically files, we allow them to be added. 788 789 This method is implemented in terms of the superclass method, with one 790 additional validation: the superclass method is only called if the 791 passed-in path is both a directory and a link. All of the superclass's 792 existing validations and restrictions apply. 793 794 @param path: Directory path to be added to the list 795 @type path: String representing a path on disk 796 797 @return: Number of items added to the list. 798 799 @raise ValueError: If path is not a directory or does not exist. 800 @raise ValueError: If the path could not be encoded properly. 801 """ 802 path = encodePath(path) 803 path = normalizeDir(path) 804 if os.path.isdir(path) and not os.path.islink(path): 805 return 0 806 else: 807 return FilesystemList.addDir(self, path)
808 809 810 ################## 811 # Utility methods 812 ################## 813
814 - def totalSize(self):
815 """ 816 Returns the total size among all files in the list. 817 Only files are counted. 818 Soft links that point at files are ignored. 819 Entries which do not exist on disk are ignored. 820 @return: Total size, in bytes 821 """ 822 total = 0.0 823 for entry in self: 824 if os.path.isfile(entry) and not os.path.islink(entry): 825 total += float(os.stat(entry).st_size) 826 return total
827
828 - def generateSizeMap(self):
829 """ 830 Generates a mapping from file to file size in bytes. 831 The mapping does include soft links, which are listed with size zero. 832 Entries which do not exist on disk are ignored. 833 @return: Dictionary mapping file to file size 834 """ 835 table = { } 836 for entry in self: 837 if os.path.islink(entry): 838 table[entry] = 0.0 839 elif os.path.isfile(entry): 840 table[entry] = float(os.stat(entry).st_size) 841 return table
842
843 - def generateDigestMap(self, stripPrefix=None):
844 """ 845 Generates a mapping from file to file digest. 846 847 Currently, the digest is an SHA hash, which should be pretty secure. In 848 the future, this might be a different kind of hash, but we guarantee that 849 the type of the hash will not change unless the library major version 850 number is bumped. 851 852 Entries which do not exist on disk are ignored. 853 854 Soft links are ignored. We would end up generating a digest for the file 855 that the soft link points at, which doesn't make any sense. 856 857 If C{stripPrefix} is passed in, then that prefix will be stripped from 858 each key when the map is generated. This can be useful in generating two 859 "relative" digest maps to be compared to one another. 860 861 @param stripPrefix: Common prefix to be stripped from paths 862 @type stripPrefix: String with any contents 863 864 @return: Dictionary mapping file to digest value 865 @see: L{removeUnchanged} 866 """ 867 table = { } 868 if stripPrefix is not None: 869 for entry in self: 870 if os.path.isfile(entry) and not os.path.islink(entry): 871 table[entry.replace(stripPrefix, "", 1)] = BackupFileList._generateDigest(entry) 872 else: 873 for entry in self: 874 if os.path.isfile(entry) and not os.path.islink(entry): 875 table[entry] = BackupFileList._generateDigest(entry) 876 return table
@staticmethod
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various
   Python hashing algorithms.

   In my tests using a 110 MB file on CD, the original implementation
   requires 111 seconds.  This implementation requires only 40-45 seconds,
   which is a pretty substantial speed-up.

   Experience shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The 4kB number makes me a little
   suspicious, and I think it might be related to the size of a filesystem
   read at the hardware level.  However, I've decided to just hardcode 4096
   until I have evidence that shows it's worthwhile making the read size
   configurable.

   The file is always closed before this method returns, even if reading
   it fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read.
   """
   # pylint: disable=C0103,E1101
   try:
      import hashlib
      s = hashlib.sha1()
   except ImportError:
      import sha
      s = sha.new()
   readBytes = 4096  # see notes above
   with open(path, mode="rb") as f:  # in case platform cares about binary reads
      while True:
         readString = f.read(readBytes)
         if not readString:
            break  # an empty read means end-of-file
         s.update(readString)
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s].", digest, path)
   return digest
930
def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Picks out the items from the list that fit within a given capacity.

   Callers sometimes want to include every item in a list but cannot,
   because the items do not all fit in the space available.  This method
   leaves the list itself untouched and hands back a separate result
   containing only items that fit, so no information is lost if the fitted
   result turns out to be unsatisfactory.

   The fitting is delegated to the functions in the knapsack module.  The
   worst fit algorithm is used unless another one is requested; first fit,
   best fit and alternate fit are also available.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Items selected by the knapsack function (the first element of its result)
   @raise ValueError: If the algorithm is invalid.
   """
   knapsack = self._getKnapsackTable()
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   result = fitFunction(knapsack, capacity)
   return result[0]
957
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Divides the items in the list into groups that each fit in a capacity.

   This is useful when a backup file list has to be broken up into a set
   of smaller lists, for instance to "span" the files across a set of
   discs.

   The fitting is delegated to the functions in the knapsack module.  The
   worst fit algorithm is used unless another one is requested; first fit,
   best fit and alternate fit are also available.  The chosen function is
   applied repeatedly, and the items it selects are taken out of the
   working table each time, until nothing is left.

   @note: If any single item is larger than the capacity, no solution is
   possible and a value error is raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   remaining = self._getKnapsackTable(capacity)
   spans = []
   passes = 0
   while remaining:
      passes += 1
      result = fitFunction(remaining, capacity)
      fitted = result[0]
      if len(fitted) == 0:
         # Should never happen due to validations in _convertToKnapsackForm(), but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % passes)
      removeKeys(remaining, fitted)
      used = result[1]
      percentUsed = (float(used)/float(capacity))*100.0
      spans.append(SpanItem(fitted, used, capacity, percentUsed))
   return spans
1000
def _getKnapsackTable(self, capacity=None):
   """
   Builds the table that the knapsack algorithms take as input.

   Soft links are entered with a size of zero.  Regular files are entered
   with their size on disk.  Anything else (including entries that do not
   exist on disk) is left out of the table.

   @param capacity: Optional capacity that every regular file must fit within
   @return: Dictionary mapping file name to tuple of (file path, file size).
   @raise ValueError: If C{capacity} is given and some file is larger than it.
   """
   knapsack = {}
   for path in self:
      if os.path.islink(path):
         knapsack[path] = (path, 0.0)
         continue
      if not os.path.isfile(path):
         continue
      fileSize = float(os.stat(path).st_size)
      if capacity is not None and fileSize > capacity:
         raise ValueError("File [%s] cannot fit in capacity %s." % (path, displayBytes(capacity)))
      knapsack[path] = (path, fileSize)
   return knapsack
@staticmethod
def _getKnapsackFunction(algorithm):
   """
   Looks up the knapsack function that goes with an algorithm name.

   The name must be one of "first_fit", "best_fit", "worst_fit" or
   "alternate_fit".

   @param algorithm: Name of the algorithm
   @return: Reference to knapsack function
   @raise ValueError: If the algorithm name is unknown.
   """
   known = (
      ("first_fit", firstFit),
      ("best_fit", bestFit),
      ("worst_fit", worstFit),
      ("alternate_fit", alternateFit),
   )
   # Compared with == (rather than hashed) so that any value at all can be
   # passed in and simply fails to match.
   for name, function in known:
      if algorithm == name:
         return function
   raise ValueError("Algorithm [%s] is invalid." % algorithm)
1037
def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
   """
   Creates a tar file containing the files in the list.

   By default, this method will create uncompressed tar files.  If you pass
   in mode C{'targz'}, then it will create gzipped tar files, and if you
   pass in mode C{'tarbz2'}, then it will create bzipped tar files.

   The tar file will be created as a GNU tar archive, which enables extended
   file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
   the extra functionality out-weighs the disadvantage of not being
   "standard".

   If you pass in C{flat=True}, then a "flat" archive will be created, and
   all of the files will be added to the root of the archive.  So, the file
   C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

   By default, the whole method call fails if there are problems adding any
   of the files to the archive, resulting in an exception.  Under these
   circumstances, callers are advised that they might want to call
   L{removeInvalid()} and then attempt to create the tar file a second
   time, since the most common cause of failures is a missing file (a file
   that existed when the list was built, but is gone again by the time the
   tar file is built).

   If you want to, you can pass in C{ignore=True}, and the method will
   ignore errors encountered when adding individual files to the archive
   (but not errors opening and closing the archive itself).

   Whenever this method exits with an exception after the empty-list and
   mode checks have passed, the archive is closed (if it was opened) and we
   attempt to remove the tar file from disk.  Failures during that cleanup
   are ignored so that they do not mask the original error.

   @note: No validation is done as to whether the entries in the list are
   files, since only files or soft links should be in an object like this.
   However, to be safe, everything is explicitly added to the tar archive
   non-recursively so it's safe to include soft links to directories.

   @note: The Python C{tarfile} module, which is used internally here, is
   supposed to deal properly with long filenames and links.  In my testing,
   I have found that it appears to be able to add really long filenames
   to archives, but doesn't do a good job reading them back out, even out of
   an archive it created.  Fortunately, all Cedar Backup does is add files
   to archives.

   @param path: Path of tar file to create on disk
   @type path: String representing a path on disk

   @param mode: Tar creation mode
   @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

   @param ignore: Indicates whether to ignore certain errors.
   @type ignore: Boolean

   @param flat: Creates "flat" archive by putting all items in root
   @type flat: Boolean

   @raise ValueError: If mode is not valid
   @raise ValueError: If list is empty
   @raise ValueError: If the path could not be encoded properly.
   @raise TarError: If there is a problem creating the tar file
   """
   # pylint: disable=E1101
   path = encodePath(path)
   if len(self) == 0:
      raise ValueError("Empty list cannot be used to generate tarfile.")
   if mode == 'tar':
      tarmode = "w:"
   elif mode == 'targz':
      tarmode = "w:gz"
   elif mode == 'tarbz2':
      tarmode = "w:bz2"
   else:
      raise ValueError("Mode [%s] is not valid." % mode)
   tar = None          # stays None if tarfile.open() itself fails
   completed = False   # set only once the archive has been closed cleanly
   try:
      tar = tarfile.open(path, tarmode)
      try:
         tar.format = tarfile.GNU_FORMAT
      except AttributeError:
         # tarfile module without GNU_FORMAT; fall back to the posix flag
         tar.posix = False
      for entry in self:
         try:
            if flat:
               tar.add(entry, arcname=os.path.basename(entry), recursive=False)
            else:
               tar.add(entry, recursive=False)
         except tarfile.TarError:
            if not ignore:
               raise
            logger.info("Unable to add file [%s]; going on anyway.", entry)
         except OSError as e:
            if not ignore:
               raise tarfile.TarError(e)
            logger.info("Unable to add file [%s]; going on anyway.", entry)
      tar.close()
      completed = True
   except tarfile.ReadError:
      raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
   finally:
      if not completed:
         # Best-effort cleanup: never let a cleanup failure replace the
         # exception that is already on its way out.
         if tar is not None:
            try:
               tar.close()
            except Exception:
               pass
         if os.path.exists(path):
            try:
               os.remove(path)
            except Exception:
               pass
1141
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Drops entries from the list whose contents have not changed.

   The comparison is made against a digest map as returned from
   L{generateDigestMap}.  For each entry in C{digestMap}, if that entry is
   also in the current list I{and} its current digest equals the digest
   held in the map, the entry is removed from the current list.

   The intended use is that a caller captures a digest map from
   C{generateDigestMap} at some point in time (perhaps the beginning of the
   week) and saves it off using C{pickle} or some other method.  Later, the
   caller uses this method to filter out any files which are unchanged
   relative to the saved-off map.

   If C{captureDigest} is C{True}, digest information is captured for the
   entire list before the removal step, using the same rules as in
   L{generateDigestMap}, and the removal check is a lookup into that
   complete set of digests.

   If C{captureDigest} is C{False}, a digest is generated only for the
   files that actually need to be checked, and any entry in the list which
   isn't a file that currently exists on disk is ignored.

   The return value depends on C{captureDigest}.  To preserve backwards
   compatibility, when C{captureDigest} is C{False} just the number of
   entries removed is returned.  Otherwise, a tuple of C{(entries removed,
   digest map)} is returned, where the digest map is in exactly the form
   returned by L{generateDigestMap}.

   @note: For performance reasons, the list is rebuilt from scratch.  The
   items in the original list are loaded into a temporary dictionary,
   unchanged items are deleted from the dictionary (which is faster than
   the equivalent operation on a list), and then the contents of the list
   are replaced with the keys left in the dictionary.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Results as discussed above (format varies based on arguments)
   """
   removed = 0
   remaining = {}
   if not captureDigest:
      for path in self:
         remaining[path] = None
      for path in digestMap.keys():
         if path not in remaining:
            continue
         if not os.path.isfile(path) or os.path.islink(path):
            continue
         current = BackupFileList._generateDigest(path)
         if current == digestMap[path]:
            removed += 1
            del remaining[path]
            logger.debug("Discarded unchanged file [%s].", path)
      self[:] = remaining.keys()
      return removed
   captured = {}
   for path in self:
      if os.path.isfile(path) and not os.path.islink(path):
         current = BackupFileList._generateDigest(path)
         remaining[path] = current
         captured[path] = current
      else:
         remaining[path] = None
   for path in digestMap.keys():
      if path not in remaining:
         continue
      current = remaining[path]
      # A digest of None marks an entry that was not an on-disk regular
      # file; equivalent to the file/link check in the other case.
      if current is not None and current == digestMap[path]:
         removed += 1
         del remaining[path]
         logger.debug("Discarded unchanged file [%s].", path)
   self[:] = remaining.keys()
   return (removed, captured)
1224
########################################################################
# PurgeItemList class definition
########################################################################

class PurgeItemList(FilesystemList): # pylint: disable=R0904

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} holding the files and directories
   that are to be purged.  Beyond the generic functionality inherited from
   L{FilesystemList}, this class knows how to drop items that are too young
   to be purged, and how to actually remove each item in the list from the
   filesystem.

   The other main difference is that when a directory's contents are added
   to a purge item list, the directory itself is not added.  This way, if
   someone asks to purge within C{/opt/backup/collect}, that directory does
   not get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself)
      will be recursively added to the list, subject to any exclusions that
      are in place.  To avoid recursing, pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or
      path, or if the directory contains the configured ignore file, then
      the directory and all of its contents will be recursively excluded
      from the list.

      @note: If the passed-in directory happens to be a soft link, it will
      be recursed.  However, the linkDepth parameter controls whether any
      soft links I{within} the directory will be recursed.  The link depth
      is the maximum depth of the tree at which soft links should be
      followed.  So, a depth of 0 does not follow any soft links, a depth of
      1 follows only links within the passed-in directory, a depth of 2
      follows the links at the next level down, etc.

      @note: Any invalid soft links (i.e. soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft
      link path itself is added to the list once it has been discovered.  It
      does I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given
      directory path itself is added to the list once it has been
      discovered.  It does I{not} modify any behavior related to directory
      recursion.

      @note: If you call this method I{on a link to a directory} that link
      will never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      directory = normalizeDir(encodePath(path))
      # The second argument is always False here, whatever the caller passed
      # as addSelf, so the directory itself never lands in the purge list.
      parent = super(PurgeItemList, self)
      return parent._addDirContentsInternal(directory, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in whole days is less than (C{<}) the value of
      the C{daysOld} parameter is taken out of the list, so that it will not
      be purged later when L{purgeItems} is called.  Directories and soft
      links are ignored, and so is any file whose age cannot be determined
      because of an C{OSError}.

      The "age" of a file is the amount of time since the file was last
      used, per the most recent of the file's C{st_atime} and C{st_mtime}
      values.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method removes items I{from the
      list}, not from the filesystem!  It removes from the list those items
      that you would I{not} want to purge because they are too young.  As an
      example, passing in C{daysOld} of zero (0) would remove no files from
      the list, which would result in purging all of the files later.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative.
      """
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      removed = 0
      # Walk a copy, since entries are removed from the list as we go.
      for entry in self[:]:
         if not os.path.isfile(entry) or os.path.islink(entry):
            continue
         try:
            wholeDays = math.floor(calculateFileAge(entry))
            if wholeDays < daysOld:
               removed += 1
               self.remove(entry)
         except OSError:
            pass
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they
      are empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being
      empty, the delete process happens in two passes: files first
      (including soft links), then directories.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      for entry in self:
         if not os.path.exists(entry):
            continue
         if not (os.path.isfile(entry) or os.path.islink(entry)):
            continue
         try:
            os.remove(entry)
            files += 1
            logger.debug("Purged file [%s].", entry)
         except OSError:
            pass
      dirs = 0
      for entry in self:
         if not os.path.exists(entry):
            continue
         if not os.path.isdir(entry) or os.path.islink(entry):
            continue
         try:
            os.rmdir(entry)
            dirs += 1
            logger.debug("Purged empty directory [%s].", entry)
         except OSError:
            pass
      return (files, dirs)
1404
########################################################################
# Public functions
########################################################################

##########################
# normalizeDir() function
##########################

def normalizeDir(path):
   """
   Normalizes a directory name.

   For our purposes, normalizing a directory name means dropping one
   trailing path separator, if there is one.  A path that consists of
   nothing but the separator is returned as-is.  This matters because we
   want directories to appear within lists in a consistent way, although
   from the user's perspective passing in C{/path/to/dir/} and
   C{/path/to/dir} are equivalent.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep:
      return path
   lastChar = path[-1:]
   if lastChar == os.sep:
      return path[:-1]
   return path
1431
#############################
# compareContents() function
#############################

def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      digests = []
      for path in (path1, path2):
         fileList = BackupFileList()
         fileList.addDirContents(path)
         digests.append(fileList.generateDigestMap(stripPrefix=normalizeDir(path)))
      compareDigestMaps(digests[0], digests[1], verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      raise  # bare raise keeps the original traceback intact
1485
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Checks that two digest maps agree, raising an exception if they do not.

   In non-verbose mode the two maps are simply compared as a whole.  In
   verbose mode the sets of keys are compared first (ignoring order), and
   then the digest values are compared key by key, so that the exception
   can say which kind of difference was found and, for differing content,
   which file was involved.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if not verbose:
      if digest1 != digest2:
         raise ValueError("Consistency check failed.")
      return
   names1 = UnorderedList(digest1.keys())
   names2 = UnorderedList(digest2.keys())
   if names1 != names2:
      raise ValueError("Directories contain a different set of files.")
   for name in names1:
      if digest1[name] != digest2[name]:
         raise ValueError("File contents for [%s] vary between directories." % name)
1512