1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 """
39 Implements the standard 'collect' action.
40 @sort: executeCollect
41 @author: Kenneth J. Pronovici <pronovic@ieee.org>
42 """
43
44
45
46
47
48
49
50 import os
51 import logging
52 import pickle
53
54
55 from CedarBackup2.filesystem import BackupFileList, FilesystemList
56 from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath
57 from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR
58 from CedarBackup2.actions.util import writeIndicatorFile
59
60
61
62
63
64
65 logger = logging.getLogger("CedarBackup2.log.actions.collect")
66
67
68
69
70
71
72
73
74
75
def executeCollect(configPath, options, config):
    """
    Runs the 'collect' backup action.

    Each configured collect file and collect directory is examined in turn.
    An item is backed up today when a full backup was requested, when its
    collect mode is 'daily' or 'incr', or when its collect mode is 'weekly'
    and today is the configured starting day of the week.  The reset digest
    flag passed down to the collectors is set for full backups and on the
    starting day of the week.

    @note: When the collect action is complete, a collect indicator is
    written to the collect directory, so it's obvious that the collect action
    has completed.  The stage process uses this indicator to decide whether a
    peer is ready to be staged.

    @param configPath: Path to configuration file on disk (not referenced by
    this function's body).
    @type configPath: String representing a path on disk.

    @param options: Program command-line options.
    @type options: Options object.

    @param config: Program configuration.
    @type config: Config object.

    @raise ValueError: Under many generic error conditions
    @raise TarError: If there is a problem creating a tar file
    """
    logger.debug("Executing the 'collect' action.")

    # Validate configuration before doing any work.
    if config.options is None or config.collect is None:
        raise ValueError("Collect configuration is not properly filled in.")
    collectFiles = config.collect.collectFiles
    collectDirs = config.collect.collectDirs
    noFiles = collectFiles is None or len(collectFiles) < 1
    if noFiles and (collectDirs is None or len(collectDirs) < 1):
        raise ValueError("There must be at least one collect file or collect directory.")

    # Work out the flags that apply to the whole run.
    fullBackup = options.full
    logger.debug("Full backup flag is [%s]" % fullBackup)
    todayIsStart = isStartOfWeek(config.options.startingDay)
    resetDigest = fullBackup or todayIsStart
    logger.debug("Reset digest flag is [%s]" % resetDigest)

    def _meetsCriteria(mode):
        # Shared "should this item be backed up today?" rule.
        return fullBackup or (mode in ('daily', 'incr')) or (mode == 'weekly' and todayIsStart)

    # Individual files first...
    if collectFiles is not None:
        for collectFile in collectFiles:
            filePath = collectFile.absolutePath
            logger.debug("Working with collect file [%s]" % filePath)
            collectMode = _getCollectMode(config, collectFile)
            archiveMode = _getArchiveMode(config, collectFile)
            digestPath = _getDigestPath(config, filePath)
            tarfilePath = _getTarfilePath(config, filePath, archiveMode)
            if _meetsCriteria(collectMode):
                logger.debug("File meets criteria to be backed up today.")
                _collectFile(config, filePath, tarfilePath,
                             collectMode, archiveMode, resetDigest, digestPath)
            else:
                logger.debug("File will not be backed up, per collect mode.")
            logger.info("Completed collecting file [%s]" % filePath)

    # ...then directories.
    if collectDirs is not None:
        for collectDir in collectDirs:
            dirPath = collectDir.absolutePath
            logger.debug("Working with collect directory [%s]" % dirPath)
            collectMode = _getCollectMode(config, collectDir)
            archiveMode = _getArchiveMode(config, collectDir)
            ignoreFile = _getIgnoreFile(config, collectDir)
            linkDepth = _getLinkDepth(collectDir)
            dereference = _getDereference(collectDir)
            recursionLevel = _getRecursionLevel(collectDir)
            (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
            if _meetsCriteria(collectMode):
                logger.debug("Directory meets criteria to be backed up today.")
                _collectDirectory(config, dirPath,
                                  collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                                  resetDigest, excludePaths, excludePatterns, recursionLevel)
            else:
                logger.debug("Directory will not be backed up, per collect mode.")
            logger.info("Completed collecting directory [%s]" % dirPath)

    # Leave the marker that tells later stages the collect step finished.
    writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                       config.options.backupUser, config.options.backupGroup)
    logger.info("Executed the 'collect' action successfully.")
143
144
145
146
147
148
149
150
151
152
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Collects a single configured collect file.

    A backup list containing only C{absolutePath} is built and handed to
    L{_executeBackup}, which writes it to the indicated tarfile.  For files
    collected incrementally, the indicated digest path is used and the reset
    digest flag is honored (the flag causes any existing digest to be
    ignored, but a new digest is always rewritten).

    The caller must decide what the collect and archive modes are, since they
    can be set on both the collect configuration and the collect file itself.

    @param config: Config object.
    @param absolutePath: Absolute path of file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    singleFileList = BackupFileList()
    singleFileList.addFile(absolutePath)
    _executeBackup(config=config,
                   backupList=singleFileList,
                   absolutePath=absolutePath,
                   tarfilePath=tarfilePath,
                   collectMode=collectMode,
                   archiveMode=archiveMode,
                   resetDigest=resetDigest,
                   digestPath=digestPath)
177
178
179
180
181
182
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
    """
    Collects a configured collect directory.

    With a recursion level of zero, the contents of the directory are
    gathered into one backup list and passed to L{_executeBackup}.  The
    tarfile path and digest path are derived from C{absolutePath} via
    L{_getTarfilePath} and L{_getDigestPath}.  For directories collected
    incrementally, the reset digest flag causes any existing digest to be
    ignored, but a new digest is always rewritten.

    With a non-zero recursion level, the immediate subdirectories of
    C{absolutePath} (plain files and links are left out of that listing) are
    each collected on their own with the recursion level reduced by one.
    Afterwards the directory itself is collected at level zero, with the
    already-collected subdirectories added to the exclude paths.

    The caller must decide what the collect and archive modes are, since they
    can be set on both the collect configuration and the collect directory
    itself.

    @note: The caller's C{excludePaths} list is not modified; subdirectories
    handled here are tracked in a private copy.

    @param config: Config object.
    @param absolutePath: Absolute path of directory to collect.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param ignoreFile: Ignore file to use.
    @param linkDepth: Link depth value to use.
    @param dereference: Dereference flag to use.
    @param resetDigest: Reset digest flag.
    @param excludePaths: List of absolute paths to exclude.
    @param excludePatterns: List of patterns to exclude.
    @param recursionLevel: Recursion level (zero for no recursion)
    """
    if recursionLevel == 0:
        # Base case: collect the directory itself into a single tarfile.
        logger.info("Collecting directory [%s]" % absolutePath)
        tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
        digestPath = _getDigestPath(config, absolutePath)

        backupList = BackupFileList()
        backupList.ignoreFile = ignoreFile
        backupList.excludePaths = excludePaths
        backupList.excludePatterns = excludePatterns
        backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

        _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
        return

    # Recursive case: list the immediate subdirectories only.
    subdirs = FilesystemList()
    subdirs.excludeFiles = True
    subdirs.excludeLinks = True
    subdirs.excludePaths = excludePaths
    subdirs.excludePatterns = excludePatterns
    subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

    # Track handled subdirectories in a private copy so the list object owned
    # by the caller is never mutated as a side effect of collecting.
    handledPaths = list(excludePaths) if excludePaths is not None else []

    # Collect every subdirectory separately, then exclude it from its parent.
    for subdir in subdirs:
        _collectDirectory(config, subdir, collectMode, archiveMode,
                          ignoreFile, linkDepth, dereference, resetDigest,
                          handledPaths, excludePatterns, recursionLevel - 1)
        handledPaths.append(subdir)

    # Finally collect what is left directly in this directory.
    _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      handledPaths, excludePatterns, 0)
243
244
245
246
247
248
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Execute the backup process for the indicated backup list.

    This function consolidates functionality shared by L{_collectFile} and
    L{_collectDirectory}.  Those functions build the backup list; this
    function writes the tarfile and manages the digest file on disk.

    Only the 'incr' collect mode uses a digest.  In that mode the old digest
    is either discarded (reset digest flag set) or loaded from disk, unchanged
    files are removed from the backup list, and the newly captured digest is
    always written back.  In every mode a tarfile is only generated when the
    backup list is not empty.

    For collect files, the digest file will always just contain the single
    file that is being backed up.  This might be a little wasteful in terms
    of the number of files that we keep around, but it's consistent and easy
    to understand.

    @param config: Config object.
    @param backupList: List to execute backup for
    @param absolutePath: Absolute path of directory or file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    incremental = (collectMode == 'incr')
    newDigest = None

    # Digest handling happens up front, and only for incremental collection.
    if incremental:
        if resetDigest:
            logger.debug("Based on resetDigest flag, digest will be cleared.")
            oldDigest = {}
        else:
            logger.debug("Based on resetDigest flag, digest will loaded from disk.")
            oldDigest = _loadDigest(digestPath)
        (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
        logger.debug("Removed %d unchanged files based on digest values." % removed)
    else:
        logger.debug("Collect mode is [%s]; no digest will be used." % collectMode)

    # Report what is about to be backed up; a lone entry equal to the
    # collected path is reported as a single file.
    entryCount = len(backupList)
    if entryCount == 1 and backupList[0] == absolutePath:
        logger.info("Backing up file [%s] (%s)." % (absolutePath, displayBytes(backupList.totalSize())))
    else:
        logger.info("Backing up %d files in [%s] (%s)." % (entryCount, absolutePath, displayBytes(backupList.totalSize())))

    # Nothing to archive means no tarfile is produced at all.
    if entryCount > 0:
        backupList.generateTarfile(tarfilePath, archiveMode, True)
        changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

    # The new digest is rewritten even when the backup list ended up empty.
    if incremental:
        _writeDigest(config, newDigest, digestPath)
298
299
300
301
302
303
def _loadDigest(digestPath):
    """
    Loads the indicated digest path from disk into a dictionary.

    If we can't load the digest successfully (either because it doesn't exist
    or for some other reason), then an empty dictionary will be returned - but
    the condition will be logged.

    The file is opened in binary mode, which is what pickle requires, and the
    handle is always closed before returning.

    @param digestPath: Path to the digest file on disk.

    @return: Dictionary representing contents of digest path.
    """
    if not os.path.isfile(digestPath):
        logger.debug("Digest [%s] does not exist on disk." % digestPath)
        return {}
    try:
        # NOTE(review): unpickling can execute arbitrary code.  This assumes
        # the digest file was written by this program and is not writable by
        # untrusted users - confirm the permissions on the working directory.
        with open(digestPath, "rb") as digestFile:
            digest = pickle.load(digestFile)
        logger.debug("Loaded digest [%s] from disk: %d entries." % (digestPath, len(digest)))
    except Exception:
        # Deliberately best-effort: an unreadable digest behaves like a
        # missing one.  KeyboardInterrupt/SystemExit are no longer swallowed.
        logger.error("Failed loading digest [%s] from disk." % digestPath)
        return {}
    return digest
327
328
329
330
331
332
def _writeDigest(config, digest, digestPath):
    """
    Writes the digest dictionary to the indicated digest path on disk.

    If we can't write the digest successfully for any reason, we'll log the
    condition but won't throw an exception.

    The file is opened in binary mode, which is what pickle requires, and it
    is closed (and therefore flushed) before its ownership is changed.

    @param config: Config object.
    @param digest: Digest dictionary to write to disk.
    @param digestPath: Path to the digest file on disk.
    """
    try:
        with open(digestPath, "wb") as digestFile:
            pickle.dump(digest, digestFile)
        changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
        logger.debug("Wrote new digest [%s] to disk: %d entries." % (digestPath, len(digest)))
    except Exception:
        # Deliberately best-effort: a failed digest write must not abort the
        # backup.  KeyboardInterrupt/SystemExit are no longer swallowed.
        logger.error("Failed to write digest [%s] to disk." % digestPath)
350
351
352
353
354
355
356
357
358
359
def _getCollectMode(config, item):
    """
    Determines the collect mode for a collect directory or file.
    The value set on the item wins; when the item has none, the value from
    the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Collect mode to use.
    """
    collectMode = item.collectMode
    if collectMode is None:
        collectMode = config.collect.collectMode
    logger.debug("Collect mode is [%s]" % collectMode)
    return collectMode
374
375
376
377
378
379
def _getArchiveMode(config, item):
    """
    Determines the archive mode for a collect directory or file.
    The value set on the item wins; when the item has none, the value from
    the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Archive mode to use.
    """
    archiveMode = item.archiveMode
    if archiveMode is None:
        archiveMode = config.collect.archiveMode
    logger.debug("Archive mode is [%s]" % archiveMode)
    return archiveMode
394
395
396
397
398
399
def _getIgnoreFile(config, item):
    """
    Determines the ignore file for a collect directory or file.
    The value set on the item wins; when the item has none, the value from
    the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Ignore file to use.
    """
    ignoreFile = item.ignoreFile
    if ignoreFile is None:
        ignoreFile = config.collect.ignoreFile
    logger.debug("Ignore file is [%s]" % ignoreFile)
    return ignoreFile
414
415
416
417
418
419
def _getLinkDepth(item):
    """
    Determines the link depth for a collect directory.
    The value set on the directory is used when there is one; otherwise the
    link depth is 0 (zero).
    @param item: C{CollectDir} object
    @return: Link depth to use.
    """
    linkDepth = item.linkDepth
    if linkDepth is None:
        linkDepth = 0
    logger.debug("Link depth is [%d]" % linkDepth)
    return linkDepth
433
434
435
436
437
438
def _getDereference(item):
    """
    Determines the dereference flag for a collect directory.
    The value set on the directory is used when there is one; otherwise the
    flag is False.
    @param item: C{CollectDir} object
    @return: Dereference flag to use.
    """
    dereference = item.dereference
    if dereference is None:
        dereference = False
    logger.debug("Dereference flag is [%s]" % dereference)
    return dereference
452
453
454
455
456
457
471
472
473
474
475
476
def _getDigestPath(config, absolutePath):
    """
    Builds the digest path associated with a collect directory or file.
    The digest lives in the configured working directory and is named after
    the normalized form of the path, plus the digest extension.
    @param config: Config object.
    @param absolutePath: Absolute path to generate digest for
    @return: Absolute path to the digest associated with the collect directory or file.
    """
    digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
    digestPath = os.path.join(config.options.workingDir, digestName)
    logger.debug("Digest path is [%s]" % digestPath)
    return digestPath
489
490
491
492
493
494
def _getTarfilePath(config, absolutePath, archiveMode):
    """
    Gets the tarfile path (including correct extension) associated with a
    collect directory or file.
    The tarfile lives in the configured collect target directory and is named
    after the normalized form of the path, plus an extension chosen by the
    archive mode ('tar', 'targz' or 'tarbz2').
    @param config: Config object.
    @param absolutePath: Absolute path to generate tarfile for
    @param archiveMode: Archive mode to use for this tarfile.
    @return: Absolute path to the tarfile associated with the collect directory.
    @raise ValueError: If the archive mode is not one of the supported values.
    """
    if archiveMode == 'tar':
        extension = "tar"
    elif archiveMode == 'targz':
        extension = "tar.gz"
    elif archiveMode == 'tarbz2':
        extension = "tar.bz2"
    else:
        # Without this branch an unknown mode fell through and failed later
        # with an UnboundLocalError on 'extension'.
        raise ValueError("Unsupported archive mode [%s]" % archiveMode)
    normalized = buildNormalizedPath(absolutePath)
    filename = "%s.%s" % (normalized, extension)
    tarfilePath = os.path.join(config.collect.targetDir, filename)
    logger.debug("Tarfile path is [%s]" % tarfilePath)
    return tarfilePath
514
515
516
517
518
519
554