1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 """
39 Provides an extension to split up large files in staging directories.
40
41 When this extension is executed, it will look through the configured Cedar
42 Backup staging directory for files exceeding a specified size limit, and split
43 them down into smaller files using the 'split' utility. Any directory which
44 has already been split (as indicated by the C{cback.split} file) will be
45 ignored.
46
47 This extension requires a new configuration section <split> and is intended
48 to be run immediately after the standard stage action or immediately before the
49 standard store action. Aside from its own configuration, it requires the
50 options and staging configuration sections in the standard Cedar Backup
51 configuration file.
52
53 @author: Kenneth J. Pronovici <pronovic@ieee.org>
54 """
55
56
57
58
59
60
61 import os
62 import re
63 import logging
64
65
66 from CedarBackup2.util import resolveCommand, executeCommand, changeOwnership
67 from CedarBackup2.xmlutil import createInputDom, addContainerNode
68 from CedarBackup2.xmlutil import readFirstChild
69 from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles
70 from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode
71
72
73
74
75
76
# Module-level logger; the name places this extension's messages under the
# "CedarBackup2.log" logging hierarchy.
logger = logging.getLogger("CedarBackup2.log.extend.split")

# Candidate command line for the external 'split' utility.  It is handed to
# resolveCommand() before being executed.
SPLIT_COMMAND = ["split"]

# Name of the indicator file that marks a staging directory as already split
# (see the module docstring).
SPLIT_INDICATOR = "cback.split"
class SplitConfig(object):

    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a ByteQuantity
       - The split size must be a ByteQuantity

    Instances compare by value: first on C{sizeLimit}, then on C{splitSize}.
    An unset (C{None}) field sorts below any set field.

    @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the C{SplitConfig} class.

        @param sizeLimit: Size limit of the files, as a C{ByteQuantity}
        @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity}

        @raise ValueError: If one of the values is invalid.
        """
        self._sizeLimit = None
        self._splitSize = None
        # Assign through the properties so the type validation is applied.
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another configuration object.

        Fields are compared in order (size limit, then split size); the first
        field that differs decides the result.  C{None} is treated as smaller
        than any other value, which spells out the ordering Python 2 applied
        implicitly and keeps the method usable where C{None < x} is an error.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        pairs = (
            (self.sizeLimit, other.sizeLimit),
            (self.splitSize, other.splitSize),
        )
        for mine, theirs in pairs:
            if mine != theirs:
                if mine is None:
                    return -1
                if theirs is None:
                    return 1
                if mine < theirs:
                    return -1
                return 1
        return 0

    # Rich comparisons delegate to __cmp__ so that value comparison keeps
    # working on interpreters that ignore __cmp__ (there, "==" would otherwise
    # silently fall back to identity).
    def __eq__(self, other):
        """Equality in terms of C{__cmp__}."""
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        """Inequality in terms of C{__cmp__}."""
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        """Less-than in terms of C{__cmp__}."""
        return self.__cmp__(other) < 0

    def __le__(self, other):
        """Less-than-or-equal in terms of C{__cmp__}."""
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        """Greater-than in terms of C{__cmp__}."""
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        """Greater-than-or-equal in terms of C{__cmp__}."""
        return self.__cmp__(other) >= 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not C{None}, the value must be a C{ByteQuantity} object.
        @raise ValueError: If the value is not a C{ByteQuantity}
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a C{ByteQuantity} object.")
        self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
189
class LocalConfig(object):

    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, C{validate} and C{addConfig} methods.

    Instances compare by value on their C{split} attribute; an unset
    (C{None}) value sorts below any set value.

    @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either C{xmlData} or
        C{xmlPath} then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        C{validate} is C{False}, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        @note: It is strongly suggested that the C{validate} option always be set
        to C{True} (the default) unless there is a specific need to read in
        invalid configuration from disk.

        @param xmlData: XML data representing configuration.
        @type xmlData: String data.

        @param xmlPath: Path to an XML file on disk.
        @type xmlPath: Absolute path to a file on disk.

        @param validate: Validate the document after parsing it.
        @type validate: Boolean true/false.

        @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
        @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
        @raise ValueError: If the parsed configuration document is not valid.
        """
        self._split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlData is None and xmlPath is not None:
            # Read the file up front and close the handle deterministically
            # instead of leaving that to garbage collection.
            handle = open(xmlPath)
            try:
                xmlData = handle.read()
            finally:
                handle.close()
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        # One-element tuple so the value is always formatted as a single item.
        return "LocalConfig(%s)" % (self.split,)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __cmp__(self, other):
        """
        Three-way comparison against another configuration object.

        Only the C{split} attribute takes part in the comparison.  C{None} is
        treated as smaller than any other value.

        @param other: Other object to compare to.
        @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
        """
        if other is None:
            return 1
        mine, theirs = self.split, other.split
        if mine != theirs:
            if mine is None:
                return -1
            if theirs is None:
                return 1
            if mine < theirs:
                return -1
            return 1
        return 0

    # Rich comparisons delegate to __cmp__ so that value comparison keeps
    # working on interpreters that ignore __cmp__ (there, "==" would otherwise
    # silently fall back to identity).
    def __eq__(self, other):
        """Equality in terms of C{__cmp__}."""
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        """Inequality in terms of C{__cmp__}."""
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        """Less-than in terms of C{__cmp__}."""
        return self.__cmp__(other) < 0

    def __le__(self, other):
        """Less-than-or-equal in terms of C{__cmp__}."""
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        """Greater-than in terms of C{__cmp__}."""
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        """Greater-than-or-equal in terms of C{__cmp__}."""
        return self.__cmp__(other) >= 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not C{None}, the value must be a C{SplitConfig} object.
        @raise ValueError: If the value is not a C{SplitConfig}
        """
        if value is not None and not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
        self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        @raise ValueError: If one of the validations fails.
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is added when no split configuration is set.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param xmlDom: DOM tree as from C{impl.createDocument()}.
        @param parentNode: Parent that the section should be appended to.
        """
        if self.split is None:
            return
        sectionNode = addContainerNode(xmlDom, parentNode, "split")
        addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
        addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree (C{xmlDom}) and then
        calls a static method to parse the split configuration section.

        @param xmlData: XML data to be parsed
        @type xmlData: String data

        @raise ValueError: If the XML cannot be successfully parsed.
        """
        # Only the parent node is needed; the DOM object itself is not used.
        (_xmlDom, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    @staticmethod
    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        @param parent: Parent node to search beneath.

        @return: C{SplitConfig} object or C{None} if the section does not exist.
        @raise ValueError: If some filled-in value is invalid.
        """
        section = readFirstChild(parent, "split")
        if section is None:
            return None
        split = SplitConfig()
        split.sizeLimit = readByteQuantity(section, "size_limit")
        split.splitSize = readByteQuantity(section, "split_size")
        return split
382
383
384
385
386
387
388
389
390
391
392 -def executeAction(configPath, options, config):
418
419
420
421
422
423
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits large files in a daily staging directory.

    The candidate files are whatever C{getBackupFiles} returns for the
    directory.  Every candidate whose size on disk is strictly greater than
    the size limit is split via C{_splitFile}, and the original file is
    removed once it has been split.  Smaller files are left untouched.

    @note: Indicator files such as C{"cback.store"} or C{"cback.stage"} are
    presumably filtered out by C{getBackupFiles}; that helper is defined
    elsewhere, so confirm there.

    @param dailyDir: Daily directory to split
    @param sizeLimit: Size limit, as a C{ByteQuantity} (its C{bytes} value is used)
    @param splitSize: Split size, as a C{ByteQuantity}
    @param backupUser: User that target files should be owned by
    @param backupGroup: Group that target files should be owned by

    @raise ValueError: Presumably from C{getBackupFiles} if the daily staging
    directory does not exist, and from C{_splitFile} if a listed file has
    disappeared.
    @raise IOError: If there is a problem splitting or removing a file.
    """
    logger.debug("Begin splitting contents of [%s].", dailyDir)
    for path in getBackupFiles(dailyDir):
        if os.path.getsize(path) > sizeLimit.bytes:
            _splitFile(path, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s].", dailyDir)
448
449
450
451
452
453
454 -def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
455 """
456 Splits the source file into chunks of the indicated size.
457
458 The split files will be owned by the indicated backup user and group. If
459 C{removeSource} is C{True}, then the source file will be removed after it is
460 successfully split.
461
462 @param sourcePath: Absolute path of the source file to split
463 @param splitSize: Encryption mode (only "gpg" is allowed)
464 @param backupUser: User that target files should be owned by
465 @param backupGroup: Group that target files should be owned by
466 @param removeSource: Indicates whether to remove the source file
467
468 @raise IOError: If there is a problem accessing, splitting or removing the source file.
469 """
470 cwd = os.getcwd()
471 try:
472 if not os.path.exists(sourcePath):
473 raise ValueError("Source path [%s] does not exist." % sourcePath)
474 dirname = os.path.dirname(sourcePath)
475 filename = os.path.basename(sourcePath)
476 prefix = "%s_" % filename
477 bytes = int(splitSize.bytes)
478 os.chdir(dirname)
479 command = resolveCommand(SPLIT_COMMAND)
480 args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % bytes, filename, prefix, ]
481 (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
482 if result != 0:
483 raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
484 pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % prefix)
485 match = pattern.search(output[-1:][0])
486 if match is None:
487 raise IOError("Unable to parse output from split command.")
488 value = int(match.group(3).strip())
489 for index in range(0, value):
490 path = "%s%05d" % (prefix, index)
491 if not os.path.exists(path):
492 raise IOError("After call to split, expected file [%s] does not exist." % path)
493 changeOwnership(path, backupUser, backupGroup)
494 if removeSource:
495 if os.path.exists(sourcePath):
496 try:
497 os.remove(sourcePath)
498 logger.debug("Completed removing old file [%s].", sourcePath)
499 except:
500 raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath))
501 finally:
502 os.chdir(cwd)
503