Merge lp:~harlowja/cloud-init/cloud-init-gzip into lp:~cloud-init-dev/cloud-init/trunk

Proposed by Joshua Harlow
Status: Merged
Merged at revision: 835
Proposed branch: lp:~harlowja/cloud-init/cloud-init-gzip
Merge into: lp:~cloud-init-dev/cloud-init/trunk
Diff against target: 194 lines (+78/-24)
1 file modified
cloudinit/user_data.py (+78/-24)
To merge this branch: bzr merge lp:~harlowja/cloud-init/cloud-init-gzip
| Reviewer | Review Type | Date Requested | Status |
|---|---|---|---|
| cloud-init Commiters | | | Pending |
Review via email: mp+175961@code.launchpad.net
To post a comment you must log in.
831. By Joshua Harlow

Ensure we reset the part after decompression.

832. By Joshua Harlow

Keep filename from original part.

833. By Joshua Harlow

Unify filename, header replacement.

834. By Joshua Harlow

Just check the filename existing.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'cloudinit/user_data.py'
2--- cloudinit/user_data.py 2013-02-24 05:23:24 +0000
3+++ cloudinit/user_data.py 2013-07-20 23:42:24 +0000
4@@ -23,8 +23,10 @@
5 import os
6
7 import email
8+
9 from email.mime.base import MIMEBase
10 from email.mime.multipart import MIMEMultipart
11+from email.mime.nonmultipart import MIMENonMultipart
12 from email.mime.text import MIMEText
13
14 from cloudinit import handlers
15@@ -48,6 +50,18 @@
16 UNDEF_TYPE = "text/plain"
17 ARCHIVE_UNDEF_TYPE = "text/cloud-config"
18
19+# This seems to hit most of the gzip possible content types.
20+DECOMP_TYPES = [
21+ 'application/gzip',
22+ 'application/gzip-compressed',
23+ 'application/gzipped',
24+ 'application/x-compress',
25+ 'application/x-compressed',
26+ 'application/x-gunzip',
27+ 'application/x-gzip',
28+ 'application/x-gzip-compressed',
29+]
30+
31 # Msg header used to track attachments
32 ATTACHMENT_FIELD = 'Number-Attachments'
33
34@@ -56,6 +70,17 @@
35 EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
36
37
38+def _replace_header(msg, key, value):
39+ del msg[key]
40+ msg[key] = value
41+
42+
43+def _set_filename(msg, filename):
44+ del msg['Content-Disposition']
45+ msg.add_header('Content-Disposition',
46+ 'attachment', filename=str(filename))
47+
48+
49 class UserDataProcessor(object):
50 def __init__(self, paths):
51 self.paths = paths
52@@ -67,6 +92,10 @@
53 return accumulating_msg
54
55 def _process_msg(self, base_msg, append_msg):
56+
57+ def find_ctype(payload):
58+ return handlers.type_from_starts_with(payload)
59+
60 for part in base_msg.walk():
61 if is_skippable(part):
62 continue
63@@ -74,21 +103,51 @@
64 ctype = None
65 ctype_orig = part.get_content_type()
66 payload = part.get_payload(decode=True)
67-
68+ was_compressed = False
69+
70+ # When the message states it is of a gzipped content type ensure
71+ # that we attempt to decode said payload so that the decompressed
72+ # data can be examined (instead of the compressed data).
73+ if ctype_orig in DECOMP_TYPES:
74+ try:
75+ payload = util.decomp_gzip(payload, quiet=False)
76+ # At this point we don't know what the content-type is
77+ # since we just decompressed it.
78+ ctype_orig = None
79+ was_compressed = True
80+ except util.DecompressionError as e:
81+ LOG.warn("Failed decompressing payload from %s of length"
82+ " %s due to: %s", ctype_orig, len(payload), e)
83+ continue
84+
85+ # Attempt to figure out the payloads content-type
86 if not ctype_orig:
87 ctype_orig = UNDEF_TYPE
88-
89 if ctype_orig in TYPE_NEEDED:
90- ctype = handlers.type_from_starts_with(payload)
91-
92+ ctype = find_ctype(payload)
93 if ctype is None:
94 ctype = ctype_orig
95
96+ # In the case where the data was compressed, we want to make sure
97+ # that we create a new message that contains the found content
98+ # type with the uncompressed content since later traversals of the
99+ # messages will expect a part not compressed.
100+ if was_compressed:
101+ maintype, subtype = ctype.split("/", 1)
102+ n_part = MIMENonMultipart(maintype, subtype)
103+ n_part.set_payload(payload)
104+ # Copy various headers from the old part to the new one,
105+ # but don't include all the headers since some are not useful
106+ # after decoding and decompression.
107+ if part.get_filename():
108+ _set_filename(n_part, part.get_filename())
109+ for h in ('Launch-Index',):
110+ if h in part:
111+ _replace_header(n_part, h, str(part[h]))
112+ part = n_part
113+
114 if ctype != ctype_orig:
115- if CONTENT_TYPE in part:
116- part.replace_header(CONTENT_TYPE, ctype)
117- else:
118- part[CONTENT_TYPE] = ctype
119+ _replace_header(part, CONTENT_TYPE, ctype)
120
121 if ctype in INCLUDE_TYPES:
122 self._do_include(payload, append_msg)
123@@ -98,12 +157,9 @@
124 self._explode_archive(payload, append_msg)
125 continue
126
127- # Should this be happening, shouldn't
128+ # TODO(harlowja): Should this be happening, shouldn't
129 # the part header be modified and not the base?
130- if CONTENT_TYPE in base_msg:
131- base_msg.replace_header(CONTENT_TYPE, ctype)
132- else:
133- base_msg[CONTENT_TYPE] = ctype
134+ _replace_header(base_msg, CONTENT_TYPE, ctype)
135
136 self._attach_part(append_msg, part)
137
138@@ -138,8 +194,7 @@
139
140 def _process_before_attach(self, msg, attached_id):
141 if not msg.get_filename():
142- msg.add_header('Content-Disposition',
143- 'attachment', filename=PART_FN_TPL % (attached_id))
144+ _set_filename(msg, PART_FN_TPL % (attached_id))
145 self._attach_launch_index(msg)
146
147 def _do_include(self, content, append_msg):
148@@ -217,13 +272,15 @@
149 msg.set_payload(content)
150
151 if 'filename' in ent:
152- msg.add_header('Content-Disposition',
153- 'attachment', filename=ent['filename'])
154+ _set_filename(msg, ent['filename'])
155 if 'launch-index' in ent:
156 msg.add_header('Launch-Index', str(ent['launch-index']))
157
158 for header in list(ent.keys()):
159- if header in ('content', 'filename', 'type', 'launch-index'):
160+ if header.lower() in ('content', 'filename', 'type',
161+ 'launch-index', 'content-disposition',
162+ ATTACHMENT_FIELD.lower(),
163+ CONTENT_TYPE.lower()):
164 continue
165 msg.add_header(header, ent[header])
166
167@@ -238,13 +295,13 @@
168 outer_msg[ATTACHMENT_FIELD] = '0'
169
170 if new_count is not None:
171- outer_msg.replace_header(ATTACHMENT_FIELD, str(new_count))
172+ _replace_header(outer_msg, ATTACHMENT_FIELD, str(new_count))
173
174 fetched_count = 0
175 try:
176 fetched_count = int(outer_msg.get(ATTACHMENT_FIELD))
177 except (ValueError, TypeError):
178- outer_msg.replace_header(ATTACHMENT_FIELD, str(fetched_count))
179+ _replace_header(outer_msg, ATTACHMENT_FIELD, str(fetched_count))
180 return fetched_count
181
182 def _attach_part(self, outer_msg, part):
183@@ -276,10 +333,7 @@
184 if "mime-version:" in data[0:4096].lower():
185 msg = email.message_from_string(data)
186 for (key, val) in headers.iteritems():
187- if key in msg:
188- msg.replace_header(key, val)
189- else:
190- msg[key] = val
191+ _replace_header(msg, key, val)
192 else:
193 mtype = headers.get(CONTENT_TYPE, NOT_MULTIPART_TYPE)
194 maintype, subtype = mtype.split("/", 1)