Merge lp:~dholbach/developer-ubuntu-com/importer-post-deployment-fixes into lp:developer-ubuntu-com
- importer-post-deployment-fixes
- Merge into stable
Status: | Merged |
---|---|
Approved by: | David Callé |
Approved revision: | 212 |
Merged at revision: | 194 |
Proposed branch: | lp:~dholbach/developer-ubuntu-com/importer-post-deployment-fixes |
Merge into: | lp:developer-ubuntu-com |
Diff against target: |
480 lines (+189/-72) 7 files modified
md_importer/importer/__init__.py (+12/-0) md_importer/importer/article.py (+37/-22) md_importer/importer/publish.py (+54/-22) md_importer/importer/repo.py (+0/-1) md_importer/tests/test_branch_import.py (+40/-6) md_importer/tests/test_link_rewrite.py (+43/-20) md_importer/tests/utils.py (+3/-1) |
To merge this branch: | bzr merge lp:~dholbach/developer-ubuntu-com/importer-post-deployment-fixes |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Ubuntu App Developer site developers | Pending | ||
Review via email: mp+284309@code.launchpad.net |
Commit message
Description of the change
This is ready to land now.
List of fixes:
- only use a shortlist of markdown extensions
- fix the rewriting of links in articles (links between articles in the
same branch), fix and extend tests
- simplify code somewhat, remove useless bits
- fix stripping of tags like <body>, add tests
- 192. By Daniel Holbach
-
only set page to public_object if it exists
- 193. By Daniel Holbach
-
remove unnecessary reset of repo.pages
- 194. By Daniel Holbach
-
merge from trunk
- 195. By Daniel Holbach
-
- make TestLinkRewrite link check explicit
- fix condition in TestLinkBrokenRewrite to not leave for loop early
- 196. By Daniel Holbach
-
bring TestSnapcraftLinkRewrite test closer to reality and just import what's important to us, fix condition to not leave for loop early
- 197. By Daniel Holbach
-
remove unnecessary line
- 198. By Daniel Holbach
-
avoid 'break' in the for loop
- 199. By Daniel Holbach
-
break out update_page functionality into separate function
- 200. By Daniel Holbach
-
store list of local images and if links were rewritten in the article object, use the new update_page function
- 201. By Daniel Holbach
-
add TODO item, make pyflakes and pep8 happy
- 202. By Daniel Holbach
-
remove body/html tags after soup.prettify
- 203. By Daniel Holbach
-
add test to ensure we strip all <body> tags from the imported articles
- 204. By Daniel Holbach
-
make sure internal links start with '/'
- 205. By Daniel Holbach
-
fix tests wrt fixed links
- 206. By Daniel Holbach
-
remove stray print
- 207. By Daniel Holbach
-
make regexes for stripping body/html/head tags clearer
- 208. By Daniel Holbach
-
drop pymdownx.headeranchor - it creates problems (order of link attributes gets mixed up depending on which html output we use), and we don't need it on the page
- 209. By Daniel Holbach
-
- when comparing HTML, always use clean_html from djangocms_text_ckeditor and soup.prettify, so we're looking at the same output style
- add convenience function find_text_plugin
- check not only if the draft's html has changed, but also if the published
version changed
- update test as well, check only published pages
- 210. By Daniel Holbach
-
make sure we don't have 'None' as slug for the root node, add tests (one for the URL, one for links in the HTML)
- 211. By Daniel Holbach
-
cater to the use-case where we just import snappy docs, but have no release_alias (ie current) set
- 212. By Daniel Holbach
-
fix typo
Preview Diff
1 | === modified file 'md_importer/importer/__init__.py' | |||
2 | --- md_importer/importer/__init__.py 2016-01-12 11:44:04 +0000 | |||
3 | +++ md_importer/importer/__init__.py 2016-03-02 11:13:56 +0000 | |||
4 | @@ -3,3 +3,15 @@ | |||
5 | 3 | DEFAULT_LANG = LANGUAGE_CODE | 3 | DEFAULT_LANG = LANGUAGE_CODE |
6 | 4 | HOME_PAGE_URL = '/{}/'.format(DEFAULT_LANG) | 4 | HOME_PAGE_URL = '/{}/'.format(DEFAULT_LANG) |
7 | 5 | SUPPORTED_ARTICLE_TYPES = ['.md', '.html'] | 5 | SUPPORTED_ARTICLE_TYPES = ['.md', '.html'] |
8 | 6 | |||
9 | 7 | # Instead of just using pymdownx.github, we go with these because of | ||
10 | 8 | # https://github.com/facelessuser/pymdown-extensions/issues/11 | ||
11 | 9 | MARKDOWN_EXTENSIONS = [ | ||
12 | 10 | 'markdown.extensions.tables', | ||
13 | 11 | 'pymdownx.magiclink', | ||
14 | 12 | 'pymdownx.betterem', | ||
15 | 13 | 'pymdownx.tilde', | ||
16 | 14 | 'pymdownx.githubemoji', | ||
17 | 15 | 'pymdownx.tasklist', | ||
18 | 16 | 'pymdownx.superfences', | ||
19 | 17 | ] | ||
20 | 6 | 18 | ||
21 | === modified file 'md_importer/importer/article.py' | |||
22 | --- md_importer/importer/article.py 2016-01-15 13:56:34 +0000 | |||
23 | +++ md_importer/importer/article.py 2016-03-02 11:13:56 +0000 | |||
24 | @@ -8,9 +8,10 @@ | |||
25 | 8 | 8 | ||
26 | 9 | from . import ( | 9 | from . import ( |
27 | 10 | DEFAULT_LANG, | 10 | DEFAULT_LANG, |
28 | 11 | MARKDOWN_EXTENSIONS, | ||
29 | 11 | SUPPORTED_ARTICLE_TYPES, | 12 | SUPPORTED_ARTICLE_TYPES, |
30 | 12 | ) | 13 | ) |
32 | 13 | from .publish import get_or_create_page, slugify | 14 | from .publish import get_or_create_page, slugify, update_page |
33 | 14 | 15 | ||
34 | 15 | if sys.version_info.major == 2: | 16 | if sys.version_info.major == 2: |
35 | 16 | from urlparse import urlparse | 17 | from urlparse import urlparse |
36 | @@ -27,18 +28,18 @@ | |||
37 | 27 | self.write_to = slugify(self.fn) | 28 | self.write_to = slugify(self.fn) |
38 | 28 | self.full_url = write_to | 29 | self.full_url = write_to |
39 | 29 | self.slug = os.path.basename(self.full_url) | 30 | self.slug = os.path.basename(self.full_url) |
40 | 31 | self.links_rewritten = False | ||
41 | 32 | self.local_images = [] | ||
42 | 30 | 33 | ||
43 | 31 | def _find_local_images(self): | 34 | def _find_local_images(self): |
44 | 32 | '''Local images are currently not supported.''' | 35 | '''Local images are currently not supported.''' |
45 | 33 | soup = BeautifulSoup(self.html, 'html5lib') | 36 | soup = BeautifulSoup(self.html, 'html5lib') |
46 | 34 | local_images = [] | ||
47 | 35 | for img in soup.find_all('img'): | 37 | for img in soup.find_all('img'): |
48 | 36 | if img.has_attr('src'): | 38 | if img.has_attr('src'): |
49 | 37 | (scheme, netloc, path, params, query, fragment) = \ | 39 | (scheme, netloc, path, params, query, fragment) = \ |
50 | 38 | urlparse(img.attrs['src']) | 40 | urlparse(img.attrs['src']) |
51 | 39 | if scheme not in ['http', 'https']: | 41 | if scheme not in ['http', 'https']: |
54 | 40 | local_images.extend([img.attrs['src']]) | 42 | self.local_images.extend([img.attrs['src']]) |
53 | 41 | return local_images | ||
55 | 42 | 43 | ||
56 | 43 | def read(self): | 44 | def read(self): |
57 | 44 | if os.path.splitext(self.fn)[1] not in SUPPORTED_ARTICLE_TYPES: | 45 | if os.path.splitext(self.fn)[1] not in SUPPORTED_ARTICLE_TYPES: |
58 | @@ -50,13 +51,13 @@ | |||
59 | 50 | self.html = markdown.markdown( | 51 | self.html = markdown.markdown( |
60 | 51 | f.read(), | 52 | f.read(), |
61 | 52 | output_format='html5', | 53 | output_format='html5', |
63 | 53 | extensions=['pymdownx.github']) | 54 | extensions=MARKDOWN_EXTENSIONS) |
64 | 54 | elif self.fn.endswith('.html'): | 55 | elif self.fn.endswith('.html'): |
65 | 55 | self.html = f.read() | 56 | self.html = f.read() |
68 | 56 | local_images = self._find_local_images() | 57 | self._find_local_images() |
69 | 57 | if local_images: | 58 | if self.local_images: |
70 | 58 | logging.error('Found the following local image(s): {}'.format( | 59 | logging.error('Found the following local image(s): {}'.format( |
72 | 59 | ', '.join(local_images) | 60 | ', '.join(self.local_images) |
73 | 60 | )) | 61 | )) |
74 | 61 | return False | 62 | return False |
75 | 62 | self.title = self._read_title() | 63 | self.title = self._read_title() |
76 | @@ -73,10 +74,15 @@ | |||
77 | 73 | return slugify(self.fn).replace('-', ' ').title() | 74 | return slugify(self.fn).replace('-', ' ').title() |
78 | 74 | 75 | ||
79 | 75 | def _remove_body_and_html_tags(self): | 76 | def _remove_body_and_html_tags(self): |
84 | 76 | self.html = re.sub(r"<html>\n\s<body>\n", "", self.html, | 77 | for regex in [ |
85 | 77 | flags=re.MULTILINE) | 78 | # These are added by markdown.markdown |
86 | 78 | self.html = re.sub(r"\s<\/body>\n<\/html>", "", self.html, | 79 | r'\s*<html>\s*<body>\s*', |
87 | 79 | flags=re.MULTILINE) | 80 | r'\s*<\/body>\s*<\/html>\s*', |
88 | 81 | # This is added by BeautifulSoup.prettify | ||
89 | 82 | r'\s*<html>\s*<head>\s*<\/head>\s*<body>\s*', | ||
90 | 83 | ]: | ||
91 | 84 | self.html = re.sub(regex, '', self.html, | ||
92 | 85 | flags=re.MULTILINE) | ||
93 | 80 | 86 | ||
94 | 81 | def _use_developer_site_style(self): | 87 | def _use_developer_site_style(self): |
95 | 82 | begin = (u"<div class=\"row no-border\">" | 88 | begin = (u"<div class=\"row no-border\">" |
96 | @@ -92,7 +98,6 @@ | |||
97 | 92 | 98 | ||
98 | 93 | def replace_links(self, titles, url_map): | 99 | def replace_links(self, titles, url_map): |
99 | 94 | soup = BeautifulSoup(self.html, 'html5lib') | 100 | soup = BeautifulSoup(self.html, 'html5lib') |
100 | 95 | change = False | ||
101 | 96 | for link in soup.find_all('a'): | 101 | for link in soup.find_all('a'): |
102 | 97 | if not link.has_attr('class') or \ | 102 | if not link.has_attr('class') or \ |
103 | 98 | 'headeranchor-link' not in link.attrs['class']: | 103 | 'headeranchor-link' not in link.attrs['class']: |
104 | @@ -100,10 +105,12 @@ | |||
105 | 100 | if title.endswith(link.attrs['href']) and \ | 105 | if title.endswith(link.attrs['href']) and \ |
106 | 101 | link.attrs['href'] != url_map[title].full_url: | 106 | link.attrs['href'] != url_map[title].full_url: |
107 | 102 | link.attrs['href'] = url_map[title].full_url | 107 | link.attrs['href'] = url_map[title].full_url |
110 | 103 | change = True | 108 | if not link.attrs['href'].startswith('/'): |
111 | 104 | if change: | 109 | link.attrs['href'] = '/' + link.attrs['href'] |
112 | 110 | self.links_rewritten = True | ||
113 | 111 | if self.links_rewritten: | ||
114 | 105 | self.html = soup.prettify() | 112 | self.html = soup.prettify() |
116 | 106 | return change | 113 | self._remove_body_and_html_tags() |
117 | 107 | 114 | ||
118 | 108 | def add_to_db(self): | 115 | def add_to_db(self): |
119 | 109 | '''Publishes pages in their branch alias namespace.''' | 116 | '''Publishes pages in their branch alias namespace.''' |
120 | @@ -112,13 +119,19 @@ | |||
121 | 112 | html=self.html) | 119 | html=self.html) |
122 | 113 | if not self.page: | 120 | if not self.page: |
123 | 114 | return False | 121 | return False |
125 | 115 | self.full_url = self.page.get_absolute_url() | 122 | self.full_url = re.sub( |
126 | 123 | r'^\/None\/', '/{}/'.format(DEFAULT_LANG), | ||
127 | 124 | self.page.get_absolute_url()) | ||
128 | 116 | return True | 125 | return True |
129 | 117 | 126 | ||
130 | 118 | def publish(self): | 127 | def publish(self): |
131 | 128 | if self.links_rewritten: | ||
132 | 129 | update_page(self.page, title=self.title, full_url=self.full_url, | ||
133 | 130 | menu_title=self.title, html=self.html) | ||
134 | 119 | if self.page.is_dirty(DEFAULT_LANG): | 131 | if self.page.is_dirty(DEFAULT_LANG): |
135 | 120 | self.page.publish(DEFAULT_LANG) | 132 | self.page.publish(DEFAULT_LANG) |
137 | 121 | self.page = self.page.get_public_object() | 133 | if self.page.get_public_object(): |
138 | 134 | self.page = self.page.get_public_object() | ||
139 | 122 | return self.page | 135 | return self.page |
140 | 123 | 136 | ||
141 | 124 | 137 | ||
142 | @@ -128,14 +141,16 @@ | |||
143 | 128 | def read(self): | 141 | def read(self): |
144 | 129 | if not Article.read(self): | 142 | if not Article.read(self): |
145 | 130 | return False | 143 | return False |
148 | 131 | self.release_alias = re.findall(r'snappy/guides/(\S+?)/\S+?', | 144 | matches = re.findall(r'snappy/guides/(\S+?)/\S+?', |
149 | 132 | self.full_url)[0] | 145 | self.full_url) |
150 | 146 | if matches: | ||
151 | 147 | self.release_alias = matches[0] | ||
152 | 133 | self._make_snappy_mods() | 148 | self._make_snappy_mods() |
153 | 134 | return True | 149 | return True |
154 | 135 | 150 | ||
155 | 136 | def _make_snappy_mods(self): | 151 | def _make_snappy_mods(self): |
156 | 137 | # Make sure the reader knows which documentation she is browsing | 152 | # Make sure the reader knows which documentation she is browsing |
158 | 138 | if self.release_alias != 'current': | 153 | if self.release_alias and self.release_alias != 'current': |
159 | 139 | before = (u"<div class=\"row no-border\">\n" | 154 | before = (u"<div class=\"row no-border\">\n" |
160 | 140 | "<div class=\"eight-col\">\n") | 155 | "<div class=\"eight-col\">\n") |
161 | 141 | after = (u"<div class=\"row no-border\">\n" | 156 | after = (u"<div class=\"row no-border\">\n" |
162 | @@ -158,6 +173,6 @@ | |||
163 | 158 | redirect="/snappy/guides/current/{}".format(self.slug)) | 173 | redirect="/snappy/guides/current/{}".format(self.slug)) |
164 | 159 | if not page: | 174 | if not page: |
165 | 160 | return False | 175 | return False |
167 | 161 | else: | 176 | elif self.release_alias: |
168 | 162 | self.title += " (%s)" % (self.release_alias,) | 177 | self.title += " (%s)" % (self.release_alias,) |
169 | 163 | return Article.add_to_db(self) | 178 | return Article.add_to_db(self) |
170 | 164 | 179 | ||
171 | === modified file 'md_importer/importer/publish.py' | |||
172 | --- md_importer/importer/publish.py 2016-01-15 13:58:39 +0000 | |||
173 | +++ md_importer/importer/publish.py 2016-03-02 11:13:56 +0000 | |||
174 | @@ -4,11 +4,18 @@ | |||
175 | 4 | from cms.models import Title | 4 | from cms.models import Title |
176 | 5 | from djangocms_text_ckeditor.html import clean_html | 5 | from djangocms_text_ckeditor.html import clean_html |
177 | 6 | 6 | ||
178 | 7 | from bs4 import BeautifulSoup | ||
179 | 7 | import logging | 8 | import logging |
180 | 8 | import re | 9 | import re |
181 | 9 | import os | 10 | import os |
182 | 10 | 11 | ||
183 | 11 | 12 | ||
184 | 13 | def _compare_html(html_a, html_b): | ||
185 | 14 | soup_a = BeautifulSoup(html_a, 'html5lib') | ||
186 | 15 | soup_b = BeautifulSoup(html_b, 'html5lib') | ||
187 | 16 | return (clean_html(soup_a.prettify()) == clean_html(soup_b.prettify())) | ||
188 | 17 | |||
189 | 18 | |||
190 | 12 | def slugify(filename): | 19 | def slugify(filename): |
191 | 13 | return os.path.basename(filename).replace('.md', '').replace('.html', '') | 20 | return os.path.basename(filename).replace('.md', '').replace('.html', '') |
192 | 14 | 21 | ||
193 | @@ -32,6 +39,51 @@ | |||
194 | 32 | return parent_pages[0].page | 39 | return parent_pages[0].page |
195 | 33 | 40 | ||
196 | 34 | 41 | ||
197 | 42 | def find_text_plugin(page): | ||
198 | 43 | # We create the page, so we know there's just one placeholder | ||
199 | 44 | placeholder = page.placeholders.all()[0] | ||
200 | 45 | if placeholder.get_plugins(): | ||
201 | 46 | return ( | ||
202 | 47 | placeholder, | ||
203 | 48 | placeholder.get_plugins()[0].get_plugin_instance()[0] | ||
204 | 49 | ) | ||
205 | 50 | return (placeholder, None) | ||
206 | 51 | |||
207 | 52 | |||
208 | 53 | def update_page(page, title, full_url, menu_title=None, | ||
209 | 54 | in_navigation=True, redirect=None, html=None): | ||
210 | 55 | if page.get_title() != title: | ||
211 | 56 | page.title = title | ||
212 | 57 | if page.get_menu_title() != menu_title: | ||
213 | 58 | page.menu_title = menu_title | ||
214 | 59 | if page.in_navigation != in_navigation: | ||
215 | 60 | page.in_navigation = in_navigation | ||
216 | 61 | if page.get_redirect() != redirect: | ||
217 | 62 | page.redirect = redirect | ||
218 | 63 | if html: | ||
219 | 64 | update = True | ||
220 | 65 | (placeholder, plugin) = find_text_plugin(page) | ||
221 | 66 | if plugin: | ||
222 | 67 | if _compare_html(html, plugin.body): | ||
223 | 68 | update = False | ||
224 | 69 | elif page.get_public_object(): | ||
225 | 70 | (dummy, published_plugin) = \ | ||
226 | 71 | find_text_plugin(page.get_public_object()) | ||
227 | 72 | if published_plugin: | ||
228 | 73 | if _compare_html(html, published_plugin.body): | ||
229 | 74 | update = False | ||
230 | 75 | if update: | ||
231 | 76 | plugin.body = html | ||
232 | 77 | plugin.save() | ||
233 | 78 | else: | ||
234 | 79 | # Reset draft | ||
235 | 80 | page.get_draft_object().revert(DEFAULT_LANG) | ||
236 | 81 | else: | ||
237 | 82 | add_plugin( | ||
238 | 83 | placeholder, 'RawHtmlPlugin', | ||
239 | 84 | DEFAULT_LANG, body=html) | ||
240 | 85 | |||
241 | 86 | |||
242 | 35 | def get_or_create_page(title, full_url, menu_title=None, | 87 | def get_or_create_page(title, full_url, menu_title=None, |
243 | 36 | in_navigation=True, redirect=None, html=None): | 88 | in_navigation=True, redirect=None, html=None): |
244 | 37 | # First check if pages already exist. | 89 | # First check if pages already exist. |
245 | @@ -39,26 +91,8 @@ | |||
246 | 39 | path__regex=full_url).filter(publisher_is_draft=True) | 91 | path__regex=full_url).filter(publisher_is_draft=True) |
247 | 40 | if pages: | 92 | if pages: |
248 | 41 | page = pages[0].page | 93 | page = pages[0].page |
269 | 42 | if page.get_title() != title: | 94 | update_page(page, title, full_url, menu_title, in_navigation, |
270 | 43 | page.title = title | 95 | redirect, html) |
251 | 44 | if page.get_menu_title() != menu_title: | ||
252 | 45 | page.menu_title = menu_title | ||
253 | 46 | if page.in_navigation != in_navigation: | ||
254 | 47 | page.in_navigation = in_navigation | ||
255 | 48 | if page.get_redirect() != redirect: | ||
256 | 49 | page.redirect = redirect | ||
257 | 50 | if html: | ||
258 | 51 | # We create the page, so we know there's just one placeholder | ||
259 | 52 | placeholder = page.placeholders.all()[0] | ||
260 | 53 | if placeholder.get_plugins(): | ||
261 | 54 | plugin = placeholder.get_plugins()[0].get_plugin_instance()[0] | ||
262 | 55 | if plugin.body != clean_html(html, full=False): | ||
263 | 56 | plugin.body = html | ||
264 | 57 | plugin.save() | ||
265 | 58 | else: | ||
266 | 59 | add_plugin( | ||
267 | 60 | placeholder, 'RawHtmlPlugin', | ||
268 | 61 | DEFAULT_LANG, body=html) | ||
271 | 62 | else: | 96 | else: |
272 | 63 | parent = _find_parent(full_url) | 97 | parent = _find_parent(full_url) |
273 | 64 | if not parent: | 98 | if not parent: |
274 | @@ -70,6 +104,4 @@ | |||
275 | 70 | position='last-child', redirect=redirect) | 104 | position='last-child', redirect=redirect) |
276 | 71 | placeholder = page.placeholders.get() | 105 | placeholder = page.placeholders.get() |
277 | 72 | add_plugin(placeholder, 'RawHtmlPlugin', DEFAULT_LANG, body=html) | 106 | add_plugin(placeholder, 'RawHtmlPlugin', DEFAULT_LANG, body=html) |
278 | 73 | placeholder = page.placeholders.all()[0] | ||
279 | 74 | plugin = placeholder.get_plugins()[0].get_plugin_instance()[0] | ||
280 | 75 | return page | 107 | return page |
281 | 76 | 108 | ||
282 | === modified file 'md_importer/importer/repo.py' | |||
283 | --- md_importer/importer/repo.py 2016-01-15 18:54:50 +0000 | |||
284 | +++ md_importer/importer/repo.py 2016-03-02 11:13:56 +0000 | |||
285 | @@ -118,7 +118,6 @@ | |||
286 | 118 | logging.error('Publishing of {} aborted.'.format(self.origin)) | 118 | logging.error('Publishing of {} aborted.'.format(self.origin)) |
287 | 119 | return False | 119 | return False |
288 | 120 | article.replace_links(self.titles, self.url_map) | 120 | article.replace_links(self.titles, self.url_map) |
289 | 121 | self.pages = [] | ||
290 | 122 | for article in self.imported_articles: | 121 | for article in self.imported_articles: |
291 | 123 | self.pages.extend([article.publish()]) | 122 | self.pages.extend([article.publish()]) |
292 | 124 | if self.index_page: | 123 | if self.index_page: |
293 | 125 | 124 | ||
294 | === modified file 'md_importer/tests/test_branch_import.py' | |||
295 | --- md_importer/tests/test_branch_import.py 2016-01-15 13:59:32 +0000 | |||
296 | +++ md_importer/tests/test_branch_import.py 2016-03-02 11:13:56 +0000 | |||
297 | @@ -2,9 +2,10 @@ | |||
298 | 2 | import pytz | 2 | import pytz |
299 | 3 | import shutil | 3 | import shutil |
300 | 4 | 4 | ||
302 | 5 | from cms.models import CMSPlugin, Page | 5 | from cms.models import Page |
303 | 6 | 6 | ||
304 | 7 | from md_importer.importer.article import Article | 7 | from md_importer.importer.article import Article |
305 | 8 | from md_importer.importer.publish import find_text_plugin | ||
306 | 8 | from .utils import TestLocalBranchImport | 9 | from .utils import TestLocalBranchImport |
307 | 9 | 10 | ||
308 | 10 | 11 | ||
309 | @@ -66,6 +67,39 @@ | |||
310 | 66 | self.assertEqual(page.parent_id, self.root.id) | 67 | self.assertEqual(page.parent_id, self.root.id) |
311 | 67 | 68 | ||
312 | 68 | 69 | ||
313 | 70 | class TestArticleHTMLTagsAfterImport(TestLocalBranchImport): | ||
314 | 71 | def runTest(self): | ||
315 | 72 | self.create_repo('data/snapcraft-test') | ||
316 | 73 | self.repo.add_directive('docs', '') | ||
317 | 74 | self.assertEqual(len(self.repo.directives), 1) | ||
318 | 75 | self.assertTrue(self.repo.execute_import_directives()) | ||
319 | 76 | self.assertGreater(len(self.repo.imported_articles), 3) | ||
320 | 77 | self.assertTrue(self.repo.publish()) | ||
321 | 78 | pages = Page.objects.all() | ||
322 | 79 | self.assertGreater(pages.count(), len(self.repo.imported_articles)) | ||
323 | 80 | for article in self.repo.imported_articles: | ||
324 | 81 | self.assertIsInstance(article, Article) | ||
325 | 82 | self.assertNotIn('<body>', article.html) | ||
326 | 83 | self.assertNotIn('<body>', article.html) | ||
327 | 84 | |||
328 | 85 | |||
329 | 86 | class TestNoneInURLAfterImport(TestLocalBranchImport): | ||
330 | 87 | def runTest(self): | ||
331 | 88 | self.create_repo('data/snapcraft-test') | ||
332 | 89 | self.repo.add_directive('docs', '') | ||
333 | 90 | self.assertEqual(len(self.repo.directives), 1) | ||
334 | 91 | self.assertTrue(self.repo.execute_import_directives()) | ||
335 | 92 | self.assertGreater(len(self.repo.imported_articles), 3) | ||
336 | 93 | self.assertTrue(self.repo.publish()) | ||
337 | 94 | pages = Page.objects.all() | ||
338 | 95 | self.assertGreater(pages.count(), len(self.repo.imported_articles)) | ||
339 | 96 | for article in self.repo.imported_articles: | ||
340 | 97 | self.assertIsInstance(article, Article) | ||
341 | 98 | self.assertNotIn('/None/', article.full_url) | ||
342 | 99 | for page in pages: | ||
343 | 100 | self.assertIsNotNone(page.get_slug()) | ||
344 | 101 | |||
345 | 102 | |||
346 | 69 | class TestTwiceImport(TestLocalBranchImport): | 103 | class TestTwiceImport(TestLocalBranchImport): |
347 | 70 | '''Run import on the same contents twice, make sure we don't | 104 | '''Run import on the same contents twice, make sure we don't |
348 | 71 | add new pages over and over again.''' | 105 | add new pages over and over again.''' |
349 | @@ -101,9 +135,9 @@ | |||
350 | 101 | self.assertEqual( | 135 | self.assertEqual( |
351 | 102 | Page.objects.filter(publisher_is_draft=False).count(), | 136 | Page.objects.filter(publisher_is_draft=False).count(), |
352 | 103 | len(self.repo.imported_articles)+1) # articles + root | 137 | len(self.repo.imported_articles)+1) # articles + root |
353 | 138 | shutil.rmtree(self.tempdir) | ||
354 | 104 | # Take the time before publishing the second import | 139 | # Take the time before publishing the second import |
355 | 105 | now = datetime.now(pytz.utc) | 140 | now = datetime.now(pytz.utc) |
356 | 106 | shutil.rmtree(self.tempdir) | ||
357 | 107 | # Run second import | 141 | # Run second import |
358 | 108 | self.create_repo('data/snapcraft-test') | 142 | self.create_repo('data/snapcraft-test') |
359 | 109 | self.repo.add_directive('docs', '') | 143 | self.repo.add_directive('docs', '') |
360 | @@ -112,7 +146,7 @@ | |||
361 | 112 | self.assertTrue(self.repo.execute_import_directives()) | 146 | self.assertTrue(self.repo.execute_import_directives()) |
362 | 113 | self.assertTrue(self.repo.publish()) | 147 | self.assertTrue(self.repo.publish()) |
363 | 114 | # Check the page's plugins | 148 | # Check the page's plugins |
368 | 115 | for plugin_change in CMSPlugin.objects.filter( | 149 | for page in Page.objects.filter(publisher_is_draft=False): |
369 | 116 | plugin_type='RawHtmlPlugin').order_by( | 150 | if page != self.root: |
370 | 117 | '-changed_date'): | 151 | (dummy, plugin) = find_text_plugin(page) |
371 | 118 | self.assertGreater(now, plugin_change.changed_date) | 152 | self.assertGreater(now, plugin.changed_date) |
372 | 119 | 153 | ||
373 | === modified file 'md_importer/tests/test_link_rewrite.py' | |||
374 | --- md_importer/tests/test_link_rewrite.py 2016-01-11 14:38:51 +0000 | |||
375 | +++ md_importer/tests/test_link_rewrite.py 2016-03-02 11:13:56 +0000 | |||
376 | @@ -30,6 +30,11 @@ | |||
377 | 30 | link.attrs['href'], | 30 | link.attrs['href'], |
378 | 31 | ', '.join([p.get_absolute_url() for p in pages]))) | 31 | ', '.join([p.get_absolute_url() for p in pages]))) |
379 | 32 | self.assertIn(page, pages) | 32 | self.assertIn(page, pages) |
380 | 33 | if article.slug == 'file1': | ||
381 | 34 | for link in soup.find_all('a'): | ||
382 | 35 | if not link.has_attr('class') or \ | ||
383 | 36 | 'headeranchor-link' not in link.attrs['class']: | ||
384 | 37 | self.assertEqual(link.attrs['href'], '/file2') | ||
385 | 33 | 38 | ||
386 | 34 | 39 | ||
387 | 35 | class TestLinkBrokenRewrite(TestLocalBranchImport): | 40 | class TestLinkBrokenRewrite(TestLocalBranchImport): |
388 | @@ -45,12 +50,34 @@ | |||
389 | 45 | self.assertEqual(article.page.parent, self.root) | 50 | self.assertEqual(article.page.parent, self.root) |
390 | 46 | soup = BeautifulSoup(article.html, 'html5lib') | 51 | soup = BeautifulSoup(article.html, 'html5lib') |
391 | 47 | for link in soup.find_all('a'): | 52 | for link in soup.find_all('a'): |
398 | 48 | if link.has_attr('class') and \ | 53 | if not link.has_attr('class') or \ |
399 | 49 | 'headeranchor-link' in link.attrs['class']: | 54 | 'headeranchor-link' not in link.attrs['class']: |
400 | 50 | break | 55 | page = self.check_local_link(link.attrs['href']) |
401 | 51 | page = self.check_local_link(link.attrs['href']) | 56 | self.assertIsNone(page) |
402 | 52 | self.assertIsNone(page) | 57 | self.assertNotIn(page, pages) |
403 | 53 | self.assertNotIn(page, pages) | 58 | |
404 | 59 | |||
405 | 60 | class TestNoneNotInLinks(TestLocalBranchImport): | ||
406 | 61 | def runTest(self): | ||
407 | 62 | self.create_repo('data/snapcraft-test') | ||
408 | 63 | snappy_page = db_add_empty_page('Snappy', self.root) | ||
409 | 64 | self.assertFalse(snappy_page.publisher_is_draft) | ||
410 | 65 | build_apps = db_add_empty_page('Build Apps', snappy_page) | ||
411 | 66 | self.assertFalse(build_apps.publisher_is_draft) | ||
412 | 67 | self.assertEqual( | ||
413 | 68 | 3, Page.objects.filter(publisher_is_draft=False).count()) | ||
414 | 69 | self.repo.add_directive('docs/intro.md', 'snappy/build-apps/current') | ||
415 | 70 | self.repo.add_directive('docs', 'snappy/build-apps/current') | ||
416 | 71 | self.assertTrue(self.repo.execute_import_directives()) | ||
417 | 72 | self.assertTrue(self.repo.publish()) | ||
418 | 73 | pages = Page.objects.all() | ||
419 | 74 | for article in self.repo.imported_articles: | ||
420 | 75 | self.assertTrue(isinstance(article, Article)) | ||
421 | 76 | self.assertGreater(len(article.html), 0) | ||
422 | 77 | soup = BeautifulSoup(article.html, 'html5lib') | ||
423 | 78 | for link in soup.find_all('a'): | ||
424 | 79 | if is_local_link(link): | ||
425 | 80 | self.assertFalse(link.attrs['href'].startswith('/None/')) | ||
426 | 54 | 81 | ||
427 | 55 | 82 | ||
428 | 56 | class TestSnapcraftLinkRewrite(TestLocalBranchImport): | 83 | class TestSnapcraftLinkRewrite(TestLocalBranchImport): |
429 | @@ -62,25 +89,21 @@ | |||
430 | 62 | self.assertFalse(build_apps.publisher_is_draft) | 89 | self.assertFalse(build_apps.publisher_is_draft) |
431 | 63 | self.assertEqual( | 90 | self.assertEqual( |
432 | 64 | 3, Page.objects.filter(publisher_is_draft=False).count()) | 91 | 3, Page.objects.filter(publisher_is_draft=False).count()) |
437 | 65 | self.repo.add_directive('docs', 'snappy/build-apps/devel') | 92 | self.repo.add_directive('docs/intro.md', 'snappy/build-apps/current') |
438 | 66 | self.repo.add_directive('README.md', 'snappy/build-apps/devel') | 93 | self.repo.add_directive('docs', 'snappy/build-apps/current') |
435 | 67 | self.repo.add_directive( | ||
436 | 68 | 'HACKING.md', 'snappy/build-apps/devel/hacking') | ||
439 | 69 | self.assertTrue(self.repo.execute_import_directives()) | 94 | self.assertTrue(self.repo.execute_import_directives()) |
440 | 70 | self.assertTrue(self.repo.publish()) | 95 | self.assertTrue(self.repo.publish()) |
441 | 71 | pages = Page.objects.all() | 96 | pages = Page.objects.all() |
442 | 72 | for article in self.repo.imported_articles: | 97 | for article in self.repo.imported_articles: |
443 | 73 | self.assertTrue(isinstance(article, Article)) | 98 | self.assertTrue(isinstance(article, Article)) |
444 | 74 | self.assertGreater(len(article.html), 0) | 99 | self.assertGreater(len(article.html), 0) |
445 | 75 | for article in self.repo.imported_articles: | ||
446 | 76 | soup = BeautifulSoup(article.html, 'html5lib') | 100 | soup = BeautifulSoup(article.html, 'html5lib') |
447 | 77 | for link in soup.find_all('a'): | 101 | for link in soup.find_all('a'): |
457 | 78 | if not is_local_link(link): | 102 | if is_local_link(link): |
458 | 79 | break | 103 | page = self.check_local_link(link.attrs['href']) |
459 | 80 | page = self.check_local_link(link.attrs['href']) | 104 | self.assertIsNotNone( |
460 | 81 | self.assertIsNotNone( | 105 | page, |
461 | 82 | page, | 106 | msg='Link {} not found. Available pages: {}'.format( |
462 | 83 | msg='Link {} not found. Available pages: {}'.format( | 107 | link.attrs['href'], |
463 | 84 | link.attrs['href'], | 108 | ', '.join([p.get_absolute_url() for p in pages]))) |
464 | 85 | ', '.join([p.get_absolute_url() for p in pages]))) | 109 | self.assertIn(page, pages) |
456 | 86 | self.assertIn(page, pages) | ||
465 | 87 | 110 | ||
466 | === modified file 'md_importer/tests/utils.py' | |||
467 | --- md_importer/tests/utils.py 2016-01-11 14:38:51 +0000 | |||
468 | +++ md_importer/tests/utils.py 2016-03-02 11:13:56 +0000 | |||
469 | @@ -55,8 +55,10 @@ | |||
470 | 55 | self.assertEqual(self.fetch_retcode, 0) | 55 | self.assertEqual(self.fetch_retcode, 0) |
471 | 56 | 56 | ||
472 | 57 | def check_local_link(self, url): | 57 | def check_local_link(self, url): |
473 | 58 | if not url.startswith('/'): | ||
474 | 59 | url = '/' + url | ||
475 | 58 | if not url.startswith('/{}/'.format(DEFAULT_LANG)): | 60 | if not url.startswith('/{}/'.format(DEFAULT_LANG)): |
477 | 59 | url = '/{}/{}/'.format(DEFAULT_LANG, url) | 61 | url = '/{}'.format(DEFAULT_LANG) + url |
478 | 60 | request = self.get_request(url) | 62 | request = self.get_request(url) |
479 | 61 | page = get_page_from_request(request) | 63 | page = get_page_from_request(request) |
480 | 62 | return page | 64 | return page |