Merge lp:~jamesodhunt/upstart/bug-1447756-the-actual-fix into lp:upstart

Proposed by James Hunt on 2015-05-13
Status: Merged
Merged at revision: 1666
Proposed branch: lp:~jamesodhunt/upstart/bug-1447756-the-actual-fix
Merge into: lp:upstart
Diff against target: 230 lines (+167/-3)
3 files modified
ChangeLog (+18/-0)
init/log.c (+42/-0)
init/tests/test_log.c (+107/-3)
To merge this branch: bzr merge lp:~jamesodhunt/upstart/bug-1447756-the-actual-fix
Reviewer Review Type Date Requested Status
Upstart Reviewers 2015-05-13 Pending
Review via email: mp+259020@code.launchpad.net

Description of the change

* init/log.c:
  - log_clear_unflushed(): Handle logs on the unflushed list which
    get flushed automatically by their corresponding NihIo before
    a request to clear the unflushed list is made. This scenario
    is triggered when a job which has spawned processes exits, but
    where the spawned processes continue to run and then produce
    output before the request to clear the unflushed list is made
    (at the time the log partition becomes writable) but after the
    log partition becomes writable (LP: #1447756).
* init/tests/test_log.c:
  - test_log_new(): Added new test:
    "ensure logger unflushed list ignores already flushed data".
  - test_log_destroy(): Improved test
     "ensure unflushed data moved to unflushed list with uid 0" by giving
    the log a parent and checking parent handling behaviour.

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ChangeLog'
2--- ChangeLog 2015-04-24 12:57:48 +0000
3+++ ChangeLog 2015-05-13 15:53:09 +0000
4@@ -1,3 +1,21 @@
5+2015-05-12 James Hunt <james.hunt@ubuntu.com>
6+
7+ * init/log.c:
8+ - log_clear_unflushed(): Handle logs on the unflushed list which
9+ get flushed automatically by their corresponding NihIo before
10+ a request to clear the unflushed list is made. This scenario
11+ is triggered when a job which has spawned processes exits, but
12+ where the spawned processes continue to run and then produce
13+ output before the request to clear the unflushed list is made
14+ (at the time the log partition becomes writable) but after the
15+ log partition becomes writable (LP: #1447756).
16+ * init/tests/test_log.c:
17+ - test_log_new(): Added new test:
18+ "ensure logger unflushed list ignores already flushed data".
19+ - test_log_destroy(): Improved test
20+ "ensure unflushed data moved to unflushed list with uid 0" by giving
21+ the log a parent and checking parent handling behaviour.
22+
23 2015-04-24 James Hunt <james.hunt@ubuntu.com>
24
25 * init/log.c:
26
27=== modified file 'init/log.c'
28--- init/log.c 2015-04-24 12:57:48 +0000
29+++ init/log.c 2015-05-13 15:53:09 +0000
30@@ -802,6 +802,48 @@
31
32 nih_assert (log);
33
34+ if (! log->unflushed->len) {
35+ /* The job that originally owned this log has
36+ * exited, but it spawned one or more other
37+ * processes which still live on. If those
38+ * processes produce output, the NihIo will get
39+ * called automatically to flush the data.
40+ * However, that happens asynchronously to
41+ * clearing the unflushed list.
42+ *
43+ * Hence, if no unflushed data remains, remove
44+ * the entry from the list.
45+ *
46+ * Note that doing so frees the log and stops
47+ * any further output being logged from the
48+ * out-of-job process(es). This isn't ideal
49+ * however:
50+ *
51+ * - Better this than having the Log objects remain indefinately.
52+ *
53+ * - The problem is ultimately caused by a rogue
54+ * job which should be fixed (see the warning
55+ * that is produced for such jobs in
56+ * log_read_watch()).
57+ *
58+ * - Even if the Log was retained, it would not
59+ * be flushed unless multiple calls to this
60+ * function are made (which is not going to
61+ * occur by default).
62+ *
63+ * Note that it is necessary to set the io to
64+ * NULL since it has already been freed by
65+ * nih_io_closed(), but the io is not set to
66+ * NULL. If we don't do this, when log_destroy()
67+ * is called, it will attempt to dereference the
68+ * already freed log->io.
69+ */
70+ log->io = NULL;
71+ nih_free (elem);
72+
73+ continue;
74+ }
75+
76 /* To be added to this list, log should have been
77 * detached from its parent job.
78 */
79
80=== modified file 'init/tests/test_log.c'
81--- init/tests/test_log.c 2015-04-24 12:57:48 +0000
82+++ init/tests/test_log.c 2015-05-13 15:53:09 +0000
83@@ -654,6 +654,94 @@
84 TEST_FREE (log);
85
86 /************************************************************/
87+ TEST_FEATURE ("ensure logger unflushed list ignores already flushed data");
88+
89+ /* Reset */
90+ log_flushed = 0;
91+
92+ TEST_EQ (openpty (&pty_master, &pty_slave, NULL, NULL, NULL), 0);
93+
94+ TEST_GT (sprintf (filename, "%s/test.log", dirname), 0);
95+
96+ TEST_NE (log_unflushed_files, NULL);
97+ TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
98+
99+ TEST_EQ (stat (dirname, &statbuf), 0);
100+
101+ /* Save */
102+ old_perms = statbuf.st_mode;
103+
104+ /* Make file inaccessible */
105+ TEST_EQ (chmod (dirname, 0x0), 0);
106+
107+ log = log_new (NULL, filename, pty_master, 0);
108+ TEST_NE_P (log, NULL);
109+
110+ /* Write data to simulate a job process, but _DON'T_ close the
111+ * fd (to simulate the job spawning another process which olds
112+ * the fd open).
113+ */
114+ ret = write (pty_slave, str, strlen (str));
115+ TEST_GT (ret, 0);
116+ ret = write (pty_slave, "\n", 1);
117+ TEST_EQ (ret, 1);
118+
119+ assert0 (log->unflushed->len);
120+ TEST_EQ_P (log->unflushed->buf, NULL);
121+
122+ TEST_NE_P (log->io, NULL);
123+
124+ TEST_WATCH_UPDATE ();
125+
126+ /* nih_io_closed() should not have been called */
127+ TEST_NE_P (log->io, NULL);
128+
129+ TEST_EQ_STR (log->unflushed->buf, "hello, world!\r\n");
130+
131+ /* +2 for '\r\n' */
132+ TEST_EQ (log->unflushed->len, 2 + strlen (str));
133+
134+ /* Ensure no log file written */
135+ TEST_LT (stat (filename, &statbuf), 0);
136+
137+ TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
138+
139+ TEST_FREE_TAG (log);
140+
141+ /* Force the usual code path for early boot on a normal system
142+ * when a job produces output before the disks are writable.
143+ */
144+ log->open_errno = EROFS;
145+
146+ TEST_EQ (log_handle_unflushed (NULL, log), 0);
147+
148+ TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
149+
150+ /* Restore access */
151+ TEST_EQ (chmod (dirname, old_perms), 0);
152+
153+ TEST_NE_P (log->io, NULL);
154+
155+ /* Simulate the data having already been flushed to disk
156+ * and nih_io_closed() having been called.
157+ */
158+ log->io = NULL;
159+ log->unflushed->len = 0;
160+ log->unflushed->buf = NULL;
161+ log->remote_closed = 1;
162+
163+ ret = log_clear_unflushed ();
164+ TEST_EQ (ret, 0);
165+
166+ TEST_FREE (log);
167+
168+ /* Ensure no log file written */
169+ TEST_LT (stat (filename, &statbuf), 0);
170+
171+ /* Tidy up */
172+ close (pty_slave);
173+
174+ /************************************************************/
175 TEST_FEATURE ("ensure logger flushes when destroyed with uid 0");
176
177 TEST_EQ (openpty (&pty_master, &pty_slave, NULL, NULL, NULL), 0);
178@@ -1143,6 +1231,7 @@
179 int fd;
180 NihListEntry *entry;
181 struct stat statbuf;
182+ char *p;
183
184 TEST_FUNCTION ("log_destroy");
185
186@@ -1227,9 +1316,14 @@
187 TEST_NE (fd, -1);
188 close (fd);
189
190- log = log_new (NULL, filename, pty_master, 0);
191+ p = nih_strdup (NULL, "hello");
192+ TEST_NE_P (p, NULL);
193+
194+ log = log_new (p, filename, pty_master, 0);
195 TEST_NE_P (log, NULL);
196
197+ TEST_ALLOC_PARENT (log, p);
198+
199 ret = write (pty_slave, str, strlen (str));
200 TEST_GT (ret, 0);
201
202@@ -1244,16 +1338,26 @@
203 log_flushed = 0;
204
205 TEST_TRUE (NIH_LIST_EMPTY (log_unflushed_files));
206- ret = log_handle_unflushed (NULL, log);
207-
208+ ret = log_handle_unflushed (p, log);
209 TEST_EQ (ret, 0);
210+
211 TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
212
213+ TEST_ALLOC_NOT_PARENT (log, p);
214+
215 entry = (NihListEntry *)log_unflushed_files->next;
216 TEST_NE_P (entry, NULL);
217 TEST_FREE_TAG (entry);
218 TEST_ALLOC_PARENT (entry, log_unflushed_files);
219
220+ /* Free the original parent object and ensure it doesn't free
221+ * the log.
222+ */
223+ nih_free (p);
224+
225+ TEST_FALSE (NIH_LIST_EMPTY (log_unflushed_files));
226+ TEST_NOT_FREE (log);
227+
228 tmp_log = entry->data;
229 TEST_EQ_P (tmp_log, log);
230 TEST_ALLOC_PARENT (log, entry);

Subscribers

People subscribed via source and target branches