Merge lp:~maddevelopers/mg5amcnlo/1.5.14 into lp:~madteam/mg5amcnlo/trunk
- 1.5.14
- Merge into trunk
Proposed by
Olivier Mattelaer
Status: | Merged |
---|---|
Merged at revision: | 247 |
Proposed branch: | lp:~maddevelopers/mg5amcnlo/1.5.14 |
Merge into: | lp:~madteam/mg5amcnlo/trunk |
Diff against target: |
668 lines (+234/-41) 10 files modified
Template/Source/dsample.f (+11/-8) Template/Source/gen_ximprove.f (+50/-0) Template/bin/internal/run_combine (+12/-1) Template/bin/newprocess_mg5 (+3/-0) UpdateNotes.txt (+3/-0) madgraph/interface/madevent_interface.py (+21/-8) madgraph/interface/madgraph_interface.py (+6/-0) madgraph/iolibs/template_files/madevent_symmetry.f (+12/-1) madgraph/various/cluster.py (+76/-23) proc_card.dat (+40/-0) |
To merge this branch: | bzr merge lp:~maddevelopers/mg5amcnlo/1.5.14 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
marco zaro | Pending | ||
Valentin Hirschi | Pending | ||
Rikkert Frederix | Pending | ||
Review via email: mp+196760@code.launchpad.net |
Commit message
Description of the change
- Improve submission to various clusters (condor/SLURM/PBS). Thanks to Dorival and Shu for the testing.
- Add a warning that ./bin/newprocess_mg5 is outdated and will be removed in version 2.0.
This is a technical merge, mainly important for adding that warning.
So I would like to push this version tomorrow. This is what I will do if nobody complains about it.
Cheers,
Olivier
To post a comment you must log in.
- 259. By Olivier Mattelaer
-
UpdateNotes.txt
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'Template/Source/dsample.f' |
2 | --- Template/Source/dsample.f 2013-08-01 05:18:06 +0000 |
3 | +++ Template/Source/dsample.f 2013-11-26 22:31:29 +0000 |
4 | @@ -104,9 +104,7 @@ |
5 | itminx = itmin |
6 | if (nsteps .lt. 1) nsteps=1 |
7 | nwrite = itmax*ncall/nsteps |
8 | -c open(unit=66,file='.sample_warn',status='unknown') |
9 | -c write(66,*) 'Warnings from sample run.',itmax,ncall |
10 | -c close(66) |
11 | + |
12 | call sample_init(ndim,ncall,itmax,ninvar,nconfigs) |
13 | call graph_init |
14 | do i=1,itmax |
15 | @@ -213,13 +211,15 @@ |
16 | do i=cur_it-itsum,cur_it-1 |
17 | write(66,'(i4,5e15.5)') i,xmean(i),xsigma(i),xeff(i),xwmax(i),xrmean(i) |
18 | enddo |
19 | - close(66) |
20 | + flush(66) |
21 | + close(66, status='KEEP') |
22 | else |
23 | open(unit=66,file='results.dat',status='unknown') |
24 | write(66,'(3e12.5,2i9,i5,i9,3e10.3)')0.,0.,0.,kevent,nw, |
25 | & 1,0,0.,0.,0. |
26 | write(66,'(i4,5e15.5)') 1,0.,0.,0.,0.,0. |
27 | - close(66) |
28 | + flush(66) |
29 | + close(66, status='KEEP') |
30 | |
31 | endif |
32 | c |
33 | @@ -373,13 +373,15 @@ |
34 | do i=cur_it-itsum,cur_it-1 |
35 | write(66,'(i4,5e15.5)') i,xmean(i),xsigma(i),xeff(i),xwmax(i),xrmean(i) |
36 | enddo |
37 | - close(66) |
38 | + flush(66) |
39 | + close(66, status='KEEP') |
40 | else |
41 | open(unit=66,file='results.dat',status='unknown') |
42 | write(66,'(3e12.5,2i9,i5,i9,3e10.3)')0.,0.,0.,kevent,nw, |
43 | & 1,0,0.,0.,0. |
44 | write(66,'(i4,5e15.5)') 1,0.,0.,0.,0.,0. |
45 | - close(66) |
46 | + flush(66) |
47 | + close(66, status='KEEP') |
48 | |
49 | endif |
50 | |
51 | @@ -1863,7 +1865,8 @@ |
52 | write(66,'(3e12.5,2i9,i5,i9,3e10.3)')0.,0.,0.,0,0, |
53 | & 0,1,0.,0.,0. |
54 | write(66,'(i4,5e15.5)') 1,0.,0.,0.,0.,0. |
55 | - close(66) |
56 | + flush(66) |
57 | + close(66, status='KEEP') |
58 | |
59 | c Remove file events.lhe (otherwise event combination gets screwed up) |
60 | write(*,*) 'Deleting file events.lhe' |
61 | |
62 | === modified file 'Template/Source/gen_ximprove.f' |
63 | --- Template/Source/gen_ximprove.f 2012-11-08 04:48:28 +0000 |
64 | +++ Template/Source/gen_ximprove.f 2013-11-26 22:31:29 +0000 |
65 | @@ -354,9 +354,19 @@ |
66 | & '" >> input_sg.txt' !Helicity |
67 | write(26,'(5x,3a)')'echo "',gn(io(i))(2:ip-1), |
68 | $ '" >>input_sg.txt' |
69 | + write(26,20) 'for try in $(seq 1 10);' |
70 | + write(26,20) 'do' |
71 | write(26,20) '../madevent >> $k <input_sg.txt' |
72 | + write(26,25) 'if [ -s $k ]' |
73 | + write(26,25) 'then' |
74 | + write(26,25) ' break' |
75 | + write(26,25) 'else' |
76 | + write(26,25) ' echo $try > fail.log ' |
77 | + write(26,25) 'fi' |
78 | + write(26,25) 'done' |
79 | write(26,20) 'rm ftn25 ftn26' |
80 | write(26,20) 'cat $k >> log.txt' |
81 | + write(26,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
82 | write(26,20) 'cd ../' |
83 | endif |
84 | enddo !Loop over diagrams |
85 | @@ -681,7 +691,17 @@ |
86 | & ' " >> input_sg.txt' !Helicity 0=exact |
87 | write(26,'(9x,3a)')'echo "',gn(io(np))(2:ip-1), |
88 | $ '" >>input_sg.txt' |
89 | + write(26,25) 'for try in $(seq 1 10);' |
90 | + write(26,25) 'do' |
91 | write(26,25) '../madevent >> $k <input_sg.txt' |
92 | + write(26,25) 'if [ -s $k ]' |
93 | + write(26,25) 'then' |
94 | + write(26,25) ' break' |
95 | + write(26,25) 'else' |
96 | + write(26,25) ' echo $try > fail.log ' |
97 | + write(26,25) 'fi' |
98 | + write(26,25) 'done' |
99 | + write(26,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
100 | write(26,25) 'cat $k >> log.txt' |
101 | write(26,25) 'if [[ -e ftn26 ]]; then' |
102 | write(26,25) ' cp ftn26 ftn25' |
103 | @@ -718,7 +738,17 @@ |
104 | write(26,25) 'if [[ -e ftn26 ]]; then' |
105 | write(26,25) ' cp ftn26 ftn25' |
106 | write(26,25) 'fi' |
107 | + write(26,25) 'for try in $(seq 1 10);' |
108 | + write(26,25) 'do' |
109 | write(26,25) '../madevent >> $k <input_sg.txt' |
110 | + write(26,25) 'if [ -s $k ]' |
111 | + write(26,25) 'then' |
112 | + write(26,25) ' break' |
113 | + write(26,25) 'else' |
114 | + write(26,25) ' echo $try > fail.log ' |
115 | + write(26,25) 'fi' |
116 | + write(26,25) 'done' |
117 | + write(26,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
118 | write(26,25) 'cat $k >> log.txt' |
119 | write(26,20) 'fi' |
120 | write(26,20) 'cd ../' |
121 | @@ -848,7 +878,17 @@ |
122 | & ' " >> input_sg.txt' !Helicity 0=exact |
123 | write(26,'(9x,3a)')'echo "',gn(i)(2:ip-1), |
124 | $ '" >>input_sg.txt' |
125 | + write(26,25) 'for try in $(seq 1 10);' |
126 | + write(26,25) 'do' |
127 | write(26,25) '../madevent >> $k <input_sg.txt' |
128 | + write(26,25) 'if [ -s $k ]' |
129 | + write(26,25) 'then' |
130 | + write(26,25) ' break' |
131 | + write(26,25) 'else' |
132 | + write(26,25) ' echo $try > fail.log ' |
133 | + write(26,25) 'fi' |
134 | + write(26,25) 'done' |
135 | + write(26,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
136 | write(26,25) 'cat $k >> log.txt' |
137 | write(26,25) 'if [[ -e ftn26 ]]; then' |
138 | write(26,25) ' cp ftn26 ftn25' |
139 | @@ -876,7 +916,17 @@ |
140 | write(26,25) 'if [[ -e ftn26 ]]; then' |
141 | write(26,25) ' cp ftn26 ftn25' |
142 | write(26,25) 'fi' |
143 | + write(26,25) 'for try in $(seq 1 10);' |
144 | + write(26,25) 'do' |
145 | write(26,25) '../madevent >> $k <input_sg.txt' |
146 | + write(26,25) 'if [ -s $k ]' |
147 | + write(26,25) 'then' |
148 | + write(26,25) ' break' |
149 | + write(26,25) 'else' |
150 | + write(26,25) ' echo $try > fail.log ' |
151 | + write(26,25) 'fi' |
152 | + write(26,25) 'done' |
153 | + write(26,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
154 | write(26,25) 'cat $k >> log.txt' |
155 | write(26,20) 'fi' |
156 | write(26,20) 'cd ../' |
157 | |
158 | === modified file 'Template/bin/internal/run_combine' |
159 | --- Template/bin/internal/run_combine 2011-08-31 15:58:51 +0000 |
160 | +++ Template/bin/internal/run_combine 2013-11-26 22:31:29 +0000 |
161 | @@ -1,4 +1,15 @@ |
162 | #!/bin/bash |
163 | |
164 | -../bin/internal/combine_events |
165 | +rm fail_combine.log &> /dev/null |
166 | +for try in $(seq 1 10) |
167 | +do |
168 | + ../bin/internal/combine_events 2> fail_combine.log |
169 | + if grep -F "combine_events: No such file or directory" fail_combine.log |
170 | + then |
171 | + sleep 1 |
172 | + else |
173 | + rm fail_combine.log &> /dev/null |
174 | + break |
175 | + fi |
176 | +done |
177 | rm -f scratch |
178 | |
179 | === modified file 'Template/bin/newprocess_mg5' |
180 | --- Template/bin/newprocess_mg5 2013-03-15 04:03:57 +0000 |
181 | +++ Template/bin/newprocess_mg5 2013-11-26 22:31:29 +0000 |
182 | @@ -10,6 +10,9 @@ |
183 | exit |
184 | fi |
185 | fi |
186 | +echo -e "\033[1;31m WARNING This out-dated method of using MG5 will be remove in version 2.0\033[0m" |
187 | +echo -e "\033[1;31m A proc_card example is present in the main directory of MG5 \033[0m" |
188 | +echo -e "\033[1;31m you can run it like ./bin/mg5 proc_card.dat \033[0m" |
189 | |
190 | if [[ -e SubProcesses/subproc.mg ]]; then |
191 | echo "Error: newprocess_mg5 can only be used in a clean copy of Template." |
192 | |
193 | === modified file 'UpdateNotes.txt' |
194 | --- UpdateNotes.txt 2013-11-04 08:13:31 +0000 |
195 | +++ UpdateNotes.txt 2013-11-26 22:31:29 +0000 |
196 | @@ -1,5 +1,8 @@ |
197 | Update notes for MadGraph 5 (in reverse time order) |
198 | |
199 | +1.5.14 (26/11/13) OM: Add warning about the fact that newprocess_mg5 is going to be remove in MG5_aMC_V2.0.0 |
200 | + OM: Improved cluster submision/re-submition control. |
201 | + |
202 | 1.5.13 (04/11/13) OM: Implement a function which check if jobs submitted to cluster are correctly runned. |
203 | In case of failure, you can re-submitted the failing jobs automatically. The maximal |
204 | number of re-submission for a job can be parametrize (default 1) and how long you have to |
205 | |
206 | === modified file 'madgraph/interface/madevent_interface.py' |
207 | --- madgraph/interface/madevent_interface.py 2013-11-01 18:33:23 +0000 |
208 | +++ madgraph/interface/madevent_interface.py 2013-11-26 22:31:29 +0000 |
209 | @@ -1795,7 +1795,8 @@ |
210 | args.remove(arg) |
211 | |
212 | if args and args[0] in ["run_mode", "cluster_mode", "cluster_queue", |
213 | - "cluster_temp_path", "nb_core"]: |
214 | + "cluster_temp_path", "nb_core", "cluster_nb_retry", |
215 | + "cluster_retry_wait"]: |
216 | return args |
217 | |
218 | if self.cluster_mode == 2 and not self.nb_core: |
219 | @@ -1807,6 +1808,8 @@ |
220 | cluster_name = opt['cluster_type'] |
221 | self.cluster = cluster.from_name[cluster_name](opt['cluster_queue'], |
222 | opt['cluster_temp_path']) |
223 | + self.cluster.nb_retry = opt['cluster_nb_retry'] |
224 | + self.cluster.cluster_retry_wait = int(opt['cluster_retry_wait']) |
225 | return args |
226 | |
227 | ############################################################################ |
228 | @@ -1906,6 +1909,11 @@ |
229 | continue |
230 | self.options[key] = None |
231 | elif key.startswith('cluster'): |
232 | + if key in ('cluster_nb_retry','cluster_wait_retry'): |
233 | + self.options[key] = int(self.options[key]) |
234 | + if hasattr(self,'cluster'): |
235 | + del self.cluster |
236 | + |
237 | pass |
238 | elif key == 'automatic_html_opening': |
239 | if self.options[key] in ['False', 'True']: |
240 | @@ -2199,13 +2207,13 @@ |
241 | self.cluster = cluster.from_name[opt['cluster_type']](\ |
242 | opt['cluster_queue'], opt['cluster_temp_path']) |
243 | self.cluster.nb_retry = self.options['cluster_nb_retry'] |
244 | - self.cluster_retry_wait = self.options['cluster_retry_wait'] |
245 | + self.cluster.cluster_retry_wait = int(self.options['cluster_retry_wait']) |
246 | elif args[0] in ['cluster_nb_retry', 'cluster_retry_wait']: |
247 | self.options[args[0]] = int(args[1]) |
248 | if args[0] == 'cluster_nb_retry': |
249 | self.cluster.nb_retry = int(args[1]) |
250 | else: |
251 | - self.cluster_retry_wait = int(args[1]) |
252 | + self.cluster.cluster_retry_wait = int(args[1]) |
253 | elif args[0] == 'nb_core': |
254 | if args[1] == 'None': |
255 | import multiprocessing |
256 | @@ -2845,7 +2853,8 @@ |
257 | if self.cluster_mode == 1: |
258 | self.cluster.launch_and_wait('../bin/internal/run_combine', |
259 | cwd=pjoin(self.me_dir,'SubProcesses'), |
260 | - stdout=pjoin(self.me_dir,'SubProcesses', 'combine.log')) |
261 | + stdout=pjoin(self.me_dir,'SubProcesses', 'combine.log'), |
262 | + required_output=[pjoin(self.me_dir,'SubProcesses', 'combine.log')]) |
263 | else: |
264 | misc.call(['../bin/internal/run_combine'], |
265 | cwd=pjoin(self.me_dir,'SubProcesses'), |
266 | @@ -3071,16 +3080,20 @@ |
267 | |
268 | if not self.run_name: |
269 | self.check_pythia(args) |
270 | - self.configure_directory() |
271 | + self.configure_directory(html_opening =False) |
272 | else: |
273 | # initialize / remove lhapdf mode |
274 | - self.configure_directory() |
275 | + self.configure_directory(html_opening =False) |
276 | self.check_pythia(args) |
277 | |
278 | # the args are modify and the last arg is always the mode |
279 | if not no_default: |
280 | self.ask_pythia_run_configuration(args[-1]) |
281 | |
282 | + if self.options['automatic_html_opening']: |
283 | + misc.open_file(os.path.join(self.me_dir, 'crossx.html')) |
284 | + self.options['automatic_html_opening'] = False |
285 | + |
286 | # Update the banner with the pythia card |
287 | if not self.banner: |
288 | self.banner = banner_mod.recover_banner(self.results, 'pythia') |
289 | @@ -3903,7 +3916,7 @@ |
290 | return name % (max(data+[0])+1) |
291 | |
292 | ############################################################################ |
293 | - def configure_directory(self): |
294 | + def configure_directory(self, html_opening=True): |
295 | """ All action require before any type of run """ |
296 | |
297 | |
298 | @@ -3919,7 +3932,7 @@ |
299 | else: |
300 | self.configured = time.time() |
301 | self.update_status('compile directory', level=None) |
302 | - if self.options['automatic_html_opening']: |
303 | + if self.options['automatic_html_opening'] and html_opening: |
304 | misc.open_file(os.path.join(self.me_dir, 'crossx.html')) |
305 | self.options['automatic_html_opening'] = False |
306 | #open only once the web page |
307 | |
308 | === modified file 'madgraph/interface/madgraph_interface.py' |
309 | --- madgraph/interface/madgraph_interface.py 2013-11-01 18:33:23 +0000 |
310 | +++ madgraph/interface/madgraph_interface.py 2013-11-26 22:31:29 +0000 |
311 | @@ -3152,6 +3152,7 @@ |
312 | text = open(path).read() |
313 | text = text.replace('FC=g77','FC=gfortran') |
314 | open(path, 'w').writelines(text) |
315 | + os.environ['FC'] = compiler |
316 | |
317 | if logger.level <= logging.INFO: |
318 | devnull = open(os.devnull,'w') |
319 | @@ -3354,6 +3355,11 @@ |
320 | break |
321 | print 'apply patch %s' % (i+1) |
322 | text = filetext.read() |
323 | + # track rename since patch fail to apply those correctly. |
324 | + pattern = re.compile(r'''=== renamed file \'(?P<orig>[^\']*)\' => \'(?P<new>[^\']*)\'''') |
325 | + #=== renamed file 'Template/SubProcesses/addmothers.f' => 'madgraph/iolibs/template_files/addmothers.f' |
326 | + for orig, new in pattern.findall(text): |
327 | + files.cp(pjoin(MG5DIR, orig), pjoin(MG5DIR, new)) |
328 | p= subprocess.Popen(['patch', '-p1'], stdin=subprocess.PIPE, |
329 | cwd=MG5DIR) |
330 | p.communicate(text) |
331 | |
332 | === modified file 'madgraph/iolibs/template_files/madevent_symmetry.f' |
333 | --- madgraph/iolibs/template_files/madevent_symmetry.f 2013-08-06 19:08:29 +0000 |
334 | +++ madgraph/iolibs/template_files/madevent_symmetry.f 2013-11-26 22:31:29 +0000 |
335 | @@ -651,7 +651,7 @@ |
336 | c |
337 | c Now write the commands |
338 | c |
339 | - write(lun,20) 'echo $i >& run.$script' |
340 | +c write(lun,20) 'echo $i >& run.$script' |
341 | write(lun,20) 'j=G$i' |
342 | write(lun,20) 'if [[ ! -e $j ]]; then' |
343 | write(lun,25) 'mkdir $j' |
344 | @@ -661,9 +661,20 @@ |
345 | write(lun,20) 'rm -f $k' |
346 | write(lun,20) 'cat ../input_app.txt >& input_app.txt' |
347 | write(lun,20) 'echo $i >> input_app.txt' |
348 | + write(lun,20) 'for try in $(seq 1 10);' |
349 | + write(lun,20) 'do' |
350 | write(lun,20) '../madevent > $k <input_app.txt' |
351 | + write(lun,20) 'if [ -s $k ]' |
352 | + write(lun,20) 'then' |
353 | + write(lun,20) ' break' |
354 | + write(lun,20) 'else' |
355 | + write(lun,20) 'sleep 1' |
356 | +c write(lun,20) 'rm -rf $k; ../madevent > $k <input_app.txt' |
357 | + write(lun,20) 'fi' |
358 | + write(lun,20) 'done' |
359 | write(lun,20) 'rm -f ftn25 ftn99' |
360 | if(.not.gridpack) write(lun,20) 'rm -f ftn26' |
361 | + write(lun,20) 'echo "" >> $k; echo "ls status:" >> $k; ls >> $k' |
362 | write(lun,20) 'cp $k log.txt' |
363 | write(lun,20) 'cd ../' |
364 | write(lun,15) 'done' |
365 | |
366 | === modified file 'madgraph/various/cluster.py' |
367 | --- madgraph/various/cluster.py 2013-11-01 18:33:23 +0000 |
368 | +++ madgraph/various/cluster.py 2013-11-26 22:31:29 +0000 |
369 | @@ -48,7 +48,10 @@ |
370 | try: |
371 | return f(self, *args, **opt) |
372 | except error: |
373 | - self.remove(*args, **opt) |
374 | + try: |
375 | + self.remove(*args, **opt) |
376 | + except Exception: |
377 | + pass |
378 | raise error |
379 | return deco_f_interupt |
380 | return deco_interupt |
381 | @@ -59,7 +62,7 @@ |
382 | def deco_f_store(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None, |
383 | input_files=[], output_files=[], required_output=[], nb_submit=0): |
384 | frame = inspect.currentframe() |
385 | - args, i, j, values = inspect.getargvalues(frame) |
386 | + args, _, _, values = inspect.getargvalues(frame) |
387 | args = dict([(i, values[i]) for i in args if i != 'self']) |
388 | id = f(self, **args) |
389 | if self.nb_retry > 0: |
390 | @@ -83,6 +86,7 @@ |
391 | # attribute to relaunch jobs if they failed to produce expected data |
392 | self.nb_retry = 1 |
393 | self.retry_args = {} |
394 | + self.cluster_retry_wait = 300 |
395 | |
396 | def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, |
397 | log=None, required_output=[], nb_submit=0): |
398 | @@ -95,10 +99,11 @@ |
399 | """How to make one submission. Return status id on the cluster. |
400 | NO SHARE DISK""" |
401 | |
402 | - if not hasattr(self, 'temp_dir') or not self.temp_dir: |
403 | + if not hasattr(self, 'temp_dir') or not self.temp_dir or \ |
404 | + (input_files == [] == output_files): |
405 | return self.submit(prog, argument, cwd, stdout, stderr, log, |
406 | required_output=required_output, nb_submit=nb_submit) |
407 | - print self.temp_dir |
408 | + |
409 | if cwd is None: |
410 | cwd = os.getcwd() |
411 | if not os.path.exists(prog): |
412 | @@ -183,6 +188,7 @@ |
413 | def check_termination(self, job_id): |
414 | """Check the termination of the jobs with job_id and relaunch it if needed.""" |
415 | |
416 | + |
417 | if job_id not in self.retry_args: |
418 | return True |
419 | |
420 | @@ -206,12 +212,11 @@ |
421 | return 'done' |
422 | |
423 | if time_check == 0: |
424 | - logger.warning('''Job %s failed to produce expected output. Waiting for filesystem update.\nMissing file:\n%s''' % (job_id,path)) |
425 | + logger.debug('''Job %s: missing output:%s''' % (job_id,path)) |
426 | args['time_check'] = time.time() |
427 | return 'wait' |
428 | - elif self.cluster_retry_wait < time.time() - time_check: |
429 | + elif self.cluster_retry_wait > time.time() - time_check: |
430 | return 'wait' |
431 | - |
432 | |
433 | #jobs failed to be completed even after waiting time!! |
434 | if self.nb_retry < 0: |
435 | @@ -224,14 +229,14 @@ |
436 | with option: %s |
437 | file missing: %s. |
438 | Stopping all runs.''' % (job_id, args, path)) |
439 | - self.remove() |
440 | + #self.remove() |
441 | elif args['nb_submit'] >= self.nb_retry: |
442 | logger.critical('''Fail to run correctly job %s. |
443 | with option: %s |
444 | file missing: %s |
445 | Fails %s times |
446 | No resubmition. ''' % (job_id, args, path, args['nb_submit'])) |
447 | - self.remove() |
448 | + #self.remove() |
449 | else: |
450 | args['nb_submit'] += 1 |
451 | logger.warning('resubmit job (for the %s times)' % args['nb_submit']) |
452 | @@ -247,7 +252,8 @@ |
453 | |
454 | @check_interupt() |
455 | def launch_and_wait(self, prog, argument=[], cwd=None, stdout=None, |
456 | - stderr=None, log=None): |
457 | + stderr=None, log=None, required_output=[], nb_submit=0, |
458 | + input_files=[], output_files=[]): |
459 | """launch one job on the cluster and wait for it""" |
460 | |
461 | special_output = False # tag for concatenate the error with the output. |
462 | @@ -255,14 +261,41 @@ |
463 | #We are suppose to send the output to stdout |
464 | special_output = True |
465 | stderr = stdout + '.err' |
466 | - id = self.submit(prog, argument, cwd, stdout, stderr, log) |
467 | - while 1: |
468 | + id = self.submit2(prog, argument, cwd, stdout, stderr, log, |
469 | + required_output=required_output, input_files=input_files, |
470 | + output_files=output_files) |
471 | + |
472 | + frame = inspect.currentframe() |
473 | + args, _, _, values = inspect.getargvalues(frame) |
474 | + args = dict([(i, values[i]) for i in args if i != 'self']) |
475 | + self.retry_args[id] = args |
476 | + |
477 | + nb_wait=0 |
478 | + while 1: |
479 | + nb_wait+=1 |
480 | status = self.control_one_job(id) |
481 | if not status in ['R','I']: |
482 | + status = self.check_termination(id) |
483 | + if status in ['wait']: |
484 | + time.sleep(30) |
485 | + continue |
486 | + elif status in ['resubmit']: |
487 | + id = self.submitted_ids[0] |
488 | + time.sleep(30) |
489 | + continue |
490 | + #really stop! |
491 | time.sleep(30) #security to ensure that the file are really written on the disk |
492 | break |
493 | time.sleep(30) |
494 | |
495 | + if required_output: |
496 | + status = self.check_termination(id) |
497 | + if status == 'wait': |
498 | + run += 1 |
499 | + elif status == 'resubmit': |
500 | + idle += 1 |
501 | + |
502 | + |
503 | if special_output: |
504 | # combine the stdout and the stderr |
505 | #wait up to 50 s to see if those files exists |
506 | @@ -281,7 +314,7 @@ |
507 | return |
508 | time.sleep(10) |
509 | |
510 | - def remove(self, *args): |
511 | + def remove(self, *args, **opts): |
512 | """ """ |
513 | logger.warning("""This cluster didn't support job removal, |
514 | the jobs are still running on the cluster.""") |
515 | @@ -362,6 +395,10 @@ |
516 | input/output file should be give relative to cwd |
517 | """ |
518 | |
519 | + if (input_files == [] == output_files): |
520 | + return self.submit(prog, argument, cwd, stdout, stderr, log, |
521 | + required_output=required_output, nb_submit=nb_submit) |
522 | + |
523 | text = """Executable = %(prog)s |
524 | output = %(stdout)s |
525 | error = %(stderr)s |
526 | @@ -483,7 +520,7 @@ |
527 | fail += 1 |
528 | |
529 | for id in list(self.submitted_ids): |
530 | - if id not in ongoing: |
531 | + if int(id) not in ongoing: |
532 | status = self.check_termination(id) |
533 | if status == 'wait': |
534 | run += 1 |
535 | @@ -493,7 +530,7 @@ |
536 | return idle, run, self.submitted - (idle+run+fail), fail |
537 | |
538 | @multiple_try() |
539 | - def remove(self, *args): |
540 | + def remove(self, *args, **opts): |
541 | """Clean the jobson the cluster""" |
542 | |
543 | if not self.submitted_ids: |
544 | @@ -623,7 +660,7 @@ |
545 | return idle, run, self.submitted - (idle+run+fail), fail |
546 | |
547 | @multiple_try() |
548 | - def remove(self, *args): |
549 | + def remove(self, *args, **opts): |
550 | """Clean the jobs on the cluster""" |
551 | |
552 | if not self.submitted_ids: |
553 | @@ -773,7 +810,7 @@ |
554 | |
555 | |
556 | @multiple_try() |
557 | - def remove(self, *args): |
558 | + def remove(self, *args, **opts): |
559 | """Clean the jobs on the cluster""" |
560 | |
561 | if not self.submitted_ids: |
562 | @@ -899,7 +936,7 @@ |
563 | return idle, run, self.submitted - (idle+run+fail), fail |
564 | |
565 | @multiple_try() |
566 | - def remove(self, *args): |
567 | + def remove(self, *args,**opts): |
568 | """Clean the jobs on the cluster""" |
569 | |
570 | if not self.submitted_ids: |
571 | @@ -1023,7 +1060,7 @@ |
572 | return idle, run, self.submitted - idle - run - fail, fail |
573 | |
574 | @multiple_try() |
575 | - def remove(self, *args): |
576 | + def remove(self, *args, **opts): |
577 | """Clean the jobs on the cluster""" |
578 | |
579 | if not self.submitted_ids: |
580 | @@ -1113,22 +1150,38 @@ |
581 | me_dir = 'a' + me_dir[1:] |
582 | |
583 | idle, run, fail = 0, 0, 0 |
584 | + ongoing=[] |
585 | for line in status.stdout: |
586 | if me_dir in line: |
587 | - status = line.split()[4] |
588 | + id, _, _,_ , status,_ = line.split(None,5) |
589 | + ongoing.append(id) |
590 | if status in self.idle_tag: |
591 | idle += 1 |
592 | elif status in self.running_tag: |
593 | run += 1 |
594 | elif status in self.complete_tag: |
595 | - continue |
596 | + status = self.check_termination(id) |
597 | + if status == 'wait': |
598 | + run += 1 |
599 | + elif status == 'resubmit': |
600 | + idle += 1 |
601 | else: |
602 | fail += 1 |
603 | - |
604 | + |
605 | + #control other finished job |
606 | + for id in list(self.submitted_ids): |
607 | + if id not in ongoing: |
608 | + status = self.check_termination(id) |
609 | + if status == 'wait': |
610 | + run += 1 |
611 | + elif status == 'resubmit': |
612 | + idle += 1 |
613 | + |
614 | + |
615 | return idle, run, self.submitted - (idle+run+fail), fail |
616 | |
617 | @multiple_try() |
618 | - def remove(self, *args): |
619 | + def remove(self, *args, **opts): |
620 | """Clean the jobs on the cluster""" |
621 | |
622 | if not self.submitted_ids: |
623 | |
624 | === added file 'proc_card.dat' |
625 | --- proc_card.dat 1970-01-01 00:00:00 +0000 |
626 | +++ proc_card.dat 2013-11-26 22:31:29 +0000 |
627 | @@ -0,0 +1,40 @@ |
628 | +#************************************************************ |
629 | +#* MadGraph 5 * |
630 | +#* * |
631 | +#* * * * |
632 | +#* * * * * * |
633 | +#* * * * * 5 * * * * * |
634 | +#* * * * * * |
635 | +#* * * * |
636 | +#* * |
637 | +#* * |
638 | +#* The MadGraph Development Team - Please visit us at * |
639 | +#* https://server06.fynu.ucl.ac.be/projects/madgraph * |
640 | +#* * |
641 | +#************************************************************ |
642 | +#* * |
643 | +#* Command File for MadGraph 5 * |
644 | +#* * |
645 | +#* run as ./bin/mg5 filename * |
646 | +#* * |
647 | +#************************************************************ |
648 | +import model sm |
649 | +# Define multiparticle labels |
650 | +define p = g u c d s u~ c~ d~ s~ |
651 | +define j = g u c d s u~ c~ d~ s~ |
652 | +define l+ = e+ mu+ |
653 | +define l- = e- mu- |
654 | +define vl = ve vm vt |
655 | +define vl~ = ve~ vm~ vt~ |
656 | +# Specify process(es) to run |
657 | +generate p p > e- ve~ @1 |
658 | +add process p p > e- ve~ j @2 |
659 | +add process p p > t t~ @3 |
660 | +# Output processes to MadEvent directory |
661 | +output |
662 | +# This will create a directory PROC_$MODELNAME_$X |
663 | +# If you want to specify the path/name of the directory use |
664 | +# output PATH |
665 | + |
666 | +# To generate events, you can go to the created directory and |
667 | +# run ./bin/generate_events |
668 | \ No newline at end of file |