Merge lp:~jon-hill/supertree-toolkit/fill_in_taxa into lp:supertree-toolkit
- fill_in_taxa
- Merge into stk
Proposed by
Jon Hill
Status: | Merged |
---|---|
Merged at revision: | 270 |
Proposed branch: | lp:~jon-hill/supertree-toolkit/fill_in_taxa |
Merge into: | lp:supertree-toolkit |
Diff against target: |
611 lines (+517/-6) 6 files modified
stk/p4/Tree_muck.py (+2/-2) stk/scripts/fill_in_with_taxonomy.py (+408/-0) stk/supertree_toolkit.py (+79/-0) stk/test/_supertree_toolkit.py (+13/-1) stk/test/_trees.py (+9/-3) stk/test/data/input/create_taxonomy.csv (+6/-0) |
To merge this branch: | bzr merge lp:~jon-hill/supertree-toolkit/fill_in_taxa |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Jon Hill | Approve | ||
Review via email: mp+254215@code.launchpad.net |
Commit message
Description of the change
Adds functionality to fill in a tree using taxonomy
To post a comment you must log in.
Revision history for this message
Jon Hill (jon-hill) wrote : | # |
Revision history for this message
Jon Hill (jon-hill) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'stk/p4/Tree_muck.py' |
2 | --- stk/p4/Tree_muck.py 2012-01-11 08:57:43 +0000 |
3 | +++ stk/p4/Tree_muck.py 2015-03-26 09:59:01 +0000 |
4 | @@ -769,8 +769,8 @@ |
5 | else: |
6 | gm.append("The 2 specified nodes should have a parent-child relationship") |
7 | raise Glitch, gm |
8 | - |
9 | - self.deleteCStuff() |
10 | + if var.usePfAndNumpy: |
11 | + self.deleteCStuff() |
12 | |
13 | hasBrLens = False |
14 | for n in self.iterNodes(): |
15 | |
16 | === added file 'stk/scripts/fill_in_with_taxonomy.py' |
17 | --- stk/scripts/fill_in_with_taxonomy.py 1970-01-01 00:00:00 +0000 |
18 | +++ stk/scripts/fill_in_with_taxonomy.py 2015-03-26 09:59:01 +0000 |
19 | @@ -0,0 +1,408 @@ |
20 | +#!/usr/bin/env python |
21 | +# |
22 | +# Supertree Toolkit. Software for managing and manipulating source |
23 | +# trees ready for supertree construction. |
24 | +# Copyright (C) 2015, Jon Hill, Katie Davis |
25 | +# |
26 | +# This program is free software: you can redistribute it and/or modify |
27 | +# it under the terms of the GNU General Public License as published by |
28 | +# the Free Software Foundation, either version 3 of the License, or |
29 | +# (at your option) any later version. |
30 | +# |
31 | +# This program is distributed in the hope that it will be useful, |
32 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
33 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
34 | +# GNU General Public License for more details. |
35 | +# |
36 | +# You should have received a copy of the GNU General Public License |
37 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
38 | +# |
39 | +# Jon Hill. jon.hill@york.ac.uk |
40 | + |
41 | +import urllib2 |
42 | +from urllib import quote_plus |
43 | +import simplejson as json |
44 | +import argparse |
45 | +import os |
46 | +import sys |
47 | +stk_path = os.path.join( os.path.realpath(os.path.dirname(__file__)), os.pardir ) |
48 | +sys.path.insert(0, stk_path) |
49 | +import supertree_toolkit as stk |
50 | +import csv |
51 | + |
52 | +# What we get from EOL |
53 | +current_taxonomy_levels = ['species','genus','family','order','class','phylum','kingdom'] |
54 | +# And the extra ones from ITIS |
55 | +extra_taxonomy_levels = ['superfamily','infraorder','suborder','superorder','subclass','subphylum','superphylum','infrakingdom','subkingdom'] |
56 | +# all of them in order |
57 | +taxonomy_levels = ['species','genus','subfamily','family','superfamily','infraorder','suborder','order','superorder','subclass','class','subphylum','phylum','superphylum','infrakingdom','subkingdom','kingdom'] |
58 | + |
59 | +def get_tree_taxa_taxonomy(taxon,wsdlObjectWoRMS): |
60 | + |
61 | + taxon_data = wsdlObjectWoRMS.getAphiaRecords(taxon.replace('_',' ')) |
62 | + if taxon_data == None: |
63 | + return {} |
64 | + |
65 | + taxon_id = taxon_data[0]['valid_AphiaID'] # there might be records that aren't valid - they point to the valid one though |
66 | + # call it again via the ID this time to make sure we've got the right one. |
67 | + taxon_data = wsdlObjectWoRMS.getAphiaRecordByID(taxon_id) |
68 | + # add data to taxonomy dictionary |
69 | + # get the taxonomy of this species |
70 | + classification = wsdlObjectWoRMS.getAphiaClassificationByID(taxon_id) |
71 | + # construct array |
72 | + tax_array = {} |
73 | + # classification is a nested dictionary, so we need to iterate down it |
74 | + current_child = classification.child |
75 | + while True: |
76 | + tax_array[current_child.rank.lower()] = current_child.scientificname |
77 | + current_child = current_child.child |
78 | + if current_child == '': # empty one is a string for some reason |
79 | + break |
80 | + return tax_array |
81 | + |
82 | + |
83 | + |
84 | +def get_taxonomy_worms(taxonomy, start_otu): |
85 | + """ Gets and processes a taxon from the queue to get its taxonomy.""" |
86 | + from SOAPpy import WSDL |
87 | + |
88 | + wsdlObjectWoRMS = WSDL.Proxy('http://www.marinespecies.org/aphia.php?p=soap&wsdl=1') |
89 | + |
90 | + # this is the recursive function |
91 | + def get_children(taxonomy, ID): |
92 | + |
93 | + # get data |
94 | + this_item = wsdlObjectWoRMS.getAphiaRecordByID(ID) |
95 | + if this_item == None: |
96 | + return taxonomy |
97 | + if this_item['rank'].lower() == 'species': |
98 | + # add data to taxonomy dictionary |
99 | + # get the taxonomy of this species |
100 | + classification = wsdlObjectWoRMS.getAphiaClassificationByID(ID) |
101 | + taxon = this_item.scientificname |
102 | + if not taxon in taxonomy: # is a new taxon, not previously in the taxonomy |
103 | + # construct array |
104 | + tax_array = {} |
105 | + # classification is a nested dictionary, so we need to iterate down it |
106 | + current_child = classification.child |
107 | + while True: |
108 | + if taxonomy_levels.index(current_child.rank.lower()) <= taxonomy_levels.index(start_taxonomy_level): |
109 | + # we need this - we're closer to the tips of the tree than we started |
110 | + tax_array[current_child.rank.lower()] = current_child.scientificname |
111 | + current_child = current_child.child |
112 | + if current_child == '': # empty one is a string for some reason |
113 | + break |
114 | + taxonomy[this_item.scientificname] = tax_array |
115 | + return taxonomy |
116 | + else: |
117 | + return taxonomy |
118 | + |
119 | + children = wsdlObjectWoRMS.getAphiaChildrenByID(ID, 1, False) |
120 | + |
121 | + for child in children: |
122 | + taxonomy = get_children(taxonomy, child['valid_AphiaID']) |
123 | + |
124 | + return taxonomy |
125 | + |
126 | + |
127 | + # main bit of the get_taxonomy_worms function |
128 | + try: |
129 | + start_taxa = wsdlObjectWoRMS.getAphiaRecords(start_otu) |
130 | + start_id = start_taxa[0]['valid_AphiaID'] # there might be records that aren't valid - they point to the valid one though |
131 | + # call it again via the ID this time to make sure we've got the right one. |
132 | + start_taxa = wsdlObjectWoRMS.getAphiaRecordByID(start_id) |
133 | + start_taxonomy_level = start_taxa['rank'].lower() |
134 | + except HTTPError: |
135 | + print "Error" |
136 | + sys.exit(-1) |
137 | + |
138 | + taxonomy = get_children(taxonomy,start_id) |
139 | + |
140 | + return taxonomy, start_taxonomy_level |
141 | + |
142 | + |
143 | +def main(): |
144 | + |
145 | + # do stuff |
146 | + parser = argparse.ArgumentParser( |
147 | + prog="Fill tree in using taxonomy", |
148 | + description="Fills in the taxonomic gaps using polytomies within a tree to increase coverage", |
149 | + ) |
150 | + parser.add_argument( |
151 | + '-v', |
152 | + '--verbose', |
153 | + action='store_true', |
154 | + help="Verbose output: mainly progress reports.", |
155 | + default=False |
156 | + ) |
157 | + parser.add_argument( |
158 | + '--pref_db', |
159 | + help="Taxonomy database to use. Default is Species 2000/ITIS", |
160 | + choices=['itis', 'worms', 'ncbi'], |
161 | + default = 'worms' |
162 | + ) |
163 | + parser.add_argument( |
164 | + '--save_taxonomy', |
165 | + help="Save the taxonomy downloaded. Give a filename" |
166 | + ) |
167 | + parser.add_argument( |
168 | + '--taxonomy_from_file', |
169 | + help='Use a downloaded taxonomy database from the chosen database, rather than online. Much quicker for large datasets. Give the filename', |
170 | + ) |
171 | + parser.add_argument( |
172 | + '--tree_taxonomy', |
173 | + help="Supply a STK taxonomy file for taxa in the tree. If not, one will be created from the database being used here." |
174 | + ) |
175 | + parser.add_argument( |
176 | + 'top_level', |
177 | + nargs=1, |
178 | + help="The top level group to look in, e.g. Arthropoda, Decapoda. Must match the database." |
179 | + ) |
180 | + parser.add_argument( |
181 | + 'input_file', |
182 | + metavar='input_file', |
183 | + nargs=1, |
184 | + help="Your tree file" |
185 | + ) |
186 | + parser.add_argument( |
187 | + 'output_file', |
188 | + metavar='output_file', |
189 | + nargs=1, |
190 | + help="Your new tree file" |
191 | + ) |
192 | + |
193 | + args = parser.parse_args() |
194 | + verbose = args.verbose |
195 | + input_file = args.input_file[0] |
196 | + output_file = args.output_file[0] |
197 | + top_level = args.top_level[0] |
198 | + save_taxonomy_file = args.save_taxonomy |
199 | + tree_taxonomy = args.tree_taxonomy |
200 | + pref_db = args.pref_db |
201 | + if (save_taxonomy_file == None): |
202 | + save_taxonomy = False |
203 | + else: |
204 | + save_taxonomy = True |
205 | + |
206 | + # grab taxa in tree |
207 | + tree = stk.import_tree(input_file) |
208 | + taxa_list = stk._getTaxaFromNewick(tree) |
209 | + |
210 | + taxonomy = {} |
211 | + |
212 | + # we're going to add the taxa in the tree to the taxonomy, to stop them |
213 | + # being fetched in first place. We delete them later |
214 | + for taxon in taxa_list: |
215 | + taxon = taxon.replace('_',' ') |
216 | + taxonomy[taxon] = [] |
217 | + |
218 | + |
219 | + if (pref_db == 'itis'): |
220 | + # get taxonomy info from itis |
221 | + print "Sorry, ITIS is not implemented yet" |
222 | + pass |
223 | + elif (pref_db == 'worms'): |
224 | + # get tree taxonomy from worms |
225 | + if (tree_taxonomy == None): |
226 | + tree_taxonomy = {} |
227 | + for t in taxa_list: |
228 | + from SOAPpy import WSDL |
229 | + wsdlObjectWoRMS = WSDL.Proxy('http://www.marinespecies.org/aphia.php?p=soap&wsdl=1') |
230 | + tree_taxonomy[t] = get_tree_taxa_taxonomy(t,wsdlObjectWoRMS) |
231 | + else: |
232 | + tree_taxonomy = stk.load_taxonomy(tree_taxonomy) |
233 | + # get taxonomy from worms |
234 | + taxonomy, start_level = get_taxonomy_worms(taxonomy,top_level) |
235 | + |
236 | + elif (pref_db == 'ncbi'): |
237 | + # get taxonomy from ncbi |
238 | + print "Sorry, NCBI is not implemented yet" |
239 | + pass |
240 | + else: |
241 | + print "ERROR: Didn't understand you database choice" |
242 | + sys.exit(-1) |
243 | + |
244 | + # clean up taxonomy, deleting the ones already in the tree |
245 | + for taxon in taxa_list: |
246 | + taxon = taxon.replace('_',' ') |
247 | + del taxonomy[taxon] |
248 | + |
249 | + # step up the taxonomy levels from genus, adding taxa to the correct node |
250 | + # as a polytomy |
251 | + for level in taxonomy_levels[1::]: # skip species.... |
252 | + new_taxa = [] |
253 | + for t in taxonomy: |
254 | + # skip odd ones that should be in there |
255 | + if start_level in taxonomy[t] and taxonomy[t][start_level] == top_level: |
256 | + try: |
257 | + new_taxa.append(taxonomy[t][level]) |
258 | + except KeyError: |
259 | + continue # don't have this info |
260 | + new_taxa = _uniquify(new_taxa) |
261 | + for nt in new_taxa: |
262 | + taxa_to_add = [] |
263 | + taxa_in_clade = [] |
264 | + for t in taxonomy: |
265 | + if start_level in taxonomy[t] and taxonomy[t][start_level] == top_level: |
266 | + try: |
267 | + if taxonomy[t][level] == nt: |
268 | + taxa_to_add.append(t.replace(' ','_')) |
269 | + except KeyError: |
270 | + continue |
271 | + # add to tree |
272 | + for t in taxa_list: |
273 | + if level in tree_taxonomy[t] and tree_taxonomy[t][level] == nt: |
274 | + taxa_in_clade.append(t) |
275 | + if len(taxa_in_clade) > 0: |
276 | + tree = add_taxa(tree, taxa_to_add, taxa_in_clade) |
277 | + for t in taxa_to_add: # clean up taxonomy |
278 | + del taxonomy[t.replace('_',' ')] |
279 | + |
280 | + |
281 | + trees = {} |
282 | + trees['tree_1'] = tree |
283 | + output = stk._amalgamate_trees(trees,format='nexus') |
284 | + f = open(output_file, "w") |
285 | + f.write(output) |
286 | + f.close() |
287 | + |
288 | + if not save_taxonomy_file == None: |
289 | + with open(save_taxonomy_file, 'w') as f: |
290 | + writer = csv.writer(f) |
291 | + headers = [] |
292 | + headers.append("OTU") |
293 | + headers.extend(taxonomy_levels) |
294 | + headers.append("Data source") |
295 | + writer.writerow(headers) |
296 | + for t in taxonomy: |
297 | + otu = t |
298 | + try: |
299 | + species = taxonomy[t]['species'] |
300 | + except KeyError: |
301 | + species = "-" |
302 | + try: |
303 | + genus = taxonomy[t]['genus'] |
304 | + except KeyError: |
305 | + genus = "-" |
306 | + try: |
307 | + family = taxonomy[t]['family'] |
308 | + except KeyError: |
309 | + family = "-" |
310 | + try: |
311 | + superfamily = taxonomy[t]['superfamily'] |
312 | + except KeyError: |
313 | + superfamily = "-" |
314 | + try: |
315 | + infraorder = taxonomy[t]['infraorder'] |
316 | + except KeyError: |
317 | + infraorder = "-" |
318 | + try: |
319 | + suborder = taxonomy[t]['suborder'] |
320 | + except KeyError: |
321 | + suborder = "-" |
322 | + try: |
323 | + order = taxonomy[t]['order'] |
324 | + except KeyError: |
325 | + order = "-" |
326 | + try: |
327 | + superorder = taxonomy[t]['superorder'] |
328 | + except KeyError: |
329 | + superorder = "-" |
330 | + try: |
331 | + subclass = taxonomy[t]['subclass'] |
332 | + except KeyError: |
333 | + subclass = "-" |
334 | + try: |
335 | + tclass = taxonomy[t]['class'] |
336 | + except KeyError: |
337 | + tclass = "-" |
338 | + try: |
339 | + subphylum = taxonomy[t]['subphylum'] |
340 | + except KeyError: |
341 | + subphylum = "-" |
342 | + try: |
343 | + phylum = taxonomy[t]['phylum'] |
344 | + except KeyError: |
345 | + phylum = "-" |
346 | + try: |
347 | + superphylum = taxonomy[t]['superphylum'] |
348 | + except KeyError: |
349 | + superphylum = "-" |
350 | + try: |
351 | + infrakingdom = taxonomy[t]['infrakingdom'] |
352 | + except: |
353 | + infrakingdom = "-" |
354 | + try: |
355 | + subkingdom = taxonomy[t]['subkingdom'] |
356 | + except: |
357 | + subkingdom = "-" |
358 | + try: |
359 | + kingdom = taxonomy[t]['kingdom'] |
360 | + except KeyError: |
361 | + kingdom = "-" |
362 | + try: |
363 | + provider = taxonomy[t]['provider'] |
364 | + except KeyError: |
365 | + provider = "-" |
366 | + |
367 | + if (isinstance(species, list)): |
368 | + species = " ".join(species) |
369 | + this_classification = [ |
370 | + otu.encode('utf-8'), |
371 | + species.encode('utf-8'), |
372 | + genus.encode('utf-8'), |
373 | + family.encode('utf-8'), |
374 | + superfamily.encode('utf-8'), |
375 | + infraorder.encode('utf-8'), |
376 | + suborder.encode('utf-8'), |
377 | + order.encode('utf-8'), |
378 | + superorder.encode('utf-8'), |
379 | + subclass.encode('utf-8'), |
380 | + tclass.encode('utf-8'), |
381 | + subphylum.encode('utf-8'), |
382 | + phylum.encode('utf-8'), |
383 | + superphylum.encode('utf-8'), |
384 | + infrakingdom.encode('utf-8'), |
385 | + subkingdom.encode('utf-8'), |
386 | + kingdom.encode('utf-8'), |
387 | + provider.encode('utf-8')] |
388 | + writer.writerow(this_classification) |
389 | + |
390 | + |
391 | +def _uniquify(l): |
392 | + """ |
393 | + Make a list, l, contain only unique data |
394 | + """ |
395 | + keys = {} |
396 | + for e in l: |
397 | + keys[e] = 1 |
398 | + |
399 | + return keys.keys() |
400 | + |
401 | +def add_taxa(tree, new_taxa, taxa_in_clade): |
402 | + |
403 | + # create new tree of the new taxa |
404 | + #tree_string = "(" + ",".join(new_taxa) + ");" |
405 | + #additionalTaxa = stk._parse_tree(tree_string) |
406 | + |
407 | + # find mrca parent |
408 | + treeobj = stk._parse_tree(tree) |
409 | + mrca = stk.get_mrca(tree,taxa_in_clade) |
410 | + mrca_parent = treeobj.node(mrca).parent |
411 | + |
412 | + # insert a node into the tree between the MRCA and its parent (p4.addNodeBetweenNodes) |
413 | + newNode = treeobj.addNodeBetweenNodes(mrca, mrca_parent) |
414 | + |
415 | + # add the new tree at the new node using p4.addSubTree(self, selfNode, theSubTree, subTreeTaxNames=None) |
416 | + #treeobj.addSubTree(newNode, additionalTaxa) |
417 | + for t in new_taxa: |
418 | + treeobj.addSibLeaf(newNode,t) |
419 | + |
420 | + # return new tree |
421 | + return treeobj.writeNewick(fName=None,toString=True).strip() |
422 | + |
423 | +if __name__ == "__main__": |
424 | + main() |
425 | + |
426 | + |
427 | + |
428 | |
429 | === modified file 'stk/supertree_toolkit.py' |
430 | --- stk/supertree_toolkit.py 2014-12-10 08:55:43 +0000 |
431 | +++ stk/supertree_toolkit.py 2015-03-26 09:59:01 +0000 |
432 | @@ -52,6 +52,7 @@ |
433 | IDENTICAL = 0 |
434 | SUBSET = 1 |
435 | PLATFORM = sys.platform |
436 | +taxonomy_levels = ['species','genus','family','superfamily','infraorder','suborder','order','superorder','subclass','class','subphylum','phylum','superphylum','infrakingdom','subkingdom','kingdom'] |
437 | |
438 | # supertree_toolkit is the backend for the STK. Loaded by both the GUI and |
439 | # CLI, this contains all the functions to actually *do* something |
440 | @@ -1991,6 +1992,32 @@ |
441 | |
442 | return output_string |
443 | |
444 | + |
445 | + |
446 | +def load_taxonomy(taxonomy_csv): |
447 | + """Load in a taxonomy CSV file and convert to taxonomy Dict""" |
448 | + |
449 | + import csv |
450 | + |
451 | + taxonomy = {} |
452 | + |
453 | + with open(taxonomy_csv, 'rU') as csvfile: |
454 | + tax_reader = csv.reader(csvfile, delimiter=',') |
455 | + tax_reader.next() |
456 | + for row in tax_reader: |
457 | + current_taxonomy = {} |
458 | + i = 1 |
459 | + for t in taxonomy_levels: |
460 | + if not row[i] == '-': |
461 | + current_taxonomy[t] = row[i] |
462 | + i = i+ 1 |
463 | + |
464 | + current_taxonomy['provider'] = row[17] # data source |
465 | + taxonomy[row[0]] = current_taxonomy |
466 | + |
467 | + return taxonomy |
468 | + |
469 | + |
470 | def data_overlap(XML, overlap_amount=2, filename=None, detailed=False, show=False, verbose=False, ignoreWarnings=False): |
471 | """ Calculate the amount of taxonomic overlap between source trees. |
472 | The output is a True/False by default, but you can specify an |
473 | @@ -2852,6 +2879,58 @@ |
474 | |
475 | return XML |
476 | |
477 | +def get_mrca(tree,taxa_list): |
478 | + """Return the node number for the MRCA of the list of given taxa |
479 | + This node number must be used in conjunction with a p4 tree object, along |
480 | + the lines of: |
481 | + treeobj = _parse_tree(tree_string) |
482 | + treeobj.node(mrca).parent |
483 | + """ |
484 | + |
485 | + # find MRCA of all taxa within this clade, already in the tree |
486 | + node_ids = [] |
487 | + # get the nodes of the taxa in question |
488 | + node_id_for_taxa = [] |
489 | + treeobj = _parse_tree(tree) |
490 | + for t in taxa_list: |
491 | + node_id_for_taxa.append(treeobj.node(t).nodeNum) |
492 | + # for each, get all parents to root |
493 | + for n in node_id_for_taxa: |
494 | + nodes = [] |
495 | + nodes.append(treeobj.node(n).parent.nodeNum) |
496 | + while 1: |
497 | + nn = treeobj.node(nodes[-1]).parent |
498 | + if nn == None: |
499 | + break |
500 | + else: |
501 | + nodes.append(nn.nodeNum) |
502 | + node_ids.append(nodes) |
503 | + # in the shortest list, loop through the values, check they exist in all lists. If it does, |
504 | + # that node is your MRCA |
505 | + big = sys.maxsize |
506 | + node_ids |
507 | + shortest = 0 |
508 | + for n in node_ids: |
509 | + if len(n) < big: |
510 | + big = len(n) |
511 | + shortest = n |
512 | + mrca = -1 |
513 | + for s in shortest: |
514 | + found = True |
515 | + for n in node_ids: |
516 | + if not s in n: |
517 | + found = False |
518 | + break # move to next s |
519 | + # if we get here, we have the MRCA |
520 | + if (found): |
521 | + mrca = s |
522 | + break |
523 | + if mrca == -1: |
524 | + # something went wrong! |
525 | + raise InvalidSTKData("Error finding MRCA of: "+" ".join(taxa_list)) |
526 | + |
527 | + return mrca |
528 | + |
529 | ################ PRIVATE FUNCTIONS ######################## |
530 | |
531 | def _uniquify(l): |
532 | |
533 | === modified file 'stk/test/_supertree_toolkit.py' |
534 | --- stk/test/_supertree_toolkit.py 2014-12-10 08:55:43 +0000 |
535 | +++ stk/test/_supertree_toolkit.py 2015-03-26 09:59:01 +0000 |
536 | @@ -12,7 +12,7 @@ |
537 | from stk.supertree_toolkit import data_overlap, read_matrix, subs_file_from_str, clean_data, obtain_trees, get_all_source_names |
538 | from stk.supertree_toolkit import add_historical_event, _sort_data, _parse_xml, _check_sources, _swap_tree_in_XML, replace_genera |
539 | from stk.supertree_toolkit import get_all_taxa, _get_all_siblings, _parse_tree, get_characters_used, _trees_equal, get_weights |
540 | -from stk.supertree_toolkit import get_outgroup, set_all_tree_names, create_tree_name |
541 | +from stk.supertree_toolkit import get_outgroup, set_all_tree_names, create_tree_name, load_taxonomy |
542 | from lxml import etree |
543 | from util import * |
544 | from stk.stk_exceptions import * |
545 | @@ -558,6 +558,18 @@ |
546 | self.assert_(c in expected_characters) |
547 | self.assert_(len(characters) == len(expected_characters)) |
548 | |
549 | + def test_load_taxonomy(self): |
550 | + csv_file = "data/input/create_taxonomy.csv" |
551 | + expected = {'Archaeopteryx lithographica': {'subkingdom': 'Metazoa', 'subclass': 'Tetrapodomorpha', 'suborder': 'Coelurosauria', 'provider': 'Paleobiology Database', 'genus': 'Archaeopteryx', 'class': 'Aves'}, |
552 | + 'Egretta tricolor': {'kingdom': 'Animalia', 'family': 'Ardeidae', 'subkingdom': 'Bilateria', 'subclass': 'Neoloricata', 'class': 'Aves', 'phylum': 'Chordata', 'superphylum': 'Lophozoa', 'suborder': 'Ischnochitonina', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'infrakingdom': 'Protostomia', 'genus': 'Egretta', 'order': 'Pelecaniformes', 'species': 'Egretta tricolor'}, |
553 | + 'Gallus gallus': {'kingdom': 'Animalia', 'infrakingdom': 'Protostomia', 'family': 'Phasianidae', 'subkingdom': 'Bilateria', 'class': 'Aves', 'phylum': 'Chordata', 'superphylum': 'Lophozoa', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'genus': 'Gallus', 'order': 'Galliformes', 'species': 'Gallus gallus'}, |
554 | + 'Thalassarche melanophris': {'kingdom': 'Animalia', 'family': 'Diomedeidae', 'subkingdom': 'Bilateria', 'class': 'Aves', 'phylum': 'Chordata', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'infrakingdom': 'Deuterostomia', 'subphylum': 'Vertebrata', 'genus': 'Thalassarche', 'order': 'Procellariiformes', 'species': 'Thalassarche melanophris'}, |
555 | + 'Jeletzkytes criptonodosus': {'kingdom': 'Metazoa', 'family': 'Scaphitidae', 'order': 'Ammonoidea', 'phylum': 'Mollusca', 'provider': 'PBDB', 'species': 'Jeletzkytes criptonodosus', 'class': 'Cephalopoda'}} |
556 | + taxonomy = load_taxonomy(csv_file) |
557 | + self.maxDiff = None |
558 | + |
559 | + self.assertDictEqual(taxonomy, expected) |
560 | + |
561 | def test_name_tree(self): |
562 | XML = etree.tostring(etree.parse('data/input/single_source_no_names.phyml',parser),pretty_print=True) |
563 | xml_root = _parse_xml(XML) |
564 | |
565 | === modified file 'stk/test/_trees.py' |
566 | --- stk/test/_trees.py 2014-06-09 15:36:19 +0000 |
567 | +++ stk/test/_trees.py 2015-03-26 09:59:01 +0000 |
568 | @@ -6,7 +6,7 @@ |
569 | from stk.supertree_toolkit import import_tree, obtain_trees, get_all_taxa, _assemble_tree_matrix, create_matrix, _delete_taxon, _sub_taxon,_tree_contains |
570 | from stk.supertree_toolkit import _swap_tree_in_XML, substitute_taxa, get_taxa_from_tree, get_characters_from_tree, amalgamate_trees, _uniquify |
571 | from stk.supertree_toolkit import import_trees, import_tree, _trees_equal, _find_trees_for_permuting, permute_tree, get_all_source_names, _getTaxaFromNewick |
572 | - |
573 | +from stk.supertree_toolkit import get_mrca |
574 | import os |
575 | from lxml import etree |
576 | from util import * |
577 | @@ -33,8 +33,8 @@ |
578 | self.assert_(e_tree == tree) |
579 | |
580 | def test_import_tutorial_tree(self): |
581 | - test_file = "../../doc/tutorial/Cebezas_etal_tree1.tre" |
582 | - e_tree = "(Onconida_alaini, ((Paramunida_granulata, ((Paramunida_pronoe, ((Paramunida_thalie, (Paramunida_pictura, Paramunida_labis)), (Paramunida_luminata, (Paramunida_belone, (Paramunida_salai, Paramunida_lophia))))), (Paramunida_stichas, Paramunida_proxima))), (Plesionida_concava, Plesionida_aliena)));" |
583 | + test_file = "../../doc/tutorial/5.3_DataEntry/HallThatje_2009.tre" |
584 | + e_tree = "((Aegla_sp., (Pagurus_bernhardus, Pagurus_hirsutiusculus)), (((Cryptolithodes_sitchensis, Cryptolithodes_typicus), (Phyllolithodes_papillosus, (Lopholithodes_mandtii, (Glyptolithodes_cristatipes, (Paralomis_formosa, Paralomis_spinosissima))), (Neolithodes_brodiei, (Paralithodes_camtschaticus, Paralithodes_brevipes), (Lithodes_confundens, Lithodes_ferox)))), (Oedignathus_inermis, (Hapalogaster_dentata, Hapalogaster_mertensii))));" |
585 | tree = import_tree(test_file) |
586 | self.assert_(e_tree == tree) |
587 | |
588 | @@ -209,6 +209,12 @@ |
589 | |
590 | class TestTreeManipulation(unittest.TestCase): |
591 | |
592 | + |
593 | + def test_get_mrca(self): |
594 | + tree = "(B,(C,(D,(E,((A,F),((I,(G,H)),(J,(K,L))))))));" |
595 | + mrca = get_mrca(tree,["A","I", "L"]) |
596 | + self.assert_(mrca == 8) |
597 | + |
598 | def test_get_all_trees(self): |
599 | XML = etree.tostring(etree.parse(single_source_input,parser),pretty_print=True) |
600 | tree = obtain_trees(XML) |
601 | |
602 | === added file 'stk/test/data/input/create_taxonomy.csv' |
603 | --- stk/test/data/input/create_taxonomy.csv 1970-01-01 00:00:00 +0000 |
604 | +++ stk/test/data/input/create_taxonomy.csv 2015-03-26 09:59:01 +0000 |
605 | @@ -0,0 +1,6 @@ |
606 | +OTU,species,genus,family,superfamily,infraorder,suborder,order,superorder,subclass,class,subphylum,phylum,superphylum,infrakingdom,subkingdom,kingdom,Data source |
607 | +Archaeopteryx lithographica,-,Archaeopteryx,-,-,-,Coelurosauria,-,-,Tetrapodomorpha,Aves,-,-,-,-,Metazoa,-,Paleobiology Database |
608 | +Thalassarche melanophris,Thalassarche melanophris,Thalassarche,Diomedeidae,-,-,-,Procellariiformes,-,-,Aves,Vertebrata,Chordata,-,Deuterostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013 |
609 | +Egretta tricolor,Egretta tricolor,Egretta,Ardeidae,-,-,Ischnochitonina,Pelecaniformes,-,Neoloricata,Aves,-,Chordata,Lophozoa,Protostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013 |
610 | +Gallus gallus,Gallus gallus,Gallus,Phasianidae,-,-,-,Galliformes,-,-,Aves,-,Chordata,Lophozoa,Protostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013 |
611 | +Jeletzkytes criptonodosus,Jeletzkytes criptonodosus,-,Scaphitidae,-,-,-,Ammonoidea,-,-,Cephalopoda,-,Mollusca,-,-,-,Metazoa,PBDB |
Missing test for load taxonomy