Merge lp:~jon-hill/supertree-toolkit/fill_in_taxa into lp:supertree-toolkit

Proposed by Jon Hill
Status: Merged
Merged at revision: 270
Proposed branch: lp:~jon-hill/supertree-toolkit/fill_in_taxa
Merge into: lp:supertree-toolkit
Diff against target: 611 lines (+517/-6)
6 files modified
stk/p4/Tree_muck.py (+2/-2)
stk/scripts/fill_in_with_taxonomy.py (+408/-0)
stk/supertree_toolkit.py (+79/-0)
stk/test/_supertree_toolkit.py (+13/-1)
stk/test/_trees.py (+9/-3)
stk/test/data/input/create_taxonomy.csv (+6/-0)
To merge this branch: bzr merge lp:~jon-hill/supertree-toolkit/fill_in_taxa
Reviewer Review Type Date Requested Status
Jon Hill Approve
Review via email: mp+254215@code.launchpad.net

Description of the change

Adds functionality to fill in a tree using taxonomy

To post a comment you must log in.
Revision history for this message
Jon Hill (jon-hill) wrote :

Missing test for load taxonomy

272. By Jon Hill

Adding test for load_taxonomy and fixing broken test

Revision history for this message
Jon Hill (jon-hill) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'stk/p4/Tree_muck.py'
2--- stk/p4/Tree_muck.py 2012-01-11 08:57:43 +0000
3+++ stk/p4/Tree_muck.py 2015-03-26 09:59:01 +0000
4@@ -769,8 +769,8 @@
5 else:
6 gm.append("The 2 specified nodes should have a parent-child relationship")
7 raise Glitch, gm
8-
9- self.deleteCStuff()
10+ if var.usePfAndNumpy:
11+ self.deleteCStuff()
12
13 hasBrLens = False
14 for n in self.iterNodes():
15
16=== added file 'stk/scripts/fill_in_with_taxonomy.py'
17--- stk/scripts/fill_in_with_taxonomy.py 1970-01-01 00:00:00 +0000
18+++ stk/scripts/fill_in_with_taxonomy.py 2015-03-26 09:59:01 +0000
19@@ -0,0 +1,408 @@
20+#!/usr/bin/env python
21+#
22+# Supertree Toolkit. Software for managing and manipulating sources
23+# trees ready for supertree construction.
24+# Copyright (C) 2015, Jon Hill, Katie Davis
25+#
26+# This program is free software: you can redistribute it and/or modify
27+# it under the terms of the GNU General Public License as published by
28+# the Free Software Foundation, either version 3 of the License, or
29+# (at your option) any later version.
30+#
31+# This program is distributed in the hope that it will be useful,
32+# but WITHOUT ANY WARRANTY; without even the implied warranty of
33+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34+# GNU General Public License for more details.
35+#
36+# You should have received a copy of the GNU General Public License
37+# along with this program. If not, see <http://www.gnu.org/licenses/>.
38+#
39+# Jon Hill. jon.hill@york.ac.uk
40+
41+import urllib2
42+from urllib import quote_plus
43+import simplejson as json
44+import argparse
45+import os
46+import sys
47+stk_path = os.path.join( os.path.realpath(os.path.dirname(__file__)), os.pardir )
48+sys.path.insert(0, stk_path)
49+import supertree_toolkit as stk
50+import csv
51+
52+# What we get from EOL
53+current_taxonomy_levels = ['species','genus','family','order','class','phylum','kingdom']
54+# And the extra ones from ITIS
55+extra_taxonomy_levels = ['superfamily','infraorder','suborder','superorder','subclass','subphylum','superphylum','infrakingdom','subkingdom']
56+# all of them in order
57+taxonomy_levels = ['species','genus','subfamily','family','superfamily','infraorder','suborder','order','superorder','subclass','class','subphylum','phylum','superphylum','infrakingdom','subkingdom','kingdom']
58+
59+def get_tree_taxa_taxonomy(taxon,wsdlObjectWoRMS):
60+
61+ taxon_data = wsdlObjectWoRMS.getAphiaRecords(taxon.replace('_',' '))
62+ if taxon_data == None:
63+ return {}
64+
65+ taxon_id = taxon_data[0]['valid_AphiaID'] # there might be records that aren't valid - they point to the valid one though
66+ # call it again via the ID this time to make sure we've got the right one.
67+ taxon_data = wsdlObjectWoRMS.getAphiaRecordByID(taxon_id)
68+ # add data to taxonomy dictionary
69+ # get the taxonomy of this species
70+ classification = wsdlObjectWoRMS.getAphiaClassificationByID(taxon_id)
71+ # construct array
72+ tax_array = {}
73+ # classification is a nested dictionary, so we need to iterate down it
74+ current_child = classification.child
75+ while True:
76+ tax_array[current_child.rank.lower()] = current_child.scientificname
77+ current_child = current_child.child
78+ if current_child == '': # empty one is a string for some reason
79+ break
80+ return tax_array
81+
82+
83+
84+def get_taxonomy_worms(taxonomy, start_otu):
85+ """ Gets and processes a taxon from the queue to get its taxonomy."""
86+ from SOAPpy import WSDL
87+
88+ wsdlObjectWoRMS = WSDL.Proxy('http://www.marinespecies.org/aphia.php?p=soap&wsdl=1')
89+
90+ # this is the recursive function
91+ def get_children(taxonomy, ID):
92+
93+ # get data
94+ this_item = wsdlObjectWoRMS.getAphiaRecordByID(ID)
95+ if this_item == None:
96+ return taxonomy
97+ if this_item['rank'].lower() == 'species':
98+ # add data to taxonomy dictionary
99+ # get the taxonomy of this species
100+ classification = wsdlObjectWoRMS.getAphiaClassificationByID(ID)
101+ taxon = this_item.scientificname
102+ if not taxon in taxonomy: # is a new taxon, not previously in the taxonomy
103+ # construct array
104+ tax_array = {}
105+ # classification is a nested dictionary, so we need to iterate down it
106+ current_child = classification.child
107+ while True:
108+ if taxonomy_levels.index(current_child.rank.lower()) <= taxonomy_levels.index(start_taxonomy_level):
109+ # we need this - we're closer to the tips of the tree than we started
110+ tax_array[current_child.rank.lower()] = current_child.scientificname
111+ current_child = current_child.child
112+ if current_child == '': # empty one is a string for some reason
113+ break
114+ taxonomy[this_item.scientificname] = tax_array
115+ return taxonomy
116+ else:
117+ return taxonomy
118+
119+ children = wsdlObjectWoRMS.getAphiaChildrenByID(ID, 1, False)
120+
121+ for child in children:
122+ taxonomy = get_children(taxonomy, child['valid_AphiaID'])
123+
124+ return taxonomy
125+
126+
127+ # main bit of the get_taxonomy_worms function
128+ try:
129+ start_taxa = wsdlObjectWoRMS.getAphiaRecords(start_otu)
130+ start_id = start_taxa[0]['valid_AphiaID'] # there might be records that aren't valid - they point to the valid one though
131+ # call it again via the ID this time to make sure we've got the right one.
132+ start_taxa = wsdlObjectWoRMS.getAphiaRecordByID(start_id)
133+ start_taxonomy_level = start_taxa['rank'].lower()
134+ except HTTPError:
135+ print "Error"
136+ sys.exit(-1)
137+
138+ taxonomy = get_children(taxonomy,start_id)
139+
140+ return taxonomy, start_taxonomy_level
141+
142+
143+def main():
144+
145+ # do stuff
146+ parser = argparse.ArgumentParser(
147+ prog="Fill tree in using taxonomy",
148+ description="Fills in the taxonomic gaps using polytomies within a tree to increase coverage",
149+ )
150+ parser.add_argument(
151+ '-v',
152+ '--verbose',
153+ action='store_true',
154+ help="Verbose output: mainly progress reports.",
155+ default=False
156+ )
157+ parser.add_argument(
158+ '--pref_db',
159+ help="Taxonomy database to use. Default is Species 2000/ITIS",
160+ choices=['itis', 'worms', 'ncbi'],
161+ default = 'worms'
162+ )
163+ parser.add_argument(
164+ '--save_taxonomy',
165+ help="Save the taxonomy downloaded. Give a filename"
166+ )
167+ parser.add_argument(
168+ '--taxonomy_from_file',
169+ help='Use a downloaded taxonomy database from the chosen database, rather than online. Much quicker for large datasets. Give the filename',
170+ )
171+ parser.add_argument(
172+ '--tree_taxonomy',
173+ help="Supply a STK taxonomy file for taxa in the tree. If not, one will be created from the database being used here."
174+ )
175+ parser.add_argument(
176+ 'top_level',
177+ nargs=1,
178+ help="The top level group to look in, e.g. Arthropoda, Decapoda. Must match the database."
179+ )
180+ parser.add_argument(
181+ 'input_file',
182+ metavar='input_file',
183+ nargs=1,
184+ help="Your tree file"
185+ )
186+ parser.add_argument(
187+ 'output_file',
188+ metavar='output_file',
189+ nargs=1,
190+ help="Your new tree file"
191+ )
192+
193+ args = parser.parse_args()
194+ verbose = args.verbose
195+ input_file = args.input_file[0]
196+ output_file = args.output_file[0]
197+ top_level = args.top_level[0]
198+ save_taxonomy_file = args.save_taxonomy
199+ tree_taxonomy = args.tree_taxonomy
200+ pref_db = args.pref_db
201+ if (save_taxonomy_file == None):
202+ save_taxonomy = False
203+ else:
204+ save_taxonomy = True
205+
206+ # grab taxa in tree
207+ tree = stk.import_tree(input_file)
208+ taxa_list = stk._getTaxaFromNewick(tree)
209+
210+ taxonomy = {}
211+
212+ # we're going to add the taxa in the tree to the taxonomy, to stop them
213+ # being fetched in the first place. We delete them later
214+ for taxon in taxa_list:
215+ taxon = taxon.replace('_',' ')
216+ taxonomy[taxon] = []
217+
218+
219+ if (pref_db == 'itis'):
220+ # get taxonomy info from itis
221+ print "Sorry, ITIS is not implemented yet"
222+ pass
223+ elif (pref_db == 'worms'):
224+ # get tree taxonomy from worms
225+ if (tree_taxonomy == None):
226+ tree_taxonomy = {}
227+ for t in taxa_list:
228+ from SOAPpy import WSDL
229+ wsdlObjectWoRMS = WSDL.Proxy('http://www.marinespecies.org/aphia.php?p=soap&wsdl=1')
230+ tree_taxonomy[t] = get_tree_taxa_taxonomy(t,wsdlObjectWoRMS)
231+ else:
232+ tree_taxonomy = stk.load_taxonomy(tree_taxonomy)
233+ # get taxonomy from worms
234+ taxonomy, start_level = get_taxonomy_worms(taxonomy,top_level)
235+
236+ elif (pref_db == 'ncbi'):
237+ # get taxonomy from ncbi
238+ print "Sorry, NCBI is not implemented yet"
239+ pass
240+ else:
241+ print "ERROR: Didn't understand you database choice"
242+ sys.exit(-1)
243+
244+ # clean up taxonomy, deleting the ones already in the tree
245+ for taxon in taxa_list:
246+ taxon = taxon.replace('_',' ')
247+ del taxonomy[taxon]
248+
249+ # step up the taxonomy levels from genus, adding taxa to the correct node
250+ # as a polytomy
251+ for level in taxonomy_levels[1::]: # skip species....
252+ new_taxa = []
253+ for t in taxonomy:
254+ # skip odd ones that shouldn't be in there
255+ if start_level in taxonomy[t] and taxonomy[t][start_level] == top_level:
256+ try:
257+ new_taxa.append(taxonomy[t][level])
258+ except KeyError:
259+ continue # don't have this info
260+ new_taxa = _uniquify(new_taxa)
261+ for nt in new_taxa:
262+ taxa_to_add = []
263+ taxa_in_clade = []
264+ for t in taxonomy:
265+ if start_level in taxonomy[t] and taxonomy[t][start_level] == top_level:
266+ try:
267+ if taxonomy[t][level] == nt:
268+ taxa_to_add.append(t.replace(' ','_'))
269+ except KeyError:
270+ continue
271+ # add to tree
272+ for t in taxa_list:
273+ if level in tree_taxonomy[t] and tree_taxonomy[t][level] == nt:
274+ taxa_in_clade.append(t)
275+ if len(taxa_in_clade) > 0:
276+ tree = add_taxa(tree, taxa_to_add, taxa_in_clade)
277+ for t in taxa_to_add: # clean up taxonomy
278+ del taxonomy[t.replace('_',' ')]
279+
280+
281+ trees = {}
282+ trees['tree_1'] = tree
283+ output = stk._amalgamate_trees(trees,format='nexus')
284+ f = open(output_file, "w")
285+ f.write(output)
286+ f.close()
287+
288+ if not save_taxonomy_file == None:
289+ with open(save_taxonomy_file, 'w') as f:
290+ writer = csv.writer(f)
291+ headers = []
292+ headers.append("OTU")
293+ headers.extend(taxonomy_levels)
294+ headers.append("Data source")
295+ writer.writerow(headers)
296+ for t in taxonomy:
297+ otu = t
298+ try:
299+ species = taxonomy[t]['species']
300+ except KeyError:
301+ species = "-"
302+ try:
303+ genus = taxonomy[t]['genus']
304+ except KeyError:
305+ genus = "-"
306+ try:
307+ family = taxonomy[t]['family']
308+ except KeyError:
309+ family = "-"
310+ try:
311+ superfamily = taxonomy[t]['superfamily']
312+ except KeyError:
313+ superfamily = "-"
314+ try:
315+ infraorder = taxonomy[t]['infraorder']
316+ except KeyError:
317+ infraorder = "-"
318+ try:
319+ suborder = taxonomy[t]['suborder']
320+ except KeyError:
321+ suborder = "-"
322+ try:
323+ order = taxonomy[t]['order']
324+ except KeyError:
325+ order = "-"
326+ try:
327+ superorder = taxonomy[t]['superorder']
328+ except KeyError:
329+ superorder = "-"
330+ try:
331+ subclass = taxonomy[t]['subclass']
332+ except KeyError:
333+ subclass = "-"
334+ try:
335+ tclass = taxonomy[t]['class']
336+ except KeyError:
337+ tclass = "-"
338+ try:
339+ subphylum = taxonomy[t]['subphylum']
340+ except KeyError:
341+ subphylum = "-"
342+ try:
343+ phylum = taxonomy[t]['phylum']
344+ except KeyError:
345+ phylum = "-"
346+ try:
347+ superphylum = taxonomy[t]['superphylum']
348+ except KeyError:
349+ superphylum = "-"
350+ try:
351+ infrakingdom = taxonomy[t]['infrakingdom']
352+ except:
353+ infrakingdom = "-"
354+ try:
355+ subkingdom = taxonomy[t]['subkingdom']
356+ except:
357+ subkingdom = "-"
358+ try:
359+ kingdom = taxonomy[t]['kingdom']
360+ except KeyError:
361+ kingdom = "-"
362+ try:
363+ provider = taxonomy[t]['provider']
364+ except KeyError:
365+ provider = "-"
366+
367+ if (isinstance(species, list)):
368+ species = " ".join(species)
369+ this_classification = [
370+ otu.encode('utf-8'),
371+ species.encode('utf-8'),
372+ genus.encode('utf-8'),
373+ family.encode('utf-8'),
374+ superfamily.encode('utf-8'),
375+ infraorder.encode('utf-8'),
376+ suborder.encode('utf-8'),
377+ order.encode('utf-8'),
378+ superorder.encode('utf-8'),
379+ subclass.encode('utf-8'),
380+ tclass.encode('utf-8'),
381+ subphylum.encode('utf-8'),
382+ phylum.encode('utf-8'),
383+ superphylum.encode('utf-8'),
384+ infrakingdom.encode('utf-8'),
385+ subkingdom.encode('utf-8'),
386+ kingdom.encode('utf-8'),
387+ provider.encode('utf-8')]
388+ writer.writerow(this_classification)
389+
390+
391+def _uniquify(l):
392+ """
393+ Make a list, l, contain only unique data
394+ """
395+ keys = {}
396+ for e in l:
397+ keys[e] = 1
398+
399+ return keys.keys()
400+
401+def add_taxa(tree, new_taxa, taxa_in_clade):
402+
403+ # create new tree of the new taxa
404+ #tree_string = "(" + ",".join(new_taxa) + ");"
405+ #additionalTaxa = stk._parse_tree(tree_string)
406+
407+ # find mrca parent
408+ treeobj = stk._parse_tree(tree)
409+ mrca = stk.get_mrca(tree,taxa_in_clade)
410+ mrca_parent = treeobj.node(mrca).parent
411+
412+ # insert a node into the tree between the MRCA and its parent (p4.addNodeBetweenNodes)
413+ newNode = treeobj.addNodeBetweenNodes(mrca, mrca_parent)
414+
415+ # add the new tree at the new node using p4.addSubTree(self, selfNode, theSubTree, subTreeTaxNames=None)
416+ #treeobj.addSubTree(newNode, additionalTaxa)
417+ for t in new_taxa:
418+ treeobj.addSibLeaf(newNode,t)
419+
420+ # return new tree
421+ return treeobj.writeNewick(fName=None,toString=True).strip()
422+
423+if __name__ == "__main__":
424+ main()
425+
426+
427+
428
429=== modified file 'stk/supertree_toolkit.py'
430--- stk/supertree_toolkit.py 2014-12-10 08:55:43 +0000
431+++ stk/supertree_toolkit.py 2015-03-26 09:59:01 +0000
432@@ -52,6 +52,7 @@
433 IDENTICAL = 0
434 SUBSET = 1
435 PLATFORM = sys.platform
436+taxonomy_levels = ['species','genus','family','superfamily','infraorder','suborder','order','superorder','subclass','class','subphylum','phylum','superphylum','infrakingdom','subkingdom','kingdom']
437
438 # supertree_toolkit is the backend for the STK. Loaded by both the GUI and
439 # CLI, this contains all the functions to actually *do* something
440@@ -1991,6 +1992,32 @@
441
442 return output_string
443
444+
445+
446+def load_taxonomy(taxonomy_csv):
447+ """Load in a taxonomy CSV file and convert to taxonomy Dict"""
448+
449+ import csv
450+
451+ taxonomy = {}
452+
453+ with open(taxonomy_csv, 'rU') as csvfile:
454+ tax_reader = csv.reader(csvfile, delimiter=',')
455+ tax_reader.next()
456+ for row in tax_reader:
457+ current_taxonomy = {}
458+ i = 1
459+ for t in taxonomy_levels:
460+ if not row[i] == '-':
461+ current_taxonomy[t] = row[i]
462+ i = i+ 1
463+
464+ current_taxonomy['provider'] = row[17] # data source
465+ taxonomy[row[0]] = current_taxonomy
466+
467+ return taxonomy
468+
469+
470 def data_overlap(XML, overlap_amount=2, filename=None, detailed=False, show=False, verbose=False, ignoreWarnings=False):
471 """ Calculate the amount of taxonomic overlap between source trees.
472 The output is a True/False by default, but you can specify an
473@@ -2852,6 +2879,58 @@
474
475 return XML
476
477+def get_mrca(tree,taxa_list):
478+ """Return the node number for the MRCA of the list of given taxa
479+ This node number must be used in conjunction with a p4 tree object, along
480+ the lines of:
481+ treeobj = _parse_tree(tree_string)
482+ treeobj.node(mrca).parent
483+ """
484+
485+ # find MRCA of all taxa within this clade, already in the tree
486+ node_ids = []
487+ # get the nodes of the taxa in question
488+ node_id_for_taxa = []
489+ treeobj = _parse_tree(tree)
490+ for t in taxa_list:
491+ node_id_for_taxa.append(treeobj.node(t).nodeNum)
492+ # for each, get all parents to root
493+ for n in node_id_for_taxa:
494+ nodes = []
495+ nodes.append(treeobj.node(n).parent.nodeNum)
496+ while 1:
497+ nn = treeobj.node(nodes[-1]).parent
498+ if nn == None:
499+ break
500+ else:
501+ nodes.append(nn.nodeNum)
502+ node_ids.append(nodes)
503+ # in the shortest list, loop through the values, check they exist in all lists. If it does,
504+ # that node is your MRCA
505+ big = sys.maxsize
506+ node_ids
507+ shortest = 0
508+ for n in node_ids:
509+ if len(n) < big:
510+ big = len(n)
511+ shortest = n
512+ mrca = -1
513+ for s in shortest:
514+ found = True
515+ for n in node_ids:
516+ if not s in n:
517+ found = False
518+ break # move to next s
519+ # if we get here, we have the MRCA
520+ if (found):
521+ mrca = s
522+ break
523+ if mrca == -1:
524+ # something went wrong!
525+ raise InvalidSTKData("Error finding MRCA of: "+" ".join(taxa_list))
526+
527+ return mrca
528+
529 ################ PRIVATE FUNCTIONS ########################
530
531 def _uniquify(l):
532
533=== modified file 'stk/test/_supertree_toolkit.py'
534--- stk/test/_supertree_toolkit.py 2014-12-10 08:55:43 +0000
535+++ stk/test/_supertree_toolkit.py 2015-03-26 09:59:01 +0000
536@@ -12,7 +12,7 @@
537 from stk.supertree_toolkit import data_overlap, read_matrix, subs_file_from_str, clean_data, obtain_trees, get_all_source_names
538 from stk.supertree_toolkit import add_historical_event, _sort_data, _parse_xml, _check_sources, _swap_tree_in_XML, replace_genera
539 from stk.supertree_toolkit import get_all_taxa, _get_all_siblings, _parse_tree, get_characters_used, _trees_equal, get_weights
540-from stk.supertree_toolkit import get_outgroup, set_all_tree_names, create_tree_name
541+from stk.supertree_toolkit import get_outgroup, set_all_tree_names, create_tree_name, load_taxonomy
542 from lxml import etree
543 from util import *
544 from stk.stk_exceptions import *
545@@ -558,6 +558,18 @@
546 self.assert_(c in expected_characters)
547 self.assert_(len(characters) == len(expected_characters))
548
549+ def test_load_taxonomy(self):
550+ csv_file = "data/input/create_taxonomy.csv"
551+ expected = {'Archaeopteryx lithographica': {'subkingdom': 'Metazoa', 'subclass': 'Tetrapodomorpha', 'suborder': 'Coelurosauria', 'provider': 'Paleobiology Database', 'genus': 'Archaeopteryx', 'class': 'Aves'},
552+ 'Egretta tricolor': {'kingdom': 'Animalia', 'family': 'Ardeidae', 'subkingdom': 'Bilateria', 'subclass': 'Neoloricata', 'class': 'Aves', 'phylum': 'Chordata', 'superphylum': 'Lophozoa', 'suborder': 'Ischnochitonina', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'infrakingdom': 'Protostomia', 'genus': 'Egretta', 'order': 'Pelecaniformes', 'species': 'Egretta tricolor'},
553+ 'Gallus gallus': {'kingdom': 'Animalia', 'infrakingdom': 'Protostomia', 'family': 'Phasianidae', 'subkingdom': 'Bilateria', 'class': 'Aves', 'phylum': 'Chordata', 'superphylum': 'Lophozoa', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'genus': 'Gallus', 'order': 'Galliformes', 'species': 'Gallus gallus'},
554+ 'Thalassarche melanophris': {'kingdom': 'Animalia', 'family': 'Diomedeidae', 'subkingdom': 'Bilateria', 'class': 'Aves', 'phylum': 'Chordata', 'provider': 'Species 2000 & ITIS Catalogue of Life: April 2013', 'infrakingdom': 'Deuterostomia', 'subphylum': 'Vertebrata', 'genus': 'Thalassarche', 'order': 'Procellariiformes', 'species': 'Thalassarche melanophris'},
555+ 'Jeletzkytes criptonodosus': {'kingdom': 'Metazoa', 'family': 'Scaphitidae', 'order': 'Ammonoidea', 'phylum': 'Mollusca', 'provider': 'PBDB', 'species': 'Jeletzkytes criptonodosus', 'class': 'Cephalopoda'}}
556+ taxonomy = load_taxonomy(csv_file)
557+ self.maxDiff = None
558+
559+ self.assertDictEqual(taxonomy, expected)
560+
561 def test_name_tree(self):
562 XML = etree.tostring(etree.parse('data/input/single_source_no_names.phyml',parser),pretty_print=True)
563 xml_root = _parse_xml(XML)
564
565=== modified file 'stk/test/_trees.py'
566--- stk/test/_trees.py 2014-06-09 15:36:19 +0000
567+++ stk/test/_trees.py 2015-03-26 09:59:01 +0000
568@@ -6,7 +6,7 @@
569 from stk.supertree_toolkit import import_tree, obtain_trees, get_all_taxa, _assemble_tree_matrix, create_matrix, _delete_taxon, _sub_taxon,_tree_contains
570 from stk.supertree_toolkit import _swap_tree_in_XML, substitute_taxa, get_taxa_from_tree, get_characters_from_tree, amalgamate_trees, _uniquify
571 from stk.supertree_toolkit import import_trees, import_tree, _trees_equal, _find_trees_for_permuting, permute_tree, get_all_source_names, _getTaxaFromNewick
572-
573+from stk.supertree_toolkit import get_mrca
574 import os
575 from lxml import etree
576 from util import *
577@@ -33,8 +33,8 @@
578 self.assert_(e_tree == tree)
579
580 def test_import_tutorial_tree(self):
581- test_file = "../../doc/tutorial/Cebezas_etal_tree1.tre"
582- e_tree = "(Onconida_alaini, ((Paramunida_granulata, ((Paramunida_pronoe, ((Paramunida_thalie, (Paramunida_pictura, Paramunida_labis)), (Paramunida_luminata, (Paramunida_belone, (Paramunida_salai, Paramunida_lophia))))), (Paramunida_stichas, Paramunida_proxima))), (Plesionida_concava, Plesionida_aliena)));"
583+ test_file = "../../doc/tutorial/5.3_DataEntry/HallThatje_2009.tre"
584+ e_tree = "((Aegla_sp., (Pagurus_bernhardus, Pagurus_hirsutiusculus)), (((Cryptolithodes_sitchensis, Cryptolithodes_typicus), (Phyllolithodes_papillosus, (Lopholithodes_mandtii, (Glyptolithodes_cristatipes, (Paralomis_formosa, Paralomis_spinosissima))), (Neolithodes_brodiei, (Paralithodes_camtschaticus, Paralithodes_brevipes), (Lithodes_confundens, Lithodes_ferox)))), (Oedignathus_inermis, (Hapalogaster_dentata, Hapalogaster_mertensii))));"
585 tree = import_tree(test_file)
586 self.assert_(e_tree == tree)
587
588@@ -209,6 +209,12 @@
589
590 class TestTreeManipulation(unittest.TestCase):
591
592+
593+ def test_get_mrca(self):
594+ tree = "(B,(C,(D,(E,((A,F),((I,(G,H)),(J,(K,L))))))));"
595+ mrca = get_mrca(tree,["A","I", "L"])
596+ self.assert_(mrca == 8)
597+
598 def test_get_all_trees(self):
599 XML = etree.tostring(etree.parse(single_source_input,parser),pretty_print=True)
600 tree = obtain_trees(XML)
601
602=== added file 'stk/test/data/input/create_taxonomy.csv'
603--- stk/test/data/input/create_taxonomy.csv 1970-01-01 00:00:00 +0000
604+++ stk/test/data/input/create_taxonomy.csv 2015-03-26 09:59:01 +0000
605@@ -0,0 +1,6 @@
606+OTU,species,genus,family,superfamily,infraorder,suborder,order,superorder,subclass,class,subphylum,phylum,superphylum,infrakingdom,subkingdom,kingdom,Data source
607+Archaeopteryx lithographica,-,Archaeopteryx,-,-,-,Coelurosauria,-,-,Tetrapodomorpha,Aves,-,-,-,-,Metazoa,-,Paleobiology Database
608+Thalassarche melanophris,Thalassarche melanophris,Thalassarche,Diomedeidae,-,-,-,Procellariiformes,-,-,Aves,Vertebrata,Chordata,-,Deuterostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013
609+Egretta tricolor,Egretta tricolor,Egretta,Ardeidae,-,-,Ischnochitonina,Pelecaniformes,-,Neoloricata,Aves,-,Chordata,Lophozoa,Protostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013
610+Gallus gallus,Gallus gallus,Gallus,Phasianidae,-,-,-,Galliformes,-,-,Aves,-,Chordata,Lophozoa,Protostomia,Bilateria,Animalia,Species 2000 & ITIS Catalogue of Life: April 2013
611+Jeletzkytes criptonodosus,Jeletzkytes criptonodosus,-,Scaphitidae,-,-,-,Ammonoidea,-,-,Cephalopoda,-,Mollusca,-,-,-,Metazoa,PBDB

Subscribers

People subscribed via source and target branches

to all changes: