Merge lp:~therp-nl/openupgrade-server/7.0-address_deduplication into lp:openupgrade-server

Proposed by Stefan Rijnhart (Opener)
Status: Work in progress
Proposed branch: lp:~therp-nl/openupgrade-server/7.0-address_deduplication
Merge into: lp:openupgrade-server
Diff against target: 104 lines (+35/-4)
1 file modified
openerp/addons/base/migrations/7.0.1.3/post-migration.py (+35/-4)
To merge this branch: bzr merge lp:~therp-nl/openupgrade-server/7.0-address_deduplication
Reviewer Review Type Date Requested Status
OpenUpgrade Committers Pending
Review via email: mp+180517@code.launchpad.net

Description of the change

Don't merge, this is a tentative/optional change.

Due to the way that one of our customers imported their addresses, their database contained a large amount of address duplicates. In the new partner model, duplicate addresses are very annoying. This branch performs a simple deduplication, which is quite transparent within the address-to-partner mechanism.

I don't think this is very useful for general usage though, so setting to work in progress.

To post a comment you must log in.

Unmerged revisions

4628. By Stefan Rijnhart (Opener)

[IMP] Perform address deduplication

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'openerp/addons/base/migrations/7.0.1.3/post-migration.py'
--- openerp/addons/base/migrations/7.0.1.3/post-migration.py 2013-07-25 06:42:40 +0000
+++ openerp/addons/base/migrations/7.0.1.3/post-migration.py 2013-08-16 10:40:28 +0000
@@ -19,6 +19,7 @@
19#19#
20##############################################################################20##############################################################################
2121
22import logging
22from openupgrade import openupgrade23from openupgrade import openupgrade
23from openerp import pooler, SUPERUSER_ID24from openerp import pooler, SUPERUSER_ID
2425
@@ -76,8 +77,14 @@
76 'mobile', 'phone', 'state_id', 'street', 'street2', 'type', 'zip',77 'mobile', 'phone', 'state_id', 'street', 'street2', 'type', 'zip',
77 'partner_id', 'name',78 'partner_id', 'name',
78 ]79 ]
80 dedup_fields = [
81 'birthdate', 'city', 'country_id', 'email', 'fax', 'function',
82 'mobile', 'phone', 'state_id', 'street', 'street2', 'zip',
83 ]
79 partner_found = []84 partner_found = []
80 processed_ids = []85 processed_ids = []
86 reprs = {}
87 logger = logging.getLogger('OpenUpgrade.base')
8188
82 def set_address_partner(address_id, partner_id):89 def set_address_partner(address_id, partner_id):
83 cr.execute(90 cr.execute(
@@ -93,17 +100,22 @@
93 already in vals. Register the created partner_id100 already in vals. Register the created partner_id
94 on the obsolete address table101 on the obsolete address table
95 """102 """
103
96 for key in defaults:104 for key in defaults:
97 if key not in vals:105 if key not in vals:
98 vals[key] = defaults[key]106 vals[key] = defaults[key]
99107
100 partner_id = partner_obj.create(cr, SUPERUSER_ID, vals)108 partner_id = partner_obj.create(cr, SUPERUSER_ID, vals)
101 set_address_partner(address_id, partner_id)109 set_address_partner(address_id, partner_id)
110 return partner_id
102111
103 def process_address_type(cr, whereclause, args=None):112 def process_address_type(cr, whereclause, args=None):
104 """113 """
105 Migrate addresses to partners, based on sql WHERE clause114 Migrate addresses to partners, based on sql WHERE clause
115
116 :return: number of squashed duplicates
106 """117 """
118 duplicates = 0
107 cr.execute(119 cr.execute(
108 "SELECT " + ', '.join(fields) + " FROM res_partner_address "120 "SELECT " + ', '.join(fields) + " FROM res_partner_address "
109 "WHERE " + whereclause, args or ())121 "WHERE " + whereclause, args or ())
@@ -126,31 +138,50 @@
126 # not supplier and not customer138 # not supplier and not customer
127 create_partner(address['id'], partner_vals, partner_defaults)139 create_partner(address['id'], partner_vals, partner_defaults)
128 else:140 else:
141
142 # Create duplication key
143 representation = unicode(
144 [address[field] for field in dedup_fields])
145 # Squash duplicates
146 if representation in reprs.get(address['partner_id'], {}):
147 set_address_partner(address['id'], reprs[address['partner_id']][representation])
148 duplicates += 1
149 continue
129 if address['partner_id'] not in partner_found:150 if address['partner_id'] not in partner_found:
130 # Main partner address151 # Main partner address
131 partner_obj.write(152 partner_obj.write(
132 cr, SUPERUSER_ID, address['partner_id'], partner_vals)153 cr, SUPERUSER_ID, address['partner_id'], partner_vals)
133 partner_found.append(address['partner_id'])154 partner_found.append(address['partner_id'])
155 partner_id = address['partner_id']
134 set_address_partner(address['id'], address['partner_id'])156 set_address_partner(address['id'], address['partner_id'])
135 else:157 else:
136 # any following address for an existing partner158 # any following address for an existing partner
137 partner_vals.update({159 partner_vals.update({
138 'is_company': False,160 'is_company': False,
139 'parent_id': address['partner_id']})161 'parent_id': address['partner_id']})
140 create_partner(162 partner_id = create_partner(
141 address['id'], partner_vals, partner_defaults)163 address['id'], partner_vals, partner_defaults)
164
165 # Fill the deduplication search space with default addresses
166 # or the else the first address found
167 if (not reprs.get(address['partner_id'])
168 or not address['type'] or address['type'] == 'default'):
169 reprs.setdefault(address['partner_id'], {})[representation] = partner_id
170
142 processed_ids.append(address['id'])171 processed_ids.append(address['id'])
172 return duplicates
143173
144 # Process all addresses, default type first 174 # Process all addresses, default type first
145 process_address_type(cr, "type = 'default'")175 duplicates = process_address_type(cr, "type = 'default'")
146 process_address_type(cr, "type IS NULL OR type = ''")176 duplicates += process_address_type(cr, "type IS NULL OR type = ''")
147 process_address_type(cr, "id NOT IN %s", (tuple(processed_ids),))177 duplicates += process_address_type(cr, "id NOT IN %s", (tuple(processed_ids),))
148178
149 # Check that all addresses have been migrated179 # Check that all addresses have been migrated
150 cr.execute(180 cr.execute(
151 "SELECT COUNT(*) FROM res_partner_address "181 "SELECT COUNT(*) FROM res_partner_address "
152 "WHERE openupgrade_7_migrated_to_partner_id is NULL ")182 "WHERE openupgrade_7_migrated_to_partner_id is NULL ")
153 assert(not cr.fetchone()[0])183 assert(not cr.fetchone()[0])
184 logger.debug('Squashed %s address duplicates', duplicates)
154185
155def update_users_partner(cr, pool):186def update_users_partner(cr, pool):
156 """ 187 """

Subscribers

People subscribed via source and target branches