Merge lp:~therp-nl/openupgrade-server/7.0-address_deduplication into lp:openupgrade-server

Proposed by Stefan Rijnhart (Opener)
Status: Work in progress
Proposed branch: lp:~therp-nl/openupgrade-server/7.0-address_deduplication
Merge into: lp:openupgrade-server
Diff against target: 104 lines (+35/-4)
1 file modified
openerp/addons/base/migrations/7.0.1.3/post-migration.py (+35/-4)
To merge this branch: bzr merge lp:~therp-nl/openupgrade-server/7.0-address_deduplication
Reviewer Review Type Date Requested Status
OpenUpgrade Committers Pending
Review via email: mp+180517@code.launchpad.net

Description of the change

Don't merge; this is a tentative/optional change.

Due to the way that one of our customers imported their addresses, their database contained a large number of duplicate addresses. In the new partner model, duplicate addresses are very annoying. This branch performs a simple deduplication, which is quite transparent within the address-to-partner mechanism.

I don't think this is very useful for general use, though, so I am setting the status to Work in progress.

To post a comment you must log in.

Unmerged revisions

4628. By Stefan Rijnhart (Opener)

[IMP] Perform address deduplication

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'openerp/addons/base/migrations/7.0.1.3/post-migration.py'
2--- openerp/addons/base/migrations/7.0.1.3/post-migration.py 2013-07-25 06:42:40 +0000
3+++ openerp/addons/base/migrations/7.0.1.3/post-migration.py 2013-08-16 10:40:28 +0000
4@@ -19,6 +19,7 @@
5 #
6 ##############################################################################
7
8+import logging
9 from openupgrade import openupgrade
10 from openerp import pooler, SUPERUSER_ID
11
12@@ -76,8 +77,14 @@
13 'mobile', 'phone', 'state_id', 'street', 'street2', 'type', 'zip',
14 'partner_id', 'name',
15 ]
16+ dedup_fields = [
17+ 'birthdate', 'city', 'country_id', 'email', 'fax', 'function',
18+ 'mobile', 'phone', 'state_id', 'street', 'street2', 'zip',
19+ ]
20 partner_found = []
21 processed_ids = []
22+ reprs = {}
23+ logger = logging.getLogger('OpenUpgrade.base')
24
25 def set_address_partner(address_id, partner_id):
26 cr.execute(
27@@ -93,17 +100,22 @@
28 already in vals. Register the created partner_id
29 on the obsolete address table
30 """
31+
32 for key in defaults:
33 if key not in vals:
34 vals[key] = defaults[key]
35
36 partner_id = partner_obj.create(cr, SUPERUSER_ID, vals)
37 set_address_partner(address_id, partner_id)
38+ return partner_id
39
40 def process_address_type(cr, whereclause, args=None):
41 """
42 Migrate addresses to partners, based on sql WHERE clause
43+
44+ :return: number of squashed duplicates
45 """
46+ duplicates = 0
47 cr.execute(
48 "SELECT " + ', '.join(fields) + " FROM res_partner_address "
49 "WHERE " + whereclause, args or ())
50@@ -126,31 +138,50 @@
51 # not supplier and not customer
52 create_partner(address['id'], partner_vals, partner_defaults)
53 else:
54+
55+ # Create duplication key
56+ representation = unicode(
57+ [address[field] for field in dedup_fields])
58+ # Squash duplicates
59+ if representation in reprs.get(address['partner_id'], {}):
60+ set_address_partner(address['id'], reprs[address['partner_id']][representation])
61+ duplicates += 1
62+ continue
63 if address['partner_id'] not in partner_found:
64 # Main partner address
65 partner_obj.write(
66 cr, SUPERUSER_ID, address['partner_id'], partner_vals)
67 partner_found.append(address['partner_id'])
68+ partner_id = address['partner_id']
69 set_address_partner(address['id'], address['partner_id'])
70 else:
71 # any following address for an existing partner
72 partner_vals.update({
73 'is_company': False,
74 'parent_id': address['partner_id']})
75- create_partner(
76+ partner_id = create_partner(
77 address['id'], partner_vals, partner_defaults)
78+
79+ # Fill the deduplication search space with default addresses
80+ # or the else the first address found
81+ if (not reprs.get(address['partner_id'])
82+ or not address['type'] or address['type'] == 'default'):
83+ reprs.setdefault(address['partner_id'], {})[representation] = partner_id
84+
85 processed_ids.append(address['id'])
86+ return duplicates
87
88 # Process all addresses, default type first
89- process_address_type(cr, "type = 'default'")
90- process_address_type(cr, "type IS NULL OR type = ''")
91- process_address_type(cr, "id NOT IN %s", (tuple(processed_ids),))
92+ duplicates = process_address_type(cr, "type = 'default'")
93+ duplicates += process_address_type(cr, "type IS NULL OR type = ''")
94+ duplicates += process_address_type(cr, "id NOT IN %s", (tuple(processed_ids),))
95
96 # Check that all addresses have been migrated
97 cr.execute(
98 "SELECT COUNT(*) FROM res_partner_address "
99 "WHERE openupgrade_7_migrated_to_partner_id is NULL ")
100 assert(not cr.fetchone()[0])
101+ logger.debug('Squashed %s address duplicates', duplicates)
102
103 def update_users_partner(cr, pool):
104 """

Subscribers

People subscribed via source and target branches