Merge ~pguimaraes/maas:kvm-overcommit-fix-lp1842896 into ~blake-rouse/maas:fix-1843493

Proposed by Pedro Guimarães
Status: Rejected
Rejected by: Blake Rouse
Proposed branch: ~pguimaraes/maas:kvm-overcommit-fix-lp1842896
Merge into: ~blake-rouse/maas:fix-1843493
Diff against target: 110 lines (+72/-2)
2 files modified
src/maasserver/forms/pods.py (+55/-2)
src/maasserver/models/bmc.py (+17/-0)
Reviewer Review Type Date Requested Status
Blake Rouse Pending
Review via email: mp+372808@code.launchpad.net

Commit message

Fixes LP: #1842896. Following changes made on Pods form and model:
* created machine inherits Pod zone instead of Default value, since we are filtering Pods by zone just before calling get_allocated_composed_machine
* Pod model's create_machine now checks for overcommit ratios just before running the creation process, failing with PodProblem if overcommit limits arenot respected
* create_and_sync runs create_machine within transactional() method to avoid concurrency where multiple allocate requests ends up landing on same Pod and not respecting overcommit limits for that Pod

Description of the change

Fixes LP: #1842896. Following changes made on Pods form and model:
* created machine inherits Pod zone instead of Default value, since we are filtering Pods by zone just before calling get_allocated_composed_machine
* Pod model's create_machine now checks for overcommit ratios just before running the creation process, failing with PodProblem if overcommit limits arenot respected
* create_and_sync runs create_machine within transactional() method to avoid concurrency where multiple allocate requests ends up landing on same Pod and not respecting overcommit limits for that Pod

To post a comment you must log in.
Revision history for this message
Blake Rouse (blake-rouse) wrote :

I am actively working on proper fix for this, jsyk

Unmerged commits

8aad7b4... by Pedro Guimarães

Fixes LP: #1842896. Following changes made on Pods form and model:
* created machine inherits Pod zone instead of Default value, since we are filtering Pods by zone just before calling get_allocated_composed_machine
* Pod model's create_machine now checks for overcommit ratios just before running the creation process, failing with PodProblem if overcommit limits arenot respected
* create_and_sync runs create_machine within transactional() method to avoid concurrency where multiple allocate requests ends up landing on same Pod and not respecting overcommit limits for that Pod

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/src/maasserver/forms/pods.py b/src/maasserver/forms/pods.py
index 85c1da2..54dc71f 100644
--- a/src/maasserver/forms/pods.py
+++ b/src/maasserver/forms/pods.py
@@ -443,7 +443,7 @@ class ComposeMachineForm(forms.Form):
443 self.initial['domain'] = Domain.objects.get_default_domain()443 self.initial['domain'] = Domain.objects.get_default_domain()
444 self.fields['zone'] = ModelChoiceField(444 self.fields['zone'] = ModelChoiceField(
445 required=False, queryset=Zone.objects.all())445 required=False, queryset=Zone.objects.all())
446 self.initial['zone'] = Zone.objects.get_default_zone()446 self.initial['zone'] = self.pod.zone
447 self.fields['pool'] = ModelChoiceField(447 self.fields['pool'] = ModelChoiceField(
448 required=False, queryset=ResourcePool.objects.all())448 required=False, queryset=ResourcePool.objects.all())
449 self.initial['pool'] = self.pod.pool449 self.initial['pool'] = self.pod.pool
@@ -639,6 +639,59 @@ class ComposeMachineForm(forms.Form):
639 self.pod.sync_hints(pod_hints)639 self.pod.sync_hints(pod_hints)
640 return created_machine640 return created_machine
641641
642 def create_and_sync_nonIOThread(result, post_pod_sync=False):
643 requested_machine, result = result
644 discovered_machine, pod_hints = result
645
646 @asynchronous
647 def wrap_create_machine(func,
648 discovered_machine,
649 commissioning_user,
650 skip_commissioning,
651 creation_type,
652 interfaces,requested_machine,
653 domain,pool,zone):
654 d = deferToDatabase(transactional(
655 partial(
656 func,
657 discovered_machine=discovered_machine,
658 commissioning_user=commissioning_user,
659 skip_commissioning=skip_commissioning,
660 creation_type=creation_type,
661 interfaces=interfaces,
662 requested_machine=requested_machine,
663 domain=domain,
664 pool=pool,
665 zone=zone,
666 )
667 ))
668 return d
669 try:
670 created_machine = wrap_create_machine(
671 self.pod.create_machine,
672 discovered_machine=discovered_machine,
673 commissioning_user=self.request.user,
674 skip_commissioning=skip_commissioning,
675 creation_type=creation_type,
676 interfaces=self.get_value_for('interfaces'),
677 requested_machine=requested_machine,
678 domain=self.get_value_for('domain'),
679 pool=self.get_value_for('pool'),
680 zone=self.get_value_for('zone'),
681 ).wait(timeout)
682 except crochet.TimeoutError:
683 log.debug("Compose: wrap_create_machine timed out")
684 raise Exception(
685 "Unable to compose a machine because database driver "
686 "timed out after %d seconds." % (
687 timeout))
688
689 post_commit_do(
690 deferToDatabase,
691 transactional(self.pod.sync_hints), pod_hints)
692 log.info("Composition: Result received is: {}".format(created_machine))
693 return created_machine
694
642 @inlineCallbacks695 @inlineCallbacks
643 def async_compose_machine(696 def async_compose_machine(
644 result, power_type, power_paramaters, **kwargs):697 result, power_type, power_paramaters, **kwargs):
@@ -706,7 +759,7 @@ class ComposeMachineForm(forms.Form):
706 "timed out after %d seconds." % (759 "timed out after %d seconds." % (
707 self.pod.power_type, timeout))760 self.pod.power_type, timeout))
708 try:761 try:
709 return create_and_sync(762 return create_and_sync_nonIOThread(
710 (requested_machine, result), post_pod_sync=True)763 (requested_machine, result), post_pod_sync=True)
711 except Exception:764 except Exception:
712 # Issue saving the machine into the database. The composed765 # Issue saving the machine into the database. The composed
diff --git a/src/maasserver/models/bmc.py b/src/maasserver/models/bmc.py
index 5b76b6c..b312dbf 100644
--- a/src/maasserver/models/bmc.py
+++ b/src/maasserver/models/bmc.py
@@ -783,6 +783,23 @@ class Pod(BMC):
783 skip_commissioning=False,783 skip_commissioning=False,
784 creation_type=NODE_CREATION_TYPE.PRE_EXISTING, interfaces=None,784 creation_type=NODE_CREATION_TYPE.PRE_EXISTING, interfaces=None,
785 requested_machine=None, **kwargs):785 requested_machine=None, **kwargs):
786
787 # TODO: Both check_over_commit_ratios and following code to create
788 # machine should be run within same DB atomic transaction.
789 # that will ensure that a single thread is adding machines
790 # to this KVM host.
791 # TODO action is to try detect if we're under
792 # transaction.atomic()
793 # if not, generate LOG warning in case with:
794 # "models.bmc.pod.create_machine should always run within
795 # transaction.atomic(): <ADD STACK TRACE>"
796 msg = self.check_over_commit_ratios(discovered_machine.cores,
797 discovered_machine.memory)
798 if msg:
799 raise PodProblem(
800 "Unable to compose KVM instance in '%s'. %s" % (
801 self.name, msg))
802
786 """Create's a `Machine` from `discovered_machines` for this pod."""803 """Create's a `Machine` from `discovered_machines` for this pod."""
787 if skip_commissioning:804 if skip_commissioning:
788 status = NODE_STATUS.READY805 status = NODE_STATUS.READY

Subscribers

People subscribed via source and target branches

to all changes: