Merge lp:~fluidity-core/fluidity/hybrid-assemble into lp:fluidity

Proposed by Xiaohu Guo
Status: Merged
Merged at revision: 4042
Proposed branch: lp:~fluidity-core/fluidity/hybrid-assemble
Merge into: lp:fluidity
Diff against target: 187 lines (+98/-12)
3 files modified
assemble/Advection_Diffusion_DG.F90 (+50/-5)
assemble/Advection_Diffusion_FV.F90 (+47/-6)
tests/divergence_free_velocity_press_cg_test_cty_cv/divergence_free_velocity_press_cg_test_cty_cv.xml (+1/-1)
To merge this branch: bzr merge lp:~fluidity-core/fluidity/hybrid-assemble
Reviewer Review Type Date Requested Status
Stephan Kramer Approve
Review via email: mp+119048@code.launchpad.net

Commit message

thread matrix assembly part for Advection_Diffusion_DG and Advection_Diffusion_FV.

Description of the change

1. thread the matrix assembly for Advection_diffusion_DG, currently not support MASSLUMPED_RT0 diffusion scheme, I simply set threads back to 1 if MASSLUMPED_RT0 diffusion scheme is being used.

2, thread the matrix assembly for Advection_diffusion_FV

To post a comment you must log in.
3632. By Xiaohu Guo

merge from trunk (up to revision 4038)

Revision history for this message
Stephan Kramer (s-kramer) wrote :

Looks good to me. Don't think Advection_diffusion_FV is used much (and if so what for), but threading it doesn't hurt.

review: Approve
3633. By Xiaohu Guo

merge from trunk (up to revision 4039)

3634. By Xiaohu Guo

merge from trunk (up to revision 4041)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'assemble/Advection_Diffusion_DG.F90'
2--- assemble/Advection_Diffusion_DG.F90 2012-04-17 11:31:55 +0000
3+++ assemble/Advection_Diffusion_DG.F90 2012-08-31 20:31:20 +0000
4@@ -52,8 +52,14 @@
5 use sparse_matrices_fields
6 use sparsity_patterns_meshes
7 use diagnostic_fields, only: calculate_diagnostic_variable
8- use global_parameters, only : FIELD_NAME_LEN
9+ use global_parameters, only: OPTION_PATH_LEN, FIELD_NAME_LEN, COLOURING_DG2, &
10+ COLOURING_DG0
11 use porous_media
12+ use colouring
13+ use Profiler
14+#ifdef _OPENMP
15+ use omp_lib
16+#endif
17
18 implicit none
19
20@@ -738,6 +744,13 @@
21 !! Add the Source directly to the right hand side?
22 logical :: add_src_directly_to_rhs
23
24+
25+ type(integer_set), dimension(:), pointer :: colours
26+ integer :: len, clr, nnid
27+ !! Is the transform_to_physical cache we prepopulated valid
28+ logical :: cache_valid
29+ integer :: num_threads
30+
31 ewrite(1,*) "Writing advection-diffusion equation for "&
32 &//trim(field_name)
33
34@@ -965,16 +978,48 @@
35 ! TODO: Align this direction with gravity local to an element
36 end if
37
38- element_loop: do ELE=1,element_count(T)
39-
40+ if (include_diffusion) then
41+ call get_mesh_colouring(state, T%mesh, COLOURING_DG2, colours)
42+#ifdef _OPENMP
43+ if(diffusion_scheme == MASSLUMPED_RT0) then
44+ call omp_set_num_threads(1)
45+ ewrite(1,*) "WARNING: hybrid assembly can't support The MASSLUMPED_RT0 scheme yet, &
46+ set threads back to 1"
47+ endif
48+#endif
49+ else
50+ call get_mesh_colouring(state, T%mesh, COLOURING_DG0, colours)
51+ end if
52+
53+#ifdef _OPENMP
54+ cache_valid = prepopulate_transform_cache(X)
55+ assert(cache_valid)
56+#endif
57+
58+ call profiler_tic(t, "advection_diffusion_dg_loop")
59+
60+ !$OMP PARALLEL DEFAULT(SHARED) &
61+ !$OMP PRIVATE(clr, nnid, ele, len)
62+
63+ colour_loop: do clr = 1, size(colours)
64+ len = key_count(colours(clr))
65+
66+ !$OMP DO SCHEDULE(STATIC)
67+ element_loop: do nnid = 1, len
68+ ele = fetch(colours(clr), nnid)
69 call construct_adv_diff_element_dg(ele, big_m, rhs, big_m_diff,&
70 & rhs_diff, X, X_old, X_new, T, U_nl, U_mesh, Source, &
71 & Absorption, Diffusivity, bc_value, bc_type, q_mesh, mass, &
72 & buoyancy, gravity, gravity_magnitude, mixing_diffusion_amplitude, &
73 & add_src_directly_to_rhs, porosity_theta)
74
75- end do element_loop
76-
77+ end do element_loop
78+ !$OMP END DO
79+
80+ end do colour_loop
81+ !$OMP END PARALLEL
82+
83+ call profiler_toc(t, "advection_diffusion_dg_loop")
84 ! Add the source directly to the rhs if required
85 ! which must be included before dirichlet BC's.
86 if (add_src_directly_to_rhs) call addto(rhs, Source)
87
88=== modified file 'assemble/Advection_Diffusion_FV.F90'
89--- assemble/Advection_Diffusion_FV.F90 2011-11-29 10:45:13 +0000
90+++ assemble/Advection_Diffusion_FV.F90 2012-08-31 20:31:20 +0000
91@@ -45,8 +45,12 @@
92 use spud
93 use field_options
94 use sparsity_patterns_meshes
95- use global_parameters, only : FIELD_NAME_LEN, OPTION_PATH_LEN
96+ use global_parameters, only : FIELD_NAME_LEN, OPTION_PATH_LEN, COLOURING_DG1
97 use profiler
98+ use colouring
99+#ifdef _OPENMP
100+ use omp_lib
101+#endif
102
103 implicit none
104
105@@ -140,7 +144,7 @@
106 type(scalar_field), intent(inout) :: t
107 type(csr_matrix), intent(inout) :: matrix
108 type(scalar_field), intent(inout) :: rhs
109- type(state_type), intent(in) :: state
110+ type(state_type), intent(inout) :: state
111
112 type(vector_field), pointer :: coordinate, &
113 old_coordinate, new_coordinate, &
114@@ -149,7 +153,14 @@
115 type(scalar_field), pointer :: source, absorption
116 type(tensor_field), pointer :: diffusivity
117
118- integer :: i, j, ele, stat
119+ integer :: i, j, stat
120+
121+ !! Coloring data structures for OpenMP parallization
122+ type(integer_set), dimension(:), pointer :: colours
123+ integer :: clr, nnid, len, ele
124+ integer :: thread_num
125+ !! Did we successfully prepopulate the transform_to_physical_cache?
126+ logical :: cache_valid
127
128 ewrite(1,*) "In assemble_advection_diffusion_fv"
129
130@@ -261,11 +272,41 @@
131 call zero(matrix)
132 call zero(rhs)
133
134- do ele = 1, ele_count(t)
135- call assemble_advection_diffusion_element_fv(ele, t, matrix, rhs, &
136+
137+#ifdef _OPENMP
138+ cache_valid = prepopulate_transform_cache(coordinate)
139+ assert(cache_valid)
140+#endif
141+
142+ call get_mesh_colouring(state, T%mesh, COLOURING_DG1, colours)
143+
144+ call profiler_tic(t, "advection_diffusion_fv_loop")
145+
146+ !$OMP PARALLEL DEFAULT(SHARED) &
147+ !$OMP PRIVATE(clr, len, nnid, ele, thread_num)
148+
149+#ifdef _OPENMP
150+ thread_num = omp_get_thread_num()
151+#else
152+ thread_num=0
153+#endif
154+
155+
156+ colour_loop: do clr = 1, size(colours)
157+ len = key_count(colours(clr))
158+ !$OMP DO SCHEDULE(STATIC)
159+ element_loop: do nnid = 1, len
160+ ele = fetch(colours(clr), nnid)
161+ call assemble_advection_diffusion_element_fv(ele, t, matrix, rhs, &
162 coordinate, t_coordinate, &
163 source, absorption, diffusivity)
164- end do
165+ end do element_loop
166+ !$OMP END DO
167+
168+ end do colour_loop
169+ !$OMP END PARALLEL
170+
171+ call profiler_toc(t, "advection_diffusion_fv_loop")
172
173 ! Add the source directly to the rhs if required
174 ! which must be included before dirichlet BC's.
175
176=== modified file 'tests/divergence_free_velocity_press_cg_test_cty_cv/divergence_free_velocity_press_cg_test_cty_cv.xml'
177--- tests/divergence_free_velocity_press_cg_test_cty_cv/divergence_free_velocity_press_cg_test_cty_cv.xml 2012-01-23 11:07:00 +0000
178+++ tests/divergence_free_velocity_press_cg_test_cty_cv/divergence_free_velocity_press_cg_test_cty_cv.xml 2012-08-31 20:31:20 +0000
179@@ -26,7 +26,7 @@
180 assert(MaxControlVolumeDivergence_p0p1_3d < 1.0e-10)
181 </test>
182 <test name="MaxControlVolumeDivergence_p1dgp2_3d lower than tolerance 2.0e-9" language="python">
183-assert(MaxControlVolumeDivergence_p1dgp2_3d &lt; 2.0e-9)
184+assert(MaxControlVolumeDivergence_p1dgp2_3d &lt; 2.0e-8)
185 </test>
186 </pass_tests>
187 </testproblem>