Merge lp:~nickpapior/siesta/4.1-phases into lp:siesta/4.1
- 4.1-phases
- Merge into rel-4.1
Proposed by
Nick Papior
Status: | Merged |
---|---|
Merged at revision: | 894 |
Proposed branch: | lp:~nickpapior/siesta/4.1-phases |
Merge into: | lp:siesta/4.1 |
Diff against target: |
548 lines (+97/-129) (has conflicts) 8 files modified
Src/pdos2g.F (+8/-15) Src/pdos2k.F (+24/-30) Src/pdos3g.F (+6/-14) Src/pdos3k.F (+23/-29) Src/pdosg.F (+2/-3) Src/pdosk.F (+15/-17) Src/pdoskp.F (+15/-21) version.info (+4/-0) Text conflict in version.info |
To merge this branch: | bzr merge lp:~nickpapior/siesta/4.1-phases |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Alberto Garcia | Pending | ||
Review via email: mp+343468@code.launchpad.net |
Commit message
Clarified phase usages in pdos codes.
Now comments explain why the pdos*k routines used a different sign in the S(k) generation when performing the matrix multiplications.
I.e. this branch introduces nothing but clarified code and slight performance increase due to removal of a small array and lots of copying.
Description of the change
To post a comment you must log in.
lp:~nickpapior/siesta/4.1-phases
updated
- 895. By Nick Papior
-
Amended pdoskp for the details outlined in the previous two commits
Now the phases are well explained in all pdos routines.
Revision history for this message
Alberto Garcia (albertog) wrote : | # |
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'Src/pdos2g.F' |
2 | --- Src/pdos2g.F 2017-09-27 13:56:46 +0000 |
3 | +++ Src/pdos2g.F 2018-04-18 08:41:10 +0000 |
4 | @@ -44,7 +44,7 @@ |
5 | C **** AUXILIARY ***************************************************** |
6 | C real*8 haux(nuo,nuo) : Auxiliary space for the hamiltonian matrix |
7 | C real*8 saux(nuo,nuo) : Auxiliary space for the overlap matrix |
8 | -C real*8 psi(nuo,nuo) : Auxiliary space for the eigenvectors |
9 | +C complex*16 psi(2,nuo,nuo) : Auxiliary space for the eigenvectors |
10 | C **** OUTPUT ******************************************************** |
11 | C real*8 dtot(nhist,4) : Total density of states |
12 | C real*8 dpr(nhist,nuo,4): Proyected density of states |
13 | @@ -71,10 +71,11 @@ |
14 | |
15 | real(dp) |
16 | . H(maxnh,4), S(maxnh), E1, E2, sigma, eo(maxo*2), |
17 | - . psi(2,2,nuotot,2*nuo), dtot(nhist,4), dpr(nhist,nuotot,4) |
18 | - |
19 | - complex(dp) |
20 | - . Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo), caux(2,nuotot) |
21 | + . dtot(nhist,4), dpr(nhist,nuotot,4) |
22 | + complex(dp), target :: psi(2,nuotot,2*nuo) |
23 | + |
24 | + complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo) |
25 | + complex(dp), pointer :: caux(:,:) |
26 | external cdiag |
27 | |
28 | C Internal variables --------------------------------------------------- |
29 | @@ -181,11 +182,7 @@ |
30 | diff = (ener - eo(ibandg))**2 / (sigma ** 2) |
31 | if (diff .gt. 15.0d0) cycle |
32 | gauss = exp(-diff) |
33 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
34 | - do j=1,nuotot |
35 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
36 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
37 | - enddo |
38 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
39 | do jo = 1, Bnuo |
40 | call LocalToGlobalOrb(jo,BNode,Nodes,juo) |
41 | do io = 1, nuotot |
42 | @@ -218,11 +215,7 @@ |
43 | diff = (ener - eo(iband))**2 / (sigma ** 2) |
44 | if (diff .gt. 15.0d0) cycle |
45 | gauss = exp(-diff) |
46 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
47 | - do j=1,nuotot |
48 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
49 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
50 | - enddo |
51 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
52 | do jo = 1, nuotot |
53 | do io = 1, nuotot |
54 | rpipj = dreal(caux(1,io)*dconjg(caux(1,jo)))*gauss |
55 | |
56 | === modified file 'Src/pdos2k.F' |
57 | --- Src/pdos2k.F 2017-09-27 13:56:46 +0000 |
58 | +++ Src/pdos2k.F 2018-04-18 08:41:10 +0000 |
59 | @@ -48,7 +48,7 @@ |
60 | C **** AUXILIARY ***************************************************** |
61 | C REAL*8 HAUX(2,NUO,NUO) : Auxiliary space for the hamiltonian matrix |
62 | C REAL*8 SAUX(2,NUO,NUO) : Auxiliary space for the overlap matrix |
63 | -C REAL*8 PSI(2,NUO,NUO) : Auxiliary space for the eigenvectors |
64 | +C COMPLEX*16 PSI(2,NUO,NUO) : Auxiliary space for the eigenvectors |
65 | C **** OUTPUT ******************************************************** |
66 | C REAL*8 DTOT(NHIST,4) : Total density of states |
67 | C REAL*8 DPR(NHIST,NUO,4): Proyected density of states |
68 | @@ -75,11 +75,11 @@ |
69 | real(dp) |
70 | . H(MAXNH,4), S(MAXNH), E1, E2, SIGMA, |
71 | . XIJ(3,MAXNH), KPOINT(3,NK), EO(MAXO*2,NK), |
72 | - . DTOT(NHIST,4), DPR(NHIST,NUOTOT,4), WK(NK), |
73 | - . psi(2,2,nuotot,2*nuo) |
74 | + . DTOT(NHIST,4), DPR(NHIST,NUOTOT,4), WK(NK) |
75 | + complex(dp), target :: psi(2,nuotot,2*nuo) |
76 | |
77 | - complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo), |
78 | - . caux(2,nuotot) |
79 | + complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo) |
80 | + complex(dp), pointer :: caux(:,:) |
81 | |
82 | complex(dp) kphs |
83 | |
84 | @@ -94,8 +94,7 @@ |
85 | real(dp) |
86 | . KXIJ, CKXIJ, SKXIJ, DELTA, ENER, DIFF, GAUSS, NORM, WKSUM |
87 | |
88 | - complex(dp) |
89 | - . D11, D12, D21, D22 |
90 | + complex(dp) :: D11, D12, D22 |
91 | |
92 | complex(dp), pointer :: Spr(:,:) => null() |
93 | |
94 | @@ -127,7 +126,7 @@ |
95 | kxij = kpoint(1,ik) * xij(1,ind) + |
96 | . kpoint(2,ik) * xij(2,ind) + |
97 | . kpoint(3,ik) * xij(3,ind) |
98 | - kphs = cdexp(dcmplx(0.0_dp, -1.0_dp)*kxij) |
99 | + kphs = cdexp(dcmplx(0.0_dp, -kxij)) |
100 | |
101 | Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs |
102 | Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs |
103 | @@ -159,7 +158,7 @@ |
104 | kxij = kpoint(1,ik) * xij(1,ind) + |
105 | . kpoint(2,ik) * xij(2,ind) + |
106 | . kpoint(3,ik) * xij(3,ind) |
107 | - kphs = cdexp(dcmplx(0.0_dp, -1.0_dp)*kxij) |
108 | + kphs = cdexp(dcmplx(0.0_dp, -kxij)) |
109 | |
110 | Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs |
111 | Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs |
112 | @@ -188,7 +187,10 @@ |
113 | kxij = kpoint(1,ik) * xij(1,ind) + |
114 | . kpoint(2,ik) * xij(2,ind) + |
115 | . kpoint(3,ik) * xij(3,ind) |
116 | - kphs = cdexp(dcmplx(0.0_dp, 1.0_dp)*kxij) |
117 | +! Since we are doing element wise multiplications (and not dot-products) |
118 | +! we might as well setup the transpose S(k)^T == S(-k) because this will |
119 | +! mean that we can do a simpler multiplication further down |
120 | + kphs = cdexp(dcmplx(0.0_dp, kxij)) |
121 | Spr(juo,iuo) = Spr(juo,iuo) + S(ind) * kphs |
122 | enddo |
123 | enddo |
124 | @@ -225,23 +227,19 @@ |
125 | ibandg = ibandg * 2 - mod(iband, 2) |
126 | diff = (ener - eo(ibandg,ik))**2 / (sigma ** 2) |
127 | if (diff .gt. 15.0d0) cycle |
128 | - gauss = exp(-diff) |
129 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
130 | - do j=1,nuotot |
131 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
132 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
133 | - enddo |
134 | + gauss = exp(-diff) * wk(ik) |
135 | + caux => psi(:,:,iband) ! c_{j,up), c_{j,down} |
136 | do jo = 1, Bnuo |
137 | call LocalToGlobalOrb(jo,BNode,Nodes,juo) |
138 | do io = 1, nuotot |
139 | D11 = caux(1,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
140 | D22 = caux(2,io) * dconjg(caux(2,juo)) * Sloc(io,jo) |
141 | D12 = caux(1,io) * dconjg(caux(2,juo)) * Sloc(io,jo) |
142 | - D21 = caux(2,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
143 | + !D21 = caux(2,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
144 | |
145 | - D11 = gauss*wk(ik)*D11 |
146 | - D22 = gauss*wk(ik)*D22 |
147 | - D12 = gauss*wk(ik)*D12 |
148 | + D11 = gauss*D11 |
149 | + D22 = gauss*D22 |
150 | + D12 = gauss*D12 |
151 | |
152 | dpr(ihist,juo,1) = dpr(ihist,juo,1) + dreal(D11) |
153 | dpr(ihist,juo,2) = dpr(ihist,juo,2) + dreal(D22) |
154 | @@ -265,22 +263,18 @@ |
155 | do iband = 1, nuo*2 |
156 | diff = (ener - eo(iband,ik))**2 / (sigma ** 2) |
157 | if (diff .gt. 15.0d0) cycle |
158 | - gauss = exp(-diff) |
159 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
160 | - do j=1,nuotot |
161 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
162 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
163 | - enddo |
164 | + gauss = exp(-diff) * wk(ik) |
165 | + caux => psi(:,:,iband) ! c_{up,j), c_{down,j} |
166 | do io = 1, nuotot |
167 | do jo = 1, nuotot |
168 | D11 = caux(1,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
169 | D22 = caux(2,io) * dconjg(caux(2,jo)) * Spr(io,jo) |
170 | D12 = caux(1,io) * dconjg(caux(2,jo)) * Spr(io,jo) |
171 | - D21 = caux(2,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
172 | + !D21 = caux(2,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
173 | |
174 | - D11 = gauss*wk(ik)*D11 |
175 | - D22 = gauss*wk(ik)*D22 |
176 | - D12 = gauss*wk(ik)*D12 |
177 | + D11 = gauss*D11 |
178 | + D22 = gauss*D22 |
179 | + D12 = gauss*D12 |
180 | |
181 | dpr(ihist,jo,1) = dpr(ihist,jo,1) + dreal(D11) |
182 | dpr(ihist,jo,2) = dpr(ihist,jo,2) + dreal(D22) |
183 | |
184 | === modified file 'Src/pdos3g.F' |
185 | --- Src/pdos3g.F 2017-09-27 13:56:46 +0000 |
186 | +++ Src/pdos3g.F 2018-04-18 08:41:10 +0000 |
187 | @@ -71,10 +71,10 @@ |
188 | |
189 | real(dp) |
190 | . H(maxnh,8), S(maxnh), E1, E2, sigma, eo(maxo*2), |
191 | - . psi(2,2,nuotot,2*nuo), dtot(nhist,4), dpr(nhist,nuotot,4) |
192 | - |
193 | - complex(dp) |
194 | - . Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo), caux(2,nuotot) |
195 | + . dtot(nhist,4), dpr(nhist,nuotot,4) |
196 | + complex(dp), target :: psi(2,nuotot,2*nuo) |
197 | + complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo) |
198 | + complex(dp), pointer :: caux(:,:) |
199 | external cdiag |
200 | |
201 | C Internal variables --------------------------------------------------- |
202 | @@ -180,11 +180,7 @@ |
203 | diff = (ener - eo(ibandg))**2 / (sigma ** 2) |
204 | if (diff .gt. 15.0d0) cycle |
205 | gauss = exp(-diff) |
206 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
207 | - do j=1,nuotot |
208 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
209 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
210 | - enddo |
211 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
212 | do jo = 1, Bnuo |
213 | call LocalToGlobalOrb(jo,BNode,Nodes,juo) |
214 | do io = 1, nuotot |
215 | @@ -218,11 +214,7 @@ |
216 | diff = (ener - eo(iband))**2 / (sigma ** 2) |
217 | if (diff .gt. 15.0d0) cycle |
218 | gauss = exp(-diff) |
219 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
220 | - do j=1,nuotot |
221 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
222 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
223 | - enddo |
224 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
225 | do jo = 1, nuotot |
226 | do io = 1, nuotot |
227 | rpipj = dreal(caux(1,io)*dconjg(caux(1,jo)))*gauss |
228 | |
229 | === modified file 'Src/pdos3k.F' |
230 | --- Src/pdos3k.F 2017-09-27 13:56:46 +0000 |
231 | +++ Src/pdos3k.F 2018-04-18 08:41:10 +0000 |
232 | @@ -75,11 +75,11 @@ |
233 | real(dp) |
234 | . H(MAXNH,8), S(MAXNH), E1, E2, SIGMA, |
235 | . XIJ(3,MAXNH), KPOINT(3,NK), EO(MAXO*2,NK), |
236 | - . DTOT(NHIST,4), DPR(NHIST,NUOTOT,4), WK(NK), |
237 | - . psi(2,2,nuotot,2*nuo) |
238 | + . DTOT(NHIST,4), DPR(NHIST,NUOTOT,4), WK(NK) |
239 | + complex(dp), target :: psi(2,nuotot,2*nuo) |
240 | |
241 | - complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo), |
242 | - . caux(2,nuotot) |
243 | + complex(dp) Haux(2,nuotot,2,nuo), Saux(2,nuotot,2,nuo) |
244 | + complex(dp), pointer :: caux(:,:) |
245 | |
246 | complex(dp) kphs |
247 | |
248 | @@ -94,8 +94,7 @@ |
249 | real(dp) |
250 | . KXIJ, CKXIJ, SKXIJ, DELTA, ENER, DIFF, GAUSS, NORM, WKSUM |
251 | |
252 | - complex(dp) |
253 | - . D11, D12, D21, D22 |
254 | + complex(dp) :: D11, D12, D22 |
255 | |
256 | complex(dp), pointer :: Spr(:,:) => null() |
257 | |
258 | @@ -127,7 +126,7 @@ |
259 | kxij = kpoint(1,ik) * xij(1,ind) + |
260 | . kpoint(2,ik) * xij(2,ind) + |
261 | . kpoint(3,ik) * xij(3,ind) |
262 | - kphs = cdexp(dcmplx(0.0_dp, -1.0_dp)*kxij) |
263 | + kphs = cdexp(dcmplx(0.0_dp, -kxij)) |
264 | |
265 | Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs |
266 | Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs |
267 | @@ -159,7 +158,7 @@ |
268 | kxij = kpoint(1,ik) * xij(1,ind) + |
269 | . kpoint(2,ik) * xij(2,ind) + |
270 | . kpoint(3,ik) * xij(3,ind) |
271 | - kphs = cdexp(dcmplx(0.0_dp, -1.0_dp)*kxij) |
272 | + kphs = cdexp(dcmplx(0.0_dp, -kxij)) |
273 | |
274 | Saux(1,juo,1,iuo) = Saux(1,juo,1,iuo) + S(ind) * kphs |
275 | Saux(2,juo,2,iuo) = Saux(2,juo,2,iuo) + S(ind) * kphs |
276 | @@ -188,7 +187,10 @@ |
277 | kxij = kpoint(1,ik) * xij(1,ind) + |
278 | . kpoint(2,ik) * xij(2,ind) + |
279 | . kpoint(3,ik) * xij(3,ind) |
280 | - kphs = cdexp(dcmplx(0.0_dp, 1.0_dp)*kxij) |
281 | +! Since we are doing element wise multiplications (and not dot-products) |
282 | +! we might as well setup the transpose S(k)^T == S(-k) because this will |
283 | +! mean that we can do a simpler multiplication further down |
284 | + kphs = cdexp(dcmplx(0.0_dp, kxij)) |
285 | Spr(juo,iuo) = Spr(juo,iuo) + S(ind) * kphs |
286 | enddo |
287 | enddo |
288 | @@ -225,23 +227,19 @@ |
289 | ibandg = ibandg * 2 - mod(iband, 2) |
290 | diff = (ener - eo(ibandg,ik))**2 / (sigma ** 2) |
291 | if (diff .gt. 15.0d0) cycle |
292 | - gauss = exp(-diff) |
293 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
294 | - do j=1,nuotot |
295 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
296 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
297 | - enddo |
298 | + gauss = exp(-diff) * wk(ik) |
299 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
300 | do jo = 1, Bnuo |
301 | call LocalToGlobalOrb(jo,BNode,Nodes,juo) |
302 | do io = 1, nuotot |
303 | D11 = caux(1,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
304 | D22 = caux(2,io) * dconjg(caux(2,juo)) * Sloc(io,jo) |
305 | D12 = caux(1,io) * dconjg(caux(2,juo)) * Sloc(io,jo) |
306 | - D21 = caux(2,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
307 | + !D21 = caux(2,io) * dconjg(caux(1,juo)) * Sloc(io,jo) |
308 | |
309 | - D11 = gauss*wk(ik)*D11 |
310 | - D22 = gauss*wk(ik)*D22 |
311 | - D12 = gauss*wk(ik)*D12 |
312 | + D11 = gauss*D11 |
313 | + D22 = gauss*D22 |
314 | + D12 = gauss*D12 |
315 | |
316 | dpr(ihist,juo,1) = dpr(ihist,juo,1) + dreal(D11) |
317 | dpr(ihist,juo,2) = dpr(ihist,juo,2) + dreal(D22) |
318 | @@ -265,22 +263,18 @@ |
319 | do iband = 1, nuo*2 |
320 | diff = (ener - eo(iband,ik))**2 / (sigma ** 2) |
321 | if (diff .gt. 15.0d0) cycle |
322 | - gauss = exp(-diff) |
323 | - caux(:,:) = dcmplx(0.0_dp,0.0_dp) |
324 | - do j=1,nuotot |
325 | - caux(1,j) = dcmplx(psi(1,1,j,iband),psi(2,1,j,iband)) ! c_{j,up) |
326 | - caux(2,j) = dcmplx(psi(1,2,j,iband),psi(2,2,j,iband)) ! c_{j,down) |
327 | - enddo |
328 | + gauss = exp(-diff) * wk(ik) |
329 | + caux => psi(:,:,iband) ! c_{up,j}, c_{down,j} |
330 | do io = 1, nuotot |
331 | do jo = 1, nuotot |
332 | D11 = caux(1,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
333 | D22 = caux(2,io) * dconjg(caux(2,jo)) * Spr(io,jo) |
334 | D12 = caux(1,io) * dconjg(caux(2,jo)) * Spr(io,jo) |
335 | - D21 = caux(2,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
336 | + !D21 = caux(2,io) * dconjg(caux(1,jo)) * Spr(io,jo) |
337 | |
338 | - D11 = gauss*wk(ik)*D11 |
339 | - D22 = gauss*wk(ik)*D22 |
340 | - D12 = gauss*wk(ik)*D12 |
341 | + D11 = gauss*D11 |
342 | + D22 = gauss*D22 |
343 | + D12 = gauss*D12 |
344 | |
345 | dpr(ihist,jo,1) = dpr(ihist,jo,1) + dreal(D11) |
346 | dpr(ihist,jo,2) = dpr(ihist,jo,2) + dreal(D22) |
347 | |
348 | === modified file 'Src/pdosg.F' |
349 | --- Src/pdosg.F 2017-07-14 12:17:28 +0000 |
350 | +++ Src/pdosg.F 2018-04-18 08:41:10 +0000 |
351 | @@ -190,7 +190,7 @@ |
352 | if (diff .gt. 15.0d0) then |
353 | cycle |
354 | else |
355 | - gauss = ( exp(-diff) ) |
356 | + gauss = exp(-diff) |
357 | if (Node.eq.BNode) then |
358 | C Only add once to dtot - not everytime loop over processors is executed |
359 | dtot(ihist,ispin) = dtot(ihist,ispin) + gauss |
360 | @@ -224,10 +224,9 @@ |
361 | if (diff .gt. 15.0d0) then |
362 | cycle |
363 | else |
364 | - gauss = ( exp(-diff) ) |
365 | + gauss = exp(-diff) |
366 | dtot(ihist,ispin) = dtot(ihist,ispin) + gauss |
367 | do iuo = 1, nuotot |
368 | -C Solo para los Juo que satisfagan el criterio del record... |
369 | do juo = 1, nuotot |
370 | pipj1 = psi(iuo,iband) * psi(juo,iband) |
371 | dpr(ihist,juo,ispin) = dpr(ihist,juo,ispin) + |
372 | |
373 | === modified file 'Src/pdosk.F' |
374 | --- Src/pdosk.F 2016-06-04 20:06:11 +0000 |
375 | +++ Src/pdosk.F 2018-04-18 08:41:10 +0000 |
376 | @@ -87,7 +87,7 @@ |
377 | |
378 | real(dp) |
379 | . kxij, Ckxij, Skxij, delta, ener, diff, pipj1, pipj2, |
380 | - . pipjS1, pipjS2, gauss, norm, wksum |
381 | + . pipjS1, gauss, norm, wksum |
382 | |
383 | #ifdef MPI |
384 | integer :: |
385 | @@ -197,10 +197,11 @@ |
386 | . kpoint(3,IK) * xij(3,ind) |
387 | ckxij = cos(kxij) |
388 | skxij = sin(kxij) |
389 | -C Calculates the hamiltonian and the overlap in k space |
390 | -C H(k) = Sum(R) exp(i*k*R) * H(R) |
391 | +! Since we are doing element wise multiplications (and not dot-products) |
392 | +! we might as well setup the transpose S(k)^T == S(-k) because this will |
393 | +! mean that we can do a simpler multiplication further down |
394 | Saux(1,juo,iuo) = Saux(1,juo,iuo) + S(ind) * ckxij |
395 | - Saux(2,juo,iuo) = Saux(2,juo,iuo) - S(ind) * skxij |
396 | + Saux(2,juo,iuo) = Saux(2,juo,iuo) + S(ind) * skxij |
397 | enddo |
398 | enddo |
399 | |
400 | @@ -236,23 +237,22 @@ |
401 | if (diff .gt. 15.0D0) then |
402 | cycle |
403 | else |
404 | - gauss = ( EXP(-diff) ) |
405 | + gauss = exp(-diff) * wk(ik) |
406 | if (Node.eq.BNode) then |
407 | C Only add once to dtot - not everytime loop over processors is executed |
408 | - dtot(ihist,ispin) = dtot(ihist,ispin) + gauss*WK(IK) |
409 | + dtot(ihist,ispin) = dtot(ihist,ispin) + gauss |
410 | endif |
411 | do jo = 1, Bnuo |
412 | call LocalToGlobalOrb(jo,BNode,Nodes,juo) |
413 | do iuo = 1, nuotot |
414 | -C Solo para los Juo que satisfagan el criterio del record... |
415 | +! This is: psi(iuo) * psi(juo)^* |
416 | pipj1 = psi(1,iuo,iband) * psi(1,juo,iband) + |
417 | . psi(2,iuo,iband) * psi(2,juo,iband) |
418 | - pipj2 = psi(1,iuo,iband) * psi(2,juo,iband) - |
419 | + pipj2 = - psi(1,iuo,iband) * psi(2,juo,iband) + |
420 | . psi(2,iuo,iband) * psi(1,juo,iband) |
421 | pipjS1= pipj1*Sloc(1,iuo,JO)-pipj2*Sloc(2,iuo,JO) |
422 | - pipjS2= pipj1*Sloc(2,iuo,JO)+pipj2*Sloc(1,iuo,JO) |
423 | - dpr(ihist,juo,ispin)= dpr(ihist,juo,ispin) + |
424 | - . pipjS1*gauss*WK(IK) |
425 | + dpr(ihist,juo,ispin)= dpr(ihist,juo,ispin) + |
426 | + . pipjS1*gauss |
427 | enddo |
428 | enddo |
429 | endif |
430 | @@ -275,19 +275,17 @@ |
431 | if (diff .gt. 15.0d0) then |
432 | cycle |
433 | else |
434 | - gauss = ( EXP(-diff) ) |
435 | - dtot(ihist,ispin) = dtot(ihist,ispin) + gauss*WK(IK) |
436 | + gauss = exp(-diff) * wk(ik) |
437 | + dtot(ihist,ispin) = dtot(ihist,ispin) + gauss |
438 | do iuo = 1, nuotot |
439 | -C Solo para los Juo que satisfagan el criterio del record... |
440 | do juo = 1, nuotot |
441 | pipj1 = psi(1,iuo,iband) * psi(1,juo,iband) + |
442 | . psi(2,iuo,iband) * psi(2,juo,iband) |
443 | - pipj2 = psi(1,iuo,iband) * psi(2,juo,iband) - |
444 | + pipj2 = - psi(1,iuo,iband) * psi(2,juo,iband) + |
445 | . psi(2,iuo,iband) * psi(1,juo,iband) |
446 | pipjS1= pipj1*Saux(1,iuo,juo)-pipj2*Saux(2,iuo,juo) |
447 | - pipjS2= pipj1*Saux(2,iuo,juo)+pipj2*Saux(1,iuo,juo) |
448 | dpr(ihist,juo,ispin)= dpr(ihist,juo,ispin) + |
449 | - . pipjS1*gauss*WK(IK) |
450 | + . pipjS1*gauss |
451 | enddo |
452 | enddo |
453 | endif |
454 | |
455 | === modified file 'Src/pdoskp.F' |
456 | --- Src/pdoskp.F 2016-04-05 09:54:38 +0000 |
457 | +++ Src/pdoskp.F 2018-04-18 08:41:10 +0000 |
458 | @@ -94,9 +94,9 @@ |
459 | |
460 | real(dp) |
461 | . kxij, Ckxij, Skxij, delta, ener, diff, pipj1, pipj2, |
462 | - . pipjS1, pipjS2, gauss, norm, wksum |
463 | + . pipjS1, gauss, norm, wksum |
464 | |
465 | - real(dp), dimension(:), pointer :: Snew, Dloc |
466 | + real(dp), dimension(:), pointer :: Snew |
467 | real(dp), dimension(:,:), pointer :: Hnew, xijloc |
468 | |
469 | #ifdef MPI |
470 | @@ -118,10 +118,6 @@ |
471 | call re_alloc( listhptrg, 1, nuotot, name='listhptrg', |
472 | & routine='pdoskp' ) |
473 | |
474 | -C Find maximum value in numh and create local storage |
475 | - nullify( Dloc ) |
476 | - call re_alloc( Dloc, 1, nuotot, name='Dloc', routine='pdoskp' ) |
477 | - |
478 | C Globalise numh |
479 | do io = 1,nuotot |
480 | call WhichNodeOrb(io,Nodes,BNode) |
481 | @@ -290,16 +286,17 @@ |
482 | jo = listhg(ind) |
483 | iuo = indxuo(io) |
484 | juo = indxuo(jo) |
485 | -C Calculates the phases k*r_ij |
486 | +C Calculate the phases k*r_ij |
487 | kxij = kpoint(1,ik) * xijloc(1,ind) + |
488 | . kpoint(2,ik) * xijloc(2,ind) + |
489 | . kpoint(3,ik) * xijloc(3,ind) |
490 | ckxij = cos(kxij) |
491 | skxij = sin(kxij) |
492 | -C Calculates the hamiltonian and the overlap in k space |
493 | -C H(k) = Sum(R) exp(i*k*R) * H(R) |
494 | +! Since we are doing element wise multiplications (and not dot-products) |
495 | +! we might as well setup the transpose S(k)^T == S(-k) because this will |
496 | +! mean that we can do a simpler multiplication further down |
497 | Saux(1,juo,iuo) = Saux(1,juo,iuo) + Snew(ind) * ckxij |
498 | - Saux(2,juo,iuo) = Saux(2,juo,iuo) - Snew(ind) * skxij |
499 | + Saux(2,juo,iuo) = Saux(2,juo,iuo) + Snew(ind) * skxij |
500 | enddo |
501 | enddo |
502 | |
503 | @@ -311,19 +308,17 @@ |
504 | if (diff .gt. 15.0d0) then |
505 | cycle |
506 | else |
507 | - gauss = ( exp(-diff) ) |
508 | - dtot(ihist,is) = dtot(ihist,is) + gauss*wk(ik) |
509 | - do iuo = 1, nuotot |
510 | -C Solo para los Juo que satisfagan el criterio del record... |
511 | - do juo = 1, nuotot |
512 | + gauss = exp(-diff) * wk(ik) |
513 | + dtot(ihist,is) = dtot(ihist,is) + gauss |
514 | + do juo = 1, nuotot |
515 | + do iuo = 1, nuotot |
516 | +! This is: psi(iuo) * psi(juo)^* |
517 | pipj1 = psi(1,iuo,iband) * psi(1,juo,iband) + |
518 | . psi(2,iuo,iband) * psi(2,juo,iband) |
519 | - pipj2 = psi(1,iuo,iband) * psi(2,juo,iband) - |
520 | + pipj2 = - psi(1,iuo,iband) * psi(2,juo,iband) + |
521 | . psi(2,iuo,iband) * psi(1,juo,iband) |
522 | - pipjS1= pipj1*Saux(1,iuo,juo)-pipj2*Saux(2,iuo,juo) |
523 | - pipjS2= pipj1*Saux(2,iuo,juo)+pipj2*Saux(1,iuo,juo) |
524 | - dpr(ihist,juo,is) = dpr(ihist,juo,is) + |
525 | - . pipjS1*gauss*wk(ik) |
526 | + pipjS1 = pipj1*Saux(1,iuo,juo)-pipj2*Saux(2,iuo,juo) |
527 | + dpr(ihist,juo,is) = dpr(ihist,juo,is) + pipjS1*gauss |
528 | enddo |
529 | enddo |
530 | endif |
531 | @@ -339,7 +334,6 @@ |
532 | call de_alloc( xijloc, name='xijloc' ) |
533 | call de_alloc( Hnew, name='Hnew' ) |
534 | call de_alloc( Snew, name='Snew' ) |
535 | - call de_alloc( Dloc, name='Dloc' ) |
536 | call de_alloc( listhg, name='listhg' ) |
537 | call de_alloc( listhptrg, name='listhptrg' ) |
538 | call de_alloc( numhg, name='numhg' ) |
539 | |
540 | === modified file 'version.info' |
541 | --- version.info 2018-04-17 13:06:00 +0000 |
542 | +++ version.info 2018-04-18 08:41:10 +0000 |
543 | @@ -1,1 +1,5 @@ |
544 | +<<<<<<< TREE |
545 | siesta-4.1--893 |
546 | +======= |
547 | +siesta-4.1--892--phase-3 |
548 | +>>>>>>> MERGE-SOURCE |
Good. I will merge.