From 0e98d4266ced516f95d60bd4e7523e4c9bdd7626 Mon Sep 17 00:00:00 2001 From: Marco Ratto Date: Wed, 14 Apr 2010 17:19:21 +0200 Subject: [PATCH] 1) Several bug fixes for making the remote threads work properly under the Always/Open mode; 2) small cosmetics; --- matlab/masterParallelMan.m | 183 +++++++++++++++++++++---------------- 1 file changed, 104 insertions(+), 79 deletions(-) diff --git a/matlab/masterParallelMan.m b/matlab/masterParallelMan.m index c543a8925..fca03849f 100644 --- a/matlab/masterParallelMan.m +++ b/matlab/masterParallelMan.m @@ -47,36 +47,40 @@ function [fOutVar,nBlockPerCPU, totCPU] = masterParallelMan(Parallel,fBlock,nBlo % % You should have received a copy of the GNU General Public License % along with Dynare. If not, see <http://www.gnu.org/licenses/>. + +% Delete the traces (if exists) of last section computations. persistent initialize if isempty(initialize), - mydelete(['P_slave_*End.txt']); - mydelete(['slaveParallel_input*.mat']); - initialize = 0; - pause(1), + mydelete(['P_slave_*End.txt']); + mydelete(['slaveParallel_input*.mat']); + initialize = 0; + pause(1), end totCPU=0; % Determine my hostname and my working directory + DyMo=pwd; fInputVar.DyMo=DyMo; if isunix || (~matlab_ver_less_than('7.4') && ismac) , -% [tempo, MasterName]=system(['ifconfig | grep ''inet addr:''| grep -v ''127.0.0.1'' | cut -d: -f2 | awk ''{ print $1}''']); + % [tempo, MasterName]=system(['ifconfig | grep ''inet addr:''| grep -v ''127.0.0.1'' | cut -d: -f2 | awk ''{ print $1}''']); [tempo, MasterName]=system('hostname --fqdn'); -else +else [tempo, MasterName]=system('hostname'); end + MasterName=deblank(MasterName); fInputVar.MasterName = MasterName; % Save input data for use by the slaves if exist('fGlobalVar'), - save(['temp_input.mat'],'fInputVar','fGlobalVar') + save(['temp_input.mat'],'fInputVar','fGlobalVar') else - save(['temp_input.mat'],'fInputVar') + save(['temp_input.mat'],'fInputVar') end -save(['temp_input.mat'],'Parallel','-append') 
+save(['temp_input.mat'],'Parallel','-append') % Determine the total number of available CPUs, and the number of threads to run on each CPU for j=1:length(Parallel), @@ -131,54 +135,54 @@ for j=1:totCPU, end pause(1); % wait for possibly local alive CPU to start the new job or close by internal criteria newInstance = 0; + if isempty( dir(['P_slave_',int2str(j),'End.txt'])); % check if j CPU is already alive fid1=fopen(['P_slave_',int2str(j),'End.txt'],'w+'); fclose(fid1); newInstance = 1; - storeGlobalVars( ['slaveParallel_input',int2str(j)]); - save( ['slaveParallel_input',int2str(j)],'Parallel','-append'); + storeGlobalVars( ['slaveParallel_input',int2str(j)]); + save( ['slaveParallel_input',int2str(j)],'Parallel','-append'); % prepare global vars for Slave end if Parallel(indPC).Local == 1 & newInstance, % run on the local machine if isunix || (~matlab_ver_less_than('7.4') && ismac), if exist('OCTAVE_VERSION') -% command1=['octave --eval fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\) &']; + % command1=['octave --eval fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\) &']; command1=['octave --eval slaveParallel\(',int2str(j),',',int2str(indPC),'\) &']; else -% command1=['matlab -nosplash -nodesktop -minimize -r fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\) &']; + % command1=['matlab -nosplash -nodesktop -minimize -r fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\) &']; command1=['matlab -nosplash -nodesktop -minimize -r slaveParallel\(',int2str(j),',',int2str(indPC),'\) &']; end else if exist('OCTAVE_VERSION') -% command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low octave --eval 
fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; + % command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low octave --eval fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low octave --eval slaveParallel(',int2str(j),',',int2str(indPC),')']; else -% command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low matlab -nosplash -nodesktop -minimize -r fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; + % command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low matlab -nosplash -nodesktop -minimize -r fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; command1=['start /B psexec -W ',DyMo, ' -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)),' -low matlab -nosplash -nodesktop -minimize -r slaveParallel(',int2str(j),',',int2str(indPC),')']; end end elseif Parallel(indPC).Local==0, if isunix || (~matlab_ver_less_than('7.4') && ismac), -% [tempo, RemoteName]=system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "ifconfig | grep \''inet addr:\''| grep -v \''127.0.0.1\'' | cut -d: -f2 | awk \''{ print $1}\''"']); + % [tempo, RemoteName]=system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "ifconfig | grep \''inet addr:\''| grep -v \''127.0.0.1\'' | cut -d: -f2 | awk \''{ print $1}\''"']); [tempo, RemoteName]=system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "hostname --fqdn"']); RemoteName=RemoteName(1:end-1); RemoteFolder = Parallel(indPC).RemoteFolder; - else + else RemoteName = Parallel(indPC).PcName; RemoteFolder = 
[Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder]; end remoteFlag=1; - + if strcmpi(RemoteName,MasterName), if ~copyfile(['P_',fname,'_',int2str(j),'End.txt'],RemoteFolder), remoteFlag=0; end end if remoteFlag, - if j==nCPU0+1, - if newInstance, % clean remote folder + if (j==nCPU0+1) & newInstance, % clean remote folder if isunix || (~matlab_ver_less_than('7.4') && ismac), system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' rm -fr ',Parallel(indPC).RemoteFolder,'/*']); else @@ -191,60 +195,62 @@ for j=1:totCPU, end end end + end + + if isunix || (~matlab_ver_less_than('7.4') && ismac), + % system(['scp ',fname,'_input.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); + for jfil=1:size(NamFileInput,1) + if ~isempty(NamFileInput{jfil,1}) + system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' mkdir -p ',Parallel(indPC).RemoteFolder,'/',NamFileInput{jfil,1}]) + end + system(['scp ',NamFileInput{jfil,1},NamFileInput{jfil,2},' ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder,'/',NamFileInput{jfil,1}]); end - - if isunix || (~matlab_ver_less_than('7.4') && ismac), - system(['scp ',fname,'_input.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); - for jfil=1:size(NamFileInput,1) - if ~isempty(NamFileInput{jfil,1}) - system(['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' mkdir -p ',Parallel(indPC).RemoteFolder,'/',NamFileInput{jfil,1}]) - end - system(['scp ',NamFileInput{jfil,1},NamFileInput{jfil,2},' ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder,'/',NamFileInput{jfil,1}]); - end - system(['scp slaveJob',int2str(j),'.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); - if newInstance, - system(['scp slaveParallel_input',int2str(j),'.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); - end - else - 
copyfile([fname,'_input.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]); - for jfil=1:size(NamFileInput,1) - copyfile([NamFileInput{jfil,1},NamFileInput{jfil,2}],['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder,'\',NamFileInput{jfil,1}]) - end - copyfile(['slaveJob',int2str(j),'.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]); - if newInstance, - copyfile(['slaveParallel_input',int2str(j),'.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]); + system(['scp slaveJob',int2str(j),'.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); + if newInstance, + system(['scp slaveParallel_input',int2str(j),'.mat ',Parallel(indPC).user,'@',Parallel(indPC).PcName,':',Parallel(indPC).RemoteFolder]); + end + else + % copyfile([fname,'_input.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]) + for jfil=1:size(NamFileInput,1) + if ~isempty(NamFileInput{jfil,1}) + mkdir(['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder,'\',NamFileInput{jfil,1}]); end + copyfile([NamFileInput{jfil,1},NamFileInput{jfil,2}],['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder,'\',NamFileInput{jfil,1}]) + end + copyfile(['slaveJob',int2str(j),'.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]); + if newInstance, + copyfile(['slaveParallel_input',int2str(j),'.mat'], ['\\',Parallel(indPC).PcName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteFolder]); end end end if newInstance, - if isunix || (~matlab_ver_less_than('7.4') && ismac), - if exist('OCTAVE_VERSION'), - command1=['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "cd 
',Parallel(indPC).RemoteFolder, '; octave --eval fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\);" &']; + if isunix || (~matlab_ver_less_than('7.4') && ismac), + if exist('OCTAVE_VERSION'), + command1=['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "cd ',Parallel(indPC).RemoteFolder, '; octave --eval slaveParallel\(',int2str(j),',',int2str(indPC),'\);" &']; + else + command1=['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "cd ',Parallel(indPC).RemoteFolder, '; matlab -nosplash -nodesktop -minimize -r slaveParallel\(',int2str(j),',',int2str(indPC),'\);" &']; + end else - command1=['ssh ',Parallel(indPC).user,'@',Parallel(indPC).PcName,' "cd ',Parallel(indPC).RemoteFolder, '; matlab -nosplash -nodesktop -minimize -r fParallel\(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',\''',fname,'\''\);" &']; - end - else - if ~strcmp(Parallel(indPC).PcName,MasterName), % run on a remote machine - if exist('OCTAVE_VERSION'), - command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -u ',Parallel(indPC).user,' -p ',Parallel(indPC).passwd,' -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... - ' -low octave --eval fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; - else - command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -u ',Parallel(indPC).user,' -p ',Parallel(indPC).passwd,' -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... 
- ' -low matlab -nosplash -nodesktop -minimize -r fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; - end - else % run on the local machine via the network - if exist('OCTAVE_VERSION'), - command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... - ' -low octave --eval fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; - else - command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... - ' -low matlab -nosplash -nodesktop -minimize -r fParallel(',int2str(offset+1),',',int2str(sum(nBlockPerCPU(1:j))),',',int2str(j),',',int2str(indPC),',''',fname,''')']; + if ~strcmp(Parallel(indPC).PcName,MasterName), % run on a remote machine + if exist('OCTAVE_VERSION'), + command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -u ',Parallel(indPC).user,' -p ',Parallel(indPC).passwd,' -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... + ' -low octave --eval slaveParallel(',int2str(j),',',int2str(indPC),')']; + else + command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -u ',Parallel(indPC).user,' -p ',Parallel(indPC).passwd,' -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... + ' -low matlab -nosplash -nodesktop -minimize -r slaveParallel(',int2str(j),',',int2str(indPC),')']; + end + else % run on the local machine via the network + if exist('OCTAVE_VERSION'), + command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... 
+ ' -low octave --eval slaveParallel(',int2str(j),',',int2str(indPC),')']; + else + command1=['start /B psexec \\',Parallel(indPC).PcName,' -e -W ',Parallel(indPC).RemoteDrive,':\',Parallel(indPC).RemoteFolder,'\ -a ',int2str(Parallel(indPC).NumCPU(j-nCPU0)), ... + ' -low matlab -nosplash -nodesktop -minimize -r slaveParallel(',int2str(j),',',int2str(indPC),')']; + end end end end - end end fprintf(fid,'%s\n',command1); end @@ -268,25 +274,28 @@ if exist('OCTAVE_VERSION'), printf('\n'); else hfigstatus = figure('name',['Parallel ',fname],... - 'MenuBar', 'none', ... - 'NumberTitle','off'); + 'MenuBar', 'none', ... + 'NumberTitle','off'); vspace = 0.1; ncol = ceil(totCPU/10); hspace = 0.9/ncol; for j=1:totCPU, jrow = mod(j-1,10)+1; - jcol = ceil(j/10); + jcol = ceil(j/10); hstatus(j) = axes('position',[0.05/ncol+(jcol-1)/ncol 0.92-vspace*(jrow-1) 0.9/ncol 0.03], ... - 'box','on','xtick',[],'ytick',[],'xlim',[0 1],'ylim',[0 1]); + 'box','on','xtick',[],'ytick',[],'xlim',[0 1],'ylim',[0 1]); end cumBlockPerCPU = cumsum(nBlockPerCPU); end pcerdone = NaN(1,totCPU); while (1) + waitbarString = ''; statusString = ''; pause(1) - stax = dir(['comp_status_',fname,'*.mat']); + + stax = dir(['comp_status_',fname,'*.mat']); + for j=1:length(stax), try @@ -295,8 +304,8 @@ while (1) if exist('OCTAVE_VERSION'), statusString = [statusString, int2str(j), ' %3.f%% done! 
']; else - status_String{j} = waitbarString; - status_Title{j} = waitbarTitle; + status_String{j} = waitbarString; + status_Title{j} = waitbarTitle; idCPU(j) = njob; end if prtfrc==1, delete(stax(j).name), end @@ -307,19 +316,35 @@ while (1) if exist('OCTAVE_VERSION'), printf([statusString,'\r'], 100 .* pcerdone); else + figure(hfigstatus), - for j=1:length(stax), - axes(hstatus(idCPU(j))), - hpat = findobj(hstatus(idCPU(j)),'Type','patch'); - if ~isempty(hpat), - set(hpat,'XData',[0 0 pcerdone(j) pcerdone(j)]) - else - patch([0 0 pcerdone(j) pcerdone(j)],[0 1 1 0],'r','EdgeColor','r') + try + for j=1:length(stax) + + axes(hstatus(idCPU(j))), + hpat = findobj(hstatus(idCPU(j)),'Type','patch'); + + + if ~isempty(hpat), + set(hpat,'XData',[0 0 pcerdone(j) pcerdone(j)]) + else + patch([0 0 pcerdone(j) pcerdone(j)],[0 1 1 0],'r','EdgeColor','r') + end + title([status_Title{j},' - ',status_String{j}]); + end - title([status_Title{j},' - ',status_String{j}]); + catch + + E='ERRORE in while cycle masterParallelMann!' + j + if j>1 + j=j-1 + end + j end + end - if isempty(dir(['P_',fname,'_*End.txt'])) + if isempty(dir(['P_',fname,'_*End.txt'])) mydelete(['comp_status_',fname,'*.mat']) if ~exist('OCTAVE_VERSION'), close(hfigstatus),