Merge remote-tracking branch 'ratto/master'

time-shift
Sébastien Villemot 2012-02-13 11:15:24 +01:00
commit f09c10e2d3
11 changed files with 128 additions and 36 deletions

View File

@ -59,6 +59,7 @@ while i<n
if gg(i)*(hh(i)*gg(i))/2 > htol
[f0 x fc retcode] = csminit(func0,x,f0,gg,0,diag(hh),DynareDataset,DynareOptions,Model,EstimatedParameters,BayesInfo,DynareResults);
ig(i)=1;
fprintf(['Done for param %s = %8.4f\n'],BayesInfo.name{i},x(i))
end
xh1=x;
end
@ -67,4 +68,3 @@ end
save gstep.mat x h1 f0

View File

@ -559,6 +559,7 @@ for Node=1:length(DataInput) % To obtain a recoursive function remove the 'for'
si0=[];
de0=[];
disp('Checking Hardware please wait ...');
if (DataInput(Node).Local == 1)
if Environment,
[si0 de0]=system('grep processor /proc/cpuinfo');
@ -579,7 +580,7 @@ for Node=1:length(DataInput) % To obtain a recoursive function remove the 'for'
RealCPUnbr='';
keyboard;
% keyboard;
RealCPUnbr=GiveCPUnumber(de0,OStargetUnix);
% NOTE: this check could probably be IMPROVED!

View File

@ -1,4 +1,4 @@
function closeSlave(Parallel,TmpFolder),
function closeSlave(Parallel,TmpFolder,partial),
% PARALLEL CONTEXT
% In parallel context, this utility closes all remote matlab instances
% called by masterParallel when strategy (1) is active i.e. always open (which leaves
@ -32,6 +32,32 @@ function closeSlave(Parallel,TmpFolder),
% You should have received a copy of the GNU General Public License
% along with Dynare. If not, see <http://www.gnu.org/licenses/>.
% Handling of the optional third argument 'partial':
%    1  -> write the flag file 'slaveParallel_break.mat' in the current
%          folder, send it to every non-local node, then return;
%   -1  -> delete that flag file locally and on every non-local node,
%          then return;
%   any other value (default 0 when omitted) falls through to the code
%   that follows this section.
if nargin<3,
    partial=0;
end

% Switch warnings off, keeping the previous state in s so that the early
% returns below can restore it rather than leaving warnings disabled for
% the caller.
s=warning('off');

if partial==1
    % The saved variable is only used to create the flag file.
    % NOTE(review): presumably the slaves poll for this file -- confirm.
    save('slaveParallel_break','partial')
    for indPC=1:length(Parallel),
        if (Parallel(indPC).Local==0),
            dynareParallelSendFiles('slaveParallel_break.mat',TmpFolder,Parallel(indPC));
        end
    end
    %     delete('slaveParallel_break')
    warning(s);
    return
end

if partial==-1
    % Remove the flag file locally first, then on each remote node.
    delete('slaveParallel_break.mat')
    for indPC=1:length(Parallel),
        if (Parallel(indPC).Local==0),
            dynareParallelDelete( 'slaveParallel_break.mat',TmpFolder,Parallel(indPC));
        end
    end
    %     delete('slaveParallel_break')
    warning(s);
    return
end
for indPC=1:length(Parallel),
if (Parallel(indPC).Local==0),
@ -58,3 +84,5 @@ while(1)
end
end
s=warning('on');

View File

@ -40,7 +40,7 @@ else
end
for indPC=1:length(Parallel),
if isunix
if ~ispc || strcmpi('unix',Parallel(indPC).OperatingSystem),
[NonServeS NonServeD]=system(['ssh ',Parallel(indPC).UserName,'@',Parallel(indPC).ComputerName,' rm -f ',Parallel(indPC).RemoteDirectory,'/',pname,fname]);
else
delete(['\\',Parallel(indPC).ComputerName,'\',Parallel(indPC).RemoteDrive,'$\',Parallel(indPC).RemoteDirectory,'\',pname,fname]);

View File

@ -57,9 +57,9 @@ for indPC=1:length(Parallel),
fileaddress={sT(1:SlashNumberAndPosition(end)),sT(SlashNumberAndPosition(end)+1:end)};
dynareParallelDelete(fileaddress{2},[PRCDir,fS,fileaddress{1}],Parallel(indPC));
display('New file deleted in remote -->');
display(fileaddress{2});
display('<--');
disp('New file deleted in remote -->');
disp(fileaddress{2});
disp('<--');
end
else

View File

@ -58,9 +58,9 @@ for indPC=1:length(Parallel),
fileaddress={sT(1:SlashNumberAndPosition(end)),sT(SlashNumberAndPosition(end)+1:end)};
dynareParallelGetFiles(fileaddress,PRCDir,Parallel(indPC));
display('New file copied in local -->');
display(fileaddress{2});
display('<--');
disp('New file copied in local -->');
disp(fileaddress{2});
disp('<--');
end
else

View File

@ -35,7 +35,25 @@ if nargin ==0,
return
end
% Security check performed before deleting the remote folder.
% All seven conditions must hold, otherwise an error is raised:
%   ok(1)   RemoteTmpFolder does not contain '..'
%   ok(2)   RemoteTmpFolder starts with the character '2'
%   ok(3-6) RemoteTmpFolder contains '-', 'h', 'm' and 's'
%   ok(7)   PRCDir is not empty
% NOTE(review): the pattern presumably matches a time-stamped folder name
% generated elsewhere -- confirm against the code that creates it.
ok(1)=isempty(strfind(Parallel_info.RemoteTmpFolder,'..'));
tmp1=strfind(Parallel_info.RemoteTmpFolder,'2');
% strfind returns [] when there is no '2' at all; guard the index so the
% check fails with the security error below instead of an indexing error.
ok(2)=~isempty(tmp1) && tmp1(1)==1;
ok(3)=~isempty(strfind(Parallel_info.RemoteTmpFolder,'-'));
ok(4)=~isempty(strfind(Parallel_info.RemoteTmpFolder,'h'));
ok(5)=~isempty(strfind(Parallel_info.RemoteTmpFolder,'m'));
ok(6)=~isempty(strfind(Parallel_info.RemoteTmpFolder,'s'));
ok(7)=~isempty(PRCDir);

if sum(ok)<7,
    error('The name of the remote tmp folder does not comply the security standards!'),
end
for indPC=1:length(Parallel),
ok(1)=isempty(strfind(Parallel(indPC).RemoteDirectory,'..'));
if sum(ok)<7,
error('The remote folder path structure does not comply the security standards!'),
end
while (1)
if ~ispc || strcmpi('unix',Parallel(indPC).OperatingSystem)
[stat NonServe] = system(['ssh ',Parallel(indPC).UserName,'@',Parallel(indPC).ComputerName,' rm -fr ',Parallel(indPC).RemoteDirectory,'/',PRCDir,]);

View File

@ -43,7 +43,8 @@ catch
end
fslave = dir( ['slaveParallel_input',int2str(njob),'.mat']);
if isempty(fslave),
fbreak = dir( ['slaveParallel_break.mat']);
if isempty(fslave) || ~isempty(fbreak),
error('Master asked to break the job');
end

View File

@ -81,20 +81,33 @@ try,
% Save the output result.
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
end
if isfield(fOutputVar,'CloseAllSlaves'),
CloseAllSlaves = 1;
fOutputVar = rmfield(fOutputVar,'CloseAllSlaves');
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
save(['comp_status_',funcName,int2str(whoiam),'.mat'],'CloseAllSlaves');
end
disp(['fParallel ',int2str(whoiam),' completed.'])
catch,
disp(['fParallel ',int2str(whoiam),' crashed.'])
fOutputVar.error = lasterror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
waitbarString = fOutputVar.error.message;
% waitbarTitle=['Metropolis-Hastings ',options_.parallel(ThisMatlab).ComputerName];
if Parallel(ThisMatlab).Local,
waitbarTitle='Local ';
theerror = lasterror;
if strfind(theerror.message,'Master asked to break the job')
fOutputVar.message = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
waitbarString = theerror.message;
else
waitbarTitle=[Parallel(ThisMatlab).ComputerName];
disp(['fParallel ',int2str(whoiam),' crashed.'])
fOutputVar.error = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
waitbarString = theerror.message;
% waitbarTitle=['Metropolis-Hastings ',options_.parallel(ThisMatlab).ComputerName];
if Parallel(ThisMatlab).Local,
waitbarTitle='Local ';
else
waitbarTitle=[Parallel(ThisMatlab).ComputerName];
end
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
end
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
end
diary off;

View File

@ -142,6 +142,7 @@ switch Strategy
save(['temp_input.mat'],'fInputVar')
end
save(['temp_input.mat'],'Parallel','-append')
closeSlave(Parallel,PRCDir,-1);
end
@ -423,6 +424,7 @@ for j=1:totCPU,
if isempty(PRCDirSnapshot{indPC}),
PRCDirSnapshot(indPC)=dynareParallelSnapshot(PRCDir,Parallel(indPC));
PRCDirSnapshotInit(indPC) = PRCDirSnapshot(indPC);
else
PRCDirSnapshot(indPC)=dynareParallelGetNewFiles(PRCDir,Parallel(indPC),PRCDirSnapshot(indPC));
end
@ -453,6 +455,7 @@ end
if Strategy==0 || newInstance, % See above.
PRCDirSnapshot=dynareParallelSnapshot(PRCDir,Parallel(1:totSlaves));
PRCDirSnapshotInit = PRCDirSnapshot;
% Run the slaves.
if ~ispc, %isunix || (~matlab_ver_less_than('7.4') && ismac),
@ -587,6 +590,7 @@ NuoviFilecopiati=zeros(1,totSlaves);
ForEver=1;
statusString = '';
flag_CloseAllSlaves=0;
while (ForEver)
@ -607,6 +611,12 @@ while (ForEver)
try
if ~isempty(['comp_status_',fname,int2str(j),'.mat'])
load(['comp_status_',fname,int2str(j),'.mat']);
% whoCloseAllSlaves = who(['comp_status_',fname,int2str(j),'.mat','CloseAllSlaves']);
if exist('CloseAllSlaves') && flag_CloseAllSlaves==0,
flag_CloseAllSlaves=1;
whoiamCloseAllSlaves=j;
closeSlave(Parallel(1:totSlaves),PRCDir,1);
end
end
pcerdone(j) = prtfrc;
idCPU(j) = njob;
@ -711,11 +721,16 @@ for j=1:totCPU,
for jstack=1:length(fOutputVar.error.stack)
fOutputVar.error.stack(jstack),
end
else
elseif flag_CloseAllSlaves==0,
fOutVar(j)=fOutputVar;
elseif j==whoiamCloseAllSlaves,
fOutVar=fOutputVar;
end
end
if flag_CloseAllSlaves==1,
closeSlave(Parallel(1:totSlaves),PRCDir,-1);
end
if iscrash,
error('Remote jobs crashed');
@ -737,10 +752,11 @@ switch Strategy
[A B C]=rmdir('dynareParallelLogFiles');
mkdir('dynareParallelLogFiles');
end
copyfile('*.log','dynareParallelLogFiles');
delete([fname,'*.log']);
try
copyfile('*.log','dynareParallelLogFiles');
mydelete([fname,'*.log']);
catch
end
mydelete(['*_core*_input*.mat']);
% if Parallel(indPC).Local == 1
% delete(['slaveParallel_input*.mat']);

View File

@ -136,6 +136,13 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in
% Save the output result.
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' );
% keyboard,
if isfield(fOutputVar,'CloseAllSlaves'),
CloseAllSlaves = 1;
fOutputVar = rmfield(fOutputVar,'CloseAllSlaves');
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
save(['comp_status_',funcName,int2str(whoiam),'.mat'],'CloseAllSlaves');
end
% Inform the master that the job is finished, and transfer the output data
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
@ -143,19 +150,27 @@ while (etime(clock,t0)<1200 && ~isempty(fslave)) || ~isempty(dir(['stayalive',in
disp(['Job ',fname,' on CPU ',int2str(whoiam),' completed.']);
t0 =clock; % Re-set waiting time of 20 mins
catch ME
disp(['Job ',fname,' on CPU ',int2str(whoiam),' crashed.']);
fOutputVar.error = ME;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' );
waitbarString = fOutputVar.error.message;
if Parallel(ThisMatlab).Local,
waitbarTitle='Local ';
catch,
theerror = lasterror;
if strfind(theerror.message,'Master asked to break the job')
disp(['Job ',fname,' on CPU ',int2str(whoiam),' broken from master.']);
fOutputVar.message = theerror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' )
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
else
waitbarTitle=[Parallel(ThisMatlab).ComputerName];
disp(['Job ',fname,' on CPU ',int2str(whoiam),' crashed.']);
fOutputVar.error = lasterror;
save([ fname,'_output_',int2str(whoiam),'.mat'],'fOutputVar' );
waitbarString = fOutputVar.error.message;
if Parallel(ThisMatlab).Local,
waitbarTitle='Local ';
else
waitbarTitle=[Parallel(ThisMatlab).ComputerName];
end
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
break
end
fMessageStatus(NaN,whoiam,waitbarString, waitbarTitle, Parallel(ThisMatlab));
delete(['P_',fname,'_',int2str(whoiam),'End.txt']);
break
end
end