270 likes | 380 Views
This document describes an extensible directory-walking macro that searches a directory and all of its subdirectories for files with specific attributes, such as large size, a given string in the file name, or a given string inside the file. The macro is implemented recursively and is customized through pluggable macros and enclosing wrapper macros. The approach is useful on large or unfamiliar file systems and for automating repetitive searches, and it duplicates the functionality of Linux tools such as grep. Techniques demonstrated include recursion, passing a macro name and its parameters as arguments, and the SAS file and string functions.
E N D
Cary Miller DSUG Colorado Day September 13, 2007 An extensible directory-walking macro
The problem • Search a directory and all subdirectories for files with certain attributes such as • Large files • Files with a specific string in the file name • Files containing a specific string inside
Why? • Large file system • Unfamiliar system (new job) • Human memory failure • Automation/repetition • Duplicates the functionality of Linux tools (grep, etc.)
How to walk a directory • Recursion with a generic macro • Customize behavior with • Pluggable macros • Enclosing macros • Tools used • File functions • String functions • Modularity
Recursion • Simple in concept, tricky in practice • A macro that calls itself • Factorial • Fibonacci
/*
 * factorial(n)
 *   Recursive macro "function": resolves to the text of n! for n > 1,
 *   and to 1 for any n that is not greater than 1.
 *
 *   n - integer; it is compared with GT and used inside %eval.
 *
 * The two branches carry no trailing semicolons, exactly as in the
 * original: the macro is invoked inside %let and %put below, where it has
 * to resolve to a bare number.  Only the token spacing was restored
 * ("%else1" would be read as a call to a macro named ELSE1).
 */
%macro factorial(n);
  %if &n GT 1 %then %eval(&n * %factorial(%eval(&n - 1)))
  %else 1
%mend factorial;

%let x = %factorial(1); *ok;
%let x = %factorial(2); *ok;
%let x = %factorial(3); *ok;
%put &x;
%put %factorial(5);
%macro dirWalk(fname); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file); %ELSE*&fname is an ordinary file; %put &fname; %mend dirWalk;
/*
 * dirWalk(fname)
 *   Walk fname recursively and write the path of every entry that cannot
 *   be opened as a directory to the log with %put.
 *
 *   fname - path to start from.  Children are addressed as
 *           <fname>/<member>, so the separator added here is always "/".
 *
 * Notes
 *   - Every working variable, including NAME, is %local so that each level
 *     of the recursion (and the caller) keeps its own values.
 *   - FILRF is blank when FILENAME is called, so SAS generates a fileref
 *     and stores it in FILRF; it is deassigned again before returning so
 *     that the walk does not leave one fileref behind per path visited.
 *   - Comments use the slash-star form; a "* ...;" statement comment
 *     inside a macro is emitted as generated text.
 */
%macro dirWalk(fname);
  %local i filrf rc did memcnt name;

  /* Try to open this fname as a directory. */
  %let rc  = %sysfunc(filename(filrf,&fname));
  %let did = %sysfunc(dopen(&filrf));

  %if &did > 0 %then %do;
    /* It opened as a directory, so walk each member in turn. */
    %let memcnt = %sysfunc(dnum(&did));
    %do i = 1 %to &memcnt;   /* runs zero times for an empty directory */
      %let name = &fname/%sysfunc(dread(&did,&i));
      %dirWalk(&name);
    %end;
    %let rc = %sysfunc(dclose(&did));
  %end;
  %else %put &fname;

  /* Release the fileref that FILENAME assigned above. */
  %if %length(&filrf) > 0 %then %let rc = %sysfunc(filename(filrf));
%mend dirWalk;
180 %dirwalk(s:\foo); s:\foo s:\foo/c.txt s:\foo/d.sas s:\foo/a s:\foo/a/x.txt s:\foo/a/y.txt s:\foo/b
Recursion works! • Add functionality by passing custom macro
%macro dirWalk(fname, fileMacro); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file, &fileMacro); %ELSE *&fname is an ordinary file; %&fileMacro(&fname); %mend dirWalk; %macro parrot(fname); %put ..... &fname ...... parrot; %mend;
Great • Same result as the code that did not pass a macro name. • So why go to the extra trouble? • Because we want to pass a non-trivial macro.
/*
 * parrot(fname)
 *   Plug-in file macro: echoes the file name to the log, framed by dots
 *   and the word "parrot".
 */
%macro parrot(fname);
  %put ..... &fname ...... parrot;
%mend parrot;

/*
 * inFileName(fname, substr)
 *   Plug-in file macro: writes fname to the log when substr occurs
 *   somewhere in fname; otherwise writes nothing.
 */
%macro inFileName(fname, substr);
  %if %index(&fname,&substr) NE 0 %then %do;
    %put &fname;
  %end;
%mend inFileName;
How to pass a macro that accepts parameters? %macro dirWalk(fname, fileMacro, macroParams); %if &fname is a directory %then; for each &file in &fname %dirWalk(&fname\&file, &fileMacro, &macroParams); %ELSE *&fname is an ordinary file; %&fileMacro(&fname, &macroParams); %mend dirWalk; %macro parrot(fname, ignore); %put ..... &fname ...... parrot; %mend;
146 %dirwalk(s:\foo, fileMacro=parrot, macroParams=nothing); ..... s:\foo\c.txt ...... parrot ..... s:\foo\d.sas ...... parrot ..... s:\foo\a\x.txt ...... parrot ..... s:\foo\a\y.txt ...... parrot 148 %dirwalk(s:\~Cary\, fileMacro=inFileName, macroParams=txt); s:\~Cary\\bak\phone.txt s:\~Cary\\sas\sas.formats\win32user.txt s:\~Cary\\sas.items\bak\sas.item.macro.txt s:\~Cary\\sas.mm\notes.txt s:\~Cary\\to.go\data\RN99060.txt s:\~Cary\\to.go\data\states.54.txt s:\~Cary\\to.go\sas.cfmc\reports.1\plan.txt s:\~Cary\\vb.code\newReport.txt s:\~Cary\\vb.code\report.code.txt
It works! • But the macro call is unwieldy • Enclose the call in another macro
/*
 * listAll(dirName)
 *   Convenience wrapper: walks dirName with %dirwalk, using %parrot as the
 *   per-file macro.  %parrot ignores its second argument, so a placeholder
 *   value is passed for macroParams.
 */
%macro listAll(dirName);
  %dirwalk(&dirName, fileMacro=parrot, macroParams=nothing);
%mend listAll;

/*
 * subInFN(dirName, substring)
 *   Convenience wrapper: walks dirName with %dirwalk, using %inFileName as
 *   the per-file macro and handing it substring as its parameter.
 */
%macro subInFN(dirName, substring);
  %dirwalk(&dirName, fileMacro=inFileName, macroParams=&substring);
%mend subInFN;
146 %dirwalk(s:\foo, fileMacro=parrot, macroParams=nothing); 146 %listAll(s:\foo); ..... s:\foo\c.txt ...... parrot ..... s:\foo\d.sas ...... parrot ..... s:\foo\a\x.txt ...... parrot ..... s:\foo\a\y.txt ...... parrot 148 %dirwalk(s:\~Cary\, fileMacro=inFileName, macroParams=txt); 148 %subInFN(s:\~Cary\, substring=txt); s:\~Cary\\bak\phone.txt s:\~Cary\\sas\sas.formats\win32user.txt s:\~Cary\\sas.items\bak\sas.item.macro.txt s:\~Cary\\sas.mm\notes.txt s:\~Cary\\to.go\data\RN99060.txt s:\~Cary\\to.go\data\states.54.txt s:\~Cary\\to.go\sas.cfmc\reports.1\plan.txt s:\~Cary\\vb.code\newReport.txt s:\~Cary\\vb.code\report.code.txt
That’s it! • Adding plug-ins and wrappers is easy
/* Find big files */

/*
 * findBigFile(fname, sizeCutoff)
 *   Plug-in file macro: obtains the size of fname from %fileDataParse and,
 *   when it is greater than sizeCutoff, writes "<size> <fname>" to the log.
 *
 *   fname      - file to examine.
 *   sizeCutoff - threshold that the size is compared against with GT.
 *
 *   FSIZE is declared %local so that the assignment cannot overwrite a
 *   variable of the same name in an enclosing scope.  When no size comes
 *   back the macro returns without comparing anything.
 */
%macro findBigFile(fname, sizeCutoff);
  %local fsize;
  %let fsize = %fileDataParse(&fname, fsize);
  %if %length(&fsize) EQ 0 %then %return;
  %if &fsize GT &sizeCutoff %then %put &fsize &fname;
%mend findBigFile;

/*
 * findBig(dirName, cutoff=10000)
 *   Convenience wrapper: walks dirName with %dirwalk, using %findBigFile
 *   as the per-file macro and cutoff as its size threshold.
 */
%macro findBig(dirName, cutoff=10000);
  %dirwalk(&dirName, fileMacro=findBigFile, macroParams=&cutoff);
%mend findBig;
/*
 * fileData(filename)
 *   Runs the DOS command "dir <filename>" through a pipe fileref and
 *   resolves to the output line(s) in which %index finds the value of
 *   %filebase(&filename).  Lines without a match contribute nothing.
 *
 *   filename - path handed to "dir".
 *
 * Notes
 *   - %filebase is not defined in this block; presumably it yields the
 *     file name without its directory part - verify against its definition.
 *   - At most 200 characters of each output line are read (FGET length).
 *   - The double quotes in "&str" are part of the %index argument; they
 *     keep commas in the dir output from being read as argument separators.
 *   - The macro returns its result as generated text, so only slash-star
 *     comments are used inside it.
 *   - IND is now %local, the never-read RES assignment is gone, FCLOSE is
 *     only called for a file that actually opened, and the pipe fileref is
 *     deassigned before returning.
 */
%macro fileData(filename);
  %local fname fid str rc command ind;

  %let command = dir &filename;
  /* FNAME is blank here, so FILENAME generates the fileref and stores it. */
  %let rc  = %sysfunc(filename(fname,&command,pipe));
  %let fid = %sysfunc(fopen(&fname,s));

  %if &fid GT 0 %then %do;
    %do %while(%sysfunc(fread(&fid)) EQ 0);
      %let rc  = %sysfunc(fget(&fid,str,200));
      %let ind = %index("&str", %filebase(&filename));
      /* Blank out lines that do not mention the file. */
      %if &ind EQ 0 %then %let str=;
      &str
    %end;
    %let rc = %sysfunc(fclose(&fid));
  %end;

  /* Release the pipe fileref assigned above. */
  %if %length(&fname) GT 0 %then %let rc = %sysfunc(filename(fname));
%mend fileData;
/*%let foo = %fileData(&testFile1); %put &foo;*/
/*
 * fileDataParse(filename, attribute)
 *   Parses the line returned by %fileData(&filename) into pieces and
 *   resolves to the one piece named by attribute.
 *
 *   filename  - path handed on to %fileData.
 *   attribute - name of the piece to return: modDate, modTime, fsize or
 *               fname.
 *
 * Notes
 *   - Pieces are taken by position: word 1 is the date, words 2 and 3 the
 *     time, word 4 the size and word 5 the name.  This assumes the layout
 *     of the local "dir" output and a file name without blanks - TODO
 *     confirm for the target system.
 *   - All working variables are %local.  Previously "%let fname = ..."
 *     wrote into the caller's FNAME (for example the parameter of
 *     %findBigFile), replacing the path the caller had been given.
 *   - When %fileData returns nothing the macro resolves to nothing instead
 *     of calling %substr with a zero length.
 *   - The macro returns its result as generated text, so only slash-star
 *     comments are used inside it.
 */
%macro fileDataParse(filename, attribute);
  %local str len modDate modTime fsize fname;

  %let str = %fileData(&filename);
  /* %superq keeps commas in the dir output from splitting the argument. */
  %if %length(%superq(str)) EQ 0 %then %return;

  %let str = %sysfunc(compress("&str", ','));      /*remove commas*/
  %let str = %sysfunc(compbl(&str));               /*reduce space*/
  %let len = %eval(%sysfunc(length(&str)) -2);
  %let str = %substr(&str,2,&len);                 /*remove enclosing quotes*/

  %let modDate = %sysfunc(scan(&str,1, ' '));
  %let modTime = %sysfunc(scan(&str,2, ' ')) %sysfunc(scan(&str,3, ' '));
  %let fsize   = %sysfunc(scan(&str,4, ' '));
  %let fname   = %scan(&str,5, ' ');

  /* Resolve to the value of the variable whose name is in ATTRIBUTE. */
  &&&attribute.
%mend fileDataParse;
/*%let fs = %fileDataParse(&testFile1, fsize); %put fs &fs;*/
/*%let fn = %fileDataParse(&testFile1, fname); %put fn &fn;*/
/*%let md = %fileDataParse(&testFile1, modDate); %put md &md;*/
/*%let mt = %fileDataParse(&testFile1, modTime); %put mt &mt;*/
%findBig(&testDir2, cutoff=1000); 7338 annotate.png 3161 sas.item.macros.sas 17408 sas.items.xls 3216 dirwalk.1.sas 5142 dirwalk.2.sas 12485 recursion.1.sas 11125 recursion.sas 4585 recursion.whitlock.sas Big file example
String in file example %strInFile(s:\~Cary, goto, line) s:\~Cary\sas.items\flatten.sas :::: %if &i = 3 %then %goto continue ;
Same strategy works for common analytic tasks • Frequency tables • Histograms • etc
If you don’t want to reinvent the wheel • Roland Rashleigh-Berry has similar macros • DataSavantConsulting.com • SAS-L