/* lextract.c

   Norbert H. Doerry

   SYNTAX:

     lextract infile -sXXX -cXX -i -rXX -oOutfile

   where

     infile  = Name of input ascii flat file
     -sXXX   = XXX is the search string
     -cXX    = XX is the column to look for the search string,
               if omitted, searches the entire record (line)
     -i      = ignore case of search string
     -rXX    = record length, including terminating newline if present
               if omitted, assumes lines are terminated with newlines
     Outfile = Name of ascii output file
               if omitted, output is written to stdout

   NOTE: A column is the same as the character offset from the start
         of the record.  Columns are counted from 1 (column 1 is the
         first character of the record); a column of 0 means "search
         the entire record".

   Additional switches accepted by the parser: -d prints the parsed
   settings, -h or -? prints the help text.  A switch may start with
   either '-' or '/', so an argument beginning with '/' is always
   treated as a switch and never as an input filename.

   Version 1.0  of May 1997
   Version 1.0a of June 2002: added GNU GPL and recompiled with
                              32 bit compiler

   ---------------------------------------------------------------

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of
   the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA.

   ---------------------------------------------------------------

   If you discover any bugs, or have any questions concerning these
   programs, please send me an email (doerry@aol.com)
*/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VERSION "1.0a"

/* Maximum number of characters read per call in newline-delimited
   mode.  Longer lines are read (and tested) in MAXCHAR-sized pieces. */
#define MAXCHAR 2047

/* All state of one lextract run. */
typedef struct Lextract
{
    char *infile;         /* input filename, "" when reading stdin      */
    FILE *in;             /* input stream                               */
    char *outfile;        /* output filename, "" when writing stdout    */
    FILE *out;            /* output stream                              */
    char *search_string;  /* text to look for (lower-cased when -i)     */
    int search_column;    /* 1-based column to test, 0 = whole record   */
    int ignore_flag;      /* 1 = case-insensitive search                */
    int record_length;    /* fixed record size, 0 = newline delimited   */
    int debug;            /* 1 = print the parsed settings              */
    char *rdline;         /* buffer holding the current record          */
} LEXTRACT;

void initialize_data(LEXTRACT *le);
void process_command_line(LEXTRACT *le, int argc, char **argv);
void print_help(FILE *out);
void print_error(void);
char *copystr(const char *string);
int strncmpa(const char *s1, const char *s2, size_t n);
int read_line(LEXTRACT *le);
int check_line(LEXTRACT *le);
static int filenames_match(const char *a, const char *b);

/* Program entry point: copies every record that matches the search
   criteria from the input to the output.

   Define LEXTRACT_NO_MAIN when compiling to leave main() out, so the
   helper functions can be linked into a unit-test driver. */
#ifndef LEXTRACT_NO_MAIN
int main(int argc, char **argv)
{
    LEXTRACT le;
    int status = EXIT_SUCCESS;

    initialize_data(&le);
    process_command_line(&le, argc, argv);

    while (read_line(&le)) {
        if (check_line(&le)) {
            fputs(le.rdline, le.out);
        }
    }

    if (le.in != stdin) {
        fclose(le.in);
    }

    /* Buffered write errors may only show up on flush/close, so check
       the stream state and the result of flushing or closing it. */
    if (ferror(le.out)) {
        status = EXIT_FAILURE;
    }
    if (le.out != stdout) {
        if (fclose(le.out) != 0) {
            status = EXIT_FAILURE;
        }
    } else if (fflush(stdout) != 0) {
        status = EXIT_FAILURE;
    }
    if (status != EXIT_SUCCESS) {
        fprintf(stderr, " *** ERROR : Failure while writing output\n");
    }

    return status;
}
#endif /* LEXTRACT_NO_MAIN */

/* initialize_data() puts every member of *le into its "not set" state:
   pointers are NULL and all numeric settings are 0. */
void initialize_data(LEXTRACT *le)
{
    le->infile = NULL;
    le->in = NULL;
    le->outfile = NULL;
    le->out = NULL;
    le->search_string = NULL;
    le->search_column = 0;
    le->ignore_flag = 0;
    le->record_length = 0;
    le->debug = 0;
    le->rdline = NULL;
}

/* filenames_match() returns 1 if the two filenames are equal when case
   is ignored, and 0 otherwise.  The whole names are compared, so one
   name being a prefix of the other is not a match.  Two different
   paths to the same file are not detected. */
static int filenames_match(const char *a, const char *b)
{
    size_t len = strlen(a);

    return len == strlen(b) && strncmpa(a, b, len) == 0;
}

/* process_command_line() fills *le from the command line arguments,
   opens the input and output streams and allocates the read buffer.

   Any usage error prints a message and the help text to stderr and
   exits with EXIT_FAILURE; -h / -? prints the help text to stdout and
   exits with EXIT_SUCCESS.

   Files are opened only after every argument has been validated, and
   the input is opened before the output, so a bad command line never
   truncates an existing output file. */
void process_command_line(LEXTRACT *le, int argc, char **argv)
{
    int i;

    for (i = 1; i < argc; i++) {
        const char *arg = argv[i];

        if (arg[0] != '-' && arg[0] != '/') {
            /* input filename */
            if (le->infile != NULL) {
                fprintf(stderr, " *** ERROR : Input file defined more than once\n");
                print_error();
            }
            le->infile = copystr(arg);
            continue;
        }

        /* command line switch */
        switch (arg[1]) {
        case 's':
        case 'S':
            if (le->search_string != NULL) {
                fprintf(stderr, " *** ERROR : Search String defined more than once\n");
                print_error();
            }
            le->search_string = copystr(arg + 2);
            break;

        case 'c':
        case 'C':
            le->search_column = atoi(arg + 2);
            if (le->search_column < 0) {
                le->search_column = 0;
            }
            break;

        case 'i':
        case 'I':
            le->ignore_flag = 1;
            break;

        case 'r':
        case 'R':
            le->record_length = atoi(arg + 2);
            if (le->record_length < 0) {
                le->record_length = 0;
            }
            break;

        case 'o':
        case 'O':
            if (le->outfile != NULL) {
                fprintf(stderr, " *** ERROR : Output file defined more than once\n");
                print_error();
            }
            le->outfile = copystr(arg + 2);
            if (le->outfile[0] == '\0') {
                fprintf(stderr, " *** ERROR: Output file not specified\n");
                print_error();
            }
            break;

        case '?':
        case 'h':
        case 'H':
            print_help(stdout);
            exit(EXIT_SUCCESS);

        case 'd':
        case 'D':
            /* debug flag */
            le->debug = 1;
            break;

        default:
            fprintf(stderr, " *** ERROR : Unknown option (%s)\n", arg);
            print_error();
        }
    }

    /* check for default conditions; a stream that is still NULL after
       this point belongs to a named file that has yet to be opened */
    if (le->outfile == NULL) {
        le->out = stdout;
        le->outfile = copystr("");
    }
    if (le->infile == NULL) {
        le->in = stdin;
        le->infile = copystr("");
    }

    /* print the debug information */
    if (le->debug) {
        printf(" infile = %s\n", le->infile);
        printf(" outfile = %s\n", le->outfile);
        /* the search string may still be missing here; never hand a
           null pointer to %s */
        printf(" search_string = (%s)\n",
               le->search_string != NULL ? le->search_string : "(null)");
        printf(" search_column = %d\n", le->search_column);
        printf(" ignore_flag = %d\n", le->ignore_flag);
        printf(" record_length = %d\n", le->record_length);
    }

    /* see if a search string was specified */
    if (le->search_string == NULL) {
        fprintf(stderr, " *** ERROR : Search string not specified\n");
        print_error();
    }

    /* refuse to read from and write to the same named file */
    if (le->in == NULL && le->out == NULL
        && filenames_match(le->infile, le->outfile)) {
        fprintf(stderr, " *** ERROR: Input and Output filenames are identical\n");
        print_error();
    }

    /* open the named files, input first */
    if (le->in == NULL) {
        le->in = fopen(le->infile, "r");
        if (le->in == NULL) {
            fprintf(stderr, " *** ERROR : Unable to open input file %s\n",
                    le->infile);
            exit(EXIT_FAILURE);
        }
    }
    if (le->out == NULL) {
        le->out = fopen(le->outfile, "w");
        if (le->out == NULL) {
            fprintf(stderr, " *** ERROR : Unable to open output file %s\n",
                    le->outfile);
            exit(EXIT_FAILURE);
        }
    }

    /* convert the search string to lower case if ignore flag is set */
    if (le->ignore_flag) {
        char *p;

        for (p = le->search_string; *p != '\0'; p++) {
            *p = (char) tolower((unsigned char) *p);
        }
    }

    /* allocate the read buffer (one extra byte for the terminator) */
    if (le->record_length > 0) {
        le->rdline = calloc((size_t) le->record_length + 1, sizeof(char));
    } else {
        le->rdline = calloc((size_t) MAXCHAR + 1, sizeof(char));
    }
    if (le->rdline == NULL) {
        fprintf(stderr, " *** FATAL ERROR, out of memory\n");
        exit(EXIT_FAILURE);
    }
}

/* print_error() prints the help text to stderr and exits with
   EXIT_FAILURE.  It does not return. */
void print_error(void)
{
    print_help(stderr);
    exit(EXIT_FAILURE);
}

/* print_help() writes the version banner and usage summary to out. */
void print_help(FILE *out)
{
    fprintf(out, " \n");
    fprintf(out, " LEXTRACT.EXE version %s (%s)\n", VERSION, __DATE__);
    fputs(" COPYRIGHT (C) 1997-2002 Norbert H. Doerry\n\n", out);
    fputs(" SYNTAX:\n\n", out);
    fputs(" lextract infile -sXXX -cXX -i -rXX -oOutfile\n\n", out);
    fputs(" where\n\n", out);
    fputs(" infile = Name of input ascii flat file\n", out);
    fputs(" -sXXX = XXX is the search string\n", out);
    fputs(" -cXX = XX is the column to look for the search string\n", out);
    fputs(" if omitted, searches the entire record (line)\n", out);
    fputs(" -i = ignore case of search string\n", out);
    fputs(" -rXX = record length, including terminating newline if present\n", out);
    fputs(" if omitted, assumes lines are terminated with newlines\n", out);
    fputs(" Outfile = Name of ascii output file\n", out);
    fputs(" if omitted, output is written to stdout\n", out);
}

/* copystr() allocates a block of memory and copies the string passed
   to it.  It returns the starting address of the copy, or exits with
   EXIT_FAILURE if unable to allocate the memory.  Use the free()
   function to release the memory. */
char *copystr(const char *string)
{
    size_t size = strlen(string) + 1;   /* include the terminator */
    char *s = malloc(size);

    if (s == NULL) {
        fprintf(stderr, " *** FATAL ERROR : Out of Memory in copystr()\n");
        exit(EXIT_FAILURE);
    }
    memcpy(s, string, size);
    return s;
}

/* strncmpa() is identical to the strncmp() function except that the
   comparison is case insensitive: at most n characters are compared,
   and the result is negative, zero or positive. */
int strncmpa(const char *s1, const char *s2, size_t n)
{
    size_t i;

    for (i = 0; i < n; i++) {
        /* <ctype.h> functions require values in unsigned char range */
        int c1 = tolower((unsigned char) s1[i]);
        int c2 = tolower((unsigned char) s2[i]);

        if (c1 != c2) {
            return c1 - c2;
        }
        if (c1 == '\0') {
            break;              /* both strings ended together */
        }
    }
    return 0;
}

/* read_line() reads the next record from the input file and stores it,
   NUL terminated, in the rdline element.  It returns 1 if a record was
   read and 0 at end of file.

   In newline-delimited mode (record_length == 0) at most MAXCHAR
   characters are read per call.  In fixed-length mode exactly
   record_length characters are read; an incomplete final record is
   discarded. */
int read_line(LEXTRACT *le)
{
    size_t want;

    if (le->record_length == 0) {
        return fgets(le->rdline, MAXCHAR + 1, le->in) != NULL;
    }

    /* fread() reports end of file through its count, so a data byte
       of 0xFF can never be mistaken for EOF */
    want = (size_t) le->record_length;
    if (fread(le->rdline, sizeof(char), want, le->in) != want) {
        return 0;
    }
    le->rdline[want] = '\0';
    return 1;
}

/* check_line() returns 1 if the search string was found in the current
   record and 0 if not.

   With search_column == 0 the string may appear anywhere in the
   record; otherwise it must start exactly at that 1-based column.
   When ignore_flag is set the search string is expected to be lower
   case already (process_command_line() converts it). */
int check_line(LEXTRACT *le)
{
    const char *field;
    size_t needle_len;

    /* see if no search column was specified */
    if (le->search_column == 0) {
        char *lowered;
        char *p;
        int found;

        if (!le->ignore_flag) {
            return strstr(le->rdline, le->search_string) != NULL;
        }

        /* search a lower-case copy so the record itself is written
           out unchanged */
        lowered = copystr(le->rdline);
        for (p = lowered; *p != '\0'; p++) {
            *p = (char) tolower((unsigned char) *p);
        }
        found = strstr(lowered, le->search_string) != NULL;
        free(lowered);
        return found;
    }

    /* otherwise look at the appropriate column; a record shorter than
       the column cannot match */
    if (strlen(le->rdline) < (size_t) le->search_column) {
        return 0;
    }

    field = le->rdline + le->search_column - 1;
    needle_len = strlen(le->search_string);

    if (le->ignore_flag) {
        return strncmpa(field, le->search_string, needle_len) == 0;
    }
    return strncmp(field, le->search_string, needle_len) == 0;
}