View Single Post
Old Jul 29th, 2005, 11:32 AM   #7
jim mcnamara
Hobbyist Programmer
 
Join Date: Jun 2005
Location: New Mexico
Posts: 228
Rep Power: 4 jim mcnamara is on a distinguished road
You can use grep "123-333-1234" <filename>, it is slow.
We have a C routine that can do a find in about 20 I/O operations or less
searching a fixed record length file - kind of a binary search on a file -
This is a simplified version - i tested it on a file with 8 million records.
test results:
kcsdev:/home/jmcnama> time ffind 000000000 8000000 /tmp/test

real    0m0.21s
user    0m0.00s
sys     0m0.01s
kcsdev:/home/jmcnama> echo $?
1
kcsdev:/home/jmcnama> time ffind 000050000 8000000 /tmp/test

real    0m0.08s
user    0m0.01s
sys     0m0.02s
kcsdev:/home/jmcnama> echo $?
0
kcsdev:/home/jmcnama> time ffind 000000001 8000000 /tmp/test

real    0m0.07s
user    0m0.00s
sys     0m0.02s

code:
/*****************************************
*   ffind.c --
*   find a value in a file using random access
*   usage ffind <value> <nrec> <filename>
*   requires a fixed length record file
*   possibly with a newline for each record,
*   reclen= data + newline
*   returns:
*    0 if the value is found,
*    1 if not found
**********************************/
#define RECL 10

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

FILE *in=NULL;
char _Wvalue[RECL*2]={0x0};

void usage(void)       /* help */
{
    fprintf(stderr,"%s\n",
        "usage: ffind <value to find> <number records> <filename>");
    exit(EXIT_FAILURE);
}

char  *setfilepos(long pos)  /* read from  in the file*/
{
     if(fseek(in,pos*RECL,SEEK_SET)!=0)
     {  /* note we do not check for EINTR which is possible */
     	perror("Parameter error - incorrect number of records");
     	exit(EXIT_FAILURE);
     }
     memset(_Wvalue,0x0,sizeof(_Wvalue));
     if(fgets(_Wvalue,sizeof(_Wvalue),in)==NULL)
     {
        if(!feof(in))
        {
            perror("File read error");
            exit(EXIT_FAILURE);
        }      
     }
     return _Wvalue;
}

int compar(const char *key,long rec) /* compare key & test value */
{
    char value[RECL+1]={0x0};
    char *p=NULL;

    strcpy(value,setfilepos(rec));
    p=strchr(value,'\n');
    if(p!=NULL) *p=0x0;
    return strcmp(key,value);
}

/* binary search against a fixed Rec Len  file */
int fbsearch(const char *key, long nrecs)
{
    int retval=0;
    long offset=0;
    long guess=0;

    nrecs--;
    guess=(nrecs - offset)/2;
    for(;;)
    {
        retval=compar(key,guess);
        if(retval)
        {
            if(retval>0)
            {
            	offset=guess+1;
            }
            else
            {
                nrecs=guess-1;
            }
            if(offset > nrecs) break;

            guess=offset + (nrecs-offset)/2;
            continue;
        }
        break;
    }
    return retval;
}

int main(int argc, char *argv[])
{
    int result=0;
    if(argc<4)
    {
        usage();
    }
    in=fopen(argv[3],"r");
    if(in==NULL)
    {
        perror("Error opening input file");
        exit(EXIT_FAILURE);
    }
    result=fbsearch(argv[1],atol(argv[2]));
    return (result!=0);
}
jim mcnamara is offline   Reply With Quote