Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.cpp

00001 /******************************************************************************
00002  * $Id: cpl_csv_cpp-source.html,v 1.3 2000/11/06 04:49:01 warmerda Exp $
00003  *
00004  * Project:  CPL - Common Portability Library
00005  * Purpose:  CSV (comma separated value) file access.
00006  * Author:   Frank Warmerdam, warmerda@home.com
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 1999, Frank Warmerdam
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ******************************************************************************
00029  *
00030  * $Log: cpl_csv_cpp-source.html,v $
00030  * Revision 1.3  2000/11/06 04:49:01  warmerda
00030  * *** empty log message ***
00030  *
00031  * Revision 1.1  2000/10/06 15:20:45  warmerda
00032  * New
00033  *
00034  * Revision 1.2  2000/08/29 21:08:08  warmerda
00035  * fallback to use CPLFindFile()
00036  *
00037  * Revision 1.1  2000/04/05 21:55:59  warmerda
00038  * New
00039  *
00040  */
00041 
00042 #include "cpl_csv.h"
00043 #include "cpl_conv.h"
00044 
/* ==================================================================== */
/*
 * CSVTable: persistent bookkeeping for one open CSV table.
 *
 * No record index or in-memory copy of the table is maintained at
 * present, though the structure could be extended to hold one in the
 * future.
 */
/* ==================================================================== */
typedef struct ctb CSVTable;

struct ctb {
    FILE        *fp;                /* open stream on the CSV file */

    CSVTable    *psNext;            /* next entry in the open table list */

    char        *pszFilename;       /* name the table was opened under */

    char        **papszFieldNames;  /* parsed header (first) record */

    char        **papszRecFields;   /* last record found by a scan, or NULL */
};

/* Head of the singly linked list of currently open tables. */
static CSVTable *psCSVTableList = NULL;
00064 
00065 /************************************************************************/
00066 /*                             CSVAccess()                              */
00067 /*                                                                      */
00068 /*      This function will fetch a handle to the requested table.       */
00069 /*      If not found in the ``open table list'' the table will be       */
00070 /*      opened and added to the list.  Eventually this function may     */
00071 /*      become public with an abstracted return type so that            */
00072 /*      applications can set options about the table.  For now this     */
00073 /*      isn't done.                                                     */
00074 /************************************************************************/
00075 
00076 static CSVTable *CSVAccess( const char * pszFilename )
00077 
00078 {
00079     CSVTable    *psTable;
00080     FILE        *fp;
00081 
00082 /* -------------------------------------------------------------------- */
00083 /*      Is the table already in the list.                               */
00084 /* -------------------------------------------------------------------- */
00085     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00086     {
00087         if( EQUAL(psTable->pszFilename,pszFilename) )
00088         {
00089             /*
00090              * Eventually we should consider promoting to the front of
00091              * the list to accelerate frequently accessed tables.
00092              */
00093             
00094             return( psTable );
00095         }
00096     }
00097 
00098 /* -------------------------------------------------------------------- */
00099 /*      If not, try to open it.                                         */
00100 /* -------------------------------------------------------------------- */
00101     fp = VSIFOpen( pszFilename, "r" );
00102     if( fp == NULL )
00103         return NULL;
00104 
00105 /* -------------------------------------------------------------------- */
00106 /*      Create an information structure about this table, and add to    */
00107 /*      the front of the list.                                          */
00108 /* -------------------------------------------------------------------- */
00109     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00110 
00111     psTable->fp = fp;
00112     psTable->pszFilename = CPLStrdup( pszFilename );
00113     psTable->psNext = psCSVTableList;
00114     
00115     psCSVTableList = psTable;
00116 
00117 /* -------------------------------------------------------------------- */
00118 /*      Read the table header record containing the field names.        */
00119 /* -------------------------------------------------------------------- */
00120     psTable->papszFieldNames = CSVReadParseLine( fp );
00121 
00122     return( psTable );
00123 }
00124 
00125 /************************************************************************/
00126 /*                            CSVDeaccess()                             */
00127 /************************************************************************/
00128 
00129 void CSVDeaccess( const char * pszFilename )
00130 
00131 {
00132     CSVTable    *psLast, *psTable;
00133     
00134 /* -------------------------------------------------------------------- */
00135 /*      A NULL means deaccess all tables.                               */
00136 /* -------------------------------------------------------------------- */
00137     if( pszFilename == NULL )
00138     {
00139         while( psCSVTableList != NULL )
00140             CSVDeaccess( psCSVTableList->pszFilename );
00141         
00142         return;
00143     }
00144 
00145 /* -------------------------------------------------------------------- */
00146 /*      Find this table.                                                */
00147 /* -------------------------------------------------------------------- */
00148     psLast = NULL;
00149     for( psTable = psCSVTableList;
00150          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00151          psTable = psTable->psNext )
00152     {
00153         psLast = psTable;
00154     }
00155 
00156     if( psTable == NULL )
00157         return;
00158 
00159 /* -------------------------------------------------------------------- */
00160 /*      Remove the link from the list.                                  */
00161 /* -------------------------------------------------------------------- */
00162     if( psLast != NULL )
00163         psLast->psNext = psTable->psNext;
00164     else
00165         psCSVTableList = psTable->psNext;
00166 
00167 /* -------------------------------------------------------------------- */
00168 /*      Free the table.                                                 */
00169 /* -------------------------------------------------------------------- */
00170     VSIFClose( psTable->fp );
00171 
00172     CSLDestroy( psTable->papszFieldNames );
00173     CSLDestroy( psTable->papszRecFields );
00174     CPLFree( psTable->pszFilename );
00175 
00176     CPLFree( psTable );
00177 }
00178 
00179 /************************************************************************/
00180 /*                          CSVReadParseLine()                          */
00181 /*                                                                      */
00182 /*      Read one line, and return split into fields.  The return        */
00183 /*      result is a stringlist, in the sense of the CSL functions.      */
00184 /************************************************************************/
00185 
00186 char **CSVReadParseLine( FILE * fp )
00187 
00188 {
00189     const char  *pszLine;
00190     char        *pszWorkLine;
00191     char        **papszReturn;
00192 
00193     CPLAssert( fp != NULL );
00194     if( fp == NULL )
00195         return( NULL );
00196     
00197     pszLine = CPLReadLine( fp );
00198     if( pszLine == NULL )
00199         return( NULL );
00200 
00201 /* -------------------------------------------------------------------- */
00202 /*      If there are no quotes, then this is the simple case.           */
00203 /*      Parse, and return tokens.                                       */
00204 /* -------------------------------------------------------------------- */
00205     if( strchr(pszLine,'\"') == NULL )
00206         return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00207 
00208 /* -------------------------------------------------------------------- */
00209 /*      We must now count the quotes in our working string, and as      */
00210 /*      long as it is odd, keep adding new lines.                       */
00211 /* -------------------------------------------------------------------- */
00212     pszWorkLine = CPLStrdup( pszLine );
00213 
00214     while( TRUE )
00215     {
00216         int             i, nCount = 0;
00217 
00218         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00219         {
00220             if( pszWorkLine[i] == '\"'
00221                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00222                 nCount++;
00223         }
00224 
00225         if( nCount % 2 == 0 )
00226             break;
00227 
00228         pszLine = CPLReadLine( fp );
00229         if( pszLine == NULL )
00230             break;
00231 
00232         pszWorkLine = (char *)
00233             CPLRealloc(pszWorkLine,
00234                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00235         strcat( pszWorkLine, pszLine );
00236     }
00237     
00238     papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00239 
00240     CPLFree( pszWorkLine );
00241 
00242     return papszReturn;
00243 }
00244 
00245 /************************************************************************/
00246 /*                             CSVCompare()                             */
00247 /*                                                                      */
00248 /*      Compare a field to a search value using a particular            */
00249 /*      criteria.                                                       */
00250 /************************************************************************/
00251 
00252 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00253                        CSVCompareCriteria eCriteria )
00254 
00255 {
00256     if( eCriteria == CC_ExactString )
00257     {
00258         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00259     }
00260     else if( eCriteria == CC_ApproxString )
00261     {
00262         return( EQUAL( pszFieldValue, pszTarget ) );
00263     }
00264     else if( eCriteria == CC_Integer )
00265     {
00266         return( atoi(pszFieldValue) == atoi(pszTarget) );
00267     }
00268 
00269     return FALSE;
00270 }
00271 
00272 /************************************************************************/
00273 /*                            CSVScanLines()                            */
00274 /*                                                                      */
00275 /*      Read the file scanline for lines where the key field equals     */
00276 /*      the indicated value with the suggested comparison criteria.     */
00277 /*      Return the first matching line split into fields.               */
00278 /************************************************************************/
00279 
00280 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00281                      CSVCompareCriteria eCriteria )
00282 
00283 {
00284     char        **papszFields = NULL;
00285     int         bSelected = FALSE, nTestValue;
00286 
00287     CPLAssert( pszValue != NULL );
00288     CPLAssert( iKeyField >= 0 );
00289     CPLAssert( fp != NULL );
00290     
00291     nTestValue = atoi(pszValue);
00292     
00293     while( !bSelected ) {
00294         papszFields = CSVReadParseLine( fp );
00295         if( papszFields == NULL )
00296             return( NULL );
00297 
00298         if( CSLCount( papszFields ) < iKeyField+1 )
00299         {
00300             /* not selected */
00301         }
00302         else if( eCriteria == CC_Integer
00303                  && atoi(papszFields[iKeyField]) == nTestValue )
00304         {
00305             bSelected = TRUE;
00306         }
00307         else
00308         {
00309             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00310                                     eCriteria );
00311         }
00312 
00313         if( !bSelected )
00314         {
00315             CSLDestroy( papszFields );
00316             papszFields = NULL;
00317         }
00318     }
00319     
00320     return( papszFields );
00321 }
00322 
00323 /************************************************************************/
00324 /*                            CSVScanFile()                             */
00325 /*                                                                      */
00326 /*      Scan a whole file using criteria similar to above, but also     */
00327 /*      taking care of file opening and closing.                        */
00328 /************************************************************************/
00329 
00330 char **CSVScanFile( const char * pszFilename, int iKeyField,
00331                     const char * pszValue, CSVCompareCriteria eCriteria )
00332 
00333 {
00334     CSVTable    *psTable;
00335 
00336 /* -------------------------------------------------------------------- */
00337 /*      Get access to the table.                                        */
00338 /* -------------------------------------------------------------------- */
00339     CPLAssert( pszFilename != NULL );
00340 
00341     if( iKeyField < 0 )
00342         return NULL;
00343 
00344     psTable = CSVAccess( pszFilename );
00345     if( psTable == NULL )
00346         return NULL;
00347 
00348 /* -------------------------------------------------------------------- */
00349 /*      Does the current record match the criteria?  If so, just        */
00350 /*      return it again.                                                */
00351 /* -------------------------------------------------------------------- */
00352     if( iKeyField >= 0
00353         && iKeyField < CSLCount(psTable->papszRecFields)
00354         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00355     {
00356         return psTable->papszRecFields;
00357     }
00358 
00359 /* -------------------------------------------------------------------- */
00360 /*      Scan the file from the beginning, replacing the ``current       */
00361 /*      record'' in our structure with the one that is found.           */
00362 /* -------------------------------------------------------------------- */
00363     VSIRewind( psTable->fp );
00364     CPLReadLine( psTable->fp );         /* throw away the header line */
00365     
00366     CSLDestroy( psTable->papszRecFields );
00367     psTable->papszRecFields =
00368         CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00369 
00370     return( psTable->papszRecFields );
00371 }
00372 
/************************************************************************/
/*                           CSVGetFieldId()                            */
/************************************************************************/
/*
 * Rewind fp, read the first record and return the zero based index
 * of the field named pszFieldName, or -1 if there is no such field.
 * Names are compared with EQUAL() (case insensitive, but otherwise
 * exact).
 *
 * On return the file pointer is positioned just after the first
 * record.
 */

int CSVGetFieldId( FILE * fp, const char * pszFieldName )

{
    char        **papszHeader;
    int         iField;
    int         iFound = -1;

    CPLAssert( fp != NULL && pszFieldName != NULL );

    VSIRewind( fp );

    papszHeader = CSVReadParseLine( fp );
    if( papszHeader != NULL )
    {
        for( iField = 0; papszHeader[iField] != NULL; iField++ )
        {
            if( EQUAL(papszHeader[iField],pszFieldName) )
            {
                iFound = iField;
                break;
            }
        }
    }

    /* The header list is only needed for the lookup. */
    CSLDestroy( papszHeader );

    return iFound;
}
00408 
00409 /************************************************************************/
00410 /*                         CSVGetFileFieldId()                          */
00411 /*                                                                      */
00412 /*      Same as CPLGetFieldId(), except that we get the file based      */
00413 /*      on filename, rather than having an existing handle.             */
00414 /************************************************************************/
00415 
00416 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00417 
00418 {
00419     CSVTable    *psTable;
00420     int         i;
00421     
00422 /* -------------------------------------------------------------------- */
00423 /*      Get access to the table.                                        */
00424 /* -------------------------------------------------------------------- */
00425     CPLAssert( pszFilename != NULL );
00426 
00427     psTable = CSVAccess( pszFilename );
00428     if( psTable == NULL )
00429         return -1;
00430 
00431 /* -------------------------------------------------------------------- */
00432 /*      Find the requested field.                                       */
00433 /* -------------------------------------------------------------------- */
00434     for( i = 0;
00435          psTable->papszFieldNames != NULL
00436              && psTable->papszFieldNames[i] != NULL;
00437          i++ )
00438     {
00439         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00440         {
00441             return i;
00442         }
00443     }
00444 
00445     return -1;
00446 }
00447 
00448 
00449 /************************************************************************/
00450 /*                         CSVScanFileByName()                          */
00451 /*                                                                      */
00452 /*      Same as CSVScanFile(), but using a field name instead of a      */
00453 /*      field number.                                                   */
00454 /************************************************************************/
00455 
00456 char **CSVScanFileByName( const char * pszFilename,
00457                           const char * pszKeyFieldName,
00458                           const char * pszValue, CSVCompareCriteria eCriteria )
00459 
00460 {
00461     int         iKeyField;
00462 
00463     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00464     if( iKeyField == -1 )
00465         return NULL;
00466 
00467     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00468 }
00469 
00470 /************************************************************************/
00471 /*                            CSVGetField()                             */
00472 /*                                                                      */
00473 /*      The all-in-one function to fetch a particular field value       */
00474 /*      from a CSV file.  Note this function will return an empty       */
00475 /*      string, rather than NULL if it fails to find the desired        */
00476 /*      value for some reason.  The caller can't establish that the     */
00477 /*      fetch failed.                                                   */
00478 /************************************************************************/
00479 
00480 const char *CSVGetField( const char * pszFilename,
00481                          const char * pszKeyFieldName,
00482                          const char * pszKeyFieldValue,
00483                          CSVCompareCriteria eCriteria,
00484                          const char * pszTargetField )
00485 
00486 {
00487     CSVTable    *psTable;
00488     char        **papszRecord;
00489     int         iTargetField;
00490     
00491 /* -------------------------------------------------------------------- */
00492 /*      Find the table.                                                 */
00493 /* -------------------------------------------------------------------- */
00494     psTable = CSVAccess( pszFilename );
00495     if( psTable == NULL )
00496         return "";
00497 
00498 /* -------------------------------------------------------------------- */
00499 /*      Find the correct record.                                        */
00500 /* -------------------------------------------------------------------- */
00501     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00502                                      pszKeyFieldValue, eCriteria );
00503 
00504     if( papszRecord == NULL )
00505         return "";
00506 
00507 /* -------------------------------------------------------------------- */
00508 /*      Figure out which field we want out of this.                     */
00509 /* -------------------------------------------------------------------- */
00510     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00511     if( iTargetField < 0 )
00512         return "";
00513 
00514     if( iTargetField >= CSLCount( papszRecord ) )
00515         return "";
00516 
00517     return( papszRecord[iTargetField] );
00518 }
00519 
/************************************************************************/
/*                            CSVFilename()                             */
/************************************************************************/
/*
 * Return the full path to a particular CSV file.
 *
 * If a hook has been installed with SetCSVFilenameHook() its result
 * is returned unchanged.  Otherwise the candidates are tried in this
 * order:
 *
 *   1. CPLFindFile( "epsg_csv", pszBasename ), if it returns non-NULL
 *   2. $GEOTIFF_CSV/<basename>, if GEOTIFF_CSV is set
 *   3. csv/<basename>, if csv/horiz_cs.csv can be opened
 *   4. /usr/local/share/epsg_csv/<basename>
 *
 * Results 2-4 are built in a static buffer: the returned string is
 * overwritten by the next call, and the function is not reentrant.
 * A path that does not fit is truncated to the buffer size instead of
 * overrunning the buffer.
 */

static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

const char * CSVFilename( const char *pszBasename )

{
    static char         szPath[512];
    const char          *pszResult;
    const char          *pszEnvDir;
    FILE                *fp;

    /* An application supplied hook takes over completely. */
    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    pszResult = CPLFindFile( "epsg_csv", pszBasename );
    if( pszResult != NULL )
        return pszResult;

    /* Fetch the environment setting once and reuse the result. */
    pszEnvDir = getenv("GEOTIFF_CSV");

    if( pszEnvDir != NULL )
    {
        snprintf( szPath, sizeof(szPath), "%s/%s", pszEnvDir, pszBasename );
    }
    else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
    {
        /* The file was only opened to probe for a local csv directory. */
        fclose( fp );

        snprintf( szPath, sizeof(szPath), "csv/%s", pszBasename );
    }
    else
    {
        snprintf( szPath, sizeof(szPath),
                  "/usr/local/share/epsg_csv/%s", pszBasename );
    }

    return( szPath );
}
00563 
00564 /************************************************************************/
00565 /*                         SetCSVFilenameHook()                         */
00566 /*                                                                      */
00567 /*      Applications can use this to set a function that will           */
00568 /*      massage CSV filenames.                                          */
00569 /************************************************************************/
00570 
00615 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00616 
00617 {
00618     pfnCSVFilenameHook = pfnNewHook;
00619 }

doxygen1.2.3-20001105 Dimitri van Heesch, © 1997-2000