00001 /****************************************************************************** 00002 * $Id: cpl_csv_cpp-source.html,v 1.3 2000/11/06 04:49:01 warmerda Exp $ 00003 * 00004 * Project: CPL - Common Portability Library 00005 * Purpose: CSV (comma separated value) file access. 00006 * Author: Frank Warmerdam, warmerda@home.com 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 1999, Frank Warmerdam 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 00028 ****************************************************************************** 00029 * 00030 * $Log: cpl_csv_cpp-source.html,v $ 00030 * Revision 1.3 2000/11/06 04:49:01 warmerda 00030 * *** empty log message *** 00030 * 00031 * Revision 1.1 2000/10/06 15:20:45 warmerda 00032 * New 00033 * 00034 * Revision 1.2 2000/08/29 21:08:08 warmerda 00035 * fallback to use CPLFindFile() 00036 * 00037 * Revision 1.1 2000/04/05 21:55:59 warmerda 00038 * New 00039 * 00040 */ 00041 00042 #include "cpl_csv.h" 00043 #include "cpl_conv.h" 00044 00045 /* ==================================================================== */ 00046 /* The CSVTable is a persistant set of info about an open CSV */ 00047 /* table. While it doesn't currently maintain a record index, */ 00048 /* or in-memory copy of the table, it could be changed to do so */ 00049 /* in the future. */ 00050 /* ==================================================================== */ 00051 typedef struct ctb { 00052 FILE *fp; 00053 00054 struct ctb *psNext; 00055 00056 char *pszFilename; 00057 00058 char **papszFieldNames; 00059 00060 char **papszRecFields; 00061 } CSVTable; 00062 00063 static CSVTable *psCSVTableList = NULL; 00064 00065 /************************************************************************/ 00066 /* CSVAccess() */ 00067 /* */ 00068 /* This function will fetch a handle to the requested table. */ 00069 /* If not found in the ``open table list'' the table will be */ 00070 /* opened and added to the list. Eventually this function may */ 00071 /* become public with an abstracted return type so that */ 00072 /* applications can set options about the table. For now this */ 00073 /* isn't done. */ 00074 /************************************************************************/ 00075 00076 static CSVTable *CSVAccess( const char * pszFilename ) 00077 00078 { 00079 CSVTable *psTable; 00080 FILE *fp; 00081 00082 /* -------------------------------------------------------------------- */ 00083 /* Is the table already in the list. */ 00084 /* -------------------------------------------------------------------- */ 00085 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00086 { 00087 if( EQUAL(psTable->pszFilename,pszFilename) ) 00088 { 00089 /* 00090 * Eventually we should consider promoting to the front of 00091 * the list to accelerate frequently accessed tables. 00092 */ 00093 00094 return( psTable ); 00095 } 00096 } 00097 00098 /* -------------------------------------------------------------------- */ 00099 /* If not, try to open it. */ 00100 /* -------------------------------------------------------------------- */ 00101 fp = VSIFOpen( pszFilename, "r" ); 00102 if( fp == NULL ) 00103 return NULL; 00104 00105 /* -------------------------------------------------------------------- */ 00106 /* Create an information structure about this table, and add to */ 00107 /* the front of the list. */ 00108 /* -------------------------------------------------------------------- */ 00109 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00110 00111 psTable->fp = fp; 00112 psTable->pszFilename = CPLStrdup( pszFilename ); 00113 psTable->psNext = psCSVTableList; 00114 00115 psCSVTableList = psTable; 00116 00117 /* -------------------------------------------------------------------- */ 00118 /* Read the table header record containing the field names. */ 00119 /* -------------------------------------------------------------------- */ 00120 psTable->papszFieldNames = CSVReadParseLine( fp ); 00121 00122 return( psTable ); 00123 } 00124 00125 /************************************************************************/ 00126 /* CSVDeaccess() */ 00127 /************************************************************************/ 00128 00129 void CSVDeaccess( const char * pszFilename ) 00130 00131 { 00132 CSVTable *psLast, *psTable; 00133 00134 /* -------------------------------------------------------------------- */ 00135 /* A NULL means deaccess all tables. */ 00136 /* -------------------------------------------------------------------- */ 00137 if( pszFilename == NULL ) 00138 { 00139 while( psCSVTableList != NULL ) 00140 CSVDeaccess( psCSVTableList->pszFilename ); 00141 00142 return; 00143 } 00144 00145 /* -------------------------------------------------------------------- */ 00146 /* Find this table. */ 00147 /* -------------------------------------------------------------------- */ 00148 psLast = NULL; 00149 for( psTable = psCSVTableList; 00150 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00151 psTable = psTable->psNext ) 00152 { 00153 psLast = psTable; 00154 } 00155 00156 if( psTable == NULL ) 00157 return; 00158 00159 /* -------------------------------------------------------------------- */ 00160 /* Remove the link from the list. */ 00161 /* -------------------------------------------------------------------- */ 00162 if( psLast != NULL ) 00163 psLast->psNext = psTable->psNext; 00164 else 00165 psCSVTableList = psTable->psNext; 00166 00167 /* -------------------------------------------------------------------- */ 00168 /* Free the table. */ 00169 /* -------------------------------------------------------------------- */ 00170 VSIFClose( psTable->fp ); 00171 00172 CSLDestroy( psTable->papszFieldNames ); 00173 CSLDestroy( psTable->papszRecFields ); 00174 CPLFree( psTable->pszFilename ); 00175 00176 CPLFree( psTable ); 00177 } 00178 00179 /************************************************************************/ 00180 /* CSVReadParseLine() */ 00181 /* */ 00182 /* Read one line, and return split into fields. The return */ 00183 /* result is a stringlist, in the sense of the CSL functions. */ 00184 /************************************************************************/ 00185 00186 char **CSVReadParseLine( FILE * fp ) 00187 00188 { 00189 const char *pszLine; 00190 char *pszWorkLine; 00191 char **papszReturn; 00192 00193 CPLAssert( fp != NULL ); 00194 if( fp == NULL ) 00195 return( NULL ); 00196 00197 pszLine = CPLReadLine( fp ); 00198 if( pszLine == NULL ) 00199 return( NULL ); 00200 00201 /* -------------------------------------------------------------------- */ 00202 /* If there are no quotes, then this is the simple case. */ 00203 /* Parse, and return tokens. */ 00204 /* -------------------------------------------------------------------- */ 00205 if( strchr(pszLine,'\"') == NULL ) 00206 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE ); 00207 00208 /* -------------------------------------------------------------------- */ 00209 /* We must now count the quotes in our working string, and as */ 00210 /* long as it is odd, keep adding new lines. */ 00211 /* -------------------------------------------------------------------- */ 00212 pszWorkLine = CPLStrdup( pszLine ); 00213 00214 while( TRUE ) 00215 { 00216 int i, nCount = 0; 00217 00218 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00219 { 00220 if( pszWorkLine[i] == '\"' 00221 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00222 nCount++; 00223 } 00224 00225 if( nCount % 2 == 0 ) 00226 break; 00227 00228 pszLine = CPLReadLine( fp ); 00229 if( pszLine == NULL ) 00230 break; 00231 00232 pszWorkLine = (char *) 00233 CPLRealloc(pszWorkLine, 00234 strlen(pszWorkLine) + strlen(pszLine) + 1); 00235 strcat( pszWorkLine, pszLine ); 00236 } 00237 00238 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE ); 00239 00240 CPLFree( pszWorkLine ); 00241 00242 return papszReturn; 00243 } 00244 00245 /************************************************************************/ 00246 /* CSVCompare() */ 00247 /* */ 00248 /* Compare a field to a search value using a particular */ 00249 /* criteria. */ 00250 /************************************************************************/ 00251 00252 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00253 CSVCompareCriteria eCriteria ) 00254 00255 { 00256 if( eCriteria == CC_ExactString ) 00257 { 00258 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00259 } 00260 else if( eCriteria == CC_ApproxString ) 00261 { 00262 return( EQUAL( pszFieldValue, pszTarget ) ); 00263 } 00264 else if( eCriteria == CC_Integer ) 00265 { 00266 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00267 } 00268 00269 return FALSE; 00270 } 00271 00272 /************************************************************************/ 00273 /* CSVScanLines() */ 00274 /* */ 00275 /* Read the file scanline for lines where the key field equals */ 00276 /* the indicated value with the suggested comparison criteria. */ 00277 /* Return the first matching line split into fields. */ 00278 /************************************************************************/ 00279 00280 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00281 CSVCompareCriteria eCriteria ) 00282 00283 { 00284 char **papszFields = NULL; 00285 int bSelected = FALSE, nTestValue; 00286 00287 CPLAssert( pszValue != NULL ); 00288 CPLAssert( iKeyField >= 0 ); 00289 CPLAssert( fp != NULL ); 00290 00291 nTestValue = atoi(pszValue); 00292 00293 while( !bSelected ) { 00294 papszFields = CSVReadParseLine( fp ); 00295 if( papszFields == NULL ) 00296 return( NULL ); 00297 00298 if( CSLCount( papszFields ) < iKeyField+1 ) 00299 { 00300 /* not selected */ 00301 } 00302 else if( eCriteria == CC_Integer 00303 && atoi(papszFields[iKeyField]) == nTestValue ) 00304 { 00305 bSelected = TRUE; 00306 } 00307 else 00308 { 00309 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00310 eCriteria ); 00311 } 00312 00313 if( !bSelected ) 00314 { 00315 CSLDestroy( papszFields ); 00316 papszFields = NULL; 00317 } 00318 } 00319 00320 return( papszFields ); 00321 } 00322 00323 /************************************************************************/ 00324 /* CSVScanFile() */ 00325 /* */ 00326 /* Scan a whole file using criteria similar to above, but also */ 00327 /* taking care of file opening and closing. */ 00328 /************************************************************************/ 00329 00330 char **CSVScanFile( const char * pszFilename, int iKeyField, 00331 const char * pszValue, CSVCompareCriteria eCriteria ) 00332 00333 { 00334 CSVTable *psTable; 00335 00336 /* -------------------------------------------------------------------- */ 00337 /* Get access to the table. */ 00338 /* -------------------------------------------------------------------- */ 00339 CPLAssert( pszFilename != NULL ); 00340 00341 if( iKeyField < 0 ) 00342 return NULL; 00343 00344 psTable = CSVAccess( pszFilename ); 00345 if( psTable == NULL ) 00346 return NULL; 00347 00348 /* -------------------------------------------------------------------- */ 00349 /* Does the current record match the criteria? If so, just */ 00350 /* return it again. */ 00351 /* -------------------------------------------------------------------- */ 00352 if( iKeyField >= 0 00353 && iKeyField < CSLCount(psTable->papszRecFields) 00354 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00355 { 00356 return psTable->papszRecFields; 00357 } 00358 00359 /* -------------------------------------------------------------------- */ 00360 /* Scan the file from the beginning, replacing the ``current */ 00361 /* record'' in our structure with the one that is found. */ 00362 /* -------------------------------------------------------------------- */ 00363 VSIRewind( psTable->fp ); 00364 CPLReadLine( psTable->fp ); /* throw away the header line */ 00365 00366 CSLDestroy( psTable->papszRecFields ); 00367 psTable->papszRecFields = 00368 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00369 00370 return( psTable->papszRecFields ); 00371 } 00372 00373 /************************************************************************/ 00374 /* CPLGetFieldId() */ 00375 /* */ 00376 /* Read the first record of a CSV file (rewinding to be sure), */ 00377 /* and find the field with the indicated name. Returns -1 if */ 00378 /* it fails to find the field name. Comparison is case */ 00379 /* insensitive, but otherwise exact. After this function has */ 00380 /* been called the file pointer will be positioned just after */ 00381 /* the first record. */ 00382 /************************************************************************/ 00383 00384 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00385 00386 { 00387 char **papszFields; 00388 int i; 00389 00390 CPLAssert( fp != NULL && pszFieldName != NULL ); 00391 00392 VSIRewind( fp ); 00393 00394 papszFields = CSVReadParseLine( fp ); 00395 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00396 { 00397 if( EQUAL(papszFields[i],pszFieldName) ) 00398 { 00399 CSLDestroy( papszFields ); 00400 return i; 00401 } 00402 } 00403 00404 CSLDestroy( papszFields ); 00405 00406 return -1; 00407 } 00408 00409 /************************************************************************/ 00410 /* CSVGetFileFieldId() */ 00411 /* */ 00412 /* Same as CPLGetFieldId(), except that we get the file based */ 00413 /* on filename, rather than having an existing handle. */ 00414 /************************************************************************/ 00415 00416 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00417 00418 { 00419 CSVTable *psTable; 00420 int i; 00421 00422 /* -------------------------------------------------------------------- */ 00423 /* Get access to the table. */ 00424 /* -------------------------------------------------------------------- */ 00425 CPLAssert( pszFilename != NULL ); 00426 00427 psTable = CSVAccess( pszFilename ); 00428 if( psTable == NULL ) 00429 return -1; 00430 00431 /* -------------------------------------------------------------------- */ 00432 /* Find the requested field. */ 00433 /* -------------------------------------------------------------------- */ 00434 for( i = 0; 00435 psTable->papszFieldNames != NULL 00436 && psTable->papszFieldNames[i] != NULL; 00437 i++ ) 00438 { 00439 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00440 { 00441 return i; 00442 } 00443 } 00444 00445 return -1; 00446 } 00447 00448 00449 /************************************************************************/ 00450 /* CSVScanFileByName() */ 00451 /* */ 00452 /* Same as CSVScanFile(), but using a field name instead of a */ 00453 /* field number. */ 00454 /************************************************************************/ 00455 00456 char **CSVScanFileByName( const char * pszFilename, 00457 const char * pszKeyFieldName, 00458 const char * pszValue, CSVCompareCriteria eCriteria ) 00459 00460 { 00461 int iKeyField; 00462 00463 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00464 if( iKeyField == -1 ) 00465 return NULL; 00466 00467 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00468 } 00469 00470 /************************************************************************/ 00471 /* CSVGetField() */ 00472 /* */ 00473 /* The all-in-one function to fetch a particular field value */ 00474 /* from a CSV file. Note this function will return an empty */ 00475 /* string, rather than NULL if it fails to find the desired */ 00476 /* value for some reason. The caller can't establish that the */ 00477 /* fetch failed. */ 00478 /************************************************************************/ 00479 00480 const char *CSVGetField( const char * pszFilename, 00481 const char * pszKeyFieldName, 00482 const char * pszKeyFieldValue, 00483 CSVCompareCriteria eCriteria, 00484 const char * pszTargetField ) 00485 00486 { 00487 CSVTable *psTable; 00488 char **papszRecord; 00489 int iTargetField; 00490 00491 /* -------------------------------------------------------------------- */ 00492 /* Find the table. */ 00493 /* -------------------------------------------------------------------- */ 00494 psTable = CSVAccess( pszFilename ); 00495 if( psTable == NULL ) 00496 return ""; 00497 00498 /* -------------------------------------------------------------------- */ 00499 /* Find the correct record. */ 00500 /* -------------------------------------------------------------------- */ 00501 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00502 pszKeyFieldValue, eCriteria ); 00503 00504 if( papszRecord == NULL ) 00505 return ""; 00506 00507 /* -------------------------------------------------------------------- */ 00508 /* Figure out which field we want out of this. */ 00509 /* -------------------------------------------------------------------- */ 00510 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00511 if( iTargetField < 0 ) 00512 return ""; 00513 00514 if( iTargetField >= CSLCount( papszRecord ) ) 00515 return ""; 00516 00517 return( papszRecord[iTargetField] ); 00518 } 00519 00520 /************************************************************************/ 00521 /* CSVFilename() */ 00522 /* */ 00523 /* Return the full path to a particular CSV file. This will */ 00524 /* eventually be something the application can override. */ 00525 /************************************************************************/ 00526 00527 static const char *(*pfnCSVFilenameHook)(const char *) = NULL; 00528 00529 const char * CSVFilename( const char *pszBasename ) 00530 00531 { 00532 static char szPath[512]; 00533 00534 if( pfnCSVFilenameHook == NULL ) 00535 { 00536 FILE *fp = NULL; 00537 const char *pszResult = CPLFindFile( "epsg_csv", pszBasename ); 00538 00539 if( pszResult != NULL ) 00540 return pszResult; 00541 00542 if( getenv("GEOTIFF_CSV") != NULL ) 00543 { 00544 sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename ); 00545 } 00546 else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL ) 00547 { 00548 sprintf( szPath, "csv/%s", pszBasename ); 00549 } 00550 else 00551 { 00552 sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename ); 00553 } 00554 00555 if( fp != NULL ) 00556 fclose( fp ); 00557 00558 return( szPath ); 00559 } 00560 else 00561 return( pfnCSVFilenameHook( pszBasename ) ); 00562 } 00563 00564 /************************************************************************/ 00565 /* SetCSVFilenameHook() */ 00566 /* */ 00567 /* Applications can use this to set a function that will */ 00568 /* massage CSV filenames. */ 00569 /************************************************************************/ 00570 00615 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) ) 00616 00617 { 00618 pfnCSVFilenameHook = pfnNewHook; 00619 }