00001 /****************************************************************************** 00002 * $Id: cpl_csv_c-source.html,v 1.2 2000/09/26 20:39:00 warmerda Exp $ 00003 * 00004 * Project: CPL - Common Portability Library 00005 * Purpose: CSV (comma separated value) file access. 00006 * Author: Frank Warmerdam, warmerda@home.com 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 1999, Frank Warmerdam 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 00028 ****************************************************************************** 00029 * 00030 * $Log: cpl_csv_c-source.html,v $ 00030 * Revision 1.2 2000/09/26 20:39:00 warmerda 00030 * *** empty log message *** 00030 * 00031 * Revision 1.2 2000/08/29 21:08:08 warmerda 00032 * fallback to use CPLFindFile() 00033 * 00034 * Revision 1.1 2000/04/05 21:55:59 warmerda 00035 * New 00036 * 00037 */ 00038 00039 #include "cpl_csv.h" 00040 #include "cpl_conv.h" 00041 00042 /* ==================================================================== */ 00043 /* The CSVTable is a persistant set of info about an open CSV */ 00044 /* table. While it doesn't currently maintain a record index, */ 00045 /* or in-memory copy of the table, it could be changed to do so */ 00046 /* in the future. */ 00047 /* ==================================================================== */ 00048 typedef struct ctb { 00049 FILE *fp; 00050 00051 struct ctb *psNext; 00052 00053 char *pszFilename; 00054 00055 char **papszFieldNames; 00056 00057 char **papszRecFields; 00058 } CSVTable; 00059 00060 static CSVTable *psCSVTableList = NULL; 00061 00062 /************************************************************************/ 00063 /* CSVAccess() */ 00064 /* */ 00065 /* This function will fetch a handle to the requested table. */ 00066 /* If not found in the ``open table list'' the table will be */ 00067 /* opened and added to the list. Eventually this function may */ 00068 /* become public with an abstracted return type so that */ 00069 /* applications can set options about the table. For now this */ 00070 /* isn't done. */ 00071 /************************************************************************/ 00072 00073 static CSVTable *CSVAccess( const char * pszFilename ) 00074 00075 { 00076 CSVTable *psTable; 00077 FILE *fp; 00078 00079 /* -------------------------------------------------------------------- */ 00080 /* Is the table already in the list. */ 00081 /* -------------------------------------------------------------------- */ 00082 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00083 { 00084 if( EQUAL(psTable->pszFilename,pszFilename) ) 00085 { 00086 /* 00087 * Eventually we should consider promoting to the front of 00088 * the list to accelerate frequently accessed tables. 00089 */ 00090 00091 return( psTable ); 00092 } 00093 } 00094 00095 /* -------------------------------------------------------------------- */ 00096 /* If not, try to open it. */ 00097 /* -------------------------------------------------------------------- */ 00098 fp = VSIFOpen( pszFilename, "r" ); 00099 if( fp == NULL ) 00100 return NULL; 00101 00102 /* -------------------------------------------------------------------- */ 00103 /* Create an information structure about this table, and add to */ 00104 /* the front of the list. */ 00105 /* -------------------------------------------------------------------- */ 00106 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00107 00108 psTable->fp = fp; 00109 psTable->pszFilename = CPLStrdup( pszFilename ); 00110 psTable->psNext = psCSVTableList; 00111 00112 psCSVTableList = psTable; 00113 00114 /* -------------------------------------------------------------------- */ 00115 /* Read the table header record containing the field names. */ 00116 /* -------------------------------------------------------------------- */ 00117 psTable->papszFieldNames = CSVReadParseLine( fp ); 00118 00119 return( psTable ); 00120 } 00121 00122 /************************************************************************/ 00123 /* CSVDeaccess() */ 00124 /************************************************************************/ 00125 00126 void CSVDeaccess( const char * pszFilename ) 00127 00128 { 00129 CSVTable *psLast, *psTable; 00130 00131 /* -------------------------------------------------------------------- */ 00132 /* A NULL means deaccess all tables. */ 00133 /* -------------------------------------------------------------------- */ 00134 if( pszFilename == NULL ) 00135 { 00136 while( psCSVTableList != NULL ) 00137 CSVDeaccess( psCSVTableList->pszFilename ); 00138 00139 return; 00140 } 00141 00142 /* -------------------------------------------------------------------- */ 00143 /* Find this table. */ 00144 /* -------------------------------------------------------------------- */ 00145 psLast = NULL; 00146 for( psTable = psCSVTableList; 00147 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00148 psTable = psTable->psNext ) 00149 { 00150 psLast = psTable; 00151 } 00152 00153 if( psTable == NULL ) 00154 return; 00155 00156 /* -------------------------------------------------------------------- */ 00157 /* Remove the link from the list. */ 00158 /* -------------------------------------------------------------------- */ 00159 if( psLast != NULL ) 00160 psLast->psNext = psTable->psNext; 00161 else 00162 psCSVTableList = psTable->psNext; 00163 00164 /* -------------------------------------------------------------------- */ 00165 /* Free the table. */ 00166 /* -------------------------------------------------------------------- */ 00167 VSIFClose( psTable->fp ); 00168 00169 CSLDestroy( psTable->papszFieldNames ); 00170 CSLDestroy( psTable->papszRecFields ); 00171 CPLFree( psTable->pszFilename ); 00172 00173 CPLFree( psTable ); 00174 } 00175 00176 /************************************************************************/ 00177 /* CSVReadParseLine() */ 00178 /* */ 00179 /* Read one line, and return split into fields. The return */ 00180 /* result is a stringlist, in the sense of the CSL functions. */ 00181 /************************************************************************/ 00182 00183 char **CSVReadParseLine( FILE * fp ) 00184 00185 { 00186 const char *pszLine; 00187 char *pszWorkLine; 00188 char **papszReturn; 00189 00190 CPLAssert( fp != NULL ); 00191 if( fp == NULL ) 00192 return( NULL ); 00193 00194 pszLine = CPLReadLine( fp ); 00195 if( pszLine == NULL ) 00196 return( NULL ); 00197 00198 /* -------------------------------------------------------------------- */ 00199 /* If there are no quotes, then this is the simple case. */ 00200 /* Parse, and return tokens. */ 00201 /* -------------------------------------------------------------------- */ 00202 if( strchr(pszLine,'\"') == NULL ) 00203 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE ); 00204 00205 /* -------------------------------------------------------------------- */ 00206 /* We must now count the quotes in our working string, and as */ 00207 /* long as it is odd, keep adding new lines. */ 00208 /* -------------------------------------------------------------------- */ 00209 pszWorkLine = CPLStrdup( pszLine ); 00210 00211 while( TRUE ) 00212 { 00213 int i, nCount = 0; 00214 00215 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00216 { 00217 if( pszWorkLine[i] == '\"' 00218 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00219 nCount++; 00220 } 00221 00222 if( nCount % 2 == 0 ) 00223 break; 00224 00225 pszLine = CPLReadLine( fp ); 00226 if( pszLine == NULL ) 00227 break; 00228 00229 pszWorkLine = (char *) 00230 CPLRealloc(pszWorkLine, 00231 strlen(pszWorkLine) + strlen(pszLine) + 1); 00232 strcat( pszWorkLine, pszLine ); 00233 } 00234 00235 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE ); 00236 00237 CPLFree( pszWorkLine ); 00238 00239 return papszReturn; 00240 } 00241 00242 /************************************************************************/ 00243 /* CSVCompare() */ 00244 /* */ 00245 /* Compare a field to a search value using a particular */ 00246 /* criteria. */ 00247 /************************************************************************/ 00248 00249 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00250 CSVCompareCriteria eCriteria ) 00251 00252 { 00253 if( eCriteria == CC_ExactString ) 00254 { 00255 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00256 } 00257 else if( eCriteria == CC_ApproxString ) 00258 { 00259 return( EQUAL( pszFieldValue, pszTarget ) ); 00260 } 00261 else if( eCriteria == CC_Integer ) 00262 { 00263 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00264 } 00265 00266 return FALSE; 00267 } 00268 00269 /************************************************************************/ 00270 /* CSVScanLines() */ 00271 /* */ 00272 /* Read the file scanline for lines where the key field equals */ 00273 /* the indicated value with the suggested comparison criteria. */ 00274 /* Return the first matching line split into fields. */ 00275 /************************************************************************/ 00276 00277 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00278 CSVCompareCriteria eCriteria ) 00279 00280 { 00281 char **papszFields = NULL; 00282 int bSelected = FALSE, nTestValue; 00283 00284 CPLAssert( pszValue != NULL ); 00285 CPLAssert( iKeyField >= 0 ); 00286 CPLAssert( fp != NULL ); 00287 00288 nTestValue = atoi(pszValue); 00289 00290 while( !bSelected ) { 00291 papszFields = CSVReadParseLine( fp ); 00292 if( papszFields == NULL ) 00293 return( NULL ); 00294 00295 if( CSLCount( papszFields ) < iKeyField+1 ) 00296 { 00297 /* not selected */ 00298 } 00299 else if( eCriteria == CC_Integer 00300 && atoi(papszFields[iKeyField]) == nTestValue ) 00301 { 00302 bSelected = TRUE; 00303 } 00304 else 00305 { 00306 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00307 eCriteria ); 00308 } 00309 00310 if( !bSelected ) 00311 { 00312 CSLDestroy( papszFields ); 00313 papszFields = NULL; 00314 } 00315 } 00316 00317 return( papszFields ); 00318 } 00319 00320 /************************************************************************/ 00321 /* CSVScanFile() */ 00322 /* */ 00323 /* Scan a whole file using criteria similar to above, but also */ 00324 /* taking care of file opening and closing. */ 00325 /************************************************************************/ 00326 00327 char **CSVScanFile( const char * pszFilename, int iKeyField, 00328 const char * pszValue, CSVCompareCriteria eCriteria ) 00329 00330 { 00331 CSVTable *psTable; 00332 00333 /* -------------------------------------------------------------------- */ 00334 /* Get access to the table. */ 00335 /* -------------------------------------------------------------------- */ 00336 CPLAssert( pszFilename != NULL ); 00337 00338 if( iKeyField < 0 ) 00339 return NULL; 00340 00341 psTable = CSVAccess( pszFilename ); 00342 if( psTable == NULL ) 00343 return NULL; 00344 00345 /* -------------------------------------------------------------------- */ 00346 /* Does the current record match the criteria? If so, just */ 00347 /* return it again. */ 00348 /* -------------------------------------------------------------------- */ 00349 if( iKeyField >= 0 00350 && iKeyField < CSLCount(psTable->papszRecFields) 00351 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00352 { 00353 return psTable->papszRecFields; 00354 } 00355 00356 /* -------------------------------------------------------------------- */ 00357 /* Scan the file from the beginning, replacing the ``current */ 00358 /* record'' in our structure with the one that is found. */ 00359 /* -------------------------------------------------------------------- */ 00360 VSIRewind( psTable->fp ); 00361 CPLReadLine( psTable->fp ); /* throw away the header line */ 00362 00363 CSLDestroy( psTable->papszRecFields ); 00364 psTable->papszRecFields = 00365 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00366 00367 return( psTable->papszRecFields ); 00368 } 00369 00370 /************************************************************************/ 00371 /* CPLGetFieldId() */ 00372 /* */ 00373 /* Read the first record of a CSV file (rewinding to be sure), */ 00374 /* and find the field with the indicated name. Returns -1 if */ 00375 /* it fails to find the field name. Comparison is case */ 00376 /* insensitive, but otherwise exact. After this function has */ 00377 /* been called the file pointer will be positioned just after */ 00378 /* the first record. */ 00379 /************************************************************************/ 00380 00381 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00382 00383 { 00384 char **papszFields; 00385 int i; 00386 00387 CPLAssert( fp != NULL && pszFieldName != NULL ); 00388 00389 VSIRewind( fp ); 00390 00391 papszFields = CSVReadParseLine( fp ); 00392 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00393 { 00394 if( EQUAL(papszFields[i],pszFieldName) ) 00395 { 00396 CSLDestroy( papszFields ); 00397 return i; 00398 } 00399 } 00400 00401 CSLDestroy( papszFields ); 00402 00403 return -1; 00404 } 00405 00406 /************************************************************************/ 00407 /* CSVGetFileFieldId() */ 00408 /* */ 00409 /* Same as CPLGetFieldId(), except that we get the file based */ 00410 /* on filename, rather than having an existing handle. */ 00411 /************************************************************************/ 00412 00413 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00414 00415 { 00416 CSVTable *psTable; 00417 int i; 00418 00419 /* -------------------------------------------------------------------- */ 00420 /* Get access to the table. */ 00421 /* -------------------------------------------------------------------- */ 00422 CPLAssert( pszFilename != NULL ); 00423 00424 psTable = CSVAccess( pszFilename ); 00425 if( psTable == NULL ) 00426 return -1; 00427 00428 /* -------------------------------------------------------------------- */ 00429 /* Find the requested field. */ 00430 /* -------------------------------------------------------------------- */ 00431 for( i = 0; 00432 psTable->papszFieldNames != NULL 00433 && psTable->papszFieldNames[i] != NULL; 00434 i++ ) 00435 { 00436 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00437 { 00438 return i; 00439 } 00440 } 00441 00442 return -1; 00443 } 00444 00445 00446 /************************************************************************/ 00447 /* CSVScanFileByName() */ 00448 /* */ 00449 /* Same as CSVScanFile(), but using a field name instead of a */ 00450 /* field number. */ 00451 /************************************************************************/ 00452 00453 char **CSVScanFileByName( const char * pszFilename, 00454 const char * pszKeyFieldName, 00455 const char * pszValue, CSVCompareCriteria eCriteria ) 00456 00457 { 00458 int iKeyField; 00459 00460 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00461 if( iKeyField == -1 ) 00462 return NULL; 00463 00464 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00465 } 00466 00467 /************************************************************************/ 00468 /* CSVGetField() */ 00469 /* */ 00470 /* The all-in-one function to fetch a particular field value */ 00471 /* from a CSV file. Note this function will return an empty */ 00472 /* string, rather than NULL if it fails to find the desired */ 00473 /* value for some reason. The caller can't establish that the */ 00474 /* fetch failed. */ 00475 /************************************************************************/ 00476 00477 const char *CSVGetField( const char * pszFilename, 00478 const char * pszKeyFieldName, 00479 const char * pszKeyFieldValue, 00480 CSVCompareCriteria eCriteria, 00481 const char * pszTargetField ) 00482 00483 { 00484 CSVTable *psTable; 00485 char **papszRecord; 00486 int iTargetField; 00487 00488 /* -------------------------------------------------------------------- */ 00489 /* Find the table. */ 00490 /* -------------------------------------------------------------------- */ 00491 psTable = CSVAccess( pszFilename ); 00492 if( psTable == NULL ) 00493 return ""; 00494 00495 /* -------------------------------------------------------------------- */ 00496 /* Find the correct record. */ 00497 /* -------------------------------------------------------------------- */ 00498 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00499 pszKeyFieldValue, eCriteria ); 00500 00501 if( papszRecord == NULL ) 00502 return ""; 00503 00504 /* -------------------------------------------------------------------- */ 00505 /* Figure out which field we want out of this. */ 00506 /* -------------------------------------------------------------------- */ 00507 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00508 if( iTargetField < 0 ) 00509 return ""; 00510 00511 if( iTargetField >= CSLCount( papszRecord ) ) 00512 return ""; 00513 00514 return( papszRecord[iTargetField] ); 00515 } 00516 00517 /************************************************************************/ 00518 /* CSVFilename() */ 00519 /* */ 00520 /* Return the full path to a particular CSV file. This will */ 00521 /* eventually be something the application can override. */ 00522 /************************************************************************/ 00523 00524 static const char *(*pfnCSVFilenameHook)(const char *) = NULL; 00525 00526 const char * CSVFilename( const char *pszBasename ) 00527 00528 { 00529 static char szPath[512]; 00530 00531 if( pfnCSVFilenameHook == NULL ) 00532 { 00533 FILE *fp = NULL; 00534 const char *pszResult = CPLFindFile( "epsg_csv", pszBasename ); 00535 00536 if( pszResult != NULL ) 00537 return pszResult; 00538 00539 if( getenv("GEOTIFF_CSV") != NULL ) 00540 { 00541 sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename ); 00542 } 00543 else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL ) 00544 { 00545 sprintf( szPath, "csv/%s", pszBasename ); 00546 } 00547 else 00548 { 00549 sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename ); 00550 } 00551 00552 if( fp != NULL ) 00553 fclose( fp ); 00554 00555 return( szPath ); 00556 } 00557 else 00558 return( pfnCSVFilenameHook( pszBasename ) ); 00559 } 00560 00561 /************************************************************************/ 00562 /* SetCSVFilenameHook() */ 00563 /* */ 00564 /* Applications can use this to set a function that will */ 00565 /* massage CSV filenames. */ 00566 /************************************************************************/ 00567 00612 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) ) 00613 00614 { 00615 pfnCSVFilenameHook = pfnNewHook; 00616 }