Read and process Comma-Separated-Value (or equivalent) files a record at a time.
More...
|
| CSV_File (OS_HANDLE_TYPE descriptor, bool hasHeader=true, char sepChar=',') |
| Parse conventional CSV file. More...
|
|
| CSV_File (OS_HANDLE_TYPE descriptor, const char *separatorList, const char *quoteChars="\"", bool hasHeader=true) |
| Parse a CSV-style file that has more than one column separator character in use. More...
|
|
virtual | ~CSV_File () |
|
int | getFieldIndex (const char *fieldHeading) const |
| Return the relative subscript for a named column. More...
|
|
int | parseIntoFields (int maxFields, char *fieldStart[], unsigned char *line, size_t lineLen) |
| Parse text line into fields. More...
|
|
int | readAndProcessCSV_File () |
| Top-level routine to parse CSV file. More...
|
|
int | processPacket (unsigned char *data, size_t len) |
| Streaming interface equivalent to readAndProcessCSV_File(). The API is compatible with the processPacketUsingClass<> convenience interface for the IO_Processor class processing routine. More...
|
|
virtual int | processHeaderLine (unsigned char *line, size_t lineLen) VIRTUAL_OVERRIDE |
| Process initial header line in file. More...
|
|
virtual int | parsedHeadingLine () |
| User-exit invoked when initial header line is parsed. More...
|
|
virtual int | completedFile (int recordsSeen) VIRTUAL_OVERRIDE |
| User-exit to notify that file has been completely read. More...
|
|
Public Member Functions inherited from Read_And_Process_File |
| Read_And_Process_File (OS_HANDLE_TYPE srcDescriptor, ReadModes mode=READ_NORMAL, const FileTypeReaderSelector *selectorTable=nullptr) |
| Construct from an existing file descriptor. More...
|
|
| Read_And_Process_File (const char *fileName, ReadModes mode=READ_NORMAL, const FileTypeReaderSelector *selectorTable=nullptr) |
| Construct given the name of a file to be opened. More...
|
|
virtual | ~Read_And_Process_File () |
|
void | setReadRoutine (ReadDataFP altRoutine) OME_ALWAYS_INLINE |
| Set a new file read routine. More...
|
|
virtual void | noteDataRead (const unsigned char *bfr, size_t bfrLen) const |
| User-exit to see the original copy of any data read. More...
|
|
int | readAndProcessBlocksFromFile (size_t recordLength) |
| Process fixed length records. More...
|
|
int | findAndProcessNextLine (File_Buffer *fileBfr, bool hasHeaderLine=false) |
| Process next text line from buffer. More...
|
|
int | readAndProcessTextLines (bool hasHeaderLine=false) |
| Process text lines. More...
|
|
int | readIntoFileBuffer (File_Buffer *bfr) |
|
int | readAndProcessFile () |
| Process file contents with no imposed structure. More...
|
|
virtual int | beginFile () |
|
virtual int | processLine (const unsigned char *line, size_t lineLen) |
|
virtual int_fast32_t | processBlock (unsigned char *block, size_t blockLen) |
|
virtual int | processBuffer (File_Buffer *bfrState) |
|
|
Static Public Member Functions inherited from Read_And_Process_File |
static int | defaultReadRoutine (Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen) |
| Default read routine. More...
|
|
static const FileTypeReaderSelector * | findTypeOfFile (const char *fileName, const FileTypeReaderSelector *selectorTable) |
|
static OS_HANDLE_TYPE | openFile (const char *fileName, ReadModes mode=READ_NORMAL) |
| Open the indicated file. More...
|
|
static int | closeFile (OS_HANDLE_TYPE fd, ReadModes mode=READ_NORMAL) |
| Close a native operating system file handle. More...
|
|
static int | findFileInPathsWithSuffixes (char *path, uint_fast32_t pathLen, const char *searchRootPaths, const char *possibleFilenames="", const char *possibleSuffixes="") |
| Search for a file using a combination of directory roots and file suffixes. More...
|
|
Public Attributes inherited from Read_And_Process_File |
ReadDataFP | readRoutine |
|
void * | auxData |
|
OS_HANDLE_TYPE | descriptor |
|
uint32_t | recordsProcessed |
|
enum ReadModes | readMode |
|
uint32_t | _explicitAlignmentPadding |
|
Protected Member Functions inherited from Read_And_Process_File |
virtual int | readIntoBuffer (unsigned char *bfr, size_t bfrLen) |
|
Static Protected Member Functions inherited from Read_And_Process_File |
static uint32_t | matchFileHeader (const FileTypeReaderSelector *criteria, const unsigned char *fileHdr, const size_t hdrLen) |
|
Read and process Comma-Separated-Value (or equivalent) files a record at a time.
CSV_File::CSV_File |
( |
OS_HANDLE_TYPE |
descriptor, |
|
|
bool |
hasHeader = true , |
|
|
char |
sepChar = ',' |
|
) |
| |
|
explicit |
Parse conventional CSV file.
- Parameters
-
descriptor | is the descriptor/handle of an already open file. |
hasHeader | is an optional argument that indicates if the file has an initial header describing the column names. |
sepChar | is an optional argument indicating the separator character used between columns; it defaults to a comma. |
By default, the double quote mark can be used to group elements as a single field.
References doHeaderLine, fieldTotal, headerLine, quoteCharList, quoteListLen, readsPerformed, separatorCharList, and separatorListLen.
CSV_File::CSV_File |
( |
OS_HANDLE_TYPE |
descriptor, |
|
|
const char * |
separatorList, |
|
|
const char * |
quoteChars = "\"" , |
|
|
bool |
hasHeader = true |
|
) |
| |
Parse a CSV-style file that has more than one column separator character in use.
- Parameters
-
descriptor | is the descriptor/handle of an already open file. |
separatorList | is a pointer to text string listing possible separator characters used between columns. |
quoteChars | is an optional pointer to a text string listing possible quote characters. |
hasHeader | is an optional argument that indicates if the file has an initial header describing the column names. |
References doHeaderLine, fieldTotal, headerLine, LOG_CERR, LOG_ENDLINE, OME_EXPECT_FALSE, quoteCharList, quoteListLen, readsPerformed, separatorCharList, and separatorListLen.
int CSV_File::parseIntoFields |
( |
int |
maxFields, |
|
|
char * |
fieldStart[], |
|
|
unsigned char * |
line, |
|
|
size_t |
lineLen |
|
) |
| |
Parse text line into fields.
Fields are separated by any character specified in the separator list, unless inside a quoted sequence. Quoted sequences are started by any character in the quote character list.
- Parameters
-
maxFields | indicates the number of elements available in fieldStart. |
fieldStart | is an array of character pointers into which will be stored a pointer to each field's value. |
line | points to the line to be parsed. It is expected that it will end in a newline character. NOTE: the contents of line will be modified. |
lineLen | is the length of the line; the last character (which would be line[lineLen]) should be a newline. |
- Returns
- The number of fields parsed is returned. Only pointers to the first maxFields are stored into the fieldStart array, but the total number of fields seen is returned.
References AS_HEXADECIMAL_BUFFER, LOG_CERR, LOG_ENDLINE, OME_EXPECT_FALSE, OME_EXPECT_TRUE, quoteCharList, quoteListLen, separatorCharList, and separatorListLen.
Referenced by processHeaderLine().