FARGOS/VISTA Object Management Environment Core  ..
FARGOS/VISTA Object Management Environment Core Table of Contents
read_file.hpp
Go to the documentation of this file.
1 #ifndef _READ_FILE_HPP_
2 #define _READ_FILE_HPP_ "$Id: read_file.hpp 455 2020-07-23 20:23:59Z geoff $"
4 
8 
38 class File_Buffer {
39 public:
40  enum {
41  EXTERNAL_SEGMENT = (1 << 0),
42  IS_READ_ONLY = (1 << 1)
43  };
44  unsigned char *buffer;
45  size_t bufferLen;
46  size_t endBufferOffset;
49 
52  explicit File_Buffer(size_t bfrSize=65536) {
53  bufferLen = bfrSize;
54  endBufferOffset = 0;
56  buffer = new unsigned char[bufferLen];
58  }
59 
75  File_Buffer(unsigned char *existingBuffer, size_t bfrSize,
76  size_t currentLen=0, uint8_t ownershipFlags=EXTERNAL_SEGMENT) {
77  bufferLen = bfrSize;
78  endBufferOffset = currentLen;
80  buffer = existingBuffer;
81  externalBufferFlags = ownershipFlags;
82  }
83 
86  delete[] buffer;
87  }
88  }
89 
105  void useRegionAsBuffer(unsigned char *existingBuffer, size_t bfrSize,
106  size_t currentLen, uint8_t ownershipFlags) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3") {
108  delete[] buffer; // delete prior content
109  }
110  bufferLen = bfrSize;
111  endBufferOffset = currentLen;
112  startBufferOffset = 0;
113  buffer = existingBuffer;
114  externalBufferFlags = ownershipFlags;
115  }
116 
119  inline size_t spaceLeft() const OME_ALWAYS_INLINE {
120  return (bufferLen - endBufferOffset);
121  }
122 
125  inline size_t spaceUsed() const OME_ALWAYS_INLINE {
127  }
128 
131  inline unsigned char *startBufferLocation() const OME_ALWAYS_INLINE {
132  return (buffer + startBufferOffset);
133  }
134 
138  inline unsigned char *endBufferLocation() const OME_ALWAYS_INLINE {
139  return (buffer + endBufferOffset);
140  }
141 
147  inline void adjustBufferEnd(size_t amount) OME_ALWAYS_INLINE {
148  endBufferOffset += amount;
149  }
150 
163  inline void adjustBufferStart(size_t amount) OME_ALWAYS_INLINE {
164  startBufferOffset += amount;
165  }
166 
171  inline void discardInitialBytes(size_t bytes=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3") {
172  size_t offset = startBufferOffset + bytes;
173  // this test is done first to avoid any unnecessary writes
174  if (offset == 0) { // already at start, no bytes to discard
175  return; // nothing to do
176  }
178  if (offset == endBufferOffset) { // would be empty
179  startBufferOffset = 0; // reset
180  endBufferOffset = 0;
181  return;
182  }
183  // cannot permit buffer to be modified
184  startBufferOffset += bytes;
185  return;
186  }
187  size_t used = spaceUsed();
188  if (used <= bytes) { // empty, reset to fill from start of buffer
189  startBufferOffset = 0;
190  endBufferOffset = 0;
191  return;
192  }
193  size_t bytesLeft = used - bytes;
194  memmove(buffer, buffer + offset, bytesLeft);
195  startBufferOffset = 0;
196  endBufferOffset = bytesLeft;
197  }
198 
215  inline size_t appendDataToBuffer(const unsigned char *newData,
216  size_t bytesToAdd, size_t startFromOffset=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
217  {
219  if (spaceUsed() == 0) { // nothing was stored at this moment
220  // drop const, and set as new region, propagate existing flags
221  unsigned char *bfrAddr = const_cast<unsigned char *>(newData) + startFromOffset;
222  useRegionAsBuffer(bfrAddr, bytesToAdd, bytesToAdd, externalBufferFlags);
223  return (bytesToAdd);
224  }
225  // cannot permit buffer to be modified
226  return (0);
227  }
228  size_t byteCount = bytesToAdd;
229  size_t left = spaceLeft();
230  if (OME_EXPECT_FALSE(left < byteCount)) { // cannot add more than space left in the buffer
231  byteCount = left; // truncate amount to store
232  }
233  memcpy(buffer + endBufferOffset, newData + startFromOffset, byteCount);
234  endBufferOffset += byteCount;
235  return (byteCount);
236  }
237 
238 }; // end class File_Buffer
239 
245 public:
246  enum ReadModes {
250  };
251  typedef int (*ReadDataFP)(class Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen);
252 
253  static int defaultReadRoutine(Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen);
254 
263  unsigned char bytesToMatch[16];
264  };
265 
266  static const FileTypeReaderSelector *findTypeOfFile(const char *fileName,
267  const FileTypeReaderSelector *selectorTable);
268 protected:
269  static uint32_t matchFileHeader(const FileTypeReaderSelector *criteria,
270  const unsigned char *fileHdr, const size_t hdrLen);
271 
272 #pragma GCC diagnostic push
273 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
274  virtual int readIntoBuffer(unsigned char *bfr, size_t bfrLen);
275 #pragma GCC diagnostic pop
276 
277 protected:
279 public:
281  void *auxData;
286 
296  static OS_HANDLE_TYPE openFile(const char *fileName,
297  ReadModes mode=READ_NORMAL);
298 
304  static int closeFile(OS_HANDLE_TYPE fd, ReadModes mode=READ_NORMAL);
305 
324  static int findFileInPathsWithSuffixes(char *path, uint_fast32_t pathLen,
325  const char *searchRootPaths, const char *possibleFilenames="",
326  const char *possibleSuffixes="");
327 
330  explicit Read_And_Process_File(OS_HANDLE_TYPE srcDescriptor,
331  ReadModes mode=READ_NORMAL,
332  const FileTypeReaderSelector *selectorTable=nullptr);
333 
347  explicit Read_And_Process_File(const char *fileName,
348  ReadModes mode=READ_NORMAL,
349  const FileTypeReaderSelector *selectorTable=nullptr);
350 
351  virtual ~Read_And_Process_File();
352 
356  readRoutine = altRoutine;
357  }
358 
359 #pragma GCC diagnostic push
360 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
361  virtual void noteDataRead(const unsigned char *bfr, size_t bfrLen) const {}
363 #pragma GCC diagnostic pop
364 
366  int readAndProcessBlocksFromFile(size_t recordLength);
367 
372  int findAndProcessNextLine(File_Buffer *fileBfr, bool hasHeaderLine=false);
373 
381  int readAndProcessTextLines(bool hasHeaderLine=false); // text file
382 
384  size_t result = readIntoBuffer(bfr->endBufferLocation(),
385  bfr->spaceLeft());
386  if (result > 0) {
387  bfr->adjustBufferEnd(result);
388  }
389  return (result);
390  }
391 
398  int rc = beginFile(); // invoke user exit to do any initial work
399  if (rc != 0) {
400  return (rc);
401  }
402 
403  ssize_t len = readIntoFileBuffer(intermediateBuffer);
404  while (len > 0) {
407  }
408  rc = completedFile(recordsProcessed); // invoke user routine to finish
409  return (rc);
410  }
411 
412 
413 #pragma GCC diagnostic push
414 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
415 
416  // called after successful open, useful to output file headers
417  virtual int beginFile() { return (0); }
418 
419  // called when all records read, useful to close output file
420  virtual int completedFile(int recordsSeen) { return (recordsSeen); }
421 
422  // implement to do useful work when reading text lines...
423  virtual int processLine(const unsigned char *line, size_t lineLen) { return (0); }
424 
431  virtual int processHeaderLine(unsigned char *line, size_t lineLen) {
432  return (2);
433  }
434 
435  // implement to do useful work when reading fixed length blocks
436  virtual int_fast32_t processBlock(unsigned char *block, size_t blockLen) {
437 //LOG_COMPONENT_CERR(io,warn) << "unexpected: Default Read_And_Process_File::processBlock() len=" << blockLen << LOG_ENDLINE;
438  return (0);
439  };
440 
441  // implement to do useful work when reading arbitrary-sized blocks
442  virtual int processBuffer(File_Buffer *bfrState) {
443 //LOG_COMPONENT_CERR(io,warn) << "unexpected: Default Read_And_Process_File::processBuffer() len=" << bfrState->spaceUsed() << LOG_ENDLINE;
444  return (0);
445  }
446 
447 #pragma GCC diagnostic pop
448 
449 }; // end class Read_And_Process_File
450 
451 
452 
457 protected:
458  enum {
464  unsigned char documentStartPrefix[64];
465  unsigned char documentEndTag[64];
467  ssize_t neededBlockLen;
470 
471  int scanForDocument(); // TODO: invokes processDocument(doc,len) when found
472 
473 public:
475  const char *endTag, size_t maxDocSize=1024*128)
476  {
477  pendingContent = new File_Buffer(maxDocSize);
479  safe_strcpy(documentEndTag, endTag, sizeof(documentEndTag));
480 
481  documentStartLen = strlen((const char *) documentStartPrefix);
482  documentEndLen = strlen((const char *) documentEndTag);
483  neededBlockLen = -1;
484 
485  if (documentStartLen > 0) {
487  } else {
489  }
491  }
492 
494  {
495  delete pendingContent;
496  }
497 
498  int scanAndProcessBuffer(const unsigned char *block, size_t blockLen) {
499  size_t left = pendingContent->spaceLeft();
500 //LOG_COMPONENT_CERR(io,trace) << "scanAndProcessBuffer add=" << blockLen << " left=" << left << " rtained=" << pendingContent->spaceUsed() << LOG_ENDLINE;
501  while (OME_EXPECT_FALSE(left < blockLen)) { // not enough space for all of it
502  // force relocation of any remaining data to front of buffer
504  // then add what we can
505  size_t bytesAdded = pendingContent->appendDataToBuffer(block, blockLen);
506  block += bytesAdded;
507  blockLen -= bytesAdded;
508  // try scanning for one or more complete documents
509  scanForDocument();
510  if (blockLen == 0) { // added everything
511  return (0);
512  }
513  left = pendingContent->spaceLeft();
514  if (left == 0) {
515  return (-1);
516  }
517  }
518  pendingContent->appendDataToBuffer(block, blockLen);
519  scanForDocument();
520  return (0);
521  }
522 
524  unsigned char *block = bfrState->startBufferLocation();
525  size_t blockLen = bfrState->spaceUsed();
526  int result = scanAndProcessBuffer(block, blockLen);
527  bfrState->discardInitialBytes(blockLen); // drop content passed in
528  return (result);
529  }
530 
542  size_t newLen;
543  unsigned char *newData = controller->bufferAddress(rec, &newLen);
544  size_t left = pendingContent->spaceLeft();
545  if (OME_EXPECT_FALSE(left < newLen)) { // not enough space for all of it
546  // force relocation of any remaining data to front of buffer
548  left = pendingContent->spaceLeft();
549  if (left < newLen) { // still not enough space
550  LOG_COMPONENT_CERR(app,error) << "Could not add " << newLen <<
551  " bytes as only space for " << left << " remains" << LOG_ENDLINE;
552  return (-1);
553  }
554  }
555  // then add what we can
556  size_t bytesAdded = pendingContent->appendDataToBuffer(newData, newLen);
557  controller->bfrManager->returnBlock(rec); // force return of block
558  scanForDocument();
559  return (0);
560  }
561 
568  virtual int processDocument(unsigned char *docStart, size_t docLen) = 0;
569 
574  virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen) {
575  return (0);
576  }
577 
578 }; // end class Extract_And_Process_Document_Stream
579 
580 #pragma GCC diagnostic push
581 #pragma GCC diagnostic ignored "-Wsuggest-final-types"
582 
585 public:
586  enum ParseState {
591  };
592 protected:
595 
596 public:
597  char httpVersion[16]; // typically: HTTP/1.1
598  char statusCode[8]; // typically: 200
599  char responseReason[48]; // typically: OK
600  char httpRequest[16];
601  char requestURL[1024];
603  std::map<std::string,std::string> headerAttributes;
604 
605  explicit Parse_And_Process_HTTP_Stream(size_t maxDocSize=1024*128,
606  ParseState initialState=READ_RESPONSE) :
607  Extract_And_Process_Document_Stream("", "\r\n", maxDocSize)
608  {
609  restartParseState = initialState;
610  parseState = initialState;
611  contentLengthWanted = -1;
612 
613  httpVersion[0] = '\0';
614  statusCode[0] = '\0';
615  responseReason[0] = '\0';
616  }
617 
619 
620  const char *findHeaderAttribute(const char *headerElement) const {
621  std::map<std::string,std::string>::const_iterator i = headerAttributes.find(headerElement);
622  if (i != headerAttributes.end()) {
623  return (i->second.c_str());
624  }
625  return (nullptr);
626  }
627 
633  virtual int processHTTPresponse(unsigned char *line, size_t lineLen) {
634  if (line[lineLen - 1] == '\n') {
635  lineLen -= 1;
636  if (line[lineLen - 1] == '\r') {
637  lineLen -= 1;
638  }
639  }
640  if (lineLen == 0) {
641  LOG_COMPONENT_CERR(io,trace) << "Got null HTTP request" << LOG_ENDLINE;
642  return (0);
643  }
644  LOG_COMPONENT_CERR(io,trace) << "Got HTTP response: len=" << lineLen <<
645  " line=" << AS_TEXT_BUFFER((char *) line, lineLen) << LOG_ENDLINE;
646  size_t offset = 0;
647  size_t i = 0;
648  while ((i < (sizeof(httpVersion) - 1)) && (offset < lineLen)) {
649  if (isspace(line[offset])) break;
650  httpVersion[i++] = (char) line[offset++];
651  }
652  httpVersion[i] = '\0';
653  while (isspace(line[offset]) && (offset < lineLen)) {
654  offset += 1;
655  }
656  i = 0;
657  while ((i < (sizeof(statusCode) - 1)) && (offset < lineLen)) {
658  if (isspace(line[offset])) break;
659  statusCode[i++] = (char) line[offset++];
660  }
661  statusCode[i] = '\0';
662  while (isspace(line[offset]) && (offset < lineLen)) {
663  offset += 1;
664  }
665  i = 0;
666  while ((i < (sizeof(responseReason) - 1)) && (offset < lineLen)) {
667  if (isspace(line[offset])) break;
668  responseReason[i++] = (char) line[offset++];
669  }
670  responseReason[i] = '\0';
671  headerAttributes.clear();
672 
673  return (0);
674  }
675 
676 #pragma GCC diagnostic push
677 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
678 
683  virtual int processHTTPrequest(unsigned char *line, size_t lineLen) {
684  if (line[lineLen - 1] == '\n') {
685  lineLen -= 1;
686  if (line[lineLen - 1] == '\r') {
687  lineLen -= 1;
688  }
689  }
690  if (lineLen == 0) {
691  LOG_COMPONENT_CERR(io,trace) << "Got null HTTP request" << LOG_ENDLINE;
692  return (0);
693  }
694  LOG_COMPONENT_CERR(io,trace) << "Got HTTP request: len=" << lineLen <<
695  " line=" << AS_TEXT_BUFFER((char *) line, lineLen) << LOG_ENDLINE;
696  size_t offset = 0;
697  size_t i = 0;
698  while ((i < (sizeof(httpRequest) - 1)) && (offset < lineLen)) {
699  if (isspace(line[offset])) break;
700  httpRequest[i++] = (char) line[offset++];
701  }
702  httpRequest[i] = '\0';
703  while (isspace(line[offset]) && (offset < lineLen)) {
704  offset += 1;
705  }
706  i = 0;
707  while ((i < (sizeof(requestURL) - 1)) && (offset < lineLen)) {
708  if (isspace(line[offset])) break;
709  requestURL[i++] = (char) line[offset++];
710  }
711  requestURL[i] = '\0';
712  while (isspace(line[offset]) && (offset < lineLen)) {
713  offset += 1;
714  }
715  i = 0;
716  while ((i < (sizeof(httpVersion) - 1)) && (offset < lineLen)) {
717  if (isspace(line[offset])) break;
718  httpVersion[i++] = (char) line[offset++];
719  }
720  httpVersion[i] = '\0';
721  headerAttributes.clear();
722 
723  return (0);
724  }
725 
732  virtual int processHTTPheader(unsigned char *line, size_t lineLen) {
733  if (line[lineLen - 1] == '\n') {
734  lineLen -= 1;
735  if (line[lineLen - 1] == '\r') {
736  lineLen -= 1;
737  }
738  }
739  if (lineLen == 0) { // end of HTTP headers
740  return (0);
741  }
742  LOG_COMPONENT_CERR(io,trace) << "Got HTTP header len=" << lineLen <<
743  " line=" << AS_TEXT_BUFFER((char *) line, lineLen) << LOG_ENDLINE;
744  unsigned char *colon = (unsigned char *) memchr(line, ':', lineLen);
745  if (colon != nullptr) {
746  size_t keyLen = (colon - line);
747  size_t valLen = lineLen - (keyLen + 1);
748  colon += 1; // past colon
749  while (valLen > 0) { // skip initial white space
750  if (isspace(*colon) == false) break;
751  valLen -= 1;
752  colon += 1;
753  }
754 // LOG_COMPONENT_CERR(io,trace) << "key=" << AS_TEXT_BUFFER((char *) line, keyLen) << " val=" << AS_TEXT_BUFFER((char *) colon, valLen) << LOG_ENDLINE;
755  if (memcmp(line, "Content-Length:", 15) == 0) {
756  contentLengthWanted = text2uint32((char *) colon, valLen);
757  }
758  headerAttributes[std::string((const char *) line, keyLen)] = std::string((const char *) colon, valLen);
759  }
760  return (0);
761  }
762 #pragma GCC diagnostic pop
763 
764  // callback from Extract_And_Process_Document_Stream
765  virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen) VIRTUAL_OVERRIDE {
766 // LOG_COMPONENT_CERR(io,warn) << "invoked Parse_And_Process_HTTP_Stream::processBlock() with no implementation, blockLen=" << blockLen << LOG_ENDLINE;
767  return (0);
768  }
769 
770  /* callback from Extract_And_Process_Document_Stream (abstract function)
771  *
772  * In the context of the HTTP stream, this will be invoked for each
773  * HTTP request/response/header line.
774  */
775  int processDocument(unsigned char *docStart, size_t docLen) VIRTUAL_OVERRIDE {
776 //LOG_COMPONENT_CERR(io,trace) << "processDocument curState=" << parseState << " len=" << docLen << " doc=" << AS_TEXT_BUFFER((char *) docStart, docLen) << std::endl;
777  int result = 0;
778  switch (parseState) {
779  case READ_REQUEST:
780  if (docLen <= 2) { // assume just CR/LF, consume silently
781  LOG_COMPONENT_CERR(io,info) << "Expected HTTP request, treating as null because length=" << docLen << LOG_ENDLINE;
782  return (0);
783  }
784  processHTTPrequest(docStart, docLen);
786  contentLengthWanted = -1;
787  break;
788  case READ_RESPONSE:
789  if (docLen <= 2) { // assume just CR/LF, consume silently
790  LOG_COMPONENT_CERR(io,info) << "Expected HTTP response, treating as null because length=" << docLen << LOG_ENDLINE;
791  return (0);
792  }
793  processHTTPresponse(docStart, docLen);
795  contentLengthWanted = -1;
796  break;
797  case READ_HEADER_LINE:
798  processHTTPheader(docStart, docLen);
799  if (docLen == 2) { // just the CR/LF
800  if (contentLengthWanted <= 0) {
801  parseState = restartParseState; // do next response
802  // if Transfer-Encoding: chunked, handle change to mode here
803  } else {
805  result = contentLengthWanted;
806  }
807  }
808  break;
809  case READ_CONTENT_BODY:
810  LOG_COMPONENT_CERR(io,error) << "unexpected: state=READ_CONTENT_BODY" << LOG_ENDLINE;
812  break;
813  }
814  return (result);
815  }
816 
817 }; // end Parse_And_Process_HTTP_Stream
818 
819 #pragma GCC diagnostic pop
820 
821 
822 #pragma GCC diagnostic push
823 #pragma GCC diagnostic ignored "-Wsuggest-final-types"
824 
828 public:
829  enum { MAX_FIELDS = 128};
830 protected:
831  unsigned char *headerLine;
832  unsigned char separatorCharList[8];
833  unsigned char quoteCharList[8];
836  uint32_t readsPerformed;
837  unsigned char separatorListLen;
838  unsigned char quoteListLen;
840 public:
841 
851  explicit CSV_File(OS_HANDLE_TYPE descriptor, bool hasHeader=true, char sepChar=',');
852 
863  CSV_File(OS_HANDLE_TYPE descriptor, const char *separatorList,
864  const char *quoteChars = "\"", bool hasHeader=true);
865 
866 
867  virtual ~CSV_File() {
868  delete[] headerLine;
869  }
870 
879  int getFieldIndex(const char *fieldHeading) const;
880 
902  int parseIntoFields(int maxFields, char *fieldStart[],
903  unsigned char *line, size_t lineLen);
904 
912  int readAndProcessCSV_File() { // CSV text file with headings
914  return (result);
915  };
916 
924  int processPacket(unsigned char *data, size_t len) {
925  if (readsPerformed == 0) {
926  beginFile();
927  }
928  readsPerformed += 1;
929  if (len == 0) {
931  return (0);
932  }
933  noteDataRead(data, len);
935  int bytesProcessed;
936  do {
938  if ((doHeaderLine != false) && (bytesProcessed > 0)) {
939  doHeaderLine = false;
940  }
941  } while (bytesProcessed != 0);
943  return (0);
944  }
945 
950  virtual int processHeaderLine(unsigned char *line, size_t lineLen) VIRTUAL_OVERRIDE;
951 
960  virtual int parsedHeadingLine() { return (0); }
961 
966  virtual int completedFile(int recordsSeen) VIRTUAL_OVERRIDE;
967 }; // end class CSV_File
968 
969 #pragma GCC diagnostic pop
970 
972 #endif
973 /* vim: set expandtab shiftwidth=4 tabstop=4: */
Parse_And_Process_HTTP_Stream::contentLengthWanted
int32_t contentLengthWanted
Definition: read_file.hpp:602
File_Buffer::startBufferOffset
size_t startBufferOffset
offset of first byte used in buffer
Definition: read_file.hpp:47
_STDERR_FD
#define _STDERR_FD
Platform-independent reference to standard error.
Definition: logging_api.hpp:2617
safe_strcpy
#define safe_strcpy(d, s, l)
Safe strcpy() routine that will not copy more than l bytes and always ensures that a null is present ...
Definition: compiler_hints.h:696
Read_And_Process_File::FileTypeReaderSelector::bytesToMatch
unsigned char bytesToMatch[16]
Definition: read_file.hpp:263
Extract_And_Process_Document_Stream::neededBlockLen
ssize_t neededBlockLen
Definition: read_file.hpp:467
Extract_And_Process_Document_Stream::documentStartPrefix
unsigned char documentStartPrefix[64]
Definition: read_file.hpp:464
File_Buffer::endBufferLocation
unsigned char * endBufferLocation() const OME_ALWAYS_INLINE
Returns just past the end of the active content in the buffer. Normally, this would be where new cont...
Definition: read_file.hpp:138
CSV_File::separatorCharList
unsigned char separatorCharList[8]
Definition: read_file.hpp:832
IO_Processor
Intermediary I/O processing object for performing multi-threaded receive-and-process operations on a ...
Definition: io_processor.hpp:154
Read_And_Process_File::defaultReadRoutine
static int defaultReadRoutine(Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen)
Default read routine.
Definition: read_file.cpp:462
CSV_File::readAndProcessCSV_File
int readAndProcessCSV_File()
Top-level routine to parse CSV file.
Definition: read_file.hpp:912
Extract_And_Process_Document_Stream::restartScanState
enum Extract_And_Process_Document_Stream::@13 restartScanState
read_file.hpp
FARGOS file processing infrastructure.
memchr_in_long_block
#define memchr_in_long_block(s, c, l)
Definition: read_file.cpp:15
CSV_File::headerLine
unsigned char * headerLine
Definition: read_file.hpp:831
CSV_File::separatorListLen
unsigned char separatorListLen
Definition: read_file.hpp:837
Extract_And_Process_Document_Stream::FIND_END_TAG
@ FIND_END_TAG
Definition: read_file.hpp:461
Parse_And_Process_HTTP_Stream::responseReason
char responseReason[48]
Definition: read_file.hpp:599
OS_SOCKET_TYPE
#define OS_SOCKET_TYPE
Definition: io_processor.hpp:41
io_processor.hpp
FARGOS I/O Processing classes.
Read_And_Process_File::auxData
void * auxData
Definition: read_file.hpp:281
Parse_And_Process_HTTP_Stream::READ_REQUEST
@ READ_REQUEST
Definition: read_file.hpp:588
File_Buffer::File_Buffer
File_Buffer(size_t bfrSize=65536)
Creates an empty buffer of the indicated size in the heap.
Definition: read_file.hpp:52
Read_And_Process_File::READ_FROM_SOCKET
@ READ_FROM_SOCKET
Definition: read_file.hpp:249
Read_And_Process_File::readIntoFileBuffer
int readIntoFileBuffer(File_Buffer *bfr)
Definition: read_file.hpp:383
Extract_And_Process_Document_Stream::processDocument
virtual int processDocument(unsigned char *docStart, size_t docLen)=0
User-exit to process extracted document.
SharedBufferAllocRecord_32
Allocation record for chains in a 32-bit shared memory buffer.
Definition: circular_bfr.hpp:103
File_Buffer::~File_Buffer
~File_Buffer()
Definition: read_file.hpp:84
Parse_And_Process_HTTP_Stream::headerAttributes
std::map< std::string, std::string > headerAttributes
Definition: read_file.hpp:603
Extract_And_Process_Document_Stream::Extract_And_Process_Document_Stream
Extract_And_Process_Document_Stream(const char *docStart, const char *endTag, size_t maxDocSize=1024 *128)
Definition: read_file.hpp:474
Extract_And_Process_Document_Stream::pendingContent
File_Buffer * pendingContent
Definition: read_file.hpp:466
Read_And_Process_File::openFile
static OS_HANDLE_TYPE openFile(const char *fileName, ReadModes mode=READ_NORMAL)
Open the indicated file.
Definition: read_file.cpp:192
Read_And_Process_File::closeFile
static int closeFile(OS_HANDLE_TYPE fd, ReadModes mode=READ_NORMAL)
Close a native operating system file handle.
Definition: read_file.cpp:268
Extract_And_Process_Document_Stream::scanAndProcessBuffer
int scanAndProcessBuffer(const unsigned char *block, size_t blockLen)
Definition: read_file.hpp:498
Read_And_Process_File::processHeaderLine
virtual int processHeaderLine(unsigned char *line, size_t lineLen)
Interface to handle special case processing of header lines.
Definition: read_file.hpp:431
Parse_And_Process_HTTP_Stream::Parse_And_Process_HTTP_Stream
Parse_And_Process_HTTP_Stream(size_t maxDocSize=1024 *128, ParseState initialState=READ_RESPONSE)
Definition: read_file.hpp:605
IO_Processor::bufferAddress
unsigned char * bufferAddress(SharedBufferAllocRecord *rec, size_t *bufferLen=nullptr) const OME_ALWAYS_INLINE
Return physical address of a buffer within the context of the local process' address space.
Definition: io_processor.hpp:275
Read_And_Process_File::readIntoBuffer
virtual int readIntoBuffer(unsigned char *bfr, size_t bfrLen)
Definition: read_file.cpp:487
Extract_And_Process_Document_Stream::~Extract_And_Process_Document_Stream
virtual ~Extract_And_Process_Document_Stream()
Definition: read_file.hpp:493
File_Buffer
Implements an adjustable sliding buffer that minimizes data movement while enabling streams to proces...
Definition: read_file.hpp:38
CSV_File::MAX_FIELDS
@ MAX_FIELDS
Definition: read_file.hpp:829
R_OK
#define R_OK
Definition: tmp.o.cpp:486
Read_And_Process_File::processBlock
virtual int_fast32_t processBlock(unsigned char *block, size_t blockLen)
Definition: read_file.hpp:436
Read_And_Process_File::processLine
virtual int processLine(const unsigned char *line, size_t lineLen)
Definition: read_file.hpp:423
VIRTUAL_OVERRIDE
#define VIRTUAL_OVERRIDE
Generates override if the compiler supports it.
Definition: compiler_hints.h:435
OME_ALWAYS_OPTIMIZE
#define OME_ALWAYS_OPTIMIZE(level)
Mark a function to be compiled with a specific level of optimization.
Definition: compiler_hints.h:406
Parse_And_Process_HTTP_Stream::READ_HEADER_LINE
@ READ_HEADER_LINE
Definition: read_file.hpp:589
Read_And_Process_File::readAndProcessFile
int readAndProcessFile()
Process file contents with no imposed structure.
Definition: read_file.hpp:397
File_Buffer::appendDataToBuffer
size_t appendDataToBuffer(const unsigned char *newData, size_t bytesToAdd, size_t startFromOffset=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Convenience routine to append data to the buffer.
Definition: read_file.hpp:215
io
LogMaskType_t COMPONENT_LOG_MASK() io("io_logMask", &DEFAULT_sharedMemoryVariableManager, COMPONENT_LEVEL(io, warn)|COMPONENT_LEVEL(io, error)|COMPONENT_LEVEL(io, fatal))
_INVALID_DESCRIPTOR
#define _INVALID_DESCRIPTOR
Platform-independent reference to invalid descriptor.
Definition: logging_api.hpp:2618
INVALID_HANDLE_VALUE
#define INVALID_HANDLE_VALUE
Definition: poll_monitor.hpp:19
Extract_And_Process_Document_Stream::FIND_NOTHING
@ FIND_NOTHING
Definition: read_file.hpp:459
Read_And_Process_File::noteDataRead
virtual void noteDataRead(const unsigned char *bfr, size_t bfrLen) const
user-exit to see original copy of any data read
Definition: read_file.hpp:362
File_Buffer::EXTERNAL_SEGMENT
@ EXTERNAL_SEGMENT
flag indicates buffer is in external segment
Definition: read_file.hpp:41
Read_And_Process_File::findAndProcessNextLine
int findAndProcessNextLine(File_Buffer *fileBfr, bool hasHeaderLine=false)
Process next text line from buffer.
Definition: read_file.cpp:525
Read_And_Process_File::setReadRoutine
void setReadRoutine(ReadDataFP altRoutine) OME_ALWAYS_INLINE
Set a new file read routine.
Definition: read_file.hpp:355
CSV_File::completedFile
virtual int completedFile(int recordsSeen) VIRTUAL_OVERRIDE
User-exit to notify that file has been completely read.
Definition: read_file.cpp:649
AS_HEXADECIMAL_BUFFER
#define AS_HEXADECIMAL_BUFFER(d,...)
Convenience label to enable passing buffer with known length to output operator<<() but output the by...
Definition: logging_api.hpp:2135
Read_And_Process_File::FileTypeReaderSelector::readInterfaceRoutine
ReadDataFP readInterfaceRoutine
Definition: read_file.hpp:259
Parse_And_Process_HTTP_Stream
Scan HTTP and parse stream for HTTP requests/responses.
Definition: read_file.hpp:584
File_Buffer::spaceLeft
size_t spaceLeft() const OME_ALWAYS_INLINE
Returns the amount of unused space in the buffer.
Definition: read_file.hpp:119
Read_And_Process_File::FileTypeReaderSelector
Describes magic numbers needed to identify a file's type and the routine capable of decoding a file's...
Definition: read_file.hpp:258
File_Buffer::adjustBufferStart
void adjustBufferStart(size_t amount) OME_ALWAYS_INLINE
Adjusts the start of the active content in the buffer.
Definition: read_file.hpp:163
Extract_And_Process_Document_Stream::documentStartLen
size_t documentStartLen
Definition: read_file.hpp:468
CSV_File::parseIntoFields
int parseIntoFields(int maxFields, char *fieldStart[], unsigned char *line, size_t lineLen)
Parse text line into fields.
Definition: read_file.cpp:671
Read_And_Process_File::FileTypeReaderSelector::initialBytesToMatchMask
uint16_t initialBytesToMatchMask
Definition: read_file.hpp:262
Extract_And_Process_Document_Stream::processBlock
virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen)
User-exit to process block of raw data.
Definition: read_file.hpp:574
Parse_And_Process_HTTP_Stream::processHTTPresponse
virtual int processHTTPresponse(unsigned char *line, size_t lineLen)
User-exit for HTTP response line.
Definition: read_file.hpp:633
Extract_And_Process_Document_Stream::FIND_DOC_START
@ FIND_DOC_START
Definition: read_file.hpp:460
Read_And_Process_File::Read_And_Process_File
Read_And_Process_File(OS_HANDLE_TYPE srcDescriptor, ReadModes mode=READ_NORMAL, const FileTypeReaderSelector *selectorTable=nullptr)
Construct from an existing file descriptor.
Definition: read_file.cpp:408
Parse_And_Process_HTTP_Stream::httpRequest
char httpRequest[16]
Definition: read_file.hpp:600
srcID
const char srcID[]
Definition: catSym.c:17
Parse_And_Process_HTTP_Stream::~Parse_And_Process_HTTP_Stream
virtual ~Parse_And_Process_HTTP_Stream()
Definition: read_file.hpp:618
Read_And_Process_File::readAndProcessBlocksFromFile
int readAndProcessBlocksFromFile(size_t recordLength)
Process fixed length records.
Definition: read_file.cpp:502
Read_And_Process_File::WRITE_APPEND
@ WRITE_APPEND
Definition: read_file.hpp:248
Read_And_Process_File::_explicitAlignmentPadding
uint32_t _explicitAlignmentPadding
Definition: read_file.hpp:285
File_Buffer::buffer
unsigned char * buffer
base of the buffer segment
Definition: read_file.hpp:44
Parse_And_Process_HTTP_Stream::requestURL
char requestURL[1024]
Definition: read_file.hpp:601
Parse_And_Process_HTTP_Stream::READ_RESPONSE
@ READ_RESPONSE
Definition: read_file.hpp:587
Parse_And_Process_HTTP_Stream::processDocument
int processDocument(unsigned char *docStart, size_t docLen) VIRTUAL_OVERRIDE
User-exit to process extracted document.
Definition: read_file.hpp:775
OME_EXPECT_TRUE
#define OME_EXPECT_TRUE(expr)
Annotation macro for conditional expression expected to be true.
Definition: compiler_hints.h:541
Read_And_Process_File::findFileInPathsWithSuffixes
static int findFileInPathsWithSuffixes(char *path, uint_fast32_t pathLen, const char *searchRootPaths, const char *possibleFilenames="", const char *possibleSuffixes="")
Search for a file using a combination of directory roots and file suffixes.
Definition: read_file.cpp:289
memchrSSE2.h
Fast memchr() and strlen() using SSE2 instructions.
MAX_CHARS_IN_A_BYTE
#define MAX_CHARS_IN_A_BYTE
Definition: OMEstring.cpp:14
BufferRegion::returnBlock
virtual void returnBlock(SharedBufferAllocRecord *record)=0
Read_And_Process_File::completedFile
virtual int completedFile(int recordsSeen)
Definition: read_file.hpp:420
Read_And_Process_File::READ_LZ4
@ READ_LZ4
Definition: read_file.hpp:247
text2uint32
uint_fast32_t text2uint32(const char *textString, uint_fast8_t text_len) NONNULL_PARAMETERS(1) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Convert a sequence of text characters into an unsigned integer as quickly as possible....
Definition: text2int.h:79
CSV_File::doHeaderLine
bool doHeaderLine
Definition: read_file.hpp:839
Extract_And_Process_Document_Stream::documentEndLen
size_t documentEndLen
Definition: read_file.hpp:469
CSV_File::CSV_File
CSV_File(OS_HANDLE_TYPE descriptor, bool hasHeader=true, char sepChar=',')
Parse conventional CSV file.
Definition: read_file.cpp:591
Read_And_Process_File::readMode
enum ReadModes readMode
Definition: read_file.hpp:284
Read_And_Process_File::readRoutine
ReadDataFP readRoutine
Definition: read_file.hpp:280
NULL
#define NULL
Definition: tmp.o.cpp:327
Parse_And_Process_HTTP_Stream::READ_CONTENT_BODY
@ READ_CONTENT_BODY
Definition: read_file.hpp:590
Read_And_Process_File
Fundamental class that can read data from a memory-mapped region, file or socket and process data in ...
Definition: read_file.hpp:244
app
LogMaskType_t COMPONENT_LOG_MASK() app("app_logMask", &DEFAULT_sharedMemoryVariableManager, COMPONENT_LEVEL(app, defaultMask))
Read_And_Process_File::readAndProcessTextLines
int readAndProcessTextLines(bool hasHeaderLine=false)
Process text lines.
Definition: read_file.cpp:559
CSV_File::getFieldIndex
int getFieldIndex(const char *fieldHeading) const
Return the relative subscript for a named column.
Definition: read_file.cpp:655
CSV_File::readsPerformed
uint32_t readsPerformed
Definition: read_file.hpp:836
File_Buffer::discardInitialBytes
void discardInitialBytes(size_t bytes=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Discards bytes from the beginning of the active portion of the buffer; typically as a result of havin...
Definition: read_file.hpp:171
Parse_And_Process_HTTP_Stream::processBlock
virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen) VIRTUAL_OVERRIDE
User-exit to process block of raw data.
Definition: read_file.hpp:765
OME_USED
const char srcID[] OME_USED
Definition: tick_time.cpp:24
Read_And_Process_File::FileTypeReaderSelector::fileTypeDescription
char fileTypeDescription[24]
Definition: read_file.hpp:260
File_Buffer::File_Buffer
File_Buffer(unsigned char *existingBuffer, size_t bfrSize, size_t currentLen=0, uint8_t ownershipFlags=EXTERNAL_SEGMENT)
Accepts the use (and potentially transfers ownership) of an existing buffer that may already have som...
Definition: read_file.hpp:75
CSV_File::quoteListLen
unsigned char quoteListLen
Definition: read_file.hpp:838
Parse_And_Process_HTTP_Stream::processHTTPheader
virtual int processHTTPheader(unsigned char *line, size_t lineLen)
User exit for HTTP header line.
Definition: read_file.hpp:732
File_Buffer::IS_READ_ONLY
@ IS_READ_ONLY
flag indicates buffer is read-only
Definition: read_file.hpp:42
Read_And_Process_File::processBuffer
virtual int processBuffer(File_Buffer *bfrState)
Definition: read_file.hpp:442
AS_TEXT_BUFFER
#define AS_TEXT_BUFFER(s,...)
Convenience label to enable passing text with known length to output operator<<().
Definition: logging_api.hpp:2087
Extract_And_Process_Document_Stream::documentEndTag
unsigned char documentEndTag[64]
Definition: read_file.hpp:465
File_Buffer::spaceUsed
size_t spaceUsed() const OME_ALWAYS_INLINE
Returns the amount of space currently used in the buffer.
Definition: read_file.hpp:125
Read_And_Process_File::FileTypeReaderSelector::modeToSelect
enum ReadModes modeToSelect
Definition: read_file.hpp:261
Parse_And_Process_HTTP_Stream::parseState
ParseState parseState
Definition: read_file.hpp:593
Read_And_Process_File::descriptor
OS_HANDLE_TYPE descriptor
Definition: read_file.hpp:282
Extract_And_Process_Document_Stream::FIND_BLOCK_LENGTH
@ FIND_BLOCK_LENGTH
Definition: read_file.hpp:462
Read_And_Process_File::findTypeOfFile
static const FileTypeReaderSelector * findTypeOfFile(const char *fileName, const FileTypeReaderSelector *selectorTable)
Definition: read_file.cpp:375
Parse_And_Process_HTTP_Stream::restartParseState
ParseState restartParseState
Definition: read_file.hpp:594
_STDIN_FD
#define _STDIN_FD
Platform-independent reference to standard in.
Definition: logging_api.hpp:2615
Read_And_Process_File::ReadDataFP
int(* ReadDataFP)(class Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen)
Definition: read_file.hpp:251
File_Buffer::startBufferLocation
unsigned char * startBufferLocation() const OME_ALWAYS_INLINE
Returns the start of the active content in the buffer.
Definition: read_file.hpp:131
OME_EXPECT_FALSE
#define OME_EXPECT_FALSE(expr)
Annotation macro for conditional expression expected to be false.
Definition: compiler_hints.h:540
LOG_COMPONENT_CERR
#define LOG_COMPONENT_CERR(component, lvl)
Convenience macro that uses LOG_COMPONENT_INTO to conditionally log a message to standard error.
Definition: logging_api.hpp:3030
Read_And_Process_File::READ_ZLIB
@ READ_ZLIB
Definition: read_file.hpp:247
OME_ALWAYS_INLINE
#define OME_ALWAYS_INLINE
Tell the compiler to always inline a function, regardless of optimization level.
Definition: compiler_hints.h:364
_STDOUT_FD
#define _STDOUT_FD
Platform-independent reference to standard out.
Definition: logging_api.hpp:2616
File_Buffer::endBufferOffset
size_t endBufferOffset
offset of last byte used in buffer
Definition: read_file.hpp:46
Read_And_Process_File::recordsProcessed
uint32_t recordsProcessed
Definition: read_file.hpp:283
File_Buffer::adjustBufferEnd
void adjustBufferEnd(size_t amount) OME_ALWAYS_INLINE
Adjusts the end of the buffer. This is typically called after new content has been added.
Definition: read_file.hpp:147
Read_And_Process_File::~Read_And_Process_File
virtual ~Read_And_Process_File()
Definition: read_file.cpp:453
CSV_File::parsedHeadingLine
virtual int parsedHeadingLine()
User-exit invoked when initial header line is parsed.
Definition: read_file.hpp:960
Extract_And_Process_Document_Stream::addIOblockThenProcess
int addIOblockThenProcess(SharedBufferAllocRecord *rec, IO_Processor *controller)
Specialized interface to add incoming block from BufferRegion maintained by an IO_Processor....
Definition: read_file.hpp:541
Parse_And_Process_HTTP_Stream::httpVersion
char httpVersion[16]
Definition: read_file.hpp:597
CSV_File::quoteCharList
unsigned char quoteCharList[8]
Definition: read_file.hpp:833
IO_Processor::bfrManager
BufferRegion * bfrManager
buffer region
Definition: io_processor.hpp:194
File_Buffer::externalBufferFlags
uint8_t externalBufferFlags
flags indicating read-only, external
Definition: read_file.hpp:48
LOG_ENDLINE
#define LOG_ENDLINE
Closing clause for text line output using << operators.
Definition: logging_api.hpp:2956
CSV_File
Read and process Comma-Separated-Value (or equivalent) files a record at a time.
Definition: read_file.hpp:827
OS_HANDLE_TYPE
#define OS_HANDLE_TYPE
Definition: io_processor.hpp:48
CSV_File::processPacket
int processPacket(unsigned char *data, size_t len)
Streaming interface equivalent to readAndProcessCSV_File(). API is compatible with processPacketUsing...
Definition: read_file.hpp:924
Parse_And_Process_HTTP_Stream::statusCode
char statusCode[8]
Definition: read_file.hpp:598
Read_And_Process_File::WRITE
@ WRITE
Definition: read_file.hpp:248
Extract_And_Process_Document_Stream::scanForDocument
int scanForDocument()
Definition: read_file.cpp:122
Read_And_Process_File::matchFileHeader
static uint32_t matchFileHeader(const FileTypeReaderSelector *criteria, const unsigned char *fileHdr, const size_t hdrLen)
Definition: read_file.cpp:352
Extract_And_Process_Document_Stream
Scan streams of data for start and end tags, invokes processDocument() on found content.
Definition: read_file.hpp:456
File_Buffer::useRegionAsBuffer
void useRegionAsBuffer(unsigned char *existingBuffer, size_t bfrSize, size_t currentLen, uint8_t ownershipFlags) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Replace the existing buffer with a new region.
Definition: read_file.hpp:105
Parse_And_Process_HTTP_Stream::ParseState
ParseState
Definition: read_file.hpp:586
Read_And_Process_File::WRITE_TRUNCATE
@ WRITE_TRUNCATE
Definition: read_file.hpp:248
Parse_And_Process_HTTP_Stream::findHeaderAttribute
const char * findHeaderAttribute(const char *headerElement) const
Definition: read_file.hpp:620
CSV_File::processHeaderLine
virtual int processHeaderLine(unsigned char *line, size_t lineLen) VIRTUAL_OVERRIDE
Process initial header line in file.
Definition: read_file.cpp:636
Extract_And_Process_Document_Stream::scanAndProcessBuffer
int scanAndProcessBuffer(File_Buffer *bfrState)
Definition: read_file.hpp:523
Extract_And_Process_Document_Stream::scanState
enum Extract_And_Process_Document_Stream::@13 scanState
Read_And_Process_File::READ_GUESS
@ READ_GUESS
Definition: read_file.hpp:247
File_Buffer::bufferLen
size_t bufferLen
total length of buffer segment
Definition: read_file.hpp:45
Read_And_Process_File::READ_PCAP
@ READ_PCAP
Definition: read_file.hpp:247
Parse_And_Process_HTTP_Stream::processHTTPrequest
virtual int processHTTPrequest(unsigned char *line, size_t lineLen)
User-exit for HTTP request line.
Definition: read_file.hpp:683
fd
int fd
Definition: ethers.c:41
CSV_File::fields
char * fields[MAX_FIELDS]
Definition: read_file.hpp:834
Read_And_Process_File::intermediateBuffer
File_Buffer * intermediateBuffer
Definition: read_file.hpp:278
Read_And_Process_File::beginFile
virtual int beginFile()
Definition: read_file.hpp:417
CSV_File::~CSV_File
virtual ~CSV_File()
Definition: read_file.hpp:867
Read_And_Process_File::ReadModes
ReadModes
Definition: read_file.hpp:246
CSV_File::fieldTotal
int fieldTotal
Definition: read_file.hpp:835
LOG_CERR
#define LOG_CERR(lvl)
Convenience macro that uses LOG_INTO() to conditionally log a message to standard error.
Definition: logging_api.hpp:3014
logging_api.hpp
FARGOS Logging API.
Read_And_Process_File::READ_NORMAL
@ READ_NORMAL
Definition: read_file.hpp:247
Generated: Tue Jul 28 2020 16:03:26
Support Information