GDAL
ograrrowarrayhelper.h
1 /******************************************************************************
2  *
3  * Project: OpenGIS Simple Features Reference Implementation
4  * Purpose: Helper to fill ArrowArray
5  * Author: Even Rouault <even dot rouault at spatialys.com>
6  *
7  ******************************************************************************
8  * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #pragma once
30 
32 
33 #include <algorithm>
34 
35 #include "cpl_time.h"
36 
37 #include "ogrsf_frmts.h"
38 #include "ogr_recordbatch.h"
39 
40 class CPL_DLL OGRArrowArrayHelper
41 {
42  OGRArrowArrayHelper(const OGRArrowArrayHelper&) = delete;
43  OGRArrowArrayHelper& operator= (const OGRArrowArrayHelper&) = delete;
44 
45 public:
46  bool bIncludeFID = false;
47  int nMaxBatchSize = 0;
48  int nChildren = 0;
49  int nFieldCount = 0;
50  int nGeomFieldCount = 0;
51  std::vector<int> mapOGRFieldToArrowField{};
52  std::vector<int> mapOGRGeomFieldToArrowField{};
53  std::vector<bool> abNullableFields{};
54  std::vector<uint32_t> anArrowFieldMaxAlloc{};
55  int64_t* panFIDValues = nullptr;
56  struct ArrowArray* m_out_array = nullptr;
57 
58  static int GetMaxFeaturesInBatch(const CPLStringList& aosArrowArrayStreamOptions);
59 
60  OGRArrowArrayHelper(GDALDataset* poDS,
61  OGRFeatureDefn* poFeatureDefn,
62  const CPLStringList& aosArrowArrayStreamOptions,
63  struct ArrowArray* out_array);
64 
65  bool SetNull(int iArrowField, int iFeat)
66  {
67  auto psArray = m_out_array->children[iArrowField];
68  ++psArray->null_count;
69  uint8_t* pabyNull = static_cast<uint8_t*>(const_cast<void*>(psArray->buffers[0]));
70  if( psArray->buffers[0] == nullptr )
71  {
72  pabyNull = static_cast<uint8_t*>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nMaxBatchSize + 7) / 8));
73  if( pabyNull == nullptr )
74  {
75  return false;
76  }
77  memset(pabyNull, 0xFF, (nMaxBatchSize + 7) / 8);
78  psArray->buffers[0] = pabyNull;
79  }
80  pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
81 
82  if( psArray->n_buffers == 3 )
83  {
84  auto panOffsets = static_cast<int32_t*>(const_cast<void*>(psArray->buffers[1]));
85  panOffsets[iFeat+1] = panOffsets[iFeat];
86  }
87  return true;
88  }
89 
90  inline static void SetBoolOn(struct ArrowArray* psArray, int iFeat)
91  {
92  static_cast<uint8_t*>(const_cast<void*>(
93  psArray->buffers[1]))[iFeat / 8] |= static_cast<uint8_t>(1 << (iFeat / 8));
94  }
95 
96  inline static void SetInt8(struct ArrowArray* psArray, int iFeat, int8_t nVal)
97  {
98  static_cast<int8_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
99  }
100 
101  inline static void SetUInt8(struct ArrowArray* psArray, int iFeat, uint8_t nVal)
102  {
103  static_cast<uint8_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
104  }
105 
106  inline static void SetInt16(struct ArrowArray* psArray, int iFeat, int16_t nVal)
107  {
108  static_cast<int16_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
109  }
110 
111  inline static void SetUInt16(struct ArrowArray* psArray, int iFeat, uint16_t nVal)
112  {
113  static_cast<uint16_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
114  }
115 
116  inline static void SetInt32(struct ArrowArray* psArray, int iFeat, int32_t nVal)
117  {
118  static_cast<int32_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
119  }
120 
121  inline static void SetUInt32(struct ArrowArray* psArray, int iFeat, uint32_t nVal)
122  {
123  static_cast<uint32_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
124  }
125 
126  inline static void SetInt64(struct ArrowArray* psArray, int iFeat, int64_t nVal)
127  {
128  static_cast<int64_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
129  }
130 
131  inline static void SetUInt64(struct ArrowArray* psArray, int iFeat, uint64_t nVal)
132  {
133  static_cast<uint64_t*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = nVal;
134  }
135 
136  inline static void SetFloat(struct ArrowArray* psArray, int iFeat, float fVal)
137  {
138  static_cast<float*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = fVal;
139  }
140 
141  inline static void SetDouble(struct ArrowArray* psArray, int iFeat, double dfVal)
142  {
143  static_cast<double*>(const_cast<void*>(psArray->buffers[1]))[iFeat] = dfVal;
144  }
145 
146  static
147  void SetDate(struct ArrowArray* psArray, int iFeat,
148  struct tm& brokenDown, const OGRField& ogrField)
149  {
150  brokenDown.tm_year = ogrField.Date.Year - 1900;
151  brokenDown.tm_mon = ogrField.Date.Month - 1;
152  brokenDown.tm_mday = ogrField.Date.Day;
153  brokenDown.tm_hour = ogrField.Date.Hour;
154  brokenDown.tm_min = ogrField.Date.Minute;
155  brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
156  static_cast<int32_t*>(const_cast<void*>(
157  psArray->buffers[1]))[iFeat] =
158  static_cast<int>((CPLYMDHMSToUnixTime(&brokenDown) + 36200) / 86400);
159  }
160 
161  static
162  void SetDateTime(struct ArrowArray* psArray, int iFeat,
163  struct tm& brokenDown, const OGRField& ogrField)
164  {
165  brokenDown.tm_year = ogrField.Date.Year - 1900;
166  brokenDown.tm_mon = ogrField.Date.Month - 1;
167  brokenDown.tm_mday = ogrField.Date.Day;
168  brokenDown.tm_hour = ogrField.Date.Hour;
169  brokenDown.tm_min = ogrField.Date.Minute;
170  brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
171  static_cast<int64_t*>(const_cast<void*>(
172  psArray->buffers[1]))[iFeat] =
173  CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
174  (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
175  }
176 
177  GByte* GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
178  {
179  auto psArray = m_out_array->children[iArrowField];
180  auto panOffsets = static_cast<int32_t*>(const_cast<void*>(psArray->buffers[1]));
181  const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
182  if( nLen > anArrowFieldMaxAlloc[iArrowField] - nCurLength )
183  {
184  if( nLen > static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) - nCurLength )
185  {
186  CPLError(CE_Failure, CPLE_AppDefined, "Too large string or binary content");
187  return nullptr;
188  }
189  uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
190  if( (anArrowFieldMaxAlloc[iArrowField] >> 31) == 0 )
191  {
192  const uint32_t nDoubleSize = 2U * anArrowFieldMaxAlloc[iArrowField];
193  if( nNewSize < nDoubleSize )
194  nNewSize = nDoubleSize;
195  }
196  void* newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
197  if( newBuffer == nullptr )
198  return nullptr;
199  anArrowFieldMaxAlloc[iArrowField] = nNewSize;
200  memcpy(newBuffer, psArray->buffers[2], nCurLength);
201  VSIFreeAligned(const_cast<void*>(psArray->buffers[2]));
202  psArray->buffers[2] = newBuffer;
203  }
204  GByte* paby = static_cast<GByte*>(const_cast<void*>(psArray->buffers[2])) + nCurLength;
205  panOffsets[iFeat+1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
206  return paby;
207  }
208 
209  static
210  void SetEmptyStringOrBinary(struct ArrowArray* psArray, int iFeat)
211  {
212  auto panOffsets = static_cast<int32_t*>(const_cast<void*>(psArray->buffers[1]));
213  panOffsets[iFeat+1] = panOffsets[iFeat];
214  }
215 
216  void Shrink(int nFeatures)
217  {
218  if( nFeatures < nMaxBatchSize )
219  {
220  m_out_array->length = nFeatures;
221  for( int i = 0; i < nChildren; i++ )
222  {
223  m_out_array->children[i]->length = nFeatures;
224  }
225  }
226  }
227 
228  void ClearArray()
229  {
230  m_out_array->release(m_out_array);
231  memset(m_out_array, 0, sizeof(*m_out_array));
232  }
233 
234  static bool FillDict(struct ArrowArray* psChild,
235  const OGRCodedFieldDomain* poCodedDomain);
236 
237 };
238 
VSIFreeAligned
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition: cpl_vsisimple.cpp:974
GByte
unsigned char GByte
Unsigned byte type.
Definition: cpl_port.h:203
CPLStringList
String list class designed around our use of C "char**" string lists.
Definition: cpl_string.h:429
GDALDataset
A set of associated raster bands, usually from one file.
Definition: gdal_priv.h:342
OGRField
OGRFeature field attribute value union.
Definition: ogr_core.h:802
ogrsf_frmts.h
CPLError
void CPLError(CPLErr eErrClass, CPLErrorNum err_no, const char *fmt,...)
Report an error.
Definition: cpl_error.cpp:309
VSI_MALLOC_ALIGNED_AUTO_VERBOSE
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition: cpl_vsi.h:290
OGRCodedFieldDomain
Definition of a coded / enumerated field domain.
Definition: ogr_feature.h:983
OGRFeatureDefn
Definition of a feature class or feature layer.
Definition: ogr_feature.h:280
CPLE_AppDefined
#define CPLE_AppDefined
Application defined error.
Definition: cpl_error.h:99