GDAL
ograrrowarrayhelper.h
1 /******************************************************************************
2  *
3  * Project: OpenGIS Simple Features Reference Implementation
4  * Purpose: Helper to fill ArrowArray
5  * Author: Even Rouault <even dot rouault at spatialys.com>
6  *
7  ******************************************************************************
8  * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #pragma once
30 
32 
33 #include <algorithm>
34 #include <limits>
35 
36 #include "cpl_time.h"
37 
38 #include "ogrsf_frmts.h"
39 #include "ogr_recordbatch.h"
40 
41 class CPL_DLL OGRArrowArrayHelper
42 {
43  OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
44  OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
45 
46  public:
47  bool bIncludeFID = false;
48  int nMaxBatchSize = 0;
49  int nChildren = 0;
50  int nFieldCount = 0;
51  int nGeomFieldCount = 0;
52  std::vector<int> mapOGRFieldToArrowField{};
53  std::vector<int> mapOGRGeomFieldToArrowField{};
54  std::vector<bool> abNullableFields{};
55  std::vector<uint32_t> anArrowFieldMaxAlloc{};
56  int64_t *panFIDValues = nullptr;
57  struct ArrowArray *m_out_array = nullptr;
58 
59  static int
60  GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
61 
62  OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
63  const CPLStringList &aosArrowArrayStreamOptions,
64  struct ArrowArray *out_array);
65 
66  bool SetNull(int iArrowField, int iFeat)
67  {
68  auto psArray = m_out_array->children[iArrowField];
69  ++psArray->null_count;
70  uint8_t *pabyNull =
71  static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
72  if (psArray->buffers[0] == nullptr)
73  {
74  pabyNull = static_cast<uint8_t *>(
75  VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nMaxBatchSize + 7) / 8));
76  if (pabyNull == nullptr)
77  {
78  return false;
79  }
80  memset(pabyNull, 0xFF, (nMaxBatchSize + 7) / 8);
81  psArray->buffers[0] = pabyNull;
82  }
83  pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
84 
85  if (psArray->n_buffers == 3)
86  {
87  auto panOffsets =
88  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
89  panOffsets[iFeat + 1] = panOffsets[iFeat];
90  }
91  return true;
92  }
93 
94  inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
95  {
96  static_cast<uint8_t *>(
97  const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
98  static_cast<uint8_t>(1 << (iFeat / 8));
99  }
100 
101  inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
102  int8_t nVal)
103  {
104  static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
105  nVal;
106  }
107 
108  inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
109  uint8_t nVal)
110  {
111  static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
112  nVal;
113  }
114 
115  inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
116  int16_t nVal)
117  {
118  static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
119  nVal;
120  }
121 
122  inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
123  uint16_t nVal)
124  {
125  static_cast<uint16_t *>(
126  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
127  }
128 
129  inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
130  int32_t nVal)
131  {
132  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
133  nVal;
134  }
135 
136  inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
137  uint32_t nVal)
138  {
139  static_cast<uint32_t *>(
140  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
141  }
142 
143  inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
144  int64_t nVal)
145  {
146  static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
147  nVal;
148  }
149 
150  inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
151  uint64_t nVal)
152  {
153  static_cast<uint64_t *>(
154  const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
155  }
156 
157  inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
158  float fVal)
159  {
160  static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
161  fVal;
162  }
163 
164  inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
165  double dfVal)
166  {
167  static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
168  dfVal;
169  }
170 
171  static void SetDate(struct ArrowArray *psArray, int iFeat,
172  struct tm &brokenDown, const OGRField &ogrField)
173  {
174  brokenDown.tm_year = ogrField.Date.Year - 1900;
175  brokenDown.tm_mon = ogrField.Date.Month - 1;
176  brokenDown.tm_mday = ogrField.Date.Day;
177  brokenDown.tm_hour = 0;
178  brokenDown.tm_min = 0;
179  brokenDown.tm_sec = 0;
180  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
181  static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
182  }
183 
184  static void SetDateTime(struct ArrowArray *psArray, int iFeat,
185  struct tm &brokenDown, const OGRField &ogrField)
186  {
187  brokenDown.tm_year = ogrField.Date.Year - 1900;
188  brokenDown.tm_mon = ogrField.Date.Month - 1;
189  brokenDown.tm_mday = ogrField.Date.Day;
190  brokenDown.tm_hour = ogrField.Date.Hour;
191  brokenDown.tm_min = ogrField.Date.Minute;
192  brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
193  static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
194  CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
195  (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
196  }
197 
198  GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
199  {
200  auto psArray = m_out_array->children[iArrowField];
201  auto panOffsets =
202  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
203  const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
204  if (nLen > anArrowFieldMaxAlloc[iArrowField] - nCurLength)
205  {
206  if (nLen >
207  static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
208  nCurLength)
209  {
210  CPLError(CE_Failure, CPLE_AppDefined,
211  "Too large string or binary content");
212  return nullptr;
213  }
214  uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
215  if ((anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
216  {
217  const uint32_t nDoubleSize =
218  2U * anArrowFieldMaxAlloc[iArrowField];
219  if (nNewSize < nDoubleSize)
220  nNewSize = nDoubleSize;
221  }
222  void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
223  if (newBuffer == nullptr)
224  return nullptr;
225  anArrowFieldMaxAlloc[iArrowField] = nNewSize;
226  memcpy(newBuffer, psArray->buffers[2], nCurLength);
227  VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
228  psArray->buffers[2] = newBuffer;
229  }
230  GByte *paby =
231  static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
232  nCurLength;
233  panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
234  return paby;
235  }
236 
237  static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
238  {
239  auto panOffsets =
240  static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
241  panOffsets[iFeat + 1] = panOffsets[iFeat];
242  }
243 
244  void Shrink(int nFeatures)
245  {
246  if (nFeatures < nMaxBatchSize)
247  {
248  m_out_array->length = nFeatures;
249  for (int i = 0; i < nChildren; i++)
250  {
251  m_out_array->children[i]->length = nFeatures;
252  }
253  }
254  }
255 
256  void ClearArray()
257  {
258  m_out_array->release(m_out_array);
259  memset(m_out_array, 0, sizeof(*m_out_array));
260  }
261 
262  static bool FillDict(struct ArrowArray *psChild,
263  const OGRCodedFieldDomain *poCodedDomain);
264 };
265 
VSIFreeAligned
void VSIFreeAligned(void *ptr)
Free a buffer allocated with VSIMallocAligned().
Definition: cpl_vsisimple.cpp:979
GByte
unsigned char GByte
Unsigned byte type.
Definition: cpl_port.h:205
CPLStringList
String list class designed around our use of C "char**" string lists.
Definition: cpl_string.h:437
GDALDataset
A set of associated raster bands, usually from one file.
Definition: gdal_priv.h:347
OGRField
OGRFeature field attribute value union.
Definition: ogr_core.h:877
ogrsf_frmts.h
CPLError
void CPLError(CPLErr eErrClass, CPLErrorNum err_no, const char *fmt,...)
Report an error.
Definition: cpl_error.cpp:329
VSI_MALLOC_ALIGNED_AUTO_VERBOSE
#define VSI_MALLOC_ALIGNED_AUTO_VERBOSE(size)
VSIMallocAlignedAutoVerbose() with FILE and LINE reporting.
Definition: cpl_vsi.h:301
OGRCodedFieldDomain
Definition of a coded / enumerated field domain.
Definition: ogr_feature.h:1472
OGRFeatureDefn
Definition of a feature class or feature layer.
Definition: ogr_feature.h:374
CPLE_AppDefined
#define CPLE_AppDefined
Application defined error.
Definition: cpl_error.h:100