OpenMS
Loading...
Searching...
No Matches
Base64.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Chris Bielow, Moritz Aubermann $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#ifndef OPENMS_IS_BIG_ENDIAN
12#if defined OPENMS_BIG_ENDIAN
13#define OPENMS_IS_BIG_ENDIAN true
14#else
15#define OPENMS_IS_BIG_ENDIAN false
16#endif
17#endif
18
23
#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
31
32#ifdef OPENMS_COMPILER_MSVC
33#pragma comment(linker, "/export:compress")
34#endif
35
36namespace OpenMS
37{
43 class OPENMS_DLLAPI Base64
44 {
45
46public:
47
49 Base64() = default;
50
53 {
55 BYTEORDER_LITTLEENDIAN
56 };
57
65 template <typename FromType>
66 static void encode(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression = false);
67
73 template <typename ToType>
74 static void decode(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression = false);
75
83 template <typename FromType>
84 static void encodeIntegers(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression = false);
85
91 template <typename ToType>
92 static void decodeIntegers(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression = false);
93
106 static void encodeStrings(const std::vector<String> & in, String & out, bool zlib_compression = false, bool append_null_byte = true);
107
117 static void decodeStrings(const String & in, std::vector<String> & out, bool zlib_compression = false);
118
126 static void decodeSingleString(const String& in, String& out, bool zlib_compression);
127
128private:
129
132 {
133 double f;
135 };
136
139 {
140 float f;
142 };
143
144 static const char encoder_[];
145 static const char decoder_[];
147 template <typename ToType>
148 static void decodeUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
149
151 template <typename ToType>
152 static void decodeCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
153
155 template <typename ToType>
156 static void decodeIntegersUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
157
159 template <typename ToType>
160 static void decodeIntegersCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out);
161
162 static void stringSimdEncoder_(std::string& in, std::string& out);
163
164 static void stringSimdDecoder_(const std::string& in, std::string& out);
165 };
166
// Possible optimization: add a SIMD register-wise endianizer (this would only be beneficial for ARM, since mzML on an x64 CPU needs no conversion: both use LITTLE_ENDIAN).
// mzXML(!), which is outdated, uses BIG_ENDIAN (i.e. "network" byte order) in its base64 encoding, so x64 would benefit there, but not ARM.
// However, the code below is optimized to the bswap instruction by most compilers, which is very fast (1 cycle latency + 1 op),
// and it is doubtful that SSE4's _mm_shuffle_epi8 would do better, see https://dev.to/wunk/fast-array-reversal-with-simd-j3p
172 inline UInt32 endianize32(const UInt32& n)
173 {
174 return ((n & 0x000000ff) << 24) |
175 ((n & 0x0000ff00) << 8) |
176 ((n & 0x00ff0000) >> 8) |
177 ((n & 0xff000000) >> 24);
178 }
179
181 inline UInt64 endianize64(const UInt64& n)
182 {
183 return ((n >> 56) & 0x00000000000000FF) |
184 ((n >> 40) & 0x000000000000FF00) |
185 ((n >> 24) & 0x0000000000FF0000) |
186 ((n >> 8) & 0x00000000FF000000) |
187 ((n << 8) & 0x000000FF00000000) |
188 ((n << 24) & 0x0000FF0000000000) |
189 ((n << 40) & 0x00FF000000000000) |
190 ((n << 56) & 0xFF00000000000000);
191 }
192
193 template <typename FromType>
194 void Base64::encode(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression)
195 {
196 out.clear();
197 if (in.empty())
198 {
199 return;
200 }
201
202 // initialize
203 const Size element_size = sizeof(FromType);
204 const Size input_bytes = element_size * in.size();
205 // change endianness if necessary
207 {
208 if (element_size == 4)
209 {
210 for (Size i = 0; i < in.size(); ++i)
211 {
213 tmp.f = in[i];
214 tmp.i = endianize32(tmp.i);
215 in[i] = tmp.f;
216 }
217 }
218 else
219 {
220 for (Size i = 0; i < in.size(); ++i)
221 {
223 tmp.f = static_cast<double>(in[i]);
224 tmp.i = endianize64(tmp.i);
225 in[i] = tmp.f;
226 }
227 }
229 }
230
231 // encode with compression
232 if (zlib_compression)
233 {
234 String compressed;
235 ZlibCompression::compressData((void*)in.data(), input_bytes, compressed);
236 stringSimdEncoder_(compressed, out);
237 }
238 else // encode without compression
239 {
240 String str((char*)in.data(), input_bytes);
241 stringSimdEncoder_(str, out);
242 }
243
244 }
245
246 template <typename ToType>
247 void Base64::decode(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression)
248 {
249 if (zlib_compression)
250 {
251 decodeCompressed_(in, from_byte_order, out);
252 }
253 else
254 {
255 decodeUncompressed_(in, from_byte_order, out);
256 }
257 }
258
259 template <int type_size>
260 inline void invertEndianess(void* byte_buffer, const size_t element_count);
261 template<>
262 inline void invertEndianess<4>(void* byte_buffer, const size_t element_count)
263 {
264 UInt32* p = reinterpret_cast<UInt32*>(byte_buffer);
265 std::transform(p, p + element_count, p, endianize32);
266 }
267 template<>
268 inline void invertEndianess<8>(void* byte_buffer, const size_t element_count)
269 {
270 UInt64* p = reinterpret_cast<UInt64*>(byte_buffer);
271 std::transform(p, p + element_count, p, endianize64);
272 }
273
274
275 template <typename ToType>
276 void Base64::decodeCompressed_(const String& in, ByteOrder from_byte_order, std::vector<ToType>& out)
277 {
278 out.clear();
279 if (in.empty())
280 {
281 return;
282 }
283
284 String decompressed;
285 Base64::decodeSingleString(in, decompressed, true);
286
287 void* byte_buffer = reinterpret_cast<void*>(&decompressed[0]);
288 Size buffer_size = decompressed.size();
289
290 const ToType * float_buffer = reinterpret_cast<const ToType *>(byte_buffer);
291 constexpr Size element_size = sizeof(ToType);
292 if (buffer_size % element_size != 0)
293 {
294 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
295 }
296
297 Size float_count = buffer_size / element_size;
298
299 // change endianness if necessary
300 if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
301 {
302 invertEndianess<element_size>(byte_buffer, float_count);
303 }
304
305 // copy values
306 out.assign(float_buffer, float_buffer + float_count);
307 }
308
309 template <typename ToType>
310 void Base64::decodeUncompressed_(const String& in, ByteOrder from_byte_order , std::vector<ToType>& out)
311 {
312 out.clear();
313
314 // The length of a base64 string is always a multiple of 4 (always 3
315 // bytes are encoded as 4 characters)
316 if (in.size() < 4)
317 {
318 return;
319 }
320 if (in.size() % 4 != 0)
321 {
322 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Malformed base64 input, length is not a multiple of 4.");
323 }
324
325 Size src_size = in.size();
326 // last one or two '=' are skipped if contained
327 int padding = 0;
328 if (in[src_size - 1] == '=') padding++;
329 if (in[src_size - 2] == '=') padding++;
330
331 src_size -= padding;
332
333 constexpr Size element_size = sizeof(ToType);
334 String s;
335 stringSimdDecoder_(in,s);
336
337 // change endianness if necessary (mzML is always LITTLE_ENDIAN; x64 is LITTLE_ENDIAN)
338 if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
339 {
340 invertEndianess<element_size>((void*)s.data(), s.size() / element_size);
341 }
342
343 const char* cptr = s.data();
344 const ToType * fptr = reinterpret_cast<const ToType*>(cptr);
345 out.assign(fptr,fptr + s.size()/element_size);
346 }
347
348 template <typename FromType>
349 void Base64::encodeIntegers(std::vector<FromType> & in, ByteOrder to_byte_order, String & out, bool zlib_compression)
350 {
351 out.clear();
352 if (in.empty())
353 return;
354
355 // initialize
356 const Size element_size = sizeof(FromType);
357 const Size input_bytes = element_size * in.size();
358
359 // change endianness if necessary
361 {
362 if (element_size == 4)
363 {
364 for (Size i = 0; i < in.size(); ++i)
365 {
366 UInt32 tmp = in[i];
367 tmp = endianize32(tmp);
368 in[i] = tmp;
369 }
370 }
371 else
372 {
373 for (Size i = 0; i < in.size(); ++i)
374 {
375 UInt64 tmp = in[i];
376 tmp = endianize64(tmp);
377 in[i] = tmp;
378 }
379 }
380 }
381
382 // encode with compression (use Qt because of zlib support)
383 if (zlib_compression)
384 {
385 String compressed;
386 ZlibCompression::compressData((void*)in.data(), input_bytes, compressed);
387 stringSimdEncoder_(compressed, out);
388 }
389 else // encode without compression
390 {
391 String str((char*)in.data(), input_bytes);
392 stringSimdEncoder_(str, out);
393 }
394 }
395
396 template <typename ToType>
397 void Base64::decodeIntegers(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out, bool zlib_compression)
398 {
399 if (zlib_compression)
400 {
401 decodeIntegersCompressed_(in, from_byte_order, out);
402 }
403 else
404 {
405 decodeIntegersUncompressed_(in, from_byte_order, out);
406 }
407 }
408
409 template <typename ToType>
410 void Base64::decodeIntegersCompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out)
411 {
412 out.clear();
413 if (in.empty())
414 return;
415
416 constexpr Size element_size = sizeof(ToType);
417
418 String decompressed;
419 Base64::decodeSingleString(in, decompressed, true);
420 if (decompressed.empty())
421 {
422 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Decompression error?");
423 }
424
425 void* byte_buffer = reinterpret_cast<void*>(&decompressed[0]);
426 Size buffer_size = decompressed.size();
427
428 // change endianness if necessary
429 if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
430 {
431 if constexpr(element_size == 4)
432 {
433 const Int32 * float_buffer = reinterpret_cast<const Int32 *>(byte_buffer);
434 if (buffer_size % element_size != 0)
435 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
436 Size float_count = buffer_size / element_size;
437 UInt32 * p = reinterpret_cast<UInt32 *>(byte_buffer);
438 std::transform(p, p + float_count, p, endianize32);
439
440 out.resize(float_count);
441 // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
442 for (Size i = 0; i < float_count; ++i)
443 {
444 out[i] = (ToType) * float_buffer;
445 ++float_buffer;
446 }
447 }
448 else
449 {
450 const Int64 * float_buffer = reinterpret_cast<const Int64 *>(byte_buffer);
451
452 if (buffer_size % element_size != 0)
453 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount?");
454
455 Size float_count = buffer_size / element_size;
456
457 UInt64 * p = reinterpret_cast<UInt64 *>(byte_buffer);
458 std::transform(p, p + float_count, p, endianize64);
459
460 out.resize(float_count);
461 // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
462 for (Size i = 0; i < float_count; ++i)
463 {
464 out[i] = (ToType) * float_buffer;
465 ++float_buffer;
466 }
467 }
468 }
469 else
470 {
471 if constexpr(element_size == 4)
472 {
473 const Int * float_buffer = reinterpret_cast<const Int *>(byte_buffer);
474 if (buffer_size % element_size != 0)
475 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount while decoding?");
476
477 Size float_count = buffer_size / element_size;
478 out.resize(float_count);
479 // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
480 for (Size i = 0; i < float_count; ++i)
481 {
482 out[i] = (ToType) * float_buffer;
483 ++float_buffer;
484 }
485 }
486 else
487 {
488 const Int64 * float_buffer = reinterpret_cast<const Int64 *>(byte_buffer);
489
490 if (buffer_size % element_size != 0)
491 throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Bad BufferCount while decoding?");
492
493 Size float_count = buffer_size / element_size;
494 out.resize(float_count);
495 // do NOT use assign here, as it will give a lot of type conversion warnings on VS compiler
496 for (Size i = 0; i < float_count; ++i)
497 {
498 out[i] = (ToType) * float_buffer;
499 ++float_buffer;
500 }
501 }
502 }
503
504 }
505
506 template <typename ToType>
507 void Base64::decodeIntegersUncompressed_(const String & in, ByteOrder from_byte_order, std::vector<ToType> & out)
508 {
509 out.clear();
510
511 // The length of a base64 string is a always a multiple of 4 (always 3
512 // bytes are encoded as 4 characters)
513 if (in.size() < 4)
514 {
515 return;
516 }
517
518 Size src_size = in.size();
519 // last one or two '=' are skipped if contained
520 int padding = 0;
521 if (in[src_size - 1] == '=') padding++;
522 if (in[src_size - 2] == '=') padding++;
523
524 src_size -= padding;
525
526 UInt a;
527 UInt b;
528
529 UInt offset = 0;
530 int inc = 1;
531 UInt written = 0;
532
533 const Size element_size = sizeof(ToType);
534
535 // enough for either float or double
536 char element[8] = "\x00\x00\x00\x00\x00\x00\x00";
537
538 if ((OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_LITTLEENDIAN) || (!OPENMS_IS_BIG_ENDIAN && from_byte_order == Base64::BYTEORDER_BIGENDIAN))
539 {
540 offset = (element_size - 1); // other endian
541 inc = -1;
542 }
543 else
544 {
545 offset = 0;
546 inc = 1;
547 }
548
549 // reserve enough space in the output vector
550 out.reserve((UInt)(std::ceil((4.0 * src_size) / 3.0) + 6.0));
551
552 // sort all read bytes correctly into a char[4] (double) or
553 // char[8] (float) and push_back when necessary.
554 for (Size i = 0; i < src_size; i += 4)
555 {
556
557 // decode 4 Base64-Chars to 3 Byte
558 // -------------------------------
559
560 // decode the first two chars
561 a = decoder_[(int)in[i] - 43] - 62;
562 b = decoder_[(int)in[i + 1] - 43] - 62;
563 if (i + 1 >= src_size)
564 {
565 b = 0;
566 }
567 // write first byte (6 bits from a and 2 highest bits from b)
568 element[offset] = (unsigned char) ((a << 2) | (b >> 4));
569 written++;
570 offset = (offset + inc) % element_size;
571
572 if (written % element_size == 0)
573 {
574 ToType float_value;
575 if (element_size == 4)
576 {
577 Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
578 float_value = (ToType) * value;
579 }
580 else
581 {
582 Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
583 float_value = (ToType) * value;
584 }
585 out.push_back(float_value);
586 strcpy(element, "");
587 }
588
589 // decode the third char
590 a = decoder_[(int)in[i + 2] - 43] - 62;
591 if (i + 2 >= src_size)
592 {
593 a = 0;
594 }
595 // write second byte (4 lowest bits from b and 4 highest bits from a)
596 element[offset] = (unsigned char) (((b & 15) << 4) | (a >> 2));
597 written++;
598 offset = (offset + inc) % element_size;
599
600 if (written % element_size == 0)
601 {
602 ToType float_value;
603 if (element_size == 4)
604 {
605 Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
606 float_value = (ToType) * value;
607 }
608 else
609 {
610 Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
611 float_value = (ToType) * value;
612 }
613 out.push_back(float_value);
614 strcpy(element, "");
615 }
616
617 // decode the fourth char
618 b = decoder_[(int)in[i + 3] - 43] - 62;
619 if (i + 3 >= src_size)
620 {
621 b = 0;
622 }
623 // write third byte (2 lowest bits from a and 6 bits from b)
624 element[offset] = (unsigned char) (((a & 3) << 6) | b);
625 written++;
626 offset = (offset + inc) % element_size;
627
628 if (written % element_size == 0)
629 {
630 ToType float_value;
631 if (element_size == 4)
632 {
633 Int32 * value = reinterpret_cast<Int32 *>(&element[0]);
634 float_value = (ToType) * value;
635 }
636 else
637 {
638 Int64 * value = reinterpret_cast<Int64 *>(&element[0]);
639 float_value = (ToType) * value;
640 }
641 out.push_back(float_value);
642 strcpy(element, "");
643 }
644 }
645 }
646
647} //namespace OpenMS
648
#define OPENMS_IS_BIG_ENDIAN
Definition Base64.h:15
Class to encode and decode Base64.
Definition Base64.h:44
static void stringSimdDecoder_(const std::string &in, std::string &out)
static void decode(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of floating point numbers.
Definition Base64.h:247
static void decodeIntegersCompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a compressed Base64 string to a vector of integer numbers.
Definition Base64.h:410
double f
Definition Base64.h:133
static const char decoder_[]
Definition Base64.h:145
static void decodeCompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a compressed Base64 string to a vector of floating point numbers.
Definition Base64.h:276
static void decodeIntegersUncompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a Base64 string to a vector of integer numbers.
Definition Base64.h:507
static void decodeStrings(const String &in, std::vector< String > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of (null-terminated) strings.
static void stringSimdEncoder_(std::string &in, std::string &out)
static void decodeUncompressed_(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out)
Decodes a Base64 string to a vector of floating point numbers.
Definition Base64.h:310
Base64()=default
default constructor
static void encodeStrings(const std::vector< String > &in, String &out, bool zlib_compression=false, bool append_null_byte=true)
Encodes a vector of strings to a Base64 string.
UInt64 i
Definition Base64.h:134
ByteOrder
Byte order type.
Definition Base64.h:53
@ BYTEORDER_BIGENDIAN
Big endian type.
Definition Base64.h:54
@ BYTEORDER_LITTLEENDIAN
Little endian type.
Definition Base64.h:55
static void encode(std::vector< FromType > &in, ByteOrder to_byte_order, String &out, bool zlib_compression=false)
Encodes a vector of floating point numbers to a Base64 string.
Definition Base64.h:194
static void decodeIntegers(const String &in, ByteOrder from_byte_order, std::vector< ToType > &out, bool zlib_compression=false)
Decodes a Base64 string to a vector of integer numbers.
Definition Base64.h:397
UInt32 i
Definition Base64.h:141
static void decodeSingleString(const String &in, String &out, bool zlib_compression)
Decodes a Base64 string.
static void encodeIntegers(std::vector< FromType > &in, ByteOrder to_byte_order, String &out, bool zlib_compression=false)
Encodes a vector of integer numbers to a Base64 string.
Definition Base64.h:349
float f
Definition Base64.h:140
Internal class needed for type-punning.
Definition Base64.h:139
Internal class needed for type-punning.
Definition Base64.h:132
Invalid conversion exception.
Definition Exception.h:331
A more convenient string class.
Definition String.h:34
static void compressData(const void *raw_data, const size_t in_length, std::string &compressed_data)
Compresses data using zlib directly.
int32_t Int32
Signed integer type (32bit)
Definition Types.h:26
int64_t Int64
Signed integer type (64bit)
Definition Types.h:40
int Int
Signed integer type.
Definition Types.h:72
uint32_t UInt32
Unsigned integer type (32bit)
Definition Types.h:33
uint64_t UInt64
Unsigned integer type (64bit)
Definition Types.h:47
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
void invertEndianess< 8 >(void *byte_buffer, const size_t element_count)
Definition Base64.h:268
void invertEndianess(void *byte_buffer, const size_t element_count)
UInt64 endianize64(const UInt64 &n)
Endianizes a 64 bit type from big endian to little endian and vice versa.
Definition Base64.h:181
void invertEndianess< 4 >(void *byte_buffer, const size_t element_count)
Definition Base64.h:262
UInt32 endianize32(const UInt32 &n)
Endianizes a 32 bit type from big endian to little endian and vice versa.
Definition Base64.h:172