randolf.ca  1.00
Randolf Richardson's C++ classes
Loading...
Searching...
No Matches
rtools
1#pragma once
2
3#include <algorithm> // std::max and std::min
4#include <array> // std::array
5#include <cstring> // std::strlen
6#include <ctime> // std::time
7#include <exception> // std::exception::runtime_error
8#include <iostream> // std::cout
9#include <stdexcept> // std::runtime_error
10#include <string> // std::string
11#include <unordered_map> // std::unordered_map
12#include <vector> // std::vector
13
14namespace randolf {
15
16 /*======================================================================*//**
17 @brief
18 This @ref rtools class primarily provides a collection of static methods that
19 facilitate a variety of general-purpose computer programming needs. Separate
20 classes may also be added in the future for more sophisticated needs.
21 @par History
22 - 2023-May-17 v1.00 Initial version
23 - 2024-Oct-23 v1.00 Added three more well-known base64 sets, added support
24 for base64 encoding/decoding to use ASCIIZ&nbsp;strings
25 for inputs, and made various minor improvements to the
26 documentation since the previous update
27 - 2024-Nov-24 v1.00 Added @c delimiter parameter to two @ref to_hex methods
28 - 2024-Nov-25 v1.00 Added support for negative positions in @ref to_lower
29 and @ref to_upper methods so that they count backward
30 from the end of the string
31 @version 1.00
32 @author Randolf Richardson
33 *///=========================================================================
34 class rtools {
35
36 public:
37 /*======================================================================*//**
38 @brief
39 This character set is suggested by RFC4648 (see page 8) as "safe" for use in
40 URLs and filenames.
41 @see base64_decode
42 @see base64_encode
43 *///=========================================================================
// RFC 4648 "URL and filename safe" alphabet: indexes 62 and 63 are '-' and '_'.
// constexpr so that the shared alphabet pointer cannot be re-pointed at run time.
inline static constexpr const char* base64_set_minus_underscore = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
45
46 /*======================================================================*//**
47 @brief
48 This character set is normally used to encode IMAP4 mailbox names.
49 @see base64_decode
50 @see base64_encode
51 *///=========================================================================
// Alphabet whose indexes 62 and 63 are '+' and ','.
// constexpr so that the shared alphabet pointer cannot be re-pointed at run time.
inline static constexpr const char* base64_set_plus_comma = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
53
54 /*======================================================================*//**
55 @brief
56 This character set is the default in every @c base64_ method in this library
57 because it's the most commonly used for base64 encoding.
58 @note
59 Although other well-known base64 character sets are included here, or you can
60 create your own, which has a simple format -- it must be 64 characters long
61 without a NULL terminator (additional characters will be ignored), and each
62 character can only be specified once (or else encoding or decoding will fail
63 to render consistent results; there's no checking performed beforehand since
64 the software developers providing these customized character sets are trusted
65 to not introduce such problems).
66 @see base64_decode
67 @see base64_encode
68 *///=========================================================================
// Standard Base64 alphabet: indexes 62 and 63 are '+' and '/'.
// constexpr so that the shared alphabet pointer cannot be re-pointed at run time.
inline static constexpr const char* base64_set_plus_slash = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
70
71 /*======================================================================*//**
72 @brief
73 This is an alternate character set that is rarely used, but is mentioned in
74 RFC4648 (see page 7, second paragraph of section 5).
75 @note
76 RFC4648 incorrectly specifies the 63rd character when it's the 64th character
77 (the slash) that's being replaced with a tilde. This error likely came from
78 not counting the zero "value" label when referencing the set in which the 1st
79 character is labeled as having a value of 0.
80 @see base64_decode
81 @see base64_encode
82 *///=========================================================================
// Alternate alphabet: indexes 62 and 63 are '+' and '~'.
// constexpr so that the shared alphabet pointer cannot be re-pointed at run time.
inline static constexpr const char* base64_set_plus_tilde = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+~";
84
85 /*======================================================================*//**
86 @brief
87 Split string into an @c std::vector that's perfectly-sized to store all the
88 elements separated by whitespace, but not whitespace that's enclosed within
89 quotation marks (literal quotation marks - @c " - will not be interpreted,
90 and will be treated as literal/non-functional non-whitespace characters).
91
92 Any leading and/or trailing whitespace characters will be ignored.
93
94 Multiple whitespace delimiters will be treated as a single delimiter.
95
96 Whitespace characters:
97 - 0: NULL
98 - 9: Tab
99 - 10: Linefeed
100 - 13: Carriage Return
101 - 32: Space
102
103 @returns Pointer to an array of "atoms" stored in an @c std::vector object
104 *///=========================================================================
static std::vector<std::string>* atomize(
  /// Source string (to be split into atoms); @c nullptr yields an empty vector
  const char* str,
  /// Length of string (in bytes), or 0 if @c str is an ASCIIZ string (NULL-terminated)
  size_t len = 0) { // TODO: Add support for a "maximum_elements" parameter

  // --------------------------------------------------------------------------
  // Atoms are collected in a local vector (and a single reusable std::string)
  // so that nothing is leaked, even if an allocation throws part-way through.
  // Ownership of the heap-allocated result passes to the caller, who must
  // eventually "delete" it.
  // --------------------------------------------------------------------------
  std::vector<std::string> atoms;
  std::string atom; // Atom currently being built (empty = whitespace mode)

  if (str != nullptr) {
    const bool asciiz = (len == 0); // No length given: stop at the first NULL

    // ------------------------------------------------------------------------
    // Pre-allocate for the worst case, which is half the length of the string
    // (rounded up): every atom 1 byte long, separated by 1 byte of whitespace.
    // ------------------------------------------------------------------------
    if (!asciiz) atoms.reserve((len + 1) / 2);

    // ------------------------------------------------------------------------
    // Main loop. TODO: Add support for quotation marks
    // ------------------------------------------------------------------------
    for (size_t i = 0; asciiz || i < len; i++) {
      const char ch = str[i];
      if (asciiz && ch == '\0') break; // End of ASCIIZ string

      switch (ch) {
        case '\0': // Whitespace: NULL (only reachable when a length was given)
        case ' ':  // Whitespace: Space
        case '\t': // Whitespace: Tab
        case '\r': // Whitespace: Carriage Return
        case '\n': // Whitespace: Linefeed
          if (!atom.empty()) {
            atoms.push_back(atom); // Save current atom to vector
            atom.clear();          // Back to whitespace mode
          } // -x- if !atom.empty -x-
          break;
        default:
          atom.push_back(ch); // Add this character to the atom
      } // -x- switch ch -x-
    } // -x- for i -x-

    // ------------------------------------------------------------------------
    // An atom that runs up to the end of the input is still pending.
    // ------------------------------------------------------------------------
    if (!atom.empty()) atoms.push_back(atom);
  } // -x- if str -x-

  // --------------------------------------------------------------------------
  // Shrink before handing over so that unneeded memory is freed, then move the
  // contents into the heap-allocated vector (swap cannot throw).
  // --------------------------------------------------------------------------
  atoms.shrink_to_fit(); // TODO: Make this optional with a flag called RTOOLS_SHRINK
  std::vector<std::string>* ary = new std::vector<std::string>();
  ary->swap(atoms);

  return ary;
} // -x- atomize -x-
180
181 /*======================================================================*//**
182 @copydoc atomize(const char*, const size_t)
183 *///=========================================================================
static std::vector<std::string>* atomize(
  /// Source string (to be split into atoms)
  std::string str,
  /// Number of bytes of @c str to process, or 0 to process the whole string;
  /// values larger than the string's size are clamped to its size
  size_t len = 0) {
  // Never let the raw-pointer overload read beyond the string's own buffer
  const size_t usable = (len == 0) ? str.size() : std::min(len, str.size());
  return atomize(str.c_str(), usable);
} // -x- atomize -x-
191
192 /*======================================================================*//**
193 @brief
194 Decode a Base64-encoded @c ASCIIZ&nbsp;string.
195
196 Decoding stops at the first character that is not in the character set.
197 @returns Decoded string
198 @see base64_encode
199 @see base64_set_plus_slash
200 *///=========================================================================
201 static std::string base64_decode(
202 /// ASCIIZ string to decode
203 const char *in,
204 /// Base64 character set to use
205 const std::string b = base64_set_plus_slash) {
206 std::string out;
207 std::vector<int> T(256, -1);
208 int val = 0;
209 int valb = -8;
210 for (int i = 0; i < 64; i++) T[b[i]] = i;
211
212 unsigned char c;
213 while ((c = (unsigned char)*in++) != 0) {
214 if (T[c] == -1) break;
215 val = (val << 6) + T[c];
216 valb += 6;
217 if (valb >= 0) {
218 out.push_back(char((val >> valb) & 0xFF));
219 valb -= 8;
220 } // -x- if valb -x-
221 } // -x- while c -x-
222 return out;
223 } // -x- std::string base64_decode -x-
224
225 /*======================================================================*//**
226 @brief
227 Decode a Base64-encoded @c std::string.
228
229 Decoding stops at the first character that is not in the character set.
230 @returns Decoded string
231 @see base64_encode
232 @see base64_set_plus_slash
233 *///=========================================================================
234 static std::string base64_decode(
235 /// String to decode
236 const std::string &in,
237 /// Base64 character set to use
238 const std::string b = base64_set_plus_slash) {
239 std::string out;
240 std::vector<int> T(256, -1);
241 int val = 0;
242 int valb = -8;
243 for (int i = 0; i < 64; i++) T[b[i]] = i;
244
245 for (unsigned char c: in) {
246 if (T[c] == -1) break;
247 val = (val << 6) + T[c];
248 valb += 6;
249 if (valb >= 0) {
250 out.push_back(char((val >> valb) & 0xFF));
251 valb -= 8;
252 } // -x- if valb -x-
253 } // -x- for c -x-
254 return out;
255 } // -x- std::string base64_decode -x-
256
257 /*======================================================================*//**
258 @brief
259 Encode an @c ASCIIZ&nbsp;string into Base64 format.
260
261 Every byte up to the NULL terminator is encoded, and the output is padded with @c = to a multiple of 4 characters.
262 @returns Base64-encoded string
263 @see base64_decode
264 @see base64_set_plus_slash
265 *///=========================================================================
266 static std::string base64_encode(
267 /// String to encode
268 const char *in,
269 /// Base64 character set to use
270 const std::string b = base64_set_plus_slash) {
271 std::string out;
272 int val = 0;
273 int valb =- 6;
274
275 unsigned char c;
276 while ((c = (unsigned char)*in++) != 0) {
277 val = (val << 8) + c;
278 valb += 8;
279 while (valb >= 0) {
280 out.push_back(b[(val >> valb) & 0x3F]);
281 valb -= 6;
282 } // -x- while valb -x-
283 } // -x- for c -x-
284 if (valb >- 6) out.push_back(b[((val << 8) >> (valb + 8)) & 0x3F]);
285 while (out.size() % 4) out.push_back('=');
286 return out;
287 } // -x- std::string base64_encode -x-
288
289 /*======================================================================*//**
290 @brief
291 Encode an @c std::string into Base64 format.
292
293 Every byte of the string is encoded, and the output is padded with @c = to a multiple of 4 characters.
294 @returns Base64-encoded string
295 @see base64_decode
296 @see base64_set_plus_slash
297 *///=========================================================================
298 static std::string base64_encode(
299 /// String to encode
300 const std::string &in,
301 /// Base64 character set to use
302 const std::string b = base64_set_plus_slash) {
303 std::string out;
304 int val = 0;
305 int valb =- 6;
306
307 for (unsigned char c: in) {
308 val = (val << 8) + c;
309 valb += 8;
310 while (valb >= 0) {
311 out.push_back(b[(val >> valb) & 0x3F]);
312 valb -= 6;
313 } // -x- while valb -x-
314 } // -x- for c -x-
315 if (valb >- 6) out.push_back(b[((val << 8) >> (valb + 8)) & 0x3F]);
316 while (out.size() % 4) out.push_back('=');
317 return out;
318 } // -x- std::string base64_encode -x-
319
320 /*======================================================================*//**
321 @brief
322 Insert commas into the last numeric sequence of digits in the supplied string
323 and insert spaces before that (commas and spaces are configurable). If a
324 decimal point is found, then comma insertions will only occur before that
325 (this is also configurable).
326 @returns Numeric value as a char* array converted to a properly-delimited
327 string as an std::string
328 *///=========================================================================
static std::string insert_commas(
  /// Pointer to ASCII representation of numeric value (@c nullptr yields an empty string)
  const char* value,
  /// Length of value (in bytes), or 0 to auto-detect length if value string is an ASCIIZ string
  size_t len = 0,
  /// Don't insert any separators from the last period onward (or whatever
  /// character is set as the @c dot character); that part is copied as-is
  bool skip_dot = true,
  /// Number of digits between commas (values below 1 disable all insertions)
  const int digits = 3,
  /// Pointer to ASCIIZ comma character string (nullptr = disabled)
  const char* comma = ",",
  /// Pointer to ASCIIZ space character string (nullptr = disabled) used instead of commas for non-digit fill-ins where commas would normally be inserted
  const char* space = " ",
  /// Period character used when @c skip_dot is enabled
  const char dot = '.') noexcept {

  if (value == nullptr) return std::string();

  // --------------------------------------------------------------------------
  // Measure size of value string if an ASCIIZ string was indicated.
  // --------------------------------------------------------------------------
  if (len == 0) len = std::strlen(value);

  // --------------------------------------------------------------------------
  // Find the last dot (only when requested). Everything before it is the
  // "head" that receives separators; the rest is appended untouched later.
  // --------------------------------------------------------------------------
  size_t head = len;
  if (skip_dot) {
    for (size_t i = len; i-- > 0; ) {
      if (value[i] == dot) {
        head = i;
        break;
      } // -x- if dot -x-
    } // -x- for i -x-
  } // -x- if skip_dot -x-

  std::string v(value, head);

  // --------------------------------------------------------------------------
  // Walk backward through the head, inserting a separator at every group
  // boundary. Inserting at position i never disturbs positions below i.
  // --------------------------------------------------------------------------
  if (digits > 0) { // Also protects the modulus operations from a zero divisor
    const auto is_digit = [](const char c) { return c >= '0' && c <= '9'; };
    const size_t group = static_cast<size_t>(digits);
    const size_t m = head % group; // Boundaries are where (i % group) == m
    bool blank = false;            // Add blank space instead of comma

    for (size_t i = head; i-- > 1; ) { // i runs from head - 1 down to 1
      // A separator at i sits between v[i - 1] and v[i]; once either side is
      // not a digit, we switch from commas to blanks (spaces) for good
      if (!is_digit(v[i]) || !is_digit(v[i - 1])) blank = true;
      if ((i % group) == m) { // This is where a separator belongs
        if (!blank && comma != nullptr) v.insert(i, comma);     // Insert comma, if one is defined
        else if (blank && space != nullptr) v.insert(i, space); // Insert space, if one is defined
      } // -x- if m -x-
    } // -x- for i -x-
  } // -x- if digits -x-

  // --------------------------------------------------------------------------
  // Re-attach the dot and whatever follows it (nothing, if no dot was found).
  // --------------------------------------------------------------------------
  v.append(value + head, len - head);

  return v;
} // -x- std::string insert_commas -x-
380
381 /*======================================================================*//**
382 @brief
383 Parses a SASL exchange, returning an @c std::unordered_map of the key-value
384 pairs that were encountered.
385 @throws std::runtime_error If a SASL message is improperly formatted (the
386 error message includes the offset where the format problem occurred)
387 @returns Key-value pairs in an unordered map where the key is an std::string
388 object and the value is a vector of std::string objects
389 *///=========================================================================
static std::unordered_map<std::string, std::vector<std::string>> parse_sasl_exchange(
  /// Unparsed SASL exchange string (must not be Base64-encoded)
  const std::string sasl,
  /// Ensure the following keys exist, each with one empty string: nonce, nc, cnonce, qop, realm, username, digest-uri, authzid
  const bool add_missing_keys = false) {

  // --------------------------------------------------------------------------
  // Internal variables.
  // --------------------------------------------------------------------------
  std::unordered_map<std::string, std::vector<std::string>> map;
  const char* const SEPARATORS = "()<>@,;:\\\"/[]?={} "; // Characters that key names can't contain
  char ch;                // Used in string parsing
  std::string temp;       // Used to build current element
  std::string key;        // Key name
  bool keyMode = true;    // TRUE = parsing key name; FALSE = parsing value
  bool quoteMode = false; // Used for tracking "quote mode" with quotation mark usage
  const int MAX = sasl.size();
  int offset = 0;         // Offset of the character currently being processed; declared
                          // outside of the loop so that the post-loop error message
                          // reports the real offset (the end of the input)

  // --------------------------------------------------------------------------
  // Parse the string, one character at a time. Values are stored verbatim,
  // including any quotation marks and backslashes.
  // --------------------------------------------------------------------------
  for (; offset < MAX; offset++) {
    switch (ch = sasl[offset]) {
      case '"': // Quotation mark
        if (keyMode) throw std::runtime_error("Key names can't contain quotation marks at offset " + std::to_string(offset));
        if (!quoteMode && temp.length() > 0) throw std::runtime_error("Malformed quoted value at offset " + std::to_string(offset));
        quoteMode = !quoteMode; // Toggle "quote mode"
        temp.push_back(ch);     // Save quotation mark
        break;
      case '\\': // Back-slash
        if (keyMode) throw std::runtime_error("Key names can't contain escaped literal characters at offset " + std::to_string(offset));
        if (++offset >= MAX) throw std::runtime_error("Literal character is missing at offset " + std::to_string(offset));
        ch = sasl[offset]; // The character being escaped
        if (ch < ' ' || ch == 127) throw std::runtime_error("Invalid character at offset " + std::to_string(offset)); // CTLs can't be escaped
        temp.push_back('\\'); // Save backslash
        temp.push_back(ch);   // Save literal character
        break;
      case ' ': // Space
        if (!quoteMode) throw std::runtime_error("White space characters not permitted at offset " + std::to_string(offset));
        temp.push_back(ch);
        break;
      case '=': // Equal sign (signifies end of key, and beginning of value)
        if (quoteMode) {
          temp.push_back(ch);
          break;
        }
        if (!keyMode) throw std::runtime_error("Invalid character at offset " + std::to_string(offset));
        keyMode = false; // Now parsing a value
        if (temp.length() == 0) throw std::runtime_error("Missing key name at offset " + std::to_string(offset));
        key = temp;   // Save string for later
        temp.clear(); // Clear temporary string
        break;
      case ',': // Comma delimiter (signifies end of value)
        if (quoteMode) {
          temp.push_back(ch);
          break;
        }
        if (keyMode) throw std::runtime_error("Invalid character at offset " + std::to_string(offset));
        keyMode = true; // Now parsing a key name
        if (temp[0] == '"' && temp[temp.length() - 1] != '"') throw std::runtime_error("Malformed quoted value at offset " + std::to_string(offset));
        map[key].push_back(temp);
        temp.clear();
        break;
      default: // Everything else is a literal character
        if (ch < ' ' || ch == 127) throw std::runtime_error("Invalid character at offset " + std::to_string(offset)); // No CTLs
        if (keyMode && std::strchr(SEPARATORS, ch) != nullptr) throw std::runtime_error("Invalid character at offset " + std::to_string(offset)); // No separators in key names
        temp.push_back(ch);
        break;
    } // -x- switch ch -x-
  } // -x- for offset -x-

  // --------------------------------------------------------------------------
  // Syntax check: A quoted value must be closed before the input ends.
  // --------------------------------------------------------------------------
  if (quoteMode) throw std::runtime_error("Incomplete value at offset " + std::to_string(offset));

  // --------------------------------------------------------------------------
  // Save last key-value pair if we're not in keyMode.
  // --------------------------------------------------------------------------
  if (!keyMode) map[key].push_back(temp);

  // --------------------------------------------------------------------------
  // Add missing keys, each with one empty string (existing keys are untouched).
  // --------------------------------------------------------------------------
  if (add_missing_keys) {
    for (const char* name : {"nonce", "nc", "cnonce", "qop", "realm", "username", "digest-uri", "authzid"}) {
      map.try_emplace(name, std::vector<std::string>(1));
    } // -x- for name -x-
  } // -x- if add_missing_keys -x-

  // --------------------------------------------------------------------------
  // Return the newly-created unordered_map.
  // --------------------------------------------------------------------------
  return map;

} // -x- std::unordered_map<std::string, std::vector<std::string>> parse_sasl_exchange -x-
493
494 /*======================================================================*//**
495 @copydoc split(const char, const char*, const size_t)
496 *///=========================================================================
static std::vector<std::string> split(
  /// Character to use for the delimiter
  const char delimiter,
  /// Source string (to be split into atoms)
  std::string str,
  /// Number of bytes of @c str to process, or -1 (the default) to process the
  /// whole string; values larger than the string's size are clamped to its size
  const size_t len = -1) {
  // (size_t)-1 is the largest possible value, so one clamp handles both the
  // "whole string" default and any length that exceeds the string's buffer
  const size_t usable = std::min(len, str.size());
  return split(delimiter, str.c_str(), usable);
} // -x- std::vector<std::string> split -x-
506
507 /*======================================================================*//**
508 @brief
509 Split string into an @c std::vector that's perfectly-sized to store all the
510 elements separated by a delimiter character. If no delimiters are
511 encountered, the resulting vector will contain the entire string as its only
512 element. If the string is empty, the resulting vector will contain an empty
513 string as its only element.
514
515 @pre
516 Using (char)0 as a delimiter necessitates specifying the length of the source
517 string, otherwise the resulting vector will contain only the first element
518 (this behaviour might change in the future, so don't rely on it).
519
520 @returns An @c std::vector (returned by value) that contains the "atoms"
521 (strings)
522 *///=========================================================================
static std::vector<std::string> split(
  /// Character to use for the delimiter
  const char delimiter,
  /// Source string (to be split); @c nullptr is treated as an empty string
  const char* str,
  /// Length of string (in bytes), or -1 if @c str is an ASCIIZ string (NULL-terminated)
  const size_t len = -1) { // TODO: Add support for a "maximum_elements" parameter

  std::vector<std::string> ary;

  // --------------------------------------------------------------------------
  // An absent string is handled like an empty one: a single empty element.
  // --------------------------------------------------------------------------
  if (str == nullptr) {
    ary.emplace_back();
    return ary;
  } // -x- if !str -x-

  // --------------------------------------------------------------------------
  // Work out how many bytes to scan:
  //   - ASCIIZ mode (len == -1): up to the NULL terminator
  //   - non-NULL delimiter with a length: up to the length, or the first NULL,
  //     whichever comes first
  //   - NULL delimiter with a length: exactly the length specified
  // --------------------------------------------------------------------------
  size_t count;
  if (len == static_cast<size_t>(-1)) {
    count = std::strlen(str);
  } else if (delimiter != '\0') {
    const void* const nul = std::memchr(str, 0, len);
    count = (nul != nullptr) ? static_cast<size_t>(static_cast<const char*>(nul) - str) : len;
  } else {
    count = len;
  } // -x- if len -x-
  const char* const END = str + count;

  // --------------------------------------------------------------------------
  // Size the vector exactly: there is always one more atom than there are
  // delimiters (so no shrinking is needed afterward).
  // --------------------------------------------------------------------------
  ary.reserve(static_cast<size_t>(std::count(str, END, delimiter)) + 1);

  // --------------------------------------------------------------------------
  // Main loop: copy out each run of characters that ends at a delimiter.
  // --------------------------------------------------------------------------
  const char* start = str; // Beginning of the atom currently being scanned
  for (const char* p = str; p != END; ++p) {
    if (*p == delimiter) {
      ary.emplace_back(start, static_cast<size_t>(p - start)); // Save current atom to vector
      start = p + 1;                                           // Next atom begins after the delimiter
    } // -x- if delimiter -x-
  } // -x- for p -x-

  // --------------------------------------------------------------------------
  // Whatever follows the last delimiter (possibly nothing) is the final atom.
  // --------------------------------------------------------------------------
  ary.emplace_back(start, static_cast<size_t>(END - start));

  return ary;
} // -x- std::vector<std::string> split -x-
601
602 /*======================================================================*//**
603 @brief
604 Convert an array of octets (8-bit bytes) to hexadecimal.
605
606 @returns std::string of hexadecimal characters (in lower case)
607 *///=========================================================================
static std::string to_hex(
  /// Binary data to convert to hexadecimal (@c nullptr yields an empty string)
  const void* data,
  /// Length of array (in 8-bit bytes), which can be as short as 0; if -1, then
  /// the length of the data will be measured as an ASCIIZ string; default is 1
  /// if not specified since this is the safest option
  size_t len = 1,
  /// Delimiter character sequence (ASCIIZ string) to insert between multiple
  /// pairs of nybbles@n
  /// @c nullptr = no delimiter (default)
  const char* delimiter = nullptr) noexcept {
  static const char DIGITS[] = "0123456789abcdef";
  std::string h; // Target string
  if (data == nullptr) return h;

  const unsigned char* const octets = static_cast<const unsigned char*>(data);
  if (len == static_cast<size_t>(-1)) len = std::strlen(static_cast<const char*>(data)); // Measure as if ASCIIZ string if length is -1
  const size_t dlen = (delimiter != nullptr) ? std::strlen(delimiter) : 0;

  // Final size is known in advance: two nybbles per octet, plus the delimiters
  if (len > 0) h.reserve(len * 2 + (len - 1) * dlen);

  for (size_t i = 0; i < len; i++) {
    if (i > 0 && dlen > 0) h.append(delimiter, dlen);
    h.push_back(DIGITS[octets[i] >> 4]);   // High nybble
    h.push_back(DIGITS[octets[i] & 0x0F]); // Low nybble
  } // -x- for i -x-
  return h;
} // -x- std::string to_hex -x-
629
630 /*======================================================================*//**
631 @brief
632 Convert an std::string's internal array of octets (8-bit bytes) to
633 hexadecimal.
634
635 @returns std::string of hexadecimal characters (in lower case)
636 *///=========================================================================
637 static std::string to_hex(
638 /// Binary data to convert to hexadecimal
639 std::string data,
640 /// Delimiter character sequence (ASCIIZ string) to insert between multiple
641 /// pairs of nybbles@n
642 /// @c nullptr = no delimiter (default)
643 const char* delimiter = nullptr) noexcept { return to_hex(data.data(), data.size(), delimiter); } // -x- std::string to_hex -x-
644
645 /*======================================================================*//**
646 @brief
647 Convert a 32-bit integer to hexadecimal.
648
649 This method is needed because std::to_string() doesn't include an option to
650 specify the radix.
651 @returns Up to 8 hexadecimal characters
652 *///=========================================================================
static std::string to_hex(
  /// Integer to convert to hexadecimal (negative values are rendered as their
  /// unsigned bit pattern)
  const int i) noexcept {
  static const char DIGITS[] = "0123456789abcdef";
  unsigned int v = static_cast<unsigned int>(i); // Hexadecimal output is defined on the unsigned value
  char buf[sizeof(unsigned int) * 2];            // Two nybbles per byte, whatever the width of int
  size_t pos = sizeof(buf);

  // Produce nybbles from least significant to most, filling the buffer backward
  // (do-while so that zero still yields one digit)
  do {
    buf[--pos] = DIGITS[v & 0x0Fu];
    v >>= 4;
  } while (v != 0);

  return std::string(buf + pos, sizeof(buf) - pos);
} // -x- std::string to_hex -x-
661
662 /*======================================================================*//**
663 @brief
664 Convert a 32-bit unsigned integer to hexadecimal.
665
666 This method is needed because std::to_string() doesn't include an option to
667 specify the radix.
668 @returns Up to 8 hexadecimal characters
669 *///=========================================================================
static std::string to_hex(
  /// Integer to convert to hexadecimal
  const unsigned int i) noexcept {
  static const char DIGITS[] = "0123456789abcdef";
  char nybbles[8];               // A 32-bit integer never needs more than 8 nybbles
  size_t first = sizeof(nybbles);
  unsigned int remaining = i;

  // Fill the buffer backward, least significant nybble first (do-while so
  // that zero still yields one digit)
  do {
    nybbles[--first] = DIGITS[remaining & 0x0Fu];
    remaining >>= 4;
  } while (remaining != 0 && first > 0);

  return std::string(nybbles + first, sizeof(nybbles) - first);
} // -x- std::string to_hex -x-
678
/*======================================================================*//**
@brief
Convert ASCII characters in an std::string to lower case.  Only the octets
'A' through 'Z' are changed; everything else (including the octets of UTF-8
multi-byte characters) is left as is.
@returns Copy of std::string, in lower-case form (when converting in place,
         the source string is updated and a copy of it is returned)
@see to_upper
*///=========================================================================
static std::string to_lower(
  /// Source string
  std::string& str,
  /// Perform in-place conversion (default is FALSE / non-destructive)
  bool in_place_conversion = false,
  /// Begin conversion from this position (0 = first character)@n
  /// Negative positions are calculated backward from the end of the string@n
  /// Positions that fall outside of the string are clamped to its boundaries
  const int begin = 0,
  /// Number of characters to convert (values exceeding string length will not
  /// cause any exceptions as the excess will be effectively ignored)@n
  /// -1 (or any other negative value) = maximum number of characters (a.k.a.,
  /// until end of string)
  const int len = -1) noexcept {

  // --------------------------------------------------------------------------
  // Resolve the starting position.  Negative positions count backward from the
  // end of the string; anything outside of the string is clamped so that no
  // out-of-range access can occur (an exception here would be fatal because
  // this method is noexcept).  The negation is performed in a wider type so
  // that the most negative int is handled safely.
  // --------------------------------------------------------------------------
  const size_t LENGTH = str.length();
  size_t first = 0;
  if (begin >= 0) {
    first = std::min(static_cast<size_t>(begin), LENGTH);
  } else {
    const size_t FROM_END = static_cast<size_t>(-static_cast<long long>(begin));
    if (FROM_END < LENGTH) first = LENGTH - FROM_END;
  } // -x- if begin -x-

  // --------------------------------------------------------------------------
  // Resolve the (exclusive) ending position relative to the resolved starting
  // position, never extending beyond the end of the string.
  // --------------------------------------------------------------------------
  const size_t FIRST = first;
  const size_t LAST = len < 0 ? LENGTH
                              : FIRST + std::min(static_cast<size_t>(len), LENGTH - FIRST);

  // --------------------------------------------------------------------------
  // Conversion routine shared by the in-place and the isolated code paths.
  // --------------------------------------------------------------------------
  const auto convert = [FIRST, LAST](std::string& target) {
    for (size_t i = FIRST; i < LAST; i++) {
      const char ch = target[i];
      if (ch >= 'A' && ch <= 'Z') target[i] = static_cast<char>(ch + ('a' - 'A')); // Convert to lower-case
    } // -x- for i -x-
  };

  // --------------------------------------------------------------------------
  // Perform in-place conversion.
  // --------------------------------------------------------------------------
  if (in_place_conversion) {
    convert(str);
    return str; // Return (a copy of) the updated string
  } // -x- if in_place_conversion -x-

  // --------------------------------------------------------------------------
  // Perform isolated conversion.
  // --------------------------------------------------------------------------
  std::string new_string = str; // Copy string (this allocates additional memory)
  convert(new_string);
  return new_string; // Return new string

} // -x- std::string to_lower -x-
731
/*======================================================================*//**
@brief
Convert ASCII characters in an std::string to upper case.  Only the octets
'a' through 'z' are changed; everything else (including the octets of UTF-8
multi-byte characters) is left as is.
@returns Copy of std::string, in upper-case form (when converting in place,
         the source string is updated and a copy of it is returned)
@see to_lower
*///=========================================================================
static std::string to_upper(
  /// Source string
  std::string& str,
  /// Perform in-place conversion (default is FALSE / non-destructive)
  bool in_place_conversion = false,
  /// Begin conversion from this position (0 = first character)@n
  /// Negative positions are calculated backward from the end of the string@n
  /// Positions that fall outside of the string are clamped to its boundaries
  const int begin = 0,
  /// Number of characters to convert (values exceeding string length will not
  /// cause any exceptions as the excess will be effectively ignored)@n
  /// -1 (or any other negative value) = maximum number of characters (a.k.a.,
  /// until end of string)
  const int len = -1) noexcept {

  // --------------------------------------------------------------------------
  // Resolve the starting position.  Negative positions count backward from the
  // end of the string; anything outside of the string is clamped so that no
  // out-of-range access can occur (an exception here would be fatal because
  // this method is noexcept).  The negation is performed in a wider type so
  // that the most negative int is handled safely.
  // --------------------------------------------------------------------------
  const size_t LENGTH = str.length();
  size_t first = 0;
  if (begin >= 0) {
    first = std::min(static_cast<size_t>(begin), LENGTH);
  } else {
    const size_t FROM_END = static_cast<size_t>(-static_cast<long long>(begin));
    if (FROM_END < LENGTH) first = LENGTH - FROM_END;
  } // -x- if begin -x-

  // --------------------------------------------------------------------------
  // Resolve the (exclusive) ending position relative to the resolved starting
  // position, never extending beyond the end of the string.
  // --------------------------------------------------------------------------
  const size_t FIRST = first;
  const size_t LAST = len < 0 ? LENGTH
                              : FIRST + std::min(static_cast<size_t>(len), LENGTH - FIRST);

  // --------------------------------------------------------------------------
  // Conversion routine shared by the in-place and the isolated code paths.
  // --------------------------------------------------------------------------
  const auto convert = [FIRST, LAST](std::string& target) {
    for (size_t i = FIRST; i < LAST; i++) {
      const char ch = target[i];
      if (ch >= 'a' && ch <= 'z') target[i] = static_cast<char>(ch - ('a' - 'A')); // Convert to upper-case
    } // -x- for i -x-
  };

  // --------------------------------------------------------------------------
  // Perform in-place conversion.
  // --------------------------------------------------------------------------
  if (in_place_conversion) {
    convert(str);
    return str; // Return (a copy of) the updated string
  } // -x- if in_place_conversion -x-

  // --------------------------------------------------------------------------
  // Perform isolated conversion.
  // --------------------------------------------------------------------------
  std::string new_string = str; // Copy string (this allocates additional memory)
  convert(new_string);
  return new_string; // Return new string

} // -x- std::string to_upper -x-
784
/*======================================================================*//**
@brief
Removes the outer-most/enclosing set of quotation marks (the @c " character)
from the beginning and end of the specified string, but only if both are
present.  A string consisting solely of two quotation marks yields an empty
string.  Only one set of quotation marks is removed per call.
@returns Copy of std::string, with quotation marks removed (if both were
         present)
*///=========================================================================
static std::string trim_quotes(
  /// Source string
  std::string str) noexcept {

  // --------------------------------------------------------------------------
  // Fewer than two characters can't hold both an opening and a closing
  // quotation mark, so there's nothing to remove.
  // --------------------------------------------------------------------------
  const size_t LENGTH = str.length();
  if (LENGTH < 2) return str;

  // --------------------------------------------------------------------------
  // Both quotation marks must be present, otherwise the string is returned
  // unchanged.
  // --------------------------------------------------------------------------
  if (str.front() != '"' || str.back() != '"') return str;

  // --------------------------------------------------------------------------
  // Return everything between the quotation marks.
  // --------------------------------------------------------------------------
  return str.substr(1, LENGTH - 2); // This allocates additional memory

} // -x- std::string trim_quotes -x-
809
810 /*======================================================================*//**
811 @brief
812 Wipe the contents of the supplied string with random data by XOR'ing random
813 unsigned char values with every character in the string. The clear() method
814 is not used because it's a waste of CPU cycles for a string that's just going
815 to be de-allocated anyway.
816 @warning
817 This method calls @c srand() with high resolution time once before starting
818 the loop that calls the @c std::rand() function. (Only the first 8 bits
819 returned by @c std::rand() are used; the remaining higher bits are not used.)
820 *///=========================================================================
821 static void wipe(
822 /// String to wipe
823 std::string& str,
824 /// Number of passes (default is 1)
825 unsigned int passes = 1) { wipe(str.data(), str.capacity(), passes); } // -x- void wipe -x-
826
/*======================================================================*//**
@brief
Wipe the contents of the supplied data with random data by XOR'ing random
unsigned char values with every one of the first @c len characters.  Nothing
beyond those @c len characters is touched, so the terminator of an ASCIIZ
string remains in place.  A @c nullptr is quietly ignored.
@warning
This method calls @c srand() with high resolution time once before starting
the loop that calls the @c std::rand() function.  (Only the first 8 bits
returned by @c std::rand() are used; the remaining higher bits are not used.)
*///=========================================================================
static void wipe(
  /// Data to wipe
  char* data,
  /// Length of data, in characters (-1 = ASCIIZ string)
  size_t len = -1,
  /// Number of passes (default is 1)
  unsigned int passes = 1) {

  // --------------------------------------------------------------------------
  // Internal variables.
  // --------------------------------------------------------------------------
  if (data == nullptr) return; // Nothing to wipe (also keeps strlen() safe)
  if (len == static_cast<size_t>(-1)) len = std::strlen(data);
  std::timespec ts{}; // Zero-initialized in case timespec_get() fails
  std::timespec_get(&ts, TIME_UTC);

  // --------------------------------------------------------------------------
  // Seed random number generator with high-resolution time data.  The seconds
  // and nanoseconds are combined with XOR on unsigned values, which can't
  // overflow and doesn't collapse to zero when the nanoseconds happen to be 0.
  // --------------------------------------------------------------------------
  std::srand(static_cast<unsigned int>(ts.tv_sec) ^ static_cast<unsigned int>(ts.tv_nsec));

  // --------------------------------------------------------------------------
  // Data wipe loop.  The volatile access discourages the compiler from
  // discarding the writes as dead stores when the buffer is released right
  // after wiping.  Indexes run from len-1 down to 0 (inclusive).
  // --------------------------------------------------------------------------
  volatile unsigned char* const octets = reinterpret_cast<volatile unsigned char*>(data);
  while (passes-- > 0) {
    for (size_t i = len; i-- > 0; )
      octets[i] = static_cast<unsigned char>(octets[i] ^ std::rand());
  } // -x- while passes -x-

} // -x- void wipe -x-
865
866 }; // -x- class rtools -x-
867
868} // -x- namespace randolf -x-