randolf.ca  1.00
Randolf Richardson's C++ classes
Loading...
Searching...
No Matches
rmailaddr
1#pragma once
2
3#include <algorithm>
4#include <atomic>
5#include <cstring>
6#include <stdexcept> // std::invalid_argument
7#include <vector>
8
9#include <arpa/inet.h> // Used only for checking for valid IP addresses in domain literals (inet_pton)
10
11namespace randolf {
12
13 // --------------------------------------------------------------------------
14 // Constants that list sets of valid characters, which are optimized to test
15 // for ranges of the most commonly-used characters first during parsing, are
16 // named consistently with their respective rule names as defined in RFC2822.
17 //
18 // CRLF and \ are invisible in the quoted string according to RFC2822 section
19 // 3.2.5.
20 //
21 // RFC2822 section 3.2.5 also defines a "quoted-string" as containing the
22 // following valid characters (spaces are also permitted): 33, 35-91, 93-126
23 // Certain characters must be quoted first though, and every character
24 // following a backslash is taken literally (and the backslash is removed
25 // from the result).
26 //
27 // RFC2822 section 3.2.4 defines an "atom" as containing the following valid
28 // characters: 0123456789
29 // ABCDEFGHIJKLMNOPQRSTUVWXYZ
30 // abcdefghijklmnopqrstuvwxyz
31 // !#$%&'*+-/=?^_`{|}~
32 //
33 // Quote characters and quotation marks are not permitted in the domain part.
34 //
35 // According to RFC2822 section 3.2.5, a phrase (DisplayName / Comments) can
36 // be either an atom (ATEXT) or quoted-text (QTEXT).
37 //
38 // According to RFC2822 section 2.2.2, whitespace characters are tabs (ASCII
39 // character 9) and spaces (ASCII character 32).
40 //
41 // RFC2822 section 3.4 last paragraph indicates that a group construct is
42 // optional, and preceded by a colon following any number of comma-delimited
43 // recipients (including zero or one). Group constructs must end with a
44 // semi-colon though.
45 // --------------------------------------------------------------------------
46
47 // --------------------------------------------------------------------------
48 // The following macros are optimized for performance by testing for the most
49 // commonly-used characters first.
50 //
51 // ATEXT
52 // 94...126 ^_`abcdefghijklmnopqrstuvwxyz{|}~
53 // 65...90 ABCDEFGHIJKLMNOPQRSTUVWXYZ
54 // 47...57 /0123456789
55 // 45 | 33 -!
56 // 35...39 #$%&'
57 // 42 | 43 *+
58 // 61 | 63 =?
59 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// (e.g., "c & 0x7f") can be passed safely; it is still evaluated more than
// once, so it must not have side effects.
#define ATEXT(a) (((a) >= 94 && (a) <= 126) \
               || ((a) >= 65 && (a) <=  90) \
               || ((a) >= 47 && (a) <=  57) \
               ||  (a) == 45 || (a) ==  33  \
               || ((a) >= 35 && (a) <=  39) \
               ||  (a) == 42 || (a) ==  43  \
               ||  (a) == 61 || (a) ==  63  )
67
68 // --------------------------------------------------------------------------
69 // ATEXT_OBS ("obsolete standard" is ATEXT plus periods, spaces, and tabs)
70 // 94...126 ^_`abcdefghijklmnopqrstuvwxyz{|}~
71 // 65...90 ABCDEFGHIJKLMNOPQRSTUVWXYZ
72 // 45...57 -./0123456789
73 // 32 | 33 {space:32}!
74 // 35...39 #$%&'
75 // 42 | 43 *+
76 // 61 | 63 =?
77 // 9 {tab:9}
78 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define ATEXT_OBS(a) (((a) >= 94 && (a) <= 126) \
                   || ((a) >= 65 && (a) <=  90) \
                   || ((a) >= 45 && (a) <=  57) \
                   ||  (a) == 32 || (a) ==  33  \
                   || ((a) >= 35 && (a) <=  39) \
                   ||  (a) == 42 || (a) ==  43  \
                   ||  (a) == 61 || (a) ==  63  \
                   ||  (a) ==  9 )
87
88 // --------------------------------------------------------------------------
89 // CTEXT (comment text)
90 // 93...126 ]^_`abcdefghijklmnopqrstuvwxyz{|}~
91 // 42...91 *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
92 // 33...39 !"#$%&'
93 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define CTEXT(a) (((a) >= 93 && (a) <= 126) \
               || ((a) >= 42 && (a) <=  91) \
               || ((a) >= 33 && (a) <=  39) )
97
98 // --------------------------------------------------------------------------
99 // CTEXT_WSP (comment text with white space)
100 // 93...126 ]^_`abcdefghijklmnopqrstuvwxyz{|}~
101 // 42...91 *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
102 // 32...39 {space:32}!"#$%&'
103 // 9 {tab:9}
104 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define CTEXT_WSP(a) (((a) >= 93 && (a) <= 126) \
                   || ((a) >= 42 && (a) <=  91) \
                   || ((a) >= 32 && (a) <=  39) \
                   ||  (a) ==  9 )
109
110 // --------------------------------------------------------------------------
111 // CTEXT_OBS (obsolete comment text)
112 // CTEXT {CTEXT}
113 // 32 {space:32}
114 // 9 {tab:9}
115 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once (here and inside
// CTEXT), so it must not have side effects.
#define CTEXT_OBS(a) (CTEXT(a) \
                   || (a) == 32 \
                   || (a) ==  9 )
119
120 // --------------------------------------------------------------------------
121 // DTEXT (domain-part, not including characters needed for domain-literals)
122 // 94...126 ^_`abcdefghijklmnopqrstuvwxyz{|}~
123 // 33...90 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ
124 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define DTEXT(a) (((a) >= 94 && (a) <= 126) \
               || ((a) >= 33 && (a) <=  90) )
127
128 // --------------------------------------------------------------------------
129 // FWS (folding white space)
130 // 10 {lf:10}
131 // 13 {cr:13}
132 // 9 {tab:9}
133 // --------------------------------------------------------------------------
// NOTE: matches exactly LF (10), CR (13), and tab (9); a space (32) is not
// matched by this macro.  The argument is parenthesized at every use so that
// any expression can be passed safely; it is still evaluated more than once,
// so it must not have side effects.
#define FWS(a) ((a) == 10 \
             || (a) == 13 \
             || (a) ==  9 )
137
138 // --------------------------------------------------------------------------
139 // QTEXT (quoted text)
140 // 93...126 ]^_`abcdefghijklmnopqrstuvwxyz{|}~
141 // 35...91 #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
142 // 32 {space:32}
143 // 33 !
144 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define QTEXT(a) (((a) >= 93 && (a) <= 126) \
               || ((a) >= 35 && (a) <=  91) \
               ||  (a) == 32 || (a) ==  33  )
148
149 // --------------------------------------------------------------------------
150 // TEXT
151 // 14...127 {char:14-31}{space:32}!"#$%&'()*+,-./0123456789:;<=>
152 // ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^
153 // _`abcdefghijklmnopqrstuvwxyz{|}~
154 // {delete:127}
155 // 1...9 {char:1-6}{beep:7}{backspace:8}{tab:9}
156 // 11 | 12 {char:11}{char:12}
157 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define TEXT(a) (((a) >= 14 && (a) <= 127) \
              || ((a) >=  1 && (a) <=   9) \
              ||  (a) == 11 || (a) ==  12  )
161
162 // --------------------------------------------------------------------------
163 // WSP (white space)
164 // 32 {space:32}
165 // 9 {tab:9}
166 // --------------------------------------------------------------------------
// NOTE: the argument is parenthesized at every use so that any expression
// can be passed safely; it is still evaluated more than once, so it must not
// have side effects.
#define WSP(a) ((a) == 32 \
             || (a) ==  9 )
169
170 // --------------------------------------------------------------------------
171 // Used by the set() method to consistently reset internal variables when
172 // moving onward to the next token.
173 //
174 // token_begin: Configures beginning of next token.
//
// The expansion is five separate statements: flag_utf8 and flag_quote are set
// to false, token_begin is set to offset + 1, and p_token and p_token_sp are
// cleared.  All six names (including offset) must therefore be in scope at
// the point of use.
//
// NOTE(review): because the expansion is not a single statement, using this
// macro as the body of an unbraced if/else/loop would only guard the first
// assignment -- confirm that every use site is enclosed in braces.
175 // --------------------------------------------------------------------------
176 #define RESET_FOR_NEXT_TOKEN \
177 flag_utf8 = false; \
178 flag_quote = false; \
179 token_begin = offset + 1; \
180 p_token.clear(); \
181 p_token_sp.clear();
182
183 /*======================================================================*//**
184 @brief
185 This @ref rmailaddr class provides an object-oriented eMail address.
186
187 @par Features
188
189 Some of the key features are:
190
191 - constructors with sensible defaults help to simplify coding
192 - documentation includes code samples (with @c \#include lines as needed)
193 - can handle ASCIIZ without needing to specify string length
194 - can handle @c std::string (which tracks its own string length)
195
196 @par Use case
197
198 Validation of the format of an eMail address is helpful in ensuring that
199 eMail addresses received from elsewhere comply with internet standards.
200
201 @par Background
202
203 I created this class to make it easier to write internet server daemons and
204 other software that needs to accept and/or handle eMail addresses. (This is
205 a complete re-write of the version I wrote in Java 17 years ago in 2007,
206 which includes a significant array of differences due to the improved parsing
207 approaches I use now that are more efficient, and the need to make sure that
208 UTF-8 characters and punycode are both handled in a transparent manner.)
209
210 @par Getting started
211
212 @author Randolf Richardson
213 @version 1.00
214 @par History
215 2024-May-07 v1.00 Initial version
216
217 @par Conventions
218 Lower-case letter "m" is regularly used in partial example code to represent
219 an instantiated rmailaddr object.
220
221 An ASCIIZ string is a C-string (char* array) that includes a terminating null
222 (0) character at the end.
223
224 @par Notes
225
226 I use the term "ASCIIZ string" to indicate an array of characters that's
227 terminated by a 0 (a.k.a., null). Although this is very much the same as a
228 C-string, the difference is that in many API functions a C-string must often
229 be accompanied by its length value. When referring to an ASCIIZ string, I'm
230 intentionally indicating that the length of the string is not needed because
231 the string is null-terminated. (This term was also commonly used in assembly
232 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
233 still used by machine language programmers today.)
234
235 @par Examples
236
237 @code{.cpp}
238 #include <iostream> // std::cout, std::cerr, std::endl, etc.
239 #include <stdexcept> // std::invalid_argument exception
240
241 #include <randolf/rmailaddr>
242
243 int main(int argc, char *argv[]) {
244 try {
245 randolf::rmailaddr m("nobody@example.com");
246 } catch (const std::invalid_argument& e) {
247 std::cerr << "eMail address format exception: " << e.what() << std::endl;
248 return EXIT_FAILURE;
249 } catch (const std::exception& e) {
250 std::cerr << "Other exception: " << e.what() << std::endl;
251 return EXIT_FAILURE;
252 }
253 return EXIT_SUCCESS;
254 } // -x- int main -x-
255 @endcode
256
257 Parameter stacking is supported (with methods that return @c rmailaddr*); in
258 this example, notice that semicolons (";") and "e." references are omitted
259 (when compared with the above):
260
261 @code{.cpp}
262 #include <iostream> // std::cout, std::cerr, std::endl, etc.
263 #include <stdexcept> // std::invalid_argument exception
264
265 #include <randolf/rmailaddr>
266
267 int main(int argc, char *argv[]) {
268 try {
269 randolf::rmailaddr m("nobody@example.com");
270 } catch (const std::invalid_argument& e) {
271 std::cerr << "eMail address format exception: " << e.what() << std::endl;
272 return EXIT_FAILURE;
273 } catch (const std::exception& e) {
274 std::cerr << "Other exception: " << e.what() << std::endl;
275 return EXIT_FAILURE;
276 }
277 return EXIT_SUCCESS;
278 } // -x- int main -x-
279 @endcode
280 *///=========================================================================
281 class rmailaddr {
282
283 public:
284 /*======================================================================*//**
285 @brief
286 Structure of errors (only used when exceptions are disabled).
287 @see errors
288 @see policy_throw_exceptions
289 *///=========================================================================
struct error_data {
  /// Error message
  std::string message;
  /// Offset (0 = position of first byte); defaults to 0 so that a
  /// default-constructed record never holds an indeterminate value
  unsigned int offset = 0;
}; // -x- struct error_data -x-
296
297 private:
298 /*======================================================================*//**
299 @brief
300 Structure of positions within the original eMail string where a portion
301 begins, and its length (in bytes), along with various other information about
302 the section.
303
304 This is used internally, and std::vector<mail_addr_token> organizes them and
305 looks after freeing memory.
306 *///=========================================================================
307 struct mail_addr_token {
308 /// Types:
309 /// g = group name (beginning; includes colon)
310 /// ; = group termination (semi-colon character)
311 /// n = display name
312 /// e = eMail address (includes angle brackets, if present)
313 /// l = local-part
314 /// d = domain-part
315 /// c = comment
316 /// \0 = not initialized (null can effectively be regarded as meaning "unknown")
317 char type = (char)0;
318 /// Offset, within the string, where this part begins
319 unsigned int offset = 0;
320 /// Total number of bytes
321 unsigned int len = 0;
322 /// Whether any UTF-8 characters are present in this part
323 bool flag_utf8 = false;
324 /// Whether this part is in punycode (begins with "xn--")
325 bool flag_punycode = false; // TODO
326 /// Whether this part is "obsolete" (according to RFCs)
327 bool flag_obsolete = false; // TODO
328 /// Whether eMail address was enclosed in angle brackets (type "e" only)
329 bool flag_angle = false;
330 /// Whether the token was enclosed in quotation marks
331 bool flag_quotes = false;
332 /// Whether eMail address is a null address enclosed in angle brackets (type "e" only)
333 bool flag_null_addr = false;
334 /// Whether the domain-part is an FQDN (type "d" only)
335 bool flag_fqdn = false; // TODO
336 /// Whether the domain-part is a domain-literal (type "d" only)
337 bool flag_domain_literal = false; // TODO
338 /// Depth of groups (types "g" and ";" only)
339 unsigned short depth = 0;
340 /// Processed data, with quotation marks, angle brackets, comments, whitespace, etc., removed
341 std::u8string p_token;
342 /// Index to display-name (type "e" only)
343 int index_display_name = -1;
344 /// Index to local-part (type "e" only)
345 int index_local_part = -1;
346 /// Index to domain-part (type "e" only)
347 int index_domain_part = -1;
348 }; // -x- struct mail_addr_token -x-
349
350 // --------------------------------------------------------------------------
351 // Internal variables.
352 // --------------------------------------------------------------------------
353 std::u8string _addr; // Original eMail address
354 std::vector<mail_addr_token> _tokens; // All eMail address tokens
355 std::vector<int> _index_e; // Positions, within _tokens, of the type "e" (eMail address) records
356 std::vector<error_data> _errors; // Errors collected when exceptions are disabled
357 short group_depth = 0; // Recursive group tracking
358 bool angle_bracket_mode = false; // Angle-bracket mode tracking
359 bool quote_mode = false; // Quotation-marks mode tracking
360
361 // --------------------------------------------------------------------------
362 // Policy variables.
363 // --------------------------------------------------------------------------
364 bool _policy_keep_comments = false; // Whether to retain comments embedded in eMail addresses
365 bool _policy_throw_exceptions = true; // TRUE = throw exceptions; FALSE = save internally (in _errors)
366 bool _policy_tabs_to_spaces = false; // Whether to convert every tab into a space
367 bool _policy_support_utf8 = true; // Whether to support UTF-8 (FALSE = 7bit characters only)
368
369 /*======================================================================*//**
370 Exception handler.
371 *///=========================================================================
372 void _exception(
373 /// Error message
374 std::string message,
375 /// Offset (0 = position of first byte)
376 int offset) {
377 if (_policy_throw_exceptions) throw std::invalid_argument(message + " at offset " + std::to_string(offset));
378 _errors.push_back({ message, offset });
379 return;
380 } // -x- void _exception -x-
381
382 public:
383 /*======================================================================*//**
384 @brief
385 Instantiate an empty rmailaddr that doesn't qualify as a properly-formatted
386 internet eMail address (because the minimum length of a valid internet eMail
387 address is 1 character).
388
389 Instantiating an empty rmailaddr is particularly useful for header-file
390 definitions; for example:
391 @code{.cpp}
392 #include <iostream> // std::cout, std::cerr, std::endl, etc.
393 #include <stdexcept> // std::invalid_argument exception
394
395 #include <randolf/rmailaddr>
396
397 randolf::rmailaddr m; // <-- Empty rmailaddr initialization (no exceptions)
398
399 int main(int argc, char *argv[]) {
400 try {
401 m.set("nobody@example.com");
402 } catch (const std::invalid_argument& e) {
403 std::cerr << "eMail address format exception: " << e.what() << std::endl;
404 return EXIT_FAILURE;
405 } catch (const std::exception& e) {
406 std::cerr << "Other exception: " << e.what() << std::endl;
407 return EXIT_FAILURE;
408 }
409 return EXIT_SUCCESS;
410 } // -x- int main -x-
411 @endcode
412 *///=========================================================================
413 rmailaddr() noexcept {}; // -x- constructor rmailaddr -x-
414
415 /*======================================================================*//**
416 @brief
417 Instantiate an rmailaddr that qualifies as a properly-formatted internet
418 eMail address (if it doesn't qualify, then an exception will be thrown).
419
420 Usage example:
421 @code{.cpp}
422 #include <iostream> // std::cout, std::cerr, std::endl, etc.
423 #include <stdexcept> // std::invalid_argument exception
424
425 #include <randolf/rmailaddr>
426
427 int main(int argc, char *argv[]) {
428 try {
429 randolf::rmailaddr m("nobody@example.com");
430 } catch (const std::invalid_argument& e) {
431 std::cerr << "eMail address format exception: " << e.what() << std::endl;
432 return EXIT_FAILURE;
433 } catch (const std::exception& e) {
434 std::cerr << "Other exception: " << e.what() << std::endl;
435 return EXIT_FAILURE;
436 }
437 return EXIT_SUCCESS;
438 } // -x- int main -x-
439 @endcode
440 @throws std::invalid_argument describing the problem, along with the byte
441 offset where the problem originated from
442 @see rmailaddr
443 *///=========================================================================
444 rmailaddr(
445 /// RFC-compliant eMail address
446 const char8_t* mailbox,
447 /// Number of characters (-1 = ASCIIZ string)
448 int len = -1) { set(mailbox, len); }; // -x- constructor rmailaddr -x-
449
450 /*======================================================================*//**
451 @copydoc rmailaddr(const char8_t*, int)
452 @see rmailaddr
453 *///=========================================================================
454 rmailaddr(
455 /// RFC-compliant eMail address
456 const char* mailbox,
457 /// Number of characters (-1 = ASCIIZ string)
458 int len = -1) { set((char8_t*)mailbox, len); }; // -x- constructor rmailaddr -x-
459
460 /*======================================================================*//**
461 @copydoc rmailaddr(const char8_t*, int)
462 @see rmailaddr
463 *///=========================================================================
464 rmailaddr(
465 /// RFC-compliant eMail address
466 const std::string mailbox) { set((char8_t*)mailbox.data(), mailbox.size()); }; // -x- constructor rmailaddr -x-
467
468 /*======================================================================*//**
469 @copydoc rmailaddr(const char8_t*, int)
470 @see rmailaddr
471 *///=========================================================================
472 rmailaddr(
473 /// RFC-compliant eMail address
474 const std::u8string mailbox) { set(mailbox.data(), mailbox.size()); }; // -x- constructor rmailaddr -x-
475
476 /*======================================================================*//**
477 @brief
478 Access only the eMail address, without display-name, and without any sets of
479 enclosing quotation-marks or enclosing angle-brackets, etc.
480 @see display_name
481 @see domain_part
482 @see email
483 @see local_part
484 @see operator[](int)
485 @throws std::out_of_range if the index is out-of-range
486 @returns std::string with only the eMail address (no display-name, and no
487 enclosing sets of quotation-marks or enclosing angle-brackets, etc.)
488 *///=========================================================================
489 std::string addr(
490 /// Index of eMail address to query for (0 = first element; negative index
491 /// values are calculated in reverse, starting with -1 as the final position)
492 int index = 0) {
493// return std::string((char*)_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].p_token.c_str());
494 return std::string((char*)_tokens.at(_index_e.at(index >= 0 ? index : _index_e.size() + index)).p_token.c_str());
495 }; // -x- std::string addr -x-
496
497 /*======================================================================*//**
498 @brief
499 Access an eMail address's display-name (the portion preceding the angle
500 brackets).&nbsp; If there were no angle-brackets, then an empty string will
501 be returned.
502 @see addr
503 @see domain_part
504 @see email
505 @see local_part
506 @see operator[](int)
507 @returns std::string with only the display-name (no quotation marks, etc.)
508 *///=========================================================================
509 std::string display_name(
510 /// Index of eMail address to query for (0 = first element; negative index
511 /// values are calculated in reverse, starting with -1 as the final position)
512 int index = 0) {
513 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name].p_token.c_str());
514 }; // -x- std::string display_name -x-
515
516 /*======================================================================*//**
517 @brief
518 Access an eMail address's domain-part (the portion following the @c @ sign).
519 @see get
520 @see addr
521 @see display_name
522 @see email
523 @see local_part
524 @see operator[](int)
525 @returns std::string with only the domain-part (no angle brackets, etc.)
526 *///=========================================================================
527 std::string domain_part(
528 /// Index of eMail address to query for (0 = first element; negative index
529 /// values are calculated in reverse, starting with -1 as the final position)
530 int index = 0) {
531 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_domain_part].p_token.c_str());
532 }; // -x- std::string domain_part -x-
533
534 /*======================================================================*//**
535 @brief
536 Access an eMail address (enclosed in angle-brackets), and preceded by the
537 display-name (if one is available).
538
539 @note
540 If the original form of the display-name had a delimiting space before the
541 eMail address, then that space will be present in the result here. If not, a
542 space will not be inserted. (In other words, this aspect of the original
543 full eMail address will be retained.)
544 @see addr
545 @see display_name
546 @see domain_part
547 @see local_part
548 @see operator[](int)
549 @returns std::string with display-name and eMail address (in angle-brackets)
550 *///=========================================================================
551 std::string email(
552 /// Index of eMail address to query for (0 = first element; negative index
553 /// values are calculated in reverse, starting with -1 as the final position)
554 int index = 0) {
555
556 // --------------------------------------------------------------------------
557 // The eMail address has no display-name because it wasn't enclosed in angle
558 // brackets, so present the eMail address on its own, in angle brackets.
559 // --------------------------------------------------------------------------
560 mail_addr_token e = _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]];
561 if (e.index_display_name < 0)
562 return "<" + std::string((char*)e.p_token.c_str()) + ">";
563
564 // --------------------------------------------------------------------------
565 // There was a display-name, so return the eMail address with display-name
566 // (enclosed in quotation marks if it started out that way).
567 // --------------------------------------------------------------------------
568 mail_addr_token n = _tokens[e.index_display_name];
569 if (n.flag_quotes)
570 return "\""
571 + std::string((char*)n.p_token.c_str())
572 + "\""
573 + "<"
574 + std::string((char*)e.p_token.c_str())
575 + ">";
576
577 return std::string((char*)n.p_token.c_str())
578 + "<"
579 + std::string((char*)e.p_token.c_str())
580 + ">";
581 }; // -x- std::string email -x-
582
583 /*======================================================================*//**
584 @brief
585 Find out if this object doesn't hold any eMail addresses.
586 @see has_any
587 @see has_multiple
588 @see has_one
589 @see size
590 @returns TRUE = no eMail addresses@n
591 FALSE = one or more eMail addresses
592 *///=========================================================================
593 bool empty() { return _index_e.empty(); }; // -x- bool empty -x-
594
595 /*======================================================================*//**
596 @brief
597 Return a list of errors that have been collected (instead of throwing
598 exceptions).
599 @see errors_clear
600 @see policy_throw_exceptions
601 *///=========================================================================
602 std::vector<error_data> errors() { return _errors; }; // -x- std::vector<error-data> errors -x-
603
604 /*======================================================================*//**
605 @brief
606 Clear the list of errors that have been collected (instead of throwing
607 exceptions).
608 @see errors
609 @see policy_throw_exceptions
610 @returns The same rmailaddr object so as to facilitate stacking
611 *///=========================================================================
612 rmailaddr* errors_clear() { _errors.clear(); return this; }; // -x- rmailaddr* errors_clear -x-
613
614 /*======================================================================*//**
615 @brief
616 Grade an eMail address, similar to traditional elementary school grades. For
617 simplicity, grades "a" through "c" are passes, while grades "d" through "f"
618 are failures, although if less strict then "d" should also qualify as a pass.
619
620 @code
621 Ratings:
622 a = Angle-brackets surrounding eMail address (optional display-name)
623 b = Bare eMail address (no display-name)
624 c = Complex eMail address (groups; optional angle-brackets; optional display-name)
625 d = Defective (because obsolete RFC standards were utilized)
626 e = Errors (only when collecting errors instead of throwing exceptions)
627 f = Failure (an exception was thrown, or eMail address is blank)
628 @endcode
629
630 To test for a pass, use a comparison such as <tt>m.grade() <= 'c'</tt>
631 (strict) or <tt>m.grade() <= 'd'</tt> (not strict).
632 *///=========================================================================
char grade(
    /// eMail address index (default is 0 for the first eMail address)
    const int index = 0
    ) {
  // Placeholder: no grading logic is present, so the failure grade is
  // reported for every index (the index is not consulted).
  constexpr char failure_grade = 'f';
  return failure_grade;
}; // -x- char grade -x-
639
640 /*======================================================================*//**
641 @brief
642 Indicates whether a display-name was included with this eMail address.
643 @returns TRUE = eMail address includes a display-name@n
644 FALSE = eMail address has no display-name
645 *///=========================================================================
646 bool has_display_name(
647 /// eMail address index (default is 0 for the first eMail address)
648 const int index = 0
649 ) {
650 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name != -1;
651 }; // -x- bool has_display-name -x-
652
653 /*======================================================================*//**
654 @brief
655 Find out whether this object holds any number of eMail addresses. If there
656 are no eMail addresses, then this method returns @c FALSE.
657 @see empty
658 @see has_multiple
659 @see has_one
660 @see size
661 @returns TRUE = one or more eMail addresses@n
662 FALSE = no eMail addresses
663 *///=========================================================================
664 bool has_any() { return _index_e.size() > 0; }; // -x- bool has_any -x-
665
666 /*======================================================================*//**
667 @brief
668 Find out whether this object holds multiple eMail addresses. If there is
669 only one eMail address, or no eMail addresses at all, then this method
670 returns @c FALSE.
671 @see empty
672 @see has_any
673 @see has_one
674 @see size
675 @returns TRUE = two or more eMail addresses@n
676 FALSE = one eMail address@n
677 FALSE = no eMail addresses
678 *///=========================================================================
679 bool has_multiple() { return _index_e.size() > 1; }; // -x- bool has_multiple -x-
680
681 /*======================================================================*//**
682 @brief
683 Find out whether this object holds exactly one eMail address. If there are
684 two or more eMail addresses, or no eMail addresses, then this method returns
685 @c FALSE.
686 @see empty
687 @see has_any
688 @see has_multiple
689 @see size
690 @returns TRUE = exactly one eMail address@n
691 FALSE = two or more eMail addresses@n
692 FALSE = no eMail addresses
693 *///=========================================================================
694 bool has_one() { return _index_e.size() == 1; }; // -x- bool has_one -x-
695
696 /*======================================================================*//**
697 @brief
698 Find out the state of this policy.
699 @see policy_keep_comments
700 @returns policy status
701 *///=========================================================================
702 bool is_policy_keep_comments() { return _policy_keep_comments; }; // -x- bool is_policy_keep_comments -x-
703
704 /*======================================================================*//**
705 @brief
706 Find out the state of this policy.
707 @see policy_tabs_to_spaces
708 @returns policy status
709 *///=========================================================================
710 bool is_policy_tabs_to_spaces() { return _policy_tabs_to_spaces; }; // -x- bool is_policy_tabs_to_spaces -x-
711
712 /*======================================================================*//**
713 @brief
714 Find out the state of this policy.
715 @see policy_throw_exceptions
716 @returns policy status
717 *///=========================================================================
718 bool is_policy_throw_exceptions() { return _policy_throw_exceptions; }; // -x- bool is_policy_throw_exceptions -x-
719
720 /*======================================================================*//**
721 @brief
722 Find out the state of this policy.
723 @see policy_support_utf8
724 @returns policy status
725 *///=========================================================================
726 bool is_policy_support_utf8() { return _policy_support_utf8; }; // -x- bool is_policy_support_utf8 -x-
727
728 /*======================================================================*//**
729 @brief
730 Indicates whether this is just an eMail address, without any other parts such
731 as display-name, group constructs, comments, etc.
732 @returns TRUE = eMail address includes a display-name@n
733 FALSE = eMail address has no display-name
734 *///=========================================================================
735 bool is_pure(
736 /// indicate wither angle-brackets are okay (default is FALSE so that the
737 /// meaning of the word "pure" is not tainted)
738 const bool angle_flag = false,
739 /// eMail address index (default is 0 for the first eMail address)
740 const int index = 0
741 ) {
742//TODO: Finish this (we need to consider groups, display-name, comments, etc.)
743 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name != -1;
744 }; // -x- bool is_pure -x-
745
746 /*======================================================================*//**
747 @brief
748 Access an eMail address's local-part (the portion preceding the @c @ sign).
749 @see addr
750 @see display_name
751 @see domain_part
752 @see email
753 @see operator[](int)
754 @returns std::string with only the local-part (no angle brackets, etc.)
755 *///=========================================================================
756 std::string local_part(
757 /// Index of eMail address to query for (0 = first element; negative index
758 /// values are calculated in reverse, starting with -1 as the final position)
759 int index = 0) {
760 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_local_part].p_token.c_str());
761 }; // -x- std::string local_part -x-
762
763 /*======================================================================*//**
764 @brief
765 Sets the policy for whether to keep comments that were embedded in eMail
766 address group-construct, display-name, and local-part portions.
767
768 Comments are excluded by default because most systems don't need them, but in
769 the event that they are needed (or wanted), this policy makes it possible to
770 make sure they aren't excluded during processing.
771 @see set
772 @see is_policy_keep_comments
773 @returns The same rmailaddr object so as to facilitate stacking
774 *///=========================================================================
775 rmailaddr* policy_keep_comments(
776 /// FALSE = do not retain comments embedded in eMail addresses (deafult)@n
777 /// TRUE = retain comments embedded in eMail addresses
778 bool policy_flag) {
779
780 // --------------------------------------------------------------------------
781 // Update internal policy.
782 // --------------------------------------------------------------------------
783 _policy_keep_comments = policy_flag;
784
785 // --------------------------------------------------------------------------
786 // Return this object to facilitate stacking.
787 // --------------------------------------------------------------------------
788 return this;
789
790 }; // -x- rmailaddr* policy_keep_comments -x-
791
792 /*======================================================================*//**
793 @brief
794 Sets the policy for whether to support UTF-8 characters.
795
796 Some older systems may not be able to handle 8-bit data that UTF-8 utilizes,
797 in which case this policy makes it possible to easily reject incompatible
798 eMail addresses before attempting to use them with such systems.
799 @see set
800 @see is_policy_support_utf8
801 @returns The same rmailaddr object so as to facilitate stacking
802 *///=========================================================================
803 rmailaddr* policy_support_utf8(
804 /// TRUE = support UTF-8 characters in eMail addresses (deafult)@n
805 /// FALSE = do not support UTF-8 characters in eMail addresses
806 bool policy_flag) {
807
808 // --------------------------------------------------------------------------
809 // Update internal policy.
810 // --------------------------------------------------------------------------
811 _policy_support_utf8 = policy_flag;
812
813 // --------------------------------------------------------------------------
814 // Return this object to facilitate stacking.
815 // --------------------------------------------------------------------------
816 return this;
817
818 }; // -x- rmailaddr* policy_support_utf8 -x-
819
820 /*======================================================================*//**
821 @brief
822 Sets the policy for whether to convert every tab character (ASCII charcter 9)
823 to a space (ASCII character 32). This conversion occurs only once when the
824 eMail address is initially specified in a constructor or by way of one of the
825 @ref set() methods (changing this policy after this point will not be applied
826 to the current eMail address, but it will be in effect for future calls to
827 any of the @ref set() methods).
828
829 There are some situations where a tab character can create problems, such as
830 when interacting with certain older software or software that makes incorrect
831 assumptions about how to parse an eMail address, and this policy makes it
832 easy to accomodate such situations for the tab character, which some users
833 may be including by using the tab key on their keyboards.
834 @see set
835 @see is_policy_tabs_to_spaces
836 @returns The same rmailaddr object so as to facilitate stacking
837 *///=========================================================================
838 rmailaddr* policy_tabs_to_spaces(
839 /// TRUE = convert every tab character to a space@n
840 /// FALSE = do not convert tab characters to spaces (default)
841 bool policy_flag) {
842
843 // --------------------------------------------------------------------------
844 // Update internal policy.
845 // --------------------------------------------------------------------------
846 _policy_tabs_to_spaces = policy_flag;
847
848 // --------------------------------------------------------------------------
849 // Return this object to facilitate stacking.
850 // --------------------------------------------------------------------------
851 return this;
852
853 }; // -x- rmailaddr* policy_tabs_to_spaces -x-
854
855 /*======================================================================*//**
856 @brief
857 Sets the policy for whether to throw exceptions when an error is encountered.
858
859 When this flag is set, errors are tracked internally instead of throwing any
860 exceptions, and will need to be retrieved using the @ref errors() method,
861 which is useful for analyzing an eMail address. (Enabling or disabling this
862 flag does not erase the errors that are stored internally; you will need to
863 use the @ref errors_clear method for this.)
864
865 @warning
866 This policy is not meant for general use in the majority of applications; it
867 is intended for technical analysis, which would be useful in diagnostic and
868 research applications, or packet analysis applications like WireShark, or for
869 advanced users who are interested in more techincal detail.
870 @see errors
871 @see errors_clear
872 @see is_policy_throw_exceptions
873 @returns The same rmailaddr object so as to facilitate stacking
874 *///=========================================================================
875 rmailaddr* policy_throw_exceptions(
876 /// TRUE = throw exceptions (default)@n
877 /// FALSE = don't throw exceptions
878 bool policy_flag) {
879
880 // --------------------------------------------------------------------------
881 // Update internal policy.
882 // --------------------------------------------------------------------------
883 _policy_throw_exceptions = policy_flag;
884
885 // --------------------------------------------------------------------------
886 // Return this object to facilitate stacking.
887 // --------------------------------------------------------------------------
888 return this;
889
890 }; // -x- rmailaddr* policy_throw_exceptions -x-
891
892 /*======================================================================*//**
893 @brief
894 Set a new eMail address, resetting all internal flags, counters, and arrays
895 (but not changing any existing policies). Any existing eMail addresses will
896 be cleared out. (This method is also used internally by most of this class's
897 constructors.)
898 @throws std::invalid_argument describing the problem, along with the byte
899 offset where the problem originated from
900 @see rmailaddr
901 @returns The same rmailaddr object so as to facilitate stacking
902 *///=========================================================================
903 rmailaddr* set(
904 /// RFC-compliant eMail address
905 const char* mailbox,
906 /// Number of characters (-1 = ASCIIZ string)
907 int len = -1) {
908 return set((char8_t*)mailbox, len);
909 }; // -x- rmailaddr* set -x-
910
911 /*======================================================================*//**
912 @copydoc set(const char*, int)
913 @see rmailaddr
914 @returns The same rmailaddr object so as to facilitate stacking
915 *///=========================================================================
916 rmailaddr* set(
917 /// RFC-compliant eMail address
918 const std::string mailbox) {
919 return set((char8_t*)mailbox.data(), mailbox.size());
920 }; // -x- rmailaddr* set -x-
921
922 /*======================================================================*//**
923 @copydoc set(const char*, int)
924 @see rmailaddr
925 @returns The same rmailaddr object so as to facilitate stacking
926 *///=========================================================================
927 rmailaddr* set(
928 /// RFC-compliant eMail address
929 const std::u8string mailbox) {
930 return set(mailbox.data(), mailbox.size());
931 }; // -x- rmailaddr* set -x-
932
933 /*======================================================================*//**
934 @copydoc set(const char*, int)
935 @see rmailaddr
936 @returns The same rmailaddr object so as to facilitate stacking
937 *///=========================================================================
938 rmailaddr* set(
939 /// RFC-compliant eMail address
940 const char8_t* mailbox,
941 /// Number of characters (-1 = ASCIIZ string)
942 int len = -1) {
943
944 // --------------------------------------------------------------------------
945 // Measure size of format string if an ASCIIZ string was indicated.
946 // --------------------------------------------------------------------------
947 if (len == -1) len = std::strlen((char*)mailbox);
948
949 // --------------------------------------------------------------------------
950 // Save a copy of the original eMail address.
951 // --------------------------------------------------------------------------
952 _addr.assign(mailbox, len); // We need to save this for later reference
953
954 // --------------------------------------------------------------------------
955 // Pre-adjustments (optional, as per policy flags).
956 // --------------------------------------------------------------------------
957 if (_policy_tabs_to_spaces) // Policy: Convert all tabs to spaces
958 _addr.replace(_addr.begin(), _addr.end(), '\t', ' '); // Efficient replacement
959
960 // --------------------------------------------------------------------------
961 // Internal variables.
962 // --------------------------------------------------------------------------
963 int offset = 0; // Offset within original mailbox char8_t[] array
964 int last_display_name = -1; // Used to build type "e" eMail tokens
965 int last_local_part = -1; // Used to build type "e" eMail tokens
966 int last_domain_part = -1; // Used to build type "e" eMail tokens
967
968 // --------------------------------------------------------------------------
969 // Internal variables that are reset or updated together at various times,
970 // such as when a token is [in most cases] completed.
971 // --------------------------------------------------------------------------
972 int token_begin = 0; // Beginning offset within current portion of string being parsed
973 char8_t ch; // Character being tested (this needs to be defined outside of the main loop)
974 bool flag_utf8 = false; // UTF8 character(s) detected
975 bool flag_angle = false; // Angle-bracket detected
976 bool flag_quote = false; // Quotation-marks mode detected
977 bool active_angle = false; // Angle-bracket mode is active
978 bool active_at_sign = false; // At-sign mode is active (domain-part instead of local-part interpretation)
979 bool active_quote = false; // Quotation-marks mode is active
980 int comment_depth = 0; // Comments are active when this value is greater than 0 (too many closed comments are in the negative)
981 std::u8string p_token; // Processed token data (angle brackets, quotation marks, comments, and whitespace omitted)
982 std::u8string p_token_sp; // Processed token data (angle brackets, quotation marks, and comments omitted), with spaces preserved
983
984 // --------------------------------------------------------------------------
985 // Main parsing loop that identifies tokens and ensures compliance, and also
986 // effectively pre-processes eMail addresses on-the-fly for faster access
987 // from the _emails vector later.
988 // --------------------------------------------------------------------------
989 do {
990
991 // --------------------------------------------------------------------------
992 // Obtain next character.
993 // --------------------------------------------------------------------------
994 ch = mailbox[offset];
995
996 // --------------------------------------------------------------------------
997 // Compare one character at a time, but first process special cases of quoted
998 // data (copy most of the data) and comments (ignore the data).
999 // --------------------------------------------------------------------------
1000 if (flag_quote && active_quote && ch != '"') {
1001 if (QTEXT(ch)) { // Include only quoted text
1002 p_token.push_back(ch);
1003 p_token_sp.push_back(ch);
1004 } // -x- if QTEXT -x-
1005 continue;
1006 } else if (comment_depth > 0 && ch != ')') { // Ignore all comment data
1007 if (_policy_keep_comments) {
1008 p_token.push_back(ch);
1009 p_token_sp.push_back(ch);
1010 } // -x- if _policy_keep_comments -x-
1011 continue;
1012 } else
1013 main_parsing_switch: switch (ch) {
1014
1015 // --------------------------------------------------------------------------
1016 // Group name ends with a colon.
1017 // --------------------------------------------------------------------------
1018 case '"': {
1019 if (!active_quote) { // Enable quotation-marks mode
1020 if (flag_quote) _exception("quotation-marks mode can't be re-opened", offset);
1021 active_quote = true;
1022 flag_quote = true;
1023 } else { // Disable quotation-marks mode
1024 active_quote = false;
1025 }
1026 continue;
1027 } // -x- case " -x-
1028
1029 // --------------------------------------------------------------------------
1030 // Group name ends with a colon.
1031 // --------------------------------------------------------------------------
1032 case ':': {
1033
1034 // --------------------------------------------------------------------------
1035 // Internal tracking.
1036 // --------------------------------------------------------------------------
1037 group_depth++;
1038
1039 // --------------------------------------------------------------------------
1040 // Add this token to the tokens vector.
1041 // --------------------------------------------------------------------------
1042 _tokens.push_back({ .type = 'g',
1043 .offset = token_begin,
1044 .len = offset - token_begin,
1045 .flag_utf8 = flag_utf8,
1046 .p_token = p_token_sp, });
1047
1048 // --------------------------------------------------------------------------
1049 // Reset and prepare internal variables for the next token.
1050 // --------------------------------------------------------------------------
1051 RESET_FOR_NEXT_TOKEN;
1052 continue;
1053
1054 } // -x- case : -x-
1055
1056 // --------------------------------------------------------------------------
1057 // Group of eMail addresses is terminated by a semi-colon.
1058 // --------------------------------------------------------------------------
1059 case ';': {
1060
1061 // --------------------------------------------------------------------------
1062 // Internal tracking.
1063 // --------------------------------------------------------------------------
1064 if (--group_depth < 0) _exception("too many group construct terminators", offset);
1065 if (active_angle) _exception("unbalanced open angle bracket", offset);
1066
1067 // --------------------------------------------------------------------------
1068 // Add this token terminator to the tokens vector.
1069 // --------------------------------------------------------------------------
1070 _tokens.push_back({ .type = ';',
1071 .offset = token_begin,
1072 .len = offset - token_begin,
1073 .flag_utf8 = flag_utf8,
1074 .p_token = p_token_sp, });
1075
1076 // --------------------------------------------------------------------------
1077 // Reset and prepare internal variables for the next token.
1078 // --------------------------------------------------------------------------
1079 RESET_FOR_NEXT_TOKEN;
1080 continue;
1081
1082 } // -x- case ; -x-
1083
1084 // --------------------------------------------------------------------------
1085 // Opening angle bracket.
1086 // --------------------------------------------------------------------------
1087 case '<': {
1088
1089 // --------------------------------------------------------------------------
1090 // Internal tracking.
1091 // --------------------------------------------------------------------------
1092 if (flag_angle) _exception("unbalanced open angle bracket", offset);
1093 active_angle = true;
1094 flag_angle = true;
1095
1096 // --------------------------------------------------------------------------
1097 // Add this token terminator to the tokens vector if a display-name exists.
1098 // --------------------------------------------------------------------------
1099 if (token_begin < offset) {
1100 last_display_name = _tokens.size();
1101 _tokens.push_back({ .type = 'n',
1102 .offset = token_begin,
1103 .len = offset - token_begin,
1104 .flag_utf8 = flag_utf8,
1105 .p_token = p_token_sp, });
1106 } // -x- if token_begin -x-
1107
1108 // --------------------------------------------------------------------------
1109 // Reset and prepare internal variables for the next token.
1110 // --------------------------------------------------------------------------
1111 RESET_FOR_NEXT_TOKEN;
1112 continue;
1113
1114 } // -x- case < -x-
1115
1116 // --------------------------------------------------------------------------
1117 // At sign ("@") delimiter.
1118 // --------------------------------------------------------------------------
1119 case '@': {
1120
1121 // --------------------------------------------------------------------------
1122 // Internal tracking.
1123 // --------------------------------------------------------------------------
1124 if (active_at_sign) _exception("too many at (\"@\") signs", offset);
1125 active_at_sign = true;
1126
1127 // --------------------------------------------------------------------------
1128 // Add this token terminator to the tokens vector if a display-name exists.
1129 // --------------------------------------------------------------------------
1130 last_local_part = _tokens.size();
1131 _tokens.push_back({ .type = 'l',
1132 .offset = token_begin,
1133 .len = offset - token_begin,
1134 .flag_utf8 = flag_utf8,
1135 .flag_angle = flag_angle,
1136 .p_token = p_token, });
1137
1138 // --------------------------------------------------------------------------
1139 // Reset and prepare internal variables for the next token.
1140 // --------------------------------------------------------------------------
1141 RESET_FOR_NEXT_TOKEN;
1142 continue;
1143
1144 } // -x- case @ -x-
1145
1146 // --------------------------------------------------------------------------
1147 // Closing angle-bracket.
1148 // --------------------------------------------------------------------------
1149 case '>': {
1150
1151 // --------------------------------------------------------------------------
1152 // Internal tracking.
1153 // --------------------------------------------------------------------------
1154 if (!active_angle) _exception("unbalanced closing angle bracket", offset);
1155 active_angle = false;
1156 goto main_parsing_email;
1157
1158 // --------------------------------------------------------------------------
1159 // Reset and prepare internal variables for the next token.
1160 // --------------------------------------------------------------------------
1161 RESET_FOR_NEXT_TOKEN;
1162 continue;
1163
1164 } // -x- case > -x-
1165
1166 // --------------------------------------------------------------------------
1167 // Comma delimiter, signifies the end of an eMail address.
1168 // --------------------------------------------------------------------------
1169 case ',': {
1170
1171 main_parsing_comma:
1172 // --------------------------------------------------------------------------
1173 // Internal tracking.
1174 // --------------------------------------------------------------------------
1175 if (active_quote) _exception("unbalanced quotation-marks", offset);
1176 if (active_angle) _exception("unbalanced open angle bracket before comma", offset);
1177
1178 main_parsing_email:
1179 // --------------------------------------------------------------------------
1180 // Add this token terminator to the tokens vector if a display-name exists.
1181 // --------------------------------------------------------------------------
1182 if (active_at_sign) { // Domain-part has been started
1183 last_domain_part = _tokens.size();
1184 _tokens.push_back({ .type = 'd',
1185 .offset = token_begin,
1186 .len = offset - token_begin,
1187 .flag_utf8 = flag_utf8,
1188 .flag_angle = _tokens[last_local_part].flag_angle,
1189 .p_token = p_token, });
1190 active_at_sign = false;
1191 } else { // Domain-part has not been started, so there's only a local-part here
1192 last_local_part = _tokens.size();
1193 _tokens.push_back({ .type = 'l',
1194 .offset = token_begin,
1195 .len = offset - token_begin,
1196 .flag_utf8 = flag_utf8,
1197 .flag_angle = flag_angle,
1198 .p_token = p_token, });
1199 } // -x- if active_at_sign -x-
1200
1201 // --------------------------------------------------------------------------
1202 // Perform a few checks to make sure we're not creating phantom addresses.
1203 // --------------------------------------------------------------------------
1204 int __email_len = last_domain_part == -1 ? _tokens[last_local_part].len : (_tokens[last_domain_part].offset - _tokens[last_local_part].offset) + _tokens[last_domain_part].len;
1205//std::cout << "__email_len=" << std::to_string(__email_len) << std::endl;
1206 if (__email_len == 0 && !flag_angle) continue;
1207//std::cout << "last_local_part=" << std::to_string(last_local_part) << std::endl;
1208//std::cout << "last_domain_part=" << std::to_string(last_domain_part) << std::endl;
1209
1210 // --------------------------------------------------------------------------
1211 // Create a token of type "e" now that this eMail address is closed.
1212 //
1213 // The reason we're calculating size based on offsets instead of by adding
1214 // sizes together (and adding 1 for the "@" sign) is that commants can be
1215 // included in the localpart portion, which normally won't be counted in any
1216 // localpart sizes.
1217 // --------------------------------------------------------------------------
1218 _index_e.push_back(_tokens.size()); // Add to index of eMail addresses (before adding to _tokens vector, _tokens.size() is the position)
1219 _tokens.push_back({ .type = 'e',
1220 .offset = _tokens[last_local_part].offset,
1221 .len = __email_len,// - token_begin,
1222 .flag_utf8 = _tokens[last_local_part].flag_utf8 || flag_utf8,
1223 .flag_angle = _tokens[last_local_part].flag_angle,
1224 .flag_null_addr = __email_len == 0,
1225 .p_token = _tokens[last_local_part].p_token + ((last_domain_part == -1 || _tokens[last_domain_part].p_token.empty()) ? u8"" : u8"@" + _tokens[last_domain_part].p_token),
1226 .index_display_name = last_display_name,
1227 .index_local_part = last_local_part,
1228 .index_domain_part = last_domain_part, });
1229 last_display_name = -1;
1230 last_local_part = -1;
1231 last_domain_part = -1;
1232 flag_angle = false;
1233
1234 // --------------------------------------------------------------------------
1235 // Reset and prepare internal variables for the next token.
1236 // --------------------------------------------------------------------------
1237 RESET_FOR_NEXT_TOKEN;
1238 continue;
1239
1240 } // -x- case , -x-
1241
1242 // --------------------------------------------------------------------------
1243 // Opening comment parenthesis.
1244 // --------------------------------------------------------------------------
1245 case '(': {
1246 comment_depth++;
1247 continue;
1248 } // -x- case ( -x-
1249
1250 // --------------------------------------------------------------------------
1251 // Closing comment parenthesis.
1252 // --------------------------------------------------------------------------
1253 case ')': {
1254 if (--comment_depth < 0) _exception("unbalanced closing comment parenthesis", offset);
1255 continue;
1256 } // -x- case ) -x-
1257
1258 // --------------------------------------------------------------------------
1259 // Backslash (quote-literal).
1260 // --------------------------------------------------------------------------
1261 case '\\': {
1262
1263 // --------------------------------------------------------------------------
1264 // Prevent a potential out-of-bounds buffer-overrun problem.
1265 // --------------------------------------------------------------------------
1266 if (++offset == len) {
1267 _exception("unbalanced quote-literal (backslash)", offset);
1268 continue; // Do this in case we're not throwing exceptions
1269 } // -x- if offset -x-
1270
1271 // --------------------------------------------------------------------------
1272 // Update to next character (whatever it is, we're taking it literally).
1273 // --------------------------------------------------------------------------
1274 ch = mailbox[offset];
1275 goto main_parsing_loop_default; // Fall-through to default
1276
1277 } // -x- case \ -x-
1278
1279 // --------------------------------------------------------------------------
1280 // All remaining characters.
1281 // --------------------------------------------------------------------------
1282 default:
1283 //if (flag_angle) _exception("additional data not permitted", offset);
1284 main_parsing_loop_default:
1285 if (ch > 127) { // Include all UTF-8 character (unless prevented by the exception)
1286 flag_utf8 = true;
1287 if (!_policy_support_utf8) _exception("UTF-8 byte encountered", offset);
1288 p_token.push_back(ch);
1289 p_token_sp.push_back(ch);
1290 } else if (CTEXT(ch) || ' ') { // Include almost everything for now (including spaces)
1291 if (ch != ' ') p_token.push_back(ch); // Exclude spaces
1292 if (!(ch == ' ' && p_token.size() == 0)) p_token_sp.push_back(ch); // Keep spaces
1293 } // -x- if ch -x-
1294
1295 } // -x- switch ch -x-
1296
1297 } while (++offset < len); // -x- do while -x-
1298
1299 // --------------------------------------------------------------------------
1300 // If the final token isn't empty (a.k.a., unfinished / not sealed), then
1301 // figure out what to do and run one more time, or else throw an exception.
1302 // --------------------------------------------------------------------------
1303 if (offset == len && token_begin < offset) {
1304 ch = ','; // Force comma (",") on parsing loop
1305 goto main_parsing_switch;
1306 } else if (offset > len && token_begin < offset) {
1307 _exception("incomplete data", offset - 1);
1308 } // -x- if offset -x-
1309 return this;
1310
1311 }; // -x- rmailaddr* set -x-
1312
1313 /*======================================================================*//**
1314 @brief
1315 Find out how many eMail addresses this object holds.
1316 @see empty
1317 @see has_any
1318 @see has_multiple
1319 @see has_one
1320 @returns The number of eMail addresses
1321 *///=========================================================================
1322 int size() { return _index_e.size(); } // -x- int size -x-
1323
1324 /*======================================================================*//**
1325 @brief
1326 Generate a detailed output of all tokens that's useful for debugging.
1327
1328 @code
1329 Types:
1330 g = group name (beginning; includes colon)
1331 ; = group termination (semi-colon character)
1332 n = display name
1333 e = eMail address (includes angle brackets, if present)
1334 l = local-part
1335 d = domain-part
1336 c = comment (not implemented)
1337 \0 = not initialized (null; regard as "unknown"; this should never happen)
1338 @endcode
1339
1340 The difference between "token" and "p_token" is that "token" is the original
1341 and [mostly] unprocessed atom, while "p_token" has been processed with any
1342 sets of angle-brackets, sets of quotation-marks, comments, and whitespace
1343 removed. In nearly all instances, the value of "p_token" is what's needed.
1344 @returns std::string containing multi-line text (one token per line)
1345 *///=========================================================================
1346 std::string tokens_to_string(
1347 /// Filter (string containing characters for those types that are to be
1348 /// included {unrecognized types will be ignored}; the default is no filter)
1349 const std::string filter = "",
1350 /// Prefix (text to insert before the beginning of each line)
1351 const std::string prefix = "",
1352 /// End-of-Line sequence (default is "\n")
1353 const std::string eol = "\n") {
1354
1355 // --------------------------------------------------------------------------
1356 // Internal variables.
1357 // --------------------------------------------------------------------------
1358 std::string t;
1359
1360 // --------------------------------------------------------------------------
1361 // Loop that builds list of tokens (one per line).
1362 // --------------------------------------------------------------------------
1363 for (int i = 0; i < _tokens.size(); i++) {
1364
1365 // --------------------------------------------------------------------------
1366 // Check filter.
1367 // --------------------------------------------------------------------------
1368 if (filter.empty() || filter.find(_tokens[i].type) != std::string::npos) {
1369
1370 // --------------------------------------------------------------------------
1371 // Shared characteristics.
1372 // --------------------------------------------------------------------------
1373 t.append(prefix + "index=" + std::to_string(i)
1374 + " type=" + _tokens[i].type
1375 + " utf8=" + (_tokens[i].flag_utf8 ? "y" : "n")
1376 + " punycode=" + (_tokens[i].flag_punycode ? "y" : "n")
1377 + " obsolete=" + (_tokens[i].flag_obsolete ? "y" : "n")
1378 + " offset=" + std::to_string(_tokens[i].offset)
1379 + " length=" + std::to_string(_tokens[i].len)
1380 + " token=" + std::string((char*)_addr.c_str()).substr(_tokens[i].offset, _tokens[i].len)
1381 + " p_token=" + (char*)_tokens[i].p_token.c_str());
1382
1383 // --------------------------------------------------------------------------
1384 // Type-specific characteristics.
1385 // --------------------------------------------------------------------------
1386 switch (_tokens[i].type) {
1387 case 'd':
1388 t.append(std::string( " fqdn=") + (_tokens[i].flag_fqdn ? "y" : "n"));
1389 break;
1390 case 'e':
1391 t.append(std::string( " angle=") + (_tokens[i].flag_angle ? "y" : "n"));
1392 // Fall-through to type "l"
1393 case 'l':
1394 t.append(std::string(" null_addr=") + (_tokens[i].flag_null_addr ? "y" : "n"));
1395 break;
1396 } // -x- switch type -x-
1397
1398 // --------------------------------------------------------------------------
1399 // Final EoL (End of Line) sequence.
1400 // --------------------------------------------------------------------------
1401 t.append(eol);
1402
1403 } // -x- if filter -x-
1404
1405 } // -x- for i -x-
1406 return t;
1407
1408 }; // -x- std::string tokens_to_string -x-
1409
1410 /*======================================================================*//**
1411 @brief
1412 Array-style access to eMail addresses. The first element is at index 0.
1413 @see get
1414 @see domain_part
1415 @see local_part
1416 @returns std::u8string with only the eMail address (no angle brackets, etc.)
1417 as a native UTF-8 string
1418 *///=========================================================================
1419 std::u8string operator[](
1420 /// Index of eMail address to query for (0 = first element; negative index
1421 /// values are calculated in reverse, starting with -1 as the final position)
1422 int index) {
1423 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].p_token;
1424 }; // -x- std::u8string operator[] -x-
1425
1426 /*======================================================================*//**
1427 @brief
1428 Support convenient streaming usage with std::cout, std::cerr, and friends.
1429 @returns eMail address in human-readable form
1430 *///=========================================================================
1431 friend std::ostream& operator<< (
1432 /// Output stream (provided automatically by std::cout and std::cerr)
1433 std::ostream& o,
1434 /// Object class (matched by compiler)
1435 rmailaddr const& c) { return o << (char*)c._addr.c_str(); }; // -x- std::ostream& operator<< -x-
1436
1437 }; // -x- class rmailaddr -x-
1438
1439}; // -x- namespace randolf -x-