randolf.ca  1.00
Randolf Richardson's C++ classes
Loading...
Searching...
No Matches
rmailaddr
1#pragma once
2
#include <algorithm>
#include <atomic>
#include <cstring>
#include <stdexcept> // std::invalid_argument
#include <string>    // std::string, std::u8string, std::to_string
#include <vector>

#include <arpa/inet.h> // Used only for checking for valid IP addresses in domain literals (inet_pton)
10
11namespace randolf {
12
13 // --------------------------------------------------------------------------
14 // Constants that list sets of valid characters, which are optimized to test
15 // for ranges of the most commonly-used characters first during parsing, are
16 // named consistently with their respective rule names as defined in RFC2822.
17 //
18 // CRLF and \ are invisible in the quoted string according to RFC2822 section
19 // 3.2.5.
20 //
21 // RFC2822 section 3.2.5 also defines a "quoted-string" as containing the
22 // following valid characters (spaces are also permitted): 33, 35-91, 93-126
23 // Certain characters must be quoted first though, and every character
24 // following a backslash is taken literally (and the backslash is removed
25 // from the result).
26 //
27 // RFC2822 section 3.2.4 defines an "atom" as containing the following valid
28 // characters: 0123456789
29 // ABCDEFGHIJKLMNOPQRSTUVWXYZ
30 // abcdefghijklmnopqrstuvwxyz
31 // !#$%&'*+-/=?^_`{|}~
32 //
33 // Quote characters and quotation marks are not permitted in the domain part.
34 //
35 // According to RFC2822 section 3.2.5, a phrase (DisplayName / Comments) can
36 // be either an atom (ATEXT) or quoted-text (QTEXT).
37 //
38 // According to RFC2822 section 2.2.2, whitespace characters are tabs (ASCII
39 // character 9) and spaces (ASCII character 32).
40 //
41 // RFC2822 section 3.4 last paragraph indicates that a group construct is
// optional, and preceded by a colon following any number of comma-delimited
43 // recipients (including zero or one). Group constructs must end with a
44 // semi-colon though.
45 // --------------------------------------------------------------------------
46
// --------------------------------------------------------------------------
// The following macros are optimized for performance by testing for the most
// commonly-used characters first.
//
// ATEXT
//   94...126  ^_`abcdefghijklmnopqrstuvwxyz{|}~
//   65...90   ABCDEFGHIJKLMNOPQRSTUVWXYZ
//   47...57   /0123456789
//   45 | 33   -!
//   35...39   #$%&'
//   42 | 43   *+
//   61 | 63   =?
//
// The argument is parenthesized at every use so that an expression (such as
// "c & 0x7f") is classified as a whole.  It is evaluated more than once, so
// it must not have side effects.
// --------------------------------------------------------------------------
#define ATEXT(a) (((a) >= 94 && (a) <= 126) \
               || ((a) >= 65 && (a) <= 90)  \
               || ((a) >= 47 && (a) <= 57)  \
               || (a) == 45 || (a) == 33    \
               || ((a) >= 35 && (a) <= 39)  \
               || (a) == 42 || (a) == 43    \
               || (a) == 61 || (a) == 63    )
67
// --------------------------------------------------------------------------
// ATEXT_OBS ("obsolete standard" is ATEXT plus periods, spaces, and tabs)
//   94...126  ^_`abcdefghijklmnopqrstuvwxyz{|}~
//   65...90   ABCDEFGHIJKLMNOPQRSTUVWXYZ
//   45...57   -./0123456789
//   32 | 33   {space:32}!
//   35...39   #$%&'
//   42 | 43   *+
//   61 | 63   =?
//   9         {tab:9}
//
// The argument is parenthesized at every use so that an expression is
// classified as a whole.  It is evaluated more than once, so it must not
// have side effects.
// --------------------------------------------------------------------------
#define ATEXT_OBS(a) (((a) >= 94 && (a) <= 126) \
                   || ((a) >= 65 && (a) <= 90)  \
                   || ((a) >= 45 && (a) <= 57)  \
                   || (a) == 32 || (a) == 33    \
                   || ((a) >= 35 && (a) <= 39)  \
                   || (a) == 42 || (a) == 43    \
                   || (a) == 61 || (a) == 63    \
                   || (a) == 9                  )
87
// --------------------------------------------------------------------------
// CTEXT (comment text)
//   93...126  ]^_`abcdefghijklmnopqrstuvwxyz{|}~
//   42...91   *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
//   33...39   !"#$%&'
//
// Parentheses (40, 41) and the backslash (92) are deliberately outside every
// range.  The argument is parenthesized at every use so that an expression
// is classified as a whole; it is evaluated more than once, so it must not
// have side effects.
// --------------------------------------------------------------------------
#define CTEXT(a) (((a) >= 93 && (a) <= 126) \
               || ((a) >= 42 && (a) <= 91)  \
               || ((a) >= 33 && (a) <= 39)  )
97
// --------------------------------------------------------------------------
// CTEXT_WSP (comment text with white space)
//   93...126  ]^_`abcdefghijklmnopqrstuvwxyz{|}~
//   42...91   *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
//   32...39   {space:32}!"#$%&'
//   9         {tab:9}
//
// The argument is parenthesized at every use so that an expression is
// classified as a whole.  It is evaluated more than once, so it must not
// have side effects.
// --------------------------------------------------------------------------
#define CTEXT_WSP(a) (((a) >= 93 && (a) <= 126) \
                   || ((a) >= 42 && (a) <= 91)  \
                   || ((a) >= 32 && (a) <= 39)  \
                   || (a) == 9                  )
109
// --------------------------------------------------------------------------
// CTEXT_OBS (obsolete comment text)
//   CTEXT     {CTEXT}
//   32        {space:32}
//   9         {tab:9}
//
// The argument is parenthesized at every use (including when it is handed on
// to CTEXT) so that an expression is classified as a whole.  It is evaluated
// more than once, so it must not have side effects.
// --------------------------------------------------------------------------
#define CTEXT_OBS(a) (CTEXT((a))  \
                   || (a) == 32   \
                   || (a) == 9    )
119
// --------------------------------------------------------------------------
// DTEXT (domain-part, not including characters needed for domain-literals)
//   94...126  ^_`abcdefghijklmnopqrstuvwxyz{|}~
//   33...90   !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ
//
// Square brackets (91, 93) and the backslash (92) fall between the two
// ranges and are therefore rejected.  The argument is parenthesized at every
// use so that an expression is classified as a whole; it is evaluated more
// than once, so it must not have side effects.
// --------------------------------------------------------------------------
#define DTEXT(a) (((a) >= 94 && (a) <= 126) \
               || ((a) >= 33 && (a) <= 90)  )
127
// --------------------------------------------------------------------------
// FWS (folding white space)
//   10        {lf:10}
//   13        {cr:13}
//   9         {tab:9}
//
// NOTE(review): the space character (32) is not matched here; presumably it
// is tested separately (e.g., via WSP) -- confirm against the callers.
//
// The argument is parenthesized at every use so that an expression is
// classified as a whole.  It is evaluated more than once, so it must not
// have side effects.
// --------------------------------------------------------------------------
#define FWS(a) ((a) == 10 \
             || (a) == 13 \
             || (a) == 9  )
137
// --------------------------------------------------------------------------
// QTEXT (quoted text)
//   93...126  ]^_`abcdefghijklmnopqrstuvwxyz{|}~
//   35...91   #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[
//   32        {space:32}
//   33        !
//
// The quotation mark (34) and the backslash (92) are excluded.  The argument
// is parenthesized at every use so that an expression is classified as a
// whole; it is evaluated more than once, so it must not have side effects.
// --------------------------------------------------------------------------
#define QTEXT(a) (((a) >= 93 && (a) <= 126) \
               || ((a) >= 35 && (a) <= 91)  \
               || (a) == 32 || (a) == 33    )
148
// --------------------------------------------------------------------------
// TEXT
//   14...127  {char:14-31}{space:32}!"#$%&'()*+,-./0123456789:;<=>
//             ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^
//             _`abcdefghijklmnopqrstuvwxyz{|}~
//             {delete:127}
//   1...9     {char:1-6}{beep:7}{backspace:8}{tab:9}
//   11 | 12   {char:11}{char:12}
//
// In other words: every value from 1 to 127 except LF (10) and CR (13).  The
// argument is parenthesized at every use so that an expression is classified
// as a whole; it is evaluated more than once, so it must not have side
// effects.
// --------------------------------------------------------------------------
#define TEXT(a) (((a) >= 14 && (a) <= 127) \
              || ((a) >= 1 && (a) <= 9)    \
              || (a) == 11 || (a) == 12    )
161
// --------------------------------------------------------------------------
// WSP (white space)
//   32        {space:32}
//   9         {tab:9}
//
// The argument is parenthesized at every use so that an expression is
// classified as a whole.  It is evaluated more than once, so it must not
// have side effects.
// --------------------------------------------------------------------------
#define WSP(a) ((a) == 32 \
             || (a) == 9  )
169
// --------------------------------------------------------------------------
// Used by the set() method to consistently reset internal variables when
// moving onward to the next token.
//
// token_begin: Configures beginning of next token.
//
// The statements are wrapped in a single brace-enclosed block so that the
// macro behaves as one statement; without the braces, only the first
// assignment would be governed by an unbraced "if" or loop.  The macro may
// be written with or without a trailing semicolon.  It expects flag_utf8,
// flag_quote, token_begin, offset, p_token, and p_token_sp to be in scope.
// --------------------------------------------------------------------------
#define RESET_FOR_NEXT_TOKEN { \
  flag_utf8 = false;           \
  flag_quote = false;          \
  token_begin = offset + 1;    \
  p_token.clear();             \
  p_token_sp.clear();          \
}
182
183 /*======================================================================*//**
184 @brief
185 This @ref rmailaddr class provides an object-oriented eMail address.
186
187 @par Features
188
189 Some of the key features are:
190
191 - constructors with sensible defaults help to simplify coding
192 - documentation includes code samples (with @c \#include lines as needed)
193 - can handle ASCIIZ without needing to specify string length
194 - can handle @c std::string (which tracks its own string length)
195
196 @par Use case
197
198 Validation of the format of an eMail address is helpful in ensuring that
199 eMail addresses received from elsewhere comply with internet standards.
200
201 @par Background
202
203 I created this class to make it easier to write internet server daemons and
204 other software that needs to accept and/or handle eMail addresses. (This is
205 a complete re-write of the version I wrote in Java 17 years ago in 2007,
206 which includes a significant array of differences due to the improved parsing
207 approaches I use now that are more efficient, and the need to make sure that
208 UTF-8 characters and punycode are both handled in a transparent manner.)
209
210 @par Getting started
211
212 @author Randolf Richardson
213 @version 1.00
214 @par History
215 2024-May-07 v1.00 Initial version
216
217 @par Conventions
218 Lower-case letter "m" is regularly used in partial example code to represent
219 an instantiated rmailaddr object.
220
221 An ASCIIZ string is a C-string (char* array) that includes a terminating null
222 (0) character at the end.
223
224 @par Notes
225
226 I use the term "ASCIIZ string" to indicate an array of characters that's
227 terminated by a 0 (a.k.a., null). Although this is very much the same as a
228 C-string, the difference is that in many API functions a C-string must often
229 be accompanied by its length value. When referring to an ASCIIZ string, I'm
230 intentionally indicating that the length of the string is not needed because
231 the string is null-terminated. (This term was also commonly used in assembly
232 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
233 still used by machine language programmers today.)
234
235 @par Examples
236
237 @code{.cpp}
238 #include <iostream> // std::cout, std::cerr, std::endl, etc.
239 #include <stdexcept> // std::invalid_argument exception
240
241 #include <randolf/rmailaddr>
242
243 int main(int argc, char *argv[]) {
244 try {
245 randolf::rmailaddr m("nobody@example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "eMail address format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
250 std::cerr << "Other exception: " << e.what() << std::endl;
251 return EXIT_FAILURE;
252 }
253 return EXIT_SUCCESS;
254 } // -x- int main -x-
255 @endcode
256
Parameter stacking is supported (with methods that return @c rmailaddr*); in
this example, notice that semicolons (";") and "e." references are omitted
(when compared with the above):
260
261 @code{.cpp}
262 #include <iostream> // std::cout, std::cerr, std::endl, etc.
263 #include <stdexcept> // std::invalid_argument exception
264
265 #include <randolf/rmailaddr>
266
267 int main(int argc, char *argv[]) {
268 try {
269 randolf::rmailaddr m("nobody@example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "eMail address format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
274 std::cerr << "Other exception: " << e.what() << std::endl;
275 return EXIT_FAILURE;
276 }
277 return EXIT_SUCCESS;
278 } // -x- int main -x-
279 @endcode
280 *///=========================================================================
281 class rmailaddr {
282
283 public:
284 /*======================================================================*//**
285 @brief
286 Structure of errors (only used when exceptions are disabled).
287 @see errors
288 @see policy_throw_exceptions
289 *///=========================================================================
struct error_data {
  /// Error message
  std::string message;
  /// Offset (0 = position of first byte); zero-initialized so that a
  /// default-constructed record never carries an indeterminate value
  unsigned int offset = 0;
}; // -x- struct error_data -x-
296
297 private:
298 /*======================================================================*//**
299 @brief
300 Structure of positions within the original eMail string where a portion
301 begins, and its length (in bytes), along with various other information about
302 the section.
303
304 This is used internally, and std::vector<mail_addr_token> organizes them and
305 looks after freeing memory.
306 *///=========================================================================
307 struct mail_addr_token {
308 /// Types:
309 /// g = group name (beginning; includes colon)
310 /// ; = group termination (semi-colon character)
311 /// n = display name
312 /// e = eMail address (includes angle brackets, if present)
313 /// l = local-part
314 /// d = domain-part
315 /// c = comment
316 /// \0 = not initialized (null can effectively be regarded as meaning "unknown")
317 char type = (char)0;
318 /// Offset, within the string, where this part begins
319 unsigned int offset = 0;
320 /// Total number of bytes
321 unsigned int len = 0;
322 /// Whether any UTF-8 characters are present in this part
323 bool flag_utf8 = false;
324 /// Whether this part is in punycode (begins with "xn--")
325 bool flag_punycode = false; // TODO
326 /// Whether this part is "obsolete" (according to RFCs)
327 bool flag_obsolete = false; // TODO
328 /// Whether eMail address was enclosed in angle brackets (type "e" only)
329 bool flag_angle = false;
330 /// Whether the token was enclosed in quotation marks
331 bool flag_quotes = false;
332 /// Whether eMail address is a null address enclosed in angle brackets (type "e" only)
333 bool flag_null_addr = false;
334 /// Whether the domain-part is an FQDN (type "d" only)
335 bool flag_fqdn = false; // TODO
336 /// Whether the domain-part is a domain-literal (type "d" only)
337 bool flag_domain_literal = false; // TODO
338 /// Depth of groups (types "g" and ";" only)
339 unsigned short depth = 0;
340 /// Processed data, with quotation marks, angle brackets, comments, whitespace, etc., removed
341 std::u8string p_token;
342 /// Index to display-name (type "e" only)
343 int index_display_name = -1;
344 /// Index to local-part (type "e" only)
345 int index_local_part = -1;
346 /// Index to domain-part (type "e" only)
347 int index_domain_part = -1;
348 }; // -x- struct mail_addr_token -x-
349
// --------------------------------------------------------------------------
// Internal variables.
// --------------------------------------------------------------------------
std::u8string _addr; // Original eMail address
std::vector<mail_addr_token> _tokens; // All eMail address tokens
std::vector<int> _index_e; // Indexes (into _tokens) of the type "e" records; one entry per eMail address
std::vector<error_data> _errors; // Error tracking (filled by _exception() when exceptions are disabled)
short group_depth = 0; // Recursive group tracking
bool angle_bracket_mode = false; // Angle-bracket mode tracking
bool quote_mode = false; // Quotation-marks mode tracking

// --------------------------------------------------------------------------
// Policy variables.
// --------------------------------------------------------------------------
bool _policy_keep_comments = false; // Whether to retain comments embedded in eMail addresses
bool _policy_throw_exceptions = true; // TRUE = throw exceptions; FALSE = save internally
bool _policy_tabs_to_spaces = false; // Whether to convert every tab into a space
bool _policy_support_utf8 = true; // Whether to support UTF-8 (FALSE = 7bit characters only)
368
369 /*======================================================================*//**
370 Exception handler.
371 *///=========================================================================
372 void _exception(
373 /// Error message
374 std::string message,
375 /// Offset (0 = position of first byte)
376 int offset) {
377 if (_policy_throw_exceptions) throw std::invalid_argument(message + " at offset " + std::to_string(offset));
378 _errors.push_back({ message, offset });
379 return;
380 } // -x- void _exception -x-
381
382 public:
383 /*======================================================================*//**
384 @brief
385 Instantiate an empty rmailaddr that doesn't qualify as a properly-formatted
386 internet eMail address (because the minimum length of a valid internet eMail
387 address is 1 character).
388
389 Instantiating an empty rmailaddr is particularly useful for header-file
390 definitions; for example:
391 @code{.cpp}
392 #include <iostream> // std::cout, std::cerr, std::endl, etc.
393 #include <stdexcept> // std::invalid_argument exception
394
395 #include <randolf/rmailaddr>
396
397 randolf::rmailaddr m; // <-- Empty rmailaddr initialization (no exceptions)
398
399 int main(int argc, char *argv[]) {
400 try {
401 m.set("nobody@example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "eMail address format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
406 std::cerr << "Other exception: " << e.what() << std::endl;
407 return EXIT_FAILURE;
408 }
409 return EXIT_SUCCESS;
410 } // -x- int main -x-
411 @endcode
412 *///=========================================================================
413 rmailaddr() noexcept {}; // -x- constructor rmailaddr -x-
414
415 /*======================================================================*//**
416 @brief
417 Instantiate an rmailaddr that qualifies as a properly-formatted internet
418 eMail address (if it doesn't qualify, then an exception will be thrown).
419
420 Usage example:
421 @code{.cpp}
422 #include <iostream> // std::cout, std::cerr, std::endl, etc.
423 #include <stdexcept> // std::invalid_argument exception
424
425 #include <randolf/rmailaddr>
426
427 int main(int argc, char *argv[]) {
428 try {
429 randolf::rmailaddr m("nobody@example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "eMail address format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
434 std::cerr << "Other exception: " << e.what() << std::endl;
435 return EXIT_FAILURE;
436 }
437 return EXIT_SUCCESS;
438 } // -x- int main -x-
439 @endcode
440 @throws std::invalid_argument describing the problem, along with the byte
441 offset where the problem originated from
442 @see rmailaddr
443 *///=========================================================================
444 rmailaddr(
445 /// RFC-compliant eMail address
446 const char8_t* mailbox,
447 /// Number of characters (-1 = ASCIIZ string)
448 int len = -1) { set(mailbox, len); }; // -x- constructor rmailaddr -x-
449
450 /*======================================================================*//**
451 @copydoc rmailaddr(const char8_t*, int)
452 @see rmailaddr
453 *///=========================================================================
454 rmailaddr(
455 /// RFC-compliant eMail address
456 const char* mailbox,
457 /// Number of characters (-1 = ASCIIZ string)
458 int len = -1) { set((char8_t*)mailbox, len); }; // -x- constructor rmailaddr -x-
459
460 /*======================================================================*//**
461 @copydoc rmailaddr(const char8_t*, int)
462 @see rmailaddr
463 *///=========================================================================
464 rmailaddr(
465 /// RFC-compliant eMail address
466 const std::string mailbox) { set((char8_t*)mailbox.data(), mailbox.size()); }; // -x- constructor rmailaddr -x-
467
468 /*======================================================================*//**
469 @copydoc rmailaddr(const char8_t*, int)
470 @see rmailaddr
471 *///=========================================================================
472 rmailaddr(
473 /// RFC-compliant eMail address
474 const std::u8string mailbox) { set(mailbox.data(), mailbox.size()); }; // -x- constructor rmailaddr -x-
475
476 /*======================================================================*//**
477 @brief
478 Access only the eMail address, without display-name, and without any sets of
479 enclosing quotation-marks or enclosing angle-brackets, etc.
480 @see display_name
481 @see domain_part
482 @see email
483 @see local_part
484 @see operator[](int)
485 @throws std::out_of_range if the index is out-of-range
486 @returns std::string with only the eMail address (no display-name, and no
487 enclosing sets of quotation-marks or enclosing angle-brackets, etc.)
488 *///=========================================================================
489 std::string addr(
490 /// Index of eMail address to query for (0 = first element; negative index
491 /// values are calculated in reverse, starting with -1 as the final position)
492 int index = 0) {
493// return std::string((char*)_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].p_token.c_str());
494 return std::string((char*)_tokens.at(_index_e.at(index >= 0 ? index : _index_e.size() + index)).p_token.c_str());
495 }; // -x- std::string addr -x-
496
497 /*======================================================================*//**
498 @brief
499 Access an eMail address's display-name (the portion preceding the angle
500 brackets).&nbsp; If there were no angle-brackets, then an empty string will
501 be returned.
502 @see addr
503 @see domain_part
504 @see email
505 @see local_part
506 @see operator[](int)
507 @returns std::string with only the display-name (no quotation marks, etc.)
508 *///=========================================================================
509 std::string display_name(
510 /// Index of eMail address to query for (0 = first element; negative index
511 /// values are calculated in reverse, starting with -1 as the final position)
512 int index = 0) {
513 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name].p_token.c_str());
514 }; // -x- std::string display_name -x-
515
516 /*======================================================================*//**
517 @brief
518 Access an eMail address's domain-part (the portion following the @c @ sign).
519 @see get
520 @see addr
521 @see display_name
522 @see email
523 @see local_part
524 @see operator[](int)
525 @returns std::string with only the domain-part (no angle brackets, etc.)
526 *///=========================================================================
527 std::string domain_part(
528 /// Index of eMail address to query for (0 = first element; negative index
529 /// values are calculated in reverse, starting with -1 as the final position)
530 int index = 0) {
531 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_domain_part].p_token.c_str());
532 }; // -x- std::string domain_part -x-
533
534 /*======================================================================*//**
535 @brief
536 Access an eMail address (enclosed in angle-brackets), and preceded by the
537 display-name (if one is available).
538
539 @note
540 If the original form of the display-name had a delimiting space before the
541 eMail address, then that space will be present in the result here. If not, a
542 space will not be inserted. (In other words, this aspect of the original
543 full eMail address will be retained.)
544 @see addr
545 @see display_name
546 @see domain_part
547 @see local_part
548 @see operator[](int)
549 @returns std::string with display-name and eMail address (in angle-brackets)
550 *///=========================================================================
551 std::string email(
552 /// Index of eMail address to query for (0 = first element; negative index
553 /// values are calculated in reverse, starting with -1 as the final position)
554 int index = 0) {
555
556 // --------------------------------------------------------------------------
557 // The eMail address has no display-name because it wasn't enclosed in angle
558 // brackets, so present the eMail address on its own, in angle brackets.
559 // --------------------------------------------------------------------------
560 mail_addr_token e = _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]];
561 if (e.index_display_name < 0)
562 return "<" + std::string((char*)e.p_token.c_str()) + ">";
563
564 // --------------------------------------------------------------------------
565 // There was a display-name, so return the eMail address with display-name
566 // (enclosed in quotation marks if it started out that way).
567 // --------------------------------------------------------------------------
568 mail_addr_token n = _tokens[e.index_display_name];
569 if (n.flag_quotes)
570 return "\""
571 + std::string((char*)n.p_token.c_str())
572 + "\""
573 + "<"
574 + std::string((char*)e.p_token.c_str())
575 + ">";
576
577 return std::string((char*)n.p_token.c_str())
578 + "<"
579 + std::string((char*)e.p_token.c_str())
580 + ">";
581 }; // -x- std::string email -x-
582
583 /*======================================================================*//**
584 @brief
585 Find out if this object doesn't hold any eMail addresses.
586 @see has_any
587 @see has_multiple
588 @see has_one
589 @see size
590 @returns TRUE = no eMail addresses@n
591 FALSE = one or more eMail addresses
592 *///=========================================================================
593 bool empty() { return _index_e.empty(); }; // -x- bool empty -x-
594
595 /*======================================================================*//**
596 @brief
597 Return a list of errors that have been collected (instead of throwing
598 exceptions).
599 @see errors_clear
600 @see policy_throw_exceptions
601 @returns Vector containing @c error_data
602 *///=========================================================================
603 std::vector<error_data> errors() { return _errors; }; // -x- std::vector<error-data> errors -x-
604
605 /*======================================================================*//**
606 @brief
607 Clear the list of errors that have been collected (instead of throwing
608 exceptions).
609 @see errors
610 @see policy_throw_exceptions
611 @returns The same rmailaddr object so as to facilitate stacking
612 *///=========================================================================
613 rmailaddr* errors_clear() { _errors.clear(); return this; }; // -x- rmailaddr* errors_clear -x-
614
615 /*======================================================================*//**
616 @brief
617 Grade an eMail address, similar to traditional elementary school grades. For
618 simplicity, grades "a" through "c" are passes, while grades "d" through "f"
619 are failures, although if less strict then "d" should also qualify as a pass.
620
621 @code
622 Ratings:
623 a = Angle-brackets surrounding eMail address (optional display-name)
624 b = Bare eMail address (no display-name)
625 c = Complex eMail address (groups; optional angle-brackets; optional display-name)
626 d = Defective (because obsolete RFC standards were utilized)
627 e = Errors (only when collecting errors instead of throwing exceptions)
628 f = Failure (an exception was thrown, or eMail address is blank)
629 @endcode
630
631 To test for a pass, use a comparison such as <tt>m.grade() <= 'c'</tt>
632 (strict) or <tt>m.grade() <= 'd'</tt> (not strict).
633 @returns Rating code
634 *///=========================================================================
char grade(
    /// Which eMail address to grade (default is 0 for the first eMail address)
    const int index = 0
  ) {
  // No rating logic is present here yet: the index is not consulted, and the
  // failure grade is always reported.
  static_cast<void>(index);
  constexpr char failure = 'f';
  return failure;
} // -x- char grade -x-
641
642 /*======================================================================*//**
643 @brief
644 Indicates whether a display-name was included with this eMail address.
645 @returns TRUE = eMail address includes a display-name@n
646 FALSE = eMail address has no display-name
647 *///=========================================================================
648 bool has_display_name(
649 /// eMail address index (default is 0 for the first eMail address)
650 const int index = 0
651 ) {
652 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name != -1;
653 }; // -x- bool has_display-name -x-
654
655 /*======================================================================*//**
656 @brief
657 Find out whether this object holds any number of eMail addresses. If there
658 are no eMail addresses, then this method returns @c FALSE.
659 @see empty
660 @see has_multiple
661 @see has_one
662 @see size
663 @returns TRUE = one or more eMail addresses@n
664 FALSE = no eMail addresses
665 *///=========================================================================
666 bool has_any() { return _index_e.size() > 0; }; // -x- bool has_any -x-
667
668 /*======================================================================*//**
669 @brief
670 Find out whether this object holds multiple eMail addresses. If there is
671 only one eMail address, or no eMail addresses at all, then this method
672 returns @c FALSE.
673 @see empty
674 @see has_any
675 @see has_one
676 @see size
677 @returns TRUE = two or more eMail addresses@n
678 FALSE = one eMail address@n
679 FALSE = no eMail addresses
680 *///=========================================================================
681 bool has_multiple() { return _index_e.size() > 1; }; // -x- bool has_multiple -x-
682
683 /*======================================================================*//**
684 @brief
685 Find out whether this object holds exactly one eMail address. If there are
686 two or more eMail addresses, or no eMail addresses, then this method returns
687 @c FALSE.
688 @see empty
689 @see has_any
690 @see has_multiple
691 @see size
692 @returns TRUE = exactly one eMail address@n
693 FALSE = two or more eMail addresses@n
694 FALSE = no eMail addresses
695 *///=========================================================================
696 bool has_one() { return _index_e.size() == 1; }; // -x- bool has_one -x-
697
698 /*======================================================================*//**
699 @brief
700 Find out the state of this policy.
701 @see policy_keep_comments
702 @returns policy status
703 *///=========================================================================
704 bool is_policy_keep_comments() { return _policy_keep_comments; }; // -x- bool is_policy_keep_comments -x-
705
706 /*======================================================================*//**
707 @brief
708 Find out the state of this policy.
709 @see policy_tabs_to_spaces
710 @returns policy status
711 *///=========================================================================
712 bool is_policy_tabs_to_spaces() { return _policy_tabs_to_spaces; }; // -x- bool is_policy_tabs_to_spaces -x-
713
714 /*======================================================================*//**
715 @brief
716 Find out the state of this policy.
717 @see policy_throw_exceptions
718 @returns policy status
719 *///=========================================================================
720 bool is_policy_throw_exceptions() { return _policy_throw_exceptions; }; // -x- bool is_policy_throw_exceptions -x-
721
722 /*======================================================================*//**
723 @brief
724 Find out the state of this policy.
725 @see policy_support_utf8
726 @returns policy status
727 *///=========================================================================
728 bool is_policy_support_utf8() { return _policy_support_utf8; }; // -x- bool is_policy_support_utf8 -x-
729
730 /*======================================================================*//**
731 @brief
732 Indicates whether this is just an eMail address, without any other parts such
733 as display-name, group constructs, comments, etc.
@returns TRUE = eMail address stands on its own (no display-name, etc.)@n
FALSE = eMail address is accompanied by other parts (e.g., a display-name)
736 *///=========================================================================
737 bool is_pure(
738 /// indicate wither angle-brackets are okay (default is FALSE so that the
739 /// meaning of the word "pure" is not tainted)
740 const bool angle_flag = false,
741 /// eMail address index (default is 0 for the first eMail address)
742 const int index = 0
743 ) {
744//TODO: Finish this (we need to consider groups, display-name, comments, etc.)
745 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name != -1;
746 }; // -x- bool is_pure -x-
747
748 /*======================================================================*//**
749 @brief
750 Access an eMail address's local-part (the portion preceding the @c @ sign).
751 @see addr
752 @see display_name
753 @see domain_part
754 @see email
755 @see operator[](int)
756 @returns std::string with only the local-part (no angle brackets, etc.)
757 *///=========================================================================
758 std::string local_part(
759 /// Index of eMail address to query for (0 = first element; negative index
760 /// values are calculated in reverse, starting with -1 as the final position)
761 int index = 0) {
762 return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_local_part].p_token.c_str());
763 }; // -x- std::string local_part -x-
764
765 /*======================================================================*//**
766 @brief
767 Sets the policy for whether to keep comments that were embedded in eMail
768 address group-construct, display-name, and local-part portions.
769
770 Comments are excluded by default because most systems don't need them, but in
771 the event that they are needed (or wanted), this policy makes it possible to
772 make sure they aren't excluded during processing.
773 @see set
774 @see is_policy_keep_comments
775 @returns The same rmailaddr object so as to facilitate stacking
776 *///=========================================================================
777 rmailaddr* policy_keep_comments(
778 /// FALSE = do not retain comments embedded in eMail addresses (deafult)@n
779 /// TRUE = retain comments embedded in eMail addresses
780 bool policy_flag) {
781
782 // --------------------------------------------------------------------------
783 // Update internal policy.
784 // --------------------------------------------------------------------------
785 _policy_keep_comments = policy_flag;
786
787 // --------------------------------------------------------------------------
788 // Return this object to facilitate stacking.
789 // --------------------------------------------------------------------------
790 return this;
791
792 }; // -x- rmailaddr* policy_keep_comments -x-
793
794 /*======================================================================*//**
795 @brief
796 Sets the policy for whether to support UTF-8 characters.
797
798 Some older systems may not be able to handle 8-bit data that UTF-8 utilizes,
799 in which case this policy makes it possible to easily reject incompatible
800 eMail addresses before attempting to use them with such systems.
801 @see set
802 @see is_policy_support_utf8
803 @returns The same rmailaddr object so as to facilitate stacking
804 *///=========================================================================
805 rmailaddr* policy_support_utf8(
806 /// TRUE = support UTF-8 characters in eMail addresses (deafult)@n
807 /// FALSE = do not support UTF-8 characters in eMail addresses
808 bool policy_flag) {
809
810 // --------------------------------------------------------------------------
811 // Update internal policy.
812 // --------------------------------------------------------------------------
813 _policy_support_utf8 = policy_flag;
814
815 // --------------------------------------------------------------------------
816 // Return this object to facilitate stacking.
817 // --------------------------------------------------------------------------
818 return this;
819
820 }; // -x- rmailaddr* policy_support_utf8 -x-
821
822 /*======================================================================*//**
823 @brief
824 Sets the policy for whether to convert every tab character (ASCII charcter 9)
825 to a space (ASCII character 32). This conversion occurs only once when the
826 eMail address is initially specified in a constructor or by way of one of the
827 @ref set() methods (changing this policy after this point will not be applied
828 to the current eMail address, but it will be in effect for future calls to
829 any of the @ref set() methods).
830
831 There are some situations where a tab character can create problems, such as
832 when interacting with certain older software or software that makes incorrect
833 assumptions about how to parse an eMail address, and this policy makes it
834 easy to accomodate such situations for the tab character, which some users
835 may be including by using the tab key on their keyboards.
836 @see set
837 @see is_policy_tabs_to_spaces
838 @returns The same rmailaddr object so as to facilitate stacking
839 *///=========================================================================
840 rmailaddr* policy_tabs_to_spaces(
841 /// TRUE = convert every tab character to a space@n
842 /// FALSE = do not convert tab characters to spaces (default)
843 bool policy_flag) {
844
845 // --------------------------------------------------------------------------
846 // Update internal policy.
847 // --------------------------------------------------------------------------
848 _policy_tabs_to_spaces = policy_flag;
849
850 // --------------------------------------------------------------------------
851 // Return this object to facilitate stacking.
852 // --------------------------------------------------------------------------
853 return this;
854
855 }; // -x- rmailaddr* policy_tabs_to_spaces -x-
856
857 /*======================================================================*//**
858 @brief
859 Sets the policy for whether to throw exceptions when an error is encountered.
860
861 When this flag is set, errors are tracked internally instead of throwing any
862 exceptions, and will need to be retrieved using the @ref errors() method,
863 which is useful for analyzing an eMail address. (Enabling or disabling this
864 flag does not erase the errors that are stored internally; you will need to
865 use the @ref errors_clear method for this.)
866
867 @warning
868 This policy is not meant for general use in the majority of applications; it
869 is intended for technical analysis, which would be useful in diagnostic and
870 research applications, or packet analysis applications like WireShark, or for
871 advanced users who are interested in more techincal detail.
872 @see errors
873 @see errors_clear
874 @see is_policy_throw_exceptions
875 @returns The same rmailaddr object so as to facilitate stacking
876 *///=========================================================================
877 rmailaddr* policy_throw_exceptions(
878 /// TRUE = throw exceptions (default)@n
879 /// FALSE = don't throw exceptions
880 bool policy_flag) {
881
882 // --------------------------------------------------------------------------
883 // Update internal policy.
884 // --------------------------------------------------------------------------
885 _policy_throw_exceptions = policy_flag;
886
887 // --------------------------------------------------------------------------
888 // Return this object to facilitate stacking.
889 // --------------------------------------------------------------------------
890 return this;
891
892 }; // -x- rmailaddr* policy_throw_exceptions -x-
893
894 /*======================================================================*//**
895 @brief
896 Set a new eMail address, resetting all internal flags, counters, and arrays
897 (but not changing any existing policies). Any existing eMail addresses will
898 be cleared out. (This method is also used internally by most of this class's
899 constructors.)
900 @throws std::invalid_argument describing the problem, along with the byte
901 offset where the problem originated from
902 @see rmailaddr
903 @returns The same rmailaddr object so as to facilitate stacking
904 *///=========================================================================
905 rmailaddr* set(
906 /// RFC-compliant eMail address
907 const char* mailbox,
908 /// Number of characters (-1 = ASCIIZ string)
909 int len = -1) {
910 return set((char8_t*)mailbox, len);
911 }; // -x- rmailaddr* set -x-
912
913 /*======================================================================*//**
914 @copydoc set(const char*, int)
915 @see rmailaddr
916 @returns The same rmailaddr object so as to facilitate stacking
917 *///=========================================================================
918 rmailaddr* set(
919 /// RFC-compliant eMail address
920 const std::string mailbox) {
921 return set((char8_t*)mailbox.data(), mailbox.size());
922 }; // -x- rmailaddr* set -x-
923
924 /*======================================================================*//**
925 @copydoc set(const char*, int)
926 @see rmailaddr
927 @returns The same rmailaddr object so as to facilitate stacking
928 *///=========================================================================
929 rmailaddr* set(
930 /// RFC-compliant eMail address
931 const std::u8string mailbox) {
932 return set(mailbox.data(), mailbox.size());
933 }; // -x- rmailaddr* set -x-
934
935 /*======================================================================*//**
936 @copydoc set(const char*, int)
937 @see rmailaddr
938 @returns The same rmailaddr object so as to facilitate stacking
939 *///=========================================================================
940 rmailaddr* set(
941 /// RFC-compliant eMail address
942 const char8_t* mailbox,
943 /// Number of characters (-1 = ASCIIZ string)
944 int len = -1) {
945
946 // --------------------------------------------------------------------------
947 // Measure size of format string if an ASCIIZ string was indicated.
948 // --------------------------------------------------------------------------
949 if (len == -1) len = std::strlen((char*)mailbox);
950
951 // --------------------------------------------------------------------------
952 // Save a copy of the original eMail address.
953 // --------------------------------------------------------------------------
954 _addr.assign(mailbox, len); // We need to save this for later reference
955
956 // --------------------------------------------------------------------------
957 // Pre-adjustments (optional, as per policy flags).
958 // --------------------------------------------------------------------------
959 if (_policy_tabs_to_spaces) // Policy: Convert all tabs to spaces
960 _addr.replace(_addr.begin(), _addr.end(), '\t', ' '); // Efficient replacement
961
962 // --------------------------------------------------------------------------
963 // Internal variables.
964 // --------------------------------------------------------------------------
965 int offset = 0; // Offset within original mailbox char8_t[] array
966 int last_display_name = -1; // Used to build type "e" eMail tokens
967 int last_local_part = -1; // Used to build type "e" eMail tokens
968 int last_domain_part = -1; // Used to build type "e" eMail tokens
969
970 // --------------------------------------------------------------------------
971 // Internal variables that are reset or updated together at various times,
972 // such as when a token is [in most cases] completed.
973 // --------------------------------------------------------------------------
974 int token_begin = 0; // Beginning offset within current portion of string being parsed
975 char8_t ch; // Character being tested (this needs to be defined outside of the main loop)
976 bool flag_utf8 = false; // UTF8 character(s) detected
977 bool flag_angle = false; // Angle-bracket detected
978 bool flag_quote = false; // Quotation-marks mode detected
979 bool active_angle = false; // Angle-bracket mode is active
980 bool active_at_sign = false; // At-sign mode is active (domain-part instead of local-part interpretation)
981 bool active_quote = false; // Quotation-marks mode is active
982 int comment_depth = 0; // Comments are active when this value is greater than 0 (too many closed comments are in the negative)
983 std::u8string p_token; // Processed token data (angle brackets, quotation marks, comments, and whitespace omitted)
984 std::u8string p_token_sp; // Processed token data (angle brackets, quotation marks, and comments omitted), with spaces preserved
985
986 // --------------------------------------------------------------------------
987 // Main parsing loop that identifies tokens and ensures compliance, and also
988 // effectively pre-processes eMail addresses on-the-fly for faster access
989 // from the _emails vector later.
990 // --------------------------------------------------------------------------
991 do {
992
993 // --------------------------------------------------------------------------
994 // Obtain next character.
995 // --------------------------------------------------------------------------
996 ch = mailbox[offset];
997
998 // --------------------------------------------------------------------------
999 // Compare one character at a time, but first process special cases of quoted
1000 // data (copy most of the data) and comments (ignore the data).
1001 // --------------------------------------------------------------------------
1002 if (flag_quote && active_quote && ch != '"') {
1003 if (QTEXT(ch)) { // Include only quoted text
1004 p_token.push_back(ch);
1005 p_token_sp.push_back(ch);
1006 } // -x- if QTEXT -x-
1007 continue;
1008 } else if (comment_depth > 0 && ch != ')') { // Ignore all comment data
1009 if (_policy_keep_comments) {
1010 p_token.push_back(ch);
1011 p_token_sp.push_back(ch);
1012 } // -x- if _policy_keep_comments -x-
1013 continue;
1014 } else
1015 main_parsing_switch: switch (ch) {
1016
1017 // --------------------------------------------------------------------------
1018 // Group name ends with a colon.
1019 // --------------------------------------------------------------------------
1020 case '"': {
1021 if (!active_quote) { // Enable quotation-marks mode
1022 if (flag_quote) _exception("quotation-marks mode can't be re-opened", offset);
1023 active_quote = true;
1024 flag_quote = true;
1025 } else { // Disable quotation-marks mode
1026 active_quote = false;
1027 }
1028 continue;
1029 } // -x- case " -x-
1030
1031 // --------------------------------------------------------------------------
1032 // Group name ends with a colon.
1033 // --------------------------------------------------------------------------
1034 case ':': {
1035
1036 // --------------------------------------------------------------------------
1037 // Internal tracking.
1038 // --------------------------------------------------------------------------
1039 group_depth++;
1040
1041 // --------------------------------------------------------------------------
1042 // Add this token to the tokens vector.
1043 // --------------------------------------------------------------------------
1044 _tokens.push_back({ .type = 'g',
1045 .offset = token_begin,
1046 .len = offset - token_begin,
1047 .flag_utf8 = flag_utf8,
1048 .p_token = p_token_sp, });
1049
1050 // --------------------------------------------------------------------------
1051 // Reset and prepare internal variables for the next token.
1052 // --------------------------------------------------------------------------
1053 RESET_FOR_NEXT_TOKEN;
1054 continue;
1055
1056 } // -x- case : -x-
1057
1058 // --------------------------------------------------------------------------
1059 // Group of eMail addresses is terminated by a semi-colon.
1060 // --------------------------------------------------------------------------
1061 case ';': {
1062
1063 // --------------------------------------------------------------------------
1064 // Internal tracking.
1065 // --------------------------------------------------------------------------
1066 if (--group_depth < 0) _exception("too many group construct terminators", offset);
1067 if (active_angle) _exception("unbalanced open angle bracket", offset);
1068
1069 // --------------------------------------------------------------------------
1070 // Add this token terminator to the tokens vector.
1071 // --------------------------------------------------------------------------
1072 _tokens.push_back({ .type = ';',
1073 .offset = token_begin,
1074 .len = offset - token_begin,
1075 .flag_utf8 = flag_utf8,
1076 .p_token = p_token_sp, });
1077
1078 // --------------------------------------------------------------------------
1079 // Reset and prepare internal variables for the next token.
1080 // --------------------------------------------------------------------------
1081 RESET_FOR_NEXT_TOKEN;
1082 continue;
1083
1084 } // -x- case ; -x-
1085
1086 // --------------------------------------------------------------------------
1087 // Opening angle bracket.
1088 // --------------------------------------------------------------------------
1089 case '<': {
1090
1091 // --------------------------------------------------------------------------
1092 // Internal tracking.
1093 // --------------------------------------------------------------------------
1094 if (flag_angle) _exception("unbalanced open angle bracket", offset);
1095 active_angle = true;
1096 flag_angle = true;
1097
1098 // --------------------------------------------------------------------------
1099 // Add this token terminator to the tokens vector if a display-name exists.
1100 // --------------------------------------------------------------------------
1101 if (token_begin < offset) {
1102 last_display_name = _tokens.size();
1103 _tokens.push_back({ .type = 'n',
1104 .offset = token_begin,
1105 .len = offset - token_begin,
1106 .flag_utf8 = flag_utf8,
1107 .p_token = p_token_sp, });
1108 } // -x- if token_begin -x-
1109
1110 // --------------------------------------------------------------------------
1111 // Reset and prepare internal variables for the next token.
1112 // --------------------------------------------------------------------------
1113 RESET_FOR_NEXT_TOKEN;
1114 continue;
1115
1116 } // -x- case < -x-
1117
1118 // --------------------------------------------------------------------------
1119 // At sign ("@") delimiter.
1120 // --------------------------------------------------------------------------
1121 case '@': {
1122
1123 // --------------------------------------------------------------------------
1124 // Internal tracking.
1125 // --------------------------------------------------------------------------
1126 if (active_at_sign) _exception("too many at (\"@\") signs", offset);
1127 active_at_sign = true;
1128
1129 // --------------------------------------------------------------------------
1130 // Add this token terminator to the tokens vector if a display-name exists.
1131 // --------------------------------------------------------------------------
1132 last_local_part = _tokens.size();
1133 _tokens.push_back({ .type = 'l',
1134 .offset = token_begin,
1135 .len = offset - token_begin,
1136 .flag_utf8 = flag_utf8,
1137 .flag_angle = flag_angle,
1138 .p_token = p_token, });
1139
1140 // --------------------------------------------------------------------------
1141 // Reset and prepare internal variables for the next token.
1142 // --------------------------------------------------------------------------
1143 RESET_FOR_NEXT_TOKEN;
1144 continue;
1145
1146 } // -x- case @ -x-
1147
1148 // --------------------------------------------------------------------------
1149 // Closing angle-bracket.
1150 // --------------------------------------------------------------------------
1151 case '>': {
1152
1153 // --------------------------------------------------------------------------
1154 // Internal tracking.
1155 // --------------------------------------------------------------------------
1156 if (!active_angle) _exception("unbalanced closing angle bracket", offset);
1157 active_angle = false;
1158 goto main_parsing_email;
1159
1160 // --------------------------------------------------------------------------
1161 // Reset and prepare internal variables for the next token.
1162 // --------------------------------------------------------------------------
1163 RESET_FOR_NEXT_TOKEN;
1164 continue;
1165
1166 } // -x- case > -x-
1167
1168 // --------------------------------------------------------------------------
1169 // Comma delimiter, signifies the end of an eMail address.
1170 // --------------------------------------------------------------------------
1171 case ',': {
1172
1173 main_parsing_comma:
1174 // --------------------------------------------------------------------------
1175 // Internal tracking.
1176 // --------------------------------------------------------------------------
1177 if (active_quote) _exception("unbalanced quotation-marks", offset);
1178 if (active_angle) _exception("unbalanced open angle bracket before comma", offset);
1179
1180 main_parsing_email:
1181 // --------------------------------------------------------------------------
1182 // Add this token terminator to the tokens vector if a display-name exists.
1183 // --------------------------------------------------------------------------
1184 if (active_at_sign) { // Domain-part has been started
1185 last_domain_part = _tokens.size();
1186 _tokens.push_back({ .type = 'd',
1187 .offset = token_begin,
1188 .len = offset - token_begin,
1189 .flag_utf8 = flag_utf8,
1190 .flag_angle = _tokens[last_local_part].flag_angle,
1191 .p_token = p_token, });
1192 active_at_sign = false;
1193 } else { // Domain-part has not been started, so there's only a local-part here
1194 last_local_part = _tokens.size();
1195 _tokens.push_back({ .type = 'l',
1196 .offset = token_begin,
1197 .len = offset - token_begin,
1198 .flag_utf8 = flag_utf8,
1199 .flag_angle = flag_angle,
1200 .p_token = p_token, });
1201 } // -x- if active_at_sign -x-
1202
1203 // --------------------------------------------------------------------------
1204 // Perform a few checks to make sure we're not creating phantom addresses.
1205 // --------------------------------------------------------------------------
1206 int __email_len = last_domain_part == -1 ? _tokens[last_local_part].len : (_tokens[last_domain_part].offset - _tokens[last_local_part].offset) + _tokens[last_domain_part].len;
1207//std::cout << "__email_len=" << std::to_string(__email_len) << std::endl;
1208 if (__email_len == 0 && !flag_angle) continue;
1209//std::cout << "last_local_part=" << std::to_string(last_local_part) << std::endl;
1210//std::cout << "last_domain_part=" << std::to_string(last_domain_part) << std::endl;
1211
1212 // --------------------------------------------------------------------------
1213 // Create a token of type "e" now that this eMail address is closed.
1214 //
1215 // The reason we're calculating size based on offsets instead of by adding
1216 // sizes together (and adding 1 for the "@" sign) is that commants can be
1217 // included in the localpart portion, which normally won't be counted in any
1218 // localpart sizes.
1219 // --------------------------------------------------------------------------
1220 _index_e.push_back(_tokens.size()); // Add to index of eMail addresses (before adding to _tokens vector, _tokens.size() is the position)
1221 _tokens.push_back({ .type = 'e',
1222 .offset = _tokens[last_local_part].offset,
1223 .len = __email_len,// - token_begin,
1224 .flag_utf8 = _tokens[last_local_part].flag_utf8 || flag_utf8,
1225 .flag_angle = _tokens[last_local_part].flag_angle,
1226 .flag_null_addr = __email_len == 0,
1227 .p_token = _tokens[last_local_part].p_token + ((last_domain_part == -1 || _tokens[last_domain_part].p_token.empty()) ? u8"" : u8"@" + _tokens[last_domain_part].p_token),
1228 .index_display_name = last_display_name,
1229 .index_local_part = last_local_part,
1230 .index_domain_part = last_domain_part, });
1231 last_display_name = -1;
1232 last_local_part = -1;
1233 last_domain_part = -1;
1234 flag_angle = false;
1235
1236 // --------------------------------------------------------------------------
1237 // Reset and prepare internal variables for the next token.
1238 // --------------------------------------------------------------------------
1239 RESET_FOR_NEXT_TOKEN;
1240 continue;
1241
1242 } // -x- case , -x-
1243
1244 // --------------------------------------------------------------------------
1245 // Opening comment parenthesis.
1246 // --------------------------------------------------------------------------
1247 case '(': {
1248 comment_depth++;
1249 continue;
1250 } // -x- case ( -x-
1251
1252 // --------------------------------------------------------------------------
1253 // Closing comment parenthesis.
1254 // --------------------------------------------------------------------------
1255 case ')': {
1256 if (--comment_depth < 0) _exception("unbalanced closing comment parenthesis", offset);
1257 continue;
1258 } // -x- case ) -x-
1259
1260 // --------------------------------------------------------------------------
1261 // Backslash (quote-literal).
1262 // --------------------------------------------------------------------------
1263 case '\\': {
1264
1265 // --------------------------------------------------------------------------
1266 // Prevent a potential out-of-bounds buffer-overrun problem.
1267 // --------------------------------------------------------------------------
1268 if (++offset == len) {
1269 _exception("unbalanced quote-literal (backslash)", offset);
1270 continue; // Do this in case we're not throwing exceptions
1271 } // -x- if offset -x-
1272
1273 // --------------------------------------------------------------------------
1274 // Update to next character (whatever it is, we're taking it literally).
1275 // --------------------------------------------------------------------------
1276 ch = mailbox[offset];
1277 goto main_parsing_loop_default; // Fall-through to default
1278
1279 } // -x- case \ -x-
1280
1281 // --------------------------------------------------------------------------
1282 // All remaining characters.
1283 // --------------------------------------------------------------------------
1284 default:
1285 //if (flag_angle) _exception("additional data not permitted", offset);
1286 main_parsing_loop_default:
1287 if (ch > 127) { // Include all UTF-8 character (unless prevented by the exception)
1288 flag_utf8 = true;
1289 if (!_policy_support_utf8) _exception("UTF-8 byte encountered", offset);
1290 p_token.push_back(ch);
1291 p_token_sp.push_back(ch);
1292 } else if (CTEXT(ch) || ' ') { // Include almost everything for now (including spaces)
1293 if (ch != ' ') p_token.push_back(ch); // Exclude spaces
1294 if (!(ch == ' ' && p_token.size() == 0)) p_token_sp.push_back(ch); // Keep spaces
1295 } // -x- if ch -x-
1296
1297 } // -x- switch ch -x-
1298
1299 } while (++offset < len); // -x- do while -x-
1300
1301 // --------------------------------------------------------------------------
1302 // If the final token isn't empty (a.k.a., unfinished / not sealed), then
1303 // figure out what to do and run one more time, or else throw an exception.
1304 // --------------------------------------------------------------------------
1305 if (offset == len && token_begin < offset) {
1306 ch = ','; // Force comma (",") on parsing loop
1307 goto main_parsing_switch;
1308 } else if (offset > len && token_begin < offset) {
1309 _exception("incomplete data", offset - 1);
1310 } // -x- if offset -x-
1311 return this;
1312
1313 }; // -x- rmailaddr* set -x-
1314
1315 /*======================================================================*//**
1316 @brief
1317 Find out how many eMail addresses this object holds.
1318 @see empty
1319 @see has_any
1320 @see has_multiple
1321 @see has_one
1322 @returns The number of eMail addresses
1323 *///=========================================================================
1324 int size() { return _index_e.size(); } // -x- int size -x-
1325
1326 /*======================================================================*//**
1327 @brief
1328 Generate a detailed output of all tokens that's useful for debugging.
1329
1330 @code
1331 Types:
1332 g = group name (beginning; includes colon)
1333 ; = group termination (semi-colon character)
1334 n = display name
1335 e = eMail address (includes angle brackets, if present)
1336 l = local-part
1337 d = domain-part
1338 c = comment (not implemented)
1339 \0 = not initialized (null; regard as "unknown"; this should never happen)
1340 @endcode
1341
1342 The difference between "token" and "p_token" is that "token" is the original
1343 and [mostly] unprocessed atom, while "p_token" has been processed with any
1344 sets of angle-brackets, sets of quotation-marks, comments, and whitespace
1345 removed. In nearly all instances, the value of "p_token" is what's needed.
1346 @returns std::string containing multi-line text (one token per line)
1347 *///=========================================================================
1348 std::string tokens_to_string(
1349 /// Filter (string containing characters for those types that are to be
1350 /// included {unrecognized types will be ignored}; the default is no filter)
1351 const std::string filter = "",
1352 /// Prefix (text to insert before the beginning of each line)
1353 const std::string prefix = "",
1354 /// End-of-Line sequence (default is "\n")
1355 const std::string eol = "\n") {
1356
1357 // --------------------------------------------------------------------------
1358 // Internal variables.
1359 // --------------------------------------------------------------------------
1360 std::string t;
1361
1362 // --------------------------------------------------------------------------
1363 // Loop that builds list of tokens (one per line).
1364 // --------------------------------------------------------------------------
1365 for (int i = 0; i < _tokens.size(); i++) {
1366
1367 // --------------------------------------------------------------------------
1368 // Check filter.
1369 // --------------------------------------------------------------------------
1370 if (filter.empty() || filter.find(_tokens[i].type) != std::string::npos) {
1371
1372 // --------------------------------------------------------------------------
1373 // Shared characteristics.
1374 // --------------------------------------------------------------------------
1375 t.append(prefix + "index=" + std::to_string(i)
1376 + " type=" + _tokens[i].type
1377 + " utf8=" + (_tokens[i].flag_utf8 ? "y" : "n")
1378 + " punycode=" + (_tokens[i].flag_punycode ? "y" : "n")
1379 + " obsolete=" + (_tokens[i].flag_obsolete ? "y" : "n")
1380 + " offset=" + std::to_string(_tokens[i].offset)
1381 + " length=" + std::to_string(_tokens[i].len)
1382 + " token=" + std::string((char*)_addr.c_str()).substr(_tokens[i].offset, _tokens[i].len)
1383 + " p_token=" + (char*)_tokens[i].p_token.c_str());
1384
1385 // --------------------------------------------------------------------------
1386 // Type-specific characteristics.
1387 // --------------------------------------------------------------------------
1388 switch (_tokens[i].type) {
1389 case 'd':
1390 t.append(std::string( " fqdn=") + (_tokens[i].flag_fqdn ? "y" : "n"));
1391 break;
1392 case 'e':
1393 t.append(std::string( " angle=") + (_tokens[i].flag_angle ? "y" : "n"));
1394 // Fall-through to type "l"
1395 case 'l':
1396 t.append(std::string(" null_addr=") + (_tokens[i].flag_null_addr ? "y" : "n"));
1397 break;
1398 } // -x- switch type -x-
1399
1400 // --------------------------------------------------------------------------
1401 // Final EoL (End of Line) sequence.
1402 // --------------------------------------------------------------------------
1403 t.append(eol);
1404
1405 } // -x- if filter -x-
1406
1407 } // -x- for i -x-
1408 return t;
1409
1410 }; // -x- std::string tokens_to_string -x-
1411
1412 /*======================================================================*//**
1413 @brief
1414 Array-style access to eMail addresses. The first element is at index 0.
1415 @see get
1416 @see domain_part
1417 @see local_part
1418 @returns std::u8string with only the eMail address (no angle brackets, etc.)
1419 as a native UTF-8 string
1420 *///=========================================================================
1421 std::u8string operator[](
1422 /// Index of eMail address to query for (0 = first element; negative index
1423 /// values are calculated in reverse, starting with -1 as the final position)
1424 int index) {
1425 return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].p_token;
1426 }; // -x- std::u8string operator[] -x-
1427
1428 /*======================================================================*//**
1429 @brief
1430 Support convenient streaming usage with std::cout, std::cerr, and friends.
1431 @returns eMail address in human-readable form
1432 *///=========================================================================
1433 friend std::ostream& operator<< (
1434 /// Output stream (provided automatically by std::cout and std::cerr)
1435 std::ostream& o,
1436 /// Object class (matched by compiler)
1437 rmailaddr const& c) { return o << (char*)c._addr.c_str(); }; // -x- std::ostream& operator<< -x-
1438
1439 }; // -x- class rmailaddr -x-
1440
1441}; // -x- namespace randolf -x-