// c++/docs/rmailaddr_source.html

#pragma once


#include <algorithm>
#include <atomic>
#include <cstring>
#include <stdexcept>   // std::invalid_argument
#include <string>      // std::string, std::u8string, std::to_string
#include <utility>     // std::move
#include <vector>

#include <arpa/inet.h> // Used only for checking for valid IP addresses in domain literals (inet_pton)


namespace randolf {


  // --------------------------------------------------------------------------

  // Constants that list sets of valid characters, which are optimized to test

  // for ranges of the most commonly-used characters first during parsing, are

  // named consistently with their respective rule names as defined in RFC2822.

  //

  // CRLF and \ are invisible in the quoted string according to RFC2822 section

  // 3.2.5.

  //

  // RFC2822 section 3.2.5 also defines a "quoted-string" as containing the

  // following valid characters (spaces are also permitted):  33, 35-91, 93-126

  // Certain characters must be quoted first though, and every character

  // following a backslash is taken literally (and the backslash is removed

  // from the result).

  //

  // RFC2822 section 3.2.4 defines an "atom" as containing the following valid

  //   characters:  0123456789

  //                ABCDEFGHIJKLMNOPQRSTUVWXYZ

  //                abcdefghijklmnopqrstuvwxyz

  //                !#$%&'*+-/=?^_`{|}~

  //

  // Quote characters and quotation marks are not permitted in the domain part.

  //

  // According to RFC2822 section 3.2.5, a phrase (DisplayName / Comments) can

  // be either an atom (ATEXT) or quoted-text (QTEXT).

  //

  // According to RFC2822 section 2.2.2, whitespace characters are tabs (ASCII

  // character 9) and spaces (ASCII character 32).

  //

  // RFC2822 section 3.4 last paragraph indicates that a group construct is

  // optional, and preceded by a colon following any number of comma-delimited

  // recipients (including zero or one).  Group constructs must end with a

  // semi-colon though.

  // --------------------------------------------------------------------------


  // --------------------------------------------------------------------------

  // The following macros are optimized for performance by testing for the most

  // commonly-used characters first.

  //

  // ATEXT

  //   94...126   ^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   65...90    ABCDEFGHIJKLMNOPQRSTUVWXYZ

  //   47...57    /0123456789

  //   45 | 33    -!

  //   35...39    #$%&'

  //   42 | 43    *+

  //   61 | 63    =?

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // such as ATEXT(c & 0x7f) is compared as a whole.  The argument is still
  // evaluated more than once, so it must not have side effects (e.g., *p++).
  #define ATEXT(a) (((a) >= 94 && (a) <= 126) \
                 || ((a) >= 65 && (a) <=  90) \
                 || ((a) >= 47 && (a) <=  57) \
                 ||  (a) == 45 || (a) ==  33  \
                 || ((a) >= 35 && (a) <=  39) \
                 ||  (a) == 42 || (a) ==  43  \
                 ||  (a) == 61 || (a) ==  63  )


  // --------------------------------------------------------------------------

  // ATEXT_OBS ("obsolete standard" is ATEXT plus periods, spaces, and tabs)

  //   94...126   ^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   65...90    ABCDEFGHIJKLMNOPQRSTUVWXYZ

  //   45...57    -./0123456789

  //   32 | 33    {space:32}!

  //   35...39    #$%&'

  //   42 | 43    *+

  //   61 | 63    =?

  //    9         {tab:9}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // such as ATEXT_OBS(c & 0x7f) is compared as a whole.  The argument is still
  // evaluated more than once, so it must not have side effects.
  #define ATEXT_OBS(a) (((a) >= 94 && (a) <= 126) \
                     || ((a) >= 65 && (a) <=  90) \
                     || ((a) >= 45 && (a) <=  57) \
                     ||  (a) == 32 || (a) ==  33  \
                     || ((a) >= 35 && (a) <=  39) \
                     ||  (a) == 42 || (a) ==  43  \
                     ||  (a) == 61 || (a) ==  63  \
                     ||  (a) ==  9                )


  // --------------------------------------------------------------------------

  // CTEXT (comment text)

  //   93...126   ]^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   42...91    *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[

  //   33...39    !"#$%&'

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define CTEXT(a) (((a) >= 93 && (a) <= 126) \
                 || ((a) >= 42 && (a) <=  91) \
                 || ((a) >= 33 && (a) <=  39) )


  // --------------------------------------------------------------------------

  // CTEXT_WSP (comment text with white space)

  //   93...126   ]^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   42...91    *+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[

  //   32...39    {space:32}!"#$%&'

  //    9         {tab:9}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define CTEXT_WSP(a) (((a) >= 93 && (a) <= 126) \
                     || ((a) >= 42 && (a) <=  91) \
                     || ((a) >= 32 && (a) <=  39) \
                     ||  (a) ==  9                )


  // --------------------------------------------------------------------------

  // CTEXT_OBS (obsolete comment text)

  //   CTEXT      {CTEXT}

  //   32         {space:32}

  //    9         {tab:9}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define CTEXT_OBS(a) (CTEXT(a)     \
                     || (a) == 32    \
                     || (a) ==  9    )


  // --------------------------------------------------------------------------

  // DTEXT (domain-part, not including characters needed for domain-literals)

  //   94...126   ^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   33...90    !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define DTEXT(a) (((a) >= 94 && (a) <= 126) \
                 || ((a) >= 33 && (a) <=  90) )


  // --------------------------------------------------------------------------

  // FWS (folding white space)

  //   10         {lf:10}

  //   13         {cr:13}

  //    9         {tab:9}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define FWS(a) ((a) == 10 \
               || (a) == 13 \
               || (a) ==  9 )


  // --------------------------------------------------------------------------

  // QTEXT (quoted text)

  //   93...126   ]^_`abcdefghijklmnopqrstuvwxyz{|}~

  //   35...91    #$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[

  //   32         {space:32}

  //   33         !

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define QTEXT(a) (((a) >= 93 && (a) <= 126) \
                 || ((a) >= 35 && (a) <=  91) \
                 ||  (a) == 32 || (a) ==  33  )


  // --------------------------------------------------------------------------

  // TEXT

  //   14...127   {char:14-31}{space:32}!"#$%&'()*+,-./0123456789:;<=>

  //              ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^

  //              _`abcdefghijklmnopqrstuvwxyz{|}~

  //              {delete:127}

  //    1...9     {char:1-6}{beep:7}{backspace:8}{tab:9}

  //   11 | 12    {char:11}{char:12}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define TEXT(a) (((a) >= 14 && (a) <= 127) \
                || ((a) >=  1 && (a) <=   9) \
                ||  (a) == 11 || (a) ==  12  )


  // --------------------------------------------------------------------------

  // WSP (white space)

  //   32         {space:32}

  //    9         {tab:9}

  // --------------------------------------------------------------------------

  // Every use of the argument is parenthesized so that an expression argument
  // is compared as a whole.  The argument is still evaluated more than once,
  // so it must not have side effects.
  #define WSP(a) ((a) == 32 \
               || (a) ==  9 )


  // --------------------------------------------------------------------------

  // Used by the set() method to consistently reset internal variables when

  // moving onward to the next token.

  //

  // token_begin:  Configures beginning of next token.

  // --------------------------------------------------------------------------

  // The statements are wrapped in braces so the macro expands to a single
  // compound statement; without them, "if (x) RESET_FOR_NEXT_TOKEN" would guard
  // only the first assignment.  Both "RESET_FOR_NEXT_TOKEN" and
  // "RESET_FOR_NEXT_TOKEN;" remain valid at the point of use.
  //
  // Expects these names to be in scope where the macro is expanded:
  //   flag_utf8, flag_quote, token_begin, offset, p_token, p_token_sp
  #define RESET_FOR_NEXT_TOKEN     \
    {                              \
      flag_utf8      = false;      \
      flag_quote     = false;      \
      token_begin    = offset + 1; \
      p_token.clear();             \
      p_token_sp.clear();          \
    }


  /*======================================================================*//**

  @brief

  This @ref rmailaddr class provides an object-oriented eMail address.


  @par Features


  Some of the key features are:


     - constructors with sensible defaults help to simplify coding

     - documentation includes code samples (with @c \#include lines as needed)

     - can handle ASCIIZ without needing to specify string length

     - can handle @c std::string (which tracks its own string length)


  @par Use case


  Validation of the format of an eMail address is helpful in ensuring that

  eMail addresses received from elsewhere comply with internet standards.


  @par Background


  I created this class to make it easier to write internet server daemons and

  other software that needs to accept and/or handle eMail addresses.  (This is

  a complete re-write of the version I wrote in Java 17 years ago in 2007,

  which includes a significant array of differences due to the improved parsing

  approaches I use now that are more efficient, and the need to make sure that

  UTF-8 characters and punycode are both handled in a transparent manner.)


  @par Getting started


  @author Randolf Richardson

  @version 1.00

  @par History

  2024-May-07 v1.00 Initial version


  @par Conventions

  Lower-case letter "m" is regularly used in partial example code to represent

  an instantiated rmailaddr object.


  An ASCIIZ string is a C-string (char* array) that includes a terminating null

  (0) character at the end.


  @par Notes


  I use the term "ASCIIZ string" to indicate an array of characters that's

  terminated by a 0 (a.k.a., null).  Although this is very much the same as a

  C-string, the difference is that in many API functions a C-string must often

  be accompanied by its length value.  When referring to an ASCIIZ string, I'm

  intentionally indicating that the length of the string is not needed because

  the string is null-terminated.  (This term was also commonly used in assembly

  language programming in the 1970s, 1980s, and 1990s, and as far as I know is

  still used by machine language programmers today.)


  @par Examples


  @code{.cpp}

    #include <iostream>  // std::cout, std::cerr, std::endl, etc.

    #include <stdexcept> // std::invalid_argument exception


    #include <randolf/rmailaddr>


    int main(int argc, char *argv[]) {

      try {

        randolf::rmailaddr m("nobody@example.com");

      } catch (const std::invalid_argument& e) {

        std::cerr << "eMail address format exception: " << e.what() << std::endl;

        return EXIT_FAILURE;

      } catch (const std::exception& e) {

        std::cerr << "Other exception: " << e.what() << std::endl;

        return EXIT_FAILURE;

      }

      return EXIT_SUCCESS;

    } // -x- int main -x-

  @endcode


  Parameter stacking is supported (with methods that return @c rmailaddr*); in

  this example, notice that semicolons (";") and "e." references are omitted

  (when compared with the above):


  @code{.cpp}

    #include <iostream>  // std::cout, std::cerr, std::endl, etc.

    #include <stdexcept> // std::invalid_argument exception


    #include <randolf/rmailaddr>


    int main(int argc, char *argv[]) {

      try {

        randolf::rmailaddr m("nobody@example.com");

      } catch (const std::invalid_argument& e) {

        std::cerr << "eMail address format exception: " << e.what() << std::endl;

        return EXIT_FAILURE;

      } catch (const std::exception& e) {

        std::cerr << "Other exception: " << e.what() << std::endl;

        return EXIT_FAILURE;

      }

      return EXIT_SUCCESS;

    } // -x- int main -x-

  @endcode

  *///=========================================================================

  class rmailaddr {


    public:

      /*======================================================================*//**

      @brief

      Structure of errors (only used when exceptions are disabled).

      @see errors

      @see policy_throw_exceptions

      *///=========================================================================

      struct error_data {
        /// Error message
        std::string message;
        /// Offset (0 = position of first byte); zero-initialized so that a
        /// default-constructed record never carries an indeterminate value
        unsigned int  offset = 0;
      }; // -x- struct error_data -x-


    private:

      /*======================================================================*//**

      @brief

      Structure of positions within the original eMail string where a portion

      begins, and its length (in bytes), along with various other information about

      the section.


      This is used internally, and std::vector<mail_addr_token> organizes them and

      looks after freeing memory.

      *///=========================================================================

      struct mail_addr_token {
        // One parsed portion of the original eMail string.  Every field has an
        // in-class default, so a freshly-constructed token is "unknown" (type \0)
        // with no linked tokens (all index_* fields are -1).

        /// Token type:
        ///    g = group name (beginning; includes colon)
        ///    ; = group termination (semi-colon character)
        ///    n = display name
        ///    e = eMail address (includes angle brackets, if present)
        ///    l = local-part
        ///    d = domain-part
        ///    c = comment
        ///   \0 = not initialized (null can effectively be regarded as meaning "unknown")
        char type                = (char)0;
        /// Offset, within the original string, where this part begins
        unsigned int offset      = 0;
        /// Total number of bytes
        unsigned int len         = 0;
        /// Whether any UTF-8 characters are present in this part
        bool flag_utf8           = false;
        /// Whether this part is in punycode (begins with "xn--")
        bool flag_punycode       = false; // TODO
        /// Whether this part is "obsolete" (according to RFCs)
        bool flag_obsolete       = false; // TODO
        /// Whether eMail address was enclosed in angle brackets (type "e" only)
        bool flag_angle          = false;
        /// Whether the token was enclosed in quotation marks
        bool flag_quotes         = false;
        /// Whether eMail address is a null address enclosed in angle brackets (type "e" only)
        bool flag_null_addr      = false;
        /// Whether the domain-part is an FQDN (type "d" only)
        bool flag_fqdn           = false; // TODO
        /// Whether the domain-part is a domain-literal (type "d" only)
        bool flag_domain_literal = false; // TODO
        /// Depth of groups (types "g" and ";" only)
        unsigned short depth     = 0;
        /// Processed data, with quotation marks, angle brackets, comments, whitespace, etc., removed
        std::u8string p_token;
        /// Index to display-name (type "e" only); -1 = none
        int index_display_name   = -1;
        /// Index to local-part (type "e" only); -1 = none
        int index_local_part     = -1;
        /// Index to domain-part (type "e" only); -1 = none
        int index_domain_part    = -1;
      }; // -x- struct mail_addr_token -x-


      // --------------------------------------------------------------------------

      // Internal variables.

      // --------------------------------------------------------------------------

      std::u8string                _addr;                            // Original eMail address
      std::vector<mail_addr_token> _tokens;                          // All eMail address tokens
      std::vector<int>             _index_e;                         // Indexes (into _tokens) of the type "e" records
      std::vector<error_data>      _errors;                          // Error tracking (filled when exceptions are disabled)
      short                        group_depth              = 0;     // Recursive group tracking
      bool                         angle_bracket_mode       = false; // Angle-bracket mode tracking
      bool                         quote_mode               = false; // Quotation-marks mode tracking

      // --------------------------------------------------------------------------
      // Policy variables.
      // --------------------------------------------------------------------------
      bool                         _policy_keep_comments    = false; // Whether to retain comments embedded in eMail addresses
      bool                         _policy_throw_exceptions = true;  // TRUE = throw exceptions; FALSE = save internally
      bool                         _policy_tabs_to_spaces   = false; // Whether to convert every tab into a space
      bool                         _policy_support_utf8     = true;  // Whether to support UTF-8 (FALSE = 7bit characters only)


      /*======================================================================*//**

      Exception handler.

      *///=========================================================================

      void _exception(

      /// Error message

      std::string message,

      /// Offset (0 = position of first byte)

      int offset) {

        if (_policy_throw_exceptions) throw std::invalid_argument(message + " at offset " + std::to_string(offset));

        _errors.push_back({ message, offset });

        return;

      } // -x- void _exception -x-


    public:

      /*======================================================================*//**

      @brief

      Instantiate an empty rmailaddr that doesn't qualify as a properly-formatted

      internet eMail address (because the minimum length of a valid internet eMail

      address is 1 character).


      Instantiating an empty rmailaddr is particularly useful for header-file

      definitions; for example:

      @code{.cpp}

        #include <iostream>  // std::cout, std::cerr, std::endl, etc.

        #include <stdexcept> // std::invalid_argument exception


        #include <randolf/rmailaddr>


        randolf::rmailaddr m; //   <-- Empty rmailaddr initialization (no exceptions)


        int main(int argc, char *argv[]) {

          try {

            m.set("nobody@example.com");

          } catch (const std::invalid_argument& e) {

            std::cerr << "eMail address format exception: " << e.what() << std::endl;

            return EXIT_FAILURE;

          } catch (const std::exception& e) {

            std::cerr << "Other exception: " << e.what() << std::endl;

            return EXIT_FAILURE;

          }

          return EXIT_SUCCESS;

        } // -x- int main -x-

      @endcode

      *///=========================================================================

      rmailaddr() noexcept {
        // Intentionally empty: nothing is parsed, so no eMail address is held.
      } // -x- constructor rmailaddr -x-


      /*======================================================================*//**

      @brief

      Instantiate an rmailaddr that qualifies as a properly-formatted internet

      eMail address (if it doesn't qualify, then an exception will be thrown).


      Usage example:

      @code{.cpp}

        #include <iostream>  // std::cout, std::cerr, std::endl, etc.

        #include <stdexcept> // std::invalid_argument exception


        #include <randolf/rmailaddr>


        int main(int argc, char *argv[]) {

          try {

            randolf::rmailaddr m("nobody@example.com");

          } catch (const std::invalid_argument& e) {

            std::cerr << "eMail address format exception: " << e.what() << std::endl;

            return EXIT_FAILURE;

          } catch (const std::exception& e) {

            std::cerr << "Other exception: " << e.what() << std::endl;

            return EXIT_FAILURE;

          }

          return EXIT_SUCCESS;

        } // -x- int main -x-

      @endcode

      @throws std::invalid_argument describing the problem, along with the byte

        offset where the problem originated from

      @see rmailaddr

      *///=========================================================================

      rmailaddr(
      /// RFC-compliant eMail address
      const char8_t* mailbox,
      /// Number of characters (-1 = ASCIIZ string)
      int len = -1) {
        // All of the work is handed to set().
        set(mailbox, len);
      } // -x- constructor rmailaddr -x-


      /*======================================================================*//**

      @copydoc rmailaddr(const char8_t*, int)

      @see rmailaddr

      *///=========================================================================

      rmailaddr(

      /// RFC-compliant eMail address

      const char* mailbox,

      /// Number of characters (-1 = ASCIIZ string)

      int len = -1) { set((char8_t*)mailbox, len); }; // -x- constructor rmailaddr -x-


      /*======================================================================*//**

      @copydoc rmailaddr(const char8_t*, int)

      @see rmailaddr

      *///=========================================================================

      rmailaddr(

      /// RFC-compliant eMail address

      const std::string mailbox) { set((char8_t*)mailbox.data(), mailbox.size()); }; // -x- constructor rmailaddr -x-


      /*======================================================================*//**

      @copydoc rmailaddr(const char8_t*, int)

      @see rmailaddr

      *///=========================================================================

      rmailaddr(
      /// RFC-compliant eMail address
      const std::u8string& mailbox) {
        // Taken by const reference so the caller's string isn't copied just to be
        // read; the data pointer and byte count are passed straight to set().
        set(mailbox.data(), mailbox.size());
      } // -x- constructor rmailaddr -x-


      /*======================================================================*//**

      @brief

      Access only the eMail address, without display-name, and without any sets of

      enclosing quotation-marks or enclosing angle-brackets, etc.

      @see display_name

      @see domain_part

      @see email

      @see local_part

      @see operator[](int)

      @throws std::out_of_range if the index is out-of-range

      @returns std::string with only the eMail address (no display-name, and no

      enclosing sets of quotation-marks or enclosing angle-brackets, etc.)

      *///=========================================================================

      std::string addr(
      /// Index of eMail address to query for (0 = first element; negative index
      /// values are calculated in reverse, starting with -1 as the final position)
      int index = 0) {

        // A negative index counts backward from the end of the "e" index list.
        const auto slot = index >= 0 ? index : _index_e.size() + index;

        // Both lookups use at(), which throws std::out_of_range on a bad index.
        const auto& address_token = _tokens.at(_index_e.at(slot));

        // Copy the processed bytes out as a plain std::string.
        return std::string(reinterpret_cast<const char*>(address_token.p_token.c_str()));
      }; // -x- std::string addr -x-


      /*======================================================================*//**

      @brief

      Access an eMail address's display-name (the portion preceding the angle

      brackets).&nbsp; If there were no angle-brackets, then an empty string will

      be returned.

      @see addr

      @see domain_part

      @see email

      @see local_part

      @see operator[](int)

      @returns std::string with only the display-name (no quotation marks, etc.)

      *///=========================================================================

      std::string display_name(

      /// Index of eMail address to query for (0 = first element; negative index

      /// values are calculated in reverse, starting with -1 as the final position)

      int index = 0) {

        return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name].p_token.c_str());

      }; // -x- std::string display_name -x-


      /*======================================================================*//**

      @brief

      Access an eMail address's domain-part (the portion following the @c @ sign).

      @see get

      @see addr

      @see display_name

      @see email

      @see local_part

      @see operator[](int)

      @returns std::string with only the domain-part (no angle brackets, etc.)

      *///=========================================================================

      std::string domain_part(

      /// Index of eMail address to query for (0 = first element; negative index

      /// values are calculated in reverse, starting with -1 as the final position)

      int index = 0) {

        return std::string((char*)_tokens[_tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_domain_part].p_token.c_str());

      }; // -x- std::string domain_part -x-


      /*======================================================================*//**

      @brief

      Access an eMail address (enclosed in angle-brackets), and preceded by the

      display-name (if one is available).


      @note

      If the original form of the display-name had a delimiting space before the

      eMail address, then that space will be present in the result here.  If not, a

      space will not be inserted.  (In other words, this aspect of the original

      full eMail address will be retained.)

      @see addr

      @see display_name

      @see domain_part

      @see local_part

      @see operator[](int)

      @returns std::string with display-name and eMail address (in angle-brackets)

      *///=========================================================================

      std::string email(

      /// Index of eMail address to query for (0 = first element; negative index

      /// values are calculated in reverse, starting with -1 as the final position)

      int index = 0) {


        // --------------------------------------------------------------------------

        // The eMail address has no display-name because it wasn't enclosed in angle

        // brackets, so present the eMail address on its own, in angle brackets.

        // --------------------------------------------------------------------------

        mail_addr_token e = _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]];

        if (e.index_display_name < 0)

          return "<" + std::string((char*)e.p_token.c_str()) + ">";


        // --------------------------------------------------------------------------

        // There was a display-name, so return the eMail address with display-name

        // (enclosed in quotation marks if it started out that way).

        // --------------------------------------------------------------------------

        mail_addr_token n = _tokens[e.index_display_name];

        if (n.flag_quotes)

          return "\""

               + std::string((char*)n.p_token.c_str())

               + "\""

               + "<"

               + std::string((char*)e.p_token.c_str())

               + ">";


        return std::string((char*)n.p_token.c_str())

             + "<"

             + std::string((char*)e.p_token.c_str())

             + ">";

      }; // -x- std::string email -x-


      /*======================================================================*//**

      @brief

      Find out if this object doesn't hold any eMail addresses.

      @see has_any

      @see has_multiple

      @see has_one

      @see size

      @returns TRUE = no eMail addresses@n

               FALSE = one or more eMail addresses

      *///=========================================================================

      bool empty() {
        // No type "e" index entries means no eMail addresses are held.
        return _index_e.size() == 0;
      }; // -x- bool empty -x-


      /*======================================================================*//**

      @brief

      Return a list of errors that have been collected (instead of throwing

      exceptions).

      @see errors_clear

      @see policy_throw_exceptions

      @returns Vector containing @c error_data

      *///=========================================================================

      std::vector<error_data> errors() {
        // Returned by value, so the caller receives its own copy of the list.
        return _errors;
      }; // -x- std::vector<error-data> errors -x-


      /*======================================================================*//**

      @brief

      Clear the list of errors that have been collected (instead of throwing

      exceptions).

      @see errors

      @see policy_throw_exceptions

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* errors_clear() {
        // Discard every collected error record.
        _errors.clear();
        // Hand back this object so that calls can be stacked.
        return this;
      }; // -x- rmailaddr* errors_clear -x-


      /*======================================================================*//**

      @brief

      Grade an eMail address, similar to traditional elementary school grades.  For

      simplicity, grades "a" through "c" are passes, while grades "d" through "f"

      are failures, although if less strict then "d" should also qualify as a pass.


      @code

      Ratings:

         a = Angle-brackets surrounding eMail address (optional display-name)

         b = Bare eMail address (no display-name)

         c = Complex eMail address (groups; optional angle-brackets; optional display-name)

         d = Defective (because obsolete RFC standards were utilized)

         e = Errors (only when collecting errors instead of throwing exceptions)

         f = Failure (an exception was thrown, or eMail address is blank)

      @endcode


      To test for a pass, use a comparison such as <tt>m.grade() <= 'c'</tt>

      (strict) or <tt>m.grade() <= 'd'</tt> (not strict).

      @returns Rating code

      *///=========================================================================

      char grade(
      /// eMail address index (default is 0 for the first eMail address)
      const int index = 0
      ) {
        // Placeholder: no grading logic exists here yet, so the index is not
        // consulted and every query is rated as a failure.
        static_cast<void>(index);
        constexpr char failure = 'f';
        return failure;
      }; // -x- char grade -x-


      /*======================================================================*//**

      @brief

      Indicates whether a display-name was included with this eMail address.

      @returns TRUE = eMail address includes a display-name@n

               FALSE = eMail address has no display-name

      *///=========================================================================

      bool has_display_name(

      /// eMail address index (default is 0 for the first eMail address)

      const int index = 0

      ) {

        return _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]].index_display_name != -1;

      }; // -x- bool has_display-name -x-


      /*======================================================================*//**

      @brief

      Find out whether this object holds any number of eMail addresses.  If there

      are no eMail addresses, then this method returns @c FALSE.

      @see empty

      @see has_multiple

      @see has_one

      @see size

      @returns TRUE = one or more eMail addresses@n

               FALSE = no eMail addresses

      *///=========================================================================

      bool has_any() {
        // At least one type "e" index entry means at least one eMail address.
        return !_index_e.empty();
      }; // -x- bool has_any -x-


      /*======================================================================*//**

      @brief

      Find out whether this object holds multiple eMail addresses.  If there is

      only one eMail address, or no eMail addresses at all, then this method

      returns @c FALSE.

      @see empty

      @see has_any

      @see has_one

      @see size

      @returns TRUE = two or more eMail addresses@n

               FALSE = one eMail address@n

               FALSE = no eMail addresses

      *///=========================================================================

      bool has_multiple() {
        // "Multiple" means two or more type "e" index entries.
        return _index_e.size() >= 2;
      }; // -x- bool has_multiple -x-


      /*======================================================================*//**

      @brief

      Find out whether this object holds exactly one eMail address.  If there are

      two or more eMail addresses, or no eMail addresses, then this method returns

      @c FALSE.

      @see empty

      @see has_any

      @see has_multiple

      @see size

      @returns TRUE = exactly one eMail address@n

               FALSE = two or more eMail addresses@n

               FALSE = no eMail addresses

      *///=========================================================================

      bool has_one() {
        // Exactly one type "e" index entry, no more and no fewer.
        const auto address_count = _index_e.size();
        return address_count == 1;
      }; // -x- bool has_one -x-


      /*======================================================================*//**

      @brief

      Find out the state of this policy.

      @see policy_keep_comments

      @returns policy status

      *///=========================================================================

      bool is_policy_keep_comments() {
        // Current value of the keep-comments policy flag.
        return _policy_keep_comments;
      }; // -x- bool is_policy_keep_comments -x-


      /*======================================================================*//**

      @brief

      Find out the state of this policy.

      @see policy_tabs_to_spaces

      @returns policy status

      *///=========================================================================

      bool is_policy_tabs_to_spaces() {
        // Current value of the tabs-to-spaces policy flag.
        return _policy_tabs_to_spaces;
      }; // -x- bool is_policy_tabs_to_spaces -x-


      /*======================================================================*//**

      @brief

      Find out the state of this policy.

      @see policy_throw_exceptions

      @returns policy status

      *///=========================================================================

      bool is_policy_throw_exceptions() {
        // Current value of the throw-exceptions policy flag.
        return _policy_throw_exceptions;
      }; // -x- bool is_policy_throw_exceptions -x-


      /*======================================================================*//**

      @brief

      Find out the state of this policy.

      @see policy_support_utf8

      @returns policy status

      *///=========================================================================

      bool is_policy_support_utf8() {
        // Current value of the UTF-8 support policy flag.
        return _policy_support_utf8;
      }; // -x- bool is_policy_support_utf8 -x-


      /*======================================================================*//**
      @brief
      Indicates whether this is just an eMail address, without any other parts such
      as display-name, group constructs, comments, etc.

      @note
      Only the display-name and angle-bracket checks are implemented so far; group
      constructs and comments are not yet taken into account.

      @returns TRUE = eMail address is pure (it has no display-name, and it has no
               angle brackets unless @c angle_flag permits them)@n
               FALSE = eMail address has a display-name, or it has angle brackets
               that @c angle_flag does not permit
      *///=========================================================================
      bool is_pure(
      /// indicate whether angle-brackets are okay (default is FALSE so that the
      /// meaning of the word "pure" is not tainted)
      const bool angle_flag = false,
      /// eMail address index (default is 0 for the first eMail address; a negative
      /// index is resolved relative to the end of the eMail address index)
      const int index = 0
      ) {

        //TODO: Finish this (we still need to consider groups, comments, etc.)

        // Resolve the type "e" token for the requested eMail address.
        const auto& email = _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]];

        // A display-name always disqualifies the address from being "pure."
        if (email.index_display_name != -1) return false;

        // Angle brackets are tolerated only when the caller explicitly allows them.
        return angle_flag || !email.flag_angle;

      }; // -x- bool is_pure -x-


      /*======================================================================*//**

      @brief

      Access an eMail address's local-part (the portion preceding the @c @ sign).

      @see addr

      @see display_name

      @see domain_part

      @see email

      @see operator[](int)

      @returns std::string with only the local-part (no angle brackets, etc.)

      *///=========================================================================

      std::string local_part(
      /// Index of eMail address to query for (0 = first element; negative index
      /// values are calculated in reverse, starting with -1 as the final position)
      int index = 0) {

        // Resolve the type "e" token for the requested eMail address, then follow
        // its link to the local-part token.
        const auto& email = _tokens[_index_e[index >= 0 ? index : _index_e.size() + index]];
        const auto& token = _tokens[email.index_local_part].p_token;

        // Copy by explicit length so that the const qualifier is preserved and the
        // result is not cut short at an embedded NUL byte.
        return std::string(reinterpret_cast<const char*>(token.data()), token.size());

      }; // -x- std::string local_part -x-


      /*======================================================================*//**

      @brief

      Sets the policy for whether to keep comments that were embedded in eMail

      address group-construct, display-name, and local-part portions.


      Comments are excluded by default because most systems don't need them, but in

      the event that they are needed (or wanted), this policy makes it possible to

      make sure they aren't excluded during processing.

      @see set

      @see is_policy_keep_comments

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* policy_keep_comments(
      /// TRUE = retain comments embedded in eMail addresses@n
      /// FALSE = do not retain comments embedded in eMail addresses (default)
      bool policy_flag) {

        // Store the caller's choice in the internal policy flag.
        _policy_keep_comments = policy_flag;

        // Hand back this same object so that calls can be stacked.
        return this;

      }; // -x- rmailaddr* policy_keep_comments -x-


      /*======================================================================*//**

      @brief

      Sets the policy for whether to support UTF-8 characters.


      Some older systems may not be able to handle 8-bit data that UTF-8 utilizes,

      in which case this policy makes it possible to easily reject incompatible

      eMail addresses before attempting to use them with such systems.

      @see set

      @see is_policy_support_utf8

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* policy_support_utf8(
      /// TRUE = support UTF-8 characters in eMail addresses (default)@n
      /// FALSE = do not support UTF-8 characters in eMail addresses
      bool policy_flag) {

        // Store the caller's choice in the internal policy flag.
        _policy_support_utf8 = policy_flag;

        // Hand back this same object so that calls can be stacked.
        return this;

      }; // -x- rmailaddr* policy_support_utf8 -x-


      /*======================================================================*//**

      @brief

      Sets the policy for whether to convert every tab character (ASCII character 9)

      to a space (ASCII character 32).  This conversion occurs only once when the

      eMail address is initially specified in a constructor or by way of one of the

      @ref set() methods (changing this policy after this point will not be applied

      to the current eMail address, but it will be in effect for future calls to

      any of the @ref set() methods).


      There are some situations where a tab character can create problems, such as

      when interacting with certain older software or software that makes incorrect

      assumptions about how to parse an eMail address, and this policy makes it

      easy to accommodate such situations for the tab character, which some users

      may be including by using the tab key on their keyboards.

      @see set

      @see is_policy_tabs_to_spaces

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* policy_tabs_to_spaces(
      /// TRUE = convert every tab character to a space@n
      /// FALSE = do not convert tab characters to spaces (default)
      bool policy_flag) {

        // Store the caller's choice in the internal policy flag.
        _policy_tabs_to_spaces = policy_flag;

        // Hand back this same object so that calls can be stacked.
        return this;

      }; // -x- rmailaddr* policy_tabs_to_spaces -x-


      /*======================================================================*//**

      @brief

      Sets the policy for whether to throw exceptions when an error is encountered.


      When this flag is cleared, errors are tracked internally instead of throwing any

      exceptions, and will need to be retrieved using the @ref errors() method,

      which is useful for analyzing an eMail address.  (Enabling or disabling this

      flag does not erase the errors that are stored internally; you will need to

      use the @ref errors_clear method for this.)


      @warning

      This policy is not meant for general use in the majority of applications; it

      is intended for technical analysis, which would be useful in diagnostic and

      research applications, or packet analysis applications like WireShark, or for

      advanced users who are interested in more technical detail.

      @see errors

      @see errors_clear

      @see is_policy_throw_exceptions

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* policy_throw_exceptions(
      /// TRUE = throw exceptions (default)@n
      /// FALSE = don't throw exceptions
      bool policy_flag) {

        // Store the caller's choice in the internal policy flag.
        _policy_throw_exceptions = policy_flag;

        // Hand back this same object so that calls can be stacked.
        return this;

      }; // -x- rmailaddr* policy_throw_exceptions -x-


      /*======================================================================*//**

      @brief

      Set a new eMail address, resetting all internal flags, counters, and arrays

      (but not changing any existing policies).  Any existing eMail addresses will

      be cleared out.  (This method is also used internally by most of this class's

      constructors.)

      @throws std::invalid_argument describing the problem, along with the byte

        offset where the problem originated from

      @see rmailaddr

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* set(
      /// RFC-compliant eMail address
      const char* mailbox,
      /// Number of characters (-1 = ASCIIZ string)
      int         len = -1) {

        // View the same bytes as char8_t code units and delegate to the char8_t
        // overload, forwarding the length unchanged.
        const char8_t* utf8_mailbox = reinterpret_cast<const char8_t*>(mailbox);
        return set(utf8_mailbox, len);

      }; // -x- rmailaddr* set -x-


      /*======================================================================*//**

      @copydoc set(const char*, int)

      @see rmailaddr

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* set(
      /// RFC-compliant eMail address
      const std::string mailbox) {

        // View the string's bytes as char8_t code units and delegate to the
        // char8_t overload, passing the string's own length.
        const char8_t* utf8_mailbox = reinterpret_cast<const char8_t*>(mailbox.data());
        const int      utf8_len     = static_cast<int>(mailbox.size());
        return set(utf8_mailbox, utf8_len);

      }; // -x- rmailaddr* set -x-


      /*======================================================================*//**

      @copydoc set(const char*, int)

      @see rmailaddr

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* set(
      /// RFC-compliant eMail address
      const std::u8string mailbox) {

        // Delegate to the char8_t overload with the string's data and length.
        const char8_t* utf8_mailbox = mailbox.data();
        const int      utf8_len     = static_cast<int>(mailbox.size());
        return set(utf8_mailbox, utf8_len);

      }; // -x- rmailaddr* set -x-


      /*======================================================================*//**

      @copydoc set(const char*, int)

      @see rmailaddr

      @returns The same rmailaddr object so as to facilitate stacking

      *///=========================================================================

      rmailaddr* set(

      /// RFC-compliant eMail address

      const char8_t* mailbox,

      /// Number of characters (-1 = ASCIIZ string)

      int            len = -1) {


        // --------------------------------------------------------------------------

        // Measure size of format string if an ASCIIZ string was indicated.

        // --------------------------------------------------------------------------

        if (len == -1) len = std::strlen((char*)mailbox);


        // --------------------------------------------------------------------------

        // Save a copy of the original eMail address.

        // --------------------------------------------------------------------------

        _addr.assign(mailbox, len);             // We need to save this for later reference


        // --------------------------------------------------------------------------

        // Pre-adjustments (optional, as per policy flags).

        // --------------------------------------------------------------------------

        if (_policy_tabs_to_spaces)             // Policy: Convert all tabs to spaces

          _addr.replace(_addr.begin(), _addr.end(), '\t', ' '); // Efficient replacement


        // --------------------------------------------------------------------------

        // Internal variables.

        // --------------------------------------------------------------------------

        int           offset            =  0;    // Offset within original mailbox char8_t[] array

        int           last_display_name = -1;    // Used to build type "e" eMail tokens

        int           last_local_part   = -1;    // Used to build type "e" eMail tokens

        int           last_domain_part  = -1;    // Used to build type "e" eMail tokens


        // --------------------------------------------------------------------------

        // Internal variables that are reset or updated together at various times,

        // such as when a token is [in most cases] completed.

        // --------------------------------------------------------------------------

        int           token_begin       = 0;     // Beginning offset within current portion of string being parsed

        char8_t       ch;                        // Character being tested (this needs to be defined outside of the main loop)

        bool          flag_utf8         = false; // UTF8 character(s) detected

        bool          flag_angle        = false; // Angle-bracket detected

        bool          flag_quote        = false; // Quotation-marks mode detected

        bool          active_angle      = false; // Angle-bracket mode is active

        bool          active_at_sign    = false; // At-sign mode is active (domain-part instead of local-part interpretation)

        bool          active_quote      = false; // Quotation-marks mode is active

        int           comment_depth     = 0;     // Comments are active when this value is greater than 0 (too many closed comments are in the negative)

        std::u8string p_token;                   // Processed token data (angle brackets, quotation marks, comments, and whitespace omitted)

        std::u8string p_token_sp;                // Processed token data (angle brackets, quotation marks, and comments omitted), with spaces preserved


        // --------------------------------------------------------------------------

        // Main parsing loop that identifies tokens and ensures compliance, and also

        // effectively pre-processes eMail addresses on-the-fly for faster access

        // from the _emails vector later.

        // --------------------------------------------------------------------------

        do {


          // --------------------------------------------------------------------------

          // Obtain next character.

          // --------------------------------------------------------------------------

          ch = mailbox[offset];


          // --------------------------------------------------------------------------

          // Compare one character at a time, but first process special cases of quoted

          // data (copy most of the data) and comments (ignore the data).

          // --------------------------------------------------------------------------

          if (flag_quote && active_quote && ch != '"') {

            if (QTEXT(ch)) { // Include only quoted text

              p_token.push_back(ch);

              p_token_sp.push_back(ch);

            } // -x- if QTEXT -x-

            continue;

          } else if (comment_depth > 0 && ch != ')') { // Ignore all comment data

            if (_policy_keep_comments) {

              p_token.push_back(ch);

              p_token_sp.push_back(ch);

            } // -x- if _policy_keep_comments -x-

            continue;

          } else

          main_parsing_switch: switch (ch) {


            // --------------------------------------------------------------------------

            // Group name ends with a colon.

            // --------------------------------------------------------------------------

            case '"': {

              if (!active_quote) { // Enable quotation-marks mode

                if (flag_quote) _exception("quotation-marks mode can't be re-opened", offset);

                active_quote = true;

                  flag_quote = true;

              } else { // Disable quotation-marks mode

                active_quote = false;

              }

              continue;

            } // -x- case " -x-


            // --------------------------------------------------------------------------

            // Group name ends with a colon.

            // --------------------------------------------------------------------------

            case ':': {


              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              group_depth++;


              // --------------------------------------------------------------------------

              // Add this token to the tokens vector.

              // --------------------------------------------------------------------------

              _tokens.push_back({ .type      = 'g',

                                  .offset    = token_begin,

                                  .len       = offset - token_begin,

                                  .flag_utf8 = flag_utf8,

                                  .p_token   = p_token_sp, });


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case : -x-


            // --------------------------------------------------------------------------

            // Group of eMail addresses is terminated by a semi-colon.

            // --------------------------------------------------------------------------

            case ';': {


              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              if (--group_depth < 0) _exception("too many group construct terminators", offset);

              if (active_angle) _exception("unbalanced open angle bracket", offset);


              // --------------------------------------------------------------------------

              // Add this token terminator to the tokens vector.

              // --------------------------------------------------------------------------

              _tokens.push_back({ .type      = ';',

                                  .offset    = token_begin,

                                  .len       = offset - token_begin,

                                  .flag_utf8 = flag_utf8,

                                  .p_token   = p_token_sp, });


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case ; -x-


            // --------------------------------------------------------------------------

            // Opening angle bracket.

            // --------------------------------------------------------------------------

            case '<': {


              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              if (flag_angle) _exception("unbalanced open angle bracket", offset);

              active_angle = true;

              flag_angle   = true;


              // --------------------------------------------------------------------------

              // Add this token terminator to the tokens vector if a display-name exists.

              // --------------------------------------------------------------------------

              if (token_begin < offset) {

                last_display_name = _tokens.size();

                _tokens.push_back({ .type      = 'n',

                                    .offset    = token_begin,

                                    .len       = offset - token_begin,

                                    .flag_utf8 = flag_utf8,

                                    .p_token   = p_token_sp, });

              } // -x- if token_begin -x-


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case < -x-


            // --------------------------------------------------------------------------

            // At sign ("@") delimiter.

            // --------------------------------------------------------------------------

            case '@': {


              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              if (active_at_sign) _exception("too many at (\"@\") signs", offset);

              active_at_sign = true;


              // --------------------------------------------------------------------------

              // Add this token terminator to the tokens vector if a display-name exists.

              // --------------------------------------------------------------------------

              last_local_part = _tokens.size();

              _tokens.push_back({ .type       = 'l',

                                  .offset     = token_begin,

                                  .len        = offset - token_begin,

                                  .flag_utf8  = flag_utf8,

                                  .flag_angle = flag_angle,

                                  .p_token    = p_token, });


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case @ -x-


            // --------------------------------------------------------------------------

            // Closing angle-bracket.

            // --------------------------------------------------------------------------

            case '>': {


              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              if (!active_angle) _exception("unbalanced closing angle bracket", offset);

              active_angle = false;

              goto main_parsing_email;


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case > -x-


            // --------------------------------------------------------------------------

            // Comma delimiter, signifies the end of an eMail address.

            // --------------------------------------------------------------------------

            case ',': {


              main_parsing_comma:

              // --------------------------------------------------------------------------

              // Internal tracking.

              // --------------------------------------------------------------------------

              if (active_quote) _exception("unbalanced quotation-marks", offset);

              if (active_angle) _exception("unbalanced open angle bracket before comma", offset);


              main_parsing_email:

              // --------------------------------------------------------------------------

              // Add this token terminator to the tokens vector if a display-name exists.

              // --------------------------------------------------------------------------

              if (active_at_sign) { // Domain-part has been started

                last_domain_part = _tokens.size();

                _tokens.push_back({ .type       = 'd',

                                    .offset     = token_begin,

                                    .len        = offset - token_begin,

                                    .flag_utf8  = flag_utf8,

                                    .flag_angle = _tokens[last_local_part].flag_angle,

                                    .p_token    = p_token, });

                active_at_sign   = false;

              } else { // Domain-part has not been started, so there's only a local-part here

                last_local_part  = _tokens.size();

                _tokens.push_back({ .type       = 'l',

                                    .offset     = token_begin,

                                    .len        = offset - token_begin,

                                    .flag_utf8  = flag_utf8,

                                    .flag_angle = flag_angle,

                                    .p_token    = p_token, });

              } // -x- if active_at_sign -x-


              // --------------------------------------------------------------------------

              // Perform a few checks to make sure we're not creating phantom addresses.

              // --------------------------------------------------------------------------

              int __email_len = last_domain_part == -1 ? _tokens[last_local_part].len : (_tokens[last_domain_part].offset - _tokens[last_local_part].offset) + _tokens[last_domain_part].len;

//std::cout << "__email_len=" << std::to_string(__email_len) << std::endl;

              if (__email_len == 0 && !flag_angle) continue;

//std::cout << "last_local_part=" << std::to_string(last_local_part) << std::endl;

//std::cout << "last_domain_part=" << std::to_string(last_domain_part) << std::endl;


              // --------------------------------------------------------------------------

              // Create a token of type "e" now that this eMail address is closed.

              //

              // The reason we're calculating size based on offsets instead of by adding

              // sizes together (and adding 1 for the "@" sign) is that comments can be

              // included in the localpart portion, which normally won't be counted in any

              // localpart sizes.

              // --------------------------------------------------------------------------

              _index_e.push_back(_tokens.size()); // Add to index of eMail addresses (before adding to _tokens vector, _tokens.size() is the position)

              _tokens.push_back({ .type               = 'e',

                                  .offset             = _tokens[last_local_part].offset,

                                  .len                = __email_len,// - token_begin,

                                  .flag_utf8          = _tokens[last_local_part].flag_utf8 || flag_utf8,

                                  .flag_angle         = _tokens[last_local_part].flag_angle,

                                  .flag_null_addr     = __email_len == 0,

                                  .p_token            = _tokens[last_local_part].p_token + ((last_domain_part == -1 || _tokens[last_domain_part].p_token.empty()) ? u8"" : u8"@" + _tokens[last_domain_part].p_token),

                                  .index_display_name = last_display_name,

                                  .index_local_part   = last_local_part,

                                  .index_domain_part  = last_domain_part, });

              last_display_name = -1;

              last_local_part   = -1;

              last_domain_part  = -1;

              flag_angle        = false;


              // --------------------------------------------------------------------------

              // Reset and prepare internal variables for the next token.

              // --------------------------------------------------------------------------

              RESET_FOR_NEXT_TOKEN;

              continue;


            } // -x- case , -x-


            // --------------------------------------------------------------------------

            // Opening comment parenthesis.

            // --------------------------------------------------------------------------

            case '(': {

              comment_depth++;

              continue;

            } // -x- case ( -x-


            // --------------------------------------------------------------------------

            // Closing comment parenthesis.

            // --------------------------------------------------------------------------

            case ')': {

              if (--comment_depth < 0) _exception("unbalanced closing comment parenthesis", offset);

              continue;

            } // -x- case ) -x-


            // --------------------------------------------------------------------------

            // Backslash (quote-literal).

            // --------------------------------------------------------------------------

            case '\\': {


              // --------------------------------------------------------------------------

              // Prevent a potential out-of-bounds buffer-overrun problem.

              // --------------------------------------------------------------------------

              if (++offset == len) {

                _exception("unbalanced quote-literal (backslash)", offset);

                continue; // Do this in case we're not throwing exceptions

              } // -x- if offset -x-


              // --------------------------------------------------------------------------

              // Update to next character (whatever it is, we're taking it literally).

              // --------------------------------------------------------------------------

              ch = mailbox[offset];

              goto main_parsing_loop_default; // Fall-through to default


            } // -x- case \ -x-


            // --------------------------------------------------------------------------

            // All remaining characters.

            // --------------------------------------------------------------------------

            default:

              //if (flag_angle) _exception("additional data not permitted", offset);

              main_parsing_loop_default:

              if (ch > 127) { // Include all UTF-8 character (unless prevented by the exception)

                flag_utf8 = true;

                if (!_policy_support_utf8) _exception("UTF-8 byte encountered", offset);

                p_token.push_back(ch);

                p_token_sp.push_back(ch);

              } else if (CTEXT(ch) || ' ') { // Include almost everything for now (including spaces)

                if (ch != ' ') p_token.push_back(ch); // Exclude spaces

                if (!(ch == ' ' && p_token.size() == 0)) p_token_sp.push_back(ch); // Keep spaces

              } // -x- if ch -x-


          } // -x- switch ch -x-


        } while (++offset < len); // -x- do while -x-


        // --------------------------------------------------------------------------

        // If the final token isn't empty (a.k.a., unfinished / not sealed), then

        // figure out what to do and run one more time, or else throw an exception.

        // --------------------------------------------------------------------------

        if (offset == len && token_begin < offset) {

          ch = ','; // Force comma (",") on parsing loop

          goto main_parsing_switch;

        } else if (offset > len && token_begin < offset) {

          _exception("incomplete data", offset - 1);

        } // -x- if offset -x-

        return this;


      }; // -x- rmailaddr* set -x-


      /*======================================================================*//**

      @brief

      Find out how many eMail addresses this object holds.

      @see empty

      @see has_any

      @see has_multiple

      @see has_one

      @returns The number of eMail addresses

      *///=========================================================================

      int size() {
        // One entry is added to the eMail-address index for every 'e' token, so
        // the length of that index is the number of addresses held.
        const auto address_count = _index_e.size();
        return static_cast<int>(address_count);
      } // -x- int size -x-


      /*======================================================================*//**

      @brief

      Generate a detailed output of all tokens that's useful for debugging.


      @code

      Types:

         g = group name (beginning; includes colon)

         ; = group termination (semi-colon character)

         n = display name

         e = eMail address (includes angle brackets, if present)

         l = local-part

         d = domain-part

         c = comment (not implemented)

        \0 = not initialized (null; regard as "unknown"; this should never happen)

      @endcode


      The difference between "token" and "p_token" is that "token" is the original

      and [mostly] unprocessed atom, while "p_token" has been processed with any

      sets of angle-brackets, sets of quotation-marks, comments, and whitespace

      removed.  In nearly all instances, the value of "p_token" is what's needed.

      @returns std::string containing multi-line text (one token per line)

      *///=========================================================================

      std::string tokens_to_string(

      /// Filter (string containing characters for those types that are to be

      /// included {unrecognized types will be ignored}; the default is no filter)

      const std::string filter = "",

      /// Prefix (text to insert before the beginning of each line)

      const std::string prefix = "",

      /// End-of-Line sequence (default is "\n")

      const std::string eol = "\n") {


        // --------------------------------------------------------------------------

        // Internal variables.

        // --------------------------------------------------------------------------

        std::string t;


        // --------------------------------------------------------------------------

        // Loop that builds list of tokens (one per line).

        // --------------------------------------------------------------------------

        for (int i = 0; i < _tokens.size(); i++) {


          // --------------------------------------------------------------------------

          // Check filter.

          // --------------------------------------------------------------------------

          if (filter.empty() || filter.find(_tokens[i].type) != std::string::npos) {


             // --------------------------------------------------------------------------

             // Shared characteristics.

             // --------------------------------------------------------------------------

             t.append(prefix + "index=" + std::to_string(i)

                       +       " type=" +  _tokens[i].type

                       +       " utf8=" + (_tokens[i].flag_utf8     ? "y" : "n")

                       +   " punycode=" + (_tokens[i].flag_punycode ? "y" : "n")

                       +   " obsolete=" + (_tokens[i].flag_obsolete ? "y" : "n")

                       +     " offset=" + std::to_string(_tokens[i].offset)

                       +     " length=" + std::to_string(_tokens[i].len)

                       +      " token=" + std::string((char*)_addr.c_str()).substr(_tokens[i].offset, _tokens[i].len)

                       +    " p_token=" + (char*)_tokens[i].p_token.c_str());


             // --------------------------------------------------------------------------

             // Type-specific characteristics.

             // --------------------------------------------------------------------------

             switch (_tokens[i].type) {

               case 'd':

                 t.append(std::string(     " fqdn=") + (_tokens[i].flag_fqdn      ? "y" : "n"));

                 break;

               case 'e':

                 t.append(std::string(    " angle=") + (_tokens[i].flag_angle     ? "y" : "n"));

                 // Fall-through to type "l"

               case 'l':

                 t.append(std::string(" null_addr=") + (_tokens[i].flag_null_addr ? "y" : "n"));

                 break;

             } // -x- switch type -x-


             // --------------------------------------------------------------------------

             // Final EoL (End of Line) sequence.

             // --------------------------------------------------------------------------

             t.append(eol);


          } // -x- if filter -x-


        } // -x- for i -x-

        return t;


      }; // -x- std::string tokens_to_string -x-


      /*======================================================================*//**

      @brief

      Array-style access to eMail addresses.  The first element is at index 0.

      @see get

      @see domain_part

      @see local_part

      @returns std::u8string with only the eMail address (no angle brackets, etc.)

        as a native UTF-8 string

      *///=========================================================================

      std::u8string operator[](
      /// Index of eMail address to query for (0 = first element; negative index
      /// values are calculated in reverse, starting with -1 as the final position)
      int index) {

        // --------------------------------------------------------------------------
        // Resolve the index in signed arithmetic so that a negative index is counted
        // back from the end without relying on unsigned wrap-around.
        // --------------------------------------------------------------------------
        const long long count    = static_cast<long long>(_index_e.size());
        const long long position = index >= 0 ? index : count + index;

        // --------------------------------------------------------------------------
        // Reject anything outside of 0..count-1 (this includes every index when no
        // eMail addresses are held) by throwing std::out_of_range, instead of reading
        // beyond the bounds of the index.
        // --------------------------------------------------------------------------
        if (position < 0 || position >= count)
          throw std::out_of_range("rmailaddr::operator[] index out of range");

        return _tokens[_index_e[static_cast<std::size_t>(position)]].p_token;

      }; // -x- std::u8string operator[] -x-


      /*======================================================================*//**

      @brief

      Support convenient streaming usage with std::cout, std::cerr, and friends.

      @returns eMail address in human-readable form

      *///=========================================================================

      friend std::ostream& operator<< (
      /// Output stream (provided automatically by std::cout and std::cerr)
      std::ostream& o,
      /// Object class (matched by compiler)
      rmailaddr const& c) {

        // Hand the stored address to the stream as a NUL-terminated C-string
        const char* const text = reinterpret_cast<const char*>(c._addr.c_str());
        o << text;
        return o;

      }; // -x- std::ostream& operator<< -x-


  }; // -x- class rmailaddr -x-


}; // -x- namespace randolf -x-