3#include <randolf/rlabel>
12 /*======================================================================*//**
14 This @ref rhostname class provides an object-oriented internet hostname.
18 Some of the key features are:
20 - constructors with sensible defaults help to simplify coding
21 - documentation includes code samples (with @c \#include lines as needed)
22 - thread-safety is noted where it is absolutely available (or where some
24 - can handle ASCIIZ (C-strings) without needing to specify string length
25 - can handle @c std::string (which tracks its own string length)
29 Validation of the format of a hostname from a variety of angles is helpful
30 in ensuring that hostnames received from elsewhere comply with internet
35 I created this class to make it easier to write internet server daemons. I
36 also use it in my @ref rmailaddr class when extracting the @c domain-part.
40 @author Randolf Richardson
43 2023-Jan-17 v1.00 Initial version
46 Lower-case letter "h" is regularly used in partial example code to represent
47 an instantiated rhostname object.
49 An ASCIIZ string is a C-string (char* array) that includes a terminating null
50 (0) character at the end.
54 I use the term "ASCIIZ string" to indicate an array of characters that's
55 terminated by a 0 (a.k.a., null). Although this is very much the same as a
56 C-string, the difference is that in many API functions a C-string must often
57 be accompanied by its length value. When referring to an ASCIIZ string, I'm
58 intentionally indicating that the length of the string is not needed because
59 the string is null-terminated. (This term was also commonly used in assembly
60 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
61 still used by machine language programmers today.)
66 #include <iostream> // std::cout, std::cerr, std::endl, etc.
67 #include <stdexcept> // std::invalid_argument exception
69 #include <randolf/rhostname>
71 int main(int argc, char *argv[]) {
73 randolf::rhostname h("www.example.com");
} catch (const std::invalid_argument& e) {
std::cerr << "Hostname format exception: " << e.what() << std::endl;
} catch (const std::exception& e) {
std::cerr << "Other exception: " << e.what() << std::endl;
85 Parameter stacking is supported (with methods that return @c rhostname*); in
this example, notice that semicolons (";") and "h." references are omitted
87 (when compared with the above):
90 #include <iostream> // std::cout, std::cerr, std::endl, etc.
91 #include <stdexcept> // std::invalid_argument exception
93 #include <randolf/rhostname>
95 int main(int argc, char *argv[]) {
97 randolf::rhostname h("www.example.com");
} catch (const std::invalid_argument& e) {
std::cerr << "Hostname format exception: " << e.what() << std::endl;
} catch (const std::exception& e) {
std::cerr << "Other exception: " << e.what() << std::endl;
106 } // -x- int main -x-
108 *///=========================================================================
112 // --------------------------------------------------------------------------
113 // Hostname variables.
114 // --------------------------------------------------------------------------
115 std::vector<rlabel> __labels{};
117 std::atomic_bool __fqdn = false;
118 std::atomic_bool __utf8 = false;
121 /*======================================================================*//**
123 Instantiate an empty rhostname that doesn't qualify as a properly-formatted
124 hostname (because the minimum length of a valid hostname is 1 character).
126 Instantiating an empty rhostname is particularly useful for header-file
127 definitions; for example:
129 #include <iostream> // std::cout, std::cerr, std::endl, etc.
130 #include <stdexcept> // std::invalid_argument exception
132 #include <randolf/rhostname>
134 randolf::rhostname h; // <-- Empty rhostname initialization (no exceptions)
136 int main(int argc, char *argv[]) {
138 h.set("www.example.com");
139 } catch (const std::invalid_argument e) {
140 std::cerr << "Hostname format exception: " << e.what() << std::endl;
142 } catch (const std::exception e) {
143 std::cerr << "Other exception: " << e.what() << std::endl;
147 } // -x- int main -x-
150 *///=========================================================================
151 rhostname() noexcept {}; // -x- constructor rhostname -x-
153 /*======================================================================*//**
155 Instantiate an rhostname.
160 #include <iostream> // std::cout, std::cerr, std::endl, etc.
161 #include <stdexcept> // std::invalid_argument exception
163 #include <randolf/rhostname>
165 int main(int argc, char *argv[]) {
167 randolf::rhostname h("www.example.com");
168 } catch (const std::invalid_argument e) {
169 std::cerr << "Hostname format exception: " << e.what() << std::endl;
171 } catch (const std::exception e) {
172 std::cerr << "Other exception: " << e.what() << std::endl;
176 } // -x- int main -x-
178 @throws std::invalid_argument If the hostname is improperly formatted (flags
179 can limit or eliminate conditions for throwing this exception)
180 @throws std::out_of_range if any DNS RR formatted label's size would require
181 reaching beyond the end of the hostname provided (this is called a
182 buffer overrun, which is a potential security risk that can also
183 cause a segmentation fault)
185 *///=========================================================================
187 /// The hostname as a std::string object
188 const std::string hostname,
189 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
190 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
191 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
192 const int flags = rlabel::HOSTNAME_DEFAULT) {
193 this->set(hostname.data(), hostname.size(), flags);
194 }; // -x- constructor rhostname -x-
196 /*======================================================================*//**
198 Instantiate an rhostname.
203 #include <iostream> // std::cout, std::cerr, std::endl, etc.
204 #include <stdexcept> // std::invalid_argument exception
206 #include <randolf/rhostname>
208 int main(int argc, char *argv[]) {
char example[] = "www.example.com";
211 randolf::rhostname h(example, 15);
212 } catch (const std::invalid_argument e) {
213 std::cerr << "Hostname format exception: " << e.what() << std::endl;
215 } catch (const std::exception e) {
216 std::cerr << "Other exception: " << e.what() << std::endl;
220 } // -x- int main -x-
222 @throws std::invalid_argument If the hostname is improperly formatted (flags
223 can limit or eliminate conditions for throwing this exception)
224 @throws std::out_of_range if any DNS RR formatted label's size would require
225 reaching beyond the end of the hostname provided (this is called a
226 buffer overrun, which is a potential security risk that can also
227 cause a segmentation fault)
229 *///=========================================================================
231 /// The hostname as a char* array
232 const char* hostname,
233 /// Length of hostname string (0 = ASCIIZ string)
234 const size_t len = 0,
235 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
236 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
237 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
238 const int flags = rlabel::HOSTNAME_DEFAULT) {
239 this->set(hostname, len, flags);
240 }; // -x- constructor rhostname -x-
242 /*======================================================================*//**
244 Clear this rhostname's underlying hostname and reset all states.
245 @returns The same rhostname object so as to facilitate stacking
246 *///=========================================================================
253 }; // -x- rhostname clear -x-
255 /*======================================================================*//**
257 Find out whether this hostname is an FQDN (it has a final dot).
259 This method is thread-safe.
260 @returns TRUE = hostname is an FQDN
@returns FALSE = hostname is not an FQDN
262 *///=========================================================================
263 const bool is_fqdn() noexcept {
265 }; // -x- bool is_fqdn -x-
267 /*======================================================================*//**
269 Specify whether this hostname is an FQDN (has a final dot).
271 This method is thread-safe.
272 @returns The same rhostname object so as to facilitate stacking
273 *///=========================================================================
275 /// TRUE = ensure this rhostname is an FQDN (has a final dot)@n
276 /// FALSE = ensure this rhostname is not an FQDN (does not have a final dot)
277 const bool mode) noexcept {
280 }; // -x- rhostname is_fqdn -x-
282 /*======================================================================*//**
284 Find out whether this hostname is an internationalized internet domain name
285 (it has at least one label that has at least one UTF8/punycode character).
287 This method is thread-safe.
288 @returns TRUE = hostname is an internationalized internet domain name
@returns FALSE = hostname is not an internationalized internet domain name
290 *///=========================================================================
291 const bool is_utf8() noexcept {
293 }; // -x- bool is_utf8 -x-
295 /*======================================================================*//**
297 Extract a specific label from the underlying hostname.
299 The @c index parameter begins at 0 for the first label. If @c index is a
300 negative integer, the counting begins at -1 for the last label.
305 #include <iostream> // std::cout, std::cerr, std::endl, etc.
306 #include <stdexcept> // std::invalid_argument exception
308 #include <randolf/rhostname>
310 int main(int argc, char *argv[]) {
312 randolf::rhostname h("www.example.com", 15);
313 std::cout << "Top level: " << h.label(-1) << std::endl;
314 // Output will be: Top level: com
315 } catch (const std::invalid_argument e) {
316 std::cerr << "Hostname format exception: " << e.what() << std::endl;
318 } catch (const std::exception e) {
319 std::cerr << "Other exception: " << e.what() << std::endl;
323 } // -x- int main -x-
325 @throws std::out_of_range if @c index is out of range (this is the exception
326 that the @c std::vector::at method throws)
327 @returns The specific label extracted from the hostname that this rhostname
330 *///=========================================================================
332 /// Which label to extract, with the first label beginning at index 0 (a
333 /// negative index value starts from the end with -1 as the last label)
335 /// @ref rlabel::HOSTNAME_WITHOUT_EXCEPTIONS returns an empty @c std::string (@c "")
336 /// instead of an exception being thrown@n
337 /// @ref rlabel::HOSTNAME_DNS_RR Convert label to DNS RR format@n
338 /// @ref rlabel::HOSTNAME_UTF8 Convert label to raw UTF-8 format (optional)@n
339 /// @ref rlabel::HOSTNAME_XN Convert label to punycode format (optional)
340 const int flags = rlabel::HOSTNAME_DEFAULT) {
342 // --------------------------------------------------------------------------
343 // Internal variables.
344 // --------------------------------------------------------------------------
345 const int MAX = __labels.size();
347 // --------------------------------------------------------------------------
349 // --------------------------------------------------------------------------
350 if (flags & rlabel::HOSTNAME_WITHOUT_EXCEPTIONS) {
351 if (index >= MAX ) return "";
352 if (index < -(MAX)) return "";
353 } // -x- if rlabel::HOSTNAME_WITHOUT_EXCEPTIONS -x-
355 // --------------------------------------------------------------------------
356 // Copy string contents of specific label to this internal one.
357 // --------------------------------------------------------------------------
358 return __labels.at(index >= 0 ? index : MAX + index).get(flags);
360 }; // -x- std::string label -x-
362 /*======================================================================*//**
364 Find out how many labels the underlying hostname is comprised of.
365 @returns Number of labels
366 *///=========================================================================
368 return __labels.size();
369 }; // -x- uint labels -x-
371 /*======================================================================*//**
373 Replace this rhostname's underlying hostname with a new hostname.
378 #include <iostream> // std::cout, std::cerr, std::endl, etc.
379 #include <stdexcept> // std::invalid_argument exception
381 #include <randolf/rhostname>
383 int main(int argc, char *argv[]) {
char example[] = "www.example.com";
386 randolf::rhostname h(example, 15);
387 h.set("mail.example.net");
388 } catch (const std::invalid_argument e) {
389 std::cerr << "Hostname format exception: " << e.what() << std::endl;
391 } catch (const std::exception e) {
392 std::cerr << "Other exception: " << e.what() << std::endl;
396 } // -x- int main -x-
398 @throws std::invalid_argument If the hostname is improperly formatted (flags
399 can limit or eliminate conditions for throwing this exception)
400 @throws std::out_of_range if any DNS RR formatted label's size would require
401 reaching beyond the end of the hostname provided (this is called a
402 buffer overrun, which is a potential security risk that can also
403 cause a segmentation fault)
404 @returns The same rhostname object so as to facilitate stacking
406 *///=========================================================================
408 /// The hostname as a std::string object
409 const std::string hostname,
410 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
411 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
412 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
413 const int flags = rlabel::HOSTNAME_DEFAULT) {
414 this->set(hostname.data(), hostname.size(), flags);
416 }; // -x- rhostname set -x-
418 /*======================================================================*//**
420 Replace this rhostname's underlying hostname with a new hostname.
425 #include <iostream> // std::cout, std::cerr, std::endl, etc.
426 #include <stdexcept> // std::invalid_argument exception
428 #include <randolf/rhostname>
430 int main(int argc, char *argv[]) {
432 randolf::rhostname h("www.example.com", 15);
433 h.set("mail.example.net");
434 } catch (const std::invalid_argument e) {
435 std::cerr << "Hostname format exception: " << e.what() << std::endl;
437 } catch (const std::exception e) {
438 std::cerr << "Other exception: " << e.what() << std::endl;
442 } // -x- int main -x-
444 @throws std::invalid_argument If the hostname is improperly formatted (flags
445 can limit or eliminate conditions for throwing this exception)
446 @throws std::out_of_range if any DNS RR formatted label's size would require
447 reaching beyond the end of the hostname provided (this is called a
448 buffer overrun, which is a potential security risk that can also
449 cause a segmentation fault)
450 @returns The same rhostname object so as to facilitate stacking
452 *///=========================================================================
454 /// The hostname as a char* array
455 const char* hostname,
456 /// Length of hostname string (0 = ASCIIZ string)
458 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
459 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
460 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
461 const int flags = rlabel::HOSTNAME_DEFAULT) {
463 // --------------------------------------------------------------------------
464 // Start over (re-initialization).
465 // --------------------------------------------------------------------------
468 // --------------------------------------------------------------------------
469 // Measure size of format string if an ASCIIZ string was indicated.
470 // --------------------------------------------------------------------------
471 if (len == 0) len = std::strlen(hostname);
473 // --------------------------------------------------------------------------
474 // Split DNS RR into labels.
475 // --------------------------------------------------------------------------
476 if (flags & rlabel::HOSTNAME_DNS_RR) {
477 for (int i = 0; i < len; i++) {
478 size_t bytes = (u_char)hostname[i];
479 if (bytes + i >= len) { // Bounds checking to prevent a buffer overrun
480 throw std::out_of_range("Hostname DNS RR label size out of range");
481 } // -x- if out-of-bounds -x-
482 __labels.push_back(rlabel(std::string(hostname + i, bytes + 1), flags));
486 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
488 // --------------------------------------------------------------------------
489 // Split hostname into labels.
490 // --------------------------------------------------------------------------
493 for (i = 0; i < len; i++) {
494 if (hostname[i] == '.') {
495 __labels.push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
496 plateau = i + 1; // Advance to the next position after the period (dot delimiter)
499 if (plateau < i) __labels.push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
502 }; // -x- rhostname set -x-
504 /*======================================================================*//**
506 Convert the underlying hostname to a DNS RR and return it as an std::string.
507 @returns A new std::string with the expected data
508 *///=========================================================================
509 std::string to_dns_rr() {
510 return to_string(rlabel::HOSTNAME_DNS_RR);
511 }; // -x- std::string to_dns_rr -x-
513 /*======================================================================*//**
515 Return as an @c std::string the hostname that this @c rhostname represents.
520 #include <iostream> // std::cout, std::cerr, std::endl, etc.
521 #include <stdexcept> // std::invalid_argument exception
523 #include <randolf/rhostname>
525 int main(int argc, char *argv[]) {
527 randolf::rhostname h("www.example.com", 15);
528 std::cout << "Hostname: " << h.to_string() << std::endl;
529 } catch (const std::invalid_argument e) {
530 std::cerr << "Hostname format exception: " << e.what() << std::endl;
532 } catch (const std::exception e) {
533 std::cerr << "Other exception: " << e.what() << std::endl;
537 } // -x- int main -x-
539 @returns The hostname that this rhostname represents
541 *///=========================================================================
542 std::string to_string(
543 /// @ref rlabel::HOSTNAME_FQDN_OPT includes the trailing period in the resulting
544 /// std::string if @ref is_fqdn is TRUE@n
545 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
546 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
547 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
548 const int flags = rlabel::HOSTNAME_DEFAULT) noexcept {
550 // --------------------------------------------------------------------------
551 // Internal variables.
552 // --------------------------------------------------------------------------
553 std::string hostname;
554 const size_t MAX = __labels.size();
556 // --------------------------------------------------------------------------
557 // If this is empty, then end it now to avoid parsing ghosts from the heap.
558 // --------------------------------------------------------------------------
559 if (MAX == 0) return hostname;
561 // --------------------------------------------------------------------------
562 // If this is a DNS RR, then just build the RR and return it without an FQDN
563 // check because the final period can't be expressed in a DNS RR due to its
564 // different purpose.
565 // --------------------------------------------------------------------------
566 if (flags & rlabel::HOSTNAME_DNS_RR) {
567 for (int i = 0; i < MAX; i++) hostname.append(__labels[i].get(flags));
569 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
571 // --------------------------------------------------------------------------
572 // Optimized loop to join labels into a dot-delimited hostname, by avoiding
573 // repetitive comparisons for loop position 0 to determine whether to append
574 // the period ("."), we save at least a few CPU cycles overall.
575 // --------------------------------------------------------------------------
576 hostname.append(__labels[0].get(flags)); // Add first rlabel, no matter what
577 for (int i = 1; i < MAX; i++) hostname.append(".").append(__labels[i].get(flags));
579 // --------------------------------------------------------------------------
580 // Optionally add trailing period if this is an FQDN.
581 // --------------------------------------------------------------------------
582 if ((flags & rlabel::HOSTNAME_FQDN_OPT) && __fqdn) hostname.append(".");
585 }; // -x- std::string to_string -x-
587 /*======================================================================*//**
589 Convert the underlying hostname to UTF-8 and return it as an std::string.
590 @returns A new std::string with the expected data
591 *///=========================================================================
592 std::string to_utf8() {
593 return to_string(rlabel::HOSTNAME_UTF8);
594 }; // -x- std::string to_utf8 -x-
596 /*======================================================================*//**
598 Convert the underlying hostname to punycode and return it as an std::string.
599 @returns A new std::string with the expected data
600 *///=========================================================================
601 std::string to_xn() {
602 return to_string(rlabel::HOSTNAME_XN);
603 }; // -x- std::string to_xn -x-
605 }; // -x- class rhostname -x-
607}; // -x- namespace randolf -x-