3#include <randolf/rlabel>
12 /*======================================================================*//**
14 This @ref rhostname class provides an object-oriented internet hostname.
18 Some of the key features are:
20 - constructors with sensible defaults help to simplify coding
21 - documentation includes code samples (with @c \#include lines as needed)
22 - thread-safety is noted where it is absolutely available (or where some
24 - can handle ASCIIZ (C-strings) without needing to specify string length
25 - can handle @c std::string (which tracks its own string length)
29 Validation of the format of a hostname from a variety of angles is helpful
30 in ensuring that hostnames received from elsewhere comply with internet
35 I created this class to make it easier to write internet server daemons. I
36 also use it in my @ref rmailaddr class when extracting the @c domain-part.
40 @author Randolf Richardson
43 - 2023-Jan-17 v1.00 Initial version
44 - 2025-Feb-03 v1.00 Increased use of references and pointers
47 Lower-case letter "h" is regularly used in partial example code to represent
48 an instantiated rhostname object.
50 An ASCIIZ string is a C-string (char* array) that includes a terminating null
51 (0) character at the end.
55 I use the term "ASCIIZ string" to indicate an array of characters that's
56 terminated by a 0 (a.k.a., null). Although this is very much the same as a
57 C-string, the difference is that in many API functions a C-string must often
58 be accompanied by its length value. When referring to an ASCIIZ string, I'm
59 intentionally indicating that the length of the string is not needed because
60 the string is null-terminated. (This term was also commonly used in assembly
61 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
62 still used by machine language programmers today.)
67 #include <iostream> // std::cout, std::cerr, std::endl, etc.
68 #include <stdexcept> // std::invalid_argument exception
70 #include <randolf/rhostname>
72 int main(int argc, char *argv[]) {
74 randolf::rhostname h("www.example.com");
75 } catch (const std::invalid_argument& e) {
76 std::cerr << "Hostname format exception: " << e.what() << std::endl;
78 } catch (const std::exception& e) {
79 std::cerr << "Other exception: " << e.what() << std::endl;
86 Parameter stacking is supported (with methods that return @c rhostname&); in
87 this example, notice that semicolons (";") and "h." references are omitted
88 (when compared with the above):
91 #include <iostream> // std::cout, std::cerr, std::endl, etc.
92 #include <stdexcept> // std::invalid_argument exception
94 #include <randolf/rhostname>
96 int main(int argc, char *argv[]) {
98 randolf::rhostname h("www.example.com");
99 } catch (const std::invalid_argument& e) {
100 std::cerr << "Hostname format exception: " << e.what() << std::endl;
102 } catch (const std::exception& e) {
103 std::cerr << "Other exception: " << e.what() << std::endl;
107 } // -x- int main -x-
109 *///=========================================================================
113 // --------------------------------------------------------------------------
114 // Hostname variables.
115 // --------------------------------------------------------------------------
116 std::vector<rlabel>* __labels = new std::vector<rlabel>(); // Labels in hostname order; heap-allocated here and deleted in the destructor
118 std::atomic_bool __fqdn = false; // TRUE = FQDN (final dot); to_string() consults this for rlabel::HOSTNAME_FQDN_OPT
119 std::atomic_bool __utf8 = false; // Internationalized-name flag; presumably what is_utf8() reports -- TODO confirm
122 /*======================================================================*//**
124 Instantiate an empty rhostname that doesn't qualify as a properly-formatted
125 hostname (because the minimum length of a valid hostname is 1 character).
127 Instantiating an empty rhostname is particularly useful for header-file
128 definitions; for example:
130 #include <iostream> // std::cout, std::cerr, std::endl, etc.
131 #include <stdexcept> // std::invalid_argument exception
133 #include <randolf/rhostname>
135 randolf::rhostname h; // <-- Empty rhostname initialization (no exceptions)
137 int main(int argc, char *argv[]) {
139 h.set("www.example.com");
140 } catch (const std::invalid_argument& e) {
141 std::cerr << "Hostname format exception: " << e.what() << std::endl;
143 } catch (const std::exception& e) {
144 std::cerr << "Other exception: " << e.what() << std::endl;
148 } // -x- int main -x-
151 *///=========================================================================
152 rhostname() noexcept {} // -x- constructor rhostname -x-
154 /*======================================================================*//**
156 Instantiate an rhostname.
161 #include <iostream> // std::cout, std::cerr, std::endl, etc.
162 #include <stdexcept> // std::invalid_argument exception
164 #include <randolf/rhostname>
166 int main(int argc, char *argv[]) {
168 randolf::rhostname h("www.example.com");
169 } catch (const std::invalid_argument& e) {
170 std::cerr << "Hostname format exception: " << e.what() << std::endl;
172 } catch (const std::exception& e) {
173 std::cerr << "Other exception: " << e.what() << std::endl;
177 } // -x- int main -x-
179 @throws std::invalid_argument If the hostname is improperly formatted (flags
180 can limit or eliminate conditions for throwing this exception)
181 @throws std::out_of_range if any DNS RR formatted label's size would require
182 reaching beyond the end of the hostname provided (this is called a
183 buffer overrun, which is a potential security risk that can also
184 cause a segmentation fault)
186 *///=========================================================================
188 /// The hostname as a std::string object
189 const std::string& hostname,
190 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
191 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
192 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
193 const int flags = rlabel::HOSTNAME_DEFAULT) {
194 this->set(hostname.data(), hostname.size(), flags); // Delegate to the (char*, length, flags) overload of set()
195 } // -x- constructor rhostname -x-
197 /*======================================================================*//**
199 Instantiate an rhostname.
204 #include <iostream> // std::cout, std::cerr, std::endl, etc.
205 #include <stdexcept> // std::invalid_argument exception
207 #include <randolf/rhostname>
209 int main(int argc, char *argv[]) {
211 char example[] = "www.example.com";
212 randolf::rhostname h(example, 15);
213 } catch (const std::invalid_argument& e) {
214 std::cerr << "Hostname format exception: " << e.what() << std::endl;
216 } catch (const std::exception& e) {
217 std::cerr << "Other exception: " << e.what() << std::endl;
221 } // -x- int main -x-
223 @throws std::invalid_argument If the hostname is improperly formatted (flags
224 can limit or eliminate conditions for throwing this exception)
225 @throws std::out_of_range if any DNS RR formatted label's size would require
226 reaching beyond the end of the hostname provided (this is called a
227 buffer overrun, which is a potential security risk that can also
228 cause a segmentation fault)
230 *///=========================================================================
232 /// The hostname as a char* array
233 const char* hostname,
234 /// Length of hostname string (0 = ASCIIZ string)
235 const size_t len = 0,
236 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
237 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
238 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
239 const int flags = rlabel::HOSTNAME_DEFAULT) {
240 this->set(hostname, len, flags); // All parsing is done by set(); arguments are forwarded unchanged
241 } // -x- constructor rhostname -x-
243 /*======================================================================*//**
245 *///=========================================================================
246 ~rhostname() noexcept {
247 delete __labels; // Release the heap-allocated label vector
248 } // -x- destructor rhostname -x-
250 /*======================================================================*//**
252 Clear this rhostname's underlying hostname and reset all of its state.
253 @returns The same rhostname object so as to facilitate stacking
254 *///=========================================================================
261 } // -x- rhostname& clear -x-
263 /*======================================================================*//**
265 Find out whether this hostname is an FQDN (it has a final dot).
267 This method is thread-safe.
268 @returns TRUE = hostname is an FQDN
269 @returns FALSE = hostname is not an FQDN
270 *///=========================================================================
271 const bool is_fqdn() noexcept {
273 } // -x- bool is_fqdn -x-
275 /*======================================================================*//**
277 Set or clear this hostname's FQDN status (an FQDN has a final dot).
279 This method is thread-safe.
280 @returns The same rhostname object so as to facilitate stacking
281 *///=========================================================================
283 /// TRUE = ensure this rhostname is an FQDN (has a final dot)@n
284 /// FALSE = ensure this rhostname is not an FQDN (does not have a final dot)
285 const bool mode) noexcept {
288 }; // -x- rhostname& fqdn -x-
290 /*======================================================================*//**
292 Find out whether this hostname is an internationalized internet domain name
293 (it has at least one label that has at least one UTF8/punycode character).
295 This method is thread-safe.
296 @returns TRUE = hostname is an internationalized internet domain name
297 @returns FALSE = hostname is not an internationalized internet domain name
298 *///=========================================================================
299 const bool is_utf8() noexcept {
301 }; // -x- bool is_utf8 -x-
303 /*======================================================================*//**
305 Extract a specific label from the underlying hostname.
307 The @c index parameter begins at 0 for the first label. If @c index is a
308 negative integer, the counting begins at -1 for the last label.
313 #include <iostream> // std::cout, std::cerr, std::endl, etc.
314 #include <stdexcept> // std::invalid_argument exception
316 #include <randolf/rhostname>
318 int main(int argc, char *argv[]) {
320 randolf::rhostname h("www.example.com", 15);
321 std::cout << "Top level: " << h.label(-1) << std::endl;
322 // Output will be: Top level: com
323 } catch (const std::invalid_argument& e) {
324 std::cerr << "Hostname format exception: " << e.what() << std::endl;
326 } catch (const std::exception& e) {
327 std::cerr << "Other exception: " << e.what() << std::endl;
331 } // -x- int main -x-
333 @throws std::out_of_range if @c index is out of range (this is the exception
334 that the @c std::vector::at method throws)
335 @returns The specific label extracted from the hostname that this rhostname
338 *///=========================================================================
340 /// Which label to extract, with the first label beginning at index 0 (a
341 /// negative index value starts from the end with -1 as the last label)
343 /// @ref rlabel::HOSTNAME_WITHOUT_EXCEPTIONS returns an empty @c std::string (@c "")
344 /// instead of an exception being thrown@n
345 /// @ref rlabel::HOSTNAME_DNS_RR Convert label to DNS RR format@n
346 /// @ref rlabel::HOSTNAME_UTF8 Convert label to raw UTF-8 format (optional)@n
347 /// @ref rlabel::HOSTNAME_XN Convert label to punycode format (optional)
348 const int flags = rlabel::HOSTNAME_DEFAULT) {
350 // --------------------------------------------------------------------------
351 // Internal variables.
352 // --------------------------------------------------------------------------
353 const int MAX = __labels->size(); // Label count as a signed value so that -(MAX) below is meaningful
355 // --------------------------------------------------------------------------
357 // --------------------------------------------------------------------------
358 if (flags & rlabel::HOSTNAME_WITHOUT_EXCEPTIONS) {
359 if (index >= MAX ) return ""; // Past the last label
360 if (index < -(MAX)) return ""; // Before the first label (negative indexing)
361 } // -x- if rlabel::HOSTNAME_WITHOUT_EXCEPTIONS -x-
363 // --------------------------------------------------------------------------
364 // Return the requested label (a negative index counts back from the end).
365 // --------------------------------------------------------------------------
366 return __labels->at(index >= 0 ? index : MAX + index).get(flags);
368 } // -x- std::string label -x-
370 /*======================================================================*//**
372 Find out how many labels the underlying hostname is composed of.
373 @returns Number of labels
374 *///=========================================================================
376 return __labels->size(); // Element count of the label vector
377 } // -x- uint labels -x-
379 /*======================================================================*//**
381 Replace this rhostname's underlying hostname with a new hostname.
386 #include <iostream> // std::cout, std::cerr, std::endl, etc.
387 #include <stdexcept> // std::invalid_argument exception
389 #include <randolf/rhostname>
391 int main(int argc, char *argv[]) {
393 char example[] = "www.example.com";
394 randolf::rhostname h(example, 15);
395 h.set("mail.example.net");
396 } catch (const std::invalid_argument& e) {
397 std::cerr << "Hostname format exception: " << e.what() << std::endl;
399 } catch (const std::exception& e) {
400 std::cerr << "Other exception: " << e.what() << std::endl;
404 } // -x- int main -x-
406 @throws std::invalid_argument If the hostname is improperly formatted (flags
407 can limit or eliminate conditions for throwing this exception)
408 @throws std::out_of_range if any DNS RR formatted label's size would require
409 reaching beyond the end of the hostname provided (this is called a
410 buffer overrun, which is a potential security risk that can also
411 cause a segmentation fault)
412 @returns The same rhostname object so as to facilitate stacking
414 *///=========================================================================
416 /// The hostname as a std::string object
417 const std::string& hostname,
418 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
419 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
420 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
421 const int flags = rlabel::HOSTNAME_DEFAULT) {
422 this->set(hostname.data(), hostname.size(), flags); // Delegate to the (char*, length, flags) overload
424 } // -x- rhostname& set -x-
426 /*======================================================================*//**
428 Replace this rhostname's underlying hostname with a new hostname.
433 #include <iostream> // std::cout, std::cerr, std::endl, etc.
434 #include <stdexcept> // std::invalid_argument exception
436 #include <randolf/rhostname>
438 int main(int argc, char *argv[]) {
440 randolf::rhostname h("www.example.com", 15);
441 h.set("mail.example.net");
442 } catch (const std::invalid_argument& e) {
443 std::cerr << "Hostname format exception: " << e.what() << std::endl;
445 } catch (const std::exception& e) {
446 std::cerr << "Other exception: " << e.what() << std::endl;
450 } // -x- int main -x-
452 @throws std::invalid_argument If the hostname is improperly formatted (flags
453 can limit or eliminate conditions for throwing this exception)
454 @throws std::out_of_range if any DNS RR formatted label's size would require
455 reaching beyond the end of the hostname provided (this is called a
456 buffer overrun, which is a potential security risk that can also
457 cause a segmentation fault)
458 @returns The same rhostname object so as to facilitate stacking
460 *///=========================================================================
462 /// The hostname as a char* array
463 const char* hostname,
464 /// Length of hostname string (0 = ASCIIZ string)
466 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
467 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
468 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
469 const int flags = rlabel::HOSTNAME_DEFAULT) {
471 // --------------------------------------------------------------------------
472 // Start over (re-initialization).
473 // --------------------------------------------------------------------------
476 // --------------------------------------------------------------------------
477 // Measure size of hostname string if an ASCIIZ string was indicated.
478 // --------------------------------------------------------------------------
479 if (len == 0) len = std::strlen(hostname);
481 // --------------------------------------------------------------------------
482 // Split DNS RR into labels.
483 // --------------------------------------------------------------------------
484 if (flags & rlabel::HOSTNAME_DNS_RR) {
485 for (int i = 0; i < len; i++) {
486 size_t bytes = (u_char)hostname[i]; // First character of an RR label is the length
487 if (bytes + i >= len) { // Bounds checking to prevent a buffer overrun
488 throw std::out_of_range("Hostname DNS RR label size out of range");
489 } // -x- if out-of-bounds -x-
490 __labels->push_back(rlabel(std::string(hostname + i, bytes + 1), flags));
494 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
496 // --------------------------------------------------------------------------
497 // Split hostname into labels.
498 // --------------------------------------------------------------------------
501 for (i = 0; i < len; i++) {
502 if (hostname[i] == '.') {
503 __labels->push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
504 plateau = i + 1; // Advance to the next position after the period (dot delimiter)
507 if (plateau < i) __labels->push_back(rlabel(std::string(hostname + plateau, i - plateau), flags)); // Text after the last dot becomes the final label
510 } // -x- rhostname& set -x-
512 /*======================================================================*//**
514 Return the underlying hostname in DNS RR format (same as calling @ref to_string with @ref rlabel::HOSTNAME_DNS_RR).
515 @returns A new std::string holding the hostname in DNS RR format
516 *///=========================================================================
517 std::string to_dns_rr() {
518 return to_string(rlabel::HOSTNAME_DNS_RR);
519 } // -x- std::string to_dns_rr -x-
521 /*======================================================================*//**
523 Return as an @c std::string the hostname that this @c rhostname represents.
528 #include <iostream> // std::cout, std::cerr, std::endl, etc.
529 #include <stdexcept> // std::invalid_argument exception
531 #include <randolf/rhostname>
533 int main(int argc, char *argv[]) {
535 randolf::rhostname h("www.example.com", 15);
536 std::cout << "Hostname: " << h.to_string() << std::endl;
537 } catch (const std::invalid_argument& e) {
538 std::cerr << "Hostname format exception: " << e.what() << std::endl;
540 } catch (const std::exception& e) {
541 std::cerr << "Other exception: " << e.what() << std::endl;
545 } // -x- int main -x-
547 @returns The hostname that this rhostname represents
549 *///=========================================================================
550 std::string to_string(
551 /// @ref rlabel::HOSTNAME_FQDN_OPT includes the trailing period in the resulting
552 /// std::string if @ref is_fqdn is TRUE@n
553 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels to DNS RR format@n
554 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
555 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
556 const int flags = rlabel::HOSTNAME_DEFAULT) noexcept {
558 // --------------------------------------------------------------------------
559 // Internal variables.
560 // --------------------------------------------------------------------------
561 std::string hostname;
562 const size_t MAX = __labels->size();
564 // --------------------------------------------------------------------------
565 // If there are no labels, return now: at(0) below would throw in this noexcept method.
566 // --------------------------------------------------------------------------
567 if (MAX == 0) return hostname;
569 // --------------------------------------------------------------------------
570 // If this is a DNS RR, then just build the RR and return it without an FQDN
571 // check because the final period can't be expressed in a DNS RR due to its
572 // different purpose.
573 // --------------------------------------------------------------------------
574 if (flags & rlabel::HOSTNAME_DNS_RR) {
575 for (int i = 0; i < MAX; i++) hostname.append(__labels->at(i).get(flags));
577 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
579 // --------------------------------------------------------------------------
580 // Optimized loop to join labels into a dot-delimited hostname; by avoiding
581 // repetitive comparisons for loop position 0 to determine whether to append
582 // the period ("."), we save at least a few CPU cycles overall.
583 // --------------------------------------------------------------------------
584 hostname.append(__labels->at(0).get(flags)); // Add first rlabel, no matter what
585 for (int i = 1; i < MAX; i++) hostname.append(".").append(__labels->at(i).get(flags));
587 // --------------------------------------------------------------------------
588 // Optionally add trailing period if this is an FQDN.
589 // --------------------------------------------------------------------------
590 if ((flags & rlabel::HOSTNAME_FQDN_OPT) && __fqdn) hostname.append(".");
593 } // -x- std::string to_string -x-
595 /*======================================================================*//**
597 Return the underlying hostname in raw UTF-8 format (same as calling @ref to_string with @ref rlabel::HOSTNAME_UTF8).
598 @returns A new std::string holding the hostname in UTF-8 format
599 *///=========================================================================
600 std::string to_utf8() {
601 return to_string(rlabel::HOSTNAME_UTF8);
602 } // -x- std::string to_utf8 -x-
604 /*======================================================================*//**
606 Return the underlying hostname in punycode format (same as calling @ref to_string with @ref rlabel::HOSTNAME_XN).
607 @returns A new std::string holding the hostname in punycode format
608 *///=========================================================================
609 std::string to_xn() {
610 return to_string(rlabel::HOSTNAME_XN);
611 } // -x- std::string to_xn -x-
613 }; // -x- class rhostname -x-
615}; // -x- namespace randolf -x-