randolf.ca  1.00
Randolf Richardson's C++ classes
Loading...
Searching...
No Matches
rhostname
1#pragma once
2
#include <randolf/rlabel>

#include <algorithm>
#include <atomic>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>
9
10namespace randolf {
11
12 /*======================================================================*//**
13 @brief
14 This @ref rhostname class provides an object-oriented internet hostname.
15
16 @par Features
17
18 Some of the key features are:
19
20 - constructors with sensible defaults help to simplify coding
21 - documentation includes code samples (with @c \#include lines as needed)
22 - thread-safety is noted where it is absolutely available (or where some
23 caution is warranted)
24 - can handle ASCIIZ (C-strings) without needing to specify string length
25 - can handle @c std::string (which tracks its own string length)
26
27 @par Use case
28
29 Validation of the format of a hostname from a variety of angles is helpful
30 in ensuring that hostnames received from elsewhere comply with internet
31 standards.
32
33 @par Background
34
35 I created this class to make it easier to write internet server daemons. I
36 also use it in my @ref rmailaddr class when extracting the @c domain-part.
37
38 @par Getting started
39
40 @author Randolf Richardson
41 @version 1.00
42 @par History
43 - 2023-Jan-17 v1.00 Initial version
44 - 2025-Feb-03 v1.00 Increased use of references and pointers
45
46 @par Conventions
47 Lower-case letter "h" is regularly used in partial example code to represent
48 an instantiated rhostname object.
49
50 An ASCIIZ string is a C-string (char* array) that includes a terminating null
51 (0) character at the end.
52
53 @par Notes
54
55 I use the term "ASCIIZ string" to indicate an array of characters that's
56 terminated by a 0 (a.k.a., null). Although this is very much the same as a
57 C-string, the difference is that in many API functions a C-string must often
58 be accompanied by its length value. When referring to an ASCIIZ string, I'm
59 intentionally indicating that the length of the string is not needed because
60 the string is null-terminated. (This term was also commonly used in assembly
61 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
62 still used by machine language programmers today.)
63
64 @par Examples
65
66 @code{.cpp}
67 #include <iostream> // std::cout, std::cerr, std::endl, etc.
68 #include <stdexcept> // std::invalid_argument exception
69
70 #include <randolf/rhostname>
71
72 int main(int argc, char *argv[]) {
73 try {
74 randolf::rhostname h("www.example.com");
75 } catch (const std::invalid_argument e) {
76 std::cerr << "Hostname format exception: " << e.what() << std::endl;
77 return EXIT_FAILURE;
78 } catch (const std::exception e) {
79 std::cerr << "Other exception: " << e.what() << std::endl;
80 return EXIT_FAILURE;
81 }
82 return EXIT_SUCCESS;
83 } // -x- int main -x-
84 @endcode
85
86 Parameter stacking is supported (with methods that return @c rhostname*); in
87 this example, notice that semicolons (";") and "h." references are omittted
88 (when compared with the above):
89
90 @code{.cpp}
91 #include <iostream> // std::cout, std::cerr, std::endl, etc.
92 #include <stdexcept> // std::invalid_argument exception
93
94 #include <randolf/rhostname>
95
96 int main(int argc, char *argv[]) {
97 try {
98 randolf::rhostname h("www.example.com");
99 } catch (const std::invalid_argument e) {
100 std::cerr << "Hostname format exception: " << e.what() << std::endl;
101 return EXIT_FAILURE;
102 } catch (const std::exception e) {
103 std::cerr << "Other exception: " << e.what() << std::endl;
104 return EXIT_FAILURE;
105 }
106 return EXIT_SUCCESS;
107 } // -x- int main -x-
108 @endcode
109 *///=========================================================================
110 class rhostname {
111
112 private:
113 // --------------------------------------------------------------------------
114 // Hostname variables.
115 // --------------------------------------------------------------------------
116 std::vector<rlabel>* __labels = new std::vector<rlabel>();
117 int __flags = 0;
118 std::atomic_bool __fqdn = false;
119 std::atomic_bool __utf8 = false;
120
121 public:
122 /*======================================================================*//**
123 @brief
124 Instantiate an empty rhostname that doesn't qualify as a properly-formatted
125 hostname (because the minimum length of a valid hostname is 1 character).
126
127 Instantiating an empty rhostname is particularly useful for header-file
128 definitions; for example:
129 @code{.cpp}
130 #include <iostream> // std::cout, std::cerr, std::endl, etc.
131 #include <stdexcept> // std::invalid_argument exception
132
133 #include <randolf/rhostname>
134
135 randolf::rhostname h; // <-- Empty rhostname initialization (no exceptions)
136
137 int main(int argc, char *argv[]) {
138 try {
139 h.set("www.example.com");
140 } catch (const std::invalid_argument e) {
141 std::cerr << "Hostname format exception: " << e.what() << std::endl;
142 return EXIT_FAILURE;
143 } catch (const std::exception e) {
144 std::cerr << "Other exception: " << e.what() << std::endl;
145 return EXIT_FAILURE;
146 }
147 return EXIT_SUCCESS;
148 } // -x- int main -x-
149 @endcode
150 @see hostname
151 *///=========================================================================
152 rhostname() noexcept {} // -x- constructor rhostname -x-
153
154 /*======================================================================*//**
155 @brief
156 Instantiate an rhostname.
157
158 @par Examples
159
160 @code{.cpp}
161 #include <iostream> // std::cout, std::cerr, std::endl, etc.
162 #include <stdexcept> // std::invalid_argument exception
163
164 #include <randolf/rhostname>
165
166 int main(int argc, char *argv[]) {
167 try {
168 randolf::rhostname h("www.example.com");
169 } catch (const std::invalid_argument e) {
170 std::cerr << "Hostname format exception: " << e.what() << std::endl;
171 return EXIT_FAILURE;
172 } catch (const std::exception e) {
173 std::cerr << "Other exception: " << e.what() << std::endl;
174 return EXIT_FAILURE;
175 }
176 return EXIT_SUCCESS;
177 } // -x- int main -x-
178 @endcode
179 @throws std::invalid_argument If the hostname is improperly formatted (flags
180 can limit or eliminate conditions for throwing this exception)
181 @throws std::out_of_range if any DNS RR formatted label's size would require
182 reaching beyond the end of the hostname provided (this is called a
183 buffer overrun, which is a potential security risk that can also
184 cause a segmentation fault)
185 @see set()
186 *///=========================================================================
187 rhostname(
188 /// The hostname as a std::string object
189 const std::string& hostname,
190 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
191 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
192 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
193 const int flags = rlabel::HOSTNAME_DEFAULT) {
194 this->set(hostname.data(), hostname.size(), flags);
195 } // -x- constructor rhostname -x-
196
197 /*======================================================================*//**
198 @brief
199 Instantiate an rhostname.
200
201 @par Examples
202
203 @code{.cpp}
204 #include <iostream> // std::cout, std::cerr, std::endl, etc.
205 #include <stdexcept> // std::invalid_argument exception
206
207 #include <randolf/rhostname>
208
209 int main(int argc, char *argv[]) {
210 try {
211 char[] example = "www.example.com";
212 randolf::rhostname h(example, 15);
213 } catch (const std::invalid_argument e) {
214 std::cerr << "Hostname format exception: " << e.what() << std::endl;
215 return EXIT_FAILURE;
216 } catch (const std::exception e) {
217 std::cerr << "Other exception: " << e.what() << std::endl;
218 return EXIT_FAILURE;
219 }
220 return EXIT_SUCCESS;
221 } // -x- int main -x-
222 @endcode
223 @throws std::invalid_argument If the hostname is improperly formatted (flags
224 can limit or eliminate conditions for throwing this exception)
225 @throws std::out_of_range if any DNS RR formatted label's size would require
226 reaching beyond the end of the hostname provided (this is called a
227 buffer overrun, which is a potential security risk that can also
228 cause a segmentation fault)
229 @see set()
230 *///=========================================================================
231 rhostname(
232 /// The hostname as a char* array
233 const char* hostname,
234 /// Length of hostname string (0 = ASCIIZ string)
235 const size_t len = 0,
236 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
237 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
238 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
239 const int flags = rlabel::HOSTNAME_DEFAULT) {
240 this->set(hostname, len, flags);
241 } // -x- constructor rhostname -x-
242
243 /*======================================================================*//**
244 Destructor.
245 *///=========================================================================
246 ~rhostname() noexcept {
247 delete __labels; // Memory management
248 } // -x- destructor rhostname -x-
249
250 /*======================================================================*//**
251 @brief
252 Clear this rhostname's underlying hostname and reset all states.
253 @returns The same rhostname object so as to facilitate stacking
254 *///=========================================================================
255 rhostname& clear() {
256 __labels->clear();
257 __flags = 0;
258 __fqdn = false;
259 __utf8 = false;
260 return *this;
261 } // -x- rhostname& clear -x-
262
263 /*======================================================================*//**
264 @brief
265 Find out whether this hostname is an FQDN (it has a final dot).
266 @par Threads
267 This method is thread-safe.
268 @returns TRUE = hostname is an FQDN
269 @returns FALSE = hostanem is not an FQDN
270 *///=========================================================================
271 const bool is_fqdn() noexcept {
272 return __fqdn;
273 } // -x- bool is_fqdn -x-
274
275 /*======================================================================*//**
276 @brief
277 Specify whether this hostname is an FQDN (has a final dot).
278 @par Threads
279 This method is thread-safe.
280 @returns The same rhostname object so as to facilitate stacking
281 *///=========================================================================
282 rhostname& fqdn(
283 /// TRUE = ensure this rhostname is an FQDN (has a final dot)@n
284 /// FALSE = ensure this rhostname is not an FQDN (does not have a final dot)
285 const bool mode) noexcept {
286 __fqdn = mode;
287 return *this;
288 }; // -x- rhostname& fqdn -x-
289
290 /*======================================================================*//**
291 @brief
292 Find out whether this hostname is an internationalized internet domain name
293 (it has at least one label that has at least one UTF8/punycode character).
294 @par Threads
295 This method is thread-safe.
296 @returns TRUE = hostname is an internationalized internet domain name
297 @returns FALSE = hostanem is not an internationalized internet domain name
298 *///=========================================================================
299 const bool is_utf8() noexcept {
300 return __utf8;
301 }; // -x- bool is_utf8 -x-
302
303 /*======================================================================*//**
304 @brief
305 Extract a specific label from the underlying hostname.
306
307 The @c index parameter begins at 0 for the first label. If @c index is a
308 negative integer, the counting begins at -1 for the last label.
309
310 @par Examples
311
312 @code{.cpp}
313 #include <iostream> // std::cout, std::cerr, std::endl, etc.
314 #include <stdexcept> // std::invalid_argument exception
315
316 #include <randolf/rhostname>
317
318 int main(int argc, char *argv[]) {
319 try {
320 randolf::rhostname h("www.example.com", 15);
321 std::cout << "Top level: " << h.label(-1) << std::endl;
322 // Output will be: Top level: com
323 } catch (const std::invalid_argument e) {
324 std::cerr << "Hostname format exception: " << e.what() << std::endl;
325 return EXIT_FAILURE;
326 } catch (const std::exception e) {
327 std::cerr << "Other exception: " << e.what() << std::endl;
328 return EXIT_FAILURE;
329 }
330 return EXIT_SUCCESS;
331 } // -x- int main -x-
332 @endcode
333 @throws std::out_of_range if @c index is out of range (this is the exception
334 that the @c std::vector::at method throws)
335 @returns The specific label extracted from the hostname that this rhostname
336 represents
337 @see set()
338 *///=========================================================================
339 std::string label(
340 /// Which label to extract, with the first label beginning at index 0 (a
341 /// negative index value starts from the end with -1 as the last label)
342 const int index = 0,
343 /// @ref rlabel::HOSTNAME_WITHOUT_EXCEPTIONS returns an empty @c std::string (@c "")
344 /// instead of an exception being thrown@n
345 /// @ref rlabel::HOSTNAME_DNS_RR Convert label to DNS RR format@n
346 /// @ref rlabel::HOSTNAME_UTF8 Convert label to raw UTF-8 format (optional)@n
347 /// @ref rlabel::HOSTNAME_XN Convert label to punycode format (optional)
348 const int flags = rlabel::HOSTNAME_DEFAULT) {
349
350 // --------------------------------------------------------------------------
351 // Internal variables.
352 // --------------------------------------------------------------------------
353 const int MAX = __labels->size();
354
355 // --------------------------------------------------------------------------
356 // Syntax checks.
357 // --------------------------------------------------------------------------
358 if (flags & rlabel::HOSTNAME_WITHOUT_EXCEPTIONS) {
359 if (index >= MAX ) return "";
360 if (index < -(MAX)) return "";
361 } // -x- if rlabel::HOSTNAME_WITHOUT_EXCEPTIONS -x-
362
363 // --------------------------------------------------------------------------
364 // Copy string contents of specific label to this internal one.
365 // --------------------------------------------------------------------------
366 return __labels->at(index >= 0 ? index : MAX + index).get(flags);
367
368 } // -x- std::string label -x-
369
370 /*======================================================================*//**
371 @brief
372 Find out how many labels the underlying hostname is comprised of.
373 @returns Number of labels
374 *///=========================================================================
375 uint labels() {
376 return __labels->size();
377 } // -x- uint labels -x-
378
379 /*======================================================================*//**
380 @brief
381 Replace this rhostname's underlying hostname with a new hostname.
382
383 @par Examples
384
385 @code{.cpp}
386 #include <iostream> // std::cout, std::cerr, std::endl, etc.
387 #include <stdexcept> // std::invalid_argument exception
388
389 #include <randolf/rhostname>
390
391 int main(int argc, char *argv[]) {
392 try {
393 char[] example = "www.example.com";
394 randolf::rhostname h(example, 15);
395 h.set("mail.example.net");
396 } catch (const std::invalid_argument e) {
397 std::cerr << "Hostname format exception: " << e.what() << std::endl;
398 return EXIT_FAILURE;
399 } catch (const std::exception e) {
400 std::cerr << "Other exception: " << e.what() << std::endl;
401 return EXIT_FAILURE;
402 }
403 return EXIT_SUCCESS;
404 } // -x- int main -x-
405 @endcode
406 @throws std::invalid_argument If the hostname is improperly formatted (flags
407 can limit or eliminate conditions for throwing this exception)
408 @throws std::out_of_range if any DNS RR formatted label's size would require
409 reaching beyond the end of the hostname provided (this is called a
410 buffer overrun, which is a potential security risk that can also
411 cause a segmentation fault)
412 @returns The same rhostname object so as to facilitate stacking
413 @see rhostname()
414 *///=========================================================================
415 rhostname& set(
416 /// The hostname as a std::string object
417 const std::string& hostname,
418 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
419 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
420 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
421 const int flags = rlabel::HOSTNAME_DEFAULT) {
422 this->set(hostname.data(), hostname.size(), flags);
423 return *this;
424 } // -x- rhostname& set -x-
425
426 /*======================================================================*//**
427 @brief
428 Replace this rhostname's underlying hostname with a new hostname.
429
430 @par Examples
431
432 @code{.cpp}
433 #include <iostream> // std::cout, std::cerr, std::endl, etc.
434 #include <stdexcept> // std::invalid_argument exception
435
436 #include <randolf/rhostname>
437
438 int main(int argc, char *argv[]) {
439 try {
440 randolf::rhostname h("www.example.com", 15);
441 h.set("mail.example.net");
442 } catch (const std::invalid_argument e) {
443 std::cerr << "Hostname format exception: " << e.what() << std::endl;
444 return EXIT_FAILURE;
445 } catch (const std::exception e) {
446 std::cerr << "Other exception: " << e.what() << std::endl;
447 return EXIT_FAILURE;
448 }
449 return EXIT_SUCCESS;
450 } // -x- int main -x-
451 @endcode
452 @throws std::invalid_argument If the hostname is improperly formatted (flags
453 can limit or eliminate conditions for throwing this exception)
454 @throws std::out_of_range if any DNS RR formatted label's size would require
455 reaching beyond the end of the hostname provided (this is called a
456 buffer overrun, which is a potential security risk that can also
457 cause a segmentation fault)
458 @returns The same rhostname object so as to facilitate stacking
459 @see set()
460 *///=========================================================================
461 rhostname& set(
462 /// The hostname as a char* array
463 const char* hostname,
464 /// Length of hostname string (0 = ASCIIZ string)
465 size_t len = 0,
466 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
467 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
468 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
469 const int flags = rlabel::HOSTNAME_DEFAULT) {
470
471 // --------------------------------------------------------------------------
472 // Start over (re-initialization).
473 // --------------------------------------------------------------------------
474 clear();
475
476 // --------------------------------------------------------------------------
477 // Measure size of format string if an ASCIIZ string was indicated.
478 // --------------------------------------------------------------------------
479 if (len == 0) len = std::strlen(hostname);
480
481 // --------------------------------------------------------------------------
482 // Split DNS RR into labels.
483 // --------------------------------------------------------------------------
484 if (flags & rlabel::HOSTNAME_DNS_RR) {
485 for (int i = 0; i < len; i++) {
486 size_t bytes = (u_char)hostname[i]; // First character of an RR lable is the length
487 if (bytes + i >= len) { // Bounds checking to prevent a buffer overrun
488 throw std::out_of_range("Hostname DNS RR label size out of range");
489 } // -x- if out-of-bounds -x-
490 __labels->push_back(rlabel(std::string(hostname + i, bytes + 1), flags));
491 i += bytes;
492 } // -x- for i -x-
493 return *this;
494 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
495
496 // --------------------------------------------------------------------------
497 // Split hostname into labels.
498 // --------------------------------------------------------------------------
499 int plateau = 0;
500 int i;
501 for (i = 0; i < len; i++) {
502 if (hostname[i] == '.') {
503 __labels->push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
504 plateau = i + 1; // Advance to the next position after the period (dot delimiter)
505 } // -x- if . -x-
506 } // -x- for i -x-
507 if (plateau < i) __labels->push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
508
509 return *this;
510 } // -x- rhostname& set -x-
511
512 /*======================================================================*//**
513 @brief
514 Convert the underlying hostname to a DNS RR and return it as an std::string.
515 @returns A new std::string with the expected data
516 *///=========================================================================
517 std::string to_dns_rr() {
518 return to_string(rlabel::HOSTNAME_DNS_RR);
519 } // -x- std::string to_dns_rr -x-
520
521 /*======================================================================*//**
522 @brief
523 Return as an @c std::string the hostname that this @c rhostname represents.
524
525 @par Examples
526
527 @code{.cpp}
528 #include <iostream> // std::cout, std::cerr, std::endl, etc.
529 #include <stdexcept> // std::invalid_argument exception
530
531 #include <randolf/rhostname>
532
533 int main(int argc, char *argv[]) {
534 try {
535 randolf::rhostname h("www.example.com", 15);
536 std::cout << "Hostname: " << h.to_string() << std::endl;
537 } catch (const std::invalid_argument e) {
538 std::cerr << "Hostname format exception: " << e.what() << std::endl;
539 return EXIT_FAILURE;
540 } catch (const std::exception e) {
541 std::cerr << "Other exception: " << e.what() << std::endl;
542 return EXIT_FAILURE;
543 }
544 return EXIT_SUCCESS;
545 } // -x- int main -x-
546 @endcode
547 @returns The hostname that this rhostname represents
548 @see set()
549 *///=========================================================================
550 std::string to_string(
551 /// @ref rlabel::HOSTNAME_FQDN_OPT includes the trailing period in the resulting
552 /// std::string if @ref is_fqdn is TRUE@n
553 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
554 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
555 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
556 const int flags = rlabel::HOSTNAME_DEFAULT) noexcept {
557
558 // --------------------------------------------------------------------------
559 // Internal variables.
560 // --------------------------------------------------------------------------
561 std::string hostname;
562 const size_t MAX = __labels->size();
563
564 // --------------------------------------------------------------------------
565 // If this is empty, then end it now to avoid parsing ghosts from the heap.
566 // --------------------------------------------------------------------------
567 if (MAX == 0) return hostname;
568
569 // --------------------------------------------------------------------------
570 // If this is a DNS RR, then just build the RR and return it without an FQDN
571 // check because the final period can't be expressed in a DNS RR due to its
572 // different purpose.
573 // --------------------------------------------------------------------------
574 if (flags & rlabel::HOSTNAME_DNS_RR) {
575 for (int i = 0; i < MAX; i++) hostname.append(__labels->at(i).get(flags));
576 return hostname;
577 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
578
579 // --------------------------------------------------------------------------
580 // Optimized loop to join labels into a dot-delimited hostname, by avoiding
581 // repetitive comparisons for loop position 0 to determine whether to append
582 // the period ("."), we save at least a few CPU cycles overall.
583 // --------------------------------------------------------------------------
584 hostname.append(__labels->at(0).get(flags)); // Add first rlabel, no matter what
585 for (int i = 1; i < MAX; i++) hostname.append(".").append(__labels->at(i).get(flags));
586
587 // --------------------------------------------------------------------------
588 // Optionally add trailing period if this is an FQDN.
589 // --------------------------------------------------------------------------
590 if ((flags & rlabel::HOSTNAME_FQDN_OPT) && __fqdn) hostname.append(".");
591
592 return hostname;
593 } // -x- std::string to_string -x-
594
595 /*======================================================================*//**
596 @brief
597 Convert the underlying hostname to UTF-8 and return it as an std::string.
598 @returns A new std::string with the expected data
599 *///=========================================================================
600 std::string to_utf8() {
601 return to_string(rlabel::HOSTNAME_UTF8);
602 } // -x- std::string to_utf8 -x-
603
604 /*======================================================================*//**
605 @brief
606 Convert the underlying hostname to punycode and return it as an std::string.
607 @returns A new std::string with the expected data
608 *///=========================================================================
609 std::string to_xn() {
610 return to_string(rlabel::HOSTNAME_XN);
611 } // -x- std::string to_xn -x-
612
613 }; // -x- class rhostname -x-
614
615}; // -x- namespace randolf -x-