randolf.ca  1.00
Randolf Richardson's C++ classes
Loading...
Searching...
No Matches
rhostname
1#pragma once
2
#include <randolf/rlabel>

#include <algorithm>
#include <atomic>
#include <cstring>
#include <stdexcept>
#include <vector>
9
10namespace randolf {
11
12 /*======================================================================*//**
13 @brief
14 This @ref rhostname class provides an object-oriented internet hostname.
15
16 @par Features
17
18 Some of the key features are:
19
20 - constructors with sensible defaults help to simplify coding
21 - documentation includes code samples (with @c \#include lines as needed)
22 - thread-safety is noted where it is absolutely available (or where some
23 caution is warranted)
24 - can handle ASCIIZ (C-strings) without needing to specify string length
25 - can handle @c std::string (which tracks its own string length)
26
27 @par Use case
28
29 Validation of the format of a hostname from a variety of angles is helpful
30 in ensuring that hostnames received from elsewhere comply with internet
31 standards.
32
33 @par Background
34
35 I created this class to make it easier to write internet server daemons. I
36 also use it in my @ref rmailaddr class when extracting the @c domain-part.
37
38 @par Getting started
39
40 @author Randolf Richardson
41 @version 1.00
42 @par History
43 2023-Jan-17 v1.00 Initial version
44
45 @par Conventions
46 Lower-case letter "h" is regularly used in partial example code to represent
47 an instantiated rhostname object.
48
49 An ASCIIZ string is a C-string (char* array) that includes a terminating null
50 (0) character at the end.
51
52 @par Notes
53
54 I use the term "ASCIIZ string" to indicate an array of characters that's
55 terminated by a 0 (a.k.a., null). Although this is very much the same as a
56 C-string, the difference is that in many API functions a C-string must often
57 be accompanied by its length value. When referring to an ASCIIZ string, I'm
58 intentionally indicating that the length of the string is not needed because
59 the string is null-terminated. (This term was also commonly used in assembly
60 language programming in the 1970s, 1980s, and 1990s, and as far as I know is
61 still used by machine language programmers today.)
62
63 @par Examples
64
65 @code{.cpp}
66 #include <iostream> // std::cout, std::cerr, std::endl, etc.
67 #include <stdexcept> // std::invalid_argument exception
68
69 #include <randolf/rhostname>
70
71 int main(int argc, char *argv[]) {
72 try {
73 randolf::rhostname h("www.example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
78 std::cerr << "Other exception: " << e.what() << std::endl;
79 return EXIT_FAILURE;
80 }
81 return EXIT_SUCCESS;
82 } // -x- int main -x-
83 @endcode
84
 Parameter stacking is supported (with methods that return @c rhostname*, so
 stacked calls are chained with "->"); in this example, notice that semicolons
 (";") and "h." references are omitted (when compared with the above):
88
89 @code{.cpp}
90 #include <iostream> // std::cout, std::cerr, std::endl, etc.
91 #include <stdexcept> // std::invalid_argument exception
92
93 #include <randolf/rhostname>
94
95 int main(int argc, char *argv[]) {
96 try {
    randolf::rhostname h;
    h.set("www.example.com")->is_fqdn(true);
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
102 std::cerr << "Other exception: " << e.what() << std::endl;
103 return EXIT_FAILURE;
104 }
105 return EXIT_SUCCESS;
106 } // -x- int main -x-
107 @endcode
108 *///=========================================================================
109 class rhostname {
110
111 private:
112 // --------------------------------------------------------------------------
113 // Hostname variables.
114 // --------------------------------------------------------------------------
115 std::vector<rlabel> __labels{};
116 int __flags = 0;
117 std::atomic_bool __fqdn = false;
118 std::atomic_bool __utf8 = false;
119
120 public:
121 /*======================================================================*//**
122 @brief
123 Instantiate an empty rhostname that doesn't qualify as a properly-formatted
124 hostname (because the minimum length of a valid hostname is 1 character).
125
126 Instantiating an empty rhostname is particularly useful for header-file
127 definitions; for example:
128 @code{.cpp}
129 #include <iostream> // std::cout, std::cerr, std::endl, etc.
130 #include <stdexcept> // std::invalid_argument exception
131
132 #include <randolf/rhostname>
133
134 randolf::rhostname h; // <-- Empty rhostname initialization (no exceptions)
135
136 int main(int argc, char *argv[]) {
137 try {
138 h.set("www.example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
143 std::cerr << "Other exception: " << e.what() << std::endl;
144 return EXIT_FAILURE;
145 }
146 return EXIT_SUCCESS;
147 } // -x- int main -x-
148 @endcode
149 @see hostname
150 *///=========================================================================
151 rhostname() noexcept {}; // -x- constructor rhostname -x-
152
153 /*======================================================================*//**
154 @brief
155 Instantiate an rhostname.
156
157 @par Examples
158
159 @code{.cpp}
160 #include <iostream> // std::cout, std::cerr, std::endl, etc.
161 #include <stdexcept> // std::invalid_argument exception
162
163 #include <randolf/rhostname>
164
165 int main(int argc, char *argv[]) {
166 try {
167 randolf::rhostname h("www.example.com");
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
172 std::cerr << "Other exception: " << e.what() << std::endl;
173 return EXIT_FAILURE;
174 }
175 return EXIT_SUCCESS;
176 } // -x- int main -x-
177 @endcode
178 @throws std::invalid_argument If the hostname is improperly formatted (flags
179 can limit or eliminate conditions for throwing this exception)
180 @throws std::out_of_range if any DNS RR formatted label's size would require
181 reaching beyond the end of the hostname provided (this is called a
182 buffer overrun, which is a potential security risk that can also
183 cause a segmentation fault)
184 @see set()
185 *///=========================================================================
186 rhostname(
187 /// The hostname as a std::string object
188 const std::string hostname,
189 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
190 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
191 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
192 const int flags = rlabel::HOSTNAME_DEFAULT) {
193 this->set(hostname.data(), hostname.size(), flags);
194 }; // -x- constructor rhostname -x-
195
196 /*======================================================================*//**
197 @brief
198 Instantiate an rhostname.
199
200 @par Examples
201
202 @code{.cpp}
203 #include <iostream> // std::cout, std::cerr, std::endl, etc.
204 #include <stdexcept> // std::invalid_argument exception
205
206 #include <randolf/rhostname>
207
208 int main(int argc, char *argv[]) {
209 try {
    char example[] = "www.example.com";
    randolf::rhostname h(example, sizeof(example) - 1);
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
216 std::cerr << "Other exception: " << e.what() << std::endl;
217 return EXIT_FAILURE;
218 }
219 return EXIT_SUCCESS;
220 } // -x- int main -x-
221 @endcode
222 @throws std::invalid_argument If the hostname is improperly formatted (flags
223 can limit or eliminate conditions for throwing this exception)
224 @throws std::out_of_range if any DNS RR formatted label's size would require
225 reaching beyond the end of the hostname provided (this is called a
226 buffer overrun, which is a potential security risk that can also
227 cause a segmentation fault)
228 @see set()
229 *///=========================================================================
230 rhostname(
231 /// The hostname as a char* array
232 const char* hostname,
233 /// Length of hostname string (0 = ASCIIZ string)
234 const size_t len = 0,
235 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
236 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
237 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
238 const int flags = rlabel::HOSTNAME_DEFAULT) {
239 this->set(hostname, len, flags);
240 }; // -x- constructor rhostname -x-
241
242 /*======================================================================*//**
243 @brief
244 Clear this rhostname's underlying hostname and reset all states.
245 @returns The same rhostname object so as to facilitate stacking
246 *///=========================================================================
247 rhostname* clear() {
248 __labels.clear();
249 __flags = 0;
250 __fqdn = false;
251 __utf8 = false;
252 return this;
253 }; // -x- rhostname clear -x-
254
255 /*======================================================================*//**
256 @brief
257 Find out whether this hostname is an FQDN (it has a final dot).
258 @par Threads
259 This method is thread-safe.
260 @returns TRUE = hostname is an FQDN
 @returns FALSE = hostname is not an FQDN
262 *///=========================================================================
263 const bool is_fqdn() noexcept {
264 return __fqdn;
265 }; // -x- bool is_fqdn -x-
266
267 /*======================================================================*//**
268 @brief
269 Specify whether this hostname is an FQDN (has a final dot).
270 @par Threads
271 This method is thread-safe.
272 @returns The same rhostname object so as to facilitate stacking
273 *///=========================================================================
274 rhostname* is_fqdn(
275 /// TRUE = ensure this rhostname is an FQDN (has a final dot)@n
276 /// FALSE = ensure this rhostname is not an FQDN (does not have a final dot)
277 const bool mode) noexcept {
278 __fqdn = mode;
279 return this;
280 }; // -x- rhostname is_fqdn -x-
281
282 /*======================================================================*//**
283 @brief
284 Find out whether this hostname is an internationalized internet domain name
285 (it has at least one label that has at least one UTF8/punycode character).
286 @par Threads
287 This method is thread-safe.
288 @returns TRUE = hostname is an internationalized internet domain name
 @returns FALSE = hostname is not an internationalized internet domain name
290 *///=========================================================================
291 const bool is_utf8() noexcept {
292 return __utf8;
293 }; // -x- bool is_utf8 -x-
294
295 /*======================================================================*//**
296 @brief
297 Extract a specific label from the underlying hostname.
298
299 The @c index parameter begins at 0 for the first label. If @c index is a
300 negative integer, the counting begins at -1 for the last label.
301
302 @par Examples
303
304 @code{.cpp}
305 #include <iostream> // std::cout, std::cerr, std::endl, etc.
306 #include <stdexcept> // std::invalid_argument exception
307
308 #include <randolf/rhostname>
309
310 int main(int argc, char *argv[]) {
311 try {
    randolf::rhostname h("www.example.com");
    std::cout << "Top level: " << h.label(-1) << std::endl;
    // Output will be: Top level: com
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
319 std::cerr << "Other exception: " << e.what() << std::endl;
320 return EXIT_FAILURE;
321 }
322 return EXIT_SUCCESS;
323 } // -x- int main -x-
324 @endcode
325 @throws std::out_of_range if @c index is out of range (this is the exception
326 that the @c std::vector::at method throws)
327 @returns The specific label extracted from the hostname that this rhostname
328 represents
329 @see set()
330 *///=========================================================================
331 std::string label(
332 /// Which label to extract, with the first label beginning at index 0 (a
333 /// negative index value starts from the end with -1 as the last label)
334 const int index = 0,
335 /// @ref rlabel::HOSTNAME_WITHOUT_EXCEPTIONS returns an empty @c std::string (@c "")
336 /// instead of an exception being thrown@n
337 /// @ref rlabel::HOSTNAME_DNS_RR Convert label to DNS RR format@n
338 /// @ref rlabel::HOSTNAME_UTF8 Convert label to raw UTF-8 format (optional)@n
339 /// @ref rlabel::HOSTNAME_XN Convert label to punycode format (optional)
340 const int flags = rlabel::HOSTNAME_DEFAULT) {
341
342 // --------------------------------------------------------------------------
343 // Internal variables.
344 // --------------------------------------------------------------------------
345 const int MAX = __labels.size();
346
347 // --------------------------------------------------------------------------
348 // Syntax checks.
349 // --------------------------------------------------------------------------
350 if (flags & rlabel::HOSTNAME_WITHOUT_EXCEPTIONS) {
351 if (index >= MAX ) return "";
352 if (index < -(MAX)) return "";
353 } // -x- if rlabel::HOSTNAME_WITHOUT_EXCEPTIONS -x-
354
355 // --------------------------------------------------------------------------
356 // Copy string contents of specific label to this internal one.
357 // --------------------------------------------------------------------------
358 return __labels.at(index >= 0 ? index : MAX + index).get(flags);
359
360 }; // -x- std::string label -x-
361
362 /*======================================================================*//**
363 @brief
364 Find out how many labels the underlying hostname is comprised of.
365 @returns Number of labels
366 *///=========================================================================
367 uint labels() {
368 return __labels.size();
369 }; // -x- uint labels -x-
370
371 /*======================================================================*//**
372 @brief
373 Replace this rhostname's underlying hostname with a new hostname.
374
375 @par Examples
376
377 @code{.cpp}
378 #include <iostream> // std::cout, std::cerr, std::endl, etc.
379 #include <stdexcept> // std::invalid_argument exception
380
381 #include <randolf/rhostname>
382
383 int main(int argc, char *argv[]) {
384 try {
    char example[] = "www.example.com";
    randolf::rhostname h(example, sizeof(example) - 1);
    h.set("mail.example.net");
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
392 std::cerr << "Other exception: " << e.what() << std::endl;
393 return EXIT_FAILURE;
394 }
395 return EXIT_SUCCESS;
396 } // -x- int main -x-
397 @endcode
398 @throws std::invalid_argument If the hostname is improperly formatted (flags
399 can limit or eliminate conditions for throwing this exception)
400 @throws std::out_of_range if any DNS RR formatted label's size would require
401 reaching beyond the end of the hostname provided (this is called a
402 buffer overrun, which is a potential security risk that can also
403 cause a segmentation fault)
404 @returns The same rhostname object so as to facilitate stacking
405 @see rhostname()
406 *///=========================================================================
407 rhostname* set(
408 /// The hostname as a std::string object
409 const std::string hostname,
410 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
411 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
412 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
413 const int flags = rlabel::HOSTNAME_DEFAULT) {
414 this->set(hostname.data(), hostname.size(), flags);
415 return this;
416 }; // -x- rhostname set -x-
417
418 /*======================================================================*//**
419 @brief
420 Replace this rhostname's underlying hostname with a new hostname.
421
422 @par Examples
423
424 @code{.cpp}
425 #include <iostream> // std::cout, std::cerr, std::endl, etc.
426 #include <stdexcept> // std::invalid_argument exception
427
428 #include <randolf/rhostname>
429
430 int main(int argc, char *argv[]) {
431 try {
    randolf::rhostname h("www.example.com");
    h.set("mail.example.net");
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
438 std::cerr << "Other exception: " << e.what() << std::endl;
439 return EXIT_FAILURE;
440 }
441 return EXIT_SUCCESS;
442 } // -x- int main -x-
443 @endcode
444 @throws std::invalid_argument If the hostname is improperly formatted (flags
445 can limit or eliminate conditions for throwing this exception)
446 @throws std::out_of_range if any DNS RR formatted label's size would require
447 reaching beyond the end of the hostname provided (this is called a
448 buffer overrun, which is a potential security risk that can also
449 cause a segmentation fault)
450 @returns The same rhostname object so as to facilitate stacking
451 @see set()
452 *///=========================================================================
453 rhostname* set(
454 /// The hostname as a char* array
455 const char* hostname,
456 /// Length of hostname string (0 = ASCIIZ string)
457 size_t len = 0,
458 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
459 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
460 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
461 const int flags = rlabel::HOSTNAME_DEFAULT) {
462
463 // --------------------------------------------------------------------------
464 // Start over (re-initialization).
465 // --------------------------------------------------------------------------
466 clear();
467
468 // --------------------------------------------------------------------------
469 // Measure size of format string if an ASCIIZ string was indicated.
470 // --------------------------------------------------------------------------
471 if (len == 0) len = std::strlen(hostname);
472
473 // --------------------------------------------------------------------------
474 // Split DNS RR into labels.
475 // --------------------------------------------------------------------------
476 if (flags & rlabel::HOSTNAME_DNS_RR) {
477 for (int i = 0; i < len; i++) {
478 size_t bytes = (u_char)hostname[i];
479 if (bytes + i >= len) { // Bounds checking to prevent a buffer overrun
480 throw std::out_of_range("Hostname DNS RR label size out of range");
481 } // -x- if out-of-bounds -x-
482 __labels.push_back(rlabel(std::string(hostname + i, bytes + 1), flags));
483 i += bytes;
484 } // -x- for i -x-
485 return this;
486 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
487
488 // --------------------------------------------------------------------------
489 // Split hostname into labels.
490 // --------------------------------------------------------------------------
491 int plateau = 0;
492 int i;
493 for (i = 0; i < len; i++) {
494 if (hostname[i] == '.') {
495 __labels.push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
496 plateau = i + 1; // Advance to the next position after the period (dot delimiter)
497 } // -x- if . -x-
498 } // -x- for i -x-
499 if (plateau < i) __labels.push_back(rlabel(std::string(hostname + plateau, i - plateau), flags));
500
501 return this;
502 }; // -x- rhostname set -x-
503
504 /*======================================================================*//**
505 @brief
506 Convert the underlying hostname to a DNS RR and return it as an std::string.
507 @returns A new std::string with the expected data
508 *///=========================================================================
509 std::string to_dns_rr() {
510 return to_string(rlabel::HOSTNAME_DNS_RR);
511 }; // -x- std::string to_dns_rr -x-
512
513 /*======================================================================*//**
514 @brief
515 Return as an @c std::string the hostname that this @c rhostname represents.
516
517 @par Examples
518
519 @code{.cpp}
520 #include <iostream> // std::cout, std::cerr, std::endl, etc.
521 #include <stdexcept> // std::invalid_argument exception
522
523 #include <randolf/rhostname>
524
525 int main(int argc, char *argv[]) {
526 try {
    randolf::rhostname h("www.example.com");
    std::cout << "Hostname: " << h.to_string() << std::endl;
  } catch (const std::invalid_argument& e) {
    std::cerr << "Hostname format exception: " << e.what() << std::endl;
    return EXIT_FAILURE;
  } catch (const std::exception& e) {
533 std::cerr << "Other exception: " << e.what() << std::endl;
534 return EXIT_FAILURE;
535 }
536 return EXIT_SUCCESS;
537 } // -x- int main -x-
538 @endcode
539 @returns The hostname that this rhostname represents
540 @see set()
541 *///=========================================================================
542 std::string to_string(
543 /// @ref rlabel::HOSTNAME_FQDN_OPT includes the trailing period in the resulting
544 /// std::string if @ref is_fqdn is TRUE@n
545 /// @ref rlabel::HOSTNAME_DNS_RR Convert all labels from DNS RR format@n
546 /// @ref rlabel::HOSTNAME_UTF8 Convert all labels to raw UTF-8 format (optional)@n
547 /// @ref rlabel::HOSTNAME_XN Convert all labels to punycode format (optional)
548 const int flags = rlabel::HOSTNAME_DEFAULT) noexcept {
549
550 // --------------------------------------------------------------------------
551 // Internal variables.
552 // --------------------------------------------------------------------------
553 std::string hostname;
554 const size_t MAX = __labels.size();
555
556 // --------------------------------------------------------------------------
557 // If this is empty, then end it now to avoid parsing ghosts from the heap.
558 // --------------------------------------------------------------------------
559 if (MAX == 0) return hostname;
560
561 // --------------------------------------------------------------------------
562 // If this is a DNS RR, then just build the RR and return it without an FQDN
563 // check because the final period can't be expressed in a DNS RR due to its
564 // different purpose.
565 // --------------------------------------------------------------------------
566 if (flags & rlabel::HOSTNAME_DNS_RR) {
567 for (int i = 0; i < MAX; i++) hostname.append(__labels[i].get(flags));
568 return hostname;
569 } // -x- if rlabel::HOSTNAME_DNS_RR -x-
570
571 // --------------------------------------------------------------------------
572 // Optimized loop to join labels into a dot-delimited hostname, by avoiding
573 // repetitive comparisons for loop position 0 to determine whether to append
574 // the period ("."), we save at least a few CPU cycles overall.
575 // --------------------------------------------------------------------------
576 hostname.append(__labels[0].get(flags)); // Add first rlabel, no matter what
577 for (int i = 1; i < MAX; i++) hostname.append(".").append(__labels[i].get(flags));
578
579 // --------------------------------------------------------------------------
580 // Optionally add trailing period if this is an FQDN.
581 // --------------------------------------------------------------------------
582 if ((flags & rlabel::HOSTNAME_FQDN_OPT) && __fqdn) hostname.append(".");
583
584 return hostname;
585 }; // -x- std::string to_string -x-
586
587 /*======================================================================*//**
588 @brief
589 Convert the underlying hostname to UTF-8 and return it as an std::string.
590 @returns A new std::string with the expected data
591 *///=========================================================================
592 std::string to_utf8() {
593 return to_string(rlabel::HOSTNAME_UTF8);
594 }; // -x- std::string to_utf8 -x-
595
596 /*======================================================================*//**
597 @brief
598 Convert the underlying hostname to punycode and return it as an std::string.
599 @returns A new std::string with the expected data
600 *///=========================================================================
601 std::string to_xn() {
602 return to_string(rlabel::HOSTNAME_XN);
603 }; // -x- std::string to_xn -x-
604
605 }; // -x- class rhostname -x-
606
607}; // -x- namespace randolf -x-