1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
/*
 * Copyright (C) 2015 Benjamin Fry <benjaminfry@me.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//! domain name, aka labels, implementation

use std::ops::Index;
use std::sync::Arc as Rc;
use std::fmt;
use std::cmp::Ordering;
use std::char;

use ::serialize::binary::*;
use ::error::*;

/// A domain name, held as an ordered list of labels.
///
/// Index 0 is the leftmost label as written (e.g. `www` in `www.example.com`); a name
/// with no labels is the root. Note that `Rc` in this file is an alias for
/// `std::sync::Arc`, so both the label list and each label string are atomically
/// reference counted and cloning a `Name` does not copy the strings.
///
/// TODO: all Names should be stored in a global "intern" space, and then everything that uses
///  them should be through references. As a workaround the Strings are all Rc as well as the array
/// TODO: Currently this probably doesn't support binary names, it would be nice to do that.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Name {
  // labels in presentation order, leftmost label first; empty for the root name
  labels: Rc<Vec<Rc<String>>>
}

impl Name {
  /// Creates a name with no labels, which is the same thing as the root name.
  pub fn new() -> Self {
    Name { labels: Rc::new(Vec::new()) }
  }

  /// Returns the root name, i.e. a name with no labels.
  // can probably make this better in the future.
  pub fn root() -> Self {
    Self::new()
  }

  /// Returns true if this name has no labels.
  pub fn is_root(&self) -> bool {
    self.labels.is_empty()
  }

  /// Inline builder: consumes the name and returns it with `label` appended at the end.
  ///
  /// The label is stored exactly as given (no lowercasing, no escape processing).
  ///
  /// # Panics
  ///
  /// Panics if the resulting name has 256 or more labels.
  pub fn label(mut self, label: &str) -> Self {
    self.add_label(Rc::new(label.into()));
    self
  }

  /// Builds a name from an already split list of labels, leftmost label first.
  ///
  /// # Panics
  ///
  /// Panics if `labels` has 256 or more entries.
  pub fn with_labels(labels: Vec<String>) -> Self {
    assert!(labels.len() < 256); // this should be an error
    Name { labels: Rc::new(labels.into_iter().map(Rc::new).collect()) }
  }

  /// Returns a new name made of `label` followed by all the labels of this name.
  ///
  /// # Panics
  ///
  /// Panics if the resulting name has 256 or more labels.
  pub fn prepend_label(&self, label: Rc<String>) -> Self {
    let mut new_labels: Vec<Rc<String>> = Vec::with_capacity(self.labels.len() + 1);
    new_labels.push(label);
    // only the reference counted pointers are cloned, not the strings
    new_labels.extend(self.labels.iter().cloned());

    assert!(new_labels.len() < 256); // this should be an error
    Name{ labels: Rc::new(new_labels) }
  }

  /// Appends `label` to the end of this name, in place.
  ///
  /// # Panics
  ///
  /// Panics if the resulting name has 256 or more labels.
  pub fn add_label(&mut self, label: Rc<String>) -> &mut Self {
    // TODO get_mut() on Arc was unstable when this was written
    let mut new_labels: Vec<Rc<String>> = (*self.labels).clone();
    new_labels.push(label);
    self.labels = Rc::new(new_labels);
    assert!(self.labels.len() < 256); // this should be an error
    self
  }

  /// Appends all the labels of `other` to the end of this name, in place.
  ///
  /// # Panics
  ///
  /// Panics if the resulting name has 256 or more labels.
  pub fn append(&mut self, other: &Self) -> &mut Self {
    // copy the label list once, instead of once per appended label
    let mut new_labels: Vec<Rc<String>> = Vec::with_capacity(self.labels.len() + other.labels.len());
    new_labels.extend(self.labels.iter().cloned());
    new_labels.extend(other.labels.iter().cloned());

    assert!(new_labels.len() < 256); // this should be an error
    self.labels = Rc::new(new_labels);
    self
  }

  /// Trims off the first (leftmost) label of the name, to help with searching for the
  /// domain piece. The base name of the root is the root.
  pub fn base_name(&self) -> Name {
    if self.labels.is_empty() {
      Self::root()
    } else {
      Name { labels: Rc::new(self.labels[1..].to_vec()) }
    }
  }

  /// Returns true if the labels of `self` are all present, in order, at the end of `name`.
  ///
  /// The root is a zone of every name. A `name` with fewer labels than `self` is never
  /// inside the zone, so this returns false for it rather than underflowing an index.
  pub fn zone_of(&self, name: &Self) -> bool {
    // ends_with() performs the length check before comparing the trailing labels
    name.labels.ends_with(&self.labels[..])
  }

  /// Returns the number of labels in the name, not counting a leading `*` wildcard label.
  pub fn num_labels(&self) -> u8 {
    // it is illegal to have more than 256 labels.
    let num = self.labels.len() as u8;
    if num > 0 && self[0] == "*" {
      return num - 1
    }

    num
  }

  /// Returns the length in bytes of the labels. '.' counts as 1
  ///
  /// Every label contributes its byte length plus one for its dot; the root has length 1.
  pub fn len(&self) -> usize {
    let dots = if self.labels.len() > 0 { self.labels.len() } else { 1 };
    self.labels.iter().fold(dots, |acc, item| acc + item.len())
  }

  /// Parses the textual form of a name.
  ///
  /// Labels are separated by unescaped `.` characters and are stored lowercase. A
  /// backslash escapes the next character: `\X` inserts `X` literally, and `\DDD` (three
  /// digits) inserts the character whose value is the decimal number `DDD`, as specified
  /// for master files in RFC 1035 section 5.1. Empty labels are skipped, so `"."` parses
  /// as the root name.
  ///
  /// If `local` does not end with a `.`, the labels of `origin` are appended.
  ///
  /// Note that an escape sequence left unfinished at the end of `local` is dropped.
  ///
  /// # Errors
  ///
  /// * `LexerError::UnrecognizedChar` for an unescaped control or whitespace character,
  ///   or for a non-digit inside a `\DDD` escape
  /// * `LexerError::UnrecognizedOctet` if a `\DDD` escape is larger than 255
  /// * `ParseError::OriginIsUndefined` if `local` is relative and `origin` is `None`
  ///
  /// # Panics
  ///
  /// Panics if the resulting name has 256 or more labels.
  pub fn parse(local: &str, origin: Option<&Self>) -> ParseResult<Self> {
    let mut name = Name::new();
    let mut label = String::new();
    // split the local part

    let mut state = ParseState::Label;

    for ch in local.chars() {
      match state {
        ParseState::Label => {
          match ch {
            '.' => {
              // an empty label would be written as the terminating zero octet on the
              //  wire, so never store one; this also makes "." the root name.
              if !label.is_empty() {
                name.add_label(Rc::new(label.clone()));
                label.clear();
              }
            },
            '\\' => state = ParseState::Escape1,
            ch if !ch.is_control() && !ch.is_whitespace() => label.push(ch.to_lowercase().next().unwrap_or(ch)),
            _ => return Err(ParseError::LexerError(LexerError::UnrecognizedChar(ch))),
          }
        },
        ParseState::Escape1 => {
          if ch.is_numeric() { state = ParseState::Escape2(try!(ch.to_digit(10).ok_or(LexerError::IllegalCharacter(ch)))) }
          else {
            // it's a single escaped char
            label.push(ch);
            state = ParseState::Label;
          }
        },
        ParseState::Escape2(i) => {
          if ch.is_numeric() {
            state = ParseState::Escape3(i, try!(ch.to_digit(10).ok_or(LexerError::IllegalCharacter(ch))));
          } else { return Err(ParseError::LexerError(LexerError::UnrecognizedChar(ch))) }
        },
        ParseState::Escape3(i, ii) => {
          if ch.is_numeric() {
            let digit: u32 = try!(ch.to_digit(10).ok_or(LexerError::IllegalCharacter(ch)));
            // \DDD is a decimal number: hundreds, tens, units
            let val: u32 = (i * 100) + (ii * 10) + digit;
            // the escape describes a single octet, anything above 255 is not one
            if val > 255 { return Err(ParseError::LexerError(LexerError::UnrecognizedOctet(val))) }
            let new: char = try!(char::from_u32(val).ok_or(ParseError::LexerError(LexerError::UnrecognizedOctet(val))));
            label.push(new.to_lowercase().next().unwrap_or(new));
            state = ParseState::Label;
          } else { return Err(ParseError::LexerError(LexerError::UnrecognizedChar(ch))) }
        },
      }
    }

    if !label.is_empty() { name.add_label(Rc::new(label)); }

    // TODO: this should be a real lexer, to verify all data is a legal name...

    // no trailing dot means the name is relative to the origin
    if !local.ends_with('.') {
      name.append(try!(origin.ok_or(ParseError::OriginIsUndefined)));
    }

    Ok(name)
  }

  /// Writes the name to `encoder` in wire format.
  ///
  /// When `canonical` is true every label is written out in full. Otherwise, before each
  /// label is written, the remaining labels are looked up in the encoder; if they were
  /// already written a two octet compression pointer to them is emitted and the name is
  /// finished. Labels that are written out are registered with the encoder so that
  /// later names can point at them.
  ///
  /// # Errors
  ///
  /// * `EncodeError::LabelBytesTooLong` if a label is longer than 63 bytes
  /// * `EncodeError::DomainNameTooLong` if more than 255 bytes were written for a name
  ///   that did not end in a pointer
  /// * any error returned by the encoder itself
  pub fn emit_as_canonical(&self, encoder: &mut BinEncoder, canonical: bool) -> EncodeResult {
    let buf_len = encoder.len(); // lazily assert the size is less than 255...
    // lookup the label in the BinEncoder
    // if it exists, write the Pointer
    let mut labels: &[Rc<String>] = &self.labels;

    if canonical {
      for label in labels {
        // a length octet above 63 would collide with the pointer/reserved marker bits
        if label.len() > 63 { return Err(EncodeError::LabelBytesTooLong(label.len())); }
        try!(encoder.emit_character_data(label));
      }
    } else {
      while let Some(label) = labels.first() {
        // before we write the label, let's look for the current set of labels.
        if let Some(loc) = encoder.get_label_pointer(labels) {
          // write out the pointer marker
          //  or'd with the location which shouldn't be larger than 2^14 or 16k
          try!(encoder.emit_u16(0xC000u16 | (loc & 0x3FFFu16)));

          // we found a pointer don't write more, break
          return Ok(())
        } else {
          if label.len() > 63 { return Err(EncodeError::LabelBytesTooLong(label.len())); }

          // to_owned is cloning the vector, but the Rc's at least don't clone the strings.
          encoder.store_label_pointer(labels.to_owned());
          try!(encoder.emit_character_data(label));

          // return the next parts of the labels
          //  this should be safe, the labels.first() wouldn't have let us here if there wasn't
          //  at least one item.
          labels = &labels[1..];
        }
      }
    }

    // if we're getting here, then we didn't write out a pointer and are ending the name
    // the end of the list of names
    try!(encoder.emit(0));

    // the entire name needs to be less than 256.
    let length = encoder.len() - buf_len;
    if length > 255 { return Err(EncodeError::DomainNameTooLong(length)); }

    Ok(())
  }
}

/// States of the lexer used by `Name::parse` when reading the textual form of a name.
enum ParseState {
  /// reading the ordinary characters of a label
  Label,
  /// a backslash was just read; the next char is either a literal or the first digit of `\DDD`
  Escape1,
  /// inside a `\DDD` escape, holding the value of the first digit
  Escape2(u32),
  /// inside a `\DDD` escape, holding the values of the first and second digits
  Escape3(u32,u32),
}

impl BinSerializable<Name> for Name {
  /// Reads a name, i.e. a chain of labels, from its wire format.
  ///
  /// The next octet of the decoder decides what follows:
  ///
  /// * `0`, or no more data: the root, which terminates the name
  /// * both high bits set (`11xx xxxx`): a compression pointer, see RFC 1035 section 4.1.4.
  ///   The low 14 bits of the two octet value are an offset from the start of the message;
  ///   the name found there is read and its labels finish this name.
  /// * both high bits clear (`00xx xxxx`): a label, read as character data
  /// * anything else (`10` and `01` are reserved): `DecodeError::UnrecognizedLabelCode`
  ///
  /// This consumes the portion of the decoder which it is reading.
  fn read(decoder: &mut BinDecoder) -> DecodeResult<Name> {
    // most names will have around three labels, e.g. www.example.com
    let mut parsed: Vec<Rc<String>> = Vec::with_capacity(3);
    let mut state: LabelParseState = LabelParseState::LabelLengthOrPointer;

    // we're doing byte-by-byte operations, so there are no endianness issues...
    loop {
      let next = match state {
        LabelParseState::LabelLengthOrPointer => {
          // look at the next octet, without consuming it, to decide what it starts
          match decoder.peek() {
            None | Some(0) => LabelParseState::Root,
            Some(byte) => match byte & 0b1100_0000 {
              0b1100_0000 => LabelParseState::Pointer,
              0b0000_0000 => LabelParseState::Label,
              _ => return Err(DecodeError::UnrecognizedLabelCode(byte)),
            },
          }
        },
        LabelParseState::Label => {
          let label = try!(decoder.read_character_data());
          parsed.push(Rc::new(label));

          // go back and inspect the octet after this label
          LabelParseState::LabelLengthOrPointer
        },
        LabelParseState::Pointer => {
          // mask off the two marker bits, what remains is the offset
          let offset = try!(decoder.read_u16()) & 0x3FFF;
          let mut target_decoder = decoder.clone(offset);
          let target = try!(Name::read(&mut target_decoder));

          // only the reference counted pointers are cloned, not the strings
          parsed.extend(target.labels.iter().cloned());

          // a pointer always finishes the name, exactly like the root does
          break;
        },
        LabelParseState::Root => {
          // the terminating 0 was only peeked at, consume it now
          try!(decoder.pop());
          break;
        },
      };

      state = next;
    }

    Ok(Name { labels: Rc::new(parsed) })
  }

  /// Writes the name to the encoder, in canonical form if the encoder asks for it.
  fn emit(&self, encoder: &mut BinEncoder) -> EncodeResult {
    // read the flag first, the encoder is handed over mutably right after
    let canonical = encoder.is_canonical_names();
    self.emit_as_canonical(encoder, canonical)
  }
}

impl fmt::Display for Name {
  /// Writes every label followed by a `.`; the root name is written as a single `.`
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    if self.is_root() {
      return write!(f, ".");
    }

    for part in self.labels.iter() {
      try!(write!(f, "{}.", part));
    }

    Ok(())
  }
}

impl Index<usize> for Name {
    type Output = String;

    fn index<'a>(&'a self, _index: usize) -> &'a String {
        &*(self.labels[_index])
    }
}

impl PartialOrd<Name> for Name {
  /// Names are totally ordered, so this always returns `Some` of the `Ord` result.
  fn partial_cmp(&self, other: &Name) -> Option<Ordering> {
    let ordering = Ord::cmp(self, other);
    Some(ordering)
  }
}

impl Ord for Name {
  /// Orders names label by label, starting from the rightmost (most significant) label.
  ///
  /// The first pair of labels that differs decides the result; if one name runs out of
  /// labels first, the name with fewer labels sorts first. Labels are compared as stored.
  ///
  /// RFC 4034                DNSSEC Resource Records               March 2005
  ///
  /// ```text
  /// 6.1.  Canonical DNS Name Order
  ///
  ///  For the purposes of DNS security, owner names are ordered by treating
  ///  individual labels as unsigned left-justified octet strings.  The
  ///  absence of a octet sorts before a zero value octet, and uppercase
  ///  US-ASCII letters are treated as if they were lowercase US-ASCII
  ///  letters.
  ///
  ///  To compute the canonical ordering of a set of DNS names, start by
  ///  sorting the names according to their most significant (rightmost)
  ///  labels.  For names in which the most significant label is identical,
  ///  continue sorting according to their next most significant label, and
  ///  so forth.
  ///
  ///  For example, the following names are sorted in canonical DNS name
  ///  order.  The most significant label is "example".  At this level,
  ///  "example" sorts first, followed by names ending in "a.example", then
  ///  by names ending "z.example".  The names within each level are sorted
  ///  in the same way.
  ///
  ///            example
  ///            a.example
  ///            yljkjljk.a.example
  ///            Z.a.example
  ///            zABC.a.EXAMPLE
  ///            z.example
  ///            \001.z.example
  ///            *.z.example
  ///            \200.z.example
  /// ```
  fn cmp(&self, other: &Self) -> Ordering {
    // walk both names from the rightmost label towards the left
    let mine = self.labels.iter().rev();
    let theirs = other.labels.iter().rev();

    for (l, r) in mine.zip(theirs) {
      let ordering = l.cmp(r);
      if ordering != Ordering::Equal { return ordering }
    }

    // all the shared labels are equal, the name with fewer labels sorts first
    self.labels.len().cmp(&other.labels.len())
  }
}

/// This is the list of states for the label parsing state machine used when reading a
/// name from its wire format.
enum LabelParseState {
  LabelLengthOrPointer, // basically the start of the FSM, inspects the next octet
  Label,   // a label follows; its length octet has the two high bits clear (63 octets or less)
  Pointer, // a compression pointer follows, holding the location of the rest of the name
  Root,    // root is the end of the labels list, aka null
}

/// Unit tests for `Name`: wire format round trips, compression pointers, name
/// manipulation helpers and canonical ordering.
#[cfg(test)]
mod tests {
  use super::*;
  use std::sync::Arc as Rc;
  use std::cmp::Ordering;

  use ::serialize::binary::bin_tests::{test_read_data_set, test_emit_data_set};
  use ::serialize::binary::*;

  /// Pairs of names and the wire format bytes each one is expected to map to.
  fn get_data() -> Vec<(Name, Vec<u8>)> {
    vec![
      (Name::new(), vec![0]), // base case, only the root
      (Name::new().label("a"), vec![1,b'a',0]), // a single 'a' label
      (Name::new().label("a").label("bc"), vec![1,b'a',2,b'b',b'c',0]), // two labels, 'a.bc'
      (Name::new().label("a").label("♥"), vec![1,b'a',3,0xE2,0x99,0xA5,0]), // two labels utf8, 'a.♥'
    ]
  }

  /// A leading `*` wildcard label is not counted by `num_labels`.
  #[test]
  fn num_labels() {
    assert_eq!(Name::new().label("*").num_labels(), 0);
    assert_eq!(Name::new().label("a").num_labels(), 1);
    assert_eq!(Name::new().label("*").label("b").num_labels(), 1);
    assert_eq!(Name::new().label("a").label("b").num_labels(), 2);
    assert_eq!(Name::new().label("*").label("b").label("c").num_labels(), 2);
    assert_eq!(Name::new().label("a").label("b").label("c").num_labels(), 3);
  }

  /// Despite its name this exercises `Name::read` (wire format), not `Name::parse`.
  #[test]
  fn parse() {
    test_read_data_set(get_data(), |ref mut d| Name::read(d));
  }

  /// Exercises `Name::emit` against the same data set.
  #[test]
  fn write_to() {
    test_emit_data_set(get_data(), |e, n| n.emit(e));
  }

  /// Names sharing a suffix with an earlier name are written as compression pointers
  /// and read back as the full name.
  #[test]
  fn test_pointer() {
    let mut bytes: Vec<u8> = Vec::with_capacity(512);

    let first = Name::new().label("ra").label("rb").label("rc");
    let second = Name::new().label("rb").label("rc");
    let third = Name::new().label("rc");
    let fourth = Name::new().label("z").label("ra").label("rb").label("rc");

    {
      let mut e = BinEncoder::new(&mut bytes);

      first.emit(&mut e).unwrap();
      assert_eq!(e.len(), 10); // 3 labels * (1 length octet + 2 bytes) + 1 root octet

      second.emit(&mut e).unwrap();
      // if this wrote the entire thing, then it would be +7... but a pointer should be +2
      assert_eq!(e.len(), 12);

      // again only a pointer, +2
      third.emit(&mut e).unwrap();
      assert_eq!(e.len(), 14);

      // the 'z' label (+2) followed by a pointer to the first name (+2)
      fourth.emit(&mut e).unwrap();
      assert_eq!(e.len(), 18);
    }

    // now read them back
    let mut d = BinDecoder::new(&bytes);

    let r_test = Name::read(&mut d).unwrap();
    assert_eq!(first, r_test);

    let r_test = Name::read(&mut d).unwrap();
    assert_eq!(second, r_test);

    let r_test = Name::read(&mut d).unwrap();
    assert_eq!(third, r_test);

    let r_test = Name::read(&mut d).unwrap();
    assert_eq!(fourth, r_test);
  }

  /// `base_name` drops the leftmost label and stays at the root once it gets there.
  #[test]
  fn test_base_name() {
    let zone = Name::new().label("example").label("com");

    assert_eq!(zone.base_name(), Name::new().label("com"));
    assert!(zone.base_name().base_name().is_root());
    assert!(zone.base_name().base_name().base_name().is_root());
  }

  /// `prepend_label` puts the new label in front of the existing ones.
  #[test]
  fn test_prepend() {
    let zone = Name::new().label("example").label("com");
    let www = zone.prepend_label(Rc::new("www".to_string()));

    assert_eq!(www, Name::new().label("www").label("example").label("com"));
  }

  /// A zone contains itself and names ending in it, but not names with another suffix.
  #[test]
  fn test_zone_of() {
    let zone = Name::new().label("example").label("com");
    let www = Name::new().label("www").label("example").label("com");
    let none = Name::new().label("none").label("com");

    assert!(zone.zone_of(&zone));
    assert!(zone.zone_of(&www));
    assert!(!zone.zone_of(&none))
  }

  /// Equal names, including root against root, compare as `Ordering::Equal`.
  #[test]
  fn test_partial_cmp_eq() {
    let root = Some(Name::with_labels(vec![]));
    let comparisons:Vec<(Name, Name)> = vec![
     (root.clone().unwrap(), root.clone().unwrap()),
     (Name::parse("example", root.as_ref()).unwrap(), Name::parse("example", root.as_ref()).unwrap()),
     ];

    for (left, right) in comparisons {
      println!("left: {}, right: {}", left, right);
      assert_eq!(left.partial_cmp(&right), Some(Ordering::Equal));
     }
  }

  /// Each adjacent pair of the RFC 4034 section 6.1 example list must compare as `Less`.
  #[test]
  fn test_partial_cmp() {
    let root = Some(Name::with_labels(vec![]));
    let comparisons:Vec<(Name, Name)> = vec![
     (Name::parse("example", root.as_ref()).unwrap(), Name::parse("a.example", root.as_ref()).unwrap()),
     (Name::parse("a.example", root.as_ref()).unwrap(), Name::parse("yljkjljk.a.example", root.as_ref()).unwrap()),
     (Name::parse("yljkjljk.a.example", root.as_ref()).unwrap(), Name::parse("Z.a.example", root.as_ref()).unwrap()),
     (Name::parse("Z.a.example", root.as_ref()).unwrap(), Name::parse("zABC.a.EXAMPLE", root.as_ref()).unwrap()),
     (Name::parse("zABC.a.EXAMPLE", root.as_ref()).unwrap(), Name::parse("z.example", root.as_ref()).unwrap()),
     (Name::parse("z.example", root.as_ref()).unwrap(), Name::parse("\\001.z.example", root.as_ref()).unwrap()),
     (Name::parse("\\001.z.example", root.as_ref()).unwrap(), Name::parse("*.z.example", root.as_ref()).unwrap()),
     (Name::parse("*.z.example", root.as_ref()).unwrap(), Name::parse("\\200.z.example", root.as_ref()).unwrap())];

    for (left, right) in comparisons {
      println!("left: {}, right: {}", left, right);
      assert_eq!(left.partial_cmp(&right), Some(Ordering::Less));
     }
  }
}