hostname from url (v9)

Revision 9 of this benchmark, created by gorhill.


Setup

// Sample URL that every benchmark case extracts the hostname from.
var link = "http://stackoverflow.com/questions/8498592/extract-root-domain-name-from-string";
  // Shared variable that each benchmark case assigns its result to.
  var host;
  
  // parseUri 1.2.2
  // (c) Steven Levithan <stevenlevithan.com>
  // MIT License
  
  /**
   * Splits a URI string into named components.
   *
   * The regular expression is picked from parseUri.options.parser according
   * to parseUri.options.strictMode; capture group N is stored under the name
   * parseUri.options.key[N].
   *
   * @param {string} str - URI to split.
   * @returns {Object} One string property per entry of parseUri.options.key
   *   ("" when the group captured nothing), plus an object stored under
   *   parseUri.options.q.name that maps each non-empty query-string key to
   *   its value.
   */
  function parseUri(str) {
    var opts = parseUri.options;
    var mode = opts.strictMode ? "strict" : "loose";
    var captures = opts.parser[mode].exec(str);
    var parsed = {};
    var queryPairs = {};
    var idx;

    // Copy the 14 capture groups (index 13 down to 0) under their key names.
    for (idx = 13; idx >= 0; idx -= 1) {
      parsed[opts.key[idx]] = captures[idx] || "";
    }

    parsed[opts.q.name] = queryPairs;

    // String#replace is used only to visit every key/value pair of the query
    // component (key index 12); its return value is discarded.
    parsed[opts.key[12]].replace(opts.q.parser, function(whole, name, value) {
      if (name) {
        queryPairs[name] = value;
      }
    });

    return parsed;
  }

  /**
   * Settings read by parseUri on each call.
   *   strictMode - selects parser.strict when true, parser.loose otherwise.
   *   key        - component names, indexed by regex capture group.
   *   q          - name of the query map property and the regex that splits
   *                the query string into key/value pairs.
   *   parser     - the two URI-splitting regular expressions.
   */
  parseUri.options = {
    strictMode: false,
    key: [
      "source",
      "protocol",
      "authority",
      "userInfo",
      "user",
      "password",
      "host",
      "port",
      "relative",
      "path",
      "directory",
      "file",
      "query",
      "anchor"
    ],
    q: {
      name: "queryKey",
      parser: /(?:^|&)([^&=]*)=?([^&]*)/g
    },
    parser: {
      strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
      loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?((?:(([^:@]*)(?::([^:@]*))?)?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/
    }
  };
  
  // Single <a> element, created once and reused by every persistent_anchor call.
  var persistentAnchor = document.createElement('a');

  /**
   * Reads a URL's hostname by assigning the URL to the shared anchor's href
   * and returning the anchor's hostname property.
   *
   * @param {string} data - URL assigned to the anchor's href.
   * @returns {string} The hostname property of the anchor after assignment.
   */
  function persistent_anchor(data) {
    var anchor = persistentAnchor;
    anchor.href = data;
    return anchor.hostname;
  }
  
  /**
   * Extracts the lowercased hostname from a URI using regular expressions only.
   *
   * Returns an empty string for "data:..." or any other URI without a
   * "//authority" part, and for an authority whose host is neither a run of
   * [0-9a-z.-] characters nor a bracketed IPv6 literal.
   *
   * Design notes:
   * - The IPv6 case has its own regex so that the common hostname regex stays
   *   simple; this keeps the usual path fast when IPv6 addresses are rare.
   * - For run-of-the-mill URLs the authority is merely *tested* for containing
   *   only hostname characters. Only when that test fails is the longer path
   *   taken to strip userinfo and/or port.
   *
   * @param {string} uri - URI to inspect.
   * @returns {string} Lowercased hostname (IPv6 literals keep their brackets),
   *   or '' when no hostname can be extracted.
   */
  var any_uri = (function() {
    // Optional "scheme:" followed by "//authority"; the authority ends at the
    // first "/", "?" or "#".
    var reAuthorityFromURI = /^(?:[^:\/?#]+:)?(\/\/[^\/?#]+)/;
    // Fast path: the whole authority consists of hostname characters only.
    var reHostFromBareAuthority = /^[0-9a-z.-]+$/i;
    // Slow path: optional "userinfo@", host, optional ":port". The userinfo
    // may be empty ("//@host"), which RFC 3986 allows.
    var reHostFromAuthority = /^(?:[^@]*@)?([0-9a-z.-]+)(?::\d*)?$/i;
    // Same shape for a bracketed IPv6 literal. "." is accepted so that
    // IPv4-embedded forms such as [::ffff:192.0.2.1] are recognized.
    var reIPv6FromAuthority = /^(?:[^@]*@)?(\[[0-9a-f:.]+\])(?::\d*)?$/i;

    return function(uri) {
      var matches = reAuthorityFromURI.exec(uri);
      if (!matches) {
        return '';
      }
      // Drop the leading "//" captured together with the authority.
      var authority = matches[1].slice(2);
      if (reHostFromBareAuthority.test(authority)) {
        return authority.toLowerCase();
      }
      matches = reHostFromAuthority.exec(authority) ||
        reIPv6FromAuthority.exec(authority);
      return matches ? matches[1].toLowerCase() : '';
    };
  })();

Test runner

Ready to run.

Testing in
Test | Ops/sec
parseUri
host = parseUri(link).host;
ready
persistent anchor
host = persistent_anchor(link);
ready
any_uri
host = any_uri(link);
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.