/*
 * to-markdown - an HTML to Markdown converter
 *
 * Copyright 2011, Dom Christie
 * Licenced under the MIT licence
 *
 */

/**
 * Converts a string of HTML into Markdown.
 *
 * The conversion is purely textual: a fixed sequence of regular-expression
 * substitutions is applied to the input, no DOM is built. The passes run in
 * this order, and later passes rely on the output of earlier ones:
 *
 *   1. inline/simple elements (p, br, h1-h6, hr, a, b/strong, i/em, code, img)
 *   2. <pre> blocks whose content was turned into a backtick span by pass 1
 *   3. escaping of "N. " line prefixes that Markdown would read as a list
 *   4. ul/ol lists, innermost first
 *   5. blockquotes, innermost first
 *   6. whitespace clean-up
 *
 * @param {string} string - HTML source to convert.
 * @returns {string} The Markdown rendering of `string`.
 */
var toMarkdown = function(string) {

  /**
   * Looks up one attribute in the raw attribute text of an opening tag.
   *
   * The name must be preceded by the start of the text, whitespace or a
   * quote, so `title` is not found inside `data-title`. Double-quoted,
   * single-quoted and unquoted values are supported, and a quoted value may
   * contain the other kind of quote.
   *
   * @param {string} attrs - Text between the tag name and the closing `>`.
   * @param {string} name - Attribute name (matched case-insensitively).
   * @returns {?string} The attribute value, or null when it is absent.
   */
  function getAttr(attrs, name) {
    var pattern = new RegExp(
          '(?:^|[\\s"\'])' + name + '\\s*=\\s*(?:"([^"]*)"|\'([^\']*)\'|([^\\s"\'>]+))',
          'i'
        ),
        match = attrs.match(pattern);

    if (!match) {
      return null;
    }
    if (match[1] !== undefined) {
      return match[1];
    }
    return match[2] !== undefined ? match[2] : match[3];
  }

  /**
   * Builds the optional ` "title"` suffix used by links and images.
   *
   * @param {string} attrs - Raw attribute text of the tag.
   * @returns {string} ` "<title>"`, or '' when there is no non-empty title.
   */
  function titlePart(attrs) {
    var title = getAttr(attrs, 'title');
    return title ? ' "' + title + '"' : '';
  }

  // Each entry names one or more tag patterns and how to rewrite them.
  // `type: 'void'` marks elements that have no closing tag.
  var ELEMENTS = [
    {
      patterns: 'p',
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '\n\n' + innerHTML + '\n' : '';
      }
    },
    {
      patterns: 'br',
      type: 'void',
      replacement: '\n'
    },
    {
      // The tag pattern itself contains a capture group, so the heading
      // level arrives as the first captured argument, ahead of attrs.
      patterns: 'h([1-6])',
      replacement: function(str, hLevel, attrs, innerHTML) {
        var hPrefix = new Array(Number(hLevel) + 1).join('#');
        return '\n\n' + hPrefix + ' ' + innerHTML + '\n';
      }
    },
    {
      patterns: 'hr',
      type: 'void',
      replacement: '\n\n* * *\n'
    },
    {
      patterns: 'a',
      replacement: function(str, attrs, innerHTML) {
        var href = getAttr(attrs, 'href');
        // An anchor without href is left as the original HTML
        return href === null ? str : '[' + innerHTML + '](' + href + titlePart(attrs) + ')';
      }
    },
    {
      patterns: ['b', 'strong'],
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '**' + innerHTML + '**' : '';
      }
    },
    {
      patterns: ['i', 'em'],
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '_' + innerHTML + '_' : '';
      }
    },
    {
      patterns: 'code',
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '`' + innerHTML + '`' : '';
      }
    },
    {
      patterns: 'img',
      type: 'void',
      replacement: function(str, attrs) {
        var src = getAttr(attrs, 'src'),
            alt = getAttr(attrs, 'alt');
        // Same policy as anchors: an image without src stays as HTML
        // instead of throwing on the missing attribute.
        return src === null ? str : '![' + (alt || '') + '](' + src + titlePart(attrs) + ')';
      }
    }
  ];

  /**
   * Rewrites every occurrence of one element in `html`.
   *
   * @param {string} html - Text to search.
   * @param {{tag: string, replacement: (string|Function), type: (string|undefined)}} elProperties
   *   `tag` is a regex fragment for the tag name; `replacement` is either a
   *   literal replacement string or a function receiving the whole match and
   *   the first three captured arguments.
   * @returns {string} `html` with the element replaced.
   */
  function replaceEls(html, elProperties) {
    var tag = elProperties.tag,
        pattern = elProperties.type === 'void' ?
          '<' + tag + '\\b([^>]*)\\/?>' :
          '<' + tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + tag + '>',
        regex = new RegExp(pattern, 'gi');

    if (typeof elProperties.replacement === 'string') {
      return html.replace(regex, elProperties.replacement);
    }
    return html.replace(regex, function(str, p1, p2, p3) {
      return elProperties.replacement(str, p1, p2, p3);
    });
  }

  /**
   * Turns the text of one list item (everything up to, but excluding, its
   * closing tag) into a Markdown list line.
   *
   * @param {string} item - Text containing the opening <li> tag.
   * @param {string} prefix - List marker, e.g. "*   " or "1.  ".
   * @returns {string} The Markdown list item.
   */
  function convertListItem(item, prefix) {
    return item.replace(/\s*<li\b[^>]*>([\s\S]*)/i, function(str, innerHTML) {
      innerHTML = innerHTML.replace(/^\s+/, '');
      innerHTML = innerHTML.replace(/\n\n/g, '\n\n    ');
      // Indent nested lists. The space run is matched by a single
      // quantifier: nesting another `+` around it matches nothing extra
      // and backtracks exponentially on long runs of spaces.
      innerHTML = innerHTML.replace(/\n([ ]*)(\*|\d+\.) /g, '\n$1    $2 ');
      return prefix + innerHTML;
    });
  }

  /**
   * Converts every list in `html` that contains no child list.
   *
   * @param {string} html - Text holding one or more childless ul/ol lists.
   * @returns {string} The Markdown lists, preceded by a blank line.
   */
  function replaceLists(html) {
    html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function(str, listType, innerHTML) {
      // Tag names are matched case-insensitively above, so the comparison
      // and the split below have to be case-insensitive as well.
      var isOrdered = listType.toLowerCase() === 'ol',
          lis = innerHTML.split(/<\/li>/i),
          idx,
          count;

      lis.pop(); // whatever follows the final closing li tag is not an item

      for (idx = 0, count = lis.length; idx < count; idx++) {
        if (lis[idx]) {
          lis[idx] = convertListItem(lis[idx], isOrdered ? (idx + 1) + '.  ' : '*   ');
        }
      }
      return lis.join('\n');
    });
    return '\n\n' + html.replace(/[ \t]+\n|\s+$/g, '');
  }

  /**
   * Converts every blockquote in `html` that contains no child blockquote.
   *
   * @param {string} html - Text holding one or more childless blockquotes.
   * @returns {string} The text with those blockquotes as "> " prefixed lines.
   */
  function replaceBlockquotes(html) {
    return html.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, function(str, inner) {
      inner = inner.replace(/^\s+|\s+$/g, '');
      inner = cleanUp(inner);
      inner = inner.replace(/^/gm, '> ');
      // Collapse the padding between the markers of a nested quote
      inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >');
      return inner;
    });
  }

  /**
   * Normalises line breaks in converted text.
   *
   * @param {string} text - Text to tidy.
   * @returns {string} `text` without leading/trailing tabs and line breaks
   *   and with at most one blank line between blocks.
   */
  function cleanUp(text) {
    text = text.replace(/^[\t\r\n]+|[\t\r\n]+$/g, ''); // spaces are kept on purpose: they may be code indentation
    text = text.replace(/\n\s+\n/g, '\n\n');
    text = text.replace(/\n{3,}/g, '\n\n'); // limit consecutive linebreaks to 2
    return text;
  }

  // Simple elements
  var e, p, element, tags;
  for (e = 0; e < ELEMENTS.length; e++) {
    element = ELEMENTS[e];
    tags = typeof element.patterns === 'string' ? [element.patterns] : element.patterns;
    for (p = 0; p < tags.length; p++) {
      string = replaceEls(string, { tag: tags[p], replacement: element.replacement, type: element.type });
    }
  }

  // Pre code blocks
  // By now <code> has become a backtick span, so a code block looks like
  // <pre>`...`</pre>. The body is matched lazily so that two separate blocks
  // are not merged into one.
  string = string.replace(/<pre\b[^>]*>`([\s\S]*?)`<\/pre>/gi, function(str, innerHTML) {
    innerHTML = innerHTML.replace(/^\t+/g, '  '); // a run of tabs at the very start becomes two spaces
    innerHTML = innerHTML.replace(/\n/g, '\n    ');
    return '\n\n    ' + innerHTML + '\n';
  });

  // Lists

  // Escape numbers that could trigger an ol
  // If there are more than three spaces before the code, it would be in a pre tag
  // Make sure we are escaping the period not matching any character
  // The m flag makes ^ match at every line start, not only at the start of the input
  string = string.replace(/^(\s{0,3}\d+)\. /gm, '$1\\. ');

  // Converts lists that have no child lists (of same type) first, then works its way up
  var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
  while (string.match(noChildrenRegex)) {
    string = string.replace(noChildrenRegex, function(str) {
      return replaceLists(str);
    });
  }

  // Blockquotes, innermost first
  var deepest = /<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)<\/blockquote>/gi;
  while (string.match(deepest)) {
    string = string.replace(deepest, function(str) {
      return replaceBlockquotes(str);
    });
  }

  return cleanUp(string);
};

if (typeof exports === 'object') {
  exports.toMarkdown = toMarkdown;
}