Microsoft Word creates awful HTML code. If you need to use it on a web page (especially in an HTML editor), you will want to clean it up a bit. Here’s a JavaScript function that will take a string of text copied from Microsoft Word, and return it minus all the extraneous formatting that Word adds:

/**
 * Strips the extraneous formatting Microsoft Word adds to pasted HTML.
 *
 * The cleanup performs, in order:
 *   1. removal of `<?xml ...>` declarations;
 *   2. removal of `class` and `style` attributes from every element;
 *   3. removal of Office namespace tags (`<o:...>`, `<v:...>`);
 *   4. conversion of non-breaking spaces to regular spaces;
 *   5. removal of SPAN, FONT, STRONG and H1-H6 tags (their content is kept);
 *   6. removal of paragraphs that are empty or contain only whitespace.
 *
 * All tag matching is case-insensitive.
 *
 * NOTE(review): this is a formatting cleanup, not a sanitizer. Scripts and
 * event-handler attributes are left in place, so sanitize untrusted markup
 * separately before inserting the result into a page.
 *
 * @param {string} wordContent HTML copied from Microsoft Word; null or
 *     undefined is treated as an empty string.
 * @returns {string} The cleaned HTML.
 */
function cleanWordContent(wordContent)
{
    /**
     * Removes `class` and `style` attributes from every element in `markup`.
     * Uses the DOM when a `document` is available, otherwise falls back to a
     * regex over the opening tags so the function also works outside a browser.
     */
    function removePresentationAttributes(markup)
    {
        if (typeof document !== "undefined" && document.createElement)
        {
            var wordDiv = document.createElement("DIV");
            wordDiv.innerHTML = markup;

            var elements = wordDiv.getElementsByTagName("*");
            for (var i = 0; i < elements.length; i++)
            {
                elements[i].removeAttribute("class");
                // Legacy Internet Explorer exposes the attribute as "className".
                elements[i].removeAttribute("className");
                elements[i].removeAttribute("style");
            }
            return wordDiv.innerHTML;
        }

        // No DOM: edit attributes inside opening tags only, so that ordinary
        // text which happens to contain "class=..." is left alone.
        return markup.replace(/<[a-z][^>]*>/gi, function (tag)
        {
            return tag.replace(/\s+(?:class|style)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, "");
        });
    }

    // Avoid turning null/undefined into the literal text "null"/"undefined".
    var html = (wordContent === null || wordContent === undefined) ? "" : String(wordContent);

    // Strip XML declarations before any DOM parsing: HTML parsers rewrite
    // "<?xml ...>" into a comment, which would no longer match afterwards.
    html = html.replace(/<\?xml[^>]*>/gi, "");

    html = removePresentationAttributes(html);

    // Office namespace tags such as <o:p> and <v:shape>.
    html = html.replace(/<\/?[ov]:[^>]*>/gi, "");

    // Replace non-breaking spaces with a regular space rather than deleting
    // them, so that "a&nbsp;b" does not collapse into "ab".
    html = html.replace(/&nbsp;|\u00A0/gi, " ");

    // Drop the wrapper tags but keep their content. \b prevents matching
    // longer tag names that merely start with one of these.
    html = html.replace(/<\/?(?:span|font|strong|h[1-6])\b[^>]*>/gi, "");

    // Remove paragraphs left empty (or whitespace-only) by the steps above.
    html = html.replace(/<p\b[^>]*>\s*<\/p>/gi, "");

    return html;
}

 

Leave a Reply

(required)

(required)

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

© 2012 TechBubble Suffusion theme by Sayontan Sinha