View Javadoc

1   /***
2    * Created on Nov 2, 2005, Copyright UC Regents
3    */
4   package wise2.converter.converters;
5   
6   import java.io.File;
7   import java.io.FileWriter;
8   import java.io.IOException;
9   import java.io.InputStream;
10  import java.net.URL;
11  import java.util.logging.Logger;
12  import java.util.regex.Matcher;
13  import java.util.regex.Pattern;
14  
15  import org.apache.commons.io.IOUtils;
16  import org.dom4j.Node;
17  import org.telscenter.pas.steps.AbstractUrlStep;
18  import org.telscenter.pas.steps.BrowseWeb;
19  
20  import wise2.converter.AbstractStepConverter;
21  import wise2.converter.util.HTMLUtils;
22  
23  /***
24   * @author turadg
25   *
26   */
27  public abstract class AbstractHtmlStepConverter extends AbstractStepConverter {
28  	private static final String CHARSET = "UTF-8";
29  	/***
30  	 * Logger for this class
31  	 */
32  	private static final Logger logger = Logger
33  			.getLogger(AbstractHtmlStepConverter.class.getName());
34  	private static int serialNumber;
35  
36  	protected void setUrlToIntroductionHtml(AbstractUrlStep urlstep) {
37  		Node htmlNode = stepNode.selectSingleNode("parameters/introHtml");
38  		String html = (htmlNode == null) ? "" : htmlNode.getText();
39  		html = HTMLUtils.tidyHtml(html);
40  		try {
41  			URL url = addHtmlToArchive(html);
42  			urlstep.setUrl(url);
43  		} catch (Exception e) {
44  			logger.severe("AbstractUrlStep -  : exception: " + e); //$NON-NLS-1$
45  		}
46  	}
47  	
48  	/***
49  	 * @param html
50  	 * @return url pointing to a file containing html stored in the pod archive
51  	 * @throws IOException 
52  	 */
53  	URL addHtmlToArchive(String html) throws IOException {
54  		html = resetDeclaredCharset(html, CHARSET);
55  		int htmlFileNumber = serialNumber++;
56  		String filename = htmlFileNumber+".html";
57  		InputStream source = IOUtils.toInputStream(html, CHARSET);
58  		return addToArchive(filename, source);
59  	}
60  
61  	protected void addHtmlToStep(String html, BrowseWeb step) {
62  
63  		html = HTMLUtils.tidyHtml(html, getStepTitle());
64  		
65  		// Check for "/upload/*" strings.  At least as image srcs these should be converted
66  		// to wise relative strings
67  		Pattern pattern = Pattern.compile("\"(/upload/[^\"]*)\"", Pattern.MULTILINE);
68  		Matcher m = pattern.matcher(html);
69  		StringBuffer sb = new StringBuffer();
70  		while(m.find()){
71  			m.appendReplacement(sb, "\"http://wise.berkeley.edu$1\"");
72  		}
73  		m.appendTail(sb);
74  		html = sb.toString();
75  
76  		// Save html files to an output folder if the following
77  		// property is set.  This is useful for debugging	
78  		String htmlOutputFolder = 
79  			System.getProperty("org.telscenter.html_output_folder");
80  		if(htmlOutputFolder != null){
81  			File folder = new File(htmlOutputFolder);
82  			folder.mkdirs();
83  			File stepFile= new File(folder, 
84  					getStepTitle() + ".html");
85  			FileWriter writer;
86  			try {
87  				writer = new FileWriter(stepFile);
88  				writer.write(html);
89  				writer.close();
90  			} catch (IOException e) {
91  				// TODO Auto-generated catch block
92  				e.printStackTrace();
93  			}
94  		}
95  		
96  		try {
97  			URL url = addHtmlToArchive(html);
98  			step.setUrl(url);
99  		} catch (Exception e) {
100 			logger.severe("exception: " + e); //$NON-NLS-1$
101 		}
102 	}
103 	
104 	private static String resetDeclaredCharset(String html, String charset) {
105         String patternStr = "charset=.+\"";
106         String replacementStr = "charset="+charset+"\"";
107 
108         // Compile regular expression
109         Pattern pattern = Pattern.compile(patternStr);
110 
111         // Replace all occurrences of pattern in input
112         Matcher matcher = pattern.matcher(html);
113         return matcher.replaceAll(replacementStr);
114 	}
115 
116 	/***
117 	 * @return introductionHtml parameter or empty string if absent
118 	 */
119 	String getIntroductionHtml() {
120 		try {
121 			
122 			String html = stepNode.selectSingleNode("parameters/introductionHtml")
123 					.getText();
124 			
125 			html = HTMLUtils.tidyHtml(html);
126 			
127 			return html;
128 		} catch (Exception e) {
129 			return "";
130 		}
131 	}
132 
133 }