Autodetect encoding for HTML files
Submitted by Tuesday, 15 November, 2005 - 09:27
on
In fact, I have made one; if you think it is useful, you can add this feature to new jEdit versions.
It finds text like content="text/html; charset=xxxxxx" at the beginning of the HTML.
[code]
BufferIORequest.java

/**
 * Tries to detect if the stream is gzipped, and if it has an encoding
 * specified with an XML PI.
 */
private Reader autodetect(InputStream in) throws IOException
{
	in = new BufferedInputStream(in);
	String encoding = buffer.getStringProperty(Buffer.ENCODING);
	if (!in.markSupported())
		Log.log(Log.WARNING, this, "Mark not supported: " + in);
	else if (buffer.getBooleanProperty(Buffer.ENCODING_AUTODETECT))
	{
		{// neoe add: detect html's encoding
			String enc = getHtmlEncoding(in);
			if (enc != null && MiscUtilities.isSupportedEncoding(enc))
			{
				buffer.setProperty(Buffer.ENCODING, enc);
				return new InputStreamReader(in, enc);
			}
		}
		.... (original lines)

/**
 * Looks for a <code>charset=</code> declaration (for example
 * <code>content="text/html; charset=utf-8"</code> or
 * <code>&lt;meta charset="utf-8"&gt;</code>) in the first bytes of the
 * stream and returns the encoding name that follows it.
 * Added by neoedmund.
 *
 * The stream is marked before reading and reset afterwards, so the
 * caller sees it at the same position as before the call. Only the
 * first 1000 bytes are examined; a declaration beyond that point, or
 * one cut off by that limit, is not (fully) seen.
 *
 * @param in a stream that supports mark/reset
 * @return the declared encoding name, or null if no non-empty
 * declaration was found in the examined bytes
 * @throws IOException if reading from or resetting the stream fails
 */
private String getHtmlEncoding(InputStream in) throws IOException
{
	final String key = "charset=";
	final String terminators = "'\" >;,.";
	final int bufSize = 1000;

	String enc = null;
	byte[] buf = new byte[bufSize];
	in.mark(bufSize);
	int len = in.read(buf, 0, bufSize);
	if (len > 0)
	{
		// ISO-8859-1 maps every byte to exactly one char, so the result
		// does not depend on the platform default charset and the ASCII
		// declaration we are looking for is preserved as-is.
		String head = new String(buf, 0, len, "ISO-8859-1");

		// HTML attribute names are case-insensitive; lower-casing a
		// Latin-1 string keeps its length, so indexes stay valid in head.
		int keyPos = head.toLowerCase(java.util.Locale.ENGLISH).indexOf(key);
		if (keyPos >= 0)
		{
			int start = keyPos + key.length();
			// Skip an optional opening quote, guarding against the key
			// being the very last thing in the examined bytes.
			if (start < head.length()
				&& (head.charAt(start) == '\'' || head.charAt(start) == '"'))
			{
				start++;
			}

			// The scan must begin after the opening quote; starting on
			// the quote itself would end the scan immediately and leave
			// the end index before the start index.
			int end = start;
			while (end < head.length()
				&& terminators.indexOf(head.charAt(end)) < 0)
			{
				end++;
			}

			// An empty value is treated as "no declaration".
			if (end > start)
				enc = head.substring(start, end);
		}
	}
	in.reset();
	return enc;
}
[/code]