autodetect encoding for html file
Submitted by on Tuesday, 15 November, 2005 - 09:27
In fact, I have already implemented this; if you think it is useful, you can add this feature to future jEdit versions.
It looks for text like "content="text/html; charset=xxxxxx"" near the beginning of the HTML file.
[code]
BufferIORequest.java
/**
* Tries to detect if the stream is gzipped, and if it has an encoding
* specified with an XML PI.
*/
private Reader autodetect(InputStream in) throws IOException {
in = new BufferedInputStream(in);
String encoding = buffer.getStringProperty(Buffer.ENCODING);
if (!in.markSupported())
Log.log(Log.WARNING, this, "Mark not supported: " + in);
else if (buffer.getBooleanProperty(Buffer.ENCODING_AUTODETECT)) {
{// neoe add: detect html's encoding
String enc = getHtmlEncoding(in);
if (enc != null && MiscUtilities.isSupportedEncoding(enc)) {
buffer.setProperty(Buffer.ENCODING, enc);
return new InputStreamReader(in, enc);
}
}
....
(original lines)
/**
 * Scans the first bytes of a stream for a <code>charset=</code>
 * declaration, such as the one found in
 * <code>content="text/html; charset=xxx"</code> or
 * <code>&lt;meta charset="xxx"&gt;</code>, and returns the declared name.
 * Added by neoedmund.
 *
 * The stream is marked before reading and reset afterwards, so the
 * caller can re-read the same bytes. The stream must support
 * mark/reset.
 *
 * @param in the stream to inspect
 * @return the text following the first <code>charset=</code> (matched
 * case-insensitively), without an optional leading quote, or
 * <code>null</code> if no declaration with a non-empty value is found
 * in the inspected prefix
 * @throws IOException if reading or resetting the stream fails
 */
private String getHtmlEncoding(InputStream in) throws IOException {
	final String key = "charset=";
	// Characters that end an encoding name.
	final String terminators = "'\" >;,.";
	// Only this many leading bytes are inspected.
	final int bufSize = 1000;

	byte[] buf = new byte[bufSize];
	String enc = null;

	in.mark(bufSize);
	int len = in.read(buf, 0, bufSize);
	if (len > 0) {
		// ISO-8859-1 maps every byte to exactly one char, so the
		// result does not depend on the platform default charset.
		String head = new String(buf, 0, len, "ISO-8859-1");

		// Case-insensitive search for the key.
		int keyPos = -1;
		for (int i = 0; i + key.length() <= head.length(); i++) {
			if (head.regionMatches(true, i, key, 0, key.length())) {
				keyPos = i;
				break;
			}
		}

		if (keyPos >= 0) {
			int start = keyPos + key.length();
			// Skip an optional opening quote; the bounds check covers
			// a key that sits at the very end of the inspected prefix.
			if (start < head.length()
				&& (head.charAt(start) == '\'' || head.charAt(start) == '"')) {
				start++;
			}
			// Scan from AFTER the quote; starting at the quote itself
			// would stop immediately and leave end < start.
			int end = start;
			while (end < head.length()
				&& terminators.indexOf(head.charAt(end)) < 0) {
				end++;
			}
			if (end > start) {
				enc = head.substring(start, end);
			}
		}
	}
	in.reset();
	return enc;
}
[/code]

