首页 > 安全资讯 >

Java处理UTF-8带BOM的文本的读写

11-08-01

什么是BOM:BOM(byte-order mark),即字节顺序标记,它是插入到以UTF-8、UTF-16或UTF-32编码的Unicode文件开头的特殊标记,用来识别Unicode文件的编码类型。对于UTF-8来说,BOM并不是必须的,因为BOM用来标记多字节...

 

什么是BOM

 

BOM(byte-order mark),即字节顺序标记,它是插入到以UTF-8、UTF-16或UTF-32编码的Unicode文件开头的特殊标记,用来识别Unicode文件的编码类型。对于UTF-8来说,BOM并不是必须的,因为BOM主要用来标记多字节编码文件的编码类型和字节顺序(big-endian或little-endian),而UTF-8以单字节为编码单元,不存在字节顺序问题。

 

BOMs 文件头:

   00 00 FE FF    = UTF-32, big-endian

   FF FE 00 00    = UTF-32, little-endian

   EF BB BF       = UTF-8

   FE FF          = UTF-16, big-endian

   FF FE          = UTF-16, little-endian

 

 

下面举个例子,针对UTF-8的文件BOM做个处理:

 

String xmla = StringFileToolkit.file2String(new File(“D:\\projects\\mailpost\\src\\a.xml”),“UTF-8”);

 

byte[] b = xmla.getBytes(“UTF-8”);

 

String xml = new String(b,3,b.length-3,“UTF-8”);

 

..............

 

思路是:先按照UTF-8编码读取文件,再跳过开头的三个字节(即UTF-8的BOM:EF BB BF),重新构建一个新的字符串,然后用Dom4j解析处理,这样就不会报错了。

 

其他编码的方式处理思路类似,其实可以写一个通用的自动识别的BOM的工具,去掉BOM信息,返回字符串。

 

不过这个处理过程已经有牛人解决过了:http://koti.mbnet.fi/akini/java/unicodereader/

 

Java代码 

‍Example code using UnicodeReader class 

Here is an example method to read text file. It will recognize bom marker and skip it while reading.  

 

//import ‍http://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txt 

   public static char[] loadFile(String file) throws IOException { 

      // read text file, auto recognize bom marker or use  

      // system default if markers not found. 

      BufferedReader reader = null; 

      CharArrayWriter writer = null; 

      UnicodeReader r = new UnicodeReader(new FileInputStream(file), null); 

   

      char[] buffer = new char[16 * 1024];   // 16k buffer 

      int read; 

      try { 

         reader = new BufferedReader(r); 

         writer = new CharArrayWriter(); 

         while( (read = reader.read(buffer)) != -1) { 

            writer.write(buffer, 0, read); 

         } 

         writer.flush(); 

         return writer.toCharArray(); 

      } catch (IOException ex) { 

         throw ex; 

      } finally { 

         try { 

            writer.close(); reader.close(); r.close(); 

         } catch (Exception ex) { } 

      } 

   } 

 

Java代码 

Example code to write UTF-8 with bom marker 

Write bom marker bytes to start of empty file and all proper text editors have no problems using a correct charset while reading files. Java's OutputStreamWriter does not write utf8 bom marker bytes.  

 

 

   public static void saveFile(String file, String data, boolean append) throws IOException { 

      BufferedWriter bw = null; 

      OutputStreamWriter osw = null; 

   

      File f = new File(file); 

      FileOutputStream fos = new FileOutputStream(f, append); 

      try { 

         // write UTF8 BOM mark if file is empty 

         if (f.length() < 1) { 

           final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF }; 

            fos.write(bom); 

         } 

 

         osw = new OutputStreamWriter(fos, "UTF-8"); 

         bw = new BufferedWriter(osw); 

         if (data != null) bw.write(data); 

      } catch (IOException ex) { 

         throw ex; 

      } finally { 

         try { bw.close(); fos.close(); } catch (Exception ex) { } 

      } 

   } 

  

 

 

实际应用:

Java代码 

package com.dayo.gerber; 

 

import java.io.BufferedReader; 

import java.io.BufferedWriter; 

import java.io.File; 

import java.io.FileInputStream; 

import java.io.FileOutputStream; 

import java.io.IOException; 

import java.io.InputStream; 

import java.io.InputStreamReader; 

import java.io.OutputStreamWriter; 

import java.io.Reader; 

import java.util.Properties; 

 

/**

 * 

 * @author 刘飞(liufei)

 * 

 */ 

public class Generate4YYQTPScript { 

 

    private static final String ENCODING = "UTF-8"; 

    private static final String GERBER_CONFIG = "config/gerber4yy.properties"; 

 

    private static Properties GERBER_CONFIG_PROPS = null; 

    private static final String GERBER_FORMAT_DIALOG_TITLE_SCRIPT = "{#GERBER_FORMAT_DIALOG_TITLE}"; 

    private static String GERBER_FORMAT_DIALOG_TITLE = ""; 

 

    /* gerber properties parmters keys config */ 

    private static final String QTP_SCRIPT_IN = "script.in"; 

 

    private static final String QTP_SCRIPT_OUT = "script.out"; 

 

    private static final String QTP_SYSTEM_PATH = "QTP.system.path"; 

    private static final String QTP_SYSTEM_PATH_SCRIPT = "{#QTPSYSTEMPATH}"; 

 

    private static final String GERBER_FILE_DRIVER_PATH = "gerber.file.driver.path"; 

    private static final String GERBER_FILE_DRIVER_PATH_SCRIPT = "{#driver}"; 

 

    private static final String GERBER_FILE_DRIVER = "gerber.file.driver"; 

    private static final String GERBER_FILE_DRIVER_SCRIPT = "{#dr}"; 

 

    private static final String GERBER_FILE_DIR = "gerber.file.dir"; 

    private static final String GERBER_FILE_DIR_SCRIPT = "{#dirName}"; 

 

    private static final String GERBER_FILE = "gerber.file"; 

    private static final String GERBER_FILE_SCRIPT = "{#fileName}"; 

 

    private static final String GERBER_OUT = "gerber.out"; 

    private static final String GERBER_OUT_SCRIPT = "{#gerberout}"; 

 

    private static final String VB_EXE_PATH = "vb.exe.path"; 

 

    /* bigBoard props */ 

    private static final String LEAGUE_BOARD_NUM_SCRIPT = "{#LEAGUE_BOARD_NUM}"; 

    private static final String WIDTH_SCRIPT = "{#WIDTH}"; 

    private static final String P_SCRIPT = "{#P}" ; 

    private static final String DY_SCRIPT = "{#DY}"; 

 

    private Properties BIGBOARD_PROPS = null; 

 

    public Generate4YYQTPScript(Properties bigboard_props) { 

        super(); 

        BIGBOARD_PROPS = bigboard_props; 

 

        try { 

            GERBER_CONFIG_PROPS = ConfigHelper 

                    .getConfigProperties(GERBER_CONFIG); 

            GERBER_FORMAT_DIALOG_TITLE = GERBER_CONFIG_PROPS.getProperty( 

                    GERBER_FILE_DRIVER).trim().toUpperCase() 

                    + "\\" 

                    + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR).trim() 

                            .toUpperCase() 

                    + "\\" 

                    + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE).trim() 

                            .toUpperCase(); 

            GERBER_FORMAT_DIALOG_TITLE = GERBER_FORMAT_DIALOG_TITLE.substring(0, 17) ; 

        } catch (IOException e) { 

            e.printStackTrace(); 

        } 

    } 

 

    public static void main(String[] args) throws IOException { 

        Properties bigboard_props = new Properties() ; 

        bigboard_props.setProperty("{#LEAGUE_BOARD_NUM}", String.valueOf(4)) ; 

        bigboard_props.setProperty("{#WIDTH}", String.valueOf(new Double("54"))) ; 

        bigboard_props.setProperty("{#P}", String.valueOf(new Double("2"))) ; 

        bigboard_props.setProperty("{#DY}", String.valueOf(new Double("0.00"))) ; 

         

        Generate4YYQTPScript generateQTPScript = new Generate4YYQTPScript(bigboard_props); 

        generateQTPScript.generateQTPScript(); 

//      RuntimeUtil.getInstance().run(generateQTPScript.getVBEXE(), 1, 50000); 

    } 

 

    public String getCheckOutFilePath() { 

        return GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DRIVER).trim() + "/" 

                + GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR).trim(); 

    } 

 

    public String getSavePath() { 

        return GERBER_CONFIG_PROPS.getProperty(GERBER_OUT); 

    } 

 

    public String getVBEXE() { 

        return GERBER_CONFIG_PROPS.getProperty(VB_EXE_PATH); 

    } 

 

    /**

     * Generate QTP Script

     * 

     * @return

     * @throws IOException

     */ 

    public File generateQTPScript() throws IOException { 

        return generateQTPScript(GERBER_CONFIG_PROPS 

                .getProperty(QTP_SCRIPT_OUT), GERBER_CONFIG_PROPS 

                .getProperty(QTP_SCRIPT_IN)); 

    } 

 

    /**

     * set value to script

     * 

     * @param source

     * @return

     * @throws IOException

     */ 

    private String scriptConvey(String source) throws IOException { 

        String _source = source; 

        _source = this.replace(this.replace(this.replace( 

                this.replace(this.replace(this.replace(this.replace( 

                         

                        _source 

                        , 

                        GERBER_FORMAT_DIALOG_TITLE_SCRIPT, 

                        GERBER_FORMAT_DIALOG_TITLE), GERBER_FILE_SCRIPT, 

                        GERBER_CONFIG_PROPS.getProperty(GERBER_FILE)), 

                        GERBER_FILE_DRIVER_SCRIPT, GERBER_CONFIG_PROPS 

                                .getProperty(GERBER_FILE_DRIVER)), 

                        GERBER_OUT_SCRIPT, GERBER_CONFIG_PROPS 

                                .getProperty(GERBER_OUT)), 

                GERBER_FILE_DIR_SCRIPT, GERBER_CONFIG_PROPS 

                        .getProperty(GERBER_FILE_DIR)), 

                GERBER_FILE_DRIVER_PATH_SCRIPT, GERBER_CONFIG_PROPS 

                        .getProperty(GERBER_FILE_DRIVER_PATH)), 

                QTP_SYSTEM_PATH_SCRIPT, GERBER_CONFIG_PROPS 

                        .getProperty(QTP_SYSTEM_PATH)); 

 

        if (this.BIGBOARD_PROPS != null) { 

            _source = this.replace(this.replace(this.replace( 

                     

                    _source 

                     

                    , 

                    DY_SCRIPT, this.BIGBOARD_PROPS.getProperty(DY_SCRIPT)), 

                    WIDTH_SCRIPT, this.BIGBOARD_PROPS 

                            .getProperty(WIDTH_SCRIPT)), 

                    LEAGUE_BOARD_NUM_SCRIPT, this.BIGBOARD_PROPS 

                            .getProperty(LEAGUE_BOARD_NUM_SCRIPT)); 

             

            _source = this.replace(_source, P_SCRIPT, this.BIGBOARD_PROPS.getProperty(P_SCRIPT)) ; 

        } 

 

        return _source; 

    } 

 

    /**

     * Generate QTP Script

     * 

     * @param target

     *            target file

     * @param source

     *            source file

     * @throws IOException

     */ 

    public File generateQTPScript(File target, File source) throws IOException { 

        return generateQTPScript(target.getAbsolutePath(), source 

                .getAbsolutePath()); 

    } 

 

    /**

     * Generate QTP Script

     * 

     * @param target

     *            target file path

     * @param source

     *            source file path

     * @return

     * @throws IOException

     */ 

    public File generateQTPScript(String target, String source) 

            throws IOException { 

        File f = new File(target); 

        if (!f.exists()) { 

            f.getParentFile().mkdirs(); 

            try { 

                f.createNewFile(); 

            } catch (Exception e) { 

            } 

        } 

        FileOutputStream fos = null; 

        OutputStreamWriter osw = null; 

        BufferedWriter bw = null; 

        try { 

            final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF }; 

            fos = new FileOutputStream(f); 

            osw = new OutputStreamWriter(fos, ENCODING); 

            bw = new BufferedWriter(osw); 

            fos.write(bom); 

            bw.write(scriptConvey(getSourceFileContentReader(source))); 

 

            bw.flush(); 

            bw.close(); 

            return f; 

        } catch (IOException e) { 

            throw e; 

        } 

    } 

 

    /**

     * Reader convey to string

     * 

     * @param source

     * @return

     * @throws IOException

     */ 

    private String reader2String(Reader source) throws IOException { 

        BufferedReader bufferedReader = new BufferedReader(source);  

        StringBuffer result = new StringBuffer(); 

        String buffer = null; 

        while ((buffer = bufferedReader.readLine()) != null) { 

            result.append(buffer + "\n"); 

        } 

        return result.toString(); 

    } 

 

    /**

     * 

     * @param source

     *            file path

     * @return

     * @throws IOException

     */ 

    private Reader getReader(String source) throws IOException { 

        return source == "" ? null : new BufferedReader(new InputStreamReader( 

                getInputStream(source))); 

    } 

 

    /**

     * get script file content string

     * 

     * @param source

     * @return

     * @throws IOException

     */ 

    private String getSourceFileContentReader(String source) throws IOException { 

        return source == "" ? "" : reader2String(getReader(source)); 

    } 

 

    /**

     * get inputstream

     * 

     * @param source

     *            file path

     * @return

     * @throws IOException

     */ 

    private InputStream getInputStream(String source) throws IOException { 

        return source == "" ? null : new FileInputStream(new File(source)); 

    } 

 

    /**

     * Replace all occurences of a substring within a string with another

     * string.

     * 

     * @param inString

     *            String to examine

     * @param oldPattern

     *            String to replace

     * @param newPattern

     *            String to insert

     * @return a String with the replacements

     */ 

    private String replace(String inString, String oldPattern, String newPattern) { 

        if (!hasLength(inString) || !hasLength(oldPattern) 

                || newPattern == null) { 

            return inString; 

        } 

        StringBuilder sb = new StringBuilder(); 

        int pos = 0; 

        int index = inString.indexOf(oldPattern); 

        int patLen = oldPattern.length(); 

        while (index >= 0) { 

            sb.append(inString.substring(pos, index)); 

            sb.append(newPattern); 

            pos = index + patLen; 

            index = inString.indexOf(oldPattern, pos); 

        } 

        sb.append(inString.substring(pos)); 

        return sb.toString(); 

    } 

 

    private boolean hasLength(String str) { 

        return hasLength((CharSequence) str); 

    } 

 

    private boolean hasLength(CharSequence str) { 

        return (str != null && str.length() > 0); 

    } 

}   

相关文章
最新文章
热点推荐