好得很程序员自学网

<tfoot draggable='sEl'></tfoot>

基于HttpClient上传文件中文名乱码的解决

现象

使用HttpClient工具上传文件时,如果文件名是中文,文件名会乱码

文件名乱码的代码:

?

1

2

3

4

5

6

7

8

9

10

11

12

private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,File fileToUpload) {

          MultipartEntityBuilder builder = MultipartEntityBuilder.create();

          builder.addTextBody( "scenarioId" , scenarioId.toString());

          for (String groupId : groupIds) {

              builder.addTextBody( "groupIds" , groupId);

          }

          builder.addTextBody( "extension" , extension);

          builder.addPart( "fileToUpload" , new FileBody(fileToUpload));

          builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName());

          builder.addTextBody( "isSplit" , "false" );

          builder.addTextBody( "isRefresh" , "false" );

          return builder.build();

乱码原因:

HttpClient上传文件时,会调用doWriteTo方法向输出流写入请求体;其中写入每个part头部的formatMultipartHeader方法在底层主要有3种不同的实现,这3种实现所采用的字符集各不相同

HttpClient中的doWriteTo方法:

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

/**
 * Writes the multipart body to the given stream.
 *
 * <p>For every body part this writes: two dashes, the encoded boundary, CRLF, the part
 * header (via {@code formatMultipartHeader}), CRLF, the part body (only when
 * {@code writeContent} is true) and a trailing CRLF. After the last part it writes the
 * closing delimiter: two dashes, the boundary, two dashes and CRLF.
 *
 * @param out          stream that receives the multipart data
 * @param writeContent whether the body of each part is written in addition to its header
 * @throws IOException declared by this method; presumably raised by the underlying writes
 */
void doWriteTo(

       final OutputStream out,

       final boolean writeContent) throws IOException {

       final ByteArrayBuffer boundaryEncoded = encode( this .charset, this .boundary);

       for ( final FormBodyPart part: getBodyParts()) {

           writeBytes(TWO_DASHES, out);

           writeBytes(boundaryEncoded, out);

           writeBytes(CR_LF, out);

           // This call has 3 different implementations: depending on the mode, the
           // implementation differs and so does the charset used for the header.

           formatMultipartHeader(part, out);

           writeBytes(CR_LF, out);

           if (writeContent) {

               part.getBody().writeTo(out);

           }

           writeBytes(CR_LF, out);

       }

       writeBytes(TWO_DASHES, out);

       writeBytes(boundaryEncoded, out);

       writeBytes(TWO_DASHES, out);

       writeBytes(CR_LF, out);

   }

其中的formatMultipartHeader方法,不同的模式有不同的实现方式

MultipartEntityBuilder中的buildEntity方法(根据mode选择不同的实现):

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

/**
 * Assembles the multipart entity from the builder's current state.
 *
 * <p>The boundary is taken from the {@code boundary} field, else from the "boundary"
 * parameter of {@code contentType}, else freshly generated. The charset is taken from
 * the {@code charset} field, else from {@code contentType}; it may remain null. The
 * resulting content type carries the boundary and, when a charset is known, a "charset"
 * parameter. The form implementation is chosen by {@code mode}, with STRICT used when
 * no mode was set.
 *
 * @return a new entity wrapping the selected form, its content type and total length
 */
MultipartFormEntity buildEntity() {

     String boundaryCopy = boundary;

     if (boundaryCopy == null && contentType != null ) {

         boundaryCopy = contentType.getParameter( "boundary" );

     }

     if (boundaryCopy == null ) {

         boundaryCopy = generateBoundary();

     }

     Charset charsetCopy = charset;

     if (charsetCopy == null && contentType != null ) {

         charsetCopy = contentType.getCharset();

     }

     final List<NameValuePair> paramsList = new ArrayList<NameValuePair>( 2 );

     paramsList.add( new BasicNameValuePair( "boundary" , boundaryCopy));

     if (charsetCopy != null ) {

         paramsList.add( new BasicNameValuePair( "charset" , charsetCopy.name()));

     }

     final NameValuePair[] params = paramsList.toArray( new NameValuePair[paramsList.size()]);

     final ContentType contentTypeCopy = contentType != null ?

             contentType.withParameters(params) :

             ContentType.create( "multipart/" + DEFAULT_SUBTYPE, params);

     final List<FormBodyPart> bodyPartsCopy = bodyParts != null ? new ArrayList<FormBodyPart>(bodyParts) :

             Collections.<FormBodyPart>emptyList();

     // Copy mode into modeCopy, falling back to STRICT when no mode was set.

     final HttpMultipartMode modeCopy = mode != null ? mode : HttpMultipartMode.STRICT;

     final AbstractMultipartForm form;

     // One of three form implementations is built depending on modeCopy. Each one uses a
     // different charset for the part headers, which is the root cause of the garbled names.

     switch (modeCopy) {

         case BROWSER_COMPATIBLE:

             form = new HttpBrowserCompatibleMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);

             break ;

         case RFC6532:

             form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, bodyPartsCopy);

             break ;

         default :

             form = new HttpStrictMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);

     }

     return new MultipartFormEntity(form, contentTypeCopy, form.getTotalLength());

}

/**
 * Public entry point of the builder; delegates directly to {@code buildEntity()}.
 *
 * @return the entity produced by {@code buildEntity()}
 */
public HttpEntity build() {

     return buildEntity();

}

BROWSER_COMPATIBLE模式中的formatMultipartHeader方法

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

/**
 * Multipart form used for the BROWSER_COMPATIBLE mode.
 *
 * <p>Part headers are written with the charset passed to the constructor, so a charset
 * able to represent the file name must be configured for non-ASCII names.
 */
class HttpBrowserCompatibleMultipart extends AbstractMultipartForm {

     // Body parts that make up this form, as handed to the constructor.
     private final List<FormBodyPart> parts;

     /**
      * @param charset  charset forwarded to the superclass
      * @param boundary multipart boundary forwarded to the superclass
      * @param parts    body parts of this form; stored as given, not copied
      */
     public HttpBrowserCompatibleMultipart(

             final Charset charset,

             final String boundary,

             final List<FormBodyPart> parts) {

         super (charset, boundary);

         this .parts = parts;

     }

     /** Returns the list of body parts passed to the constructor. */
     @Override

     public List<FormBodyPart> getBodyParts() {

         return this .parts;

     }

     /**

       * Write the multipart header fields; depends on the style.

       */

     @Override

     protected void formatMultipartHeader(

             final FormBodyPart part,

             final OutputStream out) throws IOException {

         // For browser-compatible, only write Content-Disposition

         // Use content charset

         final Header header = part.getHeader();

         final MinimalField cd = header.getField(MIME.CONTENT_DISPOSITION);

         // Note that the charset used here is the configured one (this.charset).

         writeField(cd, this .charset, out);

         final String filename = part.getBody().getFilename();

         if (filename != null ) {

             final MinimalField ct = header.getField(MIME.CONTENT_TYPE);

             // The configured charset is used here as well.

             writeField(ct, this .charset, out);

         }

     }

}

RFC6532模式中的formatMultipartHeader方法

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

/**
 * Multipart form used for the RFC6532 mode.
 *
 * <p>Every header field of a part is written with {@code MIME.UTF8_CHARSET}, regardless
 * of the charset passed to the constructor.
 */
class HttpRFC6532Multipart extends AbstractMultipartForm {

     // Body parts that make up this form, as handed to the constructor.
     private final List<FormBodyPart> parts;

     /**
      * @param charset  charset forwarded to the superclass
      * @param boundary multipart boundary forwarded to the superclass
      * @param parts    body parts of this form; stored as given, not copied
      */
     public HttpRFC6532Multipart(

             final Charset charset,

             final String boundary,

             final List<FormBodyPart> parts) {

         super (charset, boundary);

         this .parts = parts;

     }

     /** Returns the list of body parts passed to the constructor. */
     @Override

     public List<FormBodyPart> getBodyParts() {

         return this .parts;

     }

     /** Writes all header fields of the given part to {@code out} using UTF-8. */
     @Override

     protected void formatMultipartHeader(

         final FormBodyPart part,

         final OutputStream out) throws IOException {

         // For RFC6532, we output all fields with UTF-8 encoding.

         final Header header = part.getHeader();

         for ( final MinimalField field: header) {

             // Note that UTF-8 is always used here, independent of the configured charset.

             writeField(field, MIME.UTF8_CHARSET, out);

         }

     }

}

默认模式中的formatMultipartHeader方法

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

/**
 * Multipart form used for the default (STRICT) mode.
 *
 * <p>Header fields are written through the two-argument {@code writeField(field, out)},
 * i.e. without passing the charset given to the constructor.
 */
class HttpStrictMultipart extends AbstractMultipartForm {

     // Body parts that make up this form, as handed to the constructor.
     private final List<FormBodyPart> parts;

     /**
      * @param charset  charset forwarded to the superclass
      * @param boundary multipart boundary forwarded to the superclass
      * @param parts    body parts of this form; stored as given, not copied
      */
     public HttpStrictMultipart(

             final Charset charset,

             final String boundary,

             final List<FormBodyPart> parts) {

         super (charset, boundary);

         this .parts = parts;

     }

     /** Returns the list of body parts passed to the constructor. */
     @Override

     public List<FormBodyPart> getBodyParts() {

         return this .parts;

     }

     /** Writes all header fields of the given part to {@code out} without an explicit charset. */
     @Override

     protected void formatMultipartHeader(

         final FormBodyPart part,

         final OutputStream out) throws IOException {

         // For strict, we output all fields with MIME-standard encoding.

         // As the comment above indicates, the default charset is used here, i.e. ASCII
         // (see DEFAULT_CHARSET in the MIME class). NOTE(review): the two-argument
         // writeField overload is not shown in this article - confirm against the library.

         final Header header = part.getHeader();

         for ( final MinimalField field: header) {

             writeField(field, out);

         }

     }

}

MIME类

?

1

2

3

4

5

6

7

8

9

10

11

/**
 * Constants for MIME header names, transfer-encoding values and the charsets
 * referenced by the multipart form implementations.
 */
public final class MIME {

     // Header field names.
     public static final String CONTENT_TYPE          = "Content-Type" ;

     public static final String CONTENT_TRANSFER_ENC  = "Content-Transfer-Encoding" ;

     public static final String CONTENT_DISPOSITION   = "Content-Disposition" ;

     // Transfer-encoding values.
     public static final String ENC_8BIT              = "8bit" ;

     public static final String ENC_BINARY            = "binary" ;

     /** The default character set to be used, i.e. "US-ASCII" */

     public static final Charset DEFAULT_CHARSET      = Consts.ASCII;

     /** UTF-8 is used for RFC6532 */

     public static final Charset UTF8_CHARSET         = Consts.UTF_8;

}

解决方法

知道乱码产生的根源,乱码问题也就好解决了,解决方式有两种

设置mode为:BROWSER_COMPATIBLE,并设置字符集为UTF8

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,

                                    File fileToUpload) {

         MultipartEntityBuilder builder = MultipartEntityBuilder.create();

         //设置模式为BROWSER_COMPATIBLE,并设置字符集为UTF8

         builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);

         builder.setCharset(Charset.forName( "UTF-8" ));

         builder.addTextBody( "scenarioId" , scenarioId.toString());

         for (String groupId : groupIds) {

             builder.addTextBody( "groupIds" , groupId);

         }

         builder.addTextBody( "extension" , extension);

         builder.addPart( "fileToUpload" , new FileBody(fileToUpload));

         builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName());

         builder.addTextBody( "isSplit" , "false" );

         builder.addTextBody( "isRefresh" , "false" );

         return builder.build();

     }

设置模式为:RFC6532

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,

                                File fileToUpload) {

     MultipartEntityBuilder builder = MultipartEntityBuilder.create();

     //设置模式为RFC6532

     builder.setMode(HttpMultipartMode.RFC6532);

     builder.addTextBody( "scenarioId" , scenarioId.toString());

     for (String groupId : groupIds) {

         builder.addTextBody( "groupIds" , groupId);

     }

     builder.addTextBody( "extension" , extension);

     builder.addPart( "fileToUpload" , new FileBody(fileToUpload));

     builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName());

     builder.addTextBody( "isSplit" , "false" );

     builder.addTextBody( "isRefresh" , "false" );

     return builder.build();

}

以上为个人经验,希望能给大家一个参考,也希望大家多多支持。

原文链接:https://blog.csdn.net/youshounianhua123/article/details/81100778

查看更多关于基于HttpClient上传文件中文名乱码的解决的详细内容...

  阅读:14次