@@ -58,6 +58,7 @@ public final class PythonFileDetector implements TruffleFile.FileTypeDetector {
5858
5959 private static final String UTF_8_BOM_IN_LATIN_1 = new String (new byte []{(byte ) 0xEF , (byte ) 0xBB , (byte ) 0xBF }, StandardCharsets .ISO_8859_1 );
6060 private static final Pattern ENCODING_COMMENT = Pattern .compile ("^[ \t \f ]*#.*?coding[:=][ \t ]*([-_.a-zA-Z0-9]+).*" );
61+ private static final Pattern BLANK_LINE = Pattern .compile ("^[ \t \f ]*(?:#.*)?" );
6162
6263 @ Override
6364 public String findMimeType (TruffleFile file ) throws IOException {
@@ -106,19 +107,23 @@ private static Charset tryGetCharsetFromLine(String line, boolean hasBOM) {
106107
107108 @ TruffleBoundary
108109 public static Charset findEncodingStrict (BufferedReader reader ) throws IOException {
109- Charset charset ;
110110 // Read first two lines like CPython
111111 String firstLine = reader .readLine ();
112- boolean hasBOM = false ;
113- if (firstLine != null && firstLine .startsWith (UTF_8_BOM_IN_LATIN_1 )) {
114- hasBOM = true ;
115- firstLine = firstLine .substring (UTF_8_BOM_IN_LATIN_1 .length ());
116- }
117- if ((charset = tryGetCharsetFromLine (firstLine , hasBOM )) != null ) {
118- return charset ;
119- }
120- if ((charset = tryGetCharsetFromLine (reader .readLine (), hasBOM )) != null ) {
121- return charset ;
112+ if (firstLine != null ) {
113+ boolean hasBOM = false ;
114+ if (firstLine .startsWith (UTF_8_BOM_IN_LATIN_1 )) {
115+ hasBOM = true ;
116+ firstLine = firstLine .substring (UTF_8_BOM_IN_LATIN_1 .length ());
117+ }
118+ Charset charset ;
119+ if ((charset = tryGetCharsetFromLine (firstLine , hasBOM )) != null ) {
120+ return charset ;
121+ }
122+ if (BLANK_LINE .matcher (firstLine ).matches ()) {
123+ if ((charset = tryGetCharsetFromLine (reader .readLine (), hasBOM )) != null ) {
124+ return charset ;
125+ }
126+ }
122127 }
123128 return StandardCharsets .UTF_8 ;
124129 }
0 commit comments