@@ -108,7 +108,6 @@ def _parse(self, stream, innerHTML=False, container="div",
108108 # We only seem to have InBodyPhase testcases where the following is
109109 # relevant ... need others too
110110 self .lastPhase = None
111-
112111 self .beforeRCDataPhase = None
113112
114113 CharactersToken = tokenTypes ["Characters" ]
@@ -120,6 +119,8 @@ def _parse(self, stream, innerHTML=False, container="div",
120119
121120
122121 for token in self .normalizedTokens ():
122+ #print self.phase.__class__.__name__
123+ #print token
123124 type = token ["type" ]
124125 if type == CharactersToken :
125126 self .phase .processCharacters (token )
@@ -271,18 +272,6 @@ def __init__(self, parser, tree):
271272
272273 def processEOF (self ):
273274 raise NotImplementedError
274- self .tree .generateImpliedEndTags ()
275- if len (self .tree .openElements ) > 2 :
276- self .parser .parseError ("expected-closing-tag-but-got-eof" )
277- elif len (self .tree .openElements ) == 2 and \
278- self .tree .openElements [1 ].name != "body" :
279- # This happens for framesets or something?
280- self .parser .parseError ("expected-closing-tag-but-got-eof" )
281- elif self .parser .innerHTML and len (self .tree .openElements ) > 1 :
282- # XXX This is not what the specification says. Not sure what to do
283- # here.
284- self .parser .parseError ("eof-in-innerhtml" )
285- # Betting ends.
286275
287276 def processComment (self , token ):
288277 # For most phases the following is correct. Where it's not it will be
@@ -318,7 +307,7 @@ class InitialPhase(Phase):
318307 # this.
319308 def processEOF (self ):
320309 self .parser .parseError ("expected-doctype-but-got-eof" )
321- self .compatMode = "quirks"
310+ self .parser . compatMode = "quirks"
322311 self .parser .phase = self .parser .phases ["beforeHtml" ]
323312 self .parser .phase .processEOF ()
324313
@@ -346,8 +335,9 @@ def processDoctype(self, token):
346335 if publicId != "" :
347336 publicId = publicId .translate (asciiUpper2Lower )
348337
349- if (not correct or token ["name" ] != "html"
350- or publicId in
338+
339+ if ((not correct ) or nameLower != "html"
340+ or publicId in
351341 ("+//silmaril//dtd html pro v0r11 19970101//en" ,
352342 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en" ,
353343 "-//as//dtd html 3.0 aswedit + extensions//en" ,
@@ -419,19 +409,18 @@ def processDoctype(self, token):
419409 "html" )
420410 or (publicId in
421411 ("-//w3c//dtd html 4.01 frameset//EN" ,
422- "-//w3c//dtd html 4.01 transitional//EN" ) and
423- systemId == None )
412+ "-//w3c//dtd html 4.01 transitional//EN" ) and systemId == None )
424413 or (systemId != None and
425- systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" )):
426- self .compatMode = "quirks"
414+ systemId ==
415+ "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" )):
416+ self .parser .compatMode = "quirks"
427417 elif (publicId in
428- ("-//w3c//dtd xhtml 1.0 frameset//EN" ,
429- "-//w3c//dtd xhtml 1.0 transitional//EN" )
418+ ("-//w3c//dtd xhtml 1.0 frameset//EN" ,
419+ "-//w3c//dtd xhtml 1.0 transitional//EN" )
430420 or (publicId in
431421 ("-//w3c//dtd html 4.01 frameset//EN" ,
432- "-//w3c//dtd html 4.01 transitional//EN" ) and
433- systemId == None )):
434- self .compatMode = "limited quirks"
422+ "-//w3c//dtd html 4.01 transitional//EN" ) and systemId == None )):
423+ self .parser .compatMode = "limited quirks"
435424
436425 self .parser .phase = self .parser .phases ["beforeHtml" ]
437426
@@ -440,7 +429,7 @@ def processSpaceCharacters(self, token):
440429
441430 def processCharacters (self , token ):
442431 self .parser .parseError ("expected-doctype-but-got-chars" )
443- self .compatMode = "quirks"
432+ self .parser . compatMode = "quirks"
444433 self .parser .phase = self .parser .phases ["beforeHtml" ]
445434 self .parser .phase .processCharacters (token )
446435
@@ -595,7 +584,8 @@ def startTagMeta(self, token):
595584 codec = inputstream .codecName (attributes ["charset" ])
596585 self .parser .tokenizer .stream .changeEncoding (codec )
597586 elif "content" in attributes :
598- data = inputstream .EncodingBytes (attributes ["content" ])
587+ data = inputstream .EncodingBytes (
588+ attributes ["content" ].encode (self .parser .tokenizer .stream .charEncoding [0 ]))
599589 parser = inputstream .ContentAttrParser (data )
600590 codec = parser .parse ()
601591 self .parser .tokenizer .stream .changeEncoding (codec )
0 commit comments