diff --git a/Syntax.md b/Syntax.md index 600935d..90d8b16 100644 --- a/Syntax.md +++ b/Syntax.md @@ -121,11 +121,11 @@ To produce the familiar bracketed references to other specifications, e.g. > There are a number of dynamic selectors that can be used with HTML. This section defines when these selectors match HTML elements. [[SELECTORS]](https://html.spec.whatwg.org/#refsSELECTORS) [[CSSUI]](https://html.spec.whatwg.org/#refsCSSUI) -you can use the `<ref>` void element: +you can use the (nonstandard) `<ref>` element: ```html

<p>There are a number of dynamic selectors that can be used with HTML. This section defines when -these selectors match HTML elements. <ref spec=SELECTORS> <ref spec=CSSUI></p>

+these selectors match HTML elements. <ref>SELECTORS</ref> <ref>CSSUI</ref></p>

``` These match against a bibliography, which is a manually-maintained and sorted `<dl>
` at the end of the source file, identified by having the ID "`ref-list`". Its entries look like the following: diff --git a/src/html/htmlparser.pas b/src/html/htmlparser.pas index c7bea3f..08460ca 100644 --- a/src/html/htmlparser.pas +++ b/src/html/htmlparser.pas @@ -219,7 +219,6 @@ TToken = record // this is an expensive type, as it is basically all token const Marker = nil; var - FProprietaryVoids: specialize PlasticArray ; FInputStream: TInputStream; {$IFDEF PARSEERROR} FOnParseError: TParseErrorHandler; {$ENDIF} FTokeniserState: TTokeniserState; @@ -314,7 +313,6 @@ TToken = record // this is an expensive type, as it is basically all token procedure SpoonFeed(const Data: UTF8String); // call this any number of times until all characters have been provided {$ENDIF} procedure SpoonFeed(const Data: Pointer; const Length: QWord); // call this any number of times until all characters have been provided - procedure RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString); function Parse(): TDocument; // then call this // XXX need a fragment parsing mode (if we support fragment parsing, set FFragmentParsingMode to true) {$IFDEF PARSEERROR} property OnParseError: TParseErrorHandler read FOnParseError write FOnParseError; {$ENDIF} @@ -1136,15 +1134,6 @@ procedure THTMLParser.SpoonFeed(const Data: Pointer; const Length: QWord); FInputStream.PushData(Data, Length); end; -procedure THTMLParser.RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString); -var - Name: TCanonicalString; -begin - {$IFOPT C+} Assert(not FInputStream.WasStarted); {$ENDIF} - for Name in TagNames do - FProprietaryVoids.Push(Name); -end; - function THTMLParser.Parse(): TDocument; var OldKind: TTokenKind; @@ -1936,7 +1925,7 @@ procedure THTMLParser.Tokenise(); end; BogusComment(); end; - + procedure TryForCDATASection(); inline; begin // seen [ @@ -2125,7 +2114,7 @@ procedure THTMLParser.Tokenise(); CommentSize := 0; repeat case 
(FInputStream.CurrentCharacter.Value) of - $003E, kEOF: break; + $003E, kEOF: break; $0000: Inc(CommentSize, FFFD.Length); else Inc(CommentSize, FInputStream.CurrentCharacterLength); end; @@ -2562,7 +2551,7 @@ procedure THTMLParser.Tokenise(); {$IFDEF PARSEERROR} ParseError('unexpected U+0000 in attribute name'); {$ENDIF} FCurrentToken.CurrentAttributeName.Append($FFFD); end; - $0022, $0027, $003C: + $0022, $0027, $003C: begin {$IFDEF PARSEERROR} ParseError('invalid character in attribute name'); {$ENDIF} FCurrentToken.CurrentAttributeName.Append(FInputStream.CurrentCharacter); @@ -5907,7 +5896,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken); if (Token.TagName = eBody) then begin {$IFDEF PARSEERROR} ParseError('unexpected body start tag'); {$ENDIF} - if ((FStackOfOpenElements.Length < 2) or + if ((FStackOfOpenElements.Length < 2) or (not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody)) or (StackOfOpenElementsHas(nsHTML, eTemplate))) then exit; // ignore the token @@ -5930,7 +5919,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken); if (Token.TagName = eFrameset) then begin {$IFDEF PARSEERROR} ParseError('unexpected body frameset start tag'); {$ENDIF} - if ((FStackOfOpenElements.Length < 2) or + if ((FStackOfOpenElements.Length < 2) or (not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody))) then exit; // ignore the token if (not FFramesetOkFlag) then @@ -6298,11 +6287,6 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken); // any other start tag ReconstructTheActiveFormattingElements(); InsertAnHTMLElementFor(Token); - if (FProprietaryVoids.Contains(Token.TagName)) then - begin - FStackOfOpenElements.Pop(); - {$IFDEF PARSEERROR} Token.AcknowledgeSelfClosingFlag(); {$ENDIF} - end; end; tkEndTag: // in this section things are hoisted also @@ -6381,7 +6365,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken); if ((Token.TagName = eDiv) or (Token.TagName = ePre) or (Token.TagName = eOL) or - 
(Token.TagName = eDL)) then + (Token.TagName = eDL)) then begin if (not StackOfOpenElementsHasInScope(Token.TagName)) then begin @@ -6467,7 +6451,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken); //(Token.TagName = ePre) or // hoisted (Token.TagName = eSection) or (Token.TagName = eSummary) or - (Token.TagName = eUL)) then + (Token.TagName = eUL)) then begin if (not StackOfOpenElementsHasInScope(Token.TagName)) then begin @@ -6815,7 +6799,7 @@ procedure THTMLParser.TheInTableInsertionMode(var Token: TToken); exit; end else - if ((Token.TagName = eBody) or + if ((Token.TagName = eBody) or (Token.TagName = eCaption) or (Token.TagName = eCol) or (Token.TagName = eColGroup) or @@ -7869,7 +7853,7 @@ procedure THTMLParser.TheAfterFramesetInsertionMode(var Token: TToken); end; end; tkExtraSpaceCharacter: InsertCharacters(Token.ExtraChars); - {$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R- + {$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R- tkComment: InsertAComment(Token); {$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF} tkStartTag: @@ -8131,49 +8115,49 @@ procedure THTMLParser.TheRulesForParsingTokensInForeignContent(var Token: TToken tkComment: InsertAComment(Token); // http://bugs.freepascal.org/view.php?id=26403 {$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF} tkStartTag: - if ((Token.TagName = eB) or - (Token.TagName = eBig) or - (Token.TagName = eBlockQuote) or - (Token.TagName = eBody) or - (Token.TagName = eBr) or - (Token.TagName = eCenter) or - (Token.TagName = eCode) or - (Token.TagName = eDD) or - (Token.TagName = eDiv) or - (Token.TagName = eDL) or - (Token.TagName = eDT) or - (Token.TagName = eEm) or - (Token.TagName = eEmbed) or - (Token.TagName = eH1) or - (Token.TagName = eH2) or - (Token.TagName = eH3) or - 
(Token.TagName = eH4) or - (Token.TagName = eH5) or - (Token.TagName = eH6) or - (Token.TagName = eHead) or - (Token.TagName = eHR) or - (Token.TagName = eI) or - (Token.TagName = eImg) or - (Token.TagName = eLI) or - (Token.TagName = eListing) or - (Token.TagName = eMenu) or - (Token.TagName = eMeta) or - (Token.TagName = eNoBr) or - (Token.TagName = eOL) or - (Token.TagName = eP) or - (Token.TagName = ePre) or - (Token.TagName = eRuby) or - (Token.TagName = eS) or - (Token.TagName = eSmall) or - (Token.TagName = eSpan) or - (Token.TagName = eStrong) or - (Token.TagName = eStrike) or - (Token.TagName = eSub) or - (Token.TagName = eSup) or - (Token.TagName = eTable) or - (Token.TagName = eTT) or - (Token.TagName = eU) or - (Token.TagName = eUL) or + if ((Token.TagName = eB) or + (Token.TagName = eBig) or + (Token.TagName = eBlockQuote) or + (Token.TagName = eBody) or + (Token.TagName = eBr) or + (Token.TagName = eCenter) or + (Token.TagName = eCode) or + (Token.TagName = eDD) or + (Token.TagName = eDiv) or + (Token.TagName = eDL) or + (Token.TagName = eDT) or + (Token.TagName = eEm) or + (Token.TagName = eEmbed) or + (Token.TagName = eH1) or + (Token.TagName = eH2) or + (Token.TagName = eH3) or + (Token.TagName = eH4) or + (Token.TagName = eH5) or + (Token.TagName = eH6) or + (Token.TagName = eHead) or + (Token.TagName = eHR) or + (Token.TagName = eI) or + (Token.TagName = eImg) or + (Token.TagName = eLI) or + (Token.TagName = eListing) or + (Token.TagName = eMenu) or + (Token.TagName = eMeta) or + (Token.TagName = eNoBr) or + (Token.TagName = eOL) or + (Token.TagName = eP) or + (Token.TagName = ePre) or + (Token.TagName = eRuby) or + (Token.TagName = eS) or + (Token.TagName = eSmall) or + (Token.TagName = eSpan) or + (Token.TagName = eStrong) or + (Token.TagName = eStrike) or + (Token.TagName = eSub) or + (Token.TagName = eSup) or + (Token.TagName = eTable) or + (Token.TagName = eTT) or + (Token.TagName = eU) or + (Token.TagName = eUL) or (Token.TagName = eVar) or 
((Token.TagName = eFont) and (Token.HasAttributes(['color', 'face', 'size'])))) then begin diff --git a/src/wattsi.pas b/src/wattsi.pas index 7cb5694..e7dad94 100644 --- a/src/wattsi.pas +++ b/src/wattsi.pas @@ -1579,8 +1579,7 @@ TCrossReferences = record else if (Element.IsIdentity(nsHTML, eRef)) then begin - ExtractedData := Element.GetAttribute('spec'); - ReferenceName := ExtractedData.AsString; + ReferenceName := Element.TextContent.AsString; New(ListNode); ListNode^.Value := Element; ListNode^.Next := References[ReferenceName]; @@ -1589,12 +1588,11 @@ TCrossReferences = record NewLink := ConstructHTMLElement(eA); Scratch := Default(Rope); Scratch.Append('#refs'); - Scratch.AppendDestructively(ExtractedData); // $R- + Scratch.Append(ReferenceName); NewLink.SetAttributeDestructively('href', Scratch); - ExtractedData := Element.GetAttribute('spec'); Scratch := Default(Rope); Scratch.Append('['); - Scratch.AppendDestructively(ExtractedData); // $R- + Scratch.Append(ReferenceName); Scratch.Append(']'); NewLink.AppendChild(TText.CreateDestructively(Scratch)); (Node.ParentNode as TElement).ReplaceChild(NewLink, Node); @@ -2961,7 +2959,6 @@ function Main(): Boolean; Source := ReadFile(SourceFile); try Parser := THTMLParser.Create(); - Parser.RegisterProperietaryVoidElements([eRef]); try try Parser.SpoonFeed(Source.Start, Source.Length);