Made isTagStart() a local function in XmlNode.*

This commit is contained in:
Ivan Skytte Jørgensen
2018-02-15 15:09:55 +01:00
parent fbb0cdf048
commit b78cf045fe
2 changed files with 60 additions and 66 deletions

@ -210,6 +210,66 @@ const NodeType g_nodes[] = {
// NAME hasBackTag brk? isVisible? filterKeep1? filterKeep2 type/m_nodeId[i]
// . does "s" start a tag? (regular tag , back tag or comment tag)
static bool isTagStart(const char *s) {
// it must start with < to be a tag
if( s[0] != '<' )
return false;
// next char can be an alnum, !-- or / then alnum
// Extensible Markup Language (XML) 1.0 (Fifth Edition)
// https://www.w3.org/TR/REC-xml/#NT-Name
// NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
// [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
// [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
// [#x10000-#xEFFFF]
/// @todo ALC cater for other start characters
/// regex: "^<[A-Za-z]"
if( is_alpha_a(s[1]) )
return true;
// next char can be 1 of 3 things to be a tag
// / is also acceptable, followed only by an alnum or >
/// @todo ALC "</>" a valid tag?
/// regex: "^</[A-Za-z0-9>]"
if( s[1] == '/' ) {
if( is_alnum_a(s[2]) || (s[2] == '>') )
return true;
return false;
}
// office.microsoft.com uses <?xml ...?> tags
/// regex: "^<?[A-Za-z0-9]"
if( s[1]=='?' ) {
if( is_alnum_a(s[2]) )
return true;
return false;
}
// make sure the double hyphens follow the ! or alnum
if( s[1]=='!' ) {
// this is for <!xml> i guess
if( is_alnum_a(s[2]) )
return true;
// and the <![CDATA[
// and <![....]> i've seen too
// <![if gt IE 6]><script>.... for waterfordcoc.org
if( s[2] == '[' )
return true;
// and the <!-- comment here--> famous comment tag
if( s[2]=='-' && s[3]=='-' )
return true;
}
return false;
}
// . called by Xml class
// . returns the length of the node
// . TODO: "node" is now guaranteed to be \0 terminated -- make this faster

@ -270,72 +270,6 @@ public:
class XmlNode *m_parent;
};
// . does "s" start a tag? (regular tag , back tag or comment tag)
inline bool isTagStart ( const char *s ) {
// it must start with < to be a tag
if ( s[0] != '<' ) {
return false;
}
// next char can be an alnum, !-- or / then alnum
// Extensible Markup Language (XML) 1.0 (Fifth Edition)
// https://www.w3.org/TR/REC-xml/#NT-Name
// NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
// [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
// [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
// [#x10000-#xEFFFF]
/// @todo ALC cater for other start characters
/// regex: "^<[A-Za-z]"
if ( is_alpha_a( s[1] ) ) {
return true;
}
// next char can be 1 of 3 things to be a tag
// / is also acceptable, followed only by an alnum or >
/// @todo ALC "</>" a valid tag?
/// regex: "^</[A-Za-z0-9>]"
if ( s[1] == '/' ) {
if ( is_alnum_a( s[2] ) || (s[2] == '>') ) {
return true;
}
return false;
}
// office.microsoft.com uses <?xml ...?> tags
/// regex: "^<?[A-Za-z0-9]"
if ( s[1]=='?' ) {
if ( is_alnum_a(s[2]) ) {
return true;
}
return false;
}
// make sure the double hyphens follow the ! or alnum
if ( s[1]=='!' ) {
// this is for <!xml> i guess
if ( is_alnum_a(s[2]) ) {
return true;
}
// and the <![CDATA[
// and <![....]> i've seen too
// <![if gt IE 6]><script>.... for waterfordcoc.org
if ( s[2] == '[' ) {
return true;
}
// and the <!-- comment here--> famous comment tag
if ( s[2]=='-' && s[3]=='-' ) {
return true;
}
}
return false;
}
enum NodeTagType {
TAG_TYPE_UNKNOWN = 0,
TAG_TYPE_XML,