/*
 * HtmlTokenizer -- a tokenizer for HTML text built on language.LexicalAnalyzer.
 *
 * What the visible code does:
 *
 *   HtmlTokenizer(String html)
 *       Stores html in theText, installs the break symbols '<', '>' and '/'
 *       via setBreakSymbols, then calls init() and parseTokens() and resets
 *       tokenIndex to 0.  The no-argument constructor does nothing.
 *
 *   init()
 *       Fills tags with lower-case/upper-case pairs of tag names followed by
 *       "!" (53 entries, matching the running counts in the trailing
 *       comments), calls super.init(), then sets nOfTags to tags.length.
 *       NOTE(review): "brockquote"/"BROCKQUOTE" looks like a misspelling of
 *       the HTML element name "blockquote" -- confirm before changing, since
 *       the code that consumes tags is not visible here.
 *
 *   isTag()
 *       Returns false at the end of input.  Otherwise returns true only when
 *       isC('<') holds and the value returned by lookup2() is an ASCII
 *       letter, '!' or '/'.  (Presumably isC tests the current character and
 *       lookup2 peeks at the one after it -- both are inherited; TODO confirm.)
 *
 *   rNonBreak()
 *       Returns false immediately if isTag(), isBreak() or isTheEnd() holds.
 *       Otherwise calls rAny1() repeatedly until one of those three
 *       conditions holds, then returns true.  The local "char c" is unused.
 *
 *   parseNonTag()
 *       Returns false if isTag() holds on entry.  Otherwise loops: returns
 *       true as soon as isTag() holds or none of rStringConst(), rNonBreak(),
 *       rBreak() succeeds (they are tried in that order, short-circuiting).
 *
 *   parseAToken()
 *       Resets aToken to "", returns false at the end of input, then tries
 *       rTag() and, failing that, parseNonTag().  On success of either,
 *       aToken is appended to tokens and true is returned; otherwise false.
 *
 *   comment()
 *       Empty method whose body is only a comment about the input format.
 *
 * NOTE(review) -- this copy of the file is damaged and should be restored
 * from the original rather than repaired by hand:
 *
 *   1. All line breaks were lost.  Every "//" comment therefore runs to the
 *      end of its (very long) physical line and hides the code after it.
 *   2. In rTagName() the text jumps from "for(int i=0;i" directly to
 *      "')){ dmy=rStringConst(); ...".  A span that began with a less-than
 *      sign and ended with a greater-than sign appears to have been removed
 *      (as a markup stripper would do).  The lost span contained the rest of
 *      rTagName() and the beginning of the method that parseAToken() calls
 *      as rTag().  Only the tail survives: a loop body that calls
 *      rStringConst() and rNot('>'), followed by a check that rC('>')
 *      succeeds before returning true.
 *   3. As the text stands, the block comment opened inside rTagName() is
 *      closed only by the terminator near the end of the second line, so
 *      the declarations of nOfTags and tags fall inside that comment.
 *   4. The example text inside comment() looks stripped in the same way.
 */
package language; public class HtmlTokenizer extends language.LexicalAnalyzer { public HtmlTokenizer() { } public boolean rNonBreak() { char c; if(this.isTag()) return false; if(isBreak()) return false; if(isTheEnd()) return false; while(true){ rAny1(); if(isTag()) return true; if(isBreak()) return true; if(isTheEnd()) return true; } // return false; } public boolean isTag() { if(isTheEnd()) return false; if(isC('<')){ int c=lookup2(); if(('a'<=c && c<='z')|| ('A'<=c && c<='Z')|| c=='!' || c=='/') return true; } return false; } public boolean parseNonTag() { if(isTag()) return false; while(true){ if(isTag()) return true; if(rStringConst()|| rNonBreak() || rBreak()) {} else return true; } // return true; } public void init() { tags=new String[]{"applet","APPLET","a","A", // 4 "brockquote","BROCKQUOTE", "body","BODY","br","BR","b","B",//8,12 "center","CENTER", //2,14 "dt","DT", "d","D",//4,18 "frame","FRAME","form","FORM", "font","FONT",//6,24 "head","HEAD","html","HTML","hr","HR","h","H", //8,32 "img","IMG", //2,34 "li","LI", //2,36 "mailto","MAILTO","meta","META", //4,40 "ol","OL", //2,42 "pre","PRE","p","P", //4,46 "t","T", //2,48 "ul","UL", //2,50 "title","TITLE", //2,52 "!"}; //1,53; super.init(); nOfTags=tags.length; } public boolean parseAToken() { aToken=""; if(isTheEnd()) { return false;} if(rTag()) {tokens.addElement(aToken); return true;} if(parseNonTag()) {tokens.addElement(aToken); return true;} return false; } public HtmlTokenizer(String html) { theText=html; char[] bs={'<','>','/'}; this.setBreakSymbols(bs); init(); parseTokens(); tokenIndex=0; } public boolean rTagName() { String t,f; char c; /* for(int i=0;i')){ dmy=rStringConst(); dmy=rNot('>'); } if(!rC('>')) return false; return true; } public int nOfTags; public String tags[]; /* ={"applet","APPLET","a","A", // 4 "brockquote","BROCKQUOTE", "body","BODY","br","BR","b","B",//8,12 "center","CENTER", //2,14 "dt","DT", "d","D",//4,18 "frame","FRAME","form","FORM", "font","FONT",//6,24 
"head","HEAD","html","HTML","hr","HR","h","H", //8,32 "img","IMG", //2,34 "li","LI", //2,36 "mailto","MAILTO","meta","META", //4,40 "ol","OL", //2,42 "pre","PRE","p","P", //4,46 "t","T", //2,48 "ul","UL", //2,50 "title","TITLE", //2,52 "!"}; //1,53; */ public void comment() { /* input text ... html format ex. ... "" ... "" */ } }