I am trying to use a SAX Parser to parse a XML file. Although I keep getting the error below in my code:
Exception in thread "main" java.net.MalformedURLException: unknown protocol: c
at java.net.URL.<init>(URL.java:592)
at java.net.URL.<init>(URL.java:482)
at java.net.URL.<init>(URL.java:431)
at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:605)
at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:799)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:764)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:123)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1137)
at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:580)
at main.main(main.java:28)
Java Result: 1
Here Is My handler Class:
public class MySaxParser extends DefaultHandler {
Index i = new Index(12);
String bookxmlfilename;
String tmpValue;
BookCitation c;
JournalArticle j;
Unpublished u;
ConfProceedings p;
public MySaxParser() {
}
@Override
public void startElement(String s, String s1, String elementName, Attributes attr) throws SAXException {
if (elementName.equalsIgnoreCase("JournalArticle")) {
if (elementName.equalsIgnoreCase("Pages")) {
j.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
j.setendPage(Integer.parseInt(attr.getValue("EndPage")));
}
}
if (elementName.equalsIgnoreCase("ConferenceProceedings")) {
if (elementName.equalsIgnoreCase("Pages")) {
p.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
p.setendPage(Integer.parseInt(attr.getValue("EndPage")));
}
}
}
@Override
public void endElement(String s, String s1, String element) throws SAXException {
if (element.equalsIgnoreCase("book")) {
i.addCitation(c);
if (element.equalsIgnoreCase("name")) {
c.setName(tmpValue);
}
if (element.equalsIgnoreCase("publisher")) {
c.setpublisher(tmpValue);
}
if (element.equalsIgnoreCase("publicationDate")) {
c.setdateOfPublication(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
c.addAuthor(tmpValue);
}
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
c.addKeyword(tmpValue);
}
}
}
if (element.equalsIgnoreCase("JournalArticle")) {
i.addCitation(j);
if (element.equalsIgnoreCase("name")) {
j.setName(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfJournal")) {
j.settitleOfJournal(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfJournal")) {
j.settitleOfJournal(tmpValue);
}
if (element.equalsIgnoreCase("PublicationDate")) {
j.setpublicationDate(tmpValue);
}
if (element.equalsIgnoreCase("volNumber")) {
j.setvolNumber(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("IssueNumber")) {
j.setissueNumber(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
j.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
j.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("Unpublished")) {
i.addCitation(u);
if (element.equalsIgnoreCase("name")) {
u.setName(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
u.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
u.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("ConferenceProceedings")) {
i.addCitation(p);
if (element.equalsIgnoreCase("name")) {
p.setName(tmpValue);
}
if (element.equalsIgnoreCase("publisher")) {
p.setpublisher(tmpValue);
}
if (element.equalsIgnoreCase("ConferenceLocation")) {
p.setlocationOfConference(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfConferenceproceeding")) {
p.settitleOfConferenceProc(tmpValue);
}
if (element.equalsIgnoreCase("ConferenceYea")) {
p.setconfYear(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("Editor")) {
p.seteditor(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
p.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
p.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("FormattingStyle")) {
i.setFormatType("IEEE");
}
if (element.equalsIgnoreCase("FormattingStyle")) {
try {
i.formatIEEE(tmpValue);
} catch (IOException ex) {
Logger.getLogger(MySaxParser.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
@Override
public void characters(char[] ac, int i, int j) throws SAXException {
tmpValue = new String(ac, i, j);
}
}
Here is my main class:
public class main {
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
// Create scanner
Scanner OswegoNote = new Scanner(System.in);
//Create a parser factory
SAXParserFactory factory = SAXParserFactory.newInstance();
//Make the parser
SAXParser saxParser = factory.newSAXParser();
XMLReader parser = saxParser.getXMLReader();
//Create a handler
MySaxParser handler = new MySaxParser();
//Tell the parser to use this handler
parser.setContentHandler(handler);
//read and parse the document
parser.parse("C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html");
}
}
And here is part of my XML file:
<Citation>
<ConferenceProceedings>
<Name>An efficient implementation of Smith Waterman algorithm on GPU using CUDA, for massively parallel scanning of sequence databases</Name>
<Publisher>Parallel and Distributed Processing</Publisher>
<ConferenceLocation>Austin,TX</ConferenceLocation>
<TitleOfConferenceproceeding> IEEE International Conference on Parallel and Distributed Processing</TitleOfConferenceproceeding>
<ConferenceYear>2009</ConferenceYear>
<Editor>S. M. Mann</Editor>
<Pages StartPage="85" EndPage="102"/>
<Authors>
<author>L. L. Ligowski</author>
<author>W. A. Rudnicki</author>
</Authors>
<Keywords>
<Keyword>Sparse Data</Keyword>
<Keyword>DNA</Keyword>
<Keyword>GPU</Keyword>
<Keyword>Data Mining</Keyword>
</Keywords>
</ConferenceProceedings>
</Citation>
<FormattingStyle>IEEE</FormattingStyle>
<FilePath>C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html</FilePath>
</Index>
I can not figure out why this error is happening. Any help would be greatly appreciated!
You are missing the protocol when you set the path for your html file. As you are trying to read a local html file, you can use file
protocol:
file:///{yourfilepath}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With