解析XML

2020-06-16

1. DOM解析XML

xml文档

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<?xml version="1.0" encoding="UTF-8" ?>
<class>
<student>
<name nameAttr="sungm">sungm</name>
<age ageAttr="25">25</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassOne">Class One</className>
</student>
<student>
<name nameAttr="sunhw">sunhw</name>
<age ageAttr="1">1</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassTwo">Class Two</className>
</student>
</class>

程序入口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import org.w3c.dom.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

/**
 * DOM parsing demo: loads {@code src/main/resources/student.xml} into an in-memory tree and,
 * for every {@code <student>} element, prints the text of its known child elements followed
 * by the attribute associated with each of them.
 *
 * @author : sungm
 * @since : 2020-06-16 16:59
 */
public class Main {

    /** Path of the XML document, passed to the parser as a URI relative to the working directory. */
    private static final String XML_PATH = "src/main/resources/student.xml";

    /** Child element names of a student, each paired with the attribute printed for it. */
    private static final String[][] FIELDS = {
        {"name", "nameAttr"},
        {"age", "ageAttr"},
        {"sex", "sexAttr"},
        {"className", "classAttr"},
    };

    /**
     * Parses the document and prints every student it contains.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the document cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // Factory -> builder -> parsed Document: the standard JAXP bootstrap sequence.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document document = builder.parse(XML_PATH);

        // document.getElementsByTagName("student") would return the same list.
        Element rootElement = document.getDocumentElement();
        NodeList studentNodes = rootElement.getElementsByTagName("student");
        for (int index = 0, size = studentNodes.getLength(); index < size; index++) {
            printStudent((Element) studentNodes.item(index));
        }
    }

    /**
     * Prints the text of each known child element, then each child's attribute, then a blank line.
     * Children or attributes that are absent are skipped instead of causing a NullPointerException.
     */
    private static void printStudent(Element student) {
        Element[] children = new Element[FIELDS.length];
        for (int i = 0; i < FIELDS.length; i++) {
            // item(0) yields null when the student has no such descendant element.
            children[i] = (Element) student.getElementsByTagName(FIELDS[i][0]).item(0);
        }

        // First pass: element name and text content.
        for (Element child : children) {
            if (child != null) {
                System.out.println(child.getNodeName() + " : " + child.getTextContent());
            }
        }

        // Second pass: attribute name and value (getAttributeNode returns null when absent).
        for (int i = 0; i < FIELDS.length; i++) {
            if (children[i] == null) {
                continue;
            }
            Attr attr = children[i].getAttributeNode(FIELDS[i][1]);
            if (attr != null) {
                System.out.println(attr.getName() + " : " + attr.getValue());
            }
        }
        System.out.println();
    }
}

运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
name : sungm
age : 25
sex : Man
className : Class One
nameAttr : sungm
ageAttr : 25
sexAttr : Man
classAttr : ClassOne

name : sunhw
age : 1
sex : Man
className : Class Two
nameAttr : sunhw
ageAttr : 1
sexAttr : Man
classAttr : ClassTwo

优点:

  • 树结构,方便理解与使用
  • 解析过程中树结构保存在内存中,方便修改

缺点:

  • 当文件过大时耗用内存,需谨慎使用

2. SAX解析XML

xml文档

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<?xml version="1.0" encoding="UTF-8" ?>
<class>
<student>
<name nameAttr="sungm">sungm</name>
<age ageAttr="25">25</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassOne">Class One</className>
</student>
<student>
<name nameAttr="sunhw">sunhw</name>
<age ageAttr="1">1</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassTwo">Class Two</className>
</student>
</class>

自定义Handler处理器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX event handler that traces a document to standard output: document start/end,
 * each element name, each attribute, and the trimmed text found between element boundaries.
 *
 * @author : sungm
 * @since : 2020-06-17 17:42
 */
public class StudentParserHandler extends DefaultHandler {

    interface Constants {

        String ROOT_ELEMENT = "class";

        String EMPTY = "";

    }

    // Callback order: startDocument -> startElement -> characters -> endElement -> characters -> endDocument
    // startDocument and endDocument run once; the others run repeatedly, once per element or text run.

    /**
     * Text collected since the last element boundary. SAX may deliver one run of character data
     * in several characters() calls, so chunks are joined here and printed as a single value.
     */
    private final StringBuilder pendingText = new StringBuilder();

    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        System.out.println("开始解析XML文档");
    }

    @Override
    public void endDocument() throws SAXException {
        flushPendingText();
        super.endDocument();
        System.out.println("解析XML文档结束");
    }

    /**
     * Prints the element name (or a dedicated message for the root element) followed by one
     * line per attribute. Any text buffered before this element is printed first.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        flushPendingText();
        super.startElement(uri, localName, qName, attributes);
        System.out.println(Constants.ROOT_ELEMENT.equals(qName) ? "开始解析根节点" : "节点名称 : " + qName);
        for (int index = 0, size = attributes.getLength(); index < size; index++) {
            String attrName = attributes.getLocalName(index);
            // The local name may be empty when namespace processing is off; use the qualified name then.
            if (attrName == null || attrName.isEmpty()) {
                attrName = attributes.getQName(index);
            }
            String attrValue = attributes.getValue(index);
            System.out.println("attrName = " + attrName + ", attrValue = " + attrValue);
        }
    }

    /** Prints any buffered text, then announces the end of the root element when applicable. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        flushPendingText();
        super.endElement(uri, localName, qName);
        if (Constants.ROOT_ELEMENT.equals(qName)) {
            System.out.println("解析根节点结束");
        }
    }

    /** Buffers the chunk; printing is deferred to the next element boundary. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        pendingText.append(ch, start, length);
    }

    /** Prints the buffered text if it is non-blank after trimming, then clears the buffer. */
    private void flushPendingText() {
        String value = pendingText.toString().trim();
        pendingText.setLength(0);
        if (!Constants.EMPTY.equals(value)) {
            System.out.println("节点值:" + value);
        }
    }
}

程序入口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/**
 * SAX parsing demo: streams {@code src/main/resources/student.xml} through a
 * {@link StudentParserHandler}, which reports the parse events it receives.
 *
 * @author : sungm
 * @since : 2020-06-17 17:33
 */
public class Main {

    /**
     * Creates a SAX parser and runs it over the student document.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the document cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // The handler receives every callback emitted while the file is read.
        StudentParserHandler eventHandler = new StudentParserHandler();
        // Factory -> parser, chained because neither intermediate object is needed again.
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        parser.parse("src/main/resources/student.xml", eventHandler);
    }

}

运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
开始解析XML文档
开始解析根节点
节点名称 : class
节点名称 : student
节点名称 : name
attrName = nameAttr, attrValue = sungm
节点值:sungm
节点名称 : age
attrName = ageAttr, attrValue = 25
节点值:25
节点名称 : sex
attrName = sexAttr, attrValue = Man
节点值:Man
节点名称 : className
attrName = classAttr, attrValue = ClassOne
节点值:Class One
节点名称 : student
节点名称 : name
attrName = nameAttr, attrValue = sunhw
节点值:sunhw
节点名称 : age
attrName = ageAttr, attrValue = 1
节点值:1
节点名称 : sex
attrName = sexAttr, attrValue = Man
节点值:Man
节点名称 : className
attrName = classAttr, attrValue = ClassTwo
节点值:Class Two
解析根节点结束
解析XML文档结束

优点:

  • 采用事件驱动的方式,耗用内存更少
  • 适用于只需要读取xml文件中数据的场景

缺点:

  • 需编写自定义的handler类,不易编写
  • 很难同时访问同一个xml中的不同数据

3. JDOM解析XML

引入依赖

1
2
3
4
5
6
<!-- https://mvnrepository.com/artifact/org.jdom/jdom -->
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
<version>2.0.2</version>
</dependency>

XML

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<?xml version="1.0" encoding="UTF-8" ?>
<class>
<student>
<name nameAttr="sungm">sungm</name>
<age ageAttr="25">25</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassOne">Class One</className>
</student>
<student>
<name nameAttr="sunhw">sunhw</name>
<age ageAttr="1">1</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassTwo">Class Two</className>
</student>
</class>

程序入口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;

import java.util.List;

/**
 * JDOM parsing demo: builds a document from {@code src/main/resources/student.xml} and walks
 * it depth-first, printing each element's non-blank text and its attributes.
 *
 * @author : sungm
 * @since : 2020-06-18 10:46
 */
public class Main {

    interface Constants {

        String EMPTY = "";

    }

    /**
     * Builds the document, prints the root element name, then walks the whole tree.
     *
     * @param args unused
     * @throws Exception if the document cannot be read or parsed
     */
    public static void main(String[] args) throws Exception{
        Document document = new SAXBuilder().build("src/main/resources/student.xml");
        Element root = document.getRootElement();
        System.out.println("根节点:" + root.getName());
        parseElement(root);
    }

    /**
     * Prints one element (text first, then attributes) and recurses into its children.
     *
     * @param element the element to print
     */
    private static void parseElement(Element element) {
        // Text content: printed only when something remains after trimming.
        String trimmedText = element.getText() == null ? Constants.EMPTY : element.getTextTrim();
        if (!Constants.EMPTY.equals(trimmedText)) {
            System.out.println(element.getName() + ":" + trimmedText);
        }

        // Attributes of this element.
        if (element.hasAttributes()) {
            for (Attribute attribute : element.getAttributes()) {
                parseAttribute(attribute);
            }
        }

        // Child elements, visited in document order.
        List<Element> children = element.getChildren();
        if (children == null || children.isEmpty()) {
            return;
        }
        for (Element child : children) {
            parseElement(child);
        }
    }

    /**
     * Prints a single attribute as {@code name : value}.
     *
     * @param attribute the attribute to print
     */
    private static void parseAttribute(Attribute attribute) {
        System.out.println(attribute.getName() + " : " + attribute.getValue());
    }

}

运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
根节点:class
name:sungm
nameAttr : sungm
age:25
ageAttr : 25
sex:Man
sexAttr : Man
className:Class One
classAttr : ClassOne
name:sunhw
nameAttr : sunhw
age:1
ageAttr : 1
sex:Man
sexAttr : Man
className:Class Two
classAttr : ClassTwo

优点:

  • API简单易用,推荐使用

缺点:

  • 用了很多具体类,没使用接口

4. DOM4J解析XML

引入依赖

1
2
3
4
5
6
<!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>

XML

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
<?xml version="1.0" encoding="UTF-8" ?>
<class>
<student>
<name nameAttr="sungm">sungm</name>
<age ageAttr="25">25</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassOne">Class One</className>
</student>
<student>
<name nameAttr="sunhw">sunhw</name>
<age ageAttr="1">1</age>
<sex sexAttr="Man">Man</sex>
<className classAttr="ClassTwo">Class Two</className>
</student>
</class>

程序入口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.util.Iterator;

/**
 * DOM4J parsing demo: reads {@code src/main/resources/student.xml} and walks it depth-first,
 * printing each element's attributes followed by its name and trimmed text.
 *
 * @author : sungm
 * @since : 2020-06-18 11:27
 */
public class Main {

    /**
     * Reads the document, prints the root element name, then walks the whole tree.
     *
     * @param args unused
     * @throws Exception if the document cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        SAXReader reader = new SAXReader();
        Document document = reader.read("src/main/resources/student.xml");
        Element element = document.getRootElement();
        System.out.println("根节点:名称 = " + element.getName());
        // Recursive walk starting at the root.
        parseElement(element);
    }

    /**
     * Prints the attributes of {@code element}, then its name and trimmed text, then recurses
     * into its child elements. Attributes are handled here rather than in the child loop so
     * that the root element's attributes are printed as well.
     *
     * @param element the element to print
     */
    private static void parseElement(Element element) {
        // Iterating as Object works whether the library returns a raw or a generic List.
        if (element.attributeCount() > 0) {
            for (Object attribute : element.attributes()) {
                parseAttribute(attribute);
            }
        }
        System.out.println("节点:名称 = " + element.getName() + ", value = " + element.getTextTrim());
        for (Object child : element.elements()) {
            parseElement((Element) child);
        }
    }

    /**
     * Prints a single attribute as {@code name : value}.
     *
     * @param attribute the attribute to print; must be an {@link Attribute}
     */
    private static void parseAttribute(Object attribute) {
        Attribute attr = (Attribute) attribute;
        System.out.println("属性:" + attr.getName() + " : " + attr.getValue());
    }

}

运行结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
根节点:名称 = class
节点:名称 = class, value =
节点:名称 = student, value =
属性:nameAttr : sungm
节点:名称 = name, value = sungm
属性:ageAttr : 25
节点:名称 = age, value = 25
属性:sexAttr : Man
节点:名称 = sex, value = Man
属性:classAttr : ClassOne
节点:名称 = className, value = Class One
节点:名称 = student, value =
属性:nameAttr : sunhw
节点:名称 = name, value = sunhw
属性:ageAttr : 1
节点:名称 = age, value = 1
属性:sexAttr : Man
节点:名称 = sex, value = Man
属性:classAttr : ClassTwo
节点:名称 = className, value = Class Two

优点:

  • 使用了接口和抽象方案,是个优秀的API
  • 灵活性好、性能优异、易用性好。推荐使用