-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXLSX2VCF.java
179 lines (157 loc) · 6.67 KB
/
XLSX2VCF.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
/** Source: https://svn.apache.org/repos/asf/poi/trunk/poi-examples/src/main/java/org/apache/poi/examples/xssf/eventusermodel/XLSX2CSV.java */
package sayitobar;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
@SuppressWarnings({"java:S106","java:S4823","java:S1192"})
public class XLSX2VCF {
private final OPCPackage xlsxPackage;
// Number of columns to read starting with leftmost
private final int minColumns;
// Destination for data
private final PrintStream output;
/**
* Creates a new XLSX -> VCF converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the VCF to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XLSX2VCF(OPCPackage pkg, PrintStream output, int minColumns) {
this.xlsxPackage = pkg;
this.output = output;
this.minColumns = minColumns;
}
public XLSX2VCF(OPCPackage pkg, PrintStream output) {
this.xlsxPackage = pkg;
this.output = output;
this.minColumns = -1;
}
/**
* Uses the XSSF Event SAX helpers to do most of the work
* of parsing the Sheet XML, and outputs the contents
* as VCF.
*/
class SheetToVCF implements SheetContentsHandler {
private boolean firstCellOfRow;
private int currentRow = -1;
private int currentCol = -1;
private void outputMissingRows(int number) {
for (int i=0; i < number; i++) {
for (int j=0; j < minColumns; j++) {
output.append('\t');
}
output.append('\n');
}
}
@Override
public void startRow(int rowNum) {
// If there were gaps, output the missing rows
outputMissingRows(rowNum-currentRow-1);
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
}
@Override
public void endRow(int rowNum) {
// Ensure the minimum number of columns
for (int i=currentCol; i<minColumns; i++) {
output.append('\t');
}
output.append('\n');
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
if (firstCellOfRow) {
firstCellOfRow = false;
} else {
output.append('\t');
}
// gracefully handle missing CellRef here in a similar way as XSSFCell does
if(cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}
// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
int missedCols = thisCol - currentCol - 1;
for (int i=0; i<missedCols; i++) {
output.append('\t');
}
// no need to append anything if we do not have a value
if (formattedValue == null) {
return;
}
currentCol = thisCol;
output.append(formattedValue);
}
@Override
public void headerFooter(String s, boolean b, String s1) {
}
}
/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException If reading the data from the package fails.
* @throws SAXException if parsing the XML data fails.
*/
boolean oneSheet = false;
public void process() throws IOException, OpenXML4JException, SAXException {
process(-1);
}
public void process(int sheetNum) throws IOException, OpenXML4JException, SAXException {
oneSheet = (sheetNum >= 0);
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
int index = 0;
while (iter.hasNext()) {
try (InputStream stream = iter.next()) {
if (sheetNum < 0 || index == sheetNum) // If desired sheetNum was given, read only that
processSheet(styles, strings, new SheetToVCF(), stream);
}
++index;
}
}
/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* @param styles The table of styles that may be referenced by cells in the sheet
* @param strings The table of strings that may be referenced by cells in the sheet
* @param sheetHandler
* @param sheetInputStream The stream to read the sheet-data from.
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) {
// set emulateCSV=true on DataFormatter - it is also possible to provide a Locale
// when POI 5.2.0 is released, you can call formatter.setUse4DigitYearsInAllDateFormats(true) to ensure all dates are formatted with 4 digit years
DataFormatter formatter = new DataFormatter(true);
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
} catch (IOException | SAXException e) {
e.printStackTrace();
}
}
}