admin管理员组

文章数量:1123273

At this point I simply want to retrieve all checkboxes in a sheet of a .xlsx excel file. I found an older answer, but my file doesn't seem to include the x:Checked attribute, thus it retrieves the correct amount of checkboxes, however their values are all false (even though they shouldn't be).

More context: My .xlsx file was originally a .xls file and I converted it via LibreOffice, maybe that might be the problem here, not sure, so happy to get help from anyone more experienced.

My code is very similar to the one in this answer:

At this point I simply want to retrieve all checkboxes in a sheet of a .xlsx excel file. I found an older answer, but my file doesn't seem to include the x:Checked attribute, thus it retrieves the correct amount of checkboxes, however their values are all false (even though they shouldn't be).

More context: My .xlsx file was originally a .xls file and I converted it via LibreOffice, maybe that might be the problem here, not sure, so happy to get help from anyone more experienced.

My code is very similar to the one in this answer: https://stackoverflow.com/a/48972176/13600212

Share Improve this question edited 9 hours ago Mark S. 2,5241 gold badge9 silver badges27 bronze badges asked 12 hours ago Osiris TeamOsiris Team 1017 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 0

So... I wrote a "simple" .xlsx parser only for getting the checkboxes since I couldn't get it working with apache poi, here you go.

The code currently still has 2 problems, which I would appreciate some help with:

  1. No approximate for checkbox position (row/column)
  2. The returned checkboxes list seems to be out of order, optimally the checkboxes should be added by going through a row, adding all checkboxes from left to right and so on...
package com.osiris.danielmanager.excel;

import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

public class ParseExcelForCheckboxes {

    public static List<CheckboxInfo> parseXLSX(File file) throws Exception {
        List<CheckboxInfo> checkboxes = new ArrayList<>();
        Map<String, String> sheetNames = new HashMap<>();
        Map<String, List<String>> sheetAndRelationshipPaths = new HashMap<>();

        try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) {
            ZipEntry entry;
            Map<String, String> xmlFiles = new HashMap<>();

            // Extract XML files from .xlsx
            while ((entry = zis.getNextEntry()) != null) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                byte[] buffer = new byte[1024];
                int length;
                while ((length = zis.read(buffer)) > 0) {
                    baos.write(buffer, 0, length);
                }
                xmlFiles.put(entry.getName(), baos.toString(StandardCharsets.UTF_8));
            }

            // Parse sheet names and relationships
            if (xmlFiles.containsKey("xl/workbook.xml")) {
                String workbookXml = xmlFiles.get("xl/workbook.xml");
                Document doc = parseXml(workbookXml);
                NodeList sheets = doc.getElementsByTagName("sheet");
                for (int i = 0; i < sheets.getLength(); i++) {
                    Element sheet = (Element) sheets.item(i);
                    String sheetId = sheet.getAttribute("sheetId");
                    String sheetName = sheet.getAttribute("name");
                    sheetNames.put(sheetId, sheetName);

                    // Find the corresponding relationship for each sheet
                    String sheetRelsPath = "xl/worksheets/_rels/sheet" + sheetId + ".xml.rels";
                    if (xmlFiles.containsKey(sheetRelsPath)) {
                        String relsXml = xmlFiles.get(sheetRelsPath);
                        Document relsDoc = parseXml(relsXml);
                        NodeList relationships = relsDoc.getElementsByTagName("Relationship");
                        for (int j = 0; j < relationships.getLength(); j++) {
                            Element relationship = (Element) relationships.item(j);
                            String type = relationship.getAttribute("Type");
                            if (type.contains("ctrlProp")) {
                                String absolutePath = relationship.getAttribute("Target").replace("../ctrlProps/", "xl/ctrlProps/");
                                var list = sheetAndRelationshipPaths.get(sheetId);
                                if (list == null) {
                                    list = new ArrayList<>();
                                    sheetAndRelationshipPaths.put(sheetId, list);
                                }
                                list.add(absolutePath);
                            }
                        }
                    }
                }
            }

            // Parse checkboxes in each sheet
            for (String sheetId : sheetNames.keySet()) {
                String sheetName = sheetNames.get(sheetId);
                if (sheetAndRelationshipPaths.containsKey(sheetId)) {

                    // Extract the control properties xml for checkboxes
                    for (String xmlFilePath : sheetAndRelationshipPaths.get(sheetId)) {
                        String ctrlPropsXml = xmlFiles.get(xmlFilePath);
                        Objects.requireNonNull(ctrlPropsXml);
                        Document ctrlDoc = parseXml(ctrlPropsXml);

                        NodeList controls = ctrlDoc.getElementsByTagName("formControlPr");
                        for (int i = 0; i < controls.getLength(); i++) {
                            Element control = (Element) controls.item(i);
                            if ("CheckBox".equals(control.getAttribute("objectType"))) {
                                CheckboxInfo checkboxInfo = new CheckboxInfo();
                                checkboxInfo.sheetName = sheetName;
                                checkboxInfo.isChecked = "Checked".equalsIgnoreCase(control.getAttribute("checked"));
                                checkboxInfo.cellReference = control.getAttribute("cellReference");
                                checkboxes.add(checkboxInfo);
                            }
                        }
                    }
                }
            }
        }

        return checkboxes;
    }

    private static Document parseXml(String xmlContent) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(new ByteArrayInputStream(xmlContent.getBytes()));
    }

    public static void main(String[] args) {
        try {
            File file = new File("example.xlsx"); // Replace with your .xlsx file path
            List<CheckboxInfo> checkboxes = parseXLSX(file);
            for (CheckboxInfo checkbox : checkboxes) {
                System.out.println(checkbox);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Test
    void test() throws Exception {
        var f = Paths.get("./simple.xlsx").toFile();
        var result = parseXLSX(f);
        System.out.println();
    }

    public static class CheckboxInfo {
        public String sheetName;
        public boolean isChecked;
        public String cellReference;

        @Override
        public String toString() {
            return "Checkbox [Sheet: " + sheetName + ", Checked: " + isChecked + ", Cell: " + cellReference + "]";
        }
    }
}

本文标签: excelquotFloatingquot checkboxcontrol boolean value not supportedStack Overflow