Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow access to other XML docs in docx file like the header and footer #73

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 53 additions & 6 deletions lib/docx/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@ module Docx
# puts d.text
# end
class Document
attr_reader :xml, :doc, :zip, :styles

# Maps each reader attribute name to the path of its XML part inside the
# .docx archive. A path containing * is a glob that may match several parts,
# e.g. word/header1.xml and word/header2.xml.
# Frozen so this shared lookup table cannot be mutated at runtime.
DOCUMENT_PATHS = {
  doc: "word/document.xml",
  styles: "word/styles.xml",
  headers: "word/header*.xml",
  footers: "word/footer*.xml",
  numbering: "word/numbering.xml"
}.freeze

attr_reader :xml, :doc, :zip, :styles, :headers, :footers, :numbering

def initialize(path, &block)
@replace = {}
@zip = Zip::File.open(path)
@document_xml = @zip.read('word/document.xml')
@doc = Nokogiri::XML(@document_xml)
@styles_xml = @zip.read('word/styles.xml')
@styles = Nokogiri::XML(@styles_xml)
extract_documents

if block_given?
yield self
@zip.close
Expand Down Expand Up @@ -57,6 +66,8 @@ def paragraphs
def bookmarks
bkmrks_hsh = Hash.new
bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }
bkmrks_ary += @headers.values.map { |xml_doc| xml_doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }.flatten
bkmrks_ary += @footers.values.map { |xml_doc| xml_doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }.flatten
# auto-generated by office 2010
bkmrks_ary.reject! {|b| b.name == "_GoBack" }
bkmrks_ary.each {|b| bkmrks_hsh[b.name] = b }
Expand Down Expand Up @@ -123,13 +134,49 @@ def replace_entry(entry_path, file_contents)

private

# Walks DOCUMENT_PATHS and loads each listed part. A path containing "*" is
# handed to the glob loader; every other path goes to the single-part loader.
def extract_documents
  DOCUMENT_PATHS.each_pair do |name, location|
    globbed = location.include?('*')
    next extract_single_document_from_path(name, location) unless globbed

    extract_multiple_documents_from_globbed_path(name, location)
  end
end

# Parses the archive entry at +path+ with Nokogiri and stores the result in
# the instance variable named after +attr_name+. Does nothing (and returns
# nil) when @zip has no entry at that path.
def extract_single_document_from_path(attr_name, path)
  return unless @zip.find_entry(path)

  parsed = Nokogiri::XML(@zip.read(path))
  instance_variable_set(:"@#{attr_name}", parsed)
end

# Parses every archive entry matching +glob_path+ and stores the results as
# a Hash in the instance variable named after +hash_attr_name+. Each key is
# the entry name with a leading "word/" and trailing ".xml" removed
# (e.g. "header1"); each value is the Nokogiri-parsed entry.
def extract_multiple_documents_from_globbed_path(hash_attr_name, glob_path)
  documents = @zip.glob(glob_path).each_with_object({}) do |entry, by_name|
    entry_path = entry.name
    key = entry_path.sub(/^word\//, "").sub(/\.xml$/, "")
    by_name[key] = Nokogiri::XML(@zip.read(entry_path))
  end
  instance_variable_set(:"@#{hash_attr_name}", documents)
end

#--
# TODO: Flesh this out to be compatible with other files
# TODO: Method to set flag on files that have been edited, probably by inserting something at the
# end of methods that make edits?
#++
# Serializes every loaded XML part listed in DOCUMENT_PATHS and passes it to
# replace_entry under its archive path. Globbed entries (paths containing
# "*") are stored as a Hash keyed by simple file name, so each value is
# written back to "word/<name>.xml". Parts that were never loaded (nil) are
# skipped.
#
# word/document.xml is covered by the :doc entry of DOCUMENT_PATHS, so it is
# not submitted separately; doing so would serialize it twice.
def update
  DOCUMENT_PATHS.each do |attr_name, path|
    loaded = instance_variable_get("@#{attr_name}")
    next unless loaded

    if path.include?('*')
      loaded.each do |simple_file_name, contents|
        replace_entry("word/#{simple_file_name}.xml", contents.serialize(:save_with => 0))
      end
    else
      replace_entry(path, loaded.serialize(:save_with => 0))
    end
  end
end

# generate Elements::Containers::Paragraph from paragraph XML node
Expand Down
16 changes: 16 additions & 0 deletions spec/docx/document_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,22 @@
end
end

describe 'multiple documents' do
  # Open the fixture that this example expects to contain header, footer and
  # numbering parts in addition to the main document and styles.
  before do
    @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx')
  end

  it 'should extract all inner documents' do
    # Single-file parts are exposed directly as parsed documents.
    expect(@doc.doc).not_to be_nil
    expect(@doc.styles).not_to be_nil

    # Globbed parts are exposed as hashes keyed by simple file name.
    headers = @doc.headers
    expect(headers).not_to be_nil
    expect(headers["header1"].text).to eq("Hello from the header.")

    footers = @doc.footers
    expect(footers).not_to be_nil
    expect(footers["footer1"].text).to eq("Hello from the footer.")

    expect(@doc.numbering).not_to be_nil
  end
end

describe 'saving' do
before do
@doc = Docx::Document.open(@fixtures_path + '/saving.docx')
Expand Down
Binary file added spec/fixtures/multi_doc.docx
Binary file not shown.