Skip to content

Commit

Permalink
feat(parser): parser 구조 변경
Browse files Browse the repository at this point in the history
  • Loading branch information
hahnlee committed May 16, 2024
1 parent 06577bd commit 45c9e69
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 51 deletions.
22 changes: 22 additions & 0 deletions packages/parser/src/models/bin-data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* Copyright Han Lee <[email protected]> and other contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Pairs a name with a chunk of raw bytes.
 */
export class BinData {
  /** Name associated with the bytes. */
  public name: string

  /** The raw bytes. */
  public data: Uint8Array

  /**
   * @param name - Name to associate with the bytes.
   * @param data - The raw bytes; stored by reference, not copied.
   */
  constructor(name: string, data: Uint8Array) {
    this.name = name
    this.data = data
  }
}
4 changes: 0 additions & 4 deletions packages/parser/src/models/char-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,4 @@ export class CharList {
/**
 * Returns the entries of `chars` that are instances of `ExtendedControl`.
 */
extendedControls() {
  return this.chars.filter((candidate) => {
    return candidate instanceof ExtendedControl
  })
}

/**
 * Concatenates the `toString()` result of every entry in `chars`,
 * with no separator.
 */
toString() {
  const pieces = this.chars.map((item) => item.toString())
  return pieces.join('')
}
}
6 changes: 5 additions & 1 deletion packages/parser/src/models/controls/content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

import { CommonCtrlID } from '../../constants/ctrl-id.js'
import { CommonCtrlID, OtherCtrlID } from '../../constants/ctrl-id.js'
import type { PeekableIterator } from '../../utils/generator.js'
import type { HWPRecord } from '../record.js'
import { HWPVersion } from '../version.js'
Expand All @@ -38,8 +38,12 @@ export function parseControl(
version: HWPVersion,
): ControlContent {
switch (ctrlId) {
case OtherCtrlID.Section:
return SectionControl.fromRecord(current, iterator, version)
case CommonCtrlID.Table:
return TableControl.fromRecord(current, iterator, version)
case CommonCtrlID.GenShapeObject:
return GenShapeObjectControl.fromRecord(current, iterator, version)
default:
return UnknownControl.fromRecord(current, iterator)
}
Expand Down
20 changes: 12 additions & 8 deletions packages/parser/src/models/controls/shapes/picture.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@ export class PictureRecord {
const image = Image.fromReader(reader)
outline.alpha = reader.readUInt8()

const instanceId = !reader.isEOF() ? reader.readUInt32() : null
const effect = !reader.isEOF() ? PictureEffect.fromReader(reader) : null
const additionalProperties = !reader.isEOF()
? PictureAdditionalProperties.fromReader(reader)
: null
const instanceId = reader.isEOF() ? null : reader.readUInt32()
const effect = reader.isEOF() ? null : PictureEffect.fromReader(reader)
const additionalProperties = reader.isEOF()
? null
: PictureAdditionalProperties.fromReader(reader)

if (!reader.isEOF()) {
throw new Error('BodyText: PictureRecord: There are remaining bytes')
Expand Down Expand Up @@ -580,10 +580,14 @@ export class PictureAdditionalProperties {
) {}

/**
 * Builds a `PictureAdditionalProperties` from the current position of `reader`.
 *
 * Two values are taken with `readUInt32()` (width, then height). A final
 * value is taken with `readUInt8()` only when the reader is not at EOF;
 * otherwise alpha falls back to 0.
 *
 * @param reader - Reader positioned at the start of the additional properties.
 * @returns The populated properties object.
 */
static fromReader(reader: ByteReader) {
  const width = reader.readUInt32()
  const height = reader.readUInt32()
  // The alpha byte is consumed only if data remains; 0 is used when it is absent.
  const alpha = reader.isEOF() ? 0 : reader.readUInt8()

  // Pass only the values parsed above: each field is read exactly once.
  return new PictureAdditionalProperties(width, height, alpha)
}
}
8 changes: 3 additions & 5 deletions packages/parser/src/models/doc-info/doc-info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,15 @@ export class DocInfo {
throw new Error('DocInfo not exist')
}

if (!ArrayBuffer.isView(docInfoEntry.content)) {
throw new Error('DocInfo content is not ArrayBuffer')
}
const content = Uint8Array.from(docInfoEntry.content)

if (header.flags.compressed) {
const decodedContent: Uint8Array = inflate(docInfoEntry.content, {
const decodedContent: Uint8Array = inflate(content, {
windowBits: -15,
})
return DocInfo.fromBytes(decodedContent, header.version)
}
return DocInfo.fromBytes(docInfoEntry.content, header.version)
return DocInfo.fromBytes(content, header.version)
}

static fromBytes(bytes: Uint8Array, version: HWPVersion): DocInfo {
Expand Down
10 changes: 9 additions & 1 deletion packages/parser/src/models/doc-info/id-mappings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { readItems } from '../../utils/record.js'
import type { HWPRecord } from '../record.js'
import { DocInfoTagID } from '../../constants/tag-id.js'
import { HWPVersion } from '../version.js'
import { BinData } from './bin-data.js'
import { BinData, BinDataKind } from './bin-data.js'
import { FontFace } from './font-face.js'
import { BorderFill } from './border-fill.js'
import { CharShapeStyle } from './char-shape.js'
Expand Down Expand Up @@ -161,4 +161,12 @@ export class IDMappings {
),
)
}

/**
 * Lazily yields every entry of `binaryData` whose `properties.kind`
 * is `BinDataKind.Embedding`, in iteration order.
 */
*embeddings() {
  for (const candidate of this.binaryData) {
    if (candidate.properties.kind !== BinDataKind.Embedding) {
      continue
    }
    yield candidate
  }
}
}
48 changes: 48 additions & 0 deletions packages/parser/src/models/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,62 @@
* limitations under the License.
*/

import { find, read } from 'cfb'
import { DocInfo } from './doc-info/doc-info.js'
import { HWPHeader } from './header.js'
import { Section } from './section.js'
import { BinData } from './bin-data.js'
import { inflate } from 'pako'

/**
 * A parsed document: header, document info, body sections and the
 * binary payloads referenced as embeddings.
 */
export class HWPDocument {
  constructor(
    public header: HWPHeader,
    public info: DocInfo,
    public sections: Section[],
    public binDataList: BinData[],
  ) {}

  /**
   * Reads `buffer` as a CFB container and assembles a document from it.
   *
   * Sections are looked up as `Root Entry/BodyText/Section{i}` for every
   * index below `docInfo.properties.sections`. Embedded binary data is
   * looked up as `Root Entry/BinData/{fileName}` and inflated (raw deflate,
   * `windowBits: -15`) when the embedding reports itself as compressed.
   *
   * @param buffer - Raw bytes of the file.
   * @returns The assembled document.
   * @throws Error('Section not exist') when a section entry is missing.
   * @throws Error('BinData not exist') when an embedding has no CFB file
   *   name or its entry is missing from the container.
   */
  static fromBytes(buffer: Uint8Array) {
    const archive = read(buffer, { type: 'array' })

    const header = HWPHeader.fromCfbContainer(archive)
    const info = DocInfo.fromCfbContainer(archive, header)

    // Body sections, in index order.
    const sections: Section[] = []
    let sectionIndex = 0
    while (sectionIndex < info.properties.sections) {
      const sectionEntry = find(
        archive,
        `Root Entry/BodyText/Section${sectionIndex}`,
      )
      if (!sectionEntry) {
        throw new Error('Section not exist')
      }
      sections.push(Section.fromEntry(sectionEntry, header))
      sectionIndex += 1
    }

    // Embedded binary payloads, in the order the id mappings yield them.
    const binDataList: BinData[] = []
    for (const embedding of info.idMappings.embeddings()) {
      const fileName = embedding.getCFBFileName()
      // Only query the container when there is a name to look up.
      const binEntry = fileName
        ? find(archive, `Root Entry/BinData/${fileName}`)
        : null
      if (!fileName || !binEntry) {
        throw new Error('BinData not exist')
      }

      const raw = Uint8Array.from(binEntry.content)
      const data = embedding.compressed(header)
        ? inflate(raw, { windowBits: -15 })
        : raw
      binDataList.push(new BinData(fileName, data))
    }

    return new HWPDocument(header, info, sections, binDataList)
  }
}
38 changes: 6 additions & 32 deletions packages/parser/src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,15 @@
* limitations under the License.
*/

import {
read,
find,
type CFB$Blob,
type CFB$Container,
} from 'cfb'

import { HWPDocument } from './models/document.js'
import { DocInfo } from './models/doc-info/doc-info.js'
import { HWPHeader } from './models/header.js'
import { Section } from './models/section.js'

function parseSection(container: CFB$Container, header: HWPHeader, sectionNumber: number): Section {
const entry = find(container, `Root Entry/BodyText/Section${sectionNumber}`)

if (!entry) {
throw new Error('Section not exist')
}

return Section.fromEntry(entry, header)
/**
 * Parses a file held in memory into an `HWPDocument`.
 *
 * @param buffer - File contents, as a `Uint8Array` or an `ArrayBuffer`.
 * @returns The document produced by `HWPDocument.fromBytes`.
 */
export function parse(buffer: Uint8Array | ArrayBuffer): HWPDocument {
  const bytes = convertTypedArray(buffer)
  return HWPDocument.fromBytes(bytes)
}

export function parse(input: CFB$Blob): HWPDocument {
const container: CFB$Container = read(input, {
type: 'array',
})

const header = HWPHeader.fromCfbContainer(container)
const docInfo = DocInfo.fromCfbContainer(container, header)

const sections: Section[] = []

for (let i = 0; i < docInfo.properties.sections; i += 1) {
sections.push(parseSection(container, header, i))
/**
 * Normalises the accepted input types to a `Uint8Array`.
 *
 * @param data - Either an `ArrayBuffer` or a `Uint8Array`.
 * @returns A new `Uint8Array` view over `data` when it is an `ArrayBuffer`
 *   (no bytes are copied); otherwise `data` itself, unchanged.
 */
function convertTypedArray(data: Uint8Array | ArrayBuffer): Uint8Array {
  if (data instanceof ArrayBuffer) {
    return new Uint8Array(data)
  }

  // Already a Uint8Array: hand it back as is.
  return data
}

0 comments on commit 45c9e69

Please sign in to comment.