Nothing Special   »   [go: up one dir, main page]

Skip to content

Commit

Permalink
Fix #930
Browse files: browse the repository at this point in the history
  • Loading branch information
hhrutter committed Aug 20, 2024
1 parent 3aff1b0 commit b9c28ae
Showing 1 changed file with 79 additions and 8 deletions.
87 changes: 79 additions & 8 deletions pkg/pdfcpu/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,93 @@ func optimizeContentStreamUsage(ctx *model.Context, sd *types.StreamDict, objNr
return nil, nil
}

// removeEmptyContentStreams removes content streams that decode to zero bytes
// from the "Contents" entry of pageDict.
//
// obj is the current value of the page's "Contents" entry. It must be either
// an indirect reference (resolving to a stream dict or to an array) or a
// direct array; every array element must be an indirect reference to a
// stream dict. pageObjNumber is only used to label error messages.
//
// Outcome:
//   - single stream, empty:      "Contents" is deleted from pageDict.
//   - single stream, non-empty:  pageDict is left untouched.
//   - array, no non-empty entry: "Contents" is deleted from pageDict.
//   - array, otherwise:          "Contents" is replaced by a direct array
//     holding only the references to non-empty streams.
//
// An error is returned if a reference cannot be resolved, an object has an
// unexpected type, or a stream fails to decode.
func removeEmptyContentStreams(ctx *model.Context, pageDict types.Dict, obj types.Object, pageObjNumber int) error {
	var contentArr types.Array

	if ir, ok := obj.(types.IndirectRef); ok {

		objNr := ir.ObjectNumber.Value()
		entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value())
		if !found {
			return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber)
		}

		// "Contents" refers to a single content stream.
		contentStreamDict, ok := entry.Object.(types.StreamDict)
		if ok {
			if err := contentStreamDict.Decode(); err != nil {
				return err
			}
			if len(contentStreamDict.Content) == 0 {
				pageDict.Delete("Contents")
			}
			return nil
		}

		// Otherwise the reference has to resolve to an array of content streams.
		contentArr, ok = entry.Object.(types.Array)
		if !ok {
			return errors.Errorf("removeEmptyContentStreams: obj#:%d page content entry neither stream dict nor array.\n", pageObjNumber)
		}

	} else if contentArr, ok = obj.(types.Array); !ok {
		return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array\n", pageObjNumber)
	}

	// Collect the references of all streams that still carry content.
	var newContentArr types.Array

	for _, c := range contentArr {

		ir, ok := c.(types.IndirectRef)
		if !ok {
			return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array entry\n", pageObjNumber)
		}

		objNr := ir.ObjectNumber.Value()
		entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value())
		if !found {
			return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber)
		}

		contentStreamDict, ok := entry.Object.(types.StreamDict)
		if !ok {
			return errors.Errorf("removeEmptyContentStreams: obj#:%d page content entry is no stream dict\n", pageObjNumber)
		}

		if err := contentStreamDict.Decode(); err != nil {
			return err
		}
		if len(contentStreamDict.Content) > 0 {
			newContentArr = append(newContentArr, c)
		}
	}

	if len(newContentArr) == 0 {
		// Nothing left to render: drop the entry, exactly like the single
		// stream case above, rather than storing a nil array under "Contents".
		pageDict.Delete("Contents")
		return nil
	}

	pageDict["Contents"] = newContentArr

	return nil
}

func optimizePageContent(ctx *model.Context, pageDict types.Dict, pageObjNumber int) error {
if !ctx.OptimizeDuplicateContentStreams {
o, found := pageDict.Find("Contents")
if !found {
return nil
}
if log.OptimizeEnabled() {
log.Optimize.Println("identifyPageContent begin")

if err := removeEmptyContentStreams(ctx, pageDict, o, pageObjNumber); err != nil {
return err
}

o, found := pageDict.Find("Contents")
o, found = pageDict.Find("Contents")
if !found {
if log.OptimizeEnabled() {
log.Optimize.Println("identifyPageContent end: no \"Contents\"")
}
return nil
}

if !ctx.OptimizeDuplicateContentStreams {
return nil
}

if log.OptimizeEnabled() {
log.Optimize.Println("identifyPageContent begin")
}

var contentArr types.Array

if ir, ok := o.(types.IndirectRef); ok {
Expand Down Expand Up @@ -685,7 +756,7 @@ func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNumber, pag
return nil
}

// Iterate over all pages and optimize resources.
// Iterate over all pages and optimize content & resources.
func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (int, error) {
// TODO Integrate resource consolidation based on content stream requirements.

Expand Down

0 comments on commit b9c28ae

Please sign in to comment.