Create an array of Atoms from a string or byte array containing a sequence of PDF objects

 

   

Syntax
 

[C#]
static ArrayAtom FromContentStream(string value)
static ArrayAtom FromContentStream(byte[] value)

[Visual Basic]
Shared Function FromContentStream(value As String) As ArrayAtom
Shared Function FromContentStream(value() As Byte) As ArrayAtom

 

   

Params
 
Name Description
value The string or byte array holding the sequence of atoms.

 

   

Notes
 

Create an array of Atoms from a string or byte array containing a sequence of PDF objects.

This method is useful for deconstructing PDF content streams for analysis and modification. To convert back into a content stream you can use the Atom.GetData function.

 

   

Example
 

This example shows how to use the FromContentStream function to parse and display a PDF content stream.

[C#]
// Read the content stream of the current page, split it into atoms and
// build a text listing with one operator (plus its operands) per line.
StringBuilder sb = new StringBuilder();
using (Doc doc = new Doc()) {
  doc.Read("spaceshuttle.pdf");
  Page page = doc.ObjectSoup[doc.Page] as Page;
  StreamObject[] layers = page.GetLayers();
  ArrayAtom array;
  // gather the decompressed data of every layer into a single buffer
  using (MemoryStream st = new MemoryStream()) {
    foreach (StreamObject layer in layers) {
      if (!layer.Decompress()) {
        throw new Exception("Unable to decompress stream.");
      }
      byte[] data = layer.GetData();
      st.Write(data, 0, data.Length);
      // keep the last token of this layer from running into the first
      // token of the next one (e.g. "Q" followed by "q" becoming "Qq")
      st.WriteByte((byte)'\n');
    }
    array = ArrayAtom.FromContentStream(st.ToArray());
  }
  int indent = 0;
  // operators that open / close a nested block in the listing
  HashSet<string> indentPlus = new HashSet<string>(new string[] { "q", "BT" });
  HashSet<string> indentMinus = new HashSet<string>(new string[] { "Q", "ET" });
  // Item2 of each entry is used below as the position of an operator in the array
  IList<Tuple<string, int>> items = OpAtom.Find(array);
  int index = 0;
  foreach (var pair in items) {
    string op = ((OpAtom)array[pair.Item2]).Text;
    // add indent to code; a closing operator is shown one level out, and
    // the level is never allowed to drop below zero on unbalanced input
    if (indentMinus.Contains(op) && indent > 0) {
      indent--;
    }
    sb.Append(' ', indent);
    // write out the operands followed by the operator itself
    for (int i = index; i <= pair.Item2; i++) {
      if (i != index) {
        sb.Append(" ");
      }
      Atom item = array[i];
      // we write arrays out individually so that
      // we can override default cr lf behavior
      ArrayAtom itemArray = item as ArrayAtom;
      if (itemArray != null) {
        int n = itemArray.Count;
        for (int j = 0; j < n; j++) {
          sb.Append(itemArray[j].ToString());
          if (j != n - 1) {
            sb.Append(" ");
          }
        }
      }
      else {
        sb.Append(item.ToString());
      }
    }
    sb.AppendLine();
    if (indentPlus.Contains(op)) {
      indent++;
    }
    // the next line starts with the atom after this operator
    index = pair.Item2 + 1;
  }
  // write out any atoms that are left over
  for (int i = index; i < array.Count; i++) {
    sb.Append(" ");
    sb.Append(array[i].ToString());
  }
}
// put the listing on a page of a new document using a fixed-width font
using (Doc doc = new Doc()) {
  doc.Font = doc.AddFont("Courier");
  doc.Rect.Inset(20, 20);
  doc.AddText(sb.ToString());
  doc.Save("PageContents.pdf");
}

[Visual Basic]
  Dim sb As New StringBuilder()
  Using doc As New Doc()
    doc.Read("spaceshuttle.pdf")
    Dim page As Page = TryCast(doc.ObjectSoup(doc.Page), Page)
    Dim layers As StreamObject() = page.GetLayers()
    Dim st As New MemoryStream()
    For Each layer As StreamObject In layers
      If Not layer.Decompress() Then
        Throw New Exception("Unable to decompress stream.")
      End If
      Dim data As Byte() = layer.GetData()
      st.Write(data, 0, data.Length)
    Next
    Dim array As ArrayAtom = ArrayAtom.FromContentStream(st.ToArray())
    Dim indent As Integer = 0
    Dim indentPlus As New HashSet(Of String)(New String() {"q", "BT"})
    Dim indentMinus As New HashSet(Of String)(New String() {"Q", "ET"})
    Dim items As IList(Of Tuple(Of String, Integer)) = OpAtom.Find(array)
    Dim index As Integer = 0
    For Each pair As var In items
      Dim op As String = DirectCast(array(pair.Item2), OpAtom).Text
      ' add indent to code
      If indentMinus.Contains(op) Then
        indent -= 1
      End If
      For i As Integer = 0 To indent - 1
        sb.Append(" ")
      Next
      ' write out the operators
      For i As Integer = index To pair.Item2
        If i <> index Then
          sb.Append(" ")
        End If
        Dim item As Atom = array(i)
        ' we write arrays out individually so that
        ' we can override default cr lf behavior
        Dim itemArray As ArrayAtom = TryCast(item, ArrayAtom)
        If itemArray IsNot Nothing Then
          Dim n As Integer = itemArray.Count
          For j As Integer = 0 To n - 1
            sb.Append(itemArray(j).ToString())
            If j <> n - 1 Then
              sb.Append(" ")
            End If
          Next
        Else
          sb.Append(item.ToString())
        End If
      Next
      sb.AppendLine()
      If indentPlus.Contains(op) Then
        indent += 1
      End If
      index = pair.Item2 + 1
    Next
    ' write out any atoms that are left over
    For i As Integer = index To array.Count - 1
      sb.Append(" ")
      sb.Append(array(i).ToString())
    Next
  End Using
  Using doc As New Doc()
    doc.Font = doc.AddFont("Courier")
    doc.Rect.Inset(20, 20)
    doc.AddText(sb.ToString())
    doc.Save("PageContents.pdf")
  End Using
End Sub


PageContents.pdf