Sunday, February 12, 2006

One of the most popular search hits for my blog is "Managed XMP Parser".  A while back (actually it was 1 year ago today...whoa, freaky), I blogged about extracting the XMP data out of my pictures after screwing up the upload into Flickr.  I ended up writing my own code to pull out the XMP data.  I mentioned making it available, but it was relatively straightforward, so I never got around to posting it.

In the last week, I've gotten lots of requests for the code, so here it is, ugliness and all.  One interesting thing about my approach is that I do not rely on any particular file format.  I simply look for the XMP markers and pull out the XML in-between.  This means it will work on ANY file with embedded XMP.

All the usual disclaimers apply.  I don't claim this is the best way, but it works.  I've just plucked it out of the little date-fixing app I built.  At the end, you'll have an XPathNavigator and a namespace manager set up to run XPath queries.  There's probably some sweet stuff that .NET 2.0 can help us out with, but I haven't updated it.  Enjoy:

// Scans the file at `path` one byte at a time for an embedded XMP packet and
// copies the XML found between the "<?xpacket begin ... ?>" header and the
// "<?xpacket end" trailer into xmpStream. No container format is parsed; only
// the packet markers are searched for.
MemoryStream xmpStream = new MemoryStream();

// Stage 1: the start of the packet header.
byte[] beginPattern = Encoding.ASCII.GetBytes("<?xpacket begin");
int beginIndex = 0;
bool beginFound = false;

// Stage 2: the end of the packet header. The header is a processing
// instruction, so it is closed by "?>". Matching that terminator (rather than
// ">" followed by a line feed) also handles headers followed by "\r\n" or by
// no line break at all.
byte[] beginStopPattern = Encoding.ASCII.GetBytes("?>");
int beginStopIndex = 0;
bool xmlStartFound = false;

// Stage 3: the start of the packet trailer.
byte[] endPattern = Encoding.ASCII.GetBytes("<?xpacket end");
int endIndex = 0;
bool endFound = false;

// Open read-only: the FileMode-only constructor would request read/write
// access and fail on files that cannot be written.
using (Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) {
      int data;
      while ((data = stream.ReadByte()) != -1) {
            byte b = (byte)data;

            if (!beginFound) {
                  if (b == beginPattern[beginIndex]) {
                        beginIndex++;
                        if (beginIndex >= beginPattern.Length) {
                              beginFound = true;
                        }
                  }
                  else {
                        // Restart the match. The mismatching byte may itself be
                        // the first byte of the pattern, so test it in place
                        // instead of seeking the stream back by one.
                        beginIndex = (b == beginPattern[0]) ? 1 : 0;
                  }
            }
            else if (!xmlStartFound) {
                  if (b == beginStopPattern[beginStopIndex]) {
                        beginStopIndex++;
                        if (beginStopIndex >= beginStopPattern.Length) {
                              xmlStartFound = true;
                        }
                  }
                  else {
                        beginStopIndex = (b == beginStopPattern[0]) ? 1 : 0;
                  }
            }
            else {
                  // Buffer every payload byte exactly once while watching for
                  // the trailer marker.
                  xmpStream.WriteByte(b);

                  if (b == endPattern[endIndex]) {
                        endIndex++;
                        if (endIndex >= endPattern.Length) {
                              endFound = true;
                              // The trailer marker was buffered along with the
                              // payload; trim it back off.
                              xmpStream.SetLength(xmpStream.Length - endPattern.Length);
                              break;
                        }
                  }
                  else {
                        endIndex = (b == endPattern[0]) ? 1 : 0;
                  }
            }
      }
}

if (!endFound) {
      Console.WriteLine("No XMP data found");
      // NOTE(review): this break presumes the snippet sits inside an enclosing
      // loop that is not shown here -- confirm against the calling code.
      break;
}

// Load the buffered XMP packet into an XPath document and read the original
// capture date from it.
xmpStream.Position = 0;   // rewind: the stream was filled by earlier writes
XPathDocument xmpDocument = new XPathDocument(xmpStream);
XPathNavigator xmpNav = xmpDocument.CreateNavigator();

// Prefix-to-namespace bindings available to XPath queries against the packet.
XmlNamespaceManager nsManager = new XmlNamespaceManager(xmpNav.NameTable);
nsManager.AddNamespace("x", "adobe:ns:meta/");
nsManager.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
nsManager.AddNamespace("iX", "http://ns.adobe.com/iX/1.0/");
nsManager.AddNamespace("crs", "http://ns.adobe.com/camera-raw-settings/1.0/");
nsManager.AddNamespace("exif", "http://ns.adobe.com/exif/1.0/");
nsManager.AddNamespace("aux", "http://ns.adobe.com/exif/1.0/aux/");
nsManager.AddNamespace("pdf", "http://ns.adobe.com/pdf/1.3/");
nsManager.AddNamespace("photoshop", "http://ns.adobe.com/photoshop/1.0/");
nsManager.AddNamespace("tiff", "http://ns.adobe.com/tiff/1.0/");
nsManager.AddNamespace("xap", "http://ns.adobe.com/xap/1.0/");
nsManager.AddNamespace("xapMM", "http://ns.adobe.com/xap/1.0/mm/");
nsManager.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

// A simple XMP property may be serialized either as a child element of
// rdf:Description or, in RDF shorthand, as an attribute on it. The union
// selects whichever form is present; string() yields the first match in
// document order, or "" when there is none.
XPathExpression dateExpr = xmpNav.Compile(
      "string(/x:xmpmeta/rdf:RDF/rdf:Description/exif:DateTimeOriginal"
      + " | /x:xmpmeta/rdf:RDF/rdf:Description/@exif:DateTimeOriginal)");
dateExpr.SetContext(nsManager);
string dateTimeStr = (string)xmpNav.Evaluate(dateExpr);

// XmlConvert.ToDateTime rejects an empty string with a FormatException; raise
// the same exception type here with a message that names the actual problem.
if (dateTimeStr.Length == 0) {
      throw new FormatException("The XMP packet does not contain an exif:DateTimeOriginal property.");
}

DateTime date = XmlConvert.ToDateTime(dateTimeStr);