Tuesday, September 17, 2013

Convert word document to pdf file in SharePoint

In this post we will see how to convert Microsoft Word documents to PDF files using the Office interop assemblies. The code can be used in SharePoint or ASP.NET, and the methods support both .doc and .docx files.
What are the points that are covered
  • Generic method which converts single word document to pdf file
  • Generic method which converts all the word documents in a folder to pdf files


Convert single word document to pdf file

The following method uses the Microsoft Office interop DLL, which calls Word's SaveAs function to save the Word document as a PDF file.

using Microsoft.Office.Interop.Word;
using System;
using System.IO;
 
/// <summary>
/// Converts one Word document to PDF through Word interop and places the PDF in a target folder.
/// </summary>
/// <param name="WordFilePath">Path of the Word document to convert.</param>
/// <param name="PdfFilePath">
/// Folder that receives the PDF. The PDF keeps the source file's base name with a ".pdf" extension.
/// </param>
/// <remarks>
/// The PDF is first saved next to the source document and then transferred into
/// <paramref name="PdfFilePath"/>, overwriting a file of the same name if one exists.
/// Conversion errors are caught and written to the console instead of being thrown to the caller.
/// </remarks>
private static void ConvertWordFileToPdf(string WordFilePath, string PdfFilePath)
{
    Document doc = null;
    // Placeholder for the optional COM arguments this method does not set.
    object oMissing = System.Reflection.Missing.Value;
    // Passed to every Close/Quit call so the source document is never saved on the way out.
    object saveChanges = WdSaveOptions.wdDoNotSaveChanges;
    Microsoft.Office.Interop.Word.Application word = null;
    string sourceFile = WordFilePath;
    string destinationFile = "";

    try
    {
        FileInfo wordFile = new FileInfo(WordFilePath);

        // ChangeExtension touches only the real extension, whatever its casing. A textual
        // Replace(".docx", ".pdf") misses "X.DOCX" (so the PDF would be written over the
        // source document) and also rewrites ".doc" inside directory names.
        string convertedFile = System.IO.Path.ChangeExtension(wordFile.FullName, ".pdf");
        destinationFile = System.IO.Path.Combine(PdfFilePath, System.IO.Path.GetFileName(convertedFile));

        // Create a new Microsoft Word application object and keep it hidden
        word = new Microsoft.Office.Interop.Word.Application();
        word.Visible = false;
        word.ScreenUpdating = false;

        // Word's Open/SaveAs take their arguments as ref object
        object filename = wordFile.FullName;
        object outputFileName = convertedFile;
        object fileFormat = WdSaveFormat.wdFormatPDF;

        // Use the dummy value as a placeholder for optional arguments
        doc = word.Documents.Open(ref filename, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing);
        doc.Activate();

        // Save document into PDF Format
        doc.SaveAs(ref outputFileName,
            ref fileFormat, ref oMissing, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing,
            ref oMissing, ref oMissing, ref oMissing, ref oMissing);

        // Close the document before touching the PDF on disk.
        // doc has to be cast to type _Document so that it will find the
        // correct Close method.
        ((_Document)doc).Close(ref saveChanges, ref oMissing, ref oMissing);
        doc = null;

        // When the target folder is the source folder the PDF is already in place;
        // copying a file onto itself would fail.
        string savedPdf = System.IO.Path.GetFullPath(convertedFile);
        string targetPdf = System.IO.Path.GetFullPath(destinationFile);
        if (!string.Equals(savedPdf, targetPdf, StringComparison.OrdinalIgnoreCase))
        {
            // Copy-with-overwrite followed by delete works whether or not the target
            // exists and across drives; File.Replace fails when the two paths are on
            // different volumes (e.g. C: to D:).
            System.IO.File.Copy(convertedFile, destinationFile, true);
            System.IO.File.Delete(convertedFile);
        }

        Console.WriteLine("Success:" + "SourceFile-" + sourceFile + " DestinationFile-" + destinationFile);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error occured while processing");
        Console.WriteLine("Fail:" + "SourceFile-" + sourceFile + " DestinationFile-" + destinationFile + " Error-" + ex.Message);
    }
    finally
    {
        // Only reached with a live document when something failed after Open.
        if (doc != null)
        {
            try
            {
                ((_Document)doc).Close(ref saveChanges, ref oMissing, ref oMissing);
            }
            catch (Exception ex)
            {
                // A failed Close must not prevent Word itself from being shut down below.
                Console.WriteLine("Fail:" + "SourceFile-" + sourceFile + " Error-" + ex.Message);
            }
            doc = null;
        }

        if (word != null)
        {
            try
            {
                // word has to be cast to type _Application so that it will find
                // the correct Quit method.
                ((_Application)word).Quit(ref saveChanges, ref oMissing, ref oMissing);
            }
            catch (Exception ex)
            {
                // Keep the method's contract of reporting rather than throwing.
                Console.WriteLine("Error occured while processing");
                Console.WriteLine(ex.Message);
            }
            word = null;
        }
    }
}

Usage


ConvertWordFileToPdf(@"c:\Users\Adi\Documents\MyInfo.docx", @"D:\Adi\PdfFiles");

This function converts MyInfo.docx to MyInfo.pdf and saves the converted PDF file to the location ‘D:\Adi\PdfFiles’.

Convert all word documents in a folder to pdf file

The following method uses Microsoft office interop dll which converts all the word files in a folder to pdf files

using Microsoft.Office.Interop.Word;
using System;
using System.IO;
 
/// <summary>
/// Converts every Word document found directly in a folder to PDF through Word interop
/// and places the PDFs in a target folder.
/// </summary>
/// <param name="WordFilesLocation">Folder that is scanned (non-recursively) for Word documents.</param>
/// <param name="PdfFilesLocation">
/// Folder that receives the PDFs. Each PDF keeps its source file's base name with a ".pdf" extension.
/// </param>
/// <remarks>
/// A single Word instance is reused for the whole batch. A failure on one file is written to
/// the console and the batch continues with the next file. Errors are reported to the console
/// instead of being thrown to the caller.
/// </remarks>
private static void ConvertAllWordFilesToPdf(string WordFilesLocation, string PdfFilesLocation)
{
    Document doc = null;
    // Placeholder for the optional COM arguments this method does not set.
    object oMissing = System.Reflection.Missing.Value;
    // Passed to every Close/Quit call so source documents are never saved on the way out.
    object saveChanges = WdSaveOptions.wdDoNotSaveChanges;
    Microsoft.Office.Interop.Word.Application word = null;
    try
    {
        // Get list of candidate files in specified directory. "*.doc*" is used because
        // whether "*.doc" also returns ".docx" files depends on the runtime; the exact
        // extension is checked per file below.
        DirectoryInfo dirInfo = new DirectoryInfo(WordFilesLocation);
        FileInfo[] wordFiles = dirInfo.GetFiles("*.doc*");

        if (wordFiles.Length == 0)
        {
            // Nothing to convert, so do not start Word at all.
            return;
        }

        // Create a new Microsoft Word application object and keep it hidden
        word = new Microsoft.Office.Interop.Word.Application();
        word.Visible = false;
        word.ScreenUpdating = false;

        foreach (FileInfo wordFile in wordFiles)
        {
            string extension = wordFile.Extension;
            bool isWordDocument =
                string.Equals(extension, ".doc", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(extension, ".docm", StringComparison.OrdinalIgnoreCase);

            // NOTE(review): "~$" files are presumed to be the lock files Word keeps beside
            // an open document; they are not real documents, so they are skipped.
            if (!isWordDocument || wordFile.Name.StartsWith("~$", StringComparison.Ordinal))
            {
                continue;
            }

            string sourceFile = wordFile.FullName;
            string destinationFile = "";

            // The try/catch lives inside the loop so one bad file does not end the batch.
            try
            {
                // ChangeExtension touches only the real extension, whatever its casing. A
                // textual Replace(".docx", ".pdf") misses "X.DOCX" and turns ".docm" into ".pdfm".
                string convertedFile = System.IO.Path.ChangeExtension(wordFile.FullName, ".pdf");
                destinationFile = System.IO.Path.Combine(PdfFilesLocation, System.IO.Path.GetFileName(convertedFile));

                // Word's Open/SaveAs take their arguments as ref object
                object filename = wordFile.FullName;
                object outputFileName = convertedFile;
                object fileFormat = WdSaveFormat.wdFormatPDF;

                // Use the dummy value as a placeholder for optional arguments
                doc = word.Documents.Open(ref filename, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing);
                doc.Activate();

                // Save document into PDF Format
                doc.SaveAs(ref outputFileName,
                    ref fileFormat, ref oMissing, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing,
                    ref oMissing, ref oMissing, ref oMissing, ref oMissing);

                // Close the Word document, but leave the Word application open.
                // doc has to be cast to type _Document so that it will find the
                // correct Close method.
                ((_Document)doc).Close(ref saveChanges, ref oMissing, ref oMissing);
                doc = null;

                // When the target folder is the source folder the PDF is already in place;
                // copying a file onto itself would fail.
                string savedPdf = System.IO.Path.GetFullPath(convertedFile);
                string targetPdf = System.IO.Path.GetFullPath(destinationFile);
                if (!string.Equals(savedPdf, targetPdf, StringComparison.OrdinalIgnoreCase))
                {
                    // Copy-with-overwrite followed by delete works whether or not the target
                    // exists and across drives; File.Replace fails when the two paths are on
                    // different volumes (e.g. C: to D:).
                    System.IO.File.Copy(convertedFile, destinationFile, true);
                    System.IO.File.Delete(convertedFile);
                }

                Console.WriteLine("Success:" + "SourceFile-" + sourceFile + " DestinationFile-" + destinationFile);
            }
            catch (Exception ex)
            {
                //individual file exception, do not stop but display the error
                //Log this if needed
                Console.WriteLine("Fail:" + "SourceFile-" + sourceFile + "  DestinationFile-" + destinationFile + "#Error-" + ex.Message);
            }
            finally
            {
                // Close a document left open by a failure so it does not stay open
                // in the shared Word instance while the next file is processed.
                if (doc != null)
                {
                    try
                    {
                        ((_Document)doc).Close(ref saveChanges, ref oMissing, ref oMissing);
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Fail:" + "SourceFile-" + sourceFile + "#Error-" + ex.Message);
                    }
                    doc = null;
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error occured while processing");
        Console.WriteLine(ex.Message);
    }
    finally
    {
        if (word != null)
        {
            try
            {
                // word has to be cast to type _Application so that it will find
                // the correct Quit method.
                ((_Application)word).Quit(ref saveChanges, ref oMissing, ref oMissing);
            }
            catch (Exception ex)
            {
                // Keep the method's contract of reporting rather than throwing.
                Console.WriteLine("Error occured while processing");
                Console.WriteLine(ex.Message);
            }
            word = null;
        }
    }
}

Usage

ConvertAllWordFilesToPdf(@"c:\Users\Adi\Documents", @"D:\Adi\PdfFiles");
This function converts all the word files in ‘c:\Users\Adi\Documents’ to pdf files and saves the converted pdf files to the location ‘D:\Adi\PdfFiles’

Other References

http://msdn.microsoft.com/en-us/library/office/bb216319(v=office.12).aspx

Conclusion

Most of the code I found on other sites was in bits and pieces. I hope these generic methods will help anyone who is looking for Word document conversion.

No comments:

Post a Comment