Friday, August 22, 2014

How to Extract PowerPoint documents from SharePoint 2010 ”SQL Database”

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.IO;

// replace this string with your Sharepoint content DB connection string
string DBConnString = “Server=YOURSHAREPOINTSERVER;Database=CONTENTDATABASE;Trusted_Connection=True;”;

// create a DB connection
SqlConnection con = new SqlConnection(DBConnString);
con.Open();

// the query to grab all the files.
// Note: Feel free to alter the LeafName like ‘%.extension’ arguments to suit your purpose
  SqlCommand com = con.CreateCommand();
  com.CommandText = @"SELECT AllDocs.DirName, AllDocs.LeafName, AllDocStreams.[Content]
                                 FROM  AllDocs INNER JOIN
                                 AllDocStreams ON AllDocs.Id = AllDocStreams.Id                                
                                Where (AllDocs.LeafName like '%.doc%'
                                or AllDocs.LeafName like '%.xl%'
                                or AllDocs.LeafName like '%.pdf'
                                or AllDocs.LeafName like '%.pp%'
                                or AllDocs.LeafName like '%.pr%'
                                or AllDocs.LeafName like '%.rt%'
                                or AllDocs.LeafName like '%.ms%')
                                ORDER BY LeafName ASC";
// execute query
SqlDataReader reader = com.ExecuteReader();

while (reader.Read())
{
    // grab the file’s directory and name
    string DirName = (string)reader["DirName"];
    string LeafName = (string)reader["LeafName"];

    // create directory for the file if it doesn’t yet exist
    if (!Directory.Exists(DirName))
    {
        Directory.CreateDirectory(DirName);
        Console.WriteLine(“Creating directory: “ + DirName);
    }

           String fileName = DirName + "/" + LeafName;
                if (!File.Exists(fileName))
                {
                    // create a filestream to spit out the file
                    FileStream fs = new FileStream(DirName + "/" + LeafName, FileMode.Create, FileAccess.Write);
                    BinaryWriter writer = new BinaryWriter(fs);

                    // depending on the speed of your network, you may want to change the buffer size (it’s in bytes)
                    //int bufferSize = 1000000;
                    int bufferSize = 32000000;
                    long startIndex = 0;
                    long retval = 0;
                    byte[] outByte = new byte[bufferSize];

                    // grab the file out of the db one chunk (of size bufferSize) at a time
                    do
                    {
                        // retval = reader.GetBytes(2, startIndex, outByte, 0, bufferSize);
                        retval = reader.GetBytes(2, startIndex, outByte, 0, bufferSize);
                        startIndex += bufferSize;

                        writer.Write(outByte, 0, (int)retval);
                        writer.Flush();
                    } while (retval == bufferSize);

                    // finish writing the file
                    writer.Close();
                    fs.Close();

                    Console.WriteLine("Finished writing file: " + LeafName);

                }

// close the DB connection and whatnots
reader.Close();
con.Close();