diff --git a/SpCrawler/FileDownloader.cs b/SpCrawler/FileDownloader.cs deleted file mode 100644 index 62c07f7..0000000 --- a/SpCrawler/FileDownloader.cs +++ /dev/null @@ -1,82 +0,0 @@ -using System; -using System.Collections.Concurrent; -using System.IO; -using System.Net.Http; -using System.Threading; - -namespace SpPrefetchIndexBuilder -{ - class FileDownloader - { - static int NUM_RETRIES = 3; - - BlockingCollection fileDownloadBlockingCollection; - HttpClient client; - System.Collections.Generic.Dictionary webClients = new System.Collections.Generic.Dictionary(); - - public FileDownloader(BlockingCollection fileDownloadBlockingCollection, HttpClient client) - { - this.fileDownloadBlockingCollection = fileDownloadBlockingCollection; - this.client = client; - } - - public void AttemptToDownload(FileToDownload toDownload, int numRetry) - { - try - { - var responseResult = client.GetAsync(SpPrefetchIndexBuilder.topParentSite + toDownload.serverRelativeUrl); - if (responseResult.Result != null && responseResult.Result.StatusCode == System.Net.HttpStatusCode.OK) - { - using (var memStream = responseResult.Result.Content.ReadAsStreamAsync().GetAwaiter().GetResult()) - { - using (var fileStream = File.Create(toDownload.saveToPath)) - { - memStream.CopyTo(fileStream); - } - } - Console.WriteLine("Thread {0} - Successfully downloaded {1} to {2}", Thread.CurrentThread.ManagedThreadId, toDownload.serverRelativeUrl, toDownload.saveToPath); - } - else - { - Console.WriteLine("Got non-OK status {0} when trying to download url {1}", responseResult.Result.StatusCode, SpPrefetchIndexBuilder.topParentSite + toDownload.serverRelativeUrl); - } - } - catch (Exception e) - { - if (numRetry >= NUM_RETRIES) - { - Console.WriteLine("Gave up trying to download url {0} to file {1} after {2} retries due to error: {3}", SpPrefetchIndexBuilder.topParentSite + toDownload.serverRelativeUrl, toDownload.saveToPath, NUM_RETRIES, e); - } - else - { - AttemptToDownload(toDownload, numRetry + 1); - } - } - } - - public void StartDownloads(int timeout) - { - try - { - Console.WriteLine("Starting Thread {0}", Thread.CurrentThread.ManagedThreadId); - FileToDownload toDownload; - while (fileDownloadBlockingCollection.TryTake(out toDownload)) - { - SpPrefetchIndexBuilder.CheckAbort(); - AttemptToDownload(toDownload, 0); - - } - } - catch (Exception e2) - { - Console.WriteLine("Thread {0} File Downloader failed - {1}", Thread.CurrentThread.ManagedThreadId, e2); - Console.WriteLine(e2.StackTrace); - } - } - - public static void DownloadFiles(BlockingCollection fileDownloadBlockingCollection, int timeoutInMilliSec, HttpClient client) - { - new FileDownloader(fileDownloadBlockingCollection, client).StartDownloads(timeoutInMilliSec); - } - } -} \ No newline at end of file diff --git a/SpCrawler/FileToDownload.cs b/SpCrawler/FileToDownload.cs deleted file mode 100644 index 090012e..0000000 --- a/SpCrawler/FileToDownload.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -namespace SpPrefetchIndexBuilder -{ - class FileToDownload - { - public String site; - public String serverRelativeUrl; - public String saveToPath; - } -} diff --git a/SpCrawler/SpPrefetchIndexBuilder.cs b/SpCrawler/SpPrefetchIndexBuilder.cs index 4a1feba..cc27b6e 100644 --- a/SpCrawler/SpPrefetchIndexBuilder.cs +++ b/SpCrawler/SpPrefetchIndexBuilder.cs @@ -63,6 +63,7 @@ public static void CheckAbort() public static bool excludeRoleDefinitions = false; public static bool excludeRoleAssignments = false; public static bool deleteExistingOutputDir = false; + public static bool doDownloadFiles = false; public static int maxFiles = -1; public int fileCount = 0; @@ -114,12 +115,15 @@ static void Main(string[] args) ); spib.writeAllListsToJson(); Console.WriteLine("Lists metadata dump of {0} complete. Took {1} milliseconds.", spib.topParentSite, swLists.ElapsedMilliseconds); - Console.WriteLine("Downloading the files recieved during the index building"); - Parallel.ForEach( - spib.fileDownloadList, - new ParallelOptions { MaxDegreeOfParallelism = numThreads }, - toDownload => { spib.DownloadFile(toDownload); } - ); + if (doDownloadFiles) + { + Console.WriteLine("Downloading the files recieved during the index building"); + Parallel.ForEach( + spib.fileDownloadList, + new ParallelOptions { MaxDegreeOfParallelism = numThreads }, + toDownload => { spib.DownloadFile(toDownload); } + ); + } } } Console.WriteLine("Export complete. Took {0} milliseconds.", sw.ElapsedMilliseconds); @@ -247,6 +251,10 @@ public SpPrefetchIndexBuilder(String[] args) { deleteExistingOutputDir = Boolean.Parse(arg.Split(new Char[] { '=' })[1]); } + else if (arg.StartsWith("--downloadFiles=")) + { + doDownloadFiles = Boolean.Parse(arg.Split(new Char[] { '=' })[1]); + } else { help = true; @@ -255,7 +263,7 @@ public SpPrefetchIndexBuilder(String[] args) if (help) { - Console.WriteLine("USAGE: SpPrefetchIndexBuilder.exe --siteUrl=siteUrl --outputDir=[outputDir] --domain=[domain] --username=[username] --password=[password (not recommended, do not specify to be prompted or use SP_PWD environment variable)] --numThreads=[optional number of threads to use while fetching] --maxFileSizeBytes=[optional maximum file size]"); + Console.WriteLine("USAGE: SpPrefetchIndexBuilder.exe --siteUrl=siteUrl --outputDir=[outputDir] --domain=[domain] --username=[username] --password=[password (not recommended, do not specify to be prompted or use SP_PWD environment variable)] --numThreads=[optional number of threads to use while fetching] --maxFileSizeBytes=[optional maximum file size] --onlyWebs=[true if you want to only download web metadeta. default false] --maxFiles=[if > 0 will only download this many files before quitting. default -1] --excludeRoleAssignments=[if true will not store obtain role assignment metadata. default false] --excludeRoleDefinitions=[if true will not store obtain role definition metadata. default false] --downloadFiles=[Set this to false if you don't want to download the files from the sharepoint instance. default false]"); Environment.Exit(0); } @@ -275,20 +283,25 @@ public SpPrefetchIndexBuilder(String[] args) } cc = new CredentialCache(); NetworkCredential nc; - if (spPassword == null) + if (spPassword == null && spUsername != null) { Console.WriteLine("Please enter password for {0}", spUsername); nc = new NetworkCredential(spUsername, GetPassword(), spDomain); } - else + else if (spUsername != null) { nc = new NetworkCredential(spUsername, spPassword, spDomain); } + else + { + nc = System.Net.CredentialCache.DefaultNetworkCredentials; + } cc.Add(new Uri(topParentSite), "NTLM", nc); HttpClientHandler handler = new HttpClientHandler(); handler.Credentials = cc; client = new HttpClient(handler); - client.Timeout = TimeSpan.FromMinutes(4); + client.Timeout = TimeSpan.FromSeconds(30); + client.DefaultRequestHeaders.ConnectionClose = true; } public void DownloadFile(FileToDownload toDownload) @@ -319,7 +332,7 @@ public void DownloadFile(FileToDownload toDownload) } catch (Exception e) { - Console.WriteLine("Gave up trying to download url {0} to file {1} due to error: {2}", rootSite + toDownload.serverRelativeUrl, toDownload.saveToPath, e); + Console.WriteLine("Gave up trying to download url {0}{1} to file {2} due to error: {3}", rootSite, toDownload.serverRelativeUrl, toDownload.saveToPath, e); } }