隐藏

C# 读取指定目录下所有 doc 文件的方法类

发布:2021/11/30 14:42:54 作者:管理员 来源:本站 浏览次数:880


using System;

using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace _5118
{
    class WordToHtml
    {
        public static Maticsoft.BLL.Baodian _bll_baodian = new Maticsoft.BLL.Baodian();
        /// <summary>
        /// 将指定的文件夹下所有doc文件转html,并读取html内容保存到数据库中
        /// </summary>
        /// <param name="WordFileDir"></param>
        public static void CreateWordToHtmlFile(string WordFileDir)
        {
            DealWithWordFile(WordFileDir);
        }
        /// <summary>
        /// 将指定的文件夹下所有的html文件保存到数据库中
        /// </summary>
        /// <param name="WordFileDir"></param>
        public static void GetHtmlFile(string WordFileDir)
        {
            GetDealWithHtmlFile(WordFileDir);
        }
        /// <summary>
        /// 搜索WordFileDir在的*.doc文件
        /// </summary>
        /// <param name="WordFileDir"></param>
        private static void GetDealWithHtmlFile(string WordFileDir)
        {
            //创建数组保存源文件夹下的文件名
            string[] strFiles = Directory.GetFiles(WordFileDir, "*.htm");
            for (int i = 0; i < strFiles.Length; i++)
            {
                GetHtmlFileToDb(strFiles[i]);
            }

            DirectoryInfo dirInfo = new DirectoryInfo(WordFileDir);
            //取得源文件夹下的所有子文件夹名称
            DirectoryInfo[] ZiPath = dirInfo.GetDirectories();
            for (int j = 0; j < ZiPath.Length; j++)
            {
                //获取所有子文件夹名
                string strZiPath = WordFileDir + "\\" + ZiPath[j].ToString();
                //把得到的子文件夹当成新的源文件夹,从头开始新一轮的搜索
                GetDealWithHtmlFile(strZiPath);
            }
        }
        /// <summary>
        /// 搜索WordFileDir在的*.doc文件
        /// </summary>
        /// <param name="WordFileDir"></param>
        private static void DealWithWordFile(string WordFileDir)
        {
            //创建数组保存源文件夹下的文件名
            string[] strFiles = Directory.GetFiles(WordFileDir, "*.doc");
            for (int i = 0; i < strFiles.Length; i++)
            {
                WordToHtmlFile(strFiles[i]);
            }

            DirectoryInfo dirInfo = new DirectoryInfo(WordFileDir);
            //取得源文件夹下的所有子文件夹名称
            DirectoryInfo[] ZiPath = dirInfo.GetDirectories();
            for (int j = 0; j < ZiPath.Length; j++)
            {
                //获取所有子文件夹名
                string strZiPath = WordFileDir + "\\" + ZiPath[j].ToString();
                //把得到的子文件夹当成新的源文件夹,从头开始新一轮的搜索
                DealWithWordFile(strZiPath);
            }
        }
        /// <summary>
        /// 转化
        /// </summary>
        /// <param name="WordFilePath"></param>
        private static void GetHtmlFileToDb(string WordFilePath) {
            string _filepath = string.Empty;
            string[] _WordFilePath = WordFilePath.Split(new string[] { "\\" }, StringSplitOptions.RemoveEmptyEntries);
            var s = string.Empty;
            var ss = string.Empty;
            var sss = string.Empty;
            if (_WordFilePath.Length > 0)
            {
                s = _WordFilePath[_WordFilePath.Length - 1];
                ss = _WordFilePath[_WordFilePath.Length - 2];
                sss = _WordFilePath[_WordFilePath.Length - 3];
                _filepath = $"{sss}/{ss}/{s}";
            }
            Maticsoft.Model.Baodian _baodian = _bll_baodian.GetModelList($"title='{s.Replace(".htm", "").Replace("'", "")}'").FirstOrDefault();
            if (_baodian != null) return;
            try
            {
                // 创建一个 StreamReader 的实例来读取文件
                // using 语句也能关闭 StreamReader
                using (StreamReader sr = new StreamReader(WordFilePath))
                {
                    string file;

                    // 从文件读取并显示行,直到文件的末尾
                    //while ((line = sr.ReadLine()) != null)
                    //{
                    //    Console.WriteLine(line);
                    //}
                    file = sr.ReadToEnd().ToString();
                    _bll_baodian.Add(new Maticsoft.Model.Baodian
                    {
                        title = s.Replace(".htm", "").Replace("'", ""),
                        wcontent = file,
                        wtype = ss,
                        filepath = _filepath,
                    });
                }
            }
            catch (Exception e)
            {
                // 向用户显示出错消息
                Console.WriteLine("The file could not be read:");
                Console.WriteLine(e.Message);
            }
        }
        /// <summary>
        /// 转化
        /// </summary>
        /// <param name="WordFilePath"></param>
        private static void WordToHtmlFile(string WordFilePath)
        {
            string _filepath = string.Empty;
            string[] _WordFilePath = WordFilePath.Split(new string[] {"\\"},StringSplitOptions.RemoveEmptyEntries);
            var s = string.Empty;
            var ss = string.Empty;
            var sss = string.Empty;
            if (_WordFilePath.Length > 0) {
                s = _WordFilePath[_WordFilePath.Length - 1];
                ss = _WordFilePath[_WordFilePath.Length - 2];
                sss = _WordFilePath[_WordFilePath.Length - 3];
                _filepath = $"{sss}/{ss}/{s}";
            }
            Maticsoft.Model.Baodian _baodian = _bll_baodian.GetModelList($"title='{s.Replace(".doc", "").Replace("'","")}'").FirstOrDefault();
            if (_baodian != null) return;
            try
            {
                Microsoft.Office.Interop.Word.Application newApp = new Microsoft.Office.Interop.Word.Application();
                // 指定原文件和目标文件
                object Source = WordFilePath;
                string SaveHtmlPath = WordFilePath.Substring(0, WordFilePath.Length - 3) + "html";
                object Target = SaveHtmlPath;

                // 缺省参数  
                object Unknown = Type.Missing;

                //为了保险,只读方式打开
                object readOnly = true;

                // 打开doc文件
                Microsoft.Office.Interop.Word.Document doc = newApp.Documents.Open(ref Source, ref Unknown,
                     ref readOnly, ref Unknown, ref Unknown,
                     ref Unknown, ref Unknown, ref Unknown,
                     ref Unknown, ref Unknown, ref Unknown,
                     ref Unknown, ref Unknown, ref Unknown,
                     ref Unknown, ref Unknown);

                // 指定另存为格式(rtf)
                object format = Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatHTML;
                // 转换格式
                doc.SaveAs(ref Target, ref format,
                        ref Unknown, ref Unknown, ref Unknown,
                        ref Unknown, ref Unknown, ref Unknown,
                        ref Unknown, ref Unknown, ref Unknown,
                        ref Unknown, ref Unknown, ref Unknown,
                        ref Unknown, ref Unknown);

                // 关闭文档和Word程序
                doc.Close(ref Unknown, ref Unknown, ref Unknown);
                newApp.Quit(ref Unknown, ref Unknown, ref Unknown);
                try
                {
                    // 创建一个 StreamReader 的实例来读取文件
                    // using 语句也能关闭 StreamReader
                    using (StreamReader sr = new StreamReader(SaveHtmlPath))
                    {
                        string file;

                        // 从文件读取并显示行,直到文件的末尾
                        //while ((line = sr.ReadLine()) != null)
                        //{
                        //    Console.WriteLine(line);
                        //}
                        file = sr.ReadToEnd().ToString();
                        _bll_baodian.Add(new Maticsoft.Model.Baodian {
                            title= s.Replace(".doc","").Replace("'",""),
                            wcontent=file,
                            wtype=ss,
                            filepath= _filepath,
                        });
                    }
                }
                catch (Exception e)
                {
                    // 向用户显示出错消息
                    Console.WriteLine("The file could not be read:");
                    Console.WriteLine(e.Message);
                }
            }
            catch (Exception e)
            {
                System.Windows.Forms.MessageBox.Show(e.Message);
            }
        }
    }
}