using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace ExtractBookWords
{
/// <summary>
/// Given an input text such as the contents of a book, converts the words to wiki links (handling
/// letter casing, punctuation, etc.) so that missing words become easily-spotted red links.
/// </summary>
class Program
{
    private const string INPUT_FILE = @"C:\Users\home\Desktop\input.txt";
    private const string OUTPUT_FILE = @"C:\Users\home\Desktop\output.txt";

    /// <summary>Characters that separate one token from the next in the input text.</summary>
    private static readonly char[] TokenSeparators = { ' ', '\r', '\n', '\t' };

    /// <summary>
    /// Reads the input text, converts it to a run of wiki links and writes the result.
    /// </summary>
    /// <param name="args">
    /// Optional overrides: <c>args[0]</c> is the input path and <c>args[1]</c> the output path.
    /// Whichever is missing falls back to <see cref="INPUT_FILE"/> / <see cref="OUTPUT_FILE"/>.
    /// </param>
    static void Main(string[] args)
    {
        string inputPath = (args != null && args.Length > 0) ? args[0] : INPUT_FILE;
        string outputPath = (args != null && args.Length > 1) ? args[1] : OUTPUT_FILE;

        File.WriteAllText(outputPath, ConvertToWikiLinks(File.ReadAllText(inputPath)));
    }

    /// <summary>
    /// Splits <paramref name="text"/> on whitespace and wraps each usable word in
    /// <c>[[</c>…<c>]]</c>, each link followed by a single space.
    /// </summary>
    /// <remarks>
    /// A token that directly follows a sentence-ending token is skipped: its capital letter
    /// cannot be told apart from that of a proper noun. The very first token of the text is
    /// never skipped.
    /// </remarks>
    /// <param name="text">The raw text to convert; null or empty yields an empty string.</param>
    /// <returns>The concatenated wiki links (with a trailing space when non-empty).</returns>
    private static string ConvertToWikiLinks(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return string.Empty;
        }

        StringBuilder links = new StringBuilder();
        bool previousEndedSentence = false;
        foreach (string token in text.Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (!previousEndedSentence)
            {
                string word = KeepAlpha(token);
                if (word.Length > 0)
                {
                    links.Append("[[").Append(word).Append("]] ");
                }
            }

            // Evaluated even for skipped tokens, so a one-word sentence still hides its successor.
            previousEndedSentence = EndsSentence(token);
        }

        return links.ToString();
    }

    /// <summary>
    /// Tells whether a token ends with '.', '?', '!' or a straight double quote.
    /// </summary>
    /// <param name="token">A non-empty token (empty entries are removed by the split).</param>
    private static bool EndsSentence(string token)
    {
        // Compare the last character directly so the result never depends on the current culture.
        char last = token[token.Length - 1];
        return last == '.' || last == '?' || last == '!' || last == '"';
    }

    /// <summary>
    /// Reduces a token to its ASCII letters (A-Z, a-z); every other character is dropped.
    /// </summary>
    /// <param name="s">The token to clean; null or empty yields an empty string.</param>
    /// <returns>
    /// An empty string when the token contains a hyphen or an apostrophe; otherwise the letters
    /// of the token, lower-cased when the token does not start with a letter (for example when
    /// it opens with a quotation mark).
    /// </returns>
    private static string KeepAlpha(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return string.Empty;
        }

        // A leading non-letter means the original capitalisation is not kept.
        bool forceLower = !IsAsciiLetter(s[0]);

        StringBuilder letters = new StringBuilder(s.Length);
        foreach (char ch in s)
        {
            if (ch == '-' || ch == '\'')
            {
                // Hyphenated words and contractions are rejected as a whole.
                return string.Empty;
            }

            if (IsAsciiLetter(ch))
            {
                letters.Append(ch);
            }
        }

        string word = letters.ToString();

        // Invariant casing: a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures.
        return forceLower ? word.ToLowerInvariant() : word;
    }

    /// <summary>Tells whether <paramref name="ch"/> is one of A-Z or a-z.</summary>
    private static bool IsAsciiLetter(char ch)
    {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    }
}
}