
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace SortEnglishWordsByLength
{
    /// <summary>
    /// Given a tab-separated multi-language titles-only dump, creates a series of files
    /// 1.txt, 2.txt, 3.txt etc. each listing all the words of 1, 2, 3... letters long.
    /// </summary>
    /// <remarks>
    /// Each input line is split on tabs. The first column is taken as the title (the word);
    /// a line counts as English when any later column is exactly "en".
    /// NOTE(review): the original comment pointed at an example dump ("of the kind found here:")
    /// but the link itself is not present in this file.
    /// </remarks>
    class Program
    {
        private const string INPUT_FILE = "c:/users/home/desktop/20221001.txt";
        private const string OUTPUT_FOLDER = "c:/users/home/desktop/output/";

        /// <summary>
        /// Reads <see cref="INPUT_FILE"/>, groups its English word-like titles by length and
        /// writes one "&lt;length&gt;.txt" file per length into <see cref="OUTPUT_FOLDER"/>.
        /// </summary>
        static void Main()
        {
            // File.ReadLines streams the dump line by line instead of loading it all into memory.
            Dictionary<int, List<string>> wordsForLengths =
                GroupEnglishWordsByLength(File.ReadLines(INPUT_FILE));

            // File.WriteAllLines does not create missing directories; this is a no-op if it exists.
            Directory.CreateDirectory(OUTPUT_FOLDER);

            foreach (KeyValuePair<int, List<string>> entry in wordsForLengths)
            {
                File.WriteAllLines(Path.Combine(OUTPUT_FOLDER, entry.Key + ".txt"), entry.Value);
            }
        }

        /// <summary>
        /// Groups the English, word-like titles found in <paramref name="lines"/> by their length.
        /// </summary>
        /// <param name="lines">Tab-separated lines: title first, then the other columns.</param>
        /// <returns>
        /// A map from word length to the words of that length, in the order they were encountered.
        /// Duplicates are kept. Lengths with no words have no entry.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="lines"/> is null.</exception>
        internal static Dictionary<int, List<string>> GroupEnglishWordsByLength(IEnumerable<string> lines)
        {
            if (lines == null)
            {
                throw new ArgumentNullException(nameof(lines));
            }

            var wordsForLengths = new Dictionary<int, List<string>>();

            foreach (string line in lines)
            {
                string[] bits = line.Split('\t');

                // Skip the line unless one of the columns after the title is exactly "en".
                if (!bits.Skip(1).Contains("en"))
                {
                    continue;
                }

                string word = bits[0];

                // An empty title passes IsWordlike vacuously; it is not a word, so skip it
                // rather than producing a "0.txt".
                if (word.Length == 0 || !IsWordlike(word))
                {
                    continue;
                }

                List<string> bucket;
                if (!wordsForLengths.TryGetValue(word.Length, out bucket))
                {
                    bucket = new List<string>();
                    wordsForLengths[word.Length] = bucket;
                }

                // The bucket was previously created but never filled, leaving every output file empty.
                bucket.Add(word);
            }

            return wordsForLengths;
        }

        /// <summary>
        /// Tells whether <paramref name="x"/> consists only of ASCII letters, ASCII digits,
        /// apostrophes and hyphens.
        /// </summary>
        /// <param name="x">The candidate word.</param>
        /// <returns>
        /// <c>true</c> if every character is allowed (vacuously true for an empty string);
        /// otherwise <c>false</c>.
        /// </returns>
        static bool IsWordlike(string x)
        {
            foreach (char ch in x)
            {
                bool isPunctuation = ch == '\'' || ch == '-';
                bool isLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
                bool isDigit = ch >= '0' && ch <= '9';

                if (!isPunctuation && !isLetter && !isDigit)
                {
                    return false;
                }
            }

            return true;
        }
    }
}