A bigram language model predicts the next word in a sequence based only on the previous word. Below is a C# example that demonstrates a simple bigram model:

C# Example Using a Bigram Approach

using System;
using System.Collections.Generic;
using System.Linq;

public class BigramModel
{
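    // Maps each word to a dictionary of follower words and their observed counts.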
    private Dictionary<string, Dictionary<string, int>> bigrams;
    private Random random;

    public BigramModel()
    {
        bigrams = new Dictionary<string, Dictionary<string, int>>();
        random = new Random();
    }

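    // Builds the bigram table: for every adjacent pair (words[i], words[i + 1])
    // in the training text, increments the count of words[i + 1] following words[i].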
    public void Train(string text)
    {
        var words = text.Split(new char[] { ' ', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
        for (int i = 0; i < words.Length - 1; i++)
        {
            var word = words[i];
            var nextWord = words[i + 1];

            if (!bigrams.ContainsKey(word))
            {
                bigrams[word] = new Dictionary<string, int>();
            }

            if (!bigrams[word].ContainsKey(nextWord))
            {
                bigrams[word][nextWord] = 0;
            }

            bigrams[word][nextWord]++;
        }
    }

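    // Generates up to 'length' words starting from startWord, repeatedly sampling
    // a follower word; stops early if the current word has no recorded followers.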
    public string GenerateSentence(string startWord, int length)
    {
        var currentWord = startWord;
        var sentence = new List<string> { currentWord };

        for (int i = 1; i < length; i++)
        {
            if (bigrams.ContainsKey(currentWord))
            {
                var nextWord = GetNextWord(currentWord);
                sentence.Add(nextWord);
                currentWord = nextWord;
            }
            else
            {
                break;
            }
        }

        return string.Join(" ", sentence);
    }

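    // Picks a follower of 'word' at random, weighted by frequency: a uniform draw
    // in [0, total) is mapped onto cumulative count ranges, so each candidate is
    // chosen with probability count / total.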
    private string GetNextWord(string word)
    {
        var possibleNextWords = bigrams[word];
        var total = possibleNextWords.Values.Sum();
        var choice = random.Next(total);
        var cumulative = 0;

        foreach (var kvp in possibleNextWords)
        {
            cumulative += kvp.Value;
            if (choice < cumulative)
            {
                return kvp.Key;
            }
        }

        return possibleNextWords.Keys.First(); // Fallback; not reached in practice, since choice is always less than the total count
    }
}

public class Program
{
    public static void Main()
    {
        string text = "this is a sample text to demonstrate how a bigram model works this is another sample text for the bigram model";
        var model = new BigramModel();
        model.Train(text);

        string startWord = "this";
        string sentence = model.GenerateSentence(startWord, 10);

        Console.WriteLine("Generated sentence:");
        Console.WriteLine(sentence);
    }
}

Explanation

  1. Training the Model:
  • The Train method splits the input text into words.
  • It iterates through the words to build a dictionary (bigrams) where each key is a word, and the value is another dictionary that tracks the frequency of words that follow the key word.
  2. Generating Sentences:
  • The GenerateSentence method starts with a given word and generates a sentence of a specified length.
  • It repeatedly selects the next word based on the frequency of words that follow the current word, using the GetNextWord method.
  3. Getting the Next Word:
  • The GetNextWord method uses a random choice to select the next word, weighted by its frequency of occurrence; a standalone sketch of this weighted selection follows the list.
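
To make the weighted selection concrete, here is a minimal standalone sketch of the same cumulative-sum technique, applied to the counts that Train records for the word "is" in the sample text from Main ("is" is followed once by "a" and once by "another", so each continuation should be chosen with probability 1/2). The WeightedChoiceDemo class is purely illustrative and not part of the model above.

using System;
using System.Collections.Generic;
using System.Linq;

public static class WeightedChoiceDemo
{
    public static void Run()
    {
        // Counts as Train would record them for "is" in the sample text:
        // "is a" occurs once and "is another" occurs once.
        var counts = new Dictionary<string, int> { ["a"] = 1, ["another"] = 1 };
        var random = new Random();

        // Same cumulative-sum selection as GetNextWord: a draw in [0, total)
        // falls into a word's range with probability proportional to its count.
        int total = counts.Values.Sum();   // 2
        int choice = random.Next(total);   // 0 or 1, each with probability 1/2
        int cumulative = 0;

        foreach (var kvp in counts)
        {
            cumulative += kvp.Value;
            if (choice < cumulative)
            {
                Console.WriteLine($"Next word after \"is\": {kvp.Key}");
                break;
            }
        }
    }
}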

Running the Program

Compile and run the program to see a sentence generated from the trained bigram model. Because the next word is drawn at random, the output varies between runs; one possible result is "this is a sample text to demonstrate how a bigram".

This is a simplified example for demonstration purposes. In a real-world application, you would need to handle punctuation, capitalization, and other linguistic features more carefully. Additionally, for larger datasets, optimizations and more sophisticated methods would be necessary.
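
As a minimal sketch of that kind of preprocessing (the TextPreprocessing class and its Normalize method are illustrative assumptions, not part of the model above), the raw text could be lowercased and stripped of punctuation before it is passed to Train:

using System.Text.RegularExpressions;

public static class TextPreprocessing
{
    // Illustrative helper: lowercases the input and replaces every character that
    // is not a letter, digit, or whitespace with a space, so punctuation does not
    // end up attached to tokens when Train splits the text.
    public static string Normalize(string text)
    {
        var lowered = text.ToLowerInvariant();
        return Regex.Replace(lowered, @"[^\p{L}\p{Nd}\s]", " ");
    }
}

It could then be used as model.Train(TextPreprocessing.Normalize(rawText));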


Johannes Rest

.NET Architect and Developer

