commit e25069cd9796c07ccbca8d74019493d5e8c408b3
Author: BlubbFish
Date:   Thu Mar 9 21:19:06 2017 +0000

    TT-Crawler hinzugefügt

diff --git a/TT-Crawler.sln b/TT-Crawler.sln
new file mode 100644
index 0000000..4e8f169
--- /dev/null
+++ b/TT-Crawler.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TT-Crawler", "TT-Crawler\TT-Crawler.csproj", "{2E66D72B-7C9C-4BB5-AA51-C2686CBAF837}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{2E66D72B-7C9C-4BB5-AA51-C2686CBAF837}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{2E66D72B-7C9C-4BB5-AA51-C2686CBAF837}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{2E66D72B-7C9C-4BB5-AA51-C2686CBAF837}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{2E66D72B-7C9C-4BB5-AA51-C2686CBAF837}.Release|Any CPU.Build.0 = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/TT-Crawler/Crawler.cs b/TT-Crawler/Crawler.cs
new file mode 100644
index 0000000..db55756
--- /dev/null
+++ b/TT-Crawler/Crawler.cs
@@ -0,0 +1,75 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using TT_Crawler.classes;
+using TT_Crawler.classes.db;
+
+namespace TT_Crawler
+{
+    /// <summary>
+    /// Walks the list of games and, for every game that is not already marked done,
+    /// looks up the match-report page and tries to download its PDF. The outcome of
+    /// each game is recorded in a <see cref="Log"/> entry.
+    /// </summary>
+    class Crawler
+    {
+        // Progress/status hand-over fields. arbeite() writes them; nothing inside
+        // this class reads them back.
+        private bool HasNewPercent = false;
+        private int NewPercent = 0;
+        private bool HasNewStatus = false;
+        private string NewStatus = "";
+        private Log log;
+        private Games games;
+
+        /// <summary>Creates a crawler for the given game list.</summary>
+        /// <param name="games">Game list that <see cref="arbeite"/> iterates over.</param>
+        public Crawler(Games games)
+        {
+            this.games = games;
+        }
+
+        /// <summary>
+        /// Processes every remaining game: creates one log entry per game, skips games
+        /// whose PDF already exists, otherwise resolves the report page and downloads the PDF.
+        /// Exceptions thrown by <see cref="Crawl"/> are not caught here.
+        /// </summary>
+        public void arbeite()
+        {
+            this.NewStatus = "Beginne Downloads";
+            this.HasNewStatus = true;
+            this.log = new Log(games.getSize());
+            int processed = 0;
+            while (games.hasNext())
+            {
+                // Publish the 1-based number of the game being processed.
+                this.NewPercent = ++processed;
+                this.HasNewPercent = true;
+                Games.dbStruct a = games.next();
+                log.initEntry();
+                if (a.done)
+                {
+                    log.addToEntry("Die PDF gibt es schon auf der Festplatte", Log.Error.Warn);
+                    log.setEntry(a.benum + ".pdf", a.benum);
+                    continue;
+                }
+                Crawl cw = new Crawl(a.url, log);
+                log.addToEntry("Partie " + a.heim + " - " + a.gast + " am " + a.datum + " ausgewählt", Log.Error.Ok);
+                string pdf = cw.findGame(a.datum, a.heim, a.gast);
+                if (pdf != null)
+                {
+                    log.addToEntry("Adresse der Spielberichtsseite gefunden: " + pdf, Log.Error.Ok);
+
+                    Crawl f2 = new Crawl(pdf, log);
+                    log.addToEntry("Datei " + Lib.link(pdf) + " geöffnet", Log.Error.Ok);
+                    bool pdfo = f2.findPdf(a.benum.ToString());
+                    if (pdfo)
+                        log.addToEntry("PDF gefunden und im Ordner out unter dem Namen 00" + a.benum + ".pdf gespeichert.", Log.Error.Ok);
+                    else
+                        log.addToEntry("Spielbericht noch nicht freigegeben!", Log.Error.Falue);
+                }
+                else
+                {
+                    log.addToEntry("Keine Adresse der Spielberichtsseite gefunden!", Log.Error.Falue);
+                }
+                // NOTE(review): Games.dbStruct is a struct, so this only marks the local
+                // copy returned by next(); the array inside Games is not updated.
+                a.done = true;
+                log.setEntry(a.benum + ".pdf", a.benum);
+            }
+            this.NewStatus = "Liste Abgearbeitet";
+            this.HasNewStatus = true;
+        }
+    }
+}
diff --git a/TT-Crawler/Form1.Designer.cs b/TT-Crawler/Form1.Designer.cs
new file mode 100644
index 0000000..b3a9024
--- /dev/null
+++ b/TT-Crawler/Form1.Designer.cs
@@ -0,0 +1,199 @@
+namespace TT_Crawler
+{
+    partial class Form1
+    {
+        ///
+        /// Erforderliche Designervariable.
+        ///
+        private System.ComponentModel.IContainer components = null;
+
+        ///
+        /// Verwendete Ressourcen bereinigen.
+        ///
+        /// True, wenn verwaltete Ressourcen gelöscht werden sollen; andernfalls False.
+ protected override void Dispose(bool disposing) + { + if (disposing && (components != null)) + { + components.Dispose(); + } + base.Dispose(disposing); + } + + #region Vom Windows Form-Designer generierter Code + + /// + /// Erforderliche Methode für die Designerunterstützung. + /// Der Inhalt der Methode darf nicht mit dem Code-Editor geändert werden. + /// + private void InitializeComponent() + { + this.openFileDialog = new System.Windows.Forms.OpenFileDialog(); + this.open = new System.Windows.Forms.Button(); + this.groupBox1 = new System.Windows.Forms.GroupBox(); + this.progress = new System.Windows.Forms.ProgressBar(); + this.dbasefile = new System.Windows.Forms.TextBox(); + this.statusStrip1 = new System.Windows.Forms.StatusStrip(); + this.statusFeld = new System.Windows.Forms.ToolStripStatusLabel(); + this.groupBox2 = new System.Windows.Forms.GroupBox(); + this.statusBox = new System.Windows.Forms.TextBox(); + this.groupBox3 = new System.Windows.Forms.GroupBox(); + this.liste = new System.Windows.Forms.DataGridView(); + this.backgroundWorker1 = new System.ComponentModel.BackgroundWorker(); + this.groupBox1.SuspendLayout(); + this.statusStrip1.SuspendLayout(); + this.groupBox2.SuspendLayout(); + this.groupBox3.SuspendLayout(); + ((System.ComponentModel.ISupportInitialize)(this.liste)).BeginInit(); + this.SuspendLayout(); + // + // openFileDialog + // + this.openFileDialog.Filter = "Dbase|*.dbf|Alle Dateien|*.*"; + this.openFileDialog.RestoreDirectory = true; + // + // open + // + this.open.Location = new System.Drawing.Point(289, 19); + this.open.Name = "open"; + this.open.Size = new System.Drawing.Size(75, 23); + this.open.TabIndex = 0; + this.open.Text = "Öffnen"; + this.open.UseVisualStyleBackColor = true; + this.open.Click += new System.EventHandler(this.open_Click); + // + // groupBox1 + // + this.groupBox1.Controls.Add(this.progress); + this.groupBox1.Controls.Add(this.dbasefile); + this.groupBox1.Controls.Add(this.open); + this.groupBox1.Location = 
new System.Drawing.Point(12, 12); + this.groupBox1.Name = "groupBox1"; + this.groupBox1.Size = new System.Drawing.Size(372, 85); + this.groupBox1.TabIndex = 1; + this.groupBox1.TabStop = false; + this.groupBox1.Text = "Datenbank"; + // + // progress + // + this.progress.Location = new System.Drawing.Point(6, 51); + this.progress.Name = "progress"; + this.progress.Size = new System.Drawing.Size(358, 23); + this.progress.Style = System.Windows.Forms.ProgressBarStyle.Continuous; + this.progress.TabIndex = 2; + // + // dbasefile + // + this.dbasefile.AcceptsReturn = true; + this.dbasefile.AllowDrop = true; + this.dbasefile.BackColor = System.Drawing.SystemColors.ControlLightLight; + this.dbasefile.Location = new System.Drawing.Point(6, 19); + this.dbasefile.Name = "dbasefile"; + this.dbasefile.ReadOnly = true; + this.dbasefile.Size = new System.Drawing.Size(277, 20); + this.dbasefile.TabIndex = 1; + // + // statusStrip1 + // + this.statusStrip1.Items.AddRange(new System.Windows.Forms.ToolStripItem[] { + this.statusFeld}); + this.statusStrip1.Location = new System.Drawing.Point(0, 464); + this.statusStrip1.Name = "statusStrip1"; + this.statusStrip1.Size = new System.Drawing.Size(844, 22); + this.statusStrip1.TabIndex = 2; + this.statusStrip1.Text = "statusStrip1"; + // + // statusFeld + // + this.statusFeld.Name = "statusFeld"; + this.statusFeld.Size = new System.Drawing.Size(0, 17); + // + // groupBox2 + // + this.groupBox2.Controls.Add(this.statusBox); + this.groupBox2.Location = new System.Drawing.Point(390, 12); + this.groupBox2.Name = "groupBox2"; + this.groupBox2.Size = new System.Drawing.Size(436, 85); + this.groupBox2.TabIndex = 5; + this.groupBox2.TabStop = false; + this.groupBox2.Text = "Status"; + // + // statusBox + // + this.statusBox.BackColor = System.Drawing.SystemColors.Window; + this.statusBox.Cursor = System.Windows.Forms.Cursors.Arrow; + this.statusBox.Location = new System.Drawing.Point(6, 19); + this.statusBox.Multiline = true; + 
this.statusBox.Name = "statusBox"; + this.statusBox.ReadOnly = true; + this.statusBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; + this.statusBox.Size = new System.Drawing.Size(421, 55); + this.statusBox.TabIndex = 0; + this.statusBox.TabStop = false; + // + // groupBox3 + // + this.groupBox3.Controls.Add(this.liste); + this.groupBox3.Location = new System.Drawing.Point(12, 103); + this.groupBox3.Name = "groupBox3"; + this.groupBox3.Size = new System.Drawing.Size(814, 357); + this.groupBox3.TabIndex = 6; + this.groupBox3.TabStop = false; + this.groupBox3.Text = "groupBox3"; + // + // liste + // + this.liste.AllowUserToAddRows = false; + this.liste.AllowUserToDeleteRows = false; + this.liste.AllowUserToOrderColumns = true; + this.liste.BackgroundColor = System.Drawing.SystemColors.Window; + this.liste.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize; + this.liste.Location = new System.Drawing.Point(6, 19); + this.liste.Name = "liste"; + this.liste.ReadOnly = true; + this.liste.Size = new System.Drawing.Size(799, 329); + this.liste.TabIndex = 0; + // + // Form1 + // + this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); + this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; + this.ClientSize = new System.Drawing.Size(844, 486); + this.Controls.Add(this.groupBox3); + this.Controls.Add(this.groupBox2); + this.Controls.Add(this.statusStrip1); + this.Controls.Add(this.groupBox1); + this.MaximizeBox = false; + this.Name = "Form1"; + this.Text = "TT-Crawler"; + this.groupBox1.ResumeLayout(false); + this.groupBox1.PerformLayout(); + this.statusStrip1.ResumeLayout(false); + this.statusStrip1.PerformLayout(); + this.groupBox2.ResumeLayout(false); + this.groupBox2.PerformLayout(); + this.groupBox3.ResumeLayout(false); + ((System.ComponentModel.ISupportInitialize)(this.liste)).EndInit(); + this.ResumeLayout(false); + this.PerformLayout(); + + } + + #endregion + + private 
System.Windows.Forms.OpenFileDialog openFileDialog; + private System.Windows.Forms.Button open; + private System.Windows.Forms.GroupBox groupBox1; + private System.Windows.Forms.ProgressBar progress; + private System.Windows.Forms.TextBox dbasefile; + private System.Windows.Forms.StatusStrip statusStrip1; + private System.Windows.Forms.ToolStripStatusLabel statusFeld; + private System.Windows.Forms.GroupBox groupBox2; + public System.Windows.Forms.TextBox statusBox; + private System.Windows.Forms.GroupBox groupBox3; + private System.Windows.Forms.DataGridView liste; + private System.ComponentModel.BackgroundWorker backgroundWorker1; + + } +} + diff --git a/TT-Crawler/Form1.cs b/TT-Crawler/Form1.cs new file mode 100644 index 0000000..f4c1009 --- /dev/null +++ b/TT-Crawler/Form1.cs @@ -0,0 +1,58 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Data; +using System.Drawing; +using System.Linq; +using System.Text; +using System.Windows.Forms; +using TT_Crawler.classes; +using TT_Crawler.classes.db; + +namespace TT_Crawler +{ + public partial class Form1 : Form + { + + private Dbase dbase; + + public Form1() + { + InitializeComponent(); + } + private void open_Click(object sender, System.EventArgs e) + { + if (openFileDialog.ShowDialog() == System.Windows.Forms.DialogResult.OK) + { + this.dbasefile.Text = openFileDialog.FileName; + this.progress.Value = 0; + this.dbase = new Dbase(this.dbasefile.Text, this.statusFeld); + if (this.dbase.getError()) + this.statusFeld.Text = "Fehler beim Lesen der DBase Datei"; + else + { + this.statusFeld.Text = "DBase Datei gelesen, konvertiere..."; + Games games = new Games(this.dbase,this.progress); + this.statusFeld.Text = "Datenbank kovertiert"; + Crawler cw = new Crawler(games); + this.backgroundWorker1.RunWorkerAsync(cw); + this.liste.DataSource = games.getView(); + while (true) + { + if (threadHasNewPercent) + { + this.progress.Value = threadNewPercent; + threadHasNewPercent = false; 
+ this.progress.Refresh(); + } + if (threadHasNewStatus) + { + this.statusFeld.Text = threadNewStatus; + threadHasNewStatus = false; + } + } + } + } + } + } +} diff --git a/TT-Crawler/Form1.resx b/TT-Crawler/Form1.resx new file mode 100644 index 0000000..bdd3212 --- /dev/null +++ b/TT-Crawler/Form1.resx @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + 17, 17 + + + 157, 17 + + + 273, 17 + + \ No newline at end of file diff --git a/TT-Crawler/Program.cs b/TT-Crawler/Program.cs new file mode 100644 index 0000000..3c60c96 --- /dev/null +++ b/TT-Crawler/Program.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Windows.Forms; + +namespace TT_Crawler +{ + static class Program + { + /// + /// Der Haupteinstiegspunkt für die Anwendung. + /// + [STAThread] + static void Main() + { + Application.EnableVisualStyles(); + Application.SetCompatibleTextRenderingDefault(false); + Application.Run(new Form1()); + } + } +} diff --git a/TT-Crawler/Properties/AssemblyInfo.cs b/TT-Crawler/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..2c57380 --- /dev/null +++ b/TT-Crawler/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// Allgemeine Informationen über eine Assembly werden über die folgenden +// Attribute gesteuert. Ändern Sie diese Attributwerte, um die Informationen zu ändern, +// die mit einer Assembly verknüpft sind. 
+[assembly: AssemblyTitle("TT-Crawler")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("TT-Crawler")] +[assembly: AssemblyCopyright("Copyright © 2009")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Durch Festlegen von ComVisible auf "false" werden die Typen in dieser Assembly unsichtbar +// für COM-Komponenten. Wenn Sie auf einen Typ in dieser Assembly von +// COM zugreifen müssen, legen Sie das ComVisible-Attribut für diesen Typ auf "true" fest. +[assembly: ComVisible(false)] + +// Die folgende GUID bestimmt die ID der Typbibliothek, wenn dieses Projekt für COM verfügbar gemacht wird +[assembly: Guid("7383b3ab-a018-40e1-9c86-c22e167e0288")] + +// Versionsinformationen für eine Assembly bestehen aus den folgenden vier Werten: +// +// Hauptversion +// Nebenversion +// Buildnummer +// Revision +// +// Sie können alle Werte angeben oder die standardmäßigen Build- und Revisionsnummern +// übernehmen, indem Sie "*" eingeben: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/TT-Crawler/Properties/Resources.Designer.cs b/TT-Crawler/Properties/Resources.Designer.cs new file mode 100644 index 0000000..1e725b4 --- /dev/null +++ b/TT-Crawler/Properties/Resources.Designer.cs @@ -0,0 +1,71 @@ +//------------------------------------------------------------------------------ +// +// Dieser Code wurde von einem Tool generiert. +// Laufzeitversion:2.0.50727.4927 +// +// Änderungen an dieser Datei können fehlerhaftes Verhalten verursachen und gehen verloren, wenn +// der Code neu generiert wird. +// +//------------------------------------------------------------------------------ + +namespace TT_Crawler.Properties +{ + + + /// + /// Eine stark typisierte Ressourcenklasse zum Suchen von lokalisierten Zeichenfolgen usw. 
+ /// + // Diese Klasse wurde von der StronglyTypedResourceBuilder-Klasse + // über ein Tool wie ResGen oder Visual Studio automatisch generiert. + // Um einen Member hinzuzufügen oder zu entfernen, bearbeiten Sie die .ResX-Datei und führen dann ResGen + // mit der Option /str erneut aus, oder erstellen Sie Ihr VS-Projekt neu. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "2.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class Resources + { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal Resources() + { + } + + /// + /// Gibt die zwischengespeicherte ResourceManager-Instanz zurück, die von dieser Klasse verwendet wird. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager + { + get + { + if ((resourceMan == null)) + { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("TT_Crawler.Properties.Resources", typeof(Resources).Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Überschreibt die CurrentUICulture-Eigenschaft des aktuellen Threads für alle + /// Ressourcenlookups, die diese stark typisierte Ressourcenklasse verwenden. 
+ /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture + { + get + { + return resourceCulture; + } + set + { + resourceCulture = value; + } + } + } +} diff --git a/TT-Crawler/Properties/Resources.resx b/TT-Crawler/Properties/Resources.resx new file mode 100644 index 0000000..ffecec8 --- /dev/null +++ b/TT-Crawler/Properties/Resources.resx @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + \ No newline at end of file diff --git a/TT-Crawler/Properties/Settings.Designer.cs b/TT-Crawler/Properties/Settings.Designer.cs new file mode 100644 index 0000000..f7a71e2 --- /dev/null +++ b/TT-Crawler/Properties/Settings.Designer.cs @@ -0,0 +1,30 @@ +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:2.0.50727.4927 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. 
+// +//------------------------------------------------------------------------------ + +namespace TT_Crawler.Properties +{ + + + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "9.0.0.0")] + internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase + { + + private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); + + public static Settings Default + { + get + { + return defaultInstance; + } + } + } +} diff --git a/TT-Crawler/Properties/Settings.settings b/TT-Crawler/Properties/Settings.settings new file mode 100644 index 0000000..abf36c5 --- /dev/null +++ b/TT-Crawler/Properties/Settings.settings @@ -0,0 +1,7 @@ + + + + + + + diff --git a/TT-Crawler/TT-Crawler.csproj b/TT-Crawler/TT-Crawler.csproj new file mode 100644 index 0000000..8ca03e9 --- /dev/null +++ b/TT-Crawler/TT-Crawler.csproj @@ -0,0 +1,95 @@ + + + + Debug + AnyCPU + 9.0.30729 + 2.0 + {2E66D72B-7C9C-4BB5-AA51-C2686CBAF837} + WinExe + Properties + TT_Crawler + TT-Crawler + v3.5 + 512 + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + 3.5 + + + 3.5 + + + 3.5 + + + + + + + + + + + + + + + + Form + + + Form1.cs + + + + + Form1.cs + + + ResXFileCodeGenerator + Resources.Designer.cs + Designer + + + True + Resources.resx + + + SettingsSingleFileGenerator + Settings.Designer.cs + + + True + Settings.settings + True + + + + + \ No newline at end of file diff --git a/TT-Crawler/classes/Crawl.cs b/TT-Crawler/classes/Crawl.cs new file mode 100644 index 0000000..7d590f8 --- /dev/null +++ b/TT-Crawler/classes/Crawl.cs @@ -0,0 +1,152 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Net; +using 
System.Text.RegularExpressions;
+using System.IO;
+
+namespace TT_Crawler.classes
+{
+    /// <summary>
+    /// Downloads one page as text and extracts information from that text:
+    /// the report-page address of a game (<see cref="findGame"/>) or the
+    /// report PDF itself (<see cref="findPdf"/>).
+    /// </summary>
+    class Crawl
+    {
+        private string inh;
+        private string date;
+        private Log log;
+
+        /// <summary>Downloads <paramref name="file"/> and keeps its content for later searches.</summary>
+        /// <param name="file">Address passed to <c>WebClient.DownloadString</c>.</param>
+        /// <param name="log">Log that receives messages produced by this instance.</param>
+        public Crawl(string file, Log log)
+        {
+            // Assign the log first: the empty-page branch below writes to it.
+            this.log = log;
+            using (WebClient wClient = new WebClient())
+            {
+                this.inh = wClient.DownloadString(file);
+            }
+            if (inh.Equals(""))
+            {
+                Console.WriteLine(this.log.addToEntry("Fehler beim Aufruf der Seite " + Lib.link(file), Log.Error.Falue));
+            }
+        }
+
+        // NOTE(review): the markup string literals of this method (and, judging by the
+        // undeclared tmp1 and the surplus closing brace, a loop header) are missing from
+        // this copy of the patch. The body is reproduced unchanged; restore it from the
+        // original repository before building.
+        public string findGame(DateTime datum, string heim, string gast)
+        {
+            string date = datum.ToString("dd.MM.yyyy");
+            var tmp_c = this.inh.Substring(this.inh.IndexOf(" ") + 10);
+            tmp_c = tmp_c.Substring(tmp_c.IndexOf(""));
+            string[] tmp = tmp_c.Split(new String[] {"" }, StringSplitOptions.None);
+                if (!tmp1[1].Substring(tmp1[1].LastIndexOf(">") + 1).Trim().Equals(" "))
+                    this.date = tmp1[1].Substring(tmp1[1].LastIndexOf(">") + 1).Trim();
+                if (this.date == date)
+                {
+                    if (this.verg(tmp1[4].Substring(tmp1[4].LastIndexOf(">") + 1).Trim(), heim))
+                    {
+                        if (this.verg(tmp1[5].Substring(tmp1[5].LastIndexOf(">") + 1).Trim(), gast))
+                        {
+                            string url = tmp1[6].Substring(tmp1[6].LastIndexOf("href") + 6).Trim();
+                            url = "http://wttv.click-tt.de"+url.Substring(0, url.IndexOf("\""));
+                            string ret = url.Replace("&", "&");
+                            if (ret.Equals(""))
+                                return null;
+                            else
+                                return ret;
+                        }
+                    }
+                }
+            }
+            return null;
+        }
+
+        /// <summary>
+        /// Looks for the "approved" marker in the downloaded page, follows the first link
+        /// after it and saves the target as <c>out\00&lt;name&gt;.pdf</c>.
+        /// </summary>
+        /// <param name="name">Number used to build the output file name.</param>
+        /// <returns>
+        /// <c>true</c> when the file was downloaded; <c>false</c> when the marker or a
+        /// complete <c>href="…"</c> after it is not present in the page.
+        /// </returns>
+        public bool findPdf(string name)
+        {
+            const string marker = "Spielbericht durch Staffelleiter genehmigt.";
+            // IndexOf reports "not found" as -1 (0 would be a match at the very start).
+            int markerPos = this.inh.IndexOf(marker);
+            if (markerPos < 0)
+                return false;
+            string url = this.inh.Substring(markerPos);
+            int hrefPos = url.IndexOf("href=\"");
+            if (hrefPos < 0)
+                return false;
+            url = url.Substring(hrefPos + 6);
+            int quotePos = url.IndexOf("\"");
+            if (quotePos < 0)
+                return false;
+            url = url.Substring(0, quotePos);
+            // The link is cut out of raw HTML, where '&' is written as "&amp;".
+            url = "http://wttv.click-tt.de" + url.Replace("&amp;", "&");
+
+            // Resolve redirects first; the final address is what gets downloaded.
+            string addr;
+            HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url);
+            using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
+            {
+                addr = myHttpWebResponse.ResponseUri.AbsoluteUri;
+            }
+
+            // CreateDirectory does nothing when the directory already exists.
+            Directory.CreateDirectory("out");
+            using (WebClient wClient = new WebClient())
+            {
+                wClient.DownloadFile(addr, "out\\00" + name + ".pdf");
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Compares two team names: exact match first, then after normalisation by
+        /// <see cref="ed"/>, finally by edit-distance similarity (match at 0.99 or above).
+        /// </summary>
+        private bool verg(string s1, string s2)
+        {
+            // NOTE(review): assumes the original literal was the HTML entity "&nbsp;";
+            // this copy showed a Replace with two identical arguments — confirm.
+            s1 = s1.Replace("&nbsp;", " ").Trim();
+            s2 = s2.Replace("&nbsp;", " ").Trim();
+            if (s1.Equals(s2))
+                return true;
+            // Normalise BOTH sides before comparing again.
+            s1 = this.ed(s1);
+            s2 = this.ed(s2);
+            if (s1.Equals(s2))
+                return true;
+            float p = this.GetSimilarity(s1, s2);
+            if (p >= 0.45)
+                Console.WriteLine(this.log.addToEntry("ClickTT: \""+s1+"\", TT-Datenbank: \""+s2+"\", Unterschied: "+(p*100)+"%",Log.Error.Ok));
+            return p >= 0.99;
+        }
+
+        /// <summary>Lower-cases a name, abbreviates "blau-weiß" to "bw" and removes tags.</summary>
+        private string ed(string s)
+        {
+            s = s.ToLower();
+            // NOTE(review): both arguments look identical in this copy of the patch;
+            // the first one presumably lost characters in extraction — confirm.
+            s = s.Replace(" ", " ");
+            s = s.Replace("blau-weiß", "bw");
+            s = Regex.Replace(s, "<.*?>", string.Empty);
+            return s;
+        }
+
+        /**
+         * An improvement on capturing similarity between strings
+         * By Thanh Dao
+         * www.codeproject.com
+         **/
+        /// <summary>Edit distance (insert, delete, substitute; cost 1 each) between two strings.</summary>
+        private int ComputeDistance(string s, string t)
+        {
+            int n = s.Length;
+            int m = t.Length;
+            if (n == 0) return m;
+            if (m == 0) return n;
+
+            int[,] distance = new int[n + 1, m + 1];
+            // First column / first row: distance from the empty prefix.
+            for (int i = 0; i <= n; i++)
+                distance[i, 0] = i;
+            for (int j = 0; j <= m; j++)
+                distance[0, j] = j;
+
+            for (int i = 1; i <= n; i++)
+            {
+                for (int j = 1; j <= m; j++)
+                {
+                    // Compare characters directly instead of allocating substrings.
+                    int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
+                    distance[i, j] = Min3(distance[i - 1, j] + 1,
+                                          distance[i, j - 1] + 1,
+                                          distance[i - 1, j - 1] + cost);
+                }
+            }
+            return distance[n, m];
+        }
+
+        /// <summary>Smallest of three integers.</summary>
+        private int Min3(int p, int p_2, int p_3)
+        {
+            return Math.Min(Math.Min(p, p_2), p_3);
+        }
+
+        /// <summary>
+        /// Similarity in [0, 1]: 1 minus edit distance divided by the longer length;
+        /// two empty strings count as identical (1.0).
+        /// </summary>
+        private float GetSimilarity(string string1, string string2)
+        {
+            float dis = ComputeDistance(string1, string2);
+            float maxLen = Math.Max(string1.Length, string2.Length);
+            if (maxLen == 0.0F)
+                return 1.0F;
+            return 1.0F - dis / maxLen;
+        }
+    }
+}
diff --git a/TT-Crawler/classes/Lib.cs b/TT-Crawler/classes/Lib.cs
new file mode 100644
index 0000000..c68d793
--- /dev/null
+++ b/TT-Crawler/classes/Lib.cs
@@ -0,0 +1,15 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace TT_Crawler.classes
+{
+    class Lib
+    {
+        public static string link(string url)
+        {
+            return url;
+        }
+    }
+}
diff --git a/TT-Crawler/classes/Log.cs b/TT-Crawler/classes/Log.cs
new file mode 100644
index 0000000..4e03d6e
--- /dev/null
+++ b/TT-Crawler/classes/Log.cs
@@ -0,0 +1,61 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace
TT_Crawler.classes
+{
+    /// <summary>
+    /// Fixed-size list of log entries. <see cref="initEntry"/> starts the next entry;
+    /// the other members operate on the entry started last.
+    /// </summary>
+    class Log
+    {
+        private logEntry[] entrys;
+
+        /// <summary>Severity of a message / state of an entry ("Falue" is spelled as in the callers).</summary>
+        public enum Error : int
+        {
+            Falue,
+            Warn,
+            Ok,
+            None,
+        }
+
+        /// <summary>One log record.</summary>
+        public struct logEntry
+        {
+            public int id;
+            public Error state;
+            public string file;
+            public DateTime date;
+            public string head_err;
+            public string data;
+        }
+
+        // Position of the current entry; -1 until initEntry() has been called once.
+        private int index = -1;
+
+        /// <summary>Creates a log with room for <paramref name="length"/> entries.</summary>
+        public Log(int length)
+        {
+            this.entrys = new logEntry[length];
+        }
+
+        /// <summary>
+        /// Appends <paramref name="text"/> to the current entry and updates its state:
+        /// a failure always sets the state to failure, a warning only downgrades an entry
+        /// that is still Ok. Warnings, and any text added while the entry is in the
+        /// failure state, are also appended to the entry's headline.
+        /// </summary>
+        /// <returns><paramref name="text"/>, unchanged.</returns>
+        public string addToEntry(string text, Error fail)
+        {
+            if (fail == Error.Falue)
+                this.entrys[this.index].state = fail;
+            else if (fail == Error.Warn && this.entrys[this.index].state == Error.Ok)
+                this.entrys[this.index].state = fail;
+            if (fail == Error.Warn || this.entrys[this.index].state == Error.Falue)
+                this.entrys[this.index].head_err += text;
+            this.entrys[this.index].data += text+"\n";
+            return text;
+        }
+
+        /// <summary>Advances to the next entry and resets its headline, state and text.</summary>
+        public void initEntry()
+        {
+            this.index++;
+            this.entrys[this.index].head_err = "";
+            this.entrys[this.index].state = Error.Ok;
+            this.entrys[this.index].data = "";
+        }
+
+        /// <summary>Stores id and file name on the current entry and stamps it with the current time.</summary>
+        public void setEntry(string file, int id)
+        {
+            this.entrys[this.index].id = id;
+            this.entrys[this.index].date = DateTime.Now;
+            this.entrys[this.index].file = file;
+        }
+
+        /// <summary>Returns a copy of the current entry.</summary>
+        public logEntry getEntry()
+        {
+            return this.entrys[index];
+        }
+    }
+}
diff --git a/TT-Crawler/classes/db/Dbase.cs b/TT-Crawler/classes/db/Dbase.cs
new file mode 100644
index 0000000..496afd1
--- /dev/null
+++ b/TT-Crawler/classes/db/Dbase.cs
@@ -0,0 +1,51 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Windows.Forms;
+using System.IO;
+using System.Data.Odbc;
+using System.Data;
+
+namespace TT_Crawler.classes.db
+{
+    /// <summary>
+    /// Reads a dBase file into a <see cref="DataTable"/> through the Microsoft dBase
+    /// ODBC driver.
+    /// </summary>
+    class Dbase
+    {
+        private bool err = false;
+        private ToolStripStatusLabel status;
+        private DataTable data;
+
+        /// <summary>Loads <paramref name="file"/> immediately; check <see cref="getError"/> afterwards.</summary>
+        /// <param name="file">Path of the .dbf file.</param>
+        /// <param name="status">Status label; stored, not otherwise used by this class.</param>
+        public Dbase(string file, ToolStripStatusLabel status)
+        {
+            this.status = status;
+            this.err = this.open(file);
+        }
+
+        /// <summary>Returns <c>true</c> when the file given to the constructor did not exist.</summary>
+        public bool getError()
+        {
+            return this.err;
+        }
+
+        /// <summary>
+        /// Opens the directory of <paramref name="file"/> as an ODBC data source and loads
+        /// the whole file as a table. ODBC exceptions are not caught; connection, command
+        /// and reader are released in every case.
+        /// </summary>
+        /// <returns><c>true</c> on error (file missing), <c>false</c> on success.</returns>
+        private bool open(string file)
+        {
+            if (!File.Exists(file))
+                return true;
+            // Path APIs instead of manual backslash search: also works for a bare file name.
+            string dir = Path.GetDirectoryName(Path.GetFullPath(file));
+            string table = Path.GetFileName(file);
+
+            using (OdbcConnection odbc = new OdbcConnection())
+            {
+                odbc.ConnectionString = @"DRIVER={Microsoft dBase Driver (*.dbf)};DBQ=" + dir + ";DefaultDir=" + dir + ";DriverId=533;MaxBufferSize=2048;PageTimeout=5";
+                odbc.Open();
+                using (OdbcCommand oCmd = odbc.CreateCommand())
+                {
+                    // NOTE(review): the table name is the user-selected file name and is
+                    // concatenated unquoted; names containing spaces will not parse.
+                    oCmd.CommandText = "SELECT * FROM " + table;
+                    using (OdbcDataReader reader = oCmd.ExecuteReader())
+                    {
+                        this.data = new DataTable();
+                        this.data.Load(reader);
+                    }
+                }
+            }
+
+            return false;
+        }
+
+        /// <summary>Returns the loaded table (null when loading failed).</summary>
+        public DataTable getTable()
+        {
+            return this.data;
+        }
+    }
+}
diff --git a/TT-Crawler/classes/db/Games.cs b/TT-Crawler/classes/db/Games.cs
new file mode 100644
index 0000000..f355219
--- /dev/null
+++ b/TT-Crawler/classes/db/Games.cs
@@ -0,0 +1,132 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Data;
+using System.Windows.Forms;
+using System.IO;
+
+namespace TT_Crawler.classes.db
+{
+    /// <summary>
+    /// In-memory list of games built from a <see cref="Dbase"/> table, with a simple
+    /// forward cursor (<see cref="hasNext"/> / <see cref="next"/>).
+    /// </summary>
+    class Games
+    {
+        /// <summary>One game row.</summary>
+        public struct dbStruct
+        {
+            public string url;
+            public DateTime datum;
+            public string heim;
+            public string gast;
+            public int benum;
+            public bool done;
+        }
+        private dbStruct[] db;
+        private int index = 0;
+        private Dbase dbase;
+        private ProgressBar p;
+
+        /// <summary>Loads all rows of <paramref name="dbase"/> and sets the bar's maximum to the row count.</summary>
+        public Games(Dbase dbase, ProgressBar p)
+        {
+            this.dbase = dbase;
+            this.p = p;
+            this.load();
+        }
+
+        /// <summary>
+        /// Copies the columns URL, DATUM, HEIM, GAST and BERICHTNR of every row into the
+        /// array; a game counts as done when out\00&lt;benum&gt;.pdf already exists.
+        /// </summary>
+        private void load()
+        {
+            DataTable a = this.dbase.getTable();
+            p.Maximum = a.Rows.Count;
+            this.db = new dbStruct[a.Rows.Count];
+            int i = 0;
+            foreach (DataRow myRow in a.Rows)
+            {
+                db[i].url = (string)myRow["URL"];
+                db[i].datum = (DateTime)myRow["DATUM"];
+                db[i].heim = (string)myRow["HEIM"];
+                db[i].gast = (string)myRow["GAST"];
+                // The report number is read as a double and truncated to int.
+                db[i].benum = (int)((double)myRow["BERICHTNR"]);
+                db[i].done = File.Exists("out\\00" + db[i].benum + ".pdf");
+                i++;
+            }
+        }
+
+        /// <summary>Returns a copy of the entry at the cursor and advances the cursor.</summary>
+        public dbStruct next()
+        {
+            return this.db[this.index++];
+        }
+
+        /// <summary>Returns <c>true</c> while the cursor has not passed the last entry.</summary>
+        public bool hasNext()
+        {
+            return this.index < this.db.Length;
+        }
+
/// <summary>
+        /// Marks one entry as done.
+        /// </summary>
+        /// <param name="i">
+        /// Zero-based position of the entry in the list (treated as an array index;
+        /// NOTE(review): confirm that callers do not pass a report number here).
+        /// </param>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not a valid position.</exception>
+        public void entryDone(int i)
+        {
+            // Use the argument, not the cursor: after next() the cursor already points
+            // at the following entry and runs past the array at the end of the list.
+            if (i < 0 || i >= this.db.Length)
+                throw new ArgumentOutOfRangeException("i");
+            this.db[i].done = true;
+        }
+
+        /// <summary>Number of entries in the list.</summary>
+        public int getSize()
+        {
+            return this.db.Length;
+        }
+
+        /// <summary>
+        /// Builds a display table with one row per entry and the read-only columns
+        /// PDF (unique), Heim, Gast, Datum, Status and Fehler. Status is "100%" for
+        /// entries marked done and "0%" otherwise; Fehler is always "Kein Fehler".
+        /// </summary>
+        public DataTable getView()
+        {
+            DataTable a = new DataTable();
+
+            addColumn(a, "PDF", typeof(string), true);
+            addColumn(a, "Heim", typeof(string), false);
+            addColumn(a, "Gast", typeof(string), false);
+            addColumn(a, "Datum", typeof(DateTime), false);
+            addColumn(a, "Status", typeof(string), false);
+            addColumn(a, "Fehler", typeof(string), false);
+
+            foreach (dbStruct row in db)
+            {
+                DataRow myDataRow = a.NewRow();
+                myDataRow["PDF"] = row.benum+".pdf";
+                myDataRow["Heim"] = row.heim;
+                myDataRow["Gast"] = row.gast;
+                myDataRow["Datum"] = row.datum;
+                myDataRow["Status"] = row.done ? "100%" : "0%";
+                myDataRow["Fehler"] = "Kein Fehler";
+                a.Rows.Add(myDataRow);
+            }
+            return a;
+        }
+
+        /// <summary>Appends one read-only column of the given name and type to <paramref name="table"/>.</summary>
+        private static void addColumn(DataTable table, string name, Type type, bool unique)
+        {
+            DataColumn column = new DataColumn();
+            column.DataType = type;
+            column.ColumnName = name;
+            column.ReadOnly = true;
+            column.Unique = unique;
+            table.Columns.Add(column);
+        }
+    }
+}