<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="ja">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>MeCab: ソフトわかち書き</title>
<link type="text/css" rel="stylesheet" href="mecab.css">
</head>
<body>
<h1>ソフトわかち書き</h1>
<p>$Id: soft.html 65 2007-01-30 00:52:53Z taku-ku $;</p>

<h2>概要</h2>
<p>MeCab 0.90 よりソフトわかち書きの機能が付きました. ソフトわかち書きは
入力文に対して可能なすべての形態素列を周辺確率付きで出力する機能です.
全文検索のインデキシングに使うことで,複合語の単語分割の曖昧性等を
解決することができます.
</p>

<p>ソフトわかち書きの詳細については<a href="http://chasen.org/~taku/publications/nlp2005.pdf">論文</a>をご参照下さい.</p>

<h2>全形態素の出力</h2>
<p>MeCab はデフォルトで最適解のみを出力します. -a オプションを指定すると
最適解ではなく, ラティス中の全形態素を出力します.</p>

<pre>
% mecab -a
東京都庁
東京 名詞,固有名詞,地域,一般,*,*,東京,トウキョウ,トーキョー
東 名詞,固有名詞,一般,*,*,*,東,ヒガシ,ヒガシ
東 名詞,固有名詞,地域,一般,*,*,東,ヒガシ,ヒガシ
東 名詞,固有名詞,人名,名,*,*,東,ヒガシ,ヒガシ
東 名詞,固有名詞,人名,姓,*,*,東,アズマ,アズマ
東 名詞,一般,*,*,*,*,東,ヒガシ,ヒガシ
京都 名詞,固有名詞,一般,*,*,*,京都,キョウト,キョート
京都 名詞,固有名詞,地域,一般,*,*,京都,キョウト,キョート
京 名詞,固有名詞,地域,一般,*,*,京,キョウ,キョー
京 名詞,固有名詞,人名,名,*,*,京,ミヤコ,ミヤコ
京 名詞,固有名詞,人名,姓,*,*,京,キョウ,キョー
都庁 名詞,一般,*,*,*,*,都庁,トチョウ,トチョー
都 名詞,接尾,地域,*,*,*,都,ト,ト
都 名詞,固有名詞,地域,一般,*,*,都,ミヤコ,ミヤコ
都 名詞,固有名詞,人名,姓,*,*,都,ミヤコ,ミヤコ
都 名詞,固有名詞,人名,名,*,*,都,ミヤコ,ミヤコ
都 名詞,一般,*,*,*,*,都,ト,ト
庁 名詞,接尾,一般,*,*,*,庁,チョウ,チョー
庁 名詞,固有名詞,地域,一般,*,*,庁,チョウ,チョー
庁 名詞,一般,*,*,*,*,庁,チョウ,チョー
EOS
</pre>

<h2>周辺確率の計算と出力</h2>
<p>各形態素の周辺確率(その形態素が入力文の中でどのくらいの確率で出現する
のか)の計算を行うには, -a と -l 2 オプションを同時に使います.
-l オプションはどの程度のラティス情報を解析時に構築するかを指定するオプションです.
-l2 と指定することで forward-backward アルゴリズムを使い周辺確率の計算を行います. -l オプションは以下のレベルがあります.
</p>

<ul>
<li>-l 0: 最適解のみが出力可能なレベル (デフォルト, 高速)
<li>-l 1: N-best 解が出力可能なレベル (中速)
<li>-l 2: ソフトわかち書きが可能なレベル (低速)
</ul>

<p>-l2 オプションを使うと浮動小数点演算が入るため -l 0 に比べて 8倍程度遅くなります.</p>

<p>各形態素の周辺確率を出力するには, -F オプションを使って出力フォーマットを個別に指定する
必要があります. 通常 %pP %pb を使います. 出力フォーマットの指定方法は
<a href="format.html">こちら</a>をご覧下さい.</p>

<ul>
<li>%pP: 周辺確率
<li>%pb: 最適解のとき * それ以外は空白文字
</ul>

<pre>
% mecab -l2 -a -F"%m %H %pP %pb\n" -E"EOS\n"
京都大学
京都大学 名詞,固有名詞,組織,*,*,*,京都大学,キョウトダイガク,キョートダイガク 0.559944 *
京都大 名詞,固有名詞,組織,*,*,*,京都大,キョウトダイ,キョートダイ 0.073824
京都 名詞,固有名詞,一般,*,*,*,京都,キョウト,キョート 0.004990
京都 名詞,固有名詞,地域,一般,*,*,京都,キョウト,キョート 0.360982
京 名詞,固有名詞,地域,一般,*,*,京,キョウ,キョー 0.000161
京 名詞,固有名詞,人名,名,*,*,京,ミヤコ,ミヤコ 0.000003
京 名詞,固有名詞,人名,姓,*,*,京,キョウ,キョー 0.000096
都 名詞,接尾,地域,*,*,*,都,ト,ト 0.000166
都 名詞,固有名詞,地域,一般,*,*,都,ミヤコ,ミヤコ 0.000001
都 名詞,固有名詞,人名,姓,*,*,都,ミヤコ,ミヤコ 0.000006
都 名詞,固有名詞,人名,名,*,*,都,ミヤコ,ミヤコ 0.000072
都 名詞,一般,*,*,*,*,都,ト,ト 0.000015
大学 名詞,固有名詞,地域,一般,*,*,大学,ダイガク,ダイガク 0.004919
大学 名詞,固有名詞,人名,名,*,*,大学,ダイガク,ダイガク 0.004441
大学 名詞,一般,*,*,*,*,大学,ダイガク,ダイガク 0.350523
大 名詞,接尾,一般,*,*,*,大,ダイ,ダイ 0.003603
大 接頭詞,名詞接続,*,*,*,*,大,ダイ,ダイ 0.001123
大 接頭詞,動詞接続,*,*,*,*,大,オオ,オー 0.000011
大 名詞,固有名詞,地域,一般,*,*,大,オオ,オー 0.000171
大 名詞,固有名詞,人名,名,*,*,大,マサル,マサル 0.000016
大 名詞,一般,*,*,*,*,大,ダイ,ダイ 0.001424
学 名詞,接尾,一般,*,*,*,学,ガク,ガク 0.067828
学 名詞,固有名詞,地域,一般,*,*,学,ガク,ガク 0.001092
学 名詞,固有名詞,人名,名,*,*,学,マナブ,マナブ 0.004203
学 名詞,一般,*,*,*,*,学,ガク,ガク 0.007051
EOS
</pre>

<p>-t 数値 オプションを使って確率値の「なめらかさ」を変更することができます.
数値を小さくすると, 確率値がなめらかになり, 短い形態素が大きな確率値を
持つようになります. 数値を大きくすると最適解の確率が大きくなります. デフォルトは 0.75 です.</p>

<pre>
% mecab -l2 -a -F"%m %H %pP %pb\n" -t0.1 -E"EOS\n"
京都大学
京都大学 名詞,固有名詞,組織,*,*,*,京都大学,キョウトダイガク,キョートダイガク 0.023617 *
京都大 名詞,固有名詞,組織,*,*,*,京都大,キョウトダイ,キョートダイ 0.052790
京都 名詞,固有名詞,一般,*,*,*,京都,キョウト,キョート 0.113576
京都 名詞,固有名詞,地域,一般,*,*,京都,キョウト,キョート 0.200919
京 名詞,固有名詞,地域,一般,*,*,京,キョウ,キョー 0.206514
京 名詞,固有名詞,人名,名,*,*,京,ミヤコ,ミヤコ 0.157030
京 名詞,固有名詞,人名,姓,*,*,京,キョウ,キョー 0.245554
都 名詞,接尾,地域,*,*,*,都,ト,ト 0.168921
都 名詞,固有名詞,地域,一般,*,*,都,ミヤコ,ミヤコ 0.090030
都 名詞,固有名詞,人名,姓,*,*,都,ミヤコ,ミヤコ 0.098721
都 名詞,固有名詞,人名,名,*,*,都,ミヤコ,ミヤコ 0.120077
都 名詞,一般,*,*,*,*,都,ト,ト 0.131348
大学 名詞,固有名詞,地域,一般,*,*,大学,ダイガク,ダイガク 0.056029
大学 名詞,固有名詞,人名,名,*,*,大学,ダイガク,ダイガク 0.063926
大学 名詞,一般,*,*,*,*,大学,ダイガク,ダイガク 0.097919
大 名詞,接尾,一般,*,*,*,大,ダイ,ダイ 0.150510
大 接頭詞,名詞接続,*,*,*,*,大,ダイ,ダイ 0.151888
大 接頭詞,動詞接続,*,*,*,*,大,オオ,オー 0.083163
大 名詞,固有名詞,地域,一般,*,*,大,オオ,オー 0.101090
大 名詞,固有名詞,人名,名,*,*,大,マサル,マサル 0.090363
大 名詞,一般,*,*,*,*,大,ダイ,ダイ 0.128706
学 名詞,接尾,一般,*,*,*,学,ガク,ガク 0.233658
学 名詞,固有名詞,地域,一般,*,*,学,ガク,ガク 0.150100
学 名詞,固有名詞,人名,名,*,*,学,マナブ,マナブ 0.174424
学 名詞,一般,*,*,*,*,学,ガク,ガク 0.200327
EOS
</pre>

<h2>ライブラリからの利用</h2>
<p>
-a オプションを指定した場合, mecab_sparse_tonode が返す node は
全形態素を辿ることができる双方向リストとなります.
-l2 を指定すると mecab_node_t::prob に周辺確率が代入されます.</p>

<pre>
mecab_t *mecab;
mecab_node_t *node;
mecab = mecab_new2("-l2 -a");
node = mecab_sparse_tonode(mecab, input);
for (; node; node = node->next) {
  /* 最適解もしくは確率が 0.05 以上のとき出力 */
  if (node->isbest || node->prob >= 0.05) {
    fwrite (node->surface, sizeof(char), node->length, stdout);
    printf("\t%s\t%f\n", node->feature, node->prob);
  }
}
</pre>

<hr>
<p>$Id: soft.html 65 2007-01-30 00:52:53Z taku-ku $;</p>
</body>
</html>